liyinrong / composable_kernel / Commits

Commit 0e4243f2, authored 2 years ago by Po-Yen, Chen
Use initializer_list<> to init Tensor<> directly
parent e4e99a49
Showing 11 changed files with 26 additions and 26 deletions (+26, -26):

example/02_gemm_bilinear/gemm_bilinear_xdl_fp16.cpp (+1, -1)
example/18_batched_gemm_reduce/batched_gemm_reduce_xdl_fp16.cpp (+4, -4)
example/21_gemm_layernorm/gemm_xdl_layernorm_single_kernel_fp16.cpp (+3, -3)
library/include/ck/library/reference_tensor_operation/cpu/reference_gemm_layernorm.hpp (+2, -2)
profiler/include/profile_batched_gemm_reduce_impl.hpp (+4, -4)
profiler/include/profile_conv_fwd_bias_relu_add_impl.hpp (+1, -1)
profiler/include/profile_conv_fwd_bias_relu_impl.hpp (+1, -1)
profiler/include/profile_gemm_add_add_fastgelu_impl.hpp (+1, -1)
profiler/include/profile_gemm_bias_add_reduce_impl.hpp (+4, -4)
profiler/include/profile_gemm_bilinear_impl.hpp (+1, -1)
profiler/include/profile_gemm_reduce_impl.hpp (+4, -4)
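Every hunk in this commit applies the same mechanical change: a Tensor<> that used to be constructed from an explicitly spelled-out HostTensorDescriptor (sometimes via an intermediate std::vector<size_t>) is now constructed from a brace-enclosed list of lengths, letting the descriptor be built implicitly. The sketch below is a minimal, self-contained illustration of how such an initializer_list forwarding constructor can work; the simplified Tensor and HostTensorDescriptor types are hypothetical stand-ins for the composable_kernel classes, not their actual definitions.

#include <cstddef>
#include <functional>
#include <initializer_list>
#include <iostream>
#include <numeric>
#include <vector>

// Hypothetical, simplified stand-in for the host tensor descriptor: it only stores lengths.
struct HostTensorDescriptor
{
    HostTensorDescriptor(std::initializer_list<std::size_t> lens) : lengths_(lens) {}

    std::size_t GetElementSize() const
    {
        // Total number of elements is the product of all lengths.
        return std::accumulate(
            lengths_.begin(), lengths_.end(), std::size_t{1}, std::multiplies<std::size_t>{});
    }

    std::vector<std::size_t> lengths_;
};

// Hypothetical, simplified stand-in for Tensor<>: the initializer_list overload lets callers
// write Tensor<T> t({M, N}) instead of Tensor<T> t(HostTensorDescriptor({M, N})).
template <typename T>
struct Tensor
{
    explicit Tensor(const HostTensorDescriptor& desc)
        : desc_(desc), data_(desc.GetElementSize())
    {
    }

    // Forwarding constructor: build the descriptor from the lengths and delegate.
    Tensor(std::initializer_list<std::size_t> lens) : Tensor(HostTensorDescriptor(lens)) {}

    HostTensorDescriptor desc_;
    std::vector<T> data_;
};

int main()
{
    const std::size_t M = 4, N = 8;

    Tensor<float> c_old(HostTensorDescriptor({M, N})); // style removed by this commit
    Tensor<float> c_new({M, N});                       // style introduced by this commit

    std::cout << c_old.data_.size() << " == " << c_new.data_.size() << std::endl; // 32 == 32
    return 0;
}

With a forwarding constructor like this in place, call sites lose one level of nesting while accepting the same lengths, which is exactly the pattern repeated across the 11 files below.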
example/02_gemm_bilinear/gemm_bilinear_xdl_fp16.cpp

@@ -272,7 +272,7 @@ int main(int argc, char* argv[])
     if(do_verification)
     {
-        Tensor<CShuffleDataType> c_m_n(HostTensorDescriptor({M, N}));
+        Tensor<CShuffleDataType> c_m_n({M, N});
         using ReferenceGemmInstance = ck::tensor_operation::host::ReferenceGemm<ADataType,
                                                                                 BDataType,
...
example/18_batched_gemm_reduce/batched_gemm_reduce_xdl_fp16.cpp

@@ -150,13 +150,13 @@ int main(int argc, char* argv[])
     Tensor<CDataType> c_g_m_n_host_result(f_host_tensor_descriptor(BatchCount, M, N, StrideC, CLayout{}));
-    Tensor<ReduceDataType> d0_g_m_host_result(HostTensorDescriptor({BatchCount, M}));
-    Tensor<ReduceDataType> d1_g_m_host_result(HostTensorDescriptor({BatchCount, M}));
+    Tensor<ReduceDataType> d0_g_m_host_result({BatchCount, M});
+    Tensor<ReduceDataType> d1_g_m_host_result({BatchCount, M});
     Tensor<CDataType> c_g_m_n_device_result(f_host_tensor_descriptor(BatchCount, M, N, StrideC, CLayout{}));
-    Tensor<ReduceDataType> d0_g_m_device_result(HostTensorDescriptor({BatchCount, M}));
-    Tensor<ReduceDataType> d1_g_m_device_result(HostTensorDescriptor({BatchCount, M}));
+    Tensor<ReduceDataType> d0_g_m_device_result({BatchCount, M});
+    Tensor<ReduceDataType> d1_g_m_device_result({BatchCount, M});
     std::cout << "a_g_m_k: " << a_g_m_k.GetDesc() << std::endl;
     std::cout << "b_g_k_n: " << b_g_k_n.GetDesc() << std::endl;
...
example/21_gemm_layernorm/gemm_xdl_layernorm_single_kernel_fp16.cpp

@@ -151,10 +151,10 @@ int main(int argc, char* argv[])
     Tensor<CDataType> c_m_n_host_result(f_host_tensor_descriptor(M, N, StrideC, CLayout{}));
     Tensor<CDataType> c_m_n_device_result(f_host_tensor_descriptor(M, N, StrideC, CLayout{}));
     Tensor<AccDataType> acc_m_n_host_result(f_host_tensor_descriptor(M, N, StrideC, CLayout{}));
-    Tensor<C0DataType> c0_n_bias(HostTensorDescriptor(std::vector<size_t>({size_t(N)})));
+    Tensor<C0DataType> c0_n_bias({N});
     Tensor<C0DataType> c0_m_n_add(f_host_tensor_descriptor(M, N, StrideC, CLayout{}));
-    Tensor<C0DataType> c0_n_gamma(HostTensorDescriptor(std::vector<size_t>({size_t(N)})));
-    Tensor<C0DataType> c0_n_beta(HostTensorDescriptor(std::vector<size_t>({size_t(N)})));
+    Tensor<C0DataType> c0_n_gamma({N});
+    Tensor<C0DataType> c0_n_beta({N});
     std::cout << "a_m_k: " << a_m_k.GetDesc() << std::endl;
     std::cout << "b_k_n: " << b_k_n.GetDesc() << std::endl;
...
library/include/ck/library/reference_tensor_operation/cpu/reference_gemm_layernorm.hpp

@@ -46,8 +46,8 @@ struct ReferenceGemmLayernorm : public device::BaseOperator
        size_t M = acc.GetLengths()[0];
        size_t N = acc.GetLengths()[1];
-        Tensor<ComputeDataType> avg_acc_sq(HostTensorDescriptor(std::vector<size_t>({M})));
-        Tensor<ComputeDataType> avg_acc(HostTensorDescriptor(std::vector<size_t>({M})));
+        Tensor<ComputeDataType> avg_acc_sq({M});
+        Tensor<ComputeDataType> avg_acc({M});
        Tensor<ComputeDataType> acc_layernorm(acc);
        // reduce N dim
...
profiler/include/profile_batched_gemm_reduce_impl.hpp

@@ -96,13 +96,13 @@ bool profile_batched_gemm_reduce_impl(int do_verification,
     Tensor<CDataType> c_g_m_n_host_result(f_host_tensor_descriptor(BatchCount, M, N, StrideC, CLayout{}));
-    Tensor<ReduceDataType> d0_g_m_host_result(HostTensorDescriptor({BatchCount, M}));
-    Tensor<ReduceDataType> d1_g_m_host_result(HostTensorDescriptor({BatchCount, M}));
+    Tensor<ReduceDataType> d0_g_m_host_result({BatchCount, M});
+    Tensor<ReduceDataType> d1_g_m_host_result({BatchCount, M});
     Tensor<CDataType> c_g_m_n_device_result(f_host_tensor_descriptor(BatchCount, M, N, StrideC, CLayout{}));
-    Tensor<ReduceDataType> d0_g_m_device_result(HostTensorDescriptor({BatchCount, M}));
-    Tensor<ReduceDataType> d1_g_m_device_result(HostTensorDescriptor({BatchCount, M}));
+    Tensor<ReduceDataType> d0_g_m_device_result({BatchCount, M});
+    Tensor<ReduceDataType> d1_g_m_device_result({BatchCount, M});
     std::cout << "a_g_m_k: " << a_g_m_k.GetDesc() << std::endl;
     std::cout << "b_g_k_n: " << b_g_k_n.GetDesc() << std::endl;
...
profiler/include/profile_conv_fwd_bias_relu_add_impl.hpp

@@ -93,7 +93,7 @@ void profile_conv_fwd_bias_relu_add_impl(int do_verification,
         f_host_tensor_descriptor(N, K, Ho, Wo, OutLayout{}));
     // bias: assume contiguous 1d vector
-    Tensor<OutDataType> bias_k(HostTensorDescriptor({K}));
+    Tensor<OutDataType> bias_k({K});
     // residual: assume same layout as output tensor
     Tensor<OutDataType> resi_n_k_ho_wo(f_host_tensor_descriptor(N, K, Ho, Wo, OutLayout{}));
...
profiler/include/profile_conv_fwd_bias_relu_impl.hpp

@@ -93,7 +93,7 @@ void profile_conv_fwd_bias_relu_impl(int do_verification,
         f_host_tensor_descriptor(N, K, Ho, Wo, OutLayout{}));
     // bias: assume contiguous 1d vector
-    Tensor<OutDataType> bias_k(HostTensorDescriptor({K}));
+    Tensor<OutDataType> bias_k({K});
     std::cout << "in_n_c_hi_wi: " << in_n_c_hi_wi.GetDesc() << std::endl;
     std::cout << "wei_k_c_y_x: " << wei_k_c_y_x.GetDesc() << std::endl;
...
profiler/include/profile_gemm_add_add_fastgelu_impl.hpp

@@ -123,7 +123,7 @@ bool profile_gemm_add_add_fastgelu_impl(int do_verification,
     // run reference
     if(do_verification)
     {
-        Tensor<AccDataType> c_m_n(HostTensorDescriptor({M, N}));
+        Tensor<AccDataType> c_m_n({M, N});
         using ReferenceGemmInstance = ck::tensor_operation::host::ReferenceGemm<ADataType,
                                                                                 BDataType,
...
profiler/include/profile_gemm_bias_add_reduce_impl.hpp

@@ -99,12 +99,12 @@ void profile_gemm_bias_add_reduce_impl(int do_verification,
     Tensor<CDataType> c_m_n_host_result(f_host_tensor_descriptor2d(M, N, StrideC, CLayout{}));
     Tensor<BiasDataType> bias_n(f_host_tensor_descriptor1d(N, 1));
     Tensor<D0DataType> d0_m_n(f_host_tensor_descriptor2d(M, N, StrideC, CLayout{}));
-    Tensor<ReduceDataType> reduce0_m_host_result(HostTensorDescriptor({M}));
-    Tensor<ReduceDataType> reduce1_m_host_result(HostTensorDescriptor({M}));
+    Tensor<ReduceDataType> reduce0_m_host_result({M});
+    Tensor<ReduceDataType> reduce1_m_host_result({M});
     Tensor<CDataType> c_m_n_device_result(f_host_tensor_descriptor2d(M, N, StrideC, CLayout{}));
-    Tensor<ReduceDataType> reduce0_m_device_result(HostTensorDescriptor({M}));
-    Tensor<ReduceDataType> reduce1_m_device_result(HostTensorDescriptor({M}));
+    Tensor<ReduceDataType> reduce0_m_device_result({M});
+    Tensor<ReduceDataType> reduce1_m_device_result({M});
     std::cout << "a_m_k: " << a_m_k.GetDesc() << std::endl;
     std::cout << "b_k_n: " << b_k_n.GetDesc() << std::endl;
...
profiler/include/profile_gemm_bilinear_impl.hpp

@@ -118,7 +118,7 @@ bool profile_gemm_bilinear_impl(int do_verification,
     // run reference
     if(do_verification)
    {
-        Tensor<AccDataType> c_m_n(HostTensorDescriptor({M, N}));
+        Tensor<AccDataType> c_m_n({M, N});
        using ReferenceGemmInstance = ck::tensor_operation::host::ReferenceGemm<ADataType,
                                                                                BDataType,
...
profiler/include/profile_gemm_reduce_impl.hpp

@@ -92,12 +92,12 @@ bool profile_gemm_reduce_impl(int do_verification,
     Tensor<BDataType> b_k_n(f_host_tensor_descriptor(K, N, StrideB, BLayout{}));
     Tensor<CDataType> c_m_n_host_result(f_host_tensor_descriptor(M, N, StrideC, CLayout{}));
-    Tensor<ReduceDataType> reduce0_m_host_result(HostTensorDescriptor({M}));
-    Tensor<ReduceDataType> reduce1_m_host_result(HostTensorDescriptor({M}));
+    Tensor<ReduceDataType> reduce0_m_host_result({M});
+    Tensor<ReduceDataType> reduce1_m_host_result({M});
     Tensor<CDataType> c_m_n_device_result(f_host_tensor_descriptor(M, N, StrideC, CLayout{}));
-    Tensor<ReduceDataType> reduce0_m_device_result(HostTensorDescriptor({M}));
-    Tensor<ReduceDataType> reduce1_m_device_result(HostTensorDescriptor({M}));
+    Tensor<ReduceDataType> reduce0_m_device_result({M});
+    Tensor<ReduceDataType> reduce1_m_device_result({M});
     std::cout << "a_m_k: " << a_m_k.GetDesc() << std::endl;
     std::cout << "b_k_n: " << b_k_n.GetDesc() << std::endl;
...