Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
liyinrong
composable_kernel
Commits
d7a0a3f9
Commit
d7a0a3f9
authored
3 years ago
by
Jing Zhang
Browse files
Options
Download
Email Patches
Plain Diff
renaming/comments
parent
2cbb8976
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
composable_kernel/include/tensor_operation/blockwise_gemm_xdlops.hpp
+3
-2
...kernel/include/tensor_operation/blockwise_gemm_xdlops.hpp
composable_kernel/include/tensor_operation/xdlops_gemm.hpp
+2
-2
composable_kernel/include/tensor_operation/xdlops_gemm.hpp
with
5 additions
and
4 deletions
+5
-4
composable_kernel/include/tensor_operation/blockwise_gemm_xdlops.hpp
View file @
d7a0a3f9
...
...
@@ -247,14 +247,15 @@ struct BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1
}
private:
// A[K, M]
// A[K0, M0, M1, M2, K1]
static
constexpr
auto
a_thread_desc_
=
make_naive_tensor_descriptor_packed
(
make_tuple
(
Number
<
K0
>
{},
I1
,
I1
,
I1
,
Number
<
K1
>
{}));
// B[K, N]
// B[K0, N0, N1, N2, K1]
static
constexpr
auto
b_thread_desc_
=
make_naive_tensor_descriptor_packed
(
make_tuple
(
Number
<
K0
>
{},
I1
,
I1
,
I1
,
Number
<
K1
>
{}));
// C[M, N]
static
constexpr
auto
c_thread_desc_
=
make_naive_tensor_descriptor_packed
(
make_tuple
(
Number
<
MRepeat
>
{},
Number
<
NRepeat
>
{}));
...
...
This diff is collapsed.
Click to expand it.
composable_kernel/include/tensor_operation/xdlops_gemm.hpp
View file @
d7a0a3f9
...
...
@@ -545,7 +545,7 @@ struct MfmaSelector
selected_mfma
.
k_per_blk
;
}
static
constexpr
index_t
GetKPer
Thread
()
{
return
selected_mfma
.
k_per_blk
;
}
static
constexpr
index_t
GetK
1
Per
Xdlops
()
{
return
selected_mfma
.
k_per_blk
;
}
};
template
<
typename
base_type
,
index_t
MPerXdlops
,
index_t
NPerXdlops
,
index_t
KPack
>
...
...
@@ -708,7 +708,7 @@ struct XdlopsGemm
static
constexpr
auto
mfma_instr
=
mfma
.
selected_mfma
;
static
constexpr
auto
KPerXdlops
=
mfma
.
GetKPerXdlops
();
static
constexpr
auto
K1PerXdlops
=
mfma
.
GetKPer
Thread
();
static
constexpr
auto
K1PerXdlops
=
mfma
.
GetK
1
Per
Xdlops
();
static
constexpr
auto
K0PerXdlops
=
KPerXdlops
/
K1PerXdlops
;
__host__
__device__
static
constexpr
auto
GetCM0M1M2NThreadBlkLengths
()
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment
Menu
Projects
Groups
Snippets
Help