37 template <
typename InputFragment_,
typename OutputFragment_>
42 template <
typename InputScalar_,
typename OutputScalar_,
int kScalars_>
54 transform(src, 0, dst);
58 template <
typename Fragment_>
60 for (
int i = 0; i < kScalars_; ++i) {
61 dst[i] =
static_cast<OutputScalar_
>(src[i + offset]);
68 template <
typename Fragment_>
/// Copies the contents of `src`, beginning at element index `offset`, into `dst`
/// by reinterpreting both fragments as arrays of fixed-width unsigned words.
///
/// \param src    Input fragment; only `&src[offset]` onward is read.
/// \param offset Element index in `src` at which the copy starts.
/// \param dst    Output fragment; written starting at `&dst[0]`.
///
/// NOTE(review): the leading integers (82, 83, ...) are listing line numbers
/// carried over from the extracted source view, not part of the code. Listing
/// lines 89-90 and the closing braces are not visible in this excerpt.
82 template <
typename InputFragment_>
83 CUTLASS_DEVICE
void transform(InputFragment_
const& src,
int offset, Fragment_& dst) {
// The word width is chosen from the size of the destination element type:
// 8-byte elements are moved as 64-bit words.
84 if (
sizeof(
typename Fragment_::Element) == 8) {
85 uint64_t
const* src_ptr =
reinterpret_cast<uint64_t const*
>(&src[offset]);
86 uint64_t* dst_ptr =
reinterpret_cast<uint64_t*
>(&dst[0]);
// Word count is sizeof(Fragment_) / 8 (integer division).
// NOTE(review): `int i` is compared against a size_t expression here
// (signed/unsigned comparison) - confirm this is warning-free in the build.
87 for (
int i = 0; i <
sizeof(Fragment_) / 8; ++i) {
88 dst_ptr[i] = src_ptr[i];
// Second copy path using 32-bit words; presumably the `else` branch of the
// size test above (the separating lines are missing from this excerpt - TODO confirm).
91 uint32_t
const* src_ptr =
reinterpret_cast<uint32_t const*
>(&src[offset]);
92 uint32_t* dst_ptr =
reinterpret_cast<uint32_t*
>(&dst[0]);
// Word count is sizeof(Fragment_) / 4 (integer division).
// NOTE(review): if sizeof(Fragment_) were not a multiple of 4, the remaining
// bytes would not be copied by this loop - verify against Fragment's layout.
93 for (
int i = 0; i <
sizeof(Fragment_) / 4; ++i) {
94 dst_ptr[i] = src_ptr[i];
CUTLASS_DEVICE void transform(Fragment_ const &src, Fragment_ &dst)
Transform a fragment.
Definition: convert.h:79
A template defining Fragment Concept.
Definition: fragment.h:99
CUTLASS_DEVICE void transform(InputFragment_ const &src, int offset, Fragment_ &dst)
Transform a fragment, reading the input starting at element index offset.
Definition: convert.h:83
Fragment_ InputFragment
The input fragment.
Definition: convert.h:71
CUTLASS_DEVICE Copy()
Default constructor.
Definition: convert.h:76
Fragment_ OutputFragment
The output fragment.
Definition: convert.h:73
Defines Fragment, a statically-sized array for storing parts of matrices within a thread's registers...