Cutlass
CUDA Templates for Linear Algebra Subroutines and Solvers
|
Template performing a matrix multiply-add operation within a thread.
#include <igemm_multiply_add.h>
Public Types | |
typedef Shape< 4, 1, 1 > | InstructionShape |
The shape of the instruction. More... | |
typedef AccumulatorsPerThread_ | AccumulatorsPerThread |
The number of accumulators per thread. More... | |
typedef ThreadsPerWarp_ | ThreadsPerWarp |
The number of threads per warp. More... | |
typedef ShapeMul< AccumulatorsPerThread, ThreadsPerWarp >::Shape | AccumulatorsPerWarp |
The number of accumulators per warp. More... | |
typedef int8_t | ScalarA |
The type for A. More... | |
typedef Fragment< ScalarA, AccumulatorsPerThread::kW * 4 > | FragmentA |
The fragment for A. More... | |
typedef int8_t | ScalarB |
The type for B. More... | |
typedef Fragment< ScalarB, AccumulatorsPerThread::kH * 4 > | FragmentB |
The fragment for B. More... | |
typedef int | ScalarC |
The type for C and D. More... | |
typedef Fragment< ScalarC, AccumulatorsPerThread::kH * AccumulatorsPerThread::kW > | Accumulators |
The accumulators. More... | |
Public Member Functions | |
CUTLASS_DEVICE | ThreadMultiplyAdd () |
Constructor. More... | |
CUTLASS_DEVICE void | multiply_add (FragmentA const &a, FragmentB const &b, Accumulators const &c, Accumulators &d) |
Multiply-add: d = a * b + c. More... | |
typedef Fragment<ScalarC, AccumulatorsPerThread::kH * AccumulatorsPerThread::kW> cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::Accumulators |
typedef AccumulatorsPerThread_ cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::AccumulatorsPerThread |
typedef ShapeMul<AccumulatorsPerThread, ThreadsPerWarp>::Shape cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::AccumulatorsPerWarp |
typedef Fragment<ScalarA, AccumulatorsPerThread::kW * 4> cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::FragmentA |
typedef Fragment<ScalarB, AccumulatorsPerThread::kH * 4> cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::FragmentB |
typedef Shape<4, 1, 1> cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::InstructionShape |
typedef int8_t cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::ScalarA |
typedef int8_t cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::ScalarB |
typedef int cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::ScalarC |
typedef ThreadsPerWarp_ cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::ThreadsPerWarp |
|
inline |
|
inline |