Cutlass
CUDA Templates for Linear Algebra Subroutines and Solvers
Public Types | Public Member Functions | List of all members
cutlass::gemm::ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, int8_t, int8_t, int > Struct Template Reference

Template performing matrix multiply-add operation within a thread.

#include <igemm_multiply_add.h>

Public Types

typedef Shape< 4, 1, 1 > InstructionShape
 The shape of the instruction. More...
 
typedef ThreadGemmShape_ ThreadGemmShape
 Shape of the thread-level GEMM (K-by-N-by-M) More...
 
typedef ThreadGemmShape AccumulatorsPerThread
 Aliased for compatibility. Will be removed in CUTLASS v2.0. More...
 
typedef ThreadsPerWarp_ ThreadsPerWarp
 The number of threads per warp. More...
 
typedef ShapeMul< ThreadGemmShape, ThreadsPerWarp >::Shape AccumulatorsPerWarp
 The number of accumulators per warp. More...
 
typedef int8_t ScalarA
 The type for A. More...
 
typedef Fragment< ScalarA, AccumulatorsPerThread::kW *4 > FragmentA
 The fragment for A. More...
 
typedef int8_t ScalarB
 The type for B. More...
 
typedef Fragment< ScalarB, AccumulatorsPerThread::kH *4 > FragmentB
 The fragment for B. More...
 
typedef int ScalarC
 The type for C and D. More...
 
typedef Fragment< ScalarC, AccumulatorsPerThread::kH *AccumulatorsPerThread::kW > Accumulators
 The accumulators. More...
 

Public Member Functions

CUTLASS_DEVICE ThreadMultiplyAdd ()
 Constructor. More...
 
CUTLASS_DEVICE void multiply_add (FragmentA const &a, FragmentB const &b, Accumulators const &c, Accumulators &d)
 Multiply-add: d = a*b + c. More...
 

Member Typedef Documentation

◆ Accumulators

template<typename ThreadGemmShape_ , typename ThreadsPerWarp_ >
typedef Fragment<ScalarC, AccumulatorsPerThread::kH * AccumulatorsPerThread::kW> cutlass::gemm::ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, int8_t, int8_t, int >::Accumulators

◆ AccumulatorsPerThread

template<typename ThreadGemmShape_ , typename ThreadsPerWarp_ >
typedef ThreadGemmShape cutlass::gemm::ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, int8_t, int8_t, int >::AccumulatorsPerThread

◆ AccumulatorsPerWarp

template<typename ThreadGemmShape_ , typename ThreadsPerWarp_ >
typedef ShapeMul<ThreadGemmShape, ThreadsPerWarp>::Shape cutlass::gemm::ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, int8_t, int8_t, int >::AccumulatorsPerWarp

◆ FragmentA

template<typename ThreadGemmShape_ , typename ThreadsPerWarp_ >
typedef Fragment<ScalarA, AccumulatorsPerThread::kW * 4> cutlass::gemm::ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, int8_t, int8_t, int >::FragmentA

◆ FragmentB

template<typename ThreadGemmShape_ , typename ThreadsPerWarp_ >
typedef Fragment<ScalarB, AccumulatorsPerThread::kH * 4> cutlass::gemm::ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, int8_t, int8_t, int >::FragmentB

◆ InstructionShape

template<typename ThreadGemmShape_ , typename ThreadsPerWarp_ >
typedef Shape<4, 1, 1> cutlass::gemm::ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, int8_t, int8_t, int >::InstructionShape

◆ ScalarA

template<typename ThreadGemmShape_ , typename ThreadsPerWarp_ >
typedef int8_t cutlass::gemm::ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, int8_t, int8_t, int >::ScalarA

◆ ScalarB

template<typename ThreadGemmShape_ , typename ThreadsPerWarp_ >
typedef int8_t cutlass::gemm::ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, int8_t, int8_t, int >::ScalarB

◆ ScalarC

template<typename ThreadGemmShape_ , typename ThreadsPerWarp_ >
typedef int cutlass::gemm::ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, int8_t, int8_t, int >::ScalarC

◆ ThreadGemmShape

template<typename ThreadGemmShape_ , typename ThreadsPerWarp_ >
typedef ThreadGemmShape_ cutlass::gemm::ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, int8_t, int8_t, int >::ThreadGemmShape

◆ ThreadsPerWarp

template<typename ThreadGemmShape_ , typename ThreadsPerWarp_ >
typedef ThreadsPerWarp_ cutlass::gemm::ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, int8_t, int8_t, int >::ThreadsPerWarp

Constructor & Destructor Documentation

◆ ThreadMultiplyAdd()

template<typename ThreadGemmShape_ , typename ThreadsPerWarp_ >
CUTLASS_DEVICE cutlass::gemm::ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, int8_t, int8_t, int >::ThreadMultiplyAdd ( )
inline

Member Function Documentation

◆ multiply_add()

template<typename ThreadGemmShape_ , typename ThreadsPerWarp_ >
CUTLASS_DEVICE void cutlass::gemm::ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, int8_t, int8_t, int >::multiply_add ( FragmentA const &  a,
FragmentB const &  b,
Accumulators const &  c,
Accumulators &  d 
)
inline

The documentation for this struct was generated from the following file: