Cutlass
CUDA Templates for Linear Algebra Subroutines and Solvers
|
#include <dgemm_traits.h>
Additional Inherited Members | |
![]() | |
typedef double | ScalarA |
The scalar for A. More... | |
typedef double | ScalarB |
The scalar for B. More... | |
typedef double | ScalarC |
The scalar for C. More... | |
typedef double | ScalarD |
The scalar for D. More... | |
typedef OutputTile_ | OutputTile |
The tile. More... | |
typedef ThreadMultiplyAdd< ThreadGemmShape_, Shape< 1, 4, 8 >, double, double, double > | MultiplyAdd |
The functor to do D = A*B + C. More... | |
typedef MultiplyAdd::InstructionShape | InstructionShape |
The shape of the instruction. More... | |
typedef MultiplyAdd::AccumulatorsPerWarp | AccumulatorsPerWarp |
The shape of warp-level GEMM. More... | |
typedef MultiplyAdd::Accumulators | Accumulators |
The accumulators. More... | |
typedef ShapeDiv< OutputTile, AccumulatorsPerWarp >::Shape | Warps |
The number of warps. More... | |
![]() | |
static int const | kWarpSize |
The default warp size (32 threads per warp). More... | |
static int const | kThreads |
The number of threads. More... | |
static int const | kScalarsPerLdgA |
The number of scalars per LDG/STS/LDS for A. More... | |
static int const | kScalarsPerStsA |
static int const | kScalarsPerLdsA |
static int const | kScalarsPerLdgB |
The number of scalars per LDG/STS/LDS for B. More... | |
static int const | kScalarsPerStsB |
static int const | kScalarsPerLdsB |
static int const | kScalarsPerLdgC |
The number of scalars per LDG for C. More... | |
static int const | kScalarsPerStgD |
The number of scalars per STS/LDS/STG for D. More... | |
static int const | kScalarsPerStsD |
static int const | kScalarsPerLdsD |
static int const | kAccumulatorsPerLdsA |
The number of accumulators that are going to be fed from one LDS A/B. More... | |
static int const | kAccumulatorsPerLdsB |
static int const | kStages |
The number of stages in shared memory used to implement double, triple, or deeper buffering. More... | |
static bool const | kResidueSeparate |
If true, mainloop is instantiated twice. The first instantiation contains no predicate. More... | |
static bool const | kResidueInProlog |
If true, residue is computed in the prologue. More... | |
static bool const | kLaunchBounds |
If true, kernel is launched with launch bounds specified. More... | |