Cutlass
CUDA Templates for Linear Algebra Subroutines and Solvers
|
#include <gemm_traits.h>
Additional Inherited Members | |
Inherited public types | |
typedef GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > > | This_ |
The type of this traits class, i.e. the fully specialized GemmTraits. More... | |
typedef cutlass::gemm::Gemm< This_ > | KernelClass |
The struct that consumes this Traits. More... | |
typedef GemmConfig_ | GemmConfig |
The configuration. More... | |
typedef GemmConfig::OutputTile | OutputTile |
The output tile. More... | |
typedef Helper_::GlobalLoadStreamA | GlobalLoadStreamA |
The stream to load A from global memory to shared memory. More... | |
typedef Helper_::GlobalLoadStreamA ::Scalar | ScalarA |
The scalar for A. More... | |
typedef Helper_::GlobalLoadStreamB | GlobalLoadStreamB |
The stream to load B from global memory to shared memory. More... | |
typedef Helper_::GlobalLoadStreamB ::Scalar | ScalarB |
The scalar for B. More... | |
typedef Helper_::SharedLoadStreamA | SharedLoadStreamA |
The iterator for A to load from shared memory. More... | |
typedef Helper_::SharedLoadStreamB | SharedLoadStreamB |
The iterator for B to load from shared memory. More... | |
typedef GemmConfig::MultiplyAdd | MultiplyAdd |
The multiply-add functor. More... | |
typedef Epilogue_ | Epilogue |
The epilogue. More... | |
typedef Epilogue::ScalarC | ScalarC |
The scalar types used in the epilogue (applies to both ScalarC and ScalarD). More... | |
typedef Epilogue::ScalarD | ScalarD |
typedef IdentityBlockSwizzle | BlockSwizzle |
The block swizzle to reorganize the grid. More... | |
typedef Index_ | Index |
The index. More... | |
typedef ClearAccumulators< GemmConfig_::Accumulators::Element > | ClearAccumulators |
Clear the accumulators. More... | |
typedef GlobalLoadStreamPair< GlobalLoadStreamA, GlobalLoadStreamB, GemmConfig::kResidueInProlog > | GlobalLoadStream |
Assemble the global load streams for A/B. More... | |
typedef GlobalLoadStream::ThreadblockTileStorage | ThreadblockTileStorage |
Memory needed to store the threadblock-scoped GEMM tile. More... | |
typedef SharedStreamPair< SharedLoadStreamA, SharedLoadStreamB > | SharedStream |
Assemble the shared load streams for A/B. More... | |
Inherited static public member functions | |
static CUTLASS_DEVICE void | shared_load_fence (bool in_loop) |
The memory fence for shared loads. More... | |
static CUTLASS_DEVICE void | shared_store_fence (bool in_loop) |
The memory fence for shared stores. More... | |
Inherited static public attributes | |
static MatrixLayout::Kind const | kLayoutA |
The layout of A. More... | |
static MatrixLayout::Kind const | kLayoutB |
The layout of B. More... | |