Cutlass
CUDA Templates for Linear Algebra Subroutines and Solvers
|
#include <hgemm_traits.h>
Public Types | |
typedef HgemmConfig< OutputTile_, ThreadGemmShape_, kScalarsPerLdgA_, kScalarsPerLdgB_ > | GemmConfig |
The HGEMM config. More... | |
typedef HgemmTileTraitsHelperA< kLayoutA_, GemmConfig > | GemmTileTraitsHelperA |
The tile traits helper for A, parameterized by the layout of A and the GEMM config. More... | |
typedef HgemmTileTraitsHelperB< kLayoutB_, GemmConfig > | GemmTileTraitsHelperB |
The tile traits helper for B, parameterized by the layout of B and the GEMM config. More... | |
typedef GemmGlobalIteratorAb< typename GemmTileTraitsHelperA::GlobalTileTraits, Index_ > | GlobalLoadIteratorA |
The iterator to load A from global memory. More... | |
typedef HgemmTransformerA< GemmTileTraitsHelperA::kLayout, GlobalLoadIteratorA >::Transformer | GlobalTransformerA |
The default transformer for A. More... | |
typedef TileStoreIterator< typename GemmTileTraitsHelperA::SharedStoreTileTraits, typename GemmTileTraitsHelperA::SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > | SharedStoreIteratorA |
The iterator to store A to shared memory. More... | |
typedef GlobalLoadStream< GemmOperand::kA, GlobalLoadIteratorA, SharedStoreIteratorA, GlobalTransformerA > | GlobalLoadStreamA |
The stream to load A from global memory to shared memory. More... | |
typedef GemmGlobalIteratorAb< typename GemmTileTraitsHelperB::GlobalTileTraits, Index_ > | GlobalLoadIteratorB |
The iterator to load B from global memory. More... | |
typedef HgemmTransformerB< GemmTileTraitsHelperB::kLayout, GlobalLoadIteratorB >::Transformer | GlobalTransformerB |
The default transformer for B. More... | |
typedef TileStoreIterator< typename GemmTileTraitsHelperB::SharedStoreTileTraits, typename GemmTileTraitsHelperB::SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > | SharedStoreIteratorB |
The iterator to store B to shared memory. More... | |
typedef GlobalLoadStream< GemmOperand::kB, GlobalLoadIteratorB, SharedStoreIteratorB, GlobalTransformerB > | GlobalLoadStreamB |
The stream to load B from global memory to shared memory. More... | |
typedef TileLoadIterator< typename GemmTileTraitsHelperA::SharedLoadTileTraits, typename GemmTileTraitsHelperA::SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > | SharedLoadIteratorA |
The iterator to load A from shared memory. More... | |
typedef SharedLoadStream< SharedLoadIteratorA > | SharedLoadStreamA |
The stream to load A from shared memory. More... | |
typedef TileLoadIterator< typename GemmTileTraitsHelperB::SharedLoadTileTraits, typename GemmTileTraitsHelperB::SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > | SharedLoadIteratorB |
The iterator to load B from shared memory. More... | |
typedef SharedLoadStream< SharedLoadIteratorB > | SharedLoadStreamB |
The stream to load B from shared memory. More... | |
typedef GemmConfig::MultiplyAdd | MultiplyAdd |
The functor to do the multiply-add in the main loop. More... | |
typedef ClearAccumulators< typename MultiplyAdd::ScalarC > | ClearAccumulators |
The object to clear accumulators. More... | |
typedef SimplifiedGemmEpilogueTraits< GemmConfig, EpilogueFunctor_, Index_ > | GemmEpilogueTraits |
The traits class for the epilogue. More... | |
typedef GemmEpilogue< GemmEpilogueTraits > | Epilogue |
The epilogue. More... | |
typedef ClearAccumulators<typename MultiplyAdd::ScalarC> cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, ThreadGemmShape_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::ClearAccumulators |
typedef GemmEpilogue<GemmEpilogueTraits> cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, ThreadGemmShape_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::Epilogue |
typedef HgemmConfig<OutputTile_, ThreadGemmShape_, kScalarsPerLdgA_, kScalarsPerLdgB_> cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, ThreadGemmShape_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::GemmConfig |
typedef SimplifiedGemmEpilogueTraits<GemmConfig, EpilogueFunctor_, Index_> cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, ThreadGemmShape_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::GemmEpilogueTraits |
typedef HgemmTileTraitsHelperA<kLayoutA_, GemmConfig> cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, ThreadGemmShape_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::GemmTileTraitsHelperA |
typedef HgemmTileTraitsHelperB<kLayoutB_, GemmConfig> cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, ThreadGemmShape_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::GemmTileTraitsHelperB |
typedef GemmGlobalIteratorAb<typename GemmTileTraitsHelperA::GlobalTileTraits, Index_> cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, ThreadGemmShape_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::GlobalLoadIteratorA |
typedef GemmGlobalIteratorAb<typename GemmTileTraitsHelperB::GlobalTileTraits, Index_> cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, ThreadGemmShape_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::GlobalLoadIteratorB |
typedef GlobalLoadStream<GemmOperand::kA, GlobalLoadIteratorA, SharedStoreIteratorA, GlobalTransformerA> cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, ThreadGemmShape_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::GlobalLoadStreamA |
typedef GlobalLoadStream<GemmOperand::kB, GlobalLoadIteratorB, SharedStoreIteratorB, GlobalTransformerB> cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, ThreadGemmShape_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::GlobalLoadStreamB |
typedef HgemmTransformerA<GemmTileTraitsHelperA::kLayout, GlobalLoadIteratorA>::Transformer cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, ThreadGemmShape_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::GlobalTransformerA |
typedef HgemmTransformerB<GemmTileTraitsHelperB::kLayout, GlobalLoadIteratorB>::Transformer cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, ThreadGemmShape_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::GlobalTransformerB |
typedef GemmConfig::MultiplyAdd cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, ThreadGemmShape_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::MultiplyAdd |
typedef TileLoadIterator<typename GemmTileTraitsHelperA::SharedLoadTileTraits, typename GemmTileTraitsHelperA::SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared> cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, ThreadGemmShape_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::SharedLoadIteratorA |
typedef TileLoadIterator<typename GemmTileTraitsHelperB::SharedLoadTileTraits, typename GemmTileTraitsHelperB::SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared> cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, ThreadGemmShape_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::SharedLoadIteratorB |
typedef SharedLoadStream<SharedLoadIteratorA> cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, ThreadGemmShape_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::SharedLoadStreamA |
typedef SharedLoadStream<SharedLoadIteratorB> cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, ThreadGemmShape_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::SharedLoadStreamB |
typedef TileStoreIterator<typename GemmTileTraitsHelperA::SharedStoreTileTraits, typename GemmTileTraitsHelperA::SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared> cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, ThreadGemmShape_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::SharedStoreIteratorA |
typedef TileStoreIterator<typename GemmTileTraitsHelperB::SharedStoreTileTraits, typename GemmTileTraitsHelperB::SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared> cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, ThreadGemmShape_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::SharedStoreIteratorB |