Cutlass
CUDA Templates for Linear Algebra Subroutines and Solvers
Classes | Public Types | Static Public Attributes | List of all members
cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ > Struct Template Reference

#include <gemm_shared_tile.h>

Classes

struct  ThreadOffset
 Computes the thread offset in (H, W) based on thread ID. More...
 

Public Types

typedef nv_std::remove_const< Scalar_ >::type Scalar
 The scalar. More...
 
typedef Scalar_ * Pointer
 The pointer. More...
 
typedef OutputTile_ OutputTile
 The dimension of the output tile. More...
 
typedef Warps_ Warps
 The warps in the tile. More...
 
typedef ThreadsPerWarp_ ThreadsPerWarp
 The threads in the warps. More...
 
typedef Shape< 1, 2, kScalarsPerRow/kScalarsPerSts, kScalarsPerSts > Tile
 The tile. More...
 
typedef Shape< 1, 1, kScalarsPerThread/kScalarsPerSts > Iterations
 The number of iterations needed to store the tile. More...
 
typedef Shape< 0, 0, Warps::kW *ThreadsPerWarp::kW *kScalarsPerSts > Delta
 The strides in each dimension between different loads/stores. More...
 

Static Public Attributes

static int const kScalarsPerSts = kScalarsPerSts_
 The number of scalars per STS (shared-memory store). More...
 
static int const kSkew = kSkew_
 The skew. More...
 
static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared
 The memory space. More...
 
static int const kScalarsPerThread = OutputTile_::kW / Warps::kW / ThreadsPerWarp::kW
 The number of scalars per thread. More...
 
static int const kThreads = ShapeCount<Warps>::kCount * kWarpSize
 The number of threads. More...
 
static int const kScalarsPerRow = kThreads / 2 * kScalarsPerThread + kSkew
 The number of scalars per row. We build a tile with 2 rows (to avoid bank conflicts). More...
 

Member Typedef Documentation

◆ Delta

template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
typedef Shape<0, 0, Warps::kW * ThreadsPerWarp::kW * kScalarsPerSts> cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::Delta

◆ Iterations

template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
typedef Shape<1, 1, kScalarsPerThread / kScalarsPerSts> cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::Iterations

◆ OutputTile

template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
typedef OutputTile_ cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::OutputTile

◆ Pointer

template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
typedef Scalar_* cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::Pointer

◆ Scalar

template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
typedef nv_std::remove_const<Scalar_>::type cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::Scalar

◆ ThreadsPerWarp

template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
typedef ThreadsPerWarp_ cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::ThreadsPerWarp

◆ Tile

template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
typedef Shape<1, 2, kScalarsPerRow / kScalarsPerSts, kScalarsPerSts> cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::Tile

◆ Warps

template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
typedef Warps_ cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::Warps

Member Data Documentation

◆ kMemorySpace

template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
MemorySpace::Kind const cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::kMemorySpace = MemorySpace::kShared
static

◆ kScalarsPerRow

template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
int const cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::kScalarsPerRow = kThreads / 2 * kScalarsPerThread + kSkew
static

◆ kScalarsPerSts

template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
int const cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::kScalarsPerSts = kScalarsPerSts_
static

◆ kScalarsPerThread

template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
int const cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::kScalarsPerThread = OutputTile_::kW / Warps::kW / ThreadsPerWarp::kW
static

◆ kSkew

template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
int const cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::kSkew = kSkew_
static

◆ kThreads

template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
int const cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::kThreads = ShapeCount<Warps>::kCount * kWarpSize
static

The documentation for this struct was generated from the following file: