Cutlass
CUDA Templates for Linear Algebra Subroutines and Solvers
Classes | Public Types | Public Member Functions | Public Attributes | List of all members
cutlass::gemm::GlobalLoadStreamPair< StreamA_, StreamB_, kResidueInProlog_ > Struct Template Reference

Collect the global load streams for multiplicands.

#include <gemm_stream_pair.h>

Classes

struct  Params
 Parameters object. More...
 
struct  SharedStorage
 Defines a structure containing shared storage for each pair. More...
 

Public Types

typedef StreamA_ StreamA
 Stream for A multiplicand. More...
 
typedef StreamB_ StreamB
 Stream for B multiplicand. More...
 
typedef StreamA::Index Index
 Assumes the A stream defines the index type. More...
 
typedef ZipTileAllocation< typename StreamA::ThreadblockTileStorage, typename StreamB::ThreadblockTileStorage > ThreadblockTileStorage
 Shared memory allocation for threadblock-scoped GEMM tile. More...
 
typedef ThreadblockTileStorage::TensorRef ThreadblockTileRef
 ZipTensorRef to threadblock tiles. More...
 

Public Member Functions

CUTLASS_DEVICE GlobalLoadStreamPair (Params const &params, SharedStorage &shared_storage, ThreadblockTileRef const &threadblock_tile_ref, Coord< 3 > const bounds, Coord< 3 > const &block_offset=make_Coord(0, 0, 0))
 Constructor. More...
 
CUTLASS_DEVICE GlobalLoadStreamPair & operator+= (Coord< 3 > const offset)
 
CUTLASS_DEVICE GlobalLoadStreamPair & add_batch_offset (int batch_id)
 
CUTLASS_DEVICE void copy ()
 Trigger the copies from shared memory to registers. More...
 
CUTLASS_DEVICE void commit ()
 Commit the data. More...
 
CUTLASS_DEVICE void residue (Index k, bool skip_clear=false)
 Execute the residue code. More...
 
CUTLASS_DEVICE void move_to_residue (Index k, Index kTileK)
 Move to residue. More...
 
CUTLASS_DEVICE void rollback (bool kRollback)
 Rollback to beginning of first tile. More...
 

Public Attributes

StreamA stream_a
 Stream for A multiplicand. More...
 
StreamB stream_b
 Stream for B multiplicand. More...
 

Member Typedef Documentation

◆ Index

template<typename StreamA_ , typename StreamB_ , bool kResidueInProlog_>
typedef StreamA::Index cutlass::gemm::GlobalLoadStreamPair< StreamA_, StreamB_, kResidueInProlog_ >::Index

◆ StreamA

template<typename StreamA_ , typename StreamB_ , bool kResidueInProlog_>
typedef StreamA_ cutlass::gemm::GlobalLoadStreamPair< StreamA_, StreamB_, kResidueInProlog_ >::StreamA

◆ StreamB

template<typename StreamA_ , typename StreamB_ , bool kResidueInProlog_>
typedef StreamB_ cutlass::gemm::GlobalLoadStreamPair< StreamA_, StreamB_, kResidueInProlog_ >::StreamB

◆ ThreadblockTileRef

template<typename StreamA_ , typename StreamB_ , bool kResidueInProlog_>
typedef ThreadblockTileStorage::TensorRef cutlass::gemm::GlobalLoadStreamPair< StreamA_, StreamB_, kResidueInProlog_ >::ThreadblockTileRef

◆ ThreadblockTileStorage

template<typename StreamA_ , typename StreamB_ , bool kResidueInProlog_>
typedef ZipTileAllocation<typename StreamA::ThreadblockTileStorage, typename StreamB::ThreadblockTileStorage> cutlass::gemm::GlobalLoadStreamPair< StreamA_, StreamB_, kResidueInProlog_ >::ThreadblockTileStorage

Constructor & Destructor Documentation

◆ GlobalLoadStreamPair()

template<typename StreamA_ , typename StreamB_ , bool kResidueInProlog_>
CUTLASS_DEVICE cutlass::gemm::GlobalLoadStreamPair< StreamA_, StreamB_, kResidueInProlog_ >::GlobalLoadStreamPair ( Params const &  params,
SharedStorage &  shared_storage,
ThreadblockTileRef const &  threadblock_tile_ref,
Coord< 3 > const  bounds,
Coord< 3 > const &  block_offset = make_Coord(0, 0, 0) 
)
inline

Member Function Documentation

◆ add_batch_offset()

template<typename StreamA_ , typename StreamB_ , bool kResidueInProlog_>
CUTLASS_DEVICE GlobalLoadStreamPair& cutlass::gemm::GlobalLoadStreamPair< StreamA_, StreamB_, kResidueInProlog_ >::add_batch_offset ( int  batch_id)
inline

◆ commit()

template<typename StreamA_ , typename StreamB_ , bool kResidueInProlog_>
CUTLASS_DEVICE void cutlass::gemm::GlobalLoadStreamPair< StreamA_, StreamB_, kResidueInProlog_ >::commit ( )
inline

◆ copy()

template<typename StreamA_ , typename StreamB_ , bool kResidueInProlog_>
CUTLASS_DEVICE void cutlass::gemm::GlobalLoadStreamPair< StreamA_, StreamB_, kResidueInProlog_ >::copy ( )
inline

◆ move_to_residue()

template<typename StreamA_ , typename StreamB_ , bool kResidueInProlog_>
CUTLASS_DEVICE void cutlass::gemm::GlobalLoadStreamPair< StreamA_, StreamB_, kResidueInProlog_ >::move_to_residue ( Index  k,
Index  kTileK 
)
inline

◆ operator+=()

template<typename StreamA_ , typename StreamB_ , bool kResidueInProlog_>
CUTLASS_DEVICE GlobalLoadStreamPair& cutlass::gemm::GlobalLoadStreamPair< StreamA_, StreamB_, kResidueInProlog_ >::operator+= ( Coord< 3 > const  offset)
inline

◆ residue()

template<typename StreamA_ , typename StreamB_ , bool kResidueInProlog_>
CUTLASS_DEVICE void cutlass::gemm::GlobalLoadStreamPair< StreamA_, StreamB_, kResidueInProlog_ >::residue ( Index  k,
bool  skip_clear = false 
)
inline

◆ rollback()

template<typename StreamA_ , typename StreamB_ , bool kResidueInProlog_>
CUTLASS_DEVICE void cutlass::gemm::GlobalLoadStreamPair< StreamA_, StreamB_, kResidueInProlog_ >::rollback ( bool  kRollback)
inline

Member Data Documentation

◆ stream_a

template<typename StreamA_ , typename StreamB_ , bool kResidueInProlog_>
StreamA cutlass::gemm::GlobalLoadStreamPair< StreamA_, StreamB_, kResidueInProlog_ >::stream_a

◆ stream_b

template<typename StreamA_ , typename StreamB_ , bool kResidueInProlog_>
StreamB cutlass::gemm::GlobalLoadStreamPair< StreamA_, StreamB_, kResidueInProlog_ >::stream_b

The documentation for this struct was generated from the following file: