|
struct | AlignedStruct |
|
struct | bin1_t |
|
struct | ComputeOffsetFromShape |
| Compute the offset for the given coordinates in a cube. More...
|
|
struct | ComputeOffsetFromStrides |
| Compute the offset for the given coordinates in a cube. More...
|
|
struct | ComputeThreadOffsetFromStrides |
| Decompose threadIdx.x into the coordinates of a cube whose dimensions are specified by Threads_. Afterwards compute the offset of those coordinates using Strides_. More...
|
|
struct | ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, 1 >, Shape< 1, S_h_, S_w_, 1 > > |
| Specialization for D=1 and C=1. More...
|
|
struct | ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, T_c_ >, Shape< 1, S_h_, S_w_, S_c_ > > |
| Specialization for D=1. More...
|
|
struct | ConstPredicateTileAdapter |
| Adapter to enable random access to predicates via logical coordinate within a tile. More...
|
|
struct | Convert |
|
struct | Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > > |
|
struct | Coord |
| Statically-sized array specifying Coords within a tensor. More...
|
|
struct | Copy |
|
struct | divide_assert |
|
struct | DumpType |
|
struct | Extent |
| Returns the extent of a scalar or vector. More...
|
|
struct | Extent< Vector< T, Lanes > > |
| Returns the number of lanes of a vector if need be. More...
|
|
struct | Extent< Vector< T, Lanes > const > |
| Returns the number of lanes of a vector if need be. More...
|
|
struct | Fragment |
| A template defining Fragment Concept. More...
|
|
struct | FragmentConstIterator |
|
struct | FragmentElementType |
| Specifies whether iterator storage fragment consists of Scalar values or WMMA matrix. More...
|
|
struct | FragmentIterator |
| A template defining Fragment Iterator Concept. More...
|
|
struct | GemmOperand |
| Gemm operand - D = A * B + C. More...
|
|
struct | Identity |
| Describes identity elements. More...
|
|
struct | IdentityTensorMapFunc |
|
struct | int4_t |
|
struct | is_pow2 |
|
struct | IteratorAdvance |
| Specifies dimension in which post-increment accesses advance. More...
|
|
struct | KernelLaunchConfiguration |
| Structure containing the basic launch configuration of a CUDA kernel. More...
|
|
struct | Load |
|
struct | Load< double, 2, Memory_, FragmentElementType::kScalar, double, kStride, 16 > |
|
struct | Load< Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, 1, 2 > |
| Partial specialization for 16b loads. More...
|
|
struct | Load< Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, kStride, 16 > |
|
struct | Load< Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, kStride, 4 > |
|
struct | Load< Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, kStride, 8 > |
|
struct | Load< Scalar_, kAccessSize, Memory_, FragmentElementType::kWmmaMatrix, FragmentElement_, kStride, size > |
|
struct | Load< Vector< bin1_t, 32 >, kAccessSize, Memory_, FragmentElementType::kWmmaMatrix, FragmentElement_, kStride, size > |
|
struct | Load< Vector< int4_t, 8 >, kAccessSize, Memory_, FragmentElementType::kWmmaMatrix, FragmentElement_, kStride, size > |
|
struct | Load< Vector< uint4_t, 8 >, kAccessSize, Memory_, FragmentElementType::kWmmaMatrix, FragmentElement_, kStride, size > |
|
struct | log2_down |
|
struct | log2_down< N, 1, Count > |
|
struct | log2_up |
|
struct | log2_up< N, 1, Count > |
|
struct | MatrixCoord |
|
struct | MatrixTransform |
| Transformation applied to matrix operands. More...
|
|
struct | Max |
|
struct | MemorySpace |
| Enum to specify which memory space data resides in. More...
|
|
struct | Min |
|
struct | PredicatedTileLoadStream |
| Generic stream for loading and transforming fragments. More...
|
|
struct | PredicatedTileStoreStream |
| Generic stream for transforming and storing fragments. More...
|
|
struct | PredicateTileAdapter |
| Adapter to enable random access to predicates via logical coordinate within a tile. More...
|
|
struct | PredicateVector |
| Statically sized array of bits implementing the Predicate Vector Concept. More...
|
|
struct | RegularTilePredicateFunctor |
| Functor computing a predicate given the logical position of an access. More...
|
|
struct | ReshapeTile |
|
struct | ReshapeTile< Tile_, kAccessSize_, true > |
|
struct | ScalarIO |
| Helper to enable formatted printing of CUTLASS scalar types to an ostream. More...
|
|
struct | Shape |
| A Shape implementing Layout Concept describing the dimensions of a cube. More...
|
|
struct | ShapeAdd |
|
struct | ShapeCount |
| Compute derived counts of a Layout Concept based class. More...
|
|
struct | ShapeDiv |
|
struct | ShapeDivCeiling |
|
struct | ShapeMax |
|
struct | ShapeMin |
|
struct | ShapeMul |
|
struct | ShapeScale |
|
struct | ShapeStrides |
|
struct | ShapeSub |
|
struct | sqrt_est |
|
struct | StorageType |
|
struct | StorageType< 1 > |
|
struct | StorageType< 2 > |
|
struct | StorageType< 4 > |
|
struct | Store |
|
struct | Store< double, 2, Memory_, FragmentElementType::kScalar, double, kStride, 16 > |
|
struct | Store< Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, 1, 2 > |
|
struct | Store< Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, kStride, 16 > |
|
struct | Store< Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, kStride, 4 > |
|
struct | Store< Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, kStride, 8 > |
|
struct | Store< Scalar_, kAccessSize, Memory_, FragmentElementType::kWmmaMatrix, FragmentElement_, kStride, size > |
|
class | TensorRef |
|
class | TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ > |
| Specialization for rank=1 case with no internal StrideVector. More...
|
|
struct | TensorRefArray |
|
struct | TensorRefBatchStrided |
|
class | TensorView |
| Defines a view into a logical tensor. More...
|
|
struct | TileAllocation |
| Class for storing a tile in memory and accessing it through a tensor ref. More...
|
|
struct | TileCoord |
|
struct | TiledThreadOffset |
| Basic thread offset function computed from a thread shape. More...
|
|
struct | TileIteratorBase |
| Iterator for accessing a stripmined tile in memory. More...
|
|
struct | TileLoadIterator |
| An iterator implementing Tile Load Iterator Concept for loading a tile from memory. More...
|
|
struct | TileLoadStream |
| Generic stream for loading and transforming fragments. More...
|
|
struct | TileStoreIterator |
| An iterator implementing Tile Store Iterator Concept for storing a tile to memory. More...
|
|
struct | TileStoreStream |
| Generic stream for transforming and storing fragments. More...
|
|
struct | TileTraits |
| A template defining Tile Traits Concept. More...
|
|
struct | TileTraitsContiguousMajor |
|
struct | TileTraitsStandard |
| Chooses 'best' shape to enable warp raking along contiguous dimension if possible. More...
|
|
struct | TileTraitsStrideMajor |
|
struct | TileTraitsWarpRake |
| Tiling in which warps rake across the contiguous dimension. More...
|
|
struct | TrivialPredicateTileAdapter |
| Always returns true predicate. More...
|
|
struct | uint4_t |
|
union | Vector |
|
union | Vector< bin1_t, kLanes_ > |
| Vector definition for 1-bit binary datatype. More...
|
|
union | Vector< half, 1 > |
|
union | Vector< half, kLanes_ > |
|
union | Vector< int4_t, kLanes_ > |
| Vector definition for 4-bit signed integer datatype. More...
|
|
union | Vector< uint4_t, kLanes_ > |
| Vector definition for 4-bit unsigned integer datatype. More...
|
|
struct | Vectorize |
|
struct | Vectorize< Vector< bin1_t, 32 >, kLanes_ > |
|
struct | Vectorize< Vector< int4_t, 8 >, kLanes_ > |
|
struct | Vectorize< Vector< uint4_t, 8 >, kLanes_ > |
|
struct | VectorTraits |
| Traits describing properties of vectors and scalar-as-vectors. More...
|
|
struct | VectorTraits< Vector< T, Lanes > > |
| Partial specialization for actual cutlass::Vector. More...
|
|
struct | VectorTraits< Vector< T, Lanes > const > |
| Partial specialization for actual cutlass::Vector. More...
|
|
struct | WmmaReshapeTile |
|
struct | WmmaReshapeTile< Tile_, kAccessSize_, kLdsPerAccess_, true > |
|
struct | ZipConvert |
| Zips two convert operations. More...
|
|
struct | ZipFragment |
| A template defining Fragment Concept. More...
|
|
struct | ZipTensorRef |
|
struct | ZipTileAllocation |
| Manages a pair of tile allocations as if they are one allocation. More...
|
|
class | ZipTileIterator |
| Constructs an iterator from a pair of iterators. More...
|
|
|
template<typename T , int Rank, typename Index > |
CUTLASS_HOST_DEVICE Coord< Rank, Index > | operator* (T s, Coord< Rank, Index > coord) |
| Scalar multiplication. More...
|
|
template<typename T , int Rank, typename Index > |
CUTLASS_HOST_DEVICE Coord< Rank, Index > | operator* (Coord< Rank, Index > coord, T s) |
| Scalar multiplication. More...
|
|
template<typename T , int Rank, typename Index > |
CUTLASS_HOST_DEVICE Coord< Rank, Index > | operator/ (T s, Coord< Rank, Index > coord) |
| Scalar division. More...
|
|
template<typename T , int Rank, typename Index > |
CUTLASS_HOST_DEVICE Coord< Rank, Index > | operator/ (Coord< Rank, Index > coord, T s) |
| Scalar division. More...
|
|
CUTLASS_HOST_DEVICE Coord< 1 > | make_Coord (int _0) |
| Helper to make a 1-element coordinate. More...
|
|
CUTLASS_HOST_DEVICE Coord< 2 > | make_Coord (int _0, int _1) |
| Helper to make a 2-element coordinate. More...
|
|
CUTLASS_HOST_DEVICE Coord< 3 > | make_Coord (int _0, int _1, int _2) |
| Helper to make a 3-element coordinate. More...
|
|
CUTLASS_HOST_DEVICE Coord< 4 > | make_Coord (int _0, int _1, int _2, int _3) |
| Helper to make a 4-element coordinate. More...
|
|
template<typename Shape_ > |
CUTLASS_HOST_DEVICE Coord< 3 > | make_Coord_from_shape () |
|
template<int Rank> |
std::ostream & | operator<< (std::ostream &out, Coord< Rank > const &coord) |
|
template<typename T > |
std::ostream & | operator<< (std::ostream &out, ScalarIO< T > const &scalar) |
| Default printing to ostream. More...
|
|
template<> |
std::ostream & | operator<< (std::ostream &out, ScalarIO< int8_t > const &scalar) |
| Printing to ostream of int8_t as integer rather than character. More...
|
|
template<> |
std::ostream & | operator<< (std::ostream &out, ScalarIO< uint8_t > const &scalar) |
| Printing to ostream of uint8_t as integer rather than character. More...
|
|
template<> |
std::ostream & | operator<< (std::ostream &out, ScalarIO< cutlass::Vector< cutlass::bin1_t, 32 > > const &scalar) |
| Printing to ostream of vector of 1b elements. More...
|
|
template<> |
std::ostream & | operator<< (std::ostream &out, ScalarIO< cutlass::Vector< cutlass::int4_t, 8 > > const &scalar) |
| Printing to ostream of vector of 4b signed integer elements. More...
|
|
template<> |
std::ostream & | operator<< (std::ostream &out, ScalarIO< cutlass::Vector< cutlass::uint4_t, 8 > > const &scalar) |
| Printing to ostream of vector of 4b unsigned integer elements. More...
|
|
template<typename InputIterator , typename Fragment > |
CUTLASS_HOST_DEVICE void | iterator_load (InputIterator &iterator, Fragment &fragment) |
|
template<typename OutputIterator , typename Fragment > |
CUTLASS_HOST_DEVICE void | iterator_store (OutputIterator &iterator, Fragment &fragment) |
|
template<typename TensorRef_ > |
CUTLASS_HOST_DEVICE TensorRefBatchStrided< typename TensorRef_::Storage, TensorRef_::kRank, typename TensorRef_::MapFunc, TensorRef_::kStorageRank, typename TensorRef_::Index, typename TensorRef_::LongIndex > | make_TensorRefBatchStrided (TensorRef_ const &ref, typename TensorRef_::LongIndex batch_stride=0) |
| Helper to construct a TensorRefBatchStrided<> object using type deduction. More...
|
|
template<typename dividend_t , typename divisor_t > |
CUTLASS_HOST_DEVICE dividend_t | round_nearest (dividend_t dividend, divisor_t divisor) |
|
template<typename value_t > |
CUTLASS_HOST_DEVICE value_t | gcd (value_t a, value_t b) |
|
template<typename value_t > |
CUTLASS_HOST_DEVICE value_t | lcm (value_t a, value_t b) |
|
template<typename value_t > |
CUTLASS_HOST_DEVICE value_t | clz (value_t x) |
|
template<typename value_t > |
CUTLASS_HOST_DEVICE value_t | find_log2 (value_t x) |
|
__host__ CUTLASS_DEVICE cudaError_t | cuda_perror_impl (cudaError_t error, const char *filename, int line) |
| The corresponding error message is printed to stderr (or stdout in device code) along with the supplied source context. More...
|
|
template<> |
struct | __align__ (1) AlignedStruct< 1 > |
|
template<> |
struct | __align__ (2) AlignedStruct< 2 > |
|
template<> |
struct | __align__ (4) AlignedStruct< 4 > |
|
template<> |
struct | __align__ (8) AlignedStruct< 8 > |
|
template<> |
struct | __align__ (16) AlignedStruct< 16 > |
|
template<> |
struct | __align__ (32) AlignedStruct< 32 > |
|
template<> |
struct | __align__ (64) AlignedStruct< 64 > |
|
template<typename Scalar_ > |
CUTLASS_HOST_DEVICE void | make_zero (Scalar_ &x) |
|
template<typename Scalar_ , int kLanes_> |
CUTLASS_HOST_DEVICE void | make_zero (Vector< Scalar_, kLanes_ > &vec) |
|
template<typename First , typename Second > |
CUTLASS_HOST_DEVICE ZipFragment< First, Second > | make_ZipFragment (First const &first, Second const &second) |
| Helper to construct a ZipFragment object. More...
|
|
template<typename First , typename Second > |
CUTLASS_HOST_DEVICE ZipConvert< First, Second > | make_ZipConvert (First const &first, Second const &second) |
| Helper to construct a ZipConvert object. More...
|
|
template<typename First , typename Second > |
CUTLASS_HOST_DEVICE ZipTensorRef< First, Second > | make_ZipTensorRef (First const &first, Second const &second) |
| Constructs a ZipTensorRef. More...
|
|