38 template <
typename ThreadShape>
45 int index = threadIdx.x;
47 thread_offset[3] = (index % ThreadShape::kC);
48 index = (index / ThreadShape::kC);
50 thread_offset[2] = (index % ThreadShape::kW);
51 index = (index / ThreadShape::kW);
53 thread_offset[1] = (index % ThreadShape::kH);
54 index = (index / ThreadShape::kH);
56 thread_offset[0] = index;
66 template <
typename Tile_,
int Threads>
76 "Tiling undefined if elements not divisible by threads.");
79 "This specialization assumes there are more threads than the contiguous dimension "
99 template <
typename Tile_,
int Threads>
109 "This specialization assumes there are more threads than the contiguous dimension "
113 "Tiling undefined if elements not divisible by threads.");
116 "The contiguous size of the tile must be divisible by the number of threads.");
134 template <
typename Tile_,
int Threads>
150 "Tiling undefined if elements not divisible by threads.");
176 int tid = threadIdx.x;
185 return make_Coord(0, warp_h, lane + kWarpSpanContiguous * warp_w, 0);
193 template <
typename Tile_,
int Threads>
212 "Tiling undefined if elements not divisible by threads.");
226 typedef typename Traits::Delta Delta;
232 typedef typename Traits::Iterations Iterations;
235 typedef typename Traits::ThreadOffset ThreadOffset;
Shape< 1, Tile::kH/Delta::kH, Tile::kW/ThreadShape::kW > Iterations
Number of iterations.
Definition: tile_traits_standard.h:169
Shape< 1, Tile::kH/ThreadShape::kH, 1, 1 > Iterations
Number of iterations.
Definition: tile_traits_standard.h:89
static int const kWarpCount
Number of participating warps.
Definition: tile_traits_standard.h:205
Definition: tile_traits_standard.h:100
Defines the Tile Traits concept and iterators for loading and storing to tiles efficiently.
static int const kWarpsStrided
Warps strip-mined across strided dimension.
Definition: tile_traits_standard.h:157
static int const kThreads
Number of participating threads.
Definition: tile_traits_standard.h:105
CUTLASS_HOST_DEVICE Coord< 1 > make_Coord(int _0)
Helper to make a 1-element coordinate.
Definition: coord.h:368
Computes the thread offset in (H, W) based on thread ID.
Definition: tile_traits_standard.h:172
static int const kThreads
Number of participating threads.
Definition: tile_traits_standard.h:72
Chooses 'best' shape to enable warp raking along contiguous dimension if possible.
Definition: tile_traits_standard.h:194
Tile_ Tile
Shape of tile.
Definition: tile_traits_standard.h:137
static int const kWarpsContiguous
Warps strip-mined across contiguous dimension.
Definition: tile_traits_standard.h:160
CUTLASS_HOST_DEVICE Coord< 4 > operator()() const
Computes the logical coordinate from thread shape.
Definition: tile_traits_standard.h:42
Shape< 1, kWarpsStrided, kWarpSize > Delta
The same warp rakes along the contiguous dimension.
Definition: tile_traits_standard.h:166
CUTLASS_HOST_DEVICE Coord< 4 > operator()() const
Basic thread offset function computed from a thread shape.
Definition: tile_traits_standard.h:175
Basic thread offset function computed from a thread shape.
Definition: tile_traits_standard.h:39
static int const kH
The height of the cube.
Definition: shape.h:68
static int const kThreads
Number of participating threads.
Definition: tile_traits_standard.h:140
Shape< 1, ThreadShape::kH, 1, 1 > Delta
Delta along each dimension.
Definition: tile_traits_standard.h:86
Shape< 1, kThreads/Tile::kW, Tile::kW, 1 > ThreadShape
Shape of threads.
Definition: tile_traits_standard.h:76
static int const kWarpSize
Hard-coded warp size.
Definition: tile_traits_standard.h:143
static int const kAccessSize
By default, do not do scalar loads.
Definition: tile_traits_standard.h:208
Tile_ Tile
Shape of tile.
Definition: tile_traits_standard.h:196
Tile_ Tile
Shape of tile.
Definition: tile_traits_standard.h:69
static int const kWarpCount
Number of participating warps.
Definition: tile_traits_standard.h:146
Shape< 1, kWarpsStrided, kWarpsContiguous *kWarpSize > ThreadShape
Arrangement of threads.
Definition: tile_traits_standard.h:163
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:46
Definition: tile_traits_standard.h:67
A Shape implementing Layout Concept describing the dimensions of a cube.
Definition: shape.h:64
TiledThreadOffset< ThreadShape > ThreadOffset
Computes the initial offset.
Definition: tile_traits_standard.h:92
Tile_ Tile
Shape of tile.
Definition: tile_traits_standard.h:102
static int const kW
The width of the cube.
Definition: shape.h:70
Tiling in which warps rake across the contiguous dimension.
Definition: tile_traits_standard.h:135
static int const kWarpSize
Hard-coded warp size.
Definition: tile_traits_standard.h:202
Shape< 1, 1, kThreads > Delta
Delta between each thread's access.
Definition: tile_traits_standard.h:122
Shape< 1, 1, kThreads > ThreadShape
Thread shape.
Definition: tile_traits_standard.h:110
Computes derived counts of a Layout Concept based class.
Definition: shape.h:79
TiledThreadOffset< ThreadShape > ThreadOffset
Computes the initial offset.
Definition: tile_traits_standard.h:128
static int const kThreads
Number of participating threads.
Definition: tile_traits_standard.h:199
Shape< 1, Tile::kH, Tile::kW/kThreads > Iterations
Number of iterations.
Definition: tile_traits_standard.h:125