Cutlass
CUDA Templates for Linear Algebra Subroutines and Solvers
vector.h
Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
28 #pragma once
29 
30 #if !defined(__CUDACC_RTC__) || defined(CUTLASS_NVRTC_HAS_FP16)
31 #include <cuda_fp16.h>
32 #endif
33 
35 #include "cutlass/util/platform.h"
36 
37 namespace cutlass {
38 
40 
41 template <size_t kAlignment_>
42 struct AlignedStruct {};
43 
44 template <>
45 struct __align__(1) AlignedStruct<1>{};
46 template <>
47 struct __align__(2) AlignedStruct<2>{};
48 template <>
49 struct __align__(4) AlignedStruct<4>{};
50 template <>
51 struct __align__(8) AlignedStruct<8>{};
52 template <>
53 struct __align__(16) AlignedStruct<16>{};
54 template <>
55 struct __align__(32) AlignedStruct<32>{};
56 template <>
57 struct __align__(64) AlignedStruct<64>{};
58 
60 
61 template <typename Scalar_, int kLanes_>
62 union Vector {
64  typedef Scalar_ Scalar;
65 
67  enum { kLanes = kLanes_ };
69  enum { kVectorSize = kLanes * (int)sizeof(Scalar) };
71  enum { kRegisters = kVectorSize < 4 ? 1 : kVectorSize / 4 };
72 
73  // Make sure that the vector type makes sense.
74  static_assert(kVectorSize <= 16, "Vector type is too large");
75 
81  uint32_t registers[kRegisters];
82 
84  CUTLASS_HOST_DEVICE Scalar const& operator[](uint32_t i) const { return scalars[i]; }
86  CUTLASS_HOST_DEVICE Scalar& operator[](uint32_t i) { return scalars[i]; }
87 };
88 
90 
91 template <>
92 union Vector<half, 1> {
94  typedef half Scalar;
95 
97  enum { kLanes = 1 };
99  enum { kVectorSize = kLanes * (int)sizeof(Scalar) };
101  enum { kRegisters = kVectorSize < 4 ? 1 : kVectorSize / 4 };
102 
103  // Make sure that the vector type makes sense.
104  static_assert(kVectorSize <= 16, "Vector type is too large");
105 
109  uint16_t scalars[kLanes];
110 
112  CUTLASS_HOST_DEVICE Scalar const& operator[](uint32_t i) const {
113  return reinterpret_cast<Scalar const&>(scalars[i]);
114  }
117  return reinterpret_cast<Scalar&>(scalars[i]);
118  }
119 };
120 
121 #if !defined(__CUDACC_RTC__) || defined(CUTLASS_NVRTC_HAS_FP16)
122 
123 template <int kLanes_>
124 union Vector<half, kLanes_> {
126  typedef half Scalar;
127 
129  enum { kLanes = kLanes_ };
131  enum { kVectorSize = kLanes * (int)sizeof(Scalar) };
133  enum { kRegisters = kVectorSize < 4 ? 1 : kVectorSize / 4 };
134 
135  // Make sure that the vector type makes sense.
136  static_assert(kVectorSize <= size_t(16), "Vector type is too large");
137 
141  uint16_t scalars[kLanes];
143  uint32_t registers[kRegisters];
144 
146  CUTLASS_HOST_DEVICE Scalar const& operator[](uint32_t i) const {
147  return reinterpret_cast<Scalar const&>(scalars[i]);
148  }
151  return reinterpret_cast<Scalar&>(scalars[i]);
152  }
153 };
154 
155 #endif
156 
158 
160 template <int kLanes_>
161 union Vector<bin1_t, kLanes_> {
163  typedef bin1_t Scalar;
164 
166  enum { kLanes = kLanes_ };
168  enum { kVectorSize = kLanes / 8 };
170  enum { kRegisters = kVectorSize < 4 ? 1 : kVectorSize / 4 };
171 
172  static_assert((kLanes >= 8) && !(kLanes % 8),
173  "May only construct vectors of bin1_t that are multiples of 8 bits.");
174 
178  uint32_t registers[kRegisters];
179 
182  Vector() {}
184  CUTLASS_HOST_DEVICE Vector(uint32_t value) { registers[0] = value; }
186  CUTLASS_HOST_DEVICE bool operator[](uint32_t i) const {
187  return ( (registers[i / 32] & (1 << (i % 32))) != 0 );
188  }
189 };
190 
192 
194 template <int kLanes_>
195 union Vector<int4_t, kLanes_> {
197  typedef int4_t Scalar;
198 
200  enum { kLanes = kLanes_ };
202  enum { kVectorSize = kLanes / 2 };
204  enum { kRegisters = kVectorSize < 4 ? 1 : kVectorSize / 4 };
205 
206  static_assert((kLanes >= 2) && !(kLanes % 2),
207  "May only construct vectors of int4_t that are multiples of 8 bits.");
208 
212  uint32_t registers[kRegisters];
213 
216  Vector() {}
218  CUTLASS_HOST_DEVICE Vector(uint32_t value) { registers[0] = value; }
220  CUTLASS_HOST_DEVICE int operator[](uint32_t i) const {
221  return (registers[i / 8] >> (i % 8 * 4) & 0x0f)
222  - 16 * (registers[i / 8] >> (i % 8 * 4 + 3) & 0x01);
223  }
224 };
225 
227 
229 template <int kLanes_>
230 union Vector<uint4_t, kLanes_> {
232  typedef uint4_t Scalar;
233 
235  enum { kLanes = kLanes_ };
237  enum { kVectorSize = kLanes / 2 };
239  enum { kRegisters = kVectorSize < 4 ? 1 : kVectorSize / 4 };
240 
241  static_assert((kLanes >= 2) && !(kLanes % 2),
242  "May only construct vectors of uint4_t that are multiples of 8 bits.");
243 
247  uint32_t registers[kRegisters];
248 
251  Vector() {}
253  CUTLASS_HOST_DEVICE Vector(uint32_t value) { registers[0] = value; }
255  CUTLASS_HOST_DEVICE int operator[](uint32_t i) const {
256  return registers[i / 8] >> (i % 8 * 4) & 0x0f;
257  }
258 };
259 
261 
262 template <typename Scalar_>
263 CUTLASS_HOST_DEVICE void make_zero(Scalar_& x) {
264  x = Scalar_(0);
265 }
266 
268 
269 template <typename Element_, int kLanes_ = 1>
270 struct Vectorize {
272 };
273 
275 
276 template <int kLanes_>
277 struct Vectorize<Vector<bin1_t, 32>, kLanes_> {
279 };
280 
282 
283 template <int kLanes_>
284 struct Vectorize<Vector<int4_t, 8>, kLanes_> {
286 };
287 
289 
290 template <int kLanes_>
291 struct Vectorize<Vector<uint4_t, 8>, kLanes_> {
293 };
294 
296 
297 template <typename Scalar_, int kLanes_>
299  for (int i = 0; i < Vector<Scalar_, kLanes_>::kRegisters; ++i) {
300  vec.registers[i] = 0;
301  }
302 }
303 
305 //
306 // cutlass::Extent similar to std::extent but applicable to CUTLASS types
307 //
308 
310 template <typename T>
311 struct Extent {
312  static size_t const kValue = 1;
313 };
314 
316 template <typename T, int Lanes>
317 struct Extent<Vector<T, Lanes> > {
318  static size_t const kValue = Lanes;
319 };
320 
322 template <typename T, int Lanes>
323 struct Extent<Vector<T, Lanes> const> {
324  static size_t const kValue = Lanes;
325 };
326 
328 
330 template <typename T>
331 struct VectorTraits {
333  typedef T Scalar;
334 
336  static int const kLanes = 1;
337 
339  static bool const IsVector = false;
340 
343 };
344 
346 template <typename T, int Lanes>
347 struct VectorTraits<Vector<T, Lanes> > {
349  typedef T Scalar;
350 
352  static int const kLanes = Lanes;
353 
355  static bool const IsVector = true;
356 
359 };
360 
362 template <typename T, int Lanes>
363 struct VectorTraits<Vector<T, Lanes> const> {
365  typedef T Scalar;
366 
368  static int const kLanes = Lanes;
369 
371  static bool const IsVector = true;
372 
375 };
376 
378 
379 } // namespace cutlass
Vector< bin1_t, kLanes_ *32 > Type
Definition: vector.h:278
CUTLASS_HOST_DEVICE Scalar const & operator[](uint32_t i) const
Accessor to the ith lane.
Definition: vector.h:84
Definition: convert.h:33
CUTLASS_HOST_DEVICE void make_zero(Scalar_ &x)
Definition: vector.h:263
CUTLASS_HOST_DEVICE Vector(uint32_t value)
Constructor to convert from uint32_t type.
Definition: vector.h:218
half Scalar
The scalar type.
Definition: vector.h:94
Definition: vector.h:270
Definition: numeric_types.h:39
Definition: vector.h:42
T Scalar
Scalar type.
Definition: vector.h:333
CUTLASS_HOST_DEVICE int operator[](uint32_t i) const
Accessor to the ith lane.
Definition: vector.h:220
CUTLASS_HOST_DEVICE Scalar const & operator[](uint32_t i) const
Accessor to the ith lane.
Definition: vector.h:112
struct __align__(1) AlignedStruct< 1 >
Definition: vector.h:45
CUTLASS_HOST_DEVICE Scalar & operator[](uint32_t i)
Accessor to the ith lane.
Definition: vector.h:86
CUTLASS_HOST_DEVICE Vector(uint32_t value)
Constructor to convert from uint32_t type.
Definition: vector.h:253
C++ features that may be otherwise unimplemented for CUDA device functions.
AlignedStruct< kVectorSize > aligned_
The aligned storage to make sure we have good alignment.
Definition: vector.h:242
Scalar_ Scalar
The scalar type.
Definition: vector.h:64
Definition: vector.h:67
CUTLASS_HOST_DEVICE Scalar & operator[](uint32_t i)
Accessor to the ith lane.
Definition: vector.h:116
half Scalar
The scalar type.
Definition: vector.h:126
Vector< int4_t, kLanes_ *8 > Type
Definition: vector.h:285
uint32_t registers[kRegisters]
The data in registers.
Definition: vector.h:81
CUTLASS_HOST_DEVICE bool operator[](uint32_t i) const
Accessor to the ith lane.
Definition: vector.h:186
uint4_t Scalar
The scalar type.
Definition: vector.h:232
AlignedStruct< kVectorSize > aligned_
The aligned storage to make sure we have good alignment.
Definition: vector.h:104
bin1_t Scalar
The scalar type.
Definition: vector.h:163
CUTLASS_HOST_DEVICE Vector()
Default Constructor.
Definition: vector.h:216
Vector< T, 1 > Vector
Type that is always a vector.
Definition: vector.h:342
CUTLASS_HOST_DEVICE Vector(uint32_t value)
Constructor to convert from uint32_t type.
Definition: vector.h:184
CUTLASS_HOST_DEVICE Scalar & operator[](uint32_t i)
Accessor to the ith lane.
Definition: vector.h:150
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:46
Traits describing properties of vectors and scalar-as-vectors.
Definition: vector.h:331
#define static_assert(__e, __m)
Definition: platform.h:153
Definition: vector.h:62
static bool const IsVector
True if the type is actually a cutlass::Vector, otherwise false.
Definition: vector.h:339
Scalar scalars[kLanes]
The associated array of scalars.
Definition: vector.h:79
CUTLASS_HOST_DEVICE Vector()
Default Constructor.
Definition: vector.h:182
CUTLASS_HOST_DEVICE int operator[](uint32_t i) const
Accessor to the ith lane.
Definition: vector.h:255
AlignedStruct< kVectorSize > aligned_
The aligned storage to make sure we have good alignment.
Definition: vector.h:207
Vector< T, Lanes > Vector
Type that is always a Vector.
Definition: vector.h:374
Definition: vector.h:69
static int const kLanes
Number of lanes of vector.
Definition: vector.h:336
T Scalar
Scalar type.
Definition: vector.h:365
Vector< Element_, kLanes_ > Type
Definition: vector.h:271
T Scalar
Scalar type.
Definition: vector.h:349
Definition: numeric_types.h:43
int4_t Scalar
The scalar type.
Definition: vector.h:197
static size_t const kValue
Definition: vector.h:312
AlignedStruct< kVectorSize > aligned_
The aligned storage to make sure we have good alignment.
Definition: vector.h:74
Vector< uint4_t, kLanes_ *8 > Type
Definition: vector.h:292
Definition: numeric_types.h:41
CUTLASS_HOST_DEVICE Scalar const & operator[](uint32_t i) const
Accessor to the ith lane.
Definition: vector.h:146
AlignedStruct< kVectorSize > aligned_
The aligned storage to make sure we have good alignment.
Definition: vector.h:136
Vector< T, Lanes > Vector
Type that is always a Vector.
Definition: vector.h:358
CUTLASS_HOST_DEVICE Vector()
Default Constructor.
Definition: vector.h:251
Returns the extent of a scalar or vector.
Definition: vector.h:311
AlignedStruct< kVectorSize > aligned_
The aligned storage to make sure we have good alignment.
Definition: vector.h:173