Cutlass
CUDA Templates for Linear Algebra Subroutines and Solvers
performance_tuning.h
Go to the documentation of this file.
1 /******************************************************************************
2  * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are not permitted.
6  *
7  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
8  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
9  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
10  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
11  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
12  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
13  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
14  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
15  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
16  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
17  *
18  ******************************************************************************/
19 
20 #pragma once
21 #ifndef CUTLASS_PERFORMANCE_TUNING_H
22 #define CUTLASS_PERFORMANCE_TUNING_H
23 
24 // CUTLASS_PRAGMA_(UNROLL|NO_UNROLL) optimization directives for the CUDA compiler. Place the macro directly before the loop it should affect.
25 // Both macros expand to nothing unless __CUDA_ARCH__ is defined, so only device compilation passes ever see an unroll pragma.
26 #if defined(__CUDA_ARCH__) // device compilation pass
27 #if defined(_MSC_VER) // _MSC_VER defined: spell the pragma with the Microsoft-specific __pragma keyword
28 #define CUTLASS_PRAGMA_UNROLL __pragma("unroll") // NOTE(review): Microsoft documents __pragma as taking an unquoted token sequence; confirm the quoted form is honored by this toolchain
29 #define CUTLASS_PRAGMA_NO_UNROLL __pragma("unroll 1") // "unroll 1" asks the compiler not to unroll the loop
30 #else // any other compiler: standard _Pragma operator, which takes a string literal
31 #define CUTLASS_PRAGMA_UNROLL _Pragma("unroll")
32 #define CUTLASS_PRAGMA_NO_UNROLL _Pragma("unroll 1")
33 #endif // _MSC_VER
34 #else // __CUDA_ARCH__ not defined: define both macros as empty
35 #define CUTLASS_PRAGMA_UNROLL
36 #define CUTLASS_PRAGMA_NO_UNROLL
37 #endif // __CUDA_ARCH__
38 
39 #define CUTLASS_GEMM_LOOP CUTLASS_PRAGMA_NO_UNROLL // alias of CUTLASS_PRAGMA_NO_UNROLL ("unroll 1" in device passes, empty otherwise); presumably marks the GEMM main loop - confirm at use sites
40 #endif // CUTLASS_PERFORMANCE_TUNING_H