Functor to compute a linear combination of fragments.
#include <linear_scaling.h>
|
CUTLASS_DEVICE | LinearScaling () |
| Constructor. More...
|
|
CUTLASS_DEVICE | LinearScaling (Params const &_params) |
| Constructor. More...
|
|
CUTLASS_DEVICE bool | source_required () const |
|
template<typename FragmentA_ , typename FragmentB_ > |
CUTLASS_DEVICE void | evaluate (FragmentA_ const &accum, FragmentB_ &output) |
| Evaluate the functor. More...
|
|
template<typename ScalarAccum , typename ScalarOutput , int size> |
CUTLASS_DEVICE void | evaluate (ScalarAccum const *accum, ScalarOutput *output) |
| Evaluate the functor, without using fragment in the API. More...
|
|
template<typename FragmentA_ , typename FragmentB_ > |
CUTLASS_DEVICE void | evaluate (FragmentA_ const &accum, FragmentB_ const &old, FragmentB_ &output) |
| Evaluate the functor. More...
|
|
template<typename ScalarAccum , typename ScalarOutput , int size> |
CUTLASS_DEVICE void | evaluate (ScalarAccum const *accum, ScalarOutput const *old, ScalarOutput *output) |
| Evaluate the functor, without using fragment in the API. More...
|
|
◆ FragmentMultiplyAdd
template<typename Scalar_, typename FragmentMultiplyAdd_ = FragmentMultiplyAdd<Scalar_, Scalar_>>
◆ Scalar
template<typename Scalar_, typename FragmentMultiplyAdd_ = FragmentMultiplyAdd<Scalar_, Scalar_>>
◆ ScalarAccum
template<typename Scalar_, typename FragmentMultiplyAdd_ = FragmentMultiplyAdd<Scalar_, Scalar_>>
◆ LinearScaling() [1/2]
template<typename Scalar_, typename FragmentMultiplyAdd_ = FragmentMultiplyAdd<Scalar_, Scalar_>>
◆ LinearScaling() [2/2]
template<typename Scalar_, typename FragmentMultiplyAdd_ = FragmentMultiplyAdd<Scalar_, Scalar_>>
◆ evaluate() [1/4]
template<typename Scalar_, typename FragmentMultiplyAdd_ = FragmentMultiplyAdd<Scalar_, Scalar_>>
template<typename FragmentA_ , typename FragmentB_ >
CUTLASS_DEVICE void cutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ >::evaluate (FragmentA_ const &accum, FragmentB_ &output)
inline
◆ evaluate() [2/4]
template<typename Scalar_, typename FragmentMultiplyAdd_ = FragmentMultiplyAdd<Scalar_, Scalar_>>
template<typename ScalarAccum , typename ScalarOutput , int size>
◆ evaluate() [3/4]
template<typename Scalar_, typename FragmentMultiplyAdd_ = FragmentMultiplyAdd<Scalar_, Scalar_>>
template<typename FragmentA_ , typename FragmentB_ >
CUTLASS_DEVICE void cutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ >::evaluate (FragmentA_ const &accum, FragmentB_ const &old, FragmentB_ &output)
inline
◆ evaluate() [4/4]
template<typename Scalar_, typename FragmentMultiplyAdd_ = FragmentMultiplyAdd<Scalar_, Scalar_>>
template<typename ScalarAccum , typename ScalarOutput , int size>
◆ source_required()
template<typename Scalar_, typename FragmentMultiplyAdd_ = FragmentMultiplyAdd<Scalar_, Scalar_>>
Method to determine whether the source accumulator matrix C is ever needed. This method may always safely return true, though better performance is possible if the source accumulator matrix is never loaded unnecessarily.
◆ params
template<typename Scalar_, typename FragmentMultiplyAdd_ = FragmentMultiplyAdd<Scalar_, Scalar_>>
The documentation for this struct was generated from the following file: