16 #include "LvArrayConfig.hpp" 21 #include <type_traits> 23 #if defined( LVARRAY_USE_CUDA ) 24 #include <cuda_fp16.h> 47 template<
typename T,
typename U >
57 template<
typename T >
66 template<
typename T >
78 template<
typename T >
88 template<
typename T >
99 template<
typename T >
102 {
return __hlt( x, y ); }
104 #if defined( LVARRAY_USE_CUDA ) 112 template<
typename U >
114 __half
convert( __half
const, U
const u )
115 {
return __float2half_rn( u ); }
124 template<
typename U >
126 __half2
convert( __half2
const, U
const u )
127 {
return __float2half2_rn( u ); }
135 __half2
convert( __half2
const, __half
const u )
137 #if defined( __CUDA_ARCH__ ) 138 return __half2half2( u );
140 return __float2half2_rn( u );
153 template<
typename U,
typename V >
155 __half2
convert( __half2
const, U
const u, V
const v )
156 {
return __floats2half2_rn( u, v ); }
165 __half2
convert( __half2
const, __half
const u, __half
const v )
167 #if defined( __CUDA_ARCH__ ) 168 return __halves2half2( u, v );
170 return __floats2half2_rn( u, v );
198 {
return __low2half( x ); }
206 {
return __high2half( x ); }
214 __half
lessThan( __half
const x, __half
const y )
215 {
return __hlt( x, y ); }
223 __half2
lessThan( __half2
const x, __half2
const y )
224 {
return __hlt2( x, y ); }
240 template<
typename T >
249 template<
typename T >
260 template<
typename T,
typename U >
275 template<
typename T,
typename U,
typename V >
286 template<
typename T >
297 template<
typename T >
308 template<
typename T >
310 std::enable_if_t< std::is_arithmetic< T >::value, T >
311 max( T
const a, T
const b )
313 #if defined(__CUDA_ARCH__) 320 #if defined( LVARRAY_USE_CUDA ) 324 __half
max( __half
const a, __half
const b )
326 #if CUDART_VERSION > 11000 && (__CUDA_ARCH__ >= 800 || !defined(__CUDA_ARCH__)) 327 return __hmax( a, b );
329 return a > b ? a : b;
335 __half2
max( __half2
const a, __half2
const b )
337 #if CUDART_VERSION > 11000 && (__CUDA_ARCH__ >= 800 || !defined(__CUDA_ARCH__)) 338 return __hmax2( a, b );
340 __half2
const aFactor = __hge2( a, b );
341 __half2
const bFactor = convert< __half2 >( 1 ) - aFactor;
342 return a * aFactor + bFactor * b;
355 template<
typename T >
357 std::enable_if_t< std::is_arithmetic< T >::value, T >
358 min( T
const a, T
const b )
360 #if defined(__CUDA_ARCH__) 367 #if defined( LVARRAY_USE_CUDA ) 371 __half
min( __half
const a, __half
const b )
373 #if CUDART_VERSION > 11000 && (__CUDA_ARCH__ >= 800 || !defined(__CUDA_ARCH__)) 374 return __hmin( a, b );
376 return a < b ? a : b;
382 __half2
min( __half2
const a, __half2
const b )
384 #if CUDART_VERSION > 11000 && (__CUDA_ARCH__ >= 800 || !defined(__CUDA_ARCH__)) 385 return __hmin2( a, b );
387 __half2
const aFactor = __hle2( a, b );
388 __half2
const bFactor = convert< __half2 >( 1 ) - aFactor;
389 return a * aFactor + bFactor * b;
400 template<
typename T >
404 #if defined(__CUDA_ARCH__) 411 #if defined( LVARRAY_USE_CUDA ) 415 __half
abs( __half
const x )
417 #if CUDART_VERSION > 11000 420 return x > __half( 0 ) ? x : -x;
426 __half2
abs( __half2
const x )
428 #if CUDART_VERSION > 11000 431 return x - __hle2( x, convert< __half2 >( 0 ) ) * ( x + x );
442 template<
typename T >
463 #if defined(__CUDA_ARCH__) 471 template<
typename T >
475 #if defined(__CUDA_ARCH__) 482 #if defined( LVARRAY_USE_CUDA ) 486 __half
sqrt( __half
const x )
487 { return ::hsqrt( x ); }
491 __half2
sqrt( __half2
const x )
492 { return ::h2sqrt( x ); }
505 #if defined(__CUDA_ARCH__) 506 return ::rsqrtf( x );
513 template<
typename T >
517 #if defined( __CUDA_ARCH__ ) 518 return ::rsqrt(
double( x ) );
524 #if defined( LVARRAY_USE_CUDA ) 528 __half
invSqrt( __half
const x )
529 { return ::hrsqrt( x ); }
533 __half2
invSqrt( __half2
const x )
534 { return ::h2rsqrt( x ); }
552 float sin(
float const theta )
554 #if defined(__CUDA_ARCH__) 555 return ::sinf( theta );
562 template<
typename T >
564 double sin( T
const theta )
566 #if defined(__CUDA_ARCH__) 573 #if defined( LVARRAY_USE_CUDA ) 577 __half
sin( __half
const theta )
578 { return ::hsin( theta ); }
582 __half2
sin( __half2
const theta )
583 { return ::h2sin( theta ); }
594 float cos(
float const theta )
596 #if defined(__CUDA_ARCH__) 597 return ::cosf( theta );
604 template<
typename T >
606 double cos( T
const theta )
608 #if defined(__CUDA_ARCH__) 615 #if defined( LVARRAY_USE_CUDA ) 619 __half
cos( __half
const theta )
620 { return ::hcos( theta ); }
624 __half2
cos( __half2
const theta )
625 { return ::h2cos( theta ); }
636 void sincos(
float const theta,
float & sinTheta,
float & cosTheta )
638 #if defined(__CUDA_ARCH__) 639 ::sincos( theta, &sinTheta, &cosTheta );
647 template<
typename T >
649 void sincos(
double const theta,
double & sinTheta,
double & cosTheta )
651 #if defined(__CUDA_ARCH__) 652 ::sincos( theta, &sinTheta, &cosTheta );
660 template<
typename T >
662 void sincos( T
const theta,
double & sinTheta,
double & cosTheta )
664 #if defined(__CUDA_ARCH__) 675 #if defined( LVARRAY_USE_CUDA ) 679 void sincos( __half
const theta, __half & sinTheta, __half & cosTheta )
681 sinTheta = ::hsin( theta );
682 cosTheta = ::hcos( theta );
687 void sincos( __half2
const theta, __half2 & sinTheta, __half2 & cosTheta )
689 sinTheta = ::h2sin( theta );
690 cosTheta = ::h2cos( theta );
702 float tan(
float const theta )
704 #if defined(__CUDA_ARCH__) 705 return ::tanf( theta );
712 template<
typename T >
714 double tan( T
const theta )
716 #if defined(__CUDA_ARCH__) 723 #if defined( LVARRAY_USE_CUDA ) 727 __half
tan( __half
const theta )
736 __half2
tan( __half2
const theta )
766 template<
typename T >
770 T
const negate =
lessThan( x, math::convert< T >( 0 ) );
771 T
const absX =
abs( x );
773 T ret = math::convert< T >( -0.0187293 ) * absX + math::convert< T >( 0.0742610 );
774 ret = ret * absX - math::convert< T >( 0.2121144 );
775 ret = ret * absX + math::convert< T >( 1.5707288 );
776 ret = math::convert< T >( 3.14159265358979 * 0.5 ) - ret *
sqrt( math::convert< T >( 1 ) - absX );
777 ret = ret - negate * ( ret + ret );
778 T
const smallAngle =
lessThan( absX, math::convert< T >( 1.7e-1 ) );
779 return smallAngle * x + ( math::convert< T >( 1 ) - smallAngle ) * ret;
788 template<
typename T >
792 T
const negate =
lessThan( x, math::convert< T >( 0 ) );
793 T
const absX =
abs( x );
795 T ret = math::convert< T >( -0.0187293 ) * absX + math::convert< T >( 0.0742610 );
796 ret = ret * absX - math::convert< T >( 0.2121144 );
797 ret = ret * absX + math::convert< T >( 1.5707288 );
798 ret = ret *
sqrt( math::convert< T >( 1 ) - absX );
799 ret = ret - negate * ( ret + ret );
800 return negate * math::convert< T >( 3.14159265358979 ) + ret;
810 template<
typename T >
814 T
const absX =
abs( x );
815 T
const absY =
abs( y );
816 T
const ratio =
min( absX, absY ) /
max( absX, absY );
817 T
const ratio2 = ratio * ratio;
819 T ret = math::convert< T >( -0.013480470 ) * ratio2 + math::convert< T >( 0.057477314 );
820 ret = ret * ratio2 - math::convert< T >( 0.121239071 );
821 ret = ret * ratio2 + math::convert< T >( 0.195635925 );
822 ret = ret * ratio2 - math::convert< T >( 0.332994597 );
823 ret = ret * ratio2 + math::convert< T >( 0.999995630 );
830 ret = internal::lessThan( absX, absY ) * ( math::convert< T >( 1.570796327 ) - ret - ret ) + ret;
831 ret = internal::lessThan( x, math::convert< T >( 0 ) ) * ( math::convert< T >( 3.141592654 ) - ret - ret ) + ret;
832 ret = ret - internal::lessThan( y, math::convert< T >( 0 ) ) * ( ret + ret );
848 #if defined(__CUDA_ARCH__) 856 template<
typename T >
860 #if defined(__CUDA_ARCH__) 867 #if defined( LVARRAY_USE_CUDA ) 871 __half
asin( __half
const x )
872 {
return internal::asinImpl( x ); }
876 __half2
asin( __half2
const x )
877 {
return internal::asinImpl( x ); }
890 #if defined(__CUDA_ARCH__) 898 template<
typename T >
902 #if defined(__CUDA_ARCH__) 909 #if defined( LVARRAY_USE_CUDA ) 913 __half
acos( __half
const x )
914 {
return internal::acosImpl( x ); }
918 __half2
acos( __half2
const x )
919 {
return internal::acosImpl( x ); }
931 float atan2(
float const y,
float const x )
933 #if defined(__CUDA_ARCH__) 934 return ::atan2f( y, x );
941 template<
typename T >
943 double atan2( T
const y, T
const x )
945 #if defined(__CUDA_ARCH__) 952 #if defined( LVARRAY_USE_CUDA ) 956 __half
atan2( __half
const y, __half
const x )
957 {
return internal::atan2Impl( y, x ); }
961 __half2
atan2( __half2
const y, __half2
const x )
962 {
return internal::atan2Impl( y, x ); }
980 float exp(
float const x )
982 #if defined(__CUDA_ARCH__) 990 template<
typename T >
994 #if defined(__CUDA_ARCH__) 1001 #if defined( LVARRAY_USE_CUDA ) 1005 __half
exp( __half
const x )
1006 { return ::hexp( x ); }
1010 __half2
exp( __half2
const x )
1011 { return ::h2exp( x ); }
1024 #if defined(__CUDA_ARCH__) 1032 template<
typename T >
1036 #if defined(__CUDA_ARCH__) 1043 #if defined( LVARRAY_USE_CUDA ) 1047 __half
log( __half
const x )
1048 { return ::hlog( x ); }
1052 __half2
log( __half2
const x )
1053 { return ::h2log( x ); }
LVARRAY_HOST_DEVICE double exp(T const x)
Definition: math.hpp:992
LVARRAY_HOST_DEVICE constexpr T convert(U const u)
Convert u to type.
Definition: math.hpp:262
LVARRAY_HOST_DEVICE double tan(T const theta)
Definition: math.hpp:714
LVARRAY_HOST_DEVICE float asin(float const x)
Definition: math.hpp:846
LVARRAY_HOST_DEVICE double sqrt(T const x)
Definition: math.hpp:473
LVARRAY_HOST_DEVICE float acos(float const x)
Definition: math.hpp:888
LVARRAY_DEVICE T acosImpl(T const x)
Definition: math.hpp:790
LVARRAY_HOST_DEVICE double acos(T const x)
Definition: math.hpp:900
LVARRAY_HOST_DEVICE float cos(float const theta)
Definition: math.hpp:594
LVARRAY_HOST_DEVICE constexpr std::enable_if_t< std::is_arithmetic< T >::value, T > min(T const a, T const b)
Definition: math.hpp:358
LVARRAY_DEVICE T asinImpl(T const x)
Definition: math.hpp:768
LVARRAY_HOST_DEVICE constexpr T convert(U const u, V const v)
Convert u and v to a dual type.
Definition: math.hpp:277
The type of a single value of type T.
Definition: math.hpp:67
LVARRAY_HOST_DEVICE float log(float const x)
Definition: math.hpp:1022
LVARRAY_HOST_DEVICE float sin(float const theta)
Definition: math.hpp:552
LVARRAY_HOST_DEVICE void sincos(float const theta, float &sinTheta, float &cosTheta)
Compute the sine and cosine of theta.
Definition: math.hpp:636
LVARRAY_HOST_DEVICE double asin(T const x)
Definition: math.hpp:858
T type
An alias for T.
Definition: math.hpp:70
LVARRAY_HOST_DEVICE double cos(T const theta)
Definition: math.hpp:606
#define LVARRAY_DEVICE
Mark a function for only device usage.
Definition: Macros.hpp:552
LVARRAY_HOST_DEVICE constexpr T lessThan(T const x, T const y)
Definition: math.hpp:101
LVARRAY_DEVICE SingleType< T > getFirst(T const x)
Definition: math.hpp:288
LVARRAY_HOST_DEVICE float sqrt(float const x)
Definition: math.hpp:461
The top level namespace.
Definition: Array.hpp:24
LVARRAY_DEVICE SingleType< T > getSecond(T const x)
Definition: math.hpp:299
Contains a bunch of macro definitions.
LVARRAY_HOST_DEVICE constexpr T abs(T const x)
Definition: math.hpp:402
LVARRAY_HOST_DEVICE float atan2(float const y, float const x)
Definition: math.hpp:931
LVARRAY_DEVICE T atan2Impl(T const y, T const x)
Definition: math.hpp:812
LVARRAY_HOST_DEVICE float exp(float const x)
Definition: math.hpp:980
LVARRAY_HOST_DEVICE float tan(float const theta)
Definition: math.hpp:702
LVARRAY_HOST_DEVICE constexpr std::enable_if_t< std::is_arithmetic< T >::value, T > max(T const a, T const b)
Definition: math.hpp:311
LVARRAY_HOST_DEVICE double atan2(T const y, T const x)
Definition: math.hpp:943
typename internal::SingleType< T >::type SingleType
The type of a single value of type T.
Definition: math.hpp:250
LVARRAY_HOST_DEVICE constexpr int numValues()
Return the number of values stored in type.
Definition: math.hpp:242
LVARRAY_HOST_DEVICE double log(T const x)
Definition: math.hpp:1034
LVARRAY_HOST_DEVICE float invSqrt(float const x)
Definition: math.hpp:503
LVARRAY_HOST_DEVICE double sin(T const theta)
Definition: math.hpp:564
#define LVARRAY_HOST_DEVICE
Mark a function for both host and device usage.
Definition: Macros.hpp:549
LVARRAY_HOST_DEVICE constexpr T square(T const x)
Definition: math.hpp:444