LvArray
math.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2021, Lawrence Livermore National Security, LLC and LvArray contributors.
3  * All rights reserved.
4  * See the LICENSE file for details.
5  * SPDX-License-Identifier: (BSD-3-Clause)
6  */
7 
13 #pragma once
14 
15 // Source includes
16 #include "LvArrayConfig.hpp"
17 #include "Macros.hpp"
18 
19 // System includes
20 #include <cmath>
21 #include <type_traits>
22 
23 #if defined( LVARRAY_USE_CUDA )
24  #include <cuda_fp16.h>
25 #endif
26 
27 namespace LvArray
28 {
29 
33 namespace math
34 {
35 
36 namespace internal
37 {
38 
47 template< typename T, typename U >
49 T convert( T const, U const u )
50 { return u; }
51 
57 template< typename T >
59 int numValues( T const )
60 { return 1; }
61 
66 template< typename T >
67 struct SingleType
68 {
70  using type = T;
71 };
72 
78 template< typename T >
81 { return x; }
82 
88 template< typename T >
91 { return x; }
92 
99 template< typename T >
100 LVARRAY_HOST_DEVICE inline constexpr
101 T lessThan( T const x, T const y )
102 { return __hlt( x, y ); }
103 
104 #if defined( LVARRAY_USE_CUDA )
105 
112 template< typename U >
114 __half convert( __half const, U const u )
115 { return __float2half_rn( u ); }
116 
124 template< typename U >
126 __half2 convert( __half2 const, U const u )
127 { return __float2half2_rn( u ); }
128 
135 __half2 convert( __half2 const, __half const u )
136 {
137 #if defined( LVARRAY_DEVICE_COMPILE )
138  return __half2half2( u );
139 #else
140  return __float2half2_rn( u );
141 #endif
142 }
143 
153 template< typename U, typename V >
155 __half2 convert( __half2 const, U const u, V const v )
156 { return __floats2half2_rn( u, v ); }
157 
165 __half2 convert( __half2 const, __half const u, __half const v )
166 {
167 #if defined( LVARRAY_DEVICE_COMPILE )
168  return __halves2half2( u, v );
169 #else
170  return __floats2half2_rn( u, v );
171 #endif
172 }
173 
179 int numValues( __half2 const & )
180 { return 2; }
181 
185 template<>
186 struct SingleType< __half2 >
187 {
189  using type = __half;
190 };
191 
197 __half getFirst( __half2 const x )
198 { return __low2half( x ); }
199 
205 __half getSecond( __half2 const x )
206 { return __high2half( x ); }
207 
208 #endif
209 
210 #if defined( LVARRAY_USE_DEVICE )
211 
217 __half lessThan( __half const x, __half const y )
218 { return __hlt( x, y ); }
219 
226 __half2 lessThan( __half2 const x, __half2 const y )
227 { return __hlt2( x, y ); }
228 #endif
229 
230 } // namespace internal
231 
235 
242 template< typename T >
245 { return internal::numValues( T() ); }
246 
251 template< typename T >
253 
262 template< typename T, typename U >
264 T convert( U const u )
265 { return internal::convert( T(), u ); }
266 
277 template< typename T, typename U, typename V >
279 T convert( U const u, V const v )
280 { return internal::convert( T(), u, v ); }
281 
288 template< typename T >
291 { return internal::getFirst( x ); }
292 
299 template< typename T >
302 { return internal::getSecond( x ); }
303 
310 template< typename T >
312 std::enable_if_t< std::is_arithmetic< T >::value, T >
313 max( T const a, T const b )
314 {
315 #if defined(LVARRAY_DEVICE_COMPILE)
316  return ::max( a, b );
317 #else
318  return std::max( a, b );
319 #endif
320 }
321 
322 #if defined( LVARRAY_USE_DEVICE )
323 
326 __half max( __half const a, __half const b )
327 {
328 #if defined(LVARRAY_USE_CUDA) && CUDART_VERSION > 11000 && (__CUDA_ARCH__ >= 800 || !defined(__CUDA_ARCH__))
329  return __hmax( a, b );
330 #elif defined(LVARRAY_USE_HIP)
331  return __hgt( a, b ) ? a : b;
332 #else
333  return a > b ? a : b;
334 #endif
335 }
336 
339 __half2 max( __half2 const a, __half2 const b )
340 {
341 #if defined(LVARRAY_USE_CUDA) && CUDART_VERSION > 11000 && (__CUDA_ARCH__ >= 800 || !defined(__CUDA_ARCH__))
342  return __hmax2( a, b );
343 #else
344  __half2 const aFactor = __hge2( a, b );
345  __half2 const bFactor = convert< __half2 >( 1 ) - aFactor;
346  return a * aFactor + bFactor * b;
347 #endif
348 }
349 
350 #endif
351 
352 
359 template< typename T >
361 std::enable_if_t< std::is_arithmetic< T >::value, T >
362 min( T const a, T const b )
363 {
364 #if defined(LVARRAY_DEVICE_COMPILE)
365  return ::min( a, b );
366 #else
367  return std::min( a, b );
368 #endif
369 }
370 
371 #if defined( LVARRAY_USE_CUDA )
372 
376 __half min( __half const a, __half const b )
377 {
378 #if CUDART_VERSION > 11000 && (__CUDA_ARCH__ >= 800 || !defined(__CUDA_ARCH__))
379  return __hmin( a, b );
380 #else
381  return a < b ? a : b;
382 #endif
383 }
384 
388 __half2 min( __half2 const a, __half2 const b )
389 {
390 #if CUDART_VERSION > 11000 && (__CUDA_ARCH__ >= 800 || !defined(__CUDA_ARCH__))
391  return __hmin2( a, b );
392 #else
393  __half2 const aFactor = __hle2( a, b );
394  __half2 const bFactor = convert< __half2 >( 1 ) - aFactor;
395  return a * aFactor + bFactor * b;
396 #endif
397 }
398 
399 #endif
400 
406 template< typename T >
408 T abs( T const x )
409 {
410 #if defined(LVARRAY_DEVICE_COMPILE)
411  return ::abs( x );
412 #else
413  return std::abs( x );
414 #endif
415 }
416 
417 #if defined( LVARRAY_USE_DEVICE )
418 
421 __half abs( __half const x )
422 {
423 #if CUDART_VERSION > 11000
424  return __habs( x );
425 #else
426  return x > __half( 0 ) ? x : -x;
427 #endif
428 }
429 
432 __half2 abs( __half2 const x )
433 {
434 #if CUDART_VERSION > 11000
435  return __habs2( x );
436 #else
437  return x - __hle2( x, convert< __half2 >( 0 ) ) * ( x + x );
438 #endif
439 }
440 
441 #endif
442 
448 template< typename T >
450 T square( T const x )
451 { return x * x; }
452 
454 
458 
467 float sqrt( float const x )
468 {
469 #if defined(LVARRAY_DEVICE_COMPILE)
470  return ::sqrtf( x );
471 #else
472  return std::sqrt( x );
473 #endif
474 }
475 
477 template< typename T >
479 double sqrt( T const x )
480 {
481 #if defined(LVARRAY_DEVICE_COMPILE)
482  return ::sqrt( double( x ) );
483 #else
484  return std::sqrt( x );
485 #endif
486 }
487 
488 #if defined( LVARRAY_USE_DEVICE )
489 
492 __half sqrt( __half const x )
493 { return ::hsqrt( x ); }
494 
497 __half2 sqrt( __half2 const x )
498 { return ::h2sqrt( x ); }
499 
500 #endif
501 
509 float invSqrt( float const x )
510 {
511 #if defined(LVARRAY_DEVICE_COMPILE)
512  return ::rsqrtf( x );
513 #else
514  return 1 / std::sqrt( x );
515 #endif
516 }
517 
519 template< typename T >
521 double invSqrt( T const x )
522 {
523 #if defined( LVARRAY_DEVICE_COMPILE )
524  return ::rsqrt( double( x ) );
525 #else
526  return 1 / std::sqrt( x );
527 #endif
528 }
529 
530 #if defined( LVARRAY_USE_DEVICE )
531 
534 __half invSqrt( __half const x )
535 { return ::hrsqrt( x ); }
536 
539 __half2 invSqrt( __half2 const x )
540 { return ::h2rsqrt( x ); }
541 
542 #endif
543 
545 
549 
558 float sin( float const theta )
559 {
560 #if defined(LVARRAY_DEVICE_COMPILE)
561  return ::sinf( theta );
562 #else
563  return std::sin( theta );
564 #endif
565 }
566 
568 template< typename T >
570 double sin( T const theta )
571 {
572 #if defined(LVARRAY_DEVICE_COMPILE)
573  return ::sin( double( theta ) );
574 #else
575  return std::sin( theta );
576 #endif
577 }
578 
579 #if defined( LVARRAY_USE_DEVICE )
580 
583 __half sin( __half const theta )
584 { return ::hsin( theta ); }
585 
588 __half2 sin( __half2 const theta )
589 { return ::h2sin( theta ); }
590 
591 #endif
592 
600 float cos( float const theta )
601 {
602 #if defined(LVARRAY_DEVICE_COMPILE)
603  return ::cosf( theta );
604 #else
605  return std::cos( theta );
606 #endif
607 }
608 
610 template< typename T >
612 double cos( T const theta )
613 {
614 #if defined(LVARRAY_DEVICE_COMPILE)
615  return ::cos( double( theta ) );
616 #else
617  return std::cos( theta );
618 #endif
619 }
620 
621 #if defined( LVARRAY_USE_DEVICE )
622 
625 __half cos( __half const theta )
626 { return ::hcos( theta ); }
627 
630 __half2 cos( __half2 const theta )
631 { return ::h2cos( theta ); }
632 
633 #endif
634 
642 void sincos( float const theta, float & sinTheta, float & cosTheta )
643 {
644 #if defined(LVARRAY_DEVICE_COMPILE)
645  #if defined(LVARRAY_USE_CUDA)
646  ::sincos( theta, &sinTheta, &cosTheta );
647  #elif defined(LVARRAY_USE_HIP)
648  ::sincosf( theta, &sinTheta, &cosTheta );
649  #endif
650 #else
651  sinTheta = std::sin( theta );
652  cosTheta = std::cos( theta );
653 #endif
654 }
655 
657 template< typename T >
659 void sincos( double const theta, double & sinTheta, double & cosTheta )
660 {
661 #if defined(LVARRAY_DEVICE_COMPILE)
662  ::sincos( theta, &sinTheta, &cosTheta ); // hip and cuda versions both use double
663 #else
664  sinTheta = std::sin( theta );
665  cosTheta = std::cos( theta );
666 #endif
667 }
668 
670 template< typename T >
672 void sincos( T const theta, double & sinTheta, double & cosTheta )
673 {
674 #if defined(LVARRAY_DEVICE_COMPILE)
675  double s, c;
676  ::sincos( theta, &s, &c );
677  sinTheta = s;
678  cosTheta = c;
679 #else
680  sinTheta = std::sin( theta );
681  cosTheta = std::cos( theta );
682 #endif
683 }
684 
685 #if defined( LVARRAY_USE_DEVICE )
686 
689 void sincos( __half const theta, __half & sinTheta, __half & cosTheta )
690 {
691  sinTheta = ::hsin( theta );
692  cosTheta = ::hcos( theta );
693 }
694 
697 void sincos( __half2 const theta, __half2 & sinTheta, __half2 & cosTheta )
698 {
699  sinTheta = ::h2sin( theta );
700  cosTheta = ::h2cos( theta );
701 }
702 
703 #endif
704 
712 float tan( float const theta )
713 {
714 #if defined(LVARRAY_DEVICE_COMPILE)
715  return ::tanf( theta );
716 #else
717  return std::tan( theta );
718 #endif
719 }
720 
722 template< typename T >
724 double tan( T const theta )
725 {
726 #if defined(LVARRAY_DEVICE_COMPILE)
727  return ::tan( double( theta ) );
728 #else
729  return std::tan( theta );
730 #endif
731 }
732 
733 #if defined( LVARRAY_USE_DEVICE )
734 
737 __half tan( __half const theta )
738 {
739  __half s, c;
740  sincos( theta, s, c );
741  return s / c;
742 }
743 
746 __half2 tan( __half2 const theta )
747 {
748  __half2 s, c;
749  sincos( theta, s, c );
750  return s / c;
751 }
752 
753 #endif
754 
756 
760 
762 namespace internal
763 {
764 
776 template< typename T >
778 T asinImpl( T const x )
779 {
780  T const negate = lessThan( x, math::convert< T >( 0 ) );
781  T const absX = abs( x );
782 
783  T ret = math::convert< T >( -0.0187293 ) * absX + math::convert< T >( 0.0742610 );
784  ret = ret * absX - math::convert< T >( 0.2121144 );
785  ret = ret * absX + math::convert< T >( 1.5707288 );
786  ret = math::convert< T >( 3.14159265358979 * 0.5 ) - ret * sqrt( math::convert< T >( 1 ) - absX );
787  ret = ret - negate * ( ret + ret );
788  T const smallAngle = lessThan( absX, math::convert< T >( 1.7e-1 ) );
789  return smallAngle * x + ( math::convert< T >( 1 ) - smallAngle ) * ret;
790 }
791 
798 template< typename T >
800 T acosImpl( T const x )
801 {
802  T const negate = lessThan( x, math::convert< T >( 0 ) );
803  T const absX = abs( x );
804 
805  T ret = math::convert< T >( -0.0187293 ) * absX + math::convert< T >( 0.0742610 );
806  ret = ret * absX - math::convert< T >( 0.2121144 );
807  ret = ret * absX + math::convert< T >( 1.5707288 );
808  ret = ret * sqrt( math::convert< T >( 1 ) - absX );
809  ret = ret - negate * ( ret + ret );
810  return negate * math::convert< T >( 3.14159265358979 ) + ret;
811 }
812 
820 template< typename T >
823 T atan2Impl( T const y, T const x )
824 {
825  T const absX = abs( x );
826  T const absY = abs( y );
827  T const ratio = min( absX, absY ) / max( absX, absY );
828  T const ratio2 = ratio * ratio;
829 
830  T ret = math::convert< T >( -0.013480470 ) * ratio2 + math::convert< T >( 0.057477314 );
831  ret = ret * ratio2 - math::convert< T >( 0.121239071 );
832  ret = ret * ratio2 + math::convert< T >( 0.195635925 );
833  ret = ret * ratio2 - math::convert< T >( 0.332994597 );
834  ret = ret * ratio2 + math::convert< T >( 0.999995630 );
835  ret = ret * ratio;
836 
837  // For single values the following works out to:
838  // ret = absX < absY ? 1.570796327 - ret : ret;
839  // ret = x < 0 ? 3.141592654 - ret : ret;
840  // ret = y < 0 ? -ret : ret;
841  ret = internal::lessThan( absX, absY ) * ( math::convert< T >( 1.570796327 ) - ret - ret ) + ret;
842  ret = internal::lessThan( x, math::convert< T >( 0 ) ) * ( math::convert< T >( 3.141592654 ) - ret - ret ) + ret;
843  ret = ret - internal::lessThan( y, math::convert< T >( 0 ) ) * ( ret + ret );
844 
845  return ret;
846 }
847 
848 } // namespace internal
849 
857 float asin( float const x )
858 {
859 #if defined(LVARRAY_DEVICE_COMPILE)
860  return ::asinf( x );
861 #else
862  return std::asin( x );
863 #endif
864 }
865 
867 template< typename T >
869 double asin( T const x )
870 {
871 #if defined(LVARRAY_DEVICE_COMPILE)
872  return ::asin( double( x ) );
873 #else
874  return std::asin( x );
875 #endif
876 }
877 
878 #if defined( LVARRAY_USE_DEVICE )
879 
882 __half asin( __half const x )
883 { return internal::asinImpl( x ); }
884 
887 __half2 asin( __half2 const x )
888 { return internal::asinImpl( x ); }
889 
890 #endif
891 
899 float acos( float const x )
900 {
901 #if defined(LVARRAY_DEVICE_COMPILE)
902  return ::acosf( x );
903 #else
904  return std::acos( x );
905 #endif
906 }
907 
909 template< typename T >
911 double acos( T const x )
912 {
913 #if defined(LVARRAY_DEVICE_COMPILE)
914  return ::acos( double( x ) );
915 #else
916  return std::acos( x );
917 #endif
918 }
919 
920 #if defined( LVARRAY_USE_DEVICE )
921 
924 __half acos( __half const x )
925 { return internal::acosImpl( x ); }
926 
929 __half2 acos( __half2 const x )
930 { return internal::acosImpl( x ); }
931 
932 #endif
933 
942 float atan2( float const y, float const x )
943 {
944 #if defined(LVARRAY_DEVICE_COMPILE)
945  return ::atan2f( y, x );
946 #else
947  return std::atan2( y, x );
948 #endif
949 }
950 
952 template< typename T >
954 double atan2( T const y, T const x )
955 {
956 #if defined(LVARRAY_DEVICE_COMPILE)
957  return ::atan2( double( y ), double( x ) );
958 #else
959  return std::atan2( y, x );
960 #endif
961 }
962 
963 #if defined( LVARRAY_USE_CUDA )
964 
967 __half atan2( __half const y, __half const x )
968 { return internal::atan2Impl( y, x ); }
969 
972 __half2 atan2( __half2 const y, __half2 const x )
973 { return internal::atan2Impl( y, x ); }
974 
975 #endif
976 
978 
982 
991 float exp( float const x )
992 {
993 #if defined(LVARRAY_DEVICE_COMPILE)
994  return ::expf( x );
995 #else
996  return std::exp( x );
997 #endif
998 }
999 
1001 template< typename T >
1003 double exp( T const x )
1004 {
1005 #if defined(LVARRAY_DEVICE_COMPILE)
1006  return ::exp( double( x ) );
1007 #else
1008  return std::exp( x );
1009 #endif
1010 }
1011 
1012 #if defined( LVARRAY_USE_DEVICE )
1013 
1016 __half exp( __half const x )
1017 { return ::hexp( x ); }
1018 
1021 __half2 exp( __half2 const x )
1022 { return ::h2exp( x ); }
1023 
1024 #endif
1025 
1033 float log( float const x )
1034 {
1035 #if defined(LVARRAY_DEVICE_COMPILE)
1036  return ::logf( x );
1037 #else
1038  return std::log( x );
1039 #endif
1040 }
1041 
1043 template< typename T >
1045 double log( T const x )
1046 {
1047 #if defined(LVARRAY_DEVICE_COMPILE)
1048  return ::log( double( x ) );
1049 #else
1050  return std::log( x );
1051 #endif
1052 }
1053 
1054 #if defined( LVARRAY_USE_DEVICE )
1055 
1058 __half log( __half const x )
1059 { return ::hlog( x ); }
1060 
1063 __half2 log( __half2 const x )
1064 { return ::h2log( x ); }
1065 
1066 #endif
1067 
1069 
1070 } // namespace math
1071 } // namespace LvArray
LVARRAY_DEVICE LVARRAY_FORCE_INLINE SingleType< T > getSecond(T const x)
Definition: math.hpp:301
LVARRAY_HOST_DEVICE LVARRAY_FORCE_INLINE void sincos(float const theta, float &sinTheta, float &cosTheta)
Compute the sine and cosine of theta.
Definition: math.hpp:642
LVARRAY_HOST_DEVICE LVARRAY_FORCE_INLINE double log(T const x)
Definition: math.hpp:1045
LVARRAY_HOST_DEVICE LVARRAY_FORCE_INLINE constexpr T square(T const x)
Definition: math.hpp:450
LVARRAY_HOST_DEVICE LVARRAY_FORCE_INLINE float invSqrt(float const x)
Definition: math.hpp:509
LVARRAY_HOST_DEVICE LVARRAY_FORCE_INLINE double atan2(T const y, T const x)
Definition: math.hpp:954
LVARRAY_HOST_DEVICE LVARRAY_FORCE_INLINE float sin(float const theta)
Definition: math.hpp:558
LVARRAY_HOST_DEVICE LVARRAY_FORCE_INLINE float exp(float const x)
Definition: math.hpp:991
LVARRAY_HOST_DEVICE LVARRAY_FORCE_INLINE double exp(T const x)
Definition: math.hpp:1003
LVARRAY_DEVICE LVARRAY_FORCE_INLINE T atan2Impl(T const y, T const x)
Definition: math.hpp:823
LVARRAY_DEVICE LVARRAY_FORCE_INLINE T asinImpl(T const x)
Definition: math.hpp:778
LVARRAY_HOST_DEVICE LVARRAY_FORCE_INLINE constexpr T convert(U const u, V const v)
Convert u and v to a dual type.
Definition: math.hpp:279
LVARRAY_HOST_DEVICE LVARRAY_FORCE_INLINE constexpr std::enable_if_t< std::is_arithmetic< T >::value, T > min(T const a, T const b)
Definition: math.hpp:362
LVARRAY_HOST_DEVICE LVARRAY_FORCE_INLINE constexpr T convert(U const u)
Convert u to type.
Definition: math.hpp:264
LVARRAY_HOST_DEVICE LVARRAY_FORCE_INLINE double cos(T const theta)
Definition: math.hpp:612
#define LVARRAY_FORCE_INLINE
Marks a function/lambda for inlining.
Definition: Macros.hpp:44
The type of a single value of type T.
Definition: math.hpp:67
LVARRAY_HOST_DEVICE LVARRAY_FORCE_INLINE double sin(T const theta)
Definition: math.hpp:570
LVARRAY_HOST_DEVICE LVARRAY_FORCE_INLINE double acos(T const x)
Definition: math.hpp:911
T type
An alias for T.
Definition: math.hpp:70
LVARRAY_HOST_DEVICE LVARRAY_FORCE_INLINE constexpr T abs(T const x)
Definition: math.hpp:408
#define LVARRAY_DEVICE
Mark a function for only device usage.
Definition: Macros.hpp:605
LVARRAY_HOST_DEVICE LVARRAY_FORCE_INLINE float cos(float const theta)
Definition: math.hpp:600
LVARRAY_HOST_DEVICE constexpr T lessThan(T const x, T const y)
Definition: math.hpp:101
LVARRAY_HOST_DEVICE LVARRAY_FORCE_INLINE float tan(float const theta)
Definition: math.hpp:712
The top level namespace.
Definition: Array.hpp:24
LVARRAY_DEVICE LVARRAY_FORCE_INLINE SingleType< T > getFirst(T const x)
Definition: math.hpp:290
LVARRAY_HOST_DEVICE LVARRAY_FORCE_INLINE double sqrt(T const x)
Definition: math.hpp:479
LVARRAY_HOST_DEVICE LVARRAY_FORCE_INLINE float asin(float const x)
Definition: math.hpp:857
Contains a bunch of macro definitions.
LVARRAY_HOST_DEVICE LVARRAY_FORCE_INLINE constexpr int numValues()
Return the number of values stored in type.
Definition: math.hpp:244
LVARRAY_HOST_DEVICE LVARRAY_FORCE_INLINE float sqrt(float const x)
Definition: math.hpp:467
typename internal::SingleType< T >::type SingleType
The type of a single value of type T.
Definition: math.hpp:252
LVARRAY_HOST_DEVICE LVARRAY_FORCE_INLINE double tan(T const theta)
Definition: math.hpp:724
LVARRAY_HOST_DEVICE LVARRAY_FORCE_INLINE float atan2(float const y, float const x)
Definition: math.hpp:942
LVARRAY_HOST_DEVICE LVARRAY_FORCE_INLINE float acos(float const x)
Definition: math.hpp:899
LVARRAY_DEVICE LVARRAY_FORCE_INLINE T acosImpl(T const x)
Definition: math.hpp:800
LVARRAY_HOST_DEVICE LVARRAY_FORCE_INLINE constexpr std::enable_if_t< std::is_arithmetic< T >::value, T > max(T const a, T const b)
Definition: math.hpp:313
LVARRAY_HOST_DEVICE LVARRAY_FORCE_INLINE float log(float const x)
Definition: math.hpp:1033
#define LVARRAY_HOST_DEVICE
Mark a function for both host and device usage.
Definition: Macros.hpp:600
LVARRAY_HOST_DEVICE LVARRAY_FORCE_INLINE double asin(T const x)
Definition: math.hpp:869