doxygen/html/math_8hpp_source.html

 /*
  * Copyright (c) 2021, Lawrence Livermore National Security, LLC and LvArray contributors.
  * All rights reserved.
  * See the LICENSE file for details.
  * SPDX-License-Identifier: (BSD-3-Clause)
  */

 #pragma once

 // Source includes
 #include "LvArrayConfig.hpp"
 #include "Macros.hpp"

 // System includes
 #include <algorithm>
 #include <cmath>
 #include <type_traits>

 #if defined( LVARRAY_USE_CUDA )
   #include <cuda_fp16.h>
 #endif

 namespace LvArray
 {

 namespace math
 {

 namespace internal
 {

 /**
  * @brief Convert @p u to type @c T through an implicit conversion.
  * @tparam T The type to convert to.
  * @tparam U The type to convert from.
  * @param u The value to convert.
  * @return @p u converted to @c T.
  * @note The first, unnamed argument is never read; it only selects the overload.
  */
 template< typename T, typename U >
 LVARRAY_HOST_DEVICE inline constexpr
 T convert( T const, U const u )
 {
   return u;
 }

 /**
  * @brief Number of values stored in an object of type @c T.
  * @tparam T The type to query.
  * @return Always 1 for this generic overload.
  * @note The argument is unnamed and only used to select the overload.
  */
 template< typename T >
 LVARRAY_HOST_DEVICE inline constexpr
 int numValues( T const )
 {
   return 1;
 }

 /**
  * @brief Trait giving the type of a single value held by @c T.
  * @tparam T The type to query.
  */
 template< typename T >
 struct SingleType
 {
   /// An alias for @c T; in the generic case a @c T holds exactly one value of type @c T.
   using type = T;
 };

 /**
  * @brief Return the first value stored in @p x.
  * @tparam T The type of @p x.
  * @param x The value to query.
  * @return @p x itself, since the generic case stores a single value.
  * @note The return type is the @c type member of the trait, not the trait struct itself;
  *   returning the struct would make every instantiation ill-formed.
  */
 template< typename T >
 LVARRAY_HOST_DEVICE inline constexpr
 typename SingleType< T >::type getFirst( T const x )
 { return x; }

 /**
  * @brief Return the second value stored in @p x.
  * @tparam T The type of @p x.
  * @param x The value to query.
  * @return @p x itself, since the generic case stores a single value.
  * @note The return type is the @c type member of the trait, not the trait struct itself.
  */
 template< typename T >
 LVARRAY_HOST_DEVICE inline constexpr
 typename SingleType< T >::type getSecond( T const x )
 { return x; }

 /**
  * @brief Compare @p x and @p y and return the result as a value of type @c T.
  * @tparam T The type of the values to compare.
  * @param x The left hand side of the comparison.
  * @param y The right hand side of the comparison.
  * @return @c T( 1 ) if @p x is less than @p y, otherwise @c T( 0 ).
  * @note The generic version uses the built-in comparison; the CUDA half precision
  *   intrinsics are only meaningful for the half types.
  */
 template< typename T >
 LVARRAY_HOST_DEVICE inline constexpr
 T lessThan( T const x, T const y )
 { return static_cast< T >( x < y ); }

 #if defined( LVARRAY_USE_CUDA )

 /**
  * @brief Convert @p u to a @c __half using @c __float2half_rn.
  * @tparam U The type to convert from.
  * @param u The value to convert.
  * @return @p u as a @c __half.
  */
 template< typename U >
 LVARRAY_HOST_DEVICE inline constexpr
 __half convert( __half const, U const u )
 {
   return __float2half_rn( u );
 }

 /**
  * @brief Convert @p u to a @c __half2 using @c __float2half2_rn.
  * @tparam U The type to convert from.
  * @param u The value to convert.
  * @return The @c __half2 produced from @p u.
  */
 template< typename U >
 LVARRAY_HOST_DEVICE inline constexpr
 __half2 convert( __half2 const, U const u )
 {
   return __float2half2_rn( u );
 }

 /**
  * @brief Convert a single @c __half to a @c __half2.
  * @param u The value to convert.
  * @return The @c __half2 produced from @p u.
  * @note Device compilation uses @c __half2half2, any other pass uses @c __float2half2_rn.
  */
 LVARRAY_HOST_DEVICE inline
 __half2 convert( __half2 const, __half const u )
 {
 #if !defined( __CUDA_ARCH__ )
   return __float2half2_rn( u );
 #else
   return __half2half2( u );
 #endif
 }

 /**
  * @brief Convert the pair @p u, @p v to a @c __half2 using @c __floats2half2_rn.
  * @tparam U The type of the first value.
  * @tparam V The type of the second value.
  * @param u The first value.
  * @param v The second value.
  * @return The @c __half2 produced from @p u and @p v.
  */
 template< typename U, typename V >
 LVARRAY_HOST_DEVICE inline constexpr
 __half2 convert( __half2 const, U const u, V const v )
 {
   return __floats2half2_rn( u, v );
 }

 /**
  * @brief Combine two @c __half values into a @c __half2.
  * @param u The first value.
  * @param v The second value.
  * @return The @c __half2 produced from @p u and @p v.
  * @note Device compilation uses @c __halves2half2, any other pass uses @c __floats2half2_rn.
  */
 LVARRAY_HOST_DEVICE inline
 __half2 convert( __half2 const, __half const u, __half const v )
 {
 #if !defined( __CUDA_ARCH__ )
   return __floats2half2_rn( u, v );
 #else
   return __halves2half2( u, v );
 #endif
 }

 /**
  * @brief Number of values stored in a @c __half2.
  * @return Always 2.
  */
 LVARRAY_HOST_DEVICE inline constexpr
 int numValues( __half2 const & )
 {
   return 2;
 }

 /**
  * @brief Specialization of SingleType for @c __half2.
  */
 template<>
 struct SingleType< __half2 >
 {
   /// A single value of a @c __half2 is a @c __half.
   using type = __half;
 };

 /**
  * @brief Return the value extracted from @p x by @c __low2half.
  * @param x The pair to query.
  */
 LVARRAY_DEVICE inline
 __half getFirst( __half2 const x )
 {
   return __low2half( x );
 }

 /**
  * @brief Return the value extracted from @p x by @c __high2half.
  * @param x The pair to query.
  */
 LVARRAY_DEVICE inline
 __half getSecond( __half2 const x )
 {
   return __high2half( x );
 }

 /**
  * @brief Compare two @c __half values with @c __hlt.
  * @param x The left hand side of the comparison.
  * @param y The right hand side of the comparison.
  * @return The result of @c __hlt converted to a @c __half.
  */
 LVARRAY_DEVICE inline
 __half lessThan( __half const x, __half const y )
 {
   return __hlt( x, y );
 }

 /**
  * @brief Compare two @c __half2 values with @c __hlt2.
  * @param x The left hand side of the comparison.
  * @param y The right hand side of the comparison.
  * @return The result of @c __hlt2.
  */
 LVARRAY_DEVICE inline
 __half2 lessThan( __half2 const x, __half2 const y )
 {
   return __hlt2( x, y );
 }

 #endif

 } // namespace internal


 /**
  * @brief Return the number of values stored in type @c T.
  * @tparam T The type to query; it is value-initialized to select the overload.
  * @return The result of internal::numValues for a @c T.
  */
 template< typename T >
 LVARRAY_HOST_DEVICE inline constexpr
 int numValues()
 {
   return internal::numValues( T() );
 }

 /**
  * @brief The type of a single value of type @c T, as reported by internal::SingleType.
  * @tparam T The type to query.
  */
 template< typename T >
 using SingleType = typename internal::SingleType< T >::type;

 /**
  * @brief Convert @p u to type @c T.
  * @tparam T The type to convert to; must be given explicitly.
  * @tparam U The type to convert from.
  * @param u The value to convert.
  * @return The result of internal::convert for a value-initialized @c T and @p u.
  */
 template< typename T, typename U >
 LVARRAY_HOST_DEVICE inline constexpr
 T convert( U const u )
 {
   return internal::convert( T(), u );
 }

 /**
  * @brief Convert the pair @p u, @p v to the dual type @c T.
  * @tparam T The type to convert to; must be given explicitly.
  * @tparam U The type of the first value.
  * @tparam V The type of the second value.
  * @param u The first value.
  * @param v The second value.
  * @return The result of internal::convert for a value-initialized @c T, @p u and @p v.
  */
 template< typename T, typename U, typename V >
 LVARRAY_HOST_DEVICE inline constexpr
 T convert( U const u, V const v )
 {
   return internal::convert( T(), u, v );
 }

 /**
  * @brief Return the first value stored in @p x.
  * @tparam T The type of @p x.
  * @param x The value to query.
  * @return The result of internal::getFirst for @p x.
  */
 template< typename T >
 LVARRAY_DEVICE inline
 SingleType< T > getFirst( T const x )
 {
   return internal::getFirst( x );
 }

 /**
  * @brief Return the second value stored in @p x.
  * @tparam T The type of @p x.
  * @param x The value to query.
  * @return The result of internal::getSecond for @p x.
  */
 template< typename T >
 LVARRAY_DEVICE inline
 SingleType< T > getSecond( T const x )
 {
   return internal::getSecond( x );
 }

 /**
  * @brief Return the larger of @p a and @p b.
  * @tparam T An arithmetic type; the overload is disabled for anything else.
  * @param a The first value.
  * @param b The second value.
  * @return The maximum of @p a and @p b.
  */
 template< typename T >
 LVARRAY_HOST_DEVICE inline constexpr
 std::enable_if_t< std::is_arithmetic< T >::value, T >
 max( T const a, T const b )
 {
 #if !defined(__CUDA_ARCH__)
   // Host pass: standard library implementation.
   return std::max( a, b );
 #else
   // Device pass: the global CUDA implementation.
   return ::max( a, b );
 #endif
 }

 #if defined( LVARRAY_USE_CUDA )

 /**
  * @brief Return the larger of two @c __half values.
  * @param a The first value.
  * @param b The second value.
  * @note Uses @c __hmax when the toolkit/architecture check below passes, otherwise a plain comparison.
  */
 LVARRAY_DEVICE inline
 __half max( __half const a, __half const b )
 {
 #if CUDART_VERSION > 11000 && (__CUDA_ARCH__ >= 800 || !defined(__CUDA_ARCH__))
   return __hmax( a, b );
 #else
   if( a > b )
   {
     return a;
   }

   return b;
 #endif
 }

 /**
  * @brief Return the lane-wise larger of two @c __half2 values.
  * @param a The first value.
  * @param b The second value.
  * @note Uses @c __hmax2 when the toolkit/architecture check below passes. Otherwise the
  *   result is blended arithmetically from the @c __hge2 comparison result (presumably
  *   1 or 0 per lane, see the CUDA half2 comparison intrinsics).
  */
 LVARRAY_DEVICE inline
 __half2 max( __half2 const a, __half2 const b )
 {
 #if CUDART_VERSION > 11000 && (__CUDA_ARCH__ >= 800 || !defined(__CUDA_ARCH__))
   return __hmax2( a, b );
 #else
   __half2 const selectA = __hge2( a, b );
   __half2 const selectB = convert< __half2 >( 1 ) - selectA;
   return a * selectA + selectB * b;
 #endif
 }

 #endif


 /**
  * @brief Return the smaller of @p a and @p b.
  * @tparam T An arithmetic type; the overload is disabled for anything else.
  * @param a The first value.
  * @param b The second value.
  * @return The minimum of @p a and @p b.
  */
 template< typename T >
 LVARRAY_HOST_DEVICE inline constexpr
 std::enable_if_t< std::is_arithmetic< T >::value, T >
 min( T const a, T const b )
 {
 #if !defined(__CUDA_ARCH__)
   // Host pass: standard library implementation.
   return std::min( a, b );
 #else
   // Device pass: the global CUDA implementation.
   return ::min( a, b );
 #endif
 }

 #if defined( LVARRAY_USE_CUDA )

 /**
  * @brief Return the smaller of two @c __half values.
  * @param a The first value.
  * @param b The second value.
  * @note Uses @c __hmin when the toolkit/architecture check below passes, otherwise a plain comparison.
  */
 LVARRAY_DEVICE inline
 __half min( __half const a, __half const b )
 {
 #if CUDART_VERSION > 11000 && (__CUDA_ARCH__ >= 800 || !defined(__CUDA_ARCH__))
   return __hmin( a, b );
 #else
   if( a < b )
   {
     return a;
   }

   return b;
 #endif
 }

 /**
  * @brief Return the lane-wise smaller of two @c __half2 values.
  * @param a The first value.
  * @param b The second value.
  * @note Uses @c __hmin2 when the toolkit/architecture check below passes. Otherwise the
  *   result is blended arithmetically from the @c __hle2 comparison result (presumably
  *   1 or 0 per lane, see the CUDA half2 comparison intrinsics).
  */
 LVARRAY_DEVICE inline
 __half2 min( __half2 const a, __half2 const b )
 {
 #if CUDART_VERSION > 11000 && (__CUDA_ARCH__ >= 800 || !defined(__CUDA_ARCH__))
   return __hmin2( a, b );
 #else
   __half2 const selectA = __hle2( a, b );
   __half2 const selectB = convert< __half2 >( 1 ) - selectA;
   return a * selectA + selectB * b;
 #endif
 }

 #endif

 /**
  * @brief Return the absolute value of @p x.
  * @tparam T The type of @p x.
  * @param x The value to take the absolute value of.
  * @return The absolute value of @p x.
  */
 template< typename T >
 LVARRAY_HOST_DEVICE inline constexpr
 T abs( T const x )
 {
 #if !defined(__CUDA_ARCH__)
   // Host pass: standard library implementation.
   return std::abs( x );
 #else
   // Device pass: the global CUDA implementation.
   return ::abs( x );
 #endif
 }

 #if defined( LVARRAY_USE_CUDA )

 /**
  * @brief Return the absolute value of a @c __half.
  * @param x The value to take the absolute value of.
  * @note Uses @c __habs for toolkits newer than 11.0, otherwise a comparison against zero.
  */
 LVARRAY_DEVICE inline
 __half abs( __half const x )
 {
 #if CUDART_VERSION > 11000
   return __habs( x );
 #else
   if( x > __half( 0 ) )
   {
     return x;
   }

   return -x;
 #endif
 }

 /**
  * @brief Return the lane-wise absolute value of a @c __half2.
  * @param x The value to take the absolute value of.
  * @note Uses @c __habs2 for toolkits newer than 11.0. Otherwise twice the value is
  *   subtracted wherever the @c __hle2 comparison against zero is set.
  */
 LVARRAY_DEVICE inline
 __half2 abs( __half2 const x )
 {
 #if CUDART_VERSION > 11000
   return __habs2( x );
 #else
   __half2 const nonPositive = __hle2( x, convert< __half2 >( 0 ) );
   return x - nonPositive * ( x + x );
 #endif
 }

 #endif

 /**
  * @brief Return @p x multiplied by itself.
  * @tparam T The type of @p x.
  * @param x The value to square.
  * @return @p x * @p x.
  */
 template< typename T >
 LVARRAY_HOST_DEVICE inline constexpr
 T square( T const x )
 {
   return x * x;
 }


 /**
  * @brief Compute the square root of @p x in single precision.
  * @param x The value to take the square root of.
  * @return The square root of @p x.
  */
 LVARRAY_HOST_DEVICE inline
 float sqrt( float const x )
 {
 #if !defined(__CUDA_ARCH__)
   // Host pass: standard library implementation.
   return std::sqrt( x );
 #else
   // Device pass: CUDA single precision routine.
   return ::sqrtf( x );
 #endif
 }

 /**
  * @brief Compute the square root of @p x in double precision.
  * @tparam T The type of @p x; on device it is converted to @c double first.
  * @param x The value to take the square root of.
  * @return The square root of @p x as a @c double.
  */
 template< typename T >
 LVARRAY_HOST_DEVICE inline
 double sqrt( T const x )
 {
 #if !defined(__CUDA_ARCH__)
   // Host pass: standard library implementation.
   return std::sqrt( x );
 #else
   // Device pass: CUDA double precision routine.
   return ::sqrt( double( x ) );
 #endif
 }

 #if defined( LVARRAY_USE_CUDA )

 /**
  * @brief Compute the square root of a @c __half with @c hsqrt.
  * @param x The value to take the square root of.
  */
 LVARRAY_DEVICE inline
 __half sqrt( __half const x )
 {
   return ::hsqrt( x );
 }

 /**
  * @brief Compute the square root of a @c __half2 with @c h2sqrt.
  * @param x The value to take the square root of.
  */
 LVARRAY_DEVICE inline
 __half2 sqrt( __half2 const x )
 {
   return ::h2sqrt( x );
 }

 #endif

 /**
  * @brief Compute the reciprocal square root of @p x in single precision.
  * @param x The value to take the reciprocal square root of.
  * @return One over the square root of @p x.
  */
 LVARRAY_HOST_DEVICE inline
 float invSqrt( float const x )
 {
 #if !defined(__CUDA_ARCH__)
   // Host pass: no dedicated routine, divide by the standard library square root.
   return 1 / std::sqrt( x );
 #else
   // Device pass: CUDA single precision reciprocal square root.
   return ::rsqrtf( x );
 #endif
 }

 /**
  * @brief Compute the reciprocal square root of @p x in double precision.
  * @tparam T The type of @p x; on device it is converted to @c double first.
  * @param x The value to take the reciprocal square root of.
  * @return One over the square root of @p x as a @c double.
  */
 template< typename T >
 LVARRAY_HOST_DEVICE inline
 double invSqrt( T const x )
 {
 #if !defined( __CUDA_ARCH__ )
   // Host pass: no dedicated routine, divide by the standard library square root.
   return 1 / std::sqrt( x );
 #else
   // Device pass: CUDA double precision reciprocal square root.
   return ::rsqrt( double( x ) );
 #endif
 }

 #if defined( LVARRAY_USE_CUDA )

 /**
  * @brief Compute the reciprocal square root of a @c __half with @c hrsqrt.
  * @param x The value to take the reciprocal square root of.
  */
 LVARRAY_DEVICE inline
 __half invSqrt( __half const x )
 {
   return ::hrsqrt( x );
 }

 /**
  * @brief Compute the reciprocal square root of a @c __half2 with @c h2rsqrt.
  * @param x The value to take the reciprocal square root of.
  */
 LVARRAY_DEVICE inline
 __half2 invSqrt( __half2 const x )
 {
   return ::h2rsqrt( x );
 }

 #endif


 /**
  * @brief Compute the sine of @p theta in single precision.
  * @param theta The angle in radians.
  * @return The sine of @p theta.
  */
 LVARRAY_HOST_DEVICE inline
 float sin( float const theta )
 {
 #if !defined(__CUDA_ARCH__)
   // Host pass: standard library implementation.
   return std::sin( theta );
 #else
   // Device pass: CUDA single precision routine.
   return ::sinf( theta );
 #endif
 }

 /**
  * @brief Compute the sine of @p theta in double precision.
  * @tparam T The type of @p theta; on device it is converted to @c double first.
  * @param theta The angle in radians.
  * @return The sine of @p theta as a @c double.
  */
 template< typename T >
 LVARRAY_HOST_DEVICE inline
 double sin( T const theta )
 {
 #if !defined(__CUDA_ARCH__)
   // Host pass: standard library implementation.
   return std::sin( theta );
 #else
   // Device pass: CUDA double precision routine.
   return ::sin( double( theta ) );
 #endif
 }

 #if defined( LVARRAY_USE_CUDA )

 /**
  * @brief Compute the sine of a @c __half with @c hsin.
  * @param theta The angle.
  */
 LVARRAY_DEVICE inline
 __half sin( __half const theta )
 {
   return ::hsin( theta );
 }

 /**
  * @brief Compute the sine of a @c __half2 with @c h2sin.
  * @param theta The angles.
  */
 LVARRAY_DEVICE inline
 __half2 sin( __half2 const theta )
 {
   return ::h2sin( theta );
 }

 #endif

 /**
  * @brief Compute the cosine of @p theta in single precision.
  * @param theta The angle in radians.
  * @return The cosine of @p theta.
  */
 LVARRAY_HOST_DEVICE inline
 float cos( float const theta )
 {
 #if !defined(__CUDA_ARCH__)
   // Host pass: standard library implementation.
   return std::cos( theta );
 #else
   // Device pass: CUDA single precision routine.
   return ::cosf( theta );
 #endif
 }

 /**
  * @brief Compute the cosine of @p theta in double precision.
  * @tparam T The type of @p theta; on device it is converted to @c double first.
  * @param theta The angle in radians.
  * @return The cosine of @p theta as a @c double.
  */
 template< typename T >
 LVARRAY_HOST_DEVICE inline
 double cos( T const theta )
 {
 #if !defined(__CUDA_ARCH__)
   // Host pass: standard library implementation.
   return std::cos( theta );
 #else
   // Device pass: CUDA double precision routine.
   return ::cos( double( theta ) );
 #endif
 }

 #if defined( LVARRAY_USE_CUDA )

 /**
  * @brief Compute the cosine of a @c __half with @c hcos.
  * @param theta The angle.
  */
 LVARRAY_DEVICE inline
 __half cos( __half const theta )
 {
   return ::hcos( theta );
 }

 /**
  * @brief Compute the cosine of a @c __half2 with @c h2cos.
  * @param theta The angles.
  */
 LVARRAY_DEVICE inline
 __half2 cos( __half2 const theta )
 {
   return ::h2cos( theta );
 }

 #endif

 /**
  * @brief Compute the sine and cosine of @p theta in single precision.
  * @param theta The angle in radians.
  * @param sinTheta Set to the sine of @p theta.
  * @param cosTheta Set to the cosine of @p theta.
  */
 LVARRAY_HOST_DEVICE inline
 void sincos( float const theta, float & sinTheta, float & cosTheta )
 {
 #if defined(__CUDA_ARCH__)
   // Call the explicitly single precision routine, like the other float overloads
   // in this file (::sinf, ::cosf, ::tanf, ...), instead of relying on overload resolution.
   ::sincosf( theta, &sinTheta, &cosTheta );
 #else
   sinTheta = std::sin( theta );
   cosTheta = std::cos( theta );
 #endif
 }

 /**
  * @brief Compute the sine and cosine of @p theta in double precision.
  * @param theta The angle in radians.
  * @param sinTheta Set to the sine of @p theta.
  * @param cosTheta Set to the cosine of @p theta.
  * @note This is a plain overload on purpose. A template parameter that does not appear in
  *   the signature cannot be deduced, which would make the overload unreachable from an
  *   ordinary call such as sincos( x, s, c ).
  */
 LVARRAY_HOST_DEVICE inline
 void sincos( double const theta, double & sinTheta, double & cosTheta )
 {
 #if defined(__CUDA_ARCH__)
   ::sincos( theta, &sinTheta, &cosTheta );
 #else
   sinTheta = std::sin( theta );
   cosTheta = std::cos( theta );
 #endif
 }

 /**
  * @brief Compute the sine and cosine of @p theta in double precision.
  * @tparam T The type of @p theta.
  * @param theta The angle in radians.
  * @param sinTheta Set to the sine of @p theta.
  * @param cosTheta Set to the cosine of @p theta.
  */
 template< typename T >
 LVARRAY_HOST_DEVICE inline
 void sincos( T const theta, double & sinTheta, double & cosTheta )
 {
 #if !defined(__CUDA_ARCH__)
   // Host pass: two separate standard library calls.
   sinTheta = std::sin( theta );
   cosTheta = std::cos( theta );
 #else
   // Device pass: compute into locals, then copy out to the caller's references.
   double sinValue, cosValue;
   ::sincos( theta, &sinValue, &cosValue );
   sinTheta = sinValue;
   cosTheta = cosValue;
 #endif
 }

 #if defined( LVARRAY_USE_CUDA )

 /**
  * @brief Compute the sine and cosine of a @c __half with @c hsin and @c hcos.
  * @param theta The angle.
  * @param sinTheta Set to the sine of @p theta.
  * @param cosTheta Set to the cosine of @p theta.
  */
 LVARRAY_DEVICE inline
 void sincos( __half const theta, __half & sinTheta, __half & cosTheta )
 {
   // Sine first, then cosine, as two independent intrinsic calls.
   sinTheta = ::hsin( theta );
   cosTheta = ::hcos( theta );
 }

 /**
  * @brief Compute the sine and cosine of a @c __half2 with @c h2sin and @c h2cos.
  * @param theta The angles.
  * @param sinTheta Set to the sines of @p theta.
  * @param cosTheta Set to the cosines of @p theta.
  */
 LVARRAY_DEVICE inline
 void sincos( __half2 const theta, __half2 & sinTheta, __half2 & cosTheta )
 {
   // Sine first, then cosine, as two independent intrinsic calls.
   sinTheta = ::h2sin( theta );
   cosTheta = ::h2cos( theta );
 }

 #endif

 /**
  * @brief Compute the tangent of @p theta in single precision.
  * @param theta The angle in radians.
  * @return The tangent of @p theta.
  */
 LVARRAY_HOST_DEVICE inline
 float tan( float const theta )
 {
 #if !defined(__CUDA_ARCH__)
   // Host pass: standard library implementation.
   return std::tan( theta );
 #else
   // Device pass: CUDA single precision routine.
   return ::tanf( theta );
 #endif
 }

 /**
  * @brief Compute the tangent of @p theta in double precision.
  * @tparam T The type of @p theta; on device it is converted to @c double first.
  * @param theta The angle in radians.
  * @return The tangent of @p theta as a @c double.
  */
 template< typename T >
 LVARRAY_HOST_DEVICE inline
 double tan( T const theta )
 {
 #if !defined(__CUDA_ARCH__)
   // Host pass: standard library implementation.
   return std::tan( theta );
 #else
   // Device pass: CUDA double precision routine.
   return ::tan( double( theta ) );
 #endif
 }

 #if defined( LVARRAY_USE_CUDA )

 /**
  * @brief Compute the tangent of a @c __half as sine divided by cosine.
  * @param theta The angle.
  * @return The sine of @p theta divided by the cosine of @p theta.
  */
 LVARRAY_DEVICE inline
 __half tan( __half const theta )
 {
   __half sinTheta, cosTheta;
   sincos( theta, sinTheta, cosTheta );
   return sinTheta / cosTheta;
 }

 /**
  * @brief Compute the tangent of a @c __half2 as sine divided by cosine.
  * @param theta The angles.
  * @return The sines of @p theta divided by the cosines of @p theta.
  */
 LVARRAY_DEVICE inline
 __half2 tan( __half2 const theta )
 {
   __half2 sinTheta, cosTheta;
   sincos( theta, sinTheta, cosTheta );
   return sinTheta / cosTheta;
 }

 #endif


 namespace internal
 {

 /**
  * @brief Branch-free polynomial approximation of the arcsine of @p x.
  * @tparam T The type of @p x (the callers in this file pass @c __half and @c __half2).
  * @param x The value to take the arcsine of.
  * @return The approximation described below; @p x itself when |x| is below 0.17.
  * @details A cubic polynomial in |x| is multiplied by sqrt( 1 - |x| ) and subtracted from
  *   pi / 2. Selection between alternatives is done arithmetically with the values returned
  *   by lessThan, which are presumably 1 where the comparison holds and 0 elsewhere.
  */
 template< typename T >
 LVARRAY_DEVICE inline
 T asinImpl( T const x )
 {
   // Comparison result for x < 0, used below to flip the sign of the result.
   T const negate = lessThan( x, math::convert< T >( 0 ) );
   T const absX = abs( x );

   // Evaluate the cubic in |x| one multiply-add at a time.
   T ret = math::convert< T >( -0.0187293 ) * absX + math::convert< T >( 0.0742610 );
   ret = ret * absX - math::convert< T >( 0.2121144 );
   ret = ret * absX + math::convert< T >( 1.5707288 );
   ret = math::convert< T >( 3.14159265358979 * 0.5 ) - ret * sqrt( math::convert< T >( 1 ) - absX );
   // Where negate is set this turns ret into ret - 2 * ret, i.e. -ret.
   ret = ret - negate * ( ret + ret );
   // For |x| < 0.17 return x itself instead of the polynomial result.
   T const smallAngle = lessThan( absX, math::convert< T >( 1.7e-1 ) );
   return smallAngle * x + ( math::convert< T >( 1 ) - smallAngle ) * ret;
 }

 /**
  * @brief Branch-free polynomial approximation of the arccosine of @p x.
  * @tparam T The type of @p x (the callers in this file pass @c __half and @c __half2).
  * @param x The value to take the arccosine of.
  * @return The approximation described below.
  * @details A cubic polynomial in |x| is multiplied by sqrt( 1 - |x| ). Where @p x is negative
  *   the result is negated and pi is added. Selection is done arithmetically with the value
  *   returned by lessThan, which is presumably 1 where the comparison holds and 0 elsewhere.
  */
 template< typename T >
 LVARRAY_DEVICE inline
 T acosImpl( T const x )
 {
   // Comparison result for x < 0, used below to select the negative branch.
   T const negate = lessThan( x, math::convert< T >( 0 ) );
   T const absX = abs( x );

   // Evaluate the cubic in |x| one multiply-add at a time.
   T ret = math::convert< T >( -0.0187293 ) * absX + math::convert< T >( 0.0742610 );
   ret = ret * absX - math::convert< T >( 0.2121144 );
   ret = ret * absX + math::convert< T >( 1.5707288 );
   ret = ret * sqrt( math::convert< T >( 1 ) - absX );
   // Where negate is set this turns ret into ret - 2 * ret, i.e. -ret.
   ret = ret - negate * ( ret + ret );
   // Where negate is set the result becomes pi - (value for |x|).
   return negate * math::convert< T >( 3.14159265358979 ) + ret;
 }

 /**
  * @brief Branch-free polynomial approximation of the two argument arctangent.
  * @tparam T The type of @p y and @p x (the callers in this file pass @c __half and @c __half2).
  * @param y The first argument.
  * @param x The second argument.
  * @return The approximation described below.
  * @details An odd polynomial is evaluated at min( |x|, |y| ) / max( |x|, |y| ) and the result
  *   is then adjusted three times with arithmetic selection; the scalar equivalents are
  *   spelled out in the comment inside the body.
  * @note NOTE(review): when both arguments are zero the ratio is 0 / 0; confirm whether
  *   callers can pass that input.
  */
 template< typename T >
 LVARRAY_DEVICE inline
 T atan2Impl( T const y, T const x )
 {
   T const absX = abs( x );
   T const absY = abs( y );
   // The ratio of the smaller magnitude to the larger one.
   T const ratio = min( absX, absY ) / max( absX, absY );
   T const ratio2 = ratio * ratio;

   // Evaluate the polynomial in ratio^2 one multiply-add at a time, then scale by ratio.
   T ret = math::convert< T >( -0.013480470 ) * ratio2 + math::convert< T >( 0.057477314 );
   ret = ret * ratio2 - math::convert< T >( 0.121239071 );
   ret = ret * ratio2 + math::convert< T >( 0.195635925 );
   ret = ret * ratio2 - math::convert< T >( 0.332994597 );
   ret = ret * ratio2 + math::convert< T >( 0.999995630 );
   ret = ret * ratio;

   // For single values the following works out to:
   // ret = absX < absY ? 1.570796327 - ret : ret;
   // ret = x < 0 ? 3.141592654 - ret : ret;
   // ret = y < 0 ? -ret : ret;
   ret = internal::lessThan( absX, absY ) * ( math::convert< T >( 1.570796327 ) - ret - ret ) + ret;
   ret = internal::lessThan( x, math::convert< T >( 0 ) ) * ( math::convert< T >( 3.141592654 ) - ret - ret ) + ret;
   ret = ret - internal::lessThan( y, math::convert< T >( 0 ) ) * ( ret + ret );

   return ret;
 }

 } // namespace internal

 /**
  * @brief Compute the arcsine of @p x in single precision.
  * @param x The value to take the arcsine of.
  * @return The arcsine of @p x.
  */
 LVARRAY_HOST_DEVICE inline
 float asin( float const x )
 {
 #if !defined(__CUDA_ARCH__)
   // Host pass: standard library implementation.
   return std::asin( x );
 #else
   // Device pass: CUDA single precision routine.
   return ::asinf( x );
 #endif
 }

 /**
  * @brief Compute the arcsine of @p x in double precision.
  * @tparam T The type of @p x; on device it is converted to @c double first.
  * @param x The value to take the arcsine of.
  * @return The arcsine of @p x as a @c double.
  */
 template< typename T >
 LVARRAY_HOST_DEVICE inline
 double asin( T const x )
 {
 #if !defined(__CUDA_ARCH__)
   // Host pass: standard library implementation.
   return std::asin( x );
 #else
   // Device pass: CUDA double precision routine.
   return ::asin( double( x ) );
 #endif
 }

 #if defined( LVARRAY_USE_CUDA )

 /**
  * @brief Compute the arcsine of a @c __half via internal::asinImpl.
  * @param x The value to take the arcsine of.
  */
 LVARRAY_DEVICE inline
 __half asin( __half const x )
 {
   return internal::asinImpl( x );
 }

 /**
  * @brief Compute the arcsine of a @c __half2 via internal::asinImpl.
  * @param x The values to take the arcsine of.
  */
 LVARRAY_DEVICE inline
 __half2 asin( __half2 const x )
 {
   return internal::asinImpl( x );
 }

 #endif

 /**
  * @brief Compute the arccosine of @p x in single precision.
  * @param x The value to take the arccosine of.
  * @return The arccosine of @p x.
  */
 LVARRAY_HOST_DEVICE inline
 float acos( float const x )
 {
 #if !defined(__CUDA_ARCH__)
   // Host pass: standard library implementation.
   return std::acos( x );
 #else
   // Device pass: CUDA single precision routine.
   return ::acosf( x );
 #endif
 }

 /**
  * @brief Compute the arccosine of @p x in double precision.
  * @tparam T The type of @p x; on device it is converted to @c double first.
  * @param x The value to take the arccosine of.
  * @return The arccosine of @p x as a @c double.
  */
 template< typename T >
 LVARRAY_HOST_DEVICE inline
 double acos( T const x )
 {
 #if !defined(__CUDA_ARCH__)
   // Host pass: standard library implementation.
   return std::acos( x );
 #else
   // Device pass: CUDA double precision routine.
   return ::acos( double( x ) );
 #endif
 }

 #if defined( LVARRAY_USE_CUDA )

 /**
  * @brief Compute the arccosine of a @c __half via internal::acosImpl.
  * @param x The value to take the arccosine of.
  */
 LVARRAY_DEVICE inline
 __half acos( __half const x )
 {
   return internal::acosImpl( x );
 }

 /**
  * @brief Compute the arccosine of a @c __half2 via internal::acosImpl.
  * @param x The values to take the arccosine of.
  */
 LVARRAY_DEVICE inline
 __half2 acos( __half2 const x )
 {
   return internal::acosImpl( x );
 }

 #endif

 /**
  * @brief Compute the two argument arctangent in single precision.
  * @param y The first argument.
  * @param x The second argument.
  * @return The arctangent of @p y / @p x as computed by the underlying atan2 routine.
  */
 LVARRAY_HOST_DEVICE inline
 float atan2( float const y, float const x )
 {
 #if !defined(__CUDA_ARCH__)
   // Host pass: standard library implementation.
   return std::atan2( y, x );
 #else
   // Device pass: CUDA single precision routine.
   return ::atan2f( y, x );
 #endif
 }

 /**
  * @brief Compute the two argument arctangent in double precision.
  * @tparam T The type of both arguments; on device they are converted to @c double first.
  * @param y The first argument.
  * @param x The second argument.
  * @return The arctangent of @p y / @p x as computed by the underlying atan2 routine.
  */
 template< typename T >
 LVARRAY_HOST_DEVICE inline
 double atan2( T const y, T const x )
 {
 #if !defined(__CUDA_ARCH__)
   // Host pass: standard library implementation.
   return std::atan2( y, x );
 #else
   // Device pass: CUDA double precision routine.
   return ::atan2( double( y ), double( x ) );
 #endif
 }

 #if defined( LVARRAY_USE_CUDA )

 /**
  * @brief Compute the two argument arctangent of @c __half values via internal::atan2Impl.
  * @param y The first argument.
  * @param x The second argument.
  */
 LVARRAY_DEVICE inline
 __half atan2( __half const y, __half const x )
 {
   return internal::atan2Impl( y, x );
 }

 /**
  * @brief Compute the two argument arctangent of @c __half2 values via internal::atan2Impl.
  * @param y The first arguments.
  * @param x The second arguments.
  */
 LVARRAY_DEVICE inline
 __half2 atan2( __half2 const y, __half2 const x )
 {
   return internal::atan2Impl( y, x );
 }

 #endif


 /**
  * @brief Compute e raised to the power @p x in single precision.
  * @param x The exponent.
  * @return e raised to the power @p x.
  */
 LVARRAY_HOST_DEVICE inline
 float exp( float const x )
 {
 #if !defined(__CUDA_ARCH__)
   // Host pass: standard library implementation.
   return std::exp( x );
 #else
   // Device pass: CUDA single precision routine.
   return ::expf( x );
 #endif
 }

 /**
  * @brief Compute e raised to the power @p x in double precision.
  * @tparam T The type of @p x; on device it is converted to @c double first.
  * @param x The exponent.
  * @return e raised to the power @p x as a @c double.
  */
 template< typename T >
 LVARRAY_HOST_DEVICE inline
 double exp( T const x )
 {
 #if !defined(__CUDA_ARCH__)
   // Host pass: standard library implementation.
   return std::exp( x );
 #else
   // Device pass: CUDA double precision routine.
   return ::exp( double( x ) );
 #endif
 }

 #if defined( LVARRAY_USE_CUDA )

 /**
  * @brief Compute the exponential of a @c __half with @c hexp.
  * @param x The exponent.
  */
 LVARRAY_DEVICE inline
 __half exp( __half const x )
 {
   return ::hexp( x );
 }

 /**
  * @brief Compute the exponential of a @c __half2 with @c h2exp.
  * @param x The exponents.
  */
 LVARRAY_DEVICE inline
 __half2 exp( __half2 const x )
 {
   return ::h2exp( x );
 }

 #endif

 /**
  * @brief Compute the natural logarithm of @p x in single precision.
  * @param x The value to take the logarithm of.
  * @return The natural logarithm of @p x.
  */
 LVARRAY_HOST_DEVICE inline
 float log( float const x )
 {
 #if !defined(__CUDA_ARCH__)
   // Host pass: standard library implementation.
   return std::log( x );
 #else
   // Device pass: CUDA single precision routine.
   return ::logf( x );
 #endif
 }

 /**
  * @brief Compute the natural logarithm of @p x in double precision.
  * @tparam T The type of @p x; on device it is converted to @c double first.
  * @param x The value to take the logarithm of.
  * @return The natural logarithm of @p x as a @c double.
  */
 template< typename T >
 LVARRAY_HOST_DEVICE inline
 double log( T const x )
 {
 #if !defined(__CUDA_ARCH__)
   // Host pass: standard library implementation.
   return std::log( x );
 #else
   // Device pass: CUDA double precision routine.
   return ::log( double( x ) );
 #endif
 }

 #if defined( LVARRAY_USE_CUDA )

 /**
  * @brief Compute the natural logarithm of a @c __half with @c hlog.
  * @param x The value to take the logarithm of.
  */
 LVARRAY_DEVICE inline
 __half log( __half const x )
 {
   return ::hlog( x );
 }

 /**
  * @brief Compute the natural logarithm of a @c __half2 with @c h2log.
  * @param x The values to take the logarithm of.
  */
 LVARRAY_DEVICE inline
 __half2 log( __half2 const x )
 {
   return ::h2log( x );
 }

 #endif


 } // namespace math
 } // namespace LvArray
LvArray::math::exp
LVARRAY_HOST_DEVICE double exp(T const x)
Definition: math.hpp:992

LvArray::math::convert
LVARRAY_HOST_DEVICE constexpr T convert(U const u)
Convert u to type T.
Definition: math.hpp:262

LvArray::math::tan
LVARRAY_HOST_DEVICE double tan(T const theta)
Definition: math.hpp:714

LvArray::math::asin
LVARRAY_HOST_DEVICE float asin(float const x)
Definition: math.hpp:846

LvArray::math::sqrt
LVARRAY_HOST_DEVICE double sqrt(T const x)
Definition: math.hpp:473

LvArray::math::acos
LVARRAY_HOST_DEVICE float acos(float const x)
Definition: math.hpp:888

LvArray::math::internal::acosImpl
LVARRAY_DEVICE T acosImpl(T const x)
Definition: math.hpp:790

LvArray::math::acos
LVARRAY_HOST_DEVICE double acos(T const x)
Definition: math.hpp:900

LvArray::math::cos
LVARRAY_HOST_DEVICE float cos(float const theta)
Definition: math.hpp:594

LvArray::math::min
LVARRAY_HOST_DEVICE constexpr std::enable_if_t< std::is_arithmetic< T >::value, T > min(T const a, T const b)
Definition: math.hpp:358

LvArray::math::internal::asinImpl
LVARRAY_DEVICE T asinImpl(T const x)
Definition: math.hpp:768

LvArray::math::convert
LVARRAY_HOST_DEVICE constexpr T convert(U const u, V const v)
Convert u and v to a dual type.
Definition: math.hpp:277

LvArray::math::internal::SingleType
The type of a single value of type T.
Definition: math.hpp:67

LvArray::math::log
LVARRAY_HOST_DEVICE float log(float const x)
Definition: math.hpp:1022

LvArray::math::sin
LVARRAY_HOST_DEVICE float sin(float const theta)
Definition: math.hpp:552

LvArray::math::sincos
LVARRAY_HOST_DEVICE void sincos(float const theta, float &sinTheta, float &cosTheta)
Compute the sine and cosine of theta.
Definition: math.hpp:636

LvArray::math::asin
LVARRAY_HOST_DEVICE double asin(T const x)
Definition: math.hpp:858

LvArray::math::internal::SingleType::type
T type
An alias for T.
Definition: math.hpp:70

LvArray::math::cos
LVARRAY_HOST_DEVICE double cos(T const theta)
Definition: math.hpp:606

LVARRAY_DEVICE
#define LVARRAY_DEVICE
Mark a function for only device usage.
Definition: Macros.hpp:552

LvArray::math::internal::lessThan
LVARRAY_HOST_DEVICE constexpr T lessThan(T const x, T const y)
Definition: math.hpp:101

LvArray::math::getFirst
LVARRAY_DEVICE SingleType< T > getFirst(T const x)
Definition: math.hpp:288

LvArray::math::sqrt
LVARRAY_HOST_DEVICE float sqrt(float const x)
Definition: math.hpp:461

LvArray
The top level namespace.
Definition: Array.hpp:24

LvArray::math::getSecond
LVARRAY_DEVICE SingleType< T > getSecond(T const x)
Definition: math.hpp:299

Macros.hpp
Contains a bunch of macro definitions.

LvArray::math::abs
LVARRAY_HOST_DEVICE constexpr T abs(T const x)
Definition: math.hpp:402

LvArray::math::atan2
LVARRAY_HOST_DEVICE float atan2(float const y, float const x)
Definition: math.hpp:931

LvArray::math::internal::atan2Impl
LVARRAY_DEVICE T atan2Impl(T const y, T const x)
Definition: math.hpp:812

LvArray::math::exp
LVARRAY_HOST_DEVICE float exp(float const x)
Definition: math.hpp:980

LvArray::math::tan
LVARRAY_HOST_DEVICE float tan(float const theta)
Definition: math.hpp:702

LvArray::math::max
LVARRAY_HOST_DEVICE constexpr std::enable_if_t< std::is_arithmetic< T >::value, T > max(T const a, T const b)
Definition: math.hpp:311

LvArray::math::atan2
LVARRAY_HOST_DEVICE double atan2(T const y, T const x)
Definition: math.hpp:943

LvArray::math::SingleType
typename internal::SingleType< T >::type SingleType
The type of a single value of type T.
Definition: math.hpp:250

LvArray::math::numValues
LVARRAY_HOST_DEVICE constexpr int numValues()
Return the number of values stored in type T.
Definition: math.hpp:242

LvArray::math::log
LVARRAY_HOST_DEVICE double log(T const x)
Definition: math.hpp:1034

LvArray::math::invSqrt
LVARRAY_HOST_DEVICE float invSqrt(float const x)
Definition: math.hpp:503

LvArray::math::sin
LVARRAY_HOST_DEVICE double sin(T const theta)
Definition: math.hpp:564

LVARRAY_HOST_DEVICE
#define LVARRAY_HOST_DEVICE
Mark a function for both host and device usage.
Definition: Macros.hpp:549

LvArray::math::square
LVARRAY_HOST_DEVICE constexpr T square(T const x)
Definition: math.hpp:444