25 #include "hip_fp16_math_fwd.h"
26 #include "hip_vector_types.h"
36 #if !__HIP_DEVICE_COMPILE__
47 #include "kalmar_math.h"
50 #if _LIBCPP_VERSION && __HIP__
// Fragment of a __numeric_type specialization for _Float16. The visible
// members declare a __test overload taking _Float16, name _Float16 as `type`,
// and set `value` to true.
53 struct __numeric_type<_Float16>
55 static _Float16 __test(_Float16);
57 typedef _Float16 type;
58 static const bool value =
true;
61 #endif // _LIBCPP_VERSION
63 #pragma push_macro("__DEVICE__")
64 #pragma push_macro("__RETURN_TYPE")
67 #define __DEVICE__ __device__
68 #define __RETURN_TYPE int
69 #else // to be consistent with __clang_cuda_math_forward_declares
70 #define __DEVICE__ static __device__
71 #define __RETURN_TYPE bool
74 #if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
// Octal parser used by __make_mantissa. The visible statement folds a
// character in '0'..'7' into the accumulator as r = r * 8 + digit.
// NOTE(review): the surrounding loop and the handling of characters outside
// '0'..'7' are not visible in this excerpt.
77 uint64_t __make_mantissa_base8(
const char* tagp)
83 if (tmp >=
'0' && tmp <=
'7') r = (r * 8u) + tmp -
'0';
// Decimal parser used by __make_mantissa. The visible statement folds a
// character in '0'..'9' into the accumulator as r = r * 10 + digit.
// NOTE(review): the surrounding loop and the handling of non-digit characters
// are not visible in this excerpt.
94 uint64_t __make_mantissa_base10(
const char* tagp)
100 if (tmp >=
'0' && tmp <=
'9') r = (r * 10u) + tmp -
'0';
// Hexadecimal parser used by __make_mantissa. Each accepted character is
// folded in as r = r * 16 + value, where '0'..'9' map to 0..9 and both
// 'a'..'f' and 'A'..'F' map to 10..15.
// NOTE(review): the surrounding loop and the handling of any other character
// are not visible in this excerpt.
111 uint64_t __make_mantissa_base16(
const char* tagp)
117 if (tmp >=
'0' && tmp <=
'9') r = (r * 16u) + tmp -
'0';
118 else if (tmp >=
'a' && tmp <=
'f') r = (r * 16u) + tmp -
'a' + 10;
119 else if (tmp >=
'A' && tmp <=
'F') r = (r * 16u) + tmp -
'A' + 10;
// Turns the tag string into an integer. A null tagp yields 0. The visible
// branches send the input to the base-16 parser when *tagp is 'x' or 'X',
// otherwise to the base-8 parser; the final return uses the base-10 parser.
// NOTE(review): __make_mantissa_base16 receives tagp while it still points at
// the 'x'/'X' character, and the digit tests visible in that parser do not
// accept that character - confirm the prefix is skipped somewhere not shown.
130 uint64_t __make_mantissa(
const char* tagp)
132 if (!tagp)
return 0u;
137 if (*tagp ==
'x' || *tagp ==
'X')
return __make_mantissa_base16(tagp);
138 else return __make_mantissa_base8(tagp);
141 return __make_mantissa_base10(tagp);
143 #endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
146 #if (__hcc_workweek__ >= 19015) || __HIP_CLANG_ONLY__
// amd_mixed_dot overloads. Each one forwards its two operands, the
// accumulator c and the saturate flag to an __ockl_{s,u}dot{2,4,8} routine.
// Vector operands (short2, char4, uchar4) are passed through their .data
// member; the int/uint overloads pass the operands directly.
149 int amd_mixed_dot(
short2 a,
short2 b,
int c,
bool saturate) {
150 return __ockl_sdot2(a.data, b.data, c, saturate);
// NOTE(review): the signature that owns the following __ockl_udot2 return is
// not visible in this excerpt.
155 return __ockl_udot2(a.data, b.data, c, saturate);
159 int amd_mixed_dot(
char4 a,
char4 b,
int c,
bool saturate) {
160 return __ockl_sdot4(a.data, b.data, c, saturate);
164 uint amd_mixed_dot(
uchar4 a,
uchar4 b, uint c,
bool saturate) {
165 return __ockl_udot4(a.data, b.data, c, saturate);
169 int amd_mixed_dot(
int a,
int b,
int c,
bool saturate) {
170 return __ockl_sdot8(a, b, c, saturate);
174 uint amd_mixed_dot(uint a, uint b, uint c,
bool saturate) {
175 return __ockl_udot8(a, b, c, saturate);
179 #if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
// Single-precision math entry points (abs .. fdimf). Every function in this
// run forwards its argument(s), unchanged and in declaration order, to the
// __ocml_*_f32 routine named in its body and returns that result.
// abs(float) and fabsf (further down) both call __ocml_fabs_f32.
183 float abs(
float x) {
return __ocml_fabs_f32(x); }
186 float acosf(
float x) {
return __ocml_acos_f32(x); }
189 float acoshf(
float x) {
return __ocml_acosh_f32(x); }
192 float asinf(
float x) {
return __ocml_asin_f32(x); }
195 float asinhf(
float x) {
return __ocml_asinh_f32(x); }
// NOTE(review): the parameters are named (x, y) and forwarded in that order;
// confirm the intended argument order against the OCML atan2 contract.
198 float atan2f(
float x,
float y) {
return __ocml_atan2_f32(x, y); }
201 float atanf(
float x) {
return __ocml_atan_f32(x); }
204 float atanhf(
float x) {
return __ocml_atanh_f32(x); }
207 float cbrtf(
float x) {
return __ocml_cbrt_f32(x); }
210 float ceilf(
float x) {
return __ocml_ceil_f32(x); }
213 float copysignf(
float x,
float y) {
return __ocml_copysign_f32(x, y); }
216 float cosf(
float x) {
return __ocml_cos_f32(x); }
219 float coshf(
float x) {
return __ocml_cosh_f32(x); }
222 float cospif(
float x) {
return __ocml_cospi_f32(x); }
// cyl_bessel_i0f / cyl_bessel_i1f map onto the OCML i0 / i1 routines.
225 float cyl_bessel_i0f(
float x) {
return __ocml_i0_f32(x); }
228 float cyl_bessel_i1f(
float x) {
return __ocml_i1_f32(x); }
231 float erfcf(
float x) {
return __ocml_erfc_f32(x); }
234 float erfcinvf(
float x) {
return __ocml_erfcinv_f32(x); }
237 float erfcxf(
float x) {
return __ocml_erfcx_f32(x); }
240 float erff(
float x) {
return __ocml_erf_f32(x); }
243 float erfinvf(
float x) {
return __ocml_erfinv_f32(x); }
246 float exp10f(
float x) {
return __ocml_exp10_f32(x); }
249 float exp2f(
float x) {
return __ocml_exp2_f32(x); }
252 float expf(
float x) {
return __ocml_exp_f32(x); }
255 float expm1f(
float x) {
return __ocml_expm1_f32(x); }
258 float fabsf(
float x) {
return __ocml_fabs_f32(x); }
261 float fdimf(
float x,
float y) {
return __ocml_fdim_f32(x, y); }
// Single-precision division helper: returns x / y computed with the built-in
// division operator (no OCML call is involved).
float fdividef(float x, float y)
{
    const float quotient = x / y;
    return quotient;
}
// Single-precision math entry points (floorf .. jnf). Unless noted, each
// function forwards its argument(s) unchanged to the __ocml_*_f32 routine
// named in its body.
267 float floorf(
float x) {
return __ocml_floor_f32(x); }
270 float fmaf(
float x,
float y,
float z) {
return __ocml_fma_f32(x, y, z); }
273 float fmaxf(
float x,
float y) {
return __ocml_fmax_f32(x, y); }
276 float fminf(
float x,
float y) {
return __ocml_fmin_f32(x, y); }
279 float fmodf(
float x,
float y) {
return __ocml_fmod_f32(x, y); }
// frexpf: calls __ocml_frexp_f32 with the address of a local `tmp` cast to an
// address_space(5) int pointer. How tmp is copied to *nptr and what is
// returned are not visible in this excerpt.
282 float frexpf(
float x,
int* nptr)
286 __ocml_frexp_f32(x, (__attribute__((address_space(5)))
int*) &tmp);
293 float hypotf(
float x,
float y) {
return __ocml_hypot_f32(x, y); }
296 int ilogbf(
float x) {
return __ocml_ilogb_f32(x); }
// The classification functions return __RETURN_TYPE, a macro this file
// defines as either int or bool depending on the preprocessor branch taken.
299 __RETURN_TYPE isfinite(
float x) {
return __ocml_isfinite_f32(x); }
302 __RETURN_TYPE isinf(
float x) {
return __ocml_isinf_f32(x); }
305 __RETURN_TYPE isnan(
float x) {
return __ocml_isnan_f32(x); }
308 float j0f(
float x) {
return __ocml_j0_f32(x); }
311 float j1f(
float x) {
return __ocml_j1_f32(x); }
// jnf: orders 0 and 1 are delegated to j0f / j1f. Higher orders step through
// the visible recurrence x2 = (2 * i) / x * x1 - x0 for i = 1 .. n-1.
// NOTE(review): the initialisation of x0/x1, the loop tail and the return
// statement are not visible in this excerpt.
314 float jnf(
int n,
float x)
318 if (n == 0)
return j0f(x);
319 if (n == 1)
return j1f(x);
323 for (
int i = 1; i < n; ++i) {
324 float x2 = (2 * i) / x * x1 - x0;
// Single-precision math entry points (ldexpf .. nanf). Unless noted, each
// function forwards its argument(s) unchanged to the __ocml_*_f32 routine
// named in its body.
333 float ldexpf(
float x,
int e) {
return __ocml_ldexp_f32(x, e); }
336 float lgammaf(
float x) {
return __ocml_lgamma_f32(x); }
// llrintf / llroundf return the float result of __ocml_rint_f32 /
// __ocml_round_f32, implicitly converted to long long.
339 long long int llrintf(
float x) {
return __ocml_rint_f32(x); }
342 long long int llroundf(
float x) {
return __ocml_round_f32(x); }
345 float log10f(
float x) {
return __ocml_log10_f32(x); }
348 float log1pf(
float x) {
return __ocml_log1p_f32(x); }
351 float log2f(
float x) {
return __ocml_log2_f32(x); }
354 float logbf(
float x) {
return __ocml_logb_f32(x); }
357 float logf(
float x) {
return __ocml_log_f32(x); }
// lrintf / lroundf: same OCML calls as the long long variants above, with the
// float result implicitly converted to long.
360 long int lrintf(
float x) {
return __ocml_rint_f32(x); }
363 long int lroundf(
float x) {
return __ocml_round_f32(x); }
// modff: calls __ocml_modf_f32 with the address of a local `tmp` cast to an
// address_space(5) float pointer. The copy to *iptr and the return statement
// are not visible in this excerpt.
366 float modff(
float x,
float* iptr)
370 __ocml_modf_f32(x, (__attribute__((address_space(5)))
float*) &tmp);
// nanf: builds a float through a bit-field view (22-bit mantissa and 8-bit
// exponent fields are visible). The exponent field is set to all ones and the
// mantissa field is filled from __make_mantissa(tagp). The static_assert
// checks that the overlay type has the same size as float.
// NOTE(review): the remaining bit-field members and the return statement are
// not visible in this excerpt.
377 float nanf(
const char* tagp)
382 uint32_t mantissa : 22;
384 uint32_t exponent : 8;
388 static_assert(
sizeof(
float) ==
sizeof(ieee_float),
"");
392 tmp.bits.exponent = ~0u;
394 tmp.bits.mantissa = __make_mantissa(tagp);
// Single-precision math entry points (nearbyintf .. signbit). Unless noted,
// each function forwards its argument(s) unchanged to the __ocml_*_f32
// routine named in its body.
400 float nearbyintf(
float x) {
return __ocml_nearbyint_f32(x); }
403 float nextafterf(
float x,
float y) {
return __ocml_nextafter_f32(x, y); }
// norm3df / norm4df map onto the OCML len3 / len4 routines.
406 float norm3df(
float x,
float y,
float z) {
return __ocml_len3_f32(x, y, z); }
409 float norm4df(
float x,
float y,
float z,
float w)
411 return __ocml_len4_f32(x, y, z, w);
// normcdff / normcdfinvf map onto the OCML ncdf / ncdfinv routines.
415 float normcdff(
float x) {
return __ocml_ncdf_f32(x); }
418 float normcdfinvf(
float x) {
return __ocml_ncdfinv_f32(x); }
// normf: accumulates a[i] * a[i] over dim elements into r, then returns
// __ocml_sqrt_f32(r).
// NOTE(review): `while (dim--)` only stops when dim reaches zero, so a
// negative dim would keep reading past the array; the declaration of r is
// not visible in this excerpt.
421 float normf(
int dim,
const float* a)
424 while (dim--) { r += a[0] * a[0]; ++a; }
426 return __ocml_sqrt_f32(r);
430 float powf(
float x,
float y) {
return __ocml_pow_f32(x, y); }
// powif: integer-exponent power, forwarded to __ocml_pown_f32.
433 float powif(
float base,
int iexp) {
return __ocml_pown_f32(base, iexp); }
436 float rcbrtf(
float x) {
return __ocml_rcbrt_f32(x); }
439 float remainderf(
float x,
float y) {
return __ocml_remainder_f32(x, y); }
// remquof: calls __ocml_remquo_f32 with the address of a local `tmp` cast to
// an address_space(5) int pointer. The copy to *quo and the return statement
// are not visible in this excerpt.
442 float remquof(
float x,
float y,
int* quo)
446 __ocml_remquo_f32(x, y, (__attribute__((address_space(5)))
int*) &tmp);
453 float rhypotf(
float x,
float y) {
return __ocml_rhypot_f32(x, y); }
456 float rintf(
float x) {
return __ocml_rint_f32(x); }
// rnorm3df / rnorm4df map onto the OCML rlen3 / rlen4 routines.
459 float rnorm3df(
float x,
float y,
float z)
461 return __ocml_rlen3_f32(x, y, z);
466 float rnorm4df(
float x,
float y,
float z,
float w)
468 return __ocml_rlen4_f32(x, y, z, w);
// rnormf: same accumulation as normf, but returns __ocml_rsqrt_f32(r).
// NOTE(review): the same `while (dim--)` caveat for negative dim applies.
472 float rnormf(
int dim,
const float* a)
475 while (dim--) { r += a[0] * a[0]; ++a; }
477 return __ocml_rsqrt_f32(r);
481 float roundf(
float x) {
return __ocml_round_f32(x); }
484 float rsqrtf(
float x) {
return __ocml_rsqrt_f32(x); }
// scalblnf: uses __ocml_scalbn_f32 when n < INT_MAX, otherwise
// __ocml_scalb_f32.
// NOTE(review): n is a long; only the upper bound is tested before it is
// passed on, so values below INT_MIN take the scalbn path - confirm the
// parameter type of __ocml_scalbn_f32.
487 float scalblnf(
float x,
long int n)
489 return (n < INT_MAX) ? __ocml_scalbn_f32(x, n) : __ocml_scalb_f32(x, n);
493 float scalbnf(
float x,
int n) {
return __ocml_scalbn_f32(x, n); }
// signbit returns __RETURN_TYPE, a macro this file defines as int or bool.
496 __RETURN_TYPE signbit(
float x) {
return __ocml_signbit_f32(x); }
// Single-precision math entry points (sincosf .. ynf). Unless noted, each
// function forwards its argument unchanged to the __ocml_*_f32 routine named
// in its body.
// sincosf / sincospif: call __ocml_sincos_f32 / __ocml_sincospi_f32 with the
// address of a local `tmp` cast to an address_space(5) float pointer.
// NOTE(review): the stores to *sptr and *cptr are not visible in this excerpt.
499 void sincosf(
float x,
float* sptr,
float* cptr)
504 __ocml_sincos_f32(x, (__attribute__((address_space(5)))
float*) &tmp);
509 void sincospif(
float x,
float* sptr,
float* cptr)
514 __ocml_sincospi_f32(x, (__attribute__((address_space(5)))
float*) &tmp);
519 float sinf(
float x) {
return __ocml_sin_f32(x); }
522 float sinhf(
float x) {
return __ocml_sinh_f32(x); }
525 float sinpif(
float x) {
return __ocml_sinpi_f32(x); }
528 float sqrtf(
float x) {
return __ocml_sqrt_f32(x); }
531 float tanf(
float x) {
return __ocml_tan_f32(x); }
534 float tanhf(
float x) {
return __ocml_tanh_f32(x); }
537 float tgammaf(
float x) {
return __ocml_tgamma_f32(x); }
540 float truncf(
float x) {
return __ocml_trunc_f32(x); }
543 float y0f(
float x) {
return __ocml_y0_f32(x); }
546 float y1f(
float x) {
return __ocml_y1_f32(x); }
// ynf: orders 0 and 1 are delegated to y0f / y1f. Higher orders step through
// the visible recurrence x2 = (2 * i) / x * x1 - x0 for i = 1 .. n-1.
// NOTE(review): the initialisation of x0/x1, the loop tail and the return
// statement are not visible in this excerpt.
549 float ynf(
int n,
float x)
554 if (n == 0)
return y0f(x);
555 if (n == 1)
return y1f(x);
559 for (
int i = 1; i < n; ++i) {
560 float x2 = (2 * i) / x * x1 - x0;
// Intrinsic-style single-precision functions. __cosf, __exp10f and __expf
// forward to the __ocml_native_*_f32 routines named in their bodies.
571 float __cosf(
float x) {
return __ocml_native_cos_f32(x); }
574 float __exp10f(
float x) {
return __ocml_native_exp10_f32(x); }
577 float __expf(
float x) {
return __ocml_native_exp_f32(x); }
// The directed-rounding variants are guarded by
// OCML_BASIC_ROUNDED_OPERATIONS; the matching #endif lines are not visible in
// this excerpt.
578 #if defined OCML_BASIC_ROUNDED_OPERATIONS
// __fadd_rd: addition via the OCML "rtn" add routine.
581 float __fadd_rd(
float x,
float y) {
return __ocml_add_rtn_f32(x, y); }
// Single-precision addition using the built-in `+` operator; returns x + y.
float __fadd_rn(float x, float y)
{
    const float sum = x + y;
    return sum;
}
// Directed-rounding float add/divide variants, guarded by
// OCML_BASIC_ROUNDED_OPERATIONS (matching #endif not visible in this excerpt).
// The _ru, _rz and _rd suffixes map onto the OCML "rtp", "rtz" and "rtn"
// routines respectively.
586 #if defined OCML_BASIC_ROUNDED_OPERATIONS
589 float __fadd_ru(
float x,
float y) {
return __ocml_add_rtp_f32(x, y); }
592 float __fadd_rz(
float x,
float y) {
return __ocml_add_rtz_f32(x, y); }
595 float __fdiv_rd(
float x,
float y) {
return __ocml_div_rtn_f32(x, y); }
// Single-precision division using the built-in `/` operator; returns x / y.
float __fdiv_rn(float x, float y)
{
    const float quotient = x / y;
    return quotient;
}
// Directed-rounding float division, guarded by OCML_BASIC_ROUNDED_OPERATIONS
// (matching #endif not visible in this excerpt). _ru uses the OCML "rtp"
// routine and _rz the "rtz" routine.
600 #if defined OCML_BASIC_ROUNDED_OPERATIONS
603 float __fdiv_ru(
float x,
float y) {
return __ocml_div_rtp_f32(x, y); }
606 float __fdiv_rz(
float x,
float y) {
return __ocml_div_rtz_f32(x, y); }
// Intrinsic-style float division; implemented with the built-in `/` operator,
// so it returns exactly x / y.
float __fdividef(float x, float y)
{
    x /= y;
    return x;
}
// Fused multiply-add and multiply variants. __fmaf_rn calls the plain
// __ocml_fma_f32; the _rd, _ru and _rz forms call the OCML "rtn", "rtp" and
// "rtz" routines. The OCML_BASIC_ROUNDED_OPERATIONS guards have no visible
// matching #endif in this excerpt, and the function braces are not visible.
611 #if defined OCML_BASIC_ROUNDED_OPERATIONS
614 float __fmaf_rd(
float x,
float y,
float z)
616 return __ocml_fma_rtn_f32(x, y, z);
621 float __fmaf_rn(
float x,
float y,
float z)
623 return __ocml_fma_f32(x, y, z);
625 #if defined OCML_BASIC_ROUNDED_OPERATIONS
628 float __fmaf_ru(
float x,
float y,
float z)
630 return __ocml_fma_rtp_f32(x, y, z);
634 float __fmaf_rz(
float x,
float y,
float z)
636 return __ocml_fma_rtz_f32(x, y, z);
640 float __fmul_rd(
float x,
float y) {
return __ocml_mul_rtn_f32(x, y); }
// Single-precision multiplication using the built-in `*` operator; returns
// x * y.
float __fmul_rn(float x, float y)
{
    const float product = x * y;
    return product;
}
// Directed-rounding float multiply, reciprocal, reciprocal-sqrt, sqrt and
// subtract variants. The OCML_BASIC_ROUNDED_OPERATIONS guards have no visible
// matching #endif in this excerpt.
645 #if defined OCML_BASIC_ROUNDED_OPERATIONS
648 float __fmul_ru(
float x,
float y) {
return __ocml_mul_rtp_f32(x, y); }
651 float __fmul_rz(
float x,
float y) {
return __ocml_mul_rtz_f32(x, y); }
// NOTE(review): all four __frcp_* variants call the same
// __llvm_amdgcn_rcp_f32, so the rounding suffix does not change the
// computation in the code shown here.
654 float __frcp_rd(
float x) {
return __llvm_amdgcn_rcp_f32(x); }
658 float __frcp_rn(
float x) {
return __llvm_amdgcn_rcp_f32(x); }
659 #if defined OCML_BASIC_ROUNDED_OPERATIONS
662 float __frcp_ru(
float x) {
return __llvm_amdgcn_rcp_f32(x); }
665 float __frcp_rz(
float x) {
return __llvm_amdgcn_rcp_f32(x); }
669 float __frsqrt_rn(
float x) {
return __llvm_amdgcn_rsq_f32(x); }
670 #if defined OCML_BASIC_ROUNDED_OPERATIONS
673 float __fsqrt_rd(
float x) {
return __ocml_sqrt_rtn_f32(x); }
// NOTE(review): __fsqrt_rn uses the native sqrt routine while its siblings
// use __ocml_sqrt_rt*_f32 - confirm the precision difference is intended.
677 float __fsqrt_rn(
float x) {
return __ocml_native_sqrt_f32(x); }
678 #if defined OCML_BASIC_ROUNDED_OPERATIONS
681 float __fsqrt_ru(
float x) {
return __ocml_sqrt_rtp_f32(x); }
684 float __fsqrt_rz(
float x) {
return __ocml_sqrt_rtz_f32(x); }
687 float __fsub_rd(
float x,
float y) {
return __ocml_sub_rtn_f32(x, y); }
// Single-precision subtraction using the built-in `-` operator; returns
// x - y.
float __fsub_rn(float x, float y)
{
    const float difference = x - y;
    return difference;
}
// Remaining float subtract variants (guarded by
// OCML_BASIC_ROUNDED_OPERATIONS, matching #endif not visible) followed by the
// intrinsic-style logarithms and power function.
692 #if defined OCML_BASIC_ROUNDED_OPERATIONS
695 float __fsub_ru(
float x,
float y) {
return __ocml_sub_rtp_f32(x, y); }
698 float __fsub_rz(
float x,
float y) {
return __ocml_sub_rtz_f32(x, y); }
// __log10f / __log2f / __logf forward to the __ocml_native_log*_f32 routines.
702 float __log10f(
float x) {
return __ocml_native_log10_f32(x); }
705 float __log2f(
float x) {
return __ocml_native_log2_f32(x); }
708 float __logf(
float x) {
return __ocml_native_log_f32(x); }
// __powf calls __ocml_pow_f32, the same routine powf uses.
711 float __powf(
float x,
float y) {
return __ocml_pow_f32(x, y); }
// Clamps x to the closed interval [+0.0f, 1.0f].
//
// Returns 1.0f for x >= 1, x itself for 0 < x < 1, and +0.0f otherwise.
// The comparisons are ordered so that a NaN input, for which every comparison
// is false, falls through to the final return and yields +0.0f instead of
// propagating; negative zero is likewise normalised to +0.0f.
float __saturatef(float x)
{
    if (x >= 1.0f) {
        return 1.0f;
    }
    if (x > 0.0f) {
        return x;
    }
    // Negative values, -0.0f and NaN all land here.
    return 0.0f;
}
// __sincosf: stores __ocml_native_sin_f32(x) in *sptr and
// __ocml_native_cos_f32(x) in *cptr (two separate native calls).
717 void __sincosf(
float x,
float* sptr,
float* cptr)
719 *sptr = __ocml_native_sin_f32(x);
720 *cptr = __ocml_native_cos_f32(x);
724 float __sinf(
float x) {
return __ocml_native_sin_f32(x); }
// __tanf calls __ocml_tan_f32, the same routine tanf uses.
727 float __tanf(
float x) {
return __ocml_tan_f32(x); }
// Double-precision math entry points (abs .. expm1). Every function in this
// run forwards its argument(s), unchanged and in declaration order, to the
// __ocml_*_f64 routine named in its body and returns that result.
734 double abs(
double x) {
return __ocml_fabs_f64(x); }
737 double acos(
double x) {
return __ocml_acos_f64(x); }
740 double acosh(
double x) {
return __ocml_acosh_f64(x); }
743 double asin(
double x) {
return __ocml_asin_f64(x); }
746 double asinh(
double x) {
return __ocml_asinh_f64(x); }
749 double atan(
double x) {
return __ocml_atan_f64(x); }
// NOTE(review): the parameters are named (x, y) and forwarded in that order;
// confirm the intended argument order against the OCML atan2 contract.
752 double atan2(
double x,
double y) {
return __ocml_atan2_f64(x, y); }
755 double atanh(
double x) {
return __ocml_atanh_f64(x); }
758 double cbrt(
double x) {
return __ocml_cbrt_f64(x); }
761 double ceil(
double x) {
return __ocml_ceil_f64(x); }
764 double copysign(
double x,
double y) {
return __ocml_copysign_f64(x, y); }
767 double cos(
double x) {
return __ocml_cos_f64(x); }
770 double cosh(
double x) {
return __ocml_cosh_f64(x); }
773 double cospi(
double x) {
return __ocml_cospi_f64(x); }
// cyl_bessel_i0 / cyl_bessel_i1 map onto the OCML i0 / i1 routines.
776 double cyl_bessel_i0(
double x) {
return __ocml_i0_f64(x); }
779 double cyl_bessel_i1(
double x) {
return __ocml_i1_f64(x); }
782 double erf(
double x) {
return __ocml_erf_f64(x); }
785 double erfc(
double x) {
return __ocml_erfc_f64(x); }
788 double erfcinv(
double x) {
return __ocml_erfcinv_f64(x); }
791 double erfcx(
double x) {
return __ocml_erfcx_f64(x); }
794 double erfinv(
double x) {
return __ocml_erfinv_f64(x); }
797 double exp(
double x) {
return __ocml_exp_f64(x); }
800 double exp10(
double x) {
return __ocml_exp10_f64(x); }
803 double exp2(
double x) {
return __ocml_exp2_f64(x); }
806 double expm1(
double x) {
return __ocml_expm1_f64(x); }
// Double-precision math entry points (fabs .. j1). Unless noted, each
// function forwards its argument(s) unchanged to the __ocml_*_f64 routine
// named in its body.
809 double fabs(
double x) {
return __ocml_fabs_f64(x); }
812 double fdim(
double x,
double y) {
return __ocml_fdim_f64(x, y); }
815 double floor(
double x) {
return __ocml_floor_f64(x); }
818 double fma(
double x,
double y,
double z) {
return __ocml_fma_f64(x, y, z); }
821 double fmax(
double x,
double y) {
return __ocml_fmax_f64(x, y); }
824 double fmin(
double x,
double y) {
return __ocml_fmin_f64(x, y); }
827 double fmod(
double x,
double y) {
return __ocml_fmod_f64(x, y); }
// frexp: calls __ocml_frexp_f64 with the address of a local `tmp` cast to an
// address_space(5) int pointer. How tmp is copied to *nptr and what is
// returned are not visible in this excerpt.
830 double frexp(
double x,
int* nptr)
834 __ocml_frexp_f64(x, (__attribute__((address_space(5)))
int*) &tmp);
841 double hypot(
double x,
double y) {
return __ocml_hypot_f64(x, y); }
844 int ilogb(
double x) {
return __ocml_ilogb_f64(x); }
// The classification functions return __RETURN_TYPE, a macro this file
// defines as either int or bool depending on the preprocessor branch taken.
847 __RETURN_TYPE isfinite(
double x) {
return __ocml_isfinite_f64(x); }
850 __RETURN_TYPE isinf(
double x) {
return __ocml_isinf_f64(x); }
853 __RETURN_TYPE isnan(
double x) {
return __ocml_isnan_f64(x); }
856 double j0(
double x) {
return __ocml_j0_f64(x); }
859 double j1(
double x) {
return __ocml_j1_f64(x); }
862 double jn(
int n,
double x)
867 if (n == 0)
return j0f(x);
868 if (n == 1)
return j1f(x);
872 for (
int i = 1; i < n; ++i) {
873 double x2 = (2 * i) / x * x1 - x0;
// Double-precision math entry points (ldexp .. nan). Unless noted, each
// function forwards its argument(s) unchanged to the __ocml_*_f64 routine
// named in its body.
882 double ldexp(
double x,
int e) {
return __ocml_ldexp_f64(x, e); }
885 double lgamma(
double x) {
return __ocml_lgamma_f64(x); }
// llrint / llround return the double result of __ocml_rint_f64 /
// __ocml_round_f64, implicitly converted to long long.
888 long long int llrint(
double x) {
return __ocml_rint_f64(x); }
891 long long int llround(
double x) {
return __ocml_round_f64(x); }
894 double log(
double x) {
return __ocml_log_f64(x); }
897 double log10(
double x) {
return __ocml_log10_f64(x); }
900 double log1p(
double x) {
return __ocml_log1p_f64(x); }
903 double log2(
double x) {
return __ocml_log2_f64(x); }
906 double logb(
double x) {
return __ocml_logb_f64(x); }
// lrint / lround: same OCML calls as the long long variants above, with the
// double result implicitly converted to long.
909 long int lrint(
double x) {
return __ocml_rint_f64(x); }
912 long int lround(
double x) {
return __ocml_round_f64(x); }
// modf: calls __ocml_modf_f64 with the address of a local `tmp` cast to an
// address_space(5) double pointer. The copy to *iptr and the return statement
// are not visible in this excerpt.
915 double modf(
double x,
double* iptr)
919 __ocml_modf_f64(x, (__attribute__((address_space(5)))
double*) &tmp);
// nan: two alternative bodies are visible. The first fills a bit-field view
// (51-bit mantissa, 11-bit exponent set to all ones) from
// __make_mantissa(tagp); the second reinterprets the 64-bit value returned by
// __make_mantissa(tagp) as a double.
// NOTE(review): the preprocessor lines that select between the two bodies are
// not visible in this excerpt. The second body reads a uint64_t object
// through a double* obtained with reinterpret_cast.
926 double nan(
const char* tagp)
932 uint64_t mantissa : 51;
934 uint32_t exponent : 11;
937 static_assert(
sizeof(
double) ==
sizeof(ieee_double),
"");
941 tmp.bits.exponent = ~0u;
943 tmp.bits.mantissa = __make_mantissa(tagp);
947 static_assert(
sizeof(uint64_t)==
sizeof(
double));
948 uint64_t val = __make_mantissa(tagp);
950 return *
reinterpret_cast<double*
>(&val);
// Double-precision math entry points (nearbyint .. signbit). Unless noted,
// each function forwards its argument(s) unchanged to the __ocml_*_f64
// routine named in its body.
955 double nearbyint(
double x) {
return __ocml_nearbyint_f64(x); }
958 double nextafter(
double x,
double y) {
return __ocml_nextafter_f64(x, y); }
// norm: accumulates a[i] * a[i] over dim elements into r, then returns
// __ocml_sqrt_f64(r).
// NOTE(review): `while (dim--)` only stops when dim reaches zero, so a
// negative dim would keep reading past the array; the declaration of r is
// not visible in this excerpt.
961 double norm(
int dim,
const double* a)
964 while (dim--) { r += a[0] * a[0]; ++a; }
966 return __ocml_sqrt_f64(r);
// norm3d / norm4d map onto the OCML len3 / len4 routines.
970 double norm3d(
double x,
double y,
double z)
972 return __ocml_len3_f64(x, y, z);
976 double norm4d(
double x,
double y,
double z,
double w)
978 return __ocml_len4_f64(x, y, z, w);
// normcdf / normcdfinv map onto the OCML ncdf / ncdfinv routines.
982 double normcdf(
double x) {
return __ocml_ncdf_f64(x); }
985 double normcdfinv(
double x) {
return __ocml_ncdfinv_f64(x); }
988 double pow(
double x,
double y) {
return __ocml_pow_f64(x, y); }
// powi: integer-exponent power, forwarded to __ocml_pown_f64.
991 double powi(
double base,
int iexp) {
return __ocml_pown_f64(base, iexp); }
994 double rcbrt(
double x) {
return __ocml_rcbrt_f64(x); }
997 double remainder(
double x,
double y) {
return __ocml_remainder_f64(x, y); }
// remquo: calls __ocml_remquo_f64 with the address of a local `tmp` cast to
// an address_space(5) int pointer. The copy to *quo and the return statement
// are not visible in this excerpt.
1000 double remquo(
double x,
double y,
int* quo)
1004 __ocml_remquo_f64(x, y, (__attribute__((address_space(5)))
int*) &tmp);
1011 double rhypot(
double x,
double y) {
return __ocml_rhypot_f64(x, y); }
1014 double rint(
double x) {
return __ocml_rint_f64(x); }
// rnorm: same accumulation as norm, but returns __ocml_rsqrt_f64(r).
// NOTE(review): the same `while (dim--)` caveat for negative dim applies.
1017 double rnorm(
int dim,
const double* a)
1020 while (dim--) { r += a[0] * a[0]; ++a; }
1022 return __ocml_rsqrt_f64(r);
// rnorm3d / rnorm4d map onto the OCML rlen3 / rlen4 routines.
1026 double rnorm3d(
double x,
double y,
double z)
1028 return __ocml_rlen3_f64(x, y, z);
1032 double rnorm4d(
double x,
double y,
double z,
double w)
1034 return __ocml_rlen4_f64(x, y, z, w);
1038 double round(
double x) {
return __ocml_round_f64(x); }
1041 double rsqrt(
double x) {
return __ocml_rsqrt_f64(x); }
// scalbln: uses __ocml_scalbn_f64 when n < INT_MAX, otherwise
// __ocml_scalb_f64.
// NOTE(review): n is a long; only the upper bound is tested before it is
// passed on, so values below INT_MIN take the scalbn path - confirm the
// parameter type of __ocml_scalbn_f64.
1044 double scalbln(
double x,
long int n)
1046 return (n < INT_MAX) ? __ocml_scalbn_f64(x, n) : __ocml_scalb_f64(x, n);
1050 double scalbn(
double x,
int n) {
return __ocml_scalbn_f64(x, n); }
// signbit returns __RETURN_TYPE, a macro this file defines as int or bool.
1053 __RETURN_TYPE signbit(
double x) {
return __ocml_signbit_f64(x); }
// Double-precision math entry points (sin .. y1). Unless noted, each function
// forwards its argument unchanged to the __ocml_*_f64 routine named in its
// body.
1056 double sin(
double x) {
return __ocml_sin_f64(x); }
// sincos: calls __ocml_sincos_f64 with the address of a local `tmp` cast to
// an address_space(5) double pointer.
// NOTE(review): the stores to *sptr and *cptr are not visible in this excerpt.
1059 void sincos(
double x,
double* sptr,
double* cptr)
1063 __ocml_sincos_f64(x, (__attribute__((address_space(5)))
double*) &tmp);
// sincospi: the value returned by __ocml_sincospi_f64 is stored in *sptr and
// the routine is handed the address of a local `tmp` for its second output.
// NOTE(review): the store to *cptr is not visible in this excerpt.
1068 void sincospi(
double x,
double* sptr,
double* cptr)
1071 *sptr = __ocml_sincospi_f64(
1072 x, (__attribute__((address_space(5)))
double*) &tmp);
1077 double sinh(
double x) {
return __ocml_sinh_f64(x); }
1080 double sinpi(
double x) {
return __ocml_sinpi_f64(x); }
1083 double sqrt(
double x) {
return __ocml_sqrt_f64(x); }
1086 double tan(
double x) {
return __ocml_tan_f64(x); }
1089 double tanh(
double x) {
return __ocml_tanh_f64(x); }
1092 double tgamma(
double x) {
return __ocml_tgamma_f64(x); }
1095 double trunc(
double x) {
return __ocml_trunc_f64(x); }
1098 double y0(
double x) {
return __ocml_y0_f64(x); }
1101 double y1(
double x) {
return __ocml_y1_f64(x); }
1104 double yn(
int n,
double x)
1109 if (n == 0)
return j0f(x);
1110 if (n == 1)
return j1f(x);
1114 for (
int i = 1; i < n; ++i) {
1115 double x2 = (2 * i) / x * x1 - x0;
// Double-precision directed-rounding intrinsics start here, guarded by
// OCML_BASIC_ROUNDED_OPERATIONS (matching #endif not visible in this
// excerpt). __dadd_rd uses the OCML "rtn" add routine.
1124 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1127 double __dadd_rd(
double x,
double y) {
return __ocml_add_rtn_f64(x, y); }
// Double-precision addition using the built-in `+` operator; returns x + y.
double __dadd_rn(double x, double y)
{
    const double sum = x + y;
    return sum;
}
// Directed-rounding double add/divide variants, guarded by
// OCML_BASIC_ROUNDED_OPERATIONS (matching #endif not visible in this excerpt).
// The _ru, _rz and _rd suffixes map onto the OCML "rtp", "rtz" and "rtn"
// routines respectively.
1132 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1135 double __dadd_ru(
double x,
double y) {
return __ocml_add_rtp_f64(x, y); }
1138 double __dadd_rz(
double x,
double y) {
return __ocml_add_rtz_f64(x, y); }
1141 double __ddiv_rd(
double x,
double y) {
return __ocml_div_rtn_f64(x, y); }
// Double-precision division using the built-in `/` operator; returns x / y.
double __ddiv_rn(double x, double y)
{
    const double quotient = x / y;
    return quotient;
}
// Directed-rounding double divide/multiply variants, guarded by
// OCML_BASIC_ROUNDED_OPERATIONS (matching #endif not visible in this excerpt).
1146 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1149 double __ddiv_ru(
double x,
double y) {
return __ocml_div_rtp_f64(x, y); }
1152 double __ddiv_rz(
double x,
double y) {
return __ocml_div_rtz_f64(x, y); }
1155 double __dmul_rd(
double x,
double y) {
return __ocml_mul_rtn_f64(x, y); }
// Double-precision multiplication using the built-in `*` operator; returns
// x * y.
double __dmul_rn(double x, double y)
{
    const double product = x * y;
    return product;
}
// Directed-rounding double multiply, reciprocal, sqrt and subtract variants.
// The OCML_BASIC_ROUNDED_OPERATIONS guards have no visible matching #endif in
// this excerpt.
1160 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1163 double __dmul_ru(
double x,
double y) {
return __ocml_mul_rtp_f64(x, y); }
1166 double __dmul_rz(
double x,
double y) {
return __ocml_mul_rtz_f64(x, y); }
// NOTE(review): all four __drcp_* variants call the same
// __llvm_amdgcn_rcp_f64, so the rounding suffix does not change the
// computation in the code shown here.
1169 double __drcp_rd(
double x) {
return __llvm_amdgcn_rcp_f64(x); }
1173 double __drcp_rn(
double x) {
return __llvm_amdgcn_rcp_f64(x); }
1174 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1177 double __drcp_ru(
double x) {
return __llvm_amdgcn_rcp_f64(x); }
1180 double __drcp_rz(
double x) {
return __llvm_amdgcn_rcp_f64(x); }
1183 double __dsqrt_rd(
double x) {
return __ocml_sqrt_rtn_f64(x); }
// __dsqrt_rn calls the plain __ocml_sqrt_f64, the same routine sqrt uses.
1187 double __dsqrt_rn(
double x) {
return __ocml_sqrt_f64(x); }
1188 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1191 double __dsqrt_ru(
double x) {
return __ocml_sqrt_rtp_f64(x); }
1194 double __dsqrt_rz(
double x) {
return __ocml_sqrt_rtz_f64(x); }
1197 double __dsub_rd(
double x,
double y) {
return __ocml_sub_rtn_f64(x, y); }
// Double-precision subtraction using the built-in `-` operator; returns
// x - y.
double __dsub_rn(double x, double y)
{
    const double difference = x - y;
    return difference;
}
// Remaining double subtract variants and the double fused multiply-add
// family. __fma_rn calls the plain __ocml_fma_f64; the _rd, _ru and _rz forms
// call the OCML "rtn", "rtp" and "rtz" routines. The
// OCML_BASIC_ROUNDED_OPERATIONS guards have no visible matching #endif in
// this excerpt, and the function braces of the fma family are not visible.
1202 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1205 double __dsub_ru(
double x,
double y) {
return __ocml_sub_rtp_f64(x, y); }
1208 double __dsub_rz(
double x,
double y) {
return __ocml_sub_rtz_f64(x, y); }
1211 double __fma_rd(
double x,
double y,
double z)
1213 return __ocml_fma_rtn_f64(x, y, z);
1218 double __fma_rn(
double x,
double y,
double z)
1220 return __ocml_fma_f64(x, y, z);
1222 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1225 double __fma_ru(
double x,
double y,
double z)
1227 return __ocml_fma_rtp_f64(x, y, z);
1231 double __fma_rz(
double x,
double y,
double z)
1233 return __ocml_fma_rtz_f64(x, y, z);
// Integer absolute-value family. Each visible body uses the same branch-free
// form: sgn = x >> (bit width - 1), then (x ^ sgn) - sgn.
// NOTE(review): this relies on right-shifting a negative signed value
// replicating the sign bit, and the result for the most negative value of
// each type is not representable - confirm both are acceptable for the
// targeted compilers. The signatures that own the int and long bodies are not
// visible in this excerpt.
1244 int sgn = x >> (
sizeof(int) * CHAR_BIT - 1);
1245 return (x ^ sgn) - sgn;
1251 long sgn = x >> (
sizeof(long) * CHAR_BIT - 1);
1252 return (x ^ sgn) - sgn;
1256 long long llabs(
long long x)
1258 long long sgn = x >> (
sizeof(
long long) * CHAR_BIT - 1);
1259 return (x ^ sgn) - sgn;
// C++-only overloads of abs that forward to labs / llabs.
1262 #if defined(__cplusplus)
1265 long abs(
long x) {
return labs(x); }
1268 long long abs(
long long x) {
return llabs(x); }
// fma overloads for narrower types: the _Float16 overload calls
// __ocml_fma_f16 and the float overload calls fmaf.
// NOTE(review): the closing braces of both functions are not visible in this
// excerpt.
1273 inline _Float16 fma(_Float16 x, _Float16 y, _Float16 z) {
1274 return __ocml_fma_f16(x, y, z);
1278 inline float fma(
float x,
float y,
float z) {
1279 return fmaf(x, y, z);
1282 #pragma push_macro("__DEF_FLOAT_FUN")
1283 #pragma push_macro("__DEF_FLOAT_FUN2")
1284 #pragma push_macro("__DEF_FLOAT_FUN2I")
1285 #pragma push_macro("__HIP_OVERLOAD")
1286 #pragma push_macro("__HIP_OVERLOAD2")
// Overload-generation helpers (all fragments; several macro lines are not
// visible in this excerpt).
//  - The template header with parameters <bool __B, class __T = void>
//    presumably introduces __hip_enable_if, which the macros below use.
//  - __HIP_OVERLOAD1(__retty, __fn): one-argument template constrained on
//    std::numeric_limits<__T>::is_integer; it calls ::__fn with the argument
//    cast to double.
//  - __HIP_OVERLOAD2(__retty, __fn): two-argument template constrained on
//    both argument types having a std::numeric_limits specialization; it
//    calls __fn with both arguments cast to double.
//  - __DEF_FUN1 / __DEF_FUNI: define func(float) as a call to func##f and
//    then expand __HIP_OVERLOAD1 (__DEF_FUNI returns retty instead of float).
//  - __DEF_FUN2: defines func(float, float) as a call to func##f and then
//    expands __HIP_OVERLOAD2.
1289 template<
bool __B,
class __T =
void>
1299 #define __HIP_OVERLOAD1(__retty, __fn) \
1300 template <typename __T> \
1302 typename __hip_enable_if<std::numeric_limits<__T>::is_integer, \
1305 return ::__fn((double)__x); \
1311 #define __HIP_OVERLOAD2(__retty, __fn) \
1312 template <typename __T1, typename __T2> \
1313 __DEVICE__ typename __hip_enable_if< \
1314 std::numeric_limits<__T1>::is_specialized && \
1315 std::numeric_limits<__T2>::is_specialized, \
1317 __fn(__T1 __x, __T2 __y) { \
1318 return __fn((double)__x, (double)__y); \
1322 #define __DEF_FUN1(retty, func) \
1325 float func(float x) \
1327 return func##f(x); \
1329 __HIP_OVERLOAD1(retty, func)
1332 #define __DEF_FUNI(retty, func) \
1335 retty func(float x) \
1337 return func##f(x); \
1339 __HIP_OVERLOAD1(retty, func)
1342 #define __DEF_FUN2(retty, func) \
1345 float func(float x, float y) \
1347 return func##f(x, y); \
1349 __HIP_OVERLOAD2(retty, func)
// Instantiations of the overload-generation macros, one per math function.
// __DEF_FUN1 / __DEF_FUNI / __DEF_FUN2 add the float overload plus the
// templated overload; the bare __HIP_OVERLOAD1 / __HIP_OVERLOAD2 uses add
// only the templated overload (used here for the bool-returning
// classification and comparison functions).
// Some invocations carry a trailing semicolon and some do not; both spellings
// appear in the original.
1351 __DEF_FUN1(
double, acos)
1352 __DEF_FUN1(
double, acosh)
1353 __DEF_FUN1(
double, asin)
1354 __DEF_FUN1(
double, asinh)
1355 __DEF_FUN1(
double, atan)
1356 __DEF_FUN2(
double, atan2);
1357 __DEF_FUN1(
double, atanh)
1358 __DEF_FUN1(
double, cbrt)
1359 __DEF_FUN1(
double, ceil)
1360 __DEF_FUN2(
double, copysign);
1361 __DEF_FUN1(
double, cos)
1362 __DEF_FUN1(
double, cosh)
1363 __DEF_FUN1(
double, erf)
1364 __DEF_FUN1(
double, erfc)
1365 __DEF_FUN1(
double, exp)
1366 __DEF_FUN1(
double, exp2)
1367 __DEF_FUN1(
double, expm1)
1368 __DEF_FUN1(
double, fabs)
1369 __DEF_FUN2(
double, fdim);
1370 __DEF_FUN1(
double, floor)
1371 __DEF_FUN2(
double, fmax);
1372 __DEF_FUN2(
double, fmin);
1373 __DEF_FUN2(
double, fmod);
1375 __DEF_FUN2(
double, hypot);
1376 __DEF_FUNI(
int, ilogb)
1377 __HIP_OVERLOAD1(
bool, isfinite)
1378 __HIP_OVERLOAD2(
bool, isgreater);
1379 __HIP_OVERLOAD2(
bool, isgreaterequal);
1380 __HIP_OVERLOAD1(
bool, isinf);
1381 __HIP_OVERLOAD2(
bool, isless);
1382 __HIP_OVERLOAD2(
bool, islessequal);
1383 __HIP_OVERLOAD2(
bool, islessgreater);
1384 __HIP_OVERLOAD1(
bool, isnan);
1386 __HIP_OVERLOAD2(
bool, isunordered);
1387 __DEF_FUN1(
double, lgamma)
1388 __DEF_FUN1(
double, log)
1389 __DEF_FUN1(
double, log10)
1390 __DEF_FUN1(
double, log1p)
1391 __DEF_FUN1(
double, log2)
1392 __DEF_FUN1(
double, logb)
1393 __DEF_FUNI(
long long, llrint)
1394 __DEF_FUNI(
long long, llround)
1395 __DEF_FUNI(
long, lrint)
1396 __DEF_FUNI(
long, lround)
1397 __DEF_FUN1(
double, nearbyint);
1398 __DEF_FUN2(
double, nextafter);
1399 __DEF_FUN2(
double, pow);
1400 __DEF_FUN2(
double, remainder);
1401 __DEF_FUN1(
double, rint);
1402 __DEF_FUN1(
double, round);
1403 __HIP_OVERLOAD1(
bool, signbit)
1404 __DEF_FUN1(
double, sin)
1405 __DEF_FUN1(
double, sinh)
1406 __DEF_FUN1(
double, sqrt)
1407 __DEF_FUN1(
double, tan)
1408 __DEF_FUN1(
double, tanh)
1409 __DEF_FUN1(
double, tgamma)
1410 __DEF_FUN1(
double, trunc);
// __DEF_FLOAT_FUN2I(func): defines func(float, int) as a call to func##f.
// It is instantiated for scalbn and ldexp, giving float overloads that call
// scalbnf and ldexpf. Some lines of the macro body are not visible in this
// excerpt.
1413 #define __DEF_FLOAT_FUN2I(func) \
1416 float func(float x, int y) \
1418 return func##f(x, y); \
1420 __DEF_FLOAT_FUN2I(scalbn)
1421 __DEF_FLOAT_FUN2I(ldexp)
// min / max overload set (fragments: template headers, closing braces and
// some bodies are not visible in this excerpt).
// Generic forms over T: return the smaller / larger argument using < and >.
1424 __DEVICE__
inline T min(T arg1, T arg2) {
1425 return (arg1 < arg2) ? arg1 : arg2;
1429 __DEVICE__
inline T max(T arg1, T arg2) {
1430 return (arg1 > arg2) ? arg1 : arg2;
// Mixed signed/unsigned 32-bit forms: the signed argument is cast to uint32_t
// before comparing, so a negative int32_t compares as a large unsigned value.
1435 __DEVICE__
inline static uint32_t min(uint32_t arg1, int32_t arg2) {
1436 return min(arg1, (uint32_t) arg2);
1456 __DEVICE__
inline static uint32_t max(uint32_t arg1, int32_t arg2) {
1457 return max(arg1, (uint32_t) arg2);
1459 __DEVICE__
inline static uint32_t max(int32_t arg1, uint32_t arg2) {
1460 return max((uint32_t) arg1, arg2);
// Plain int forms for device code.
1477 __DEVICE__
inline int min(
int arg1,
int arg2) {
1478 return (arg1 < arg2) ? arg1 : arg2;
1480 __DEVICE__
inline int max(
int arg1,
int arg2) {
1481 return (arg1 > arg2) ? arg1 : arg2;
// Floating-point forms; their bodies are not visible in this excerpt.
1486 float max(
float x,
float y) {
1492 double max(
double x,
double y) {
1498 float min(
float x,
float y) {
1504 double min(
double x,
double y) {
// Templated two-argument overloads generated by __HIP_OVERLOAD2.
1508 __HIP_OVERLOAD2(
double, max)
1509 __HIP_OVERLOAD2(
double, min)
// Host-side int forms forward to std::min / std::max.
1513 __host__ inline static int min(
int arg1,
int arg2) {
1514 return std::min(arg1, arg2);
1517 __host__ inline static int max(
int arg1,
int arg2) {
1518 return std::max(arg1, arg2);
// pow overloads taking an integer exponent: the float form calls powif, the
// double form calls powi, and the _Float16 form calls __ocml_pown_f16.
// NOTE(review): the closing braces of these functions are not visible in this
// excerpt.
1522 inline float pow(
float base,
int iexp) {
1523 return powif(base, iexp);
1527 inline double pow(
double base,
int iexp) {
1528 return powi(base, iexp);
1532 inline _Float16 pow(_Float16 base,
int iexp) {
1533 return __ocml_pown_f16(base, iexp);
1536 #endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
1538 #pragma pop_macro("__DEF_FLOAT_FUN")
1539 #pragma pop_macro("__DEF_FLOAT_FUN2")
1540 #pragma pop_macro("__DEF_FLOAT_FUN2I")
1541 #pragma pop_macro("__HIP_OVERLOAD")
1542 #pragma pop_macro("__HIP_OVERLOAD2")
1543 #pragma pop_macro("__DEVICE__")
1544 #pragma pop_macro("__RETURN_TYPE")