HIP: Heterogenous-computing Interface for Portability
math_functions.h
1 /*
2 Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
3 
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 THE SOFTWARE.
21 */
22 
23 #pragma once
24 
25 #include "hip_fp16_math_fwd.h"
26 #include "hip_vector_types.h"
27 #include "math_fwd.h"
28 
30 
31 #include <algorithm>
32 
33 // assert.h is only for the host version of assert.
34 // The device version of assert is implemented in hip/hcc_detail/hip_runtime.h.
35 // Users should include hip_runtime.h for the device version of assert.
36 #if !__HIP_DEVICE_COMPILE__
37 #include <assert.h>
38 #endif
39 
40 #include <limits.h>
41 #include <limits>
42 #include <stdint.h>
43 
44 // HCC's own math functions should be included first, otherwise there will
45 // be conflicts when hip/math_functions.h is included before hip/hip_runtime.h.
46 #ifdef __HCC__
47 #include "kalmar_math.h"
48 #endif
49 
50 #if _LIBCPP_VERSION && __HIP__
51 namespace std {
52 template <>
53 struct __numeric_type<_Float16>
54 {
55  static _Float16 __test(_Float16);
56 
57  typedef _Float16 type;
58  static const bool value = true;
59 };
60 }
61 #endif // _LIBCPP_VERSION
62 
63 #pragma push_macro("__DEVICE__")
64 #pragma push_macro("__RETURN_TYPE")
65 
66 #ifdef __HCC__
67 #define __DEVICE__ __device__
68 #define __RETURN_TYPE int
69 #else // to be consistent with __clang_cuda_math_forward_declares
70 #define __DEVICE__ static __device__
71 #define __RETURN_TYPE bool
72 #endif
73 
74 #if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
75 __DEVICE__
76 inline
77 uint64_t __make_mantissa_base8(const char* tagp)
78 {
79  uint64_t r = 0;
80  while (tagp) {
81  char tmp = *tagp;
82 
83  if (tmp >= '0' && tmp <= '7') r = (r * 8u) + tmp - '0';
84  else return 0;
85 
86  ++tagp;
87  }
88 
89  return r;
90 }
91 
92 __DEVICE__
93 inline
94 uint64_t __make_mantissa_base10(const char* tagp)
95 {
96  uint64_t r = 0;
97  while (tagp) {
98  char tmp = *tagp;
99 
100  if (tmp >= '0' && tmp <= '9') r = (r * 10u) + tmp - '0';
101  else return 0;
102 
103  ++tagp;
104  }
105 
106  return r;
107 }
108 
109 __DEVICE__
110 inline
111 uint64_t __make_mantissa_base16(const char* tagp)
112 {
113  uint64_t r = 0;
114  while (tagp) {
115  char tmp = *tagp;
116 
117  if (tmp >= '0' && tmp <= '9') r = (r * 16u) + tmp - '0';
118  else if (tmp >= 'a' && tmp <= 'f') r = (r * 16u) + tmp - 'a' + 10;
119  else if (tmp >= 'A' && tmp <= 'F') r = (r * 16u) + tmp - 'A' + 10;
120  else return 0;
121 
122  ++tagp;
123  }
124 
125  return r;
126 }
127 
128 __DEVICE__
129 inline
130 uint64_t __make_mantissa(const char* tagp)
131 {
132  if (!tagp) return 0u;
133 
134  if (*tagp == '0') {
135  ++tagp;
136 
137  if (*tagp == 'x' || *tagp == 'X') return __make_mantissa_base16(tagp);
138  else return __make_mantissa_base8(tagp);
139  }
140 
141  return __make_mantissa_base10(tagp);
142 }
143 #endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
144 
145 // DOT FUNCTIONS
146 #if (__hcc_workweek__ >= 19015) || __HIP_CLANG_ONLY__
147 __DEVICE__
148 inline
149 int amd_mixed_dot(short2 a, short2 b, int c, bool saturate) {
150  return __ockl_sdot2(a.data, b.data, c, saturate);
151 }
152 __DEVICE__
153 inline
154 uint amd_mixed_dot(ushort2 a, ushort2 b, uint c, bool saturate) {
155  return __ockl_udot2(a.data, b.data, c, saturate);
156 }
157 __DEVICE__
158 inline
159 int amd_mixed_dot(char4 a, char4 b, int c, bool saturate) {
160  return __ockl_sdot4(a.data, b.data, c, saturate);
161 }
162 __DEVICE__
163 inline
164 uint amd_mixed_dot(uchar4 a, uchar4 b, uint c, bool saturate) {
165  return __ockl_udot4(a.data, b.data, c, saturate);
166 }
167 __DEVICE__
168 inline
169 int amd_mixed_dot(int a, int b, int c, bool saturate) {
170  return __ockl_sdot8(a, b, c, saturate);
171 }
172 __DEVICE__
173 inline
174 uint amd_mixed_dot(uint a, uint b, uint c, bool saturate) {
175  return __ockl_udot8(a, b, c, saturate);
176 }
177 #endif
178 
179 #if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
180 // BEGIN FLOAT
181 __DEVICE__
182 inline
183 float abs(float x) { return __ocml_fabs_f32(x); }
184 __DEVICE__
185 inline
186 float acosf(float x) { return __ocml_acos_f32(x); }
187 __DEVICE__
188 inline
189 float acoshf(float x) { return __ocml_acosh_f32(x); }
190 __DEVICE__
191 inline
192 float asinf(float x) { return __ocml_asin_f32(x); }
193 __DEVICE__
194 inline
195 float asinhf(float x) { return __ocml_asinh_f32(x); }
196 __DEVICE__
197 inline
198 float atan2f(float x, float y) { return __ocml_atan2_f32(x, y); }
199 __DEVICE__
200 inline
201 float atanf(float x) { return __ocml_atan_f32(x); }
202 __DEVICE__
203 inline
204 float atanhf(float x) { return __ocml_atanh_f32(x); }
205 __DEVICE__
206 inline
207 float cbrtf(float x) { return __ocml_cbrt_f32(x); }
208 __DEVICE__
209 inline
210 float ceilf(float x) { return __ocml_ceil_f32(x); }
211 __DEVICE__
212 inline
213 float copysignf(float x, float y) { return __ocml_copysign_f32(x, y); }
214 __DEVICE__
215 inline
216 float cosf(float x) { return __ocml_cos_f32(x); }
217 __DEVICE__
218 inline
219 float coshf(float x) { return __ocml_cosh_f32(x); }
220 __DEVICE__
221 inline
222 float cospif(float x) { return __ocml_cospi_f32(x); }
223 __DEVICE__
224 inline
225 float cyl_bessel_i0f(float x) { return __ocml_i0_f32(x); }
226 __DEVICE__
227 inline
228 float cyl_bessel_i1f(float x) { return __ocml_i1_f32(x); }
229 __DEVICE__
230 inline
231 float erfcf(float x) { return __ocml_erfc_f32(x); }
232 __DEVICE__
233 inline
234 float erfcinvf(float x) { return __ocml_erfcinv_f32(x); }
235 __DEVICE__
236 inline
237 float erfcxf(float x) { return __ocml_erfcx_f32(x); }
238 __DEVICE__
239 inline
240 float erff(float x) { return __ocml_erf_f32(x); }
241 __DEVICE__
242 inline
243 float erfinvf(float x) { return __ocml_erfinv_f32(x); }
244 __DEVICE__
245 inline
246 float exp10f(float x) { return __ocml_exp10_f32(x); }
247 __DEVICE__
248 inline
249 float exp2f(float x) { return __ocml_exp2_f32(x); }
250 __DEVICE__
251 inline
252 float expf(float x) { return __ocml_exp_f32(x); }
253 __DEVICE__
254 inline
255 float expm1f(float x) { return __ocml_expm1_f32(x); }
256 __DEVICE__
257 inline
258 float fabsf(float x) { return __ocml_fabs_f32(x); }
259 __DEVICE__
260 inline
261 float fdimf(float x, float y) { return __ocml_fdim_f32(x, y); }
262 __DEVICE__
263 inline
264 float fdividef(float x, float y) { return x / y; }
265 __DEVICE__
266 inline
267 float floorf(float x) { return __ocml_floor_f32(x); }
268 __DEVICE__
269 inline
270 float fmaf(float x, float y, float z) { return __ocml_fma_f32(x, y, z); }
271 __DEVICE__
272 inline
273 float fmaxf(float x, float y) { return __ocml_fmax_f32(x, y); }
274 __DEVICE__
275 inline
276 float fminf(float x, float y) { return __ocml_fmin_f32(x, y); }
277 __DEVICE__
278 inline
279 float fmodf(float x, float y) { return __ocml_fmod_f32(x, y); }
280 __DEVICE__
281 inline
282 float frexpf(float x, int* nptr)
283 {
284  int tmp;
285  float r =
286  __ocml_frexp_f32(x, (__attribute__((address_space(5))) int*) &tmp);
287  *nptr = tmp;
288 
289  return r;
290 }
291 __DEVICE__
292 inline
293 float hypotf(float x, float y) { return __ocml_hypot_f32(x, y); }
294 __DEVICE__
295 inline
296 int ilogbf(float x) { return __ocml_ilogb_f32(x); }
297 __DEVICE__
298 inline
299 __RETURN_TYPE isfinite(float x) { return __ocml_isfinite_f32(x); }
300 __DEVICE__
301 inline
302 __RETURN_TYPE isinf(float x) { return __ocml_isinf_f32(x); }
303 __DEVICE__
304 inline
305 __RETURN_TYPE isnan(float x) { return __ocml_isnan_f32(x); }
306 __DEVICE__
307 inline
308 float j0f(float x) { return __ocml_j0_f32(x); }
309 __DEVICE__
310 inline
311 float j1f(float x) { return __ocml_j1_f32(x); }
312 __DEVICE__
313 inline
314 float jnf(int n, float x)
315 { // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
316  // for linear recurrences to get O(log n) steps, but it's unclear if
317  // it'd be beneficial in this case.
318  if (n == 0) return j0f(x);
319  if (n == 1) return j1f(x);
320 
321  float x0 = j0f(x);
322  float x1 = j1f(x);
323  for (int i = 1; i < n; ++i) {
324  float x2 = (2 * i) / x * x1 - x0;
325  x0 = x1;
326  x1 = x2;
327  }
328 
329  return x1;
330 }
331 __DEVICE__
332 inline
333 float ldexpf(float x, int e) { return __ocml_ldexp_f32(x, e); }
334 __DEVICE__
335 inline
336 float lgammaf(float x) { return __ocml_lgamma_f32(x); }
337 __DEVICE__
338 inline
339 long long int llrintf(float x) { return __ocml_rint_f32(x); }
340 __DEVICE__
341 inline
342 long long int llroundf(float x) { return __ocml_round_f32(x); }
343 __DEVICE__
344 inline
345 float log10f(float x) { return __ocml_log10_f32(x); }
346 __DEVICE__
347 inline
348 float log1pf(float x) { return __ocml_log1p_f32(x); }
349 __DEVICE__
350 inline
351 float log2f(float x) { return __ocml_log2_f32(x); }
352 __DEVICE__
353 inline
354 float logbf(float x) { return __ocml_logb_f32(x); }
355 __DEVICE__
356 inline
357 float logf(float x) { return __ocml_log_f32(x); }
358 __DEVICE__
359 inline
360 long int lrintf(float x) { return __ocml_rint_f32(x); }
361 __DEVICE__
362 inline
363 long int lroundf(float x) { return __ocml_round_f32(x); }
364 __DEVICE__
365 inline
366 float modff(float x, float* iptr)
367 {
368  float tmp;
369  float r =
370  __ocml_modf_f32(x, (__attribute__((address_space(5))) float*) &tmp);
371  *iptr = tmp;
372 
373  return r;
374 }
375 __DEVICE__
376 inline
377 float nanf(const char* tagp)
378 {
379  union {
380  float val;
381  struct ieee_float {
382  uint32_t mantissa : 22;
383  uint32_t quiet : 1;
384  uint32_t exponent : 8;
385  uint32_t sign : 1;
386  } bits;
387 
388  static_assert(sizeof(float) == sizeof(ieee_float), "");
389  } tmp;
390 
391  tmp.bits.sign = 0u;
392  tmp.bits.exponent = ~0u;
393  tmp.bits.quiet = 1u;
394  tmp.bits.mantissa = __make_mantissa(tagp);
395 
396  return tmp.val;
397 }
398 __DEVICE__
399 inline
400 float nearbyintf(float x) { return __ocml_nearbyint_f32(x); }
401 __DEVICE__
402 inline
403 float nextafterf(float x, float y) { return __ocml_nextafter_f32(x, y); }
404 __DEVICE__
405 inline
406 float norm3df(float x, float y, float z) { return __ocml_len3_f32(x, y, z); }
407 __DEVICE__
408 inline
409 float norm4df(float x, float y, float z, float w)
410 {
411  return __ocml_len4_f32(x, y, z, w);
412 }
413 __DEVICE__
414 inline
415 float normcdff(float x) { return __ocml_ncdf_f32(x); }
416 __DEVICE__
417 inline
418 float normcdfinvf(float x) { return __ocml_ncdfinv_f32(x); }
419 __DEVICE__
420 inline
421 float normf(int dim, const float* a)
422 { // TODO: placeholder until OCML adds support.
423  float r = 0;
424  while (dim--) { r += a[0] * a[0]; ++a; }
425 
426  return __ocml_sqrt_f32(r);
427 }
428 __DEVICE__
429 inline
430 float powf(float x, float y) { return __ocml_pow_f32(x, y); }
431 __DEVICE__
432 inline
433 float powif(float base, int iexp) { return __ocml_pown_f32(base, iexp); }
434 __DEVICE__
435 inline
436 float rcbrtf(float x) { return __ocml_rcbrt_f32(x); }
437 __DEVICE__
438 inline
439 float remainderf(float x, float y) { return __ocml_remainder_f32(x, y); }
440 __DEVICE__
441 inline
442 float remquof(float x, float y, int* quo)
443 {
444  int tmp;
445  float r =
446  __ocml_remquo_f32(x, y, (__attribute__((address_space(5))) int*) &tmp);
447  *quo = tmp;
448 
449  return r;
450 }
451 __DEVICE__
452 inline
453 float rhypotf(float x, float y) { return __ocml_rhypot_f32(x, y); }
454 __DEVICE__
455 inline
456 float rintf(float x) { return __ocml_rint_f32(x); }
457 __DEVICE__
458 inline
459 float rnorm3df(float x, float y, float z)
460 {
461  return __ocml_rlen3_f32(x, y, z);
462 }
463 
464 __DEVICE__
465 inline
466 float rnorm4df(float x, float y, float z, float w)
467 {
468  return __ocml_rlen4_f32(x, y, z, w);
469 }
470 __DEVICE__
471 inline
472 float rnormf(int dim, const float* a)
473 { // TODO: placeholder until OCML adds support.
474  float r = 0;
475  while (dim--) { r += a[0] * a[0]; ++a; }
476 
477  return __ocml_rsqrt_f32(r);
478 }
479 __DEVICE__
480 inline
481 float roundf(float x) { return __ocml_round_f32(x); }
482 __DEVICE__
483 inline
484 float rsqrtf(float x) { return __ocml_rsqrt_f32(x); }
485 __DEVICE__
486 inline
487 float scalblnf(float x, long int n)
488 {
489  return (n < INT_MAX) ? __ocml_scalbn_f32(x, n) : __ocml_scalb_f32(x, n);
490 }
491 __DEVICE__
492 inline
493 float scalbnf(float x, int n) { return __ocml_scalbn_f32(x, n); }
494 __DEVICE__
495 inline
496 __RETURN_TYPE signbit(float x) { return __ocml_signbit_f32(x); }
497 __DEVICE__
498 inline
499 void sincosf(float x, float* sptr, float* cptr)
500 {
501  float tmp;
502 
503  *sptr =
504  __ocml_sincos_f32(x, (__attribute__((address_space(5))) float*) &tmp);
505  *cptr = tmp;
506 }
507 __DEVICE__
508 inline
509 void sincospif(float x, float* sptr, float* cptr)
510 {
511  float tmp;
512 
513  *sptr =
514  __ocml_sincospi_f32(x, (__attribute__((address_space(5))) float*) &tmp);
515  *cptr = tmp;
516 }
517 __DEVICE__
518 inline
519 float sinf(float x) { return __ocml_sin_f32(x); }
520 __DEVICE__
521 inline
522 float sinhf(float x) { return __ocml_sinh_f32(x); }
523 __DEVICE__
524 inline
525 float sinpif(float x) { return __ocml_sinpi_f32(x); }
526 __DEVICE__
527 inline
528 float sqrtf(float x) { return __ocml_sqrt_f32(x); }
529 __DEVICE__
530 inline
531 float tanf(float x) { return __ocml_tan_f32(x); }
532 __DEVICE__
533 inline
534 float tanhf(float x) { return __ocml_tanh_f32(x); }
535 __DEVICE__
536 inline
537 float tgammaf(float x) { return __ocml_tgamma_f32(x); }
538 __DEVICE__
539 inline
540 float truncf(float x) { return __ocml_trunc_f32(x); }
541 __DEVICE__
542 inline
543 float y0f(float x) { return __ocml_y0_f32(x); }
544 __DEVICE__
545 inline
546 float y1f(float x) { return __ocml_y1_f32(x); }
547 __DEVICE__
548 inline
549 float ynf(int n, float x)
550 { // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
551  // for linear recurrences to get O(log n) steps, but it's unclear if
552  // it'd be beneficial in this case. Placeholder until OCML adds
553  // support.
554  if (n == 0) return y0f(x);
555  if (n == 1) return y1f(x);
556 
557  float x0 = y0f(x);
558  float x1 = y1f(x);
559  for (int i = 1; i < n; ++i) {
560  float x2 = (2 * i) / x * x1 - x0;
561  x0 = x1;
562  x1 = x2;
563  }
564 
565  return x1;
566 }
567 
568 // BEGIN INTRINSICS
569 __DEVICE__
570 inline
571 float __cosf(float x) { return __ocml_native_cos_f32(x); }
572 __DEVICE__
573 inline
574 float __exp10f(float x) { return __ocml_native_exp10_f32(x); }
575 __DEVICE__
576 inline
577 float __expf(float x) { return __ocml_native_exp_f32(x); }
578 #if defined OCML_BASIC_ROUNDED_OPERATIONS
579 __DEVICE__
580 inline
581 float __fadd_rd(float x, float y) { return __ocml_add_rtn_f32(x, y); }
582 #endif
583 __DEVICE__
584 inline
585 float __fadd_rn(float x, float y) { return x + y; }
586 #if defined OCML_BASIC_ROUNDED_OPERATIONS
587 __DEVICE__
588 inline
589 float __fadd_ru(float x, float y) { return __ocml_add_rtp_f32(x, y); }
590 __DEVICE__
591 inline
592 float __fadd_rz(float x, float y) { return __ocml_add_rtz_f32(x, y); }
593 __DEVICE__
594 inline
595 float __fdiv_rd(float x, float y) { return __ocml_div_rtn_f32(x, y); }
596 #endif
597 __DEVICE__
598 inline
599 float __fdiv_rn(float x, float y) { return x / y; }
600 #if defined OCML_BASIC_ROUNDED_OPERATIONS
601 __DEVICE__
602 inline
603 float __fdiv_ru(float x, float y) { return __ocml_div_rtp_f32(x, y); }
604 __DEVICE__
605 inline
606 float __fdiv_rz(float x, float y) { return __ocml_div_rtz_f32(x, y); }
607 #endif
608 __DEVICE__
609 inline
610 float __fdividef(float x, float y) { return x / y; }
611 #if defined OCML_BASIC_ROUNDED_OPERATIONS
612 __DEVICE__
613 inline
614 float __fmaf_rd(float x, float y, float z)
615 {
616  return __ocml_fma_rtn_f32(x, y, z);
617 }
618 #endif
619 __DEVICE__
620 inline
621 float __fmaf_rn(float x, float y, float z)
622 {
623  return __ocml_fma_f32(x, y, z);
624 }
625 #if defined OCML_BASIC_ROUNDED_OPERATIONS
626 __DEVICE__
627 inline
628 float __fmaf_ru(float x, float y, float z)
629 {
630  return __ocml_fma_rtp_f32(x, y, z);
631 }
632 __DEVICE__
633 inline
634 float __fmaf_rz(float x, float y, float z)
635 {
636  return __ocml_fma_rtz_f32(x, y, z);
637 }
638 __DEVICE__
639 inline
640 float __fmul_rd(float x, float y) { return __ocml_mul_rtn_f32(x, y); }
641 #endif
642 __DEVICE__
643 inline
644 float __fmul_rn(float x, float y) { return x * y; }
645 #if defined OCML_BASIC_ROUNDED_OPERATIONS
646 __DEVICE__
647 inline
648 float __fmul_ru(float x, float y) { return __ocml_mul_rtp_f32(x, y); }
649 __DEVICE__
650 inline
651 float __fmul_rz(float x, float y) { return __ocml_mul_rtz_f32(x, y); }
652 __DEVICE__
653 inline
654 float __frcp_rd(float x) { return __llvm_amdgcn_rcp_f32(x); }
655 #endif
656 __DEVICE__
657 inline
658 float __frcp_rn(float x) { return __llvm_amdgcn_rcp_f32(x); }
659 #if defined OCML_BASIC_ROUNDED_OPERATIONS
660 __DEVICE__
661 inline
662 float __frcp_ru(float x) { return __llvm_amdgcn_rcp_f32(x); }
663 __DEVICE__
664 inline
665 float __frcp_rz(float x) { return __llvm_amdgcn_rcp_f32(x); }
666 #endif
667 __DEVICE__
668 inline
669 float __frsqrt_rn(float x) { return __llvm_amdgcn_rsq_f32(x); }
670 #if defined OCML_BASIC_ROUNDED_OPERATIONS
671 __DEVICE__
672 inline
673 float __fsqrt_rd(float x) { return __ocml_sqrt_rtn_f32(x); }
674 #endif
675 __DEVICE__
676 inline
677 float __fsqrt_rn(float x) { return __ocml_native_sqrt_f32(x); }
678 #if defined OCML_BASIC_ROUNDED_OPERATIONS
679 __DEVICE__
680 inline
681 float __fsqrt_ru(float x) { return __ocml_sqrt_rtp_f32(x); }
682 __DEVICE__
683 inline
684 float __fsqrt_rz(float x) { return __ocml_sqrt_rtz_f32(x); }
685 __DEVICE__
686 inline
687 float __fsub_rd(float x, float y) { return __ocml_sub_rtn_f32(x, y); }
688 #endif
689 __DEVICE__
690 inline
691 float __fsub_rn(float x, float y) { return x - y; }
692 #if defined OCML_BASIC_ROUNDED_OPERATIONS
693 __DEVICE__
694 inline
695 float __fsub_ru(float x, float y) { return __ocml_sub_rtp_f32(x, y); }
696 __DEVICE__
697 inline
698 float __fsub_rz(float x, float y) { return __ocml_sub_rtz_f32(x, y); }
699 #endif
700 __DEVICE__
701 inline
702 float __log10f(float x) { return __ocml_native_log10_f32(x); }
703 __DEVICE__
704 inline
705 float __log2f(float x) { return __ocml_native_log2_f32(x); }
706 __DEVICE__
707 inline
708 float __logf(float x) { return __ocml_native_log_f32(x); }
709 __DEVICE__
710 inline
711 float __powf(float x, float y) { return __ocml_pow_f32(x, y); }
712 __DEVICE__
713 inline
714 float __saturatef(float x) { return (x < 0) ? 0 : ((x > 1) ? 1 : x); }
715 __DEVICE__
716 inline
717 void __sincosf(float x, float* sptr, float* cptr)
718 {
719  *sptr = __ocml_native_sin_f32(x);
720  *cptr = __ocml_native_cos_f32(x);
721 }
722 __DEVICE__
723 inline
724 float __sinf(float x) { return __ocml_native_sin_f32(x); }
725 __DEVICE__
726 inline
727 float __tanf(float x) { return __ocml_tan_f32(x); }
728 // END INTRINSICS
729 // END FLOAT
730 
731 // BEGIN DOUBLE
732 __DEVICE__
733 inline
734 double abs(double x) { return __ocml_fabs_f64(x); }
735 __DEVICE__
736 inline
737 double acos(double x) { return __ocml_acos_f64(x); }
738 __DEVICE__
739 inline
740 double acosh(double x) { return __ocml_acosh_f64(x); }
741 __DEVICE__
742 inline
743 double asin(double x) { return __ocml_asin_f64(x); }
744 __DEVICE__
745 inline
746 double asinh(double x) { return __ocml_asinh_f64(x); }
747 __DEVICE__
748 inline
749 double atan(double x) { return __ocml_atan_f64(x); }
750 __DEVICE__
751 inline
752 double atan2(double x, double y) { return __ocml_atan2_f64(x, y); }
753 __DEVICE__
754 inline
755 double atanh(double x) { return __ocml_atanh_f64(x); }
756 __DEVICE__
757 inline
758 double cbrt(double x) { return __ocml_cbrt_f64(x); }
759 __DEVICE__
760 inline
761 double ceil(double x) { return __ocml_ceil_f64(x); }
762 __DEVICE__
763 inline
764 double copysign(double x, double y) { return __ocml_copysign_f64(x, y); }
765 __DEVICE__
766 inline
767 double cos(double x) { return __ocml_cos_f64(x); }
768 __DEVICE__
769 inline
770 double cosh(double x) { return __ocml_cosh_f64(x); }
771 __DEVICE__
772 inline
773 double cospi(double x) { return __ocml_cospi_f64(x); }
774 __DEVICE__
775 inline
776 double cyl_bessel_i0(double x) { return __ocml_i0_f64(x); }
777 __DEVICE__
778 inline
779 double cyl_bessel_i1(double x) { return __ocml_i1_f64(x); }
780 __DEVICE__
781 inline
782 double erf(double x) { return __ocml_erf_f64(x); }
783 __DEVICE__
784 inline
785 double erfc(double x) { return __ocml_erfc_f64(x); }
786 __DEVICE__
787 inline
788 double erfcinv(double x) { return __ocml_erfcinv_f64(x); }
789 __DEVICE__
790 inline
791 double erfcx(double x) { return __ocml_erfcx_f64(x); }
792 __DEVICE__
793 inline
794 double erfinv(double x) { return __ocml_erfinv_f64(x); }
795 __DEVICE__
796 inline
797 double exp(double x) { return __ocml_exp_f64(x); }
798 __DEVICE__
799 inline
800 double exp10(double x) { return __ocml_exp10_f64(x); }
801 __DEVICE__
802 inline
803 double exp2(double x) { return __ocml_exp2_f64(x); }
804 __DEVICE__
805 inline
806 double expm1(double x) { return __ocml_expm1_f64(x); }
807 __DEVICE__
808 inline
809 double fabs(double x) { return __ocml_fabs_f64(x); }
810 __DEVICE__
811 inline
812 double fdim(double x, double y) { return __ocml_fdim_f64(x, y); }
813 __DEVICE__
814 inline
815 double floor(double x) { return __ocml_floor_f64(x); }
816 __DEVICE__
817 inline
818 double fma(double x, double y, double z) { return __ocml_fma_f64(x, y, z); }
819 __DEVICE__
820 inline
821 double fmax(double x, double y) { return __ocml_fmax_f64(x, y); }
822 __DEVICE__
823 inline
824 double fmin(double x, double y) { return __ocml_fmin_f64(x, y); }
825 __DEVICE__
826 inline
827 double fmod(double x, double y) { return __ocml_fmod_f64(x, y); }
828 __DEVICE__
829 inline
830 double frexp(double x, int* nptr)
831 {
832  int tmp;
833  double r =
834  __ocml_frexp_f64(x, (__attribute__((address_space(5))) int*) &tmp);
835  *nptr = tmp;
836 
837  return r;
838 }
839 __DEVICE__
840 inline
841 double hypot(double x, double y) { return __ocml_hypot_f64(x, y); }
842 __DEVICE__
843 inline
844 int ilogb(double x) { return __ocml_ilogb_f64(x); }
845 __DEVICE__
846 inline
847 __RETURN_TYPE isfinite(double x) { return __ocml_isfinite_f64(x); }
848 __DEVICE__
849 inline
850 __RETURN_TYPE isinf(double x) { return __ocml_isinf_f64(x); }
851 __DEVICE__
852 inline
853 __RETURN_TYPE isnan(double x) { return __ocml_isnan_f64(x); }
854 __DEVICE__
855 inline
856 double j0(double x) { return __ocml_j0_f64(x); }
857 __DEVICE__
858 inline
859 double j1(double x) { return __ocml_j1_f64(x); }
860 __DEVICE__
861 inline
862 double jn(int n, double x)
863 { // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
864  // for linear recurrences to get O(log n) steps, but it's unclear if
865  // it'd be beneficial in this case. Placeholder until OCML adds
866  // support.
867  if (n == 0) return j0f(x);
868  if (n == 1) return j1f(x);
869 
870  double x0 = j0f(x);
871  double x1 = j1f(x);
872  for (int i = 1; i < n; ++i) {
873  double x2 = (2 * i) / x * x1 - x0;
874  x0 = x1;
875  x1 = x2;
876  }
877 
878  return x1;
879 }
880 __DEVICE__
881 inline
882 double ldexp(double x, int e) { return __ocml_ldexp_f64(x, e); }
883 __DEVICE__
884 inline
885 double lgamma(double x) { return __ocml_lgamma_f64(x); }
886 __DEVICE__
887 inline
888 long long int llrint(double x) { return __ocml_rint_f64(x); }
889 __DEVICE__
890 inline
891 long long int llround(double x) { return __ocml_round_f64(x); }
892 __DEVICE__
893 inline
894 double log(double x) { return __ocml_log_f64(x); }
895 __DEVICE__
896 inline
897 double log10(double x) { return __ocml_log10_f64(x); }
898 __DEVICE__
899 inline
900 double log1p(double x) { return __ocml_log1p_f64(x); }
901 __DEVICE__
902 inline
903 double log2(double x) { return __ocml_log2_f64(x); }
904 __DEVICE__
905 inline
906 double logb(double x) { return __ocml_logb_f64(x); }
907 __DEVICE__
908 inline
909 long int lrint(double x) { return __ocml_rint_f64(x); }
910 __DEVICE__
911 inline
912 long int lround(double x) { return __ocml_round_f64(x); }
913 __DEVICE__
914 inline
915 double modf(double x, double* iptr)
916 {
917  double tmp;
918  double r =
919  __ocml_modf_f64(x, (__attribute__((address_space(5))) double*) &tmp);
920  *iptr = tmp;
921 
922  return r;
923 }
924 __DEVICE__
925 inline
926 double nan(const char* tagp)
927 {
928 #if !_WIN32
929  union {
930  double val;
931  struct ieee_double {
932  uint64_t mantissa : 51;
933  uint32_t quiet : 1;
934  uint32_t exponent : 11;
935  uint32_t sign : 1;
936  } bits;
937  static_assert(sizeof(double) == sizeof(ieee_double), "");
938  } tmp;
939 
940  tmp.bits.sign = 0u;
941  tmp.bits.exponent = ~0u;
942  tmp.bits.quiet = 1u;
943  tmp.bits.mantissa = __make_mantissa(tagp);
944 
945  return tmp.val;
946 #else
947  static_assert(sizeof(uint64_t)==sizeof(double));
948  uint64_t val = __make_mantissa(tagp);
949  val |= 0xFFF << 51;
950  return *reinterpret_cast<double*>(&val);
951 #endif
952 }
953 __DEVICE__
954 inline
955 double nearbyint(double x) { return __ocml_nearbyint_f64(x); }
956 __DEVICE__
957 inline
958 double nextafter(double x, double y) { return __ocml_nextafter_f64(x, y); }
959 __DEVICE__
960 inline
961 double norm(int dim, const double* a)
962 { // TODO: placeholder until OCML adds support.
963  double r = 0;
964  while (dim--) { r += a[0] * a[0]; ++a; }
965 
966  return __ocml_sqrt_f64(r);
967 }
968 __DEVICE__
969 inline
970 double norm3d(double x, double y, double z)
971 {
972  return __ocml_len3_f64(x, y, z);
973 }
974 __DEVICE__
975 inline
976 double norm4d(double x, double y, double z, double w)
977 {
978  return __ocml_len4_f64(x, y, z, w);
979 }
980 __DEVICE__
981 inline
982 double normcdf(double x) { return __ocml_ncdf_f64(x); }
983 __DEVICE__
984 inline
985 double normcdfinv(double x) { return __ocml_ncdfinv_f64(x); }
986 __DEVICE__
987 inline
988 double pow(double x, double y) { return __ocml_pow_f64(x, y); }
989 __DEVICE__
990 inline
991 double powi(double base, int iexp) { return __ocml_pown_f64(base, iexp); }
992 __DEVICE__
993 inline
994 double rcbrt(double x) { return __ocml_rcbrt_f64(x); }
995 __DEVICE__
996 inline
997 double remainder(double x, double y) { return __ocml_remainder_f64(x, y); }
998 __DEVICE__
999 inline
1000 double remquo(double x, double y, int* quo)
1001 {
1002  int tmp;
1003  double r =
1004  __ocml_remquo_f64(x, y, (__attribute__((address_space(5))) int*) &tmp);
1005  *quo = tmp;
1006 
1007  return r;
1008 }
1009 __DEVICE__
1010 inline
1011 double rhypot(double x, double y) { return __ocml_rhypot_f64(x, y); }
1012 __DEVICE__
1013 inline
1014 double rint(double x) { return __ocml_rint_f64(x); }
1015 __DEVICE__
1016 inline
1017 double rnorm(int dim, const double* a)
1018 { // TODO: placeholder until OCML adds support.
1019  double r = 0;
1020  while (dim--) { r += a[0] * a[0]; ++a; }
1021 
1022  return __ocml_rsqrt_f64(r);
1023 }
1024 __DEVICE__
1025 inline
1026 double rnorm3d(double x, double y, double z)
1027 {
1028  return __ocml_rlen3_f64(x, y, z);
1029 }
1030 __DEVICE__
1031 inline
1032 double rnorm4d(double x, double y, double z, double w)
1033 {
1034  return __ocml_rlen4_f64(x, y, z, w);
1035 }
1036 __DEVICE__
1037 inline
1038 double round(double x) { return __ocml_round_f64(x); }
1039 __DEVICE__
1040 inline
1041 double rsqrt(double x) { return __ocml_rsqrt_f64(x); }
1042 __DEVICE__
1043 inline
1044 double scalbln(double x, long int n)
1045 {
1046  return (n < INT_MAX) ? __ocml_scalbn_f64(x, n) : __ocml_scalb_f64(x, n);
1047 }
1048 __DEVICE__
1049 inline
1050 double scalbn(double x, int n) { return __ocml_scalbn_f64(x, n); }
1051 __DEVICE__
1052 inline
1053 __RETURN_TYPE signbit(double x) { return __ocml_signbit_f64(x); }
1054 __DEVICE__
1055 inline
1056 double sin(double x) { return __ocml_sin_f64(x); }
1057 __DEVICE__
1058 inline
1059 void sincos(double x, double* sptr, double* cptr)
1060 {
1061  double tmp;
1062  *sptr =
1063  __ocml_sincos_f64(x, (__attribute__((address_space(5))) double*) &tmp);
1064  *cptr = tmp;
1065 }
1066 __DEVICE__
1067 inline
1068 void sincospi(double x, double* sptr, double* cptr)
1069 {
1070  double tmp;
1071  *sptr = __ocml_sincospi_f64(
1072  x, (__attribute__((address_space(5))) double*) &tmp);
1073  *cptr = tmp;
1074 }
1075 __DEVICE__
1076 inline
1077 double sinh(double x) { return __ocml_sinh_f64(x); }
1078 __DEVICE__
1079 inline
1080 double sinpi(double x) { return __ocml_sinpi_f64(x); }
1081 __DEVICE__
1082 inline
1083 double sqrt(double x) { return __ocml_sqrt_f64(x); }
1084 __DEVICE__
1085 inline
1086 double tan(double x) { return __ocml_tan_f64(x); }
1087 __DEVICE__
1088 inline
1089 double tanh(double x) { return __ocml_tanh_f64(x); }
1090 __DEVICE__
1091 inline
1092 double tgamma(double x) { return __ocml_tgamma_f64(x); }
1093 __DEVICE__
1094 inline
1095 double trunc(double x) { return __ocml_trunc_f64(x); }
1096 __DEVICE__
1097 inline
1098 double y0(double x) { return __ocml_y0_f64(x); }
1099 __DEVICE__
1100 inline
1101 double y1(double x) { return __ocml_y1_f64(x); }
1102 __DEVICE__
1103 inline
1104 double yn(int n, double x)
1105 { // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
1106  // for linear recurrences to get O(log n) steps, but it's unclear if
1107  // it'd be beneficial in this case. Placeholder until OCML adds
1108  // support.
1109  if (n == 0) return j0f(x);
1110  if (n == 1) return j1f(x);
1111 
1112  double x0 = j0f(x);
1113  double x1 = j1f(x);
1114  for (int i = 1; i < n; ++i) {
1115  double x2 = (2 * i) / x * x1 - x0;
1116  x0 = x1;
1117  x1 = x2;
1118  }
1119 
1120  return x1;
1121 }
1122 
1123 // BEGIN INTRINSICS
1124 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1125 __DEVICE__
1126 inline
1127 double __dadd_rd(double x, double y) { return __ocml_add_rtn_f64(x, y); }
1128 #endif
1129 __DEVICE__
1130 inline
1131 double __dadd_rn(double x, double y) { return x + y; }
1132 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1133 __DEVICE__
1134 inline
1135 double __dadd_ru(double x, double y) { return __ocml_add_rtp_f64(x, y); }
1136 __DEVICE__
1137 inline
1138 double __dadd_rz(double x, double y) { return __ocml_add_rtz_f64(x, y); }
1139 __DEVICE__
1140 inline
1141 double __ddiv_rd(double x, double y) { return __ocml_div_rtn_f64(x, y); }
1142 #endif
1143 __DEVICE__
1144 inline
1145 double __ddiv_rn(double x, double y) { return x / y; }
1146 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1147 __DEVICE__
1148 inline
1149 double __ddiv_ru(double x, double y) { return __ocml_div_rtp_f64(x, y); }
1150 __DEVICE__
1151 inline
1152 double __ddiv_rz(double x, double y) { return __ocml_div_rtz_f64(x, y); }
1153 __DEVICE__
1154 inline
1155 double __dmul_rd(double x, double y) { return __ocml_mul_rtn_f64(x, y); }
1156 #endif
1157 __DEVICE__
1158 inline
1159 double __dmul_rn(double x, double y) { return x * y; }
1160 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1161 __DEVICE__
1162 inline
1163 double __dmul_ru(double x, double y) { return __ocml_mul_rtp_f64(x, y); }
1164 __DEVICE__
1165 inline
1166 double __dmul_rz(double x, double y) { return __ocml_mul_rtz_f64(x, y); }
1167 __DEVICE__
1168 inline
1169 double __drcp_rd(double x) { return __llvm_amdgcn_rcp_f64(x); }
1170 #endif
1171 __DEVICE__
1172 inline
1173 double __drcp_rn(double x) { return __llvm_amdgcn_rcp_f64(x); }
1174 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1175 __DEVICE__
1176 inline
1177 double __drcp_ru(double x) { return __llvm_amdgcn_rcp_f64(x); }
1178 __DEVICE__
1179 inline
1180 double __drcp_rz(double x) { return __llvm_amdgcn_rcp_f64(x); }
1181 __DEVICE__
1182 inline
1183 double __dsqrt_rd(double x) { return __ocml_sqrt_rtn_f64(x); }
1184 #endif
1185 __DEVICE__
1186 inline
1187 double __dsqrt_rn(double x) { return __ocml_sqrt_f64(x); }
1188 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1189 __DEVICE__
1190 inline
1191 double __dsqrt_ru(double x) { return __ocml_sqrt_rtp_f64(x); }
1192 __DEVICE__
1193 inline
1194 double __dsqrt_rz(double x) { return __ocml_sqrt_rtz_f64(x); }
1195 __DEVICE__
1196 inline
1197 double __dsub_rd(double x, double y) { return __ocml_sub_rtn_f64(x, y); }
1198 #endif
1199 __DEVICE__
1200 inline
1201 double __dsub_rn(double x, double y) { return x - y; }
1202 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1203 __DEVICE__
1204 inline
1205 double __dsub_ru(double x, double y) { return __ocml_sub_rtp_f64(x, y); }
1206 __DEVICE__
1207 inline
1208 double __dsub_rz(double x, double y) { return __ocml_sub_rtz_f64(x, y); }
1209 __DEVICE__
1210 inline
1211 double __fma_rd(double x, double y, double z)
1212 {
1213  return __ocml_fma_rtn_f64(x, y, z);
1214 }
1215 #endif
1216 __DEVICE__
1217 inline
1218 double __fma_rn(double x, double y, double z)
1219 {
1220  return __ocml_fma_f64(x, y, z);
1221 }
1222 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1223 __DEVICE__
1224 inline
1225 double __fma_ru(double x, double y, double z)
1226 {
1227  return __ocml_fma_rtp_f64(x, y, z);
1228 }
1229 __DEVICE__
1230 inline
1231 double __fma_rz(double x, double y, double z)
1232 {
1233  return __ocml_fma_rtz_f64(x, y, z);
1234 }
1235 #endif
1236 // END INTRINSICS
1237 // END DOUBLE
1238 
1239 // BEGIN INTEGER
1240 __DEVICE__
1241 inline
1242 int abs(int x)
1243 {
1244  int sgn = x >> (sizeof(int) * CHAR_BIT - 1);
1245  return (x ^ sgn) - sgn;
1246 }
1247 __DEVICE__
1248 inline
1249 long labs(long x)
1250 {
1251  long sgn = x >> (sizeof(long) * CHAR_BIT - 1);
1252  return (x ^ sgn) - sgn;
1253 }
1254 __DEVICE__
1255 inline
1256 long long llabs(long long x)
1257 {
1258  long long sgn = x >> (sizeof(long long) * CHAR_BIT - 1);
1259  return (x ^ sgn) - sgn;
1260 }
1261 
1262 #if defined(__cplusplus)
1263  __DEVICE__
1264  inline
1265  long abs(long x) { return labs(x); }
1266  __DEVICE__
1267  inline
1268  long long abs(long long x) { return llabs(x); }
1269 #endif
1270 // END INTEGER
1271 
1272 __DEVICE__
1273 inline _Float16 fma(_Float16 x, _Float16 y, _Float16 z) {
1274  return __ocml_fma_f16(x, y, z);
1275 }
1276 
1277 __DEVICE__
1278 inline float fma(float x, float y, float z) {
1279  return fmaf(x, y, z);
1280 }
1281 
1282 #pragma push_macro("__DEF_FLOAT_FUN")
1283 #pragma push_macro("__DEF_FLOAT_FUN2")
1284 #pragma push_macro("__DEF_FLOAT_FUN2I")
1285 #pragma push_macro("__HIP_OVERLOAD")
1286 #pragma push_macro("__HIP_OVERLOAD2")
1287 
1288 // __hip_enable_if::type is a type function which returns __T if __B is true.
1289 template<bool __B, class __T = void>
1291 
1292 template <class __T> struct __hip_enable_if<true, __T> {
1293  typedef __T type;
1294 };
1295 
1296 // __HIP_OVERLOAD1 is used to resolve function calls with integer argument to
1297 // avoid compilation error due to ambibuity. e.g. floor(5) is resolved with
1298 // floor(double).
1299 #define __HIP_OVERLOAD1(__retty, __fn) \
1300  template <typename __T> \
1301  __DEVICE__ \
1302  typename __hip_enable_if<std::numeric_limits<__T>::is_integer, \
1303  __retty>::type \
1304  __fn(__T __x) { \
1305  return ::__fn((double)__x); \
1306  }
1307 
1308 // __HIP_OVERLOAD2 is used to resolve function calls with mixed float/double
1309 // or integer argument to avoid compilation error due to ambibuity. e.g.
1310 // max(5.0f, 6.0) is resolved with max(double, double).
1311 #define __HIP_OVERLOAD2(__retty, __fn) \
1312  template <typename __T1, typename __T2> \
1313  __DEVICE__ typename __hip_enable_if< \
1314  std::numeric_limits<__T1>::is_specialized && \
1315  std::numeric_limits<__T2>::is_specialized, \
1316  __retty>::type \
1317  __fn(__T1 __x, __T2 __y) { \
1318  return __fn((double)__x, (double)__y); \
1319  }
1320 
1321 // Define cmath functions with float argument and returns float.
1322 #define __DEF_FUN1(retty, func) \
1323 __DEVICE__ \
1324 inline \
1325 float func(float x) \
1326 { \
1327  return func##f(x); \
1328 } \
1329 __HIP_OVERLOAD1(retty, func)
1330 
1331 // Define cmath functions with float argument and returns retty.
1332 #define __DEF_FUNI(retty, func) \
1333 __DEVICE__ \
1334 inline \
1335 retty func(float x) \
1336 { \
1337  return func##f(x); \
1338 } \
1339 __HIP_OVERLOAD1(retty, func)
1340 
1341 // define cmath functions with two float arguments.
1342 #define __DEF_FUN2(retty, func) \
1343 __DEVICE__ \
1344 inline \
1345 float func(float x, float y) \
1346 { \
1347  return func##f(x, y); \
1348 } \
1349 __HIP_OVERLOAD2(retty, func)
1350 
1351 __DEF_FUN1(double, acos)
1352 __DEF_FUN1(double, acosh)
1353 __DEF_FUN1(double, asin)
1354 __DEF_FUN1(double, asinh)
1355 __DEF_FUN1(double, atan)
1356 __DEF_FUN2(double, atan2);
1357 __DEF_FUN1(double, atanh)
1358 __DEF_FUN1(double, cbrt)
1359 __DEF_FUN1(double, ceil)
1360 __DEF_FUN2(double, copysign);
1361 __DEF_FUN1(double, cos)
1362 __DEF_FUN1(double, cosh)
1363 __DEF_FUN1(double, erf)
1364 __DEF_FUN1(double, erfc)
1365 __DEF_FUN1(double, exp)
1366 __DEF_FUN1(double, exp2)
1367 __DEF_FUN1(double, expm1)
1368 __DEF_FUN1(double, fabs)
1369 __DEF_FUN2(double, fdim);
1370 __DEF_FUN1(double, floor)
1371 __DEF_FUN2(double, fmax);
1372 __DEF_FUN2(double, fmin);
1373 __DEF_FUN2(double, fmod);
1374 //__HIP_OVERLOAD1(int, fpclassify)
1375 __DEF_FUN2(double, hypot);
1376 __DEF_FUNI(int, ilogb)
1377 __HIP_OVERLOAD1(bool, isfinite)
1378 __HIP_OVERLOAD2(bool, isgreater);
1379 __HIP_OVERLOAD2(bool, isgreaterequal);
1380 __HIP_OVERLOAD1(bool, isinf);
1381 __HIP_OVERLOAD2(bool, isless);
1382 __HIP_OVERLOAD2(bool, islessequal);
1383 __HIP_OVERLOAD2(bool, islessgreater);
1384 __HIP_OVERLOAD1(bool, isnan);
1385 //__HIP_OVERLOAD1(bool, isnormal)
1386 __HIP_OVERLOAD2(bool, isunordered);
1387 __DEF_FUN1(double, lgamma)
1388 __DEF_FUN1(double, log)
1389 __DEF_FUN1(double, log10)
1390 __DEF_FUN1(double, log1p)
1391 __DEF_FUN1(double, log2)
1392 __DEF_FUN1(double, logb)
1393 __DEF_FUNI(long long, llrint)
1394 __DEF_FUNI(long long, llround)
1395 __DEF_FUNI(long, lrint)
1396 __DEF_FUNI(long, lround)
1397 __DEF_FUN1(double, nearbyint);
1398 __DEF_FUN2(double, nextafter);
1399 __DEF_FUN2(double, pow);
1400 __DEF_FUN2(double, remainder);
1401 __DEF_FUN1(double, rint);
1402 __DEF_FUN1(double, round);
1403 __HIP_OVERLOAD1(bool, signbit)
1404 __DEF_FUN1(double, sin)
1405 __DEF_FUN1(double, sinh)
1406 __DEF_FUN1(double, sqrt)
1407 __DEF_FUN1(double, tan)
1408 __DEF_FUN1(double, tanh)
1409 __DEF_FUN1(double, tgamma)
1410 __DEF_FUN1(double, trunc);
1411 
1412 // define cmath functions with a float and an integer argument.
1413 #define __DEF_FLOAT_FUN2I(func) \
1414 __DEVICE__ \
1415 inline \
1416 float func(float x, int y) \
1417 { \
1418  return func##f(x, y); \
1419 }
1420 __DEF_FLOAT_FUN2I(scalbn)
1421 __DEF_FLOAT_FUN2I(ldexp)
1422 
1423 template<class T>
1424 __DEVICE__ inline T min(T arg1, T arg2) {
1425  return (arg1 < arg2) ? arg1 : arg2;
1426 }
1427 
1428 template<class T>
1429 __DEVICE__ inline T max(T arg1, T arg2) {
1430  return (arg1 > arg2) ? arg1 : arg2;
1431 }
1432 
1433 #if __HCC__
1434 
1435 __DEVICE__ inline static uint32_t min(uint32_t arg1, int32_t arg2) {
1436  return min(arg1, (uint32_t) arg2);
1437 }
1438 /*__DEVICE__ inline static uint32_t min(int32_t arg1, uint32_t arg2) {
1439  return min((uint32_t) arg1, arg2);
1440 }
1441 
1442 __DEVICE__ inline static uint64_t min(uint64_t arg1, int64_t arg2) {
1443  return min(arg1, (uint64_t) arg2);
1444 }
1445 __DEVICE__ inline static uint64_t min(int64_t arg1, uint64_t arg2) {
1446  return min((uint64_t) arg1, arg2);
1447 }
1448 
1449 __DEVICE__ inline static unsigned long long min(unsigned long long arg1, long long arg2) {
1450  return min(arg1, (unsigned long long) arg2);
1451 }
1452 __DEVICE__ inline static unsigned long long min(long long arg1, unsigned long long arg2) {
1453  return min((unsigned long long) arg1, arg2);
1454 }*/
1455 
1456 __DEVICE__ inline static uint32_t max(uint32_t arg1, int32_t arg2) {
1457  return max(arg1, (uint32_t) arg2);
1458 }
1459 __DEVICE__ inline static uint32_t max(int32_t arg1, uint32_t arg2) {
1460  return max((uint32_t) arg1, arg2);
1461 }
1462 
1463 /*__DEVICE__ inline static uint64_t max(uint64_t arg1, int64_t arg2) {
1464  return max(arg1, (uint64_t) arg2);
1465 }
1466 __DEVICE__ inline static uint64_t max(int64_t arg1, uint64_t arg2) {
1467  return max((uint64_t) arg1, arg2);
1468 }
1469 
1470 __DEVICE__ inline static unsigned long long max(unsigned long long arg1, long long arg2) {
1471  return max(arg1, (unsigned long long) arg2);
1472 }
1473 __DEVICE__ inline static unsigned long long max(long long arg1, unsigned long long arg2) {
1474  return max((unsigned long long) arg1, arg2);
1475 }*/
1476 #else
1477 __DEVICE__ inline int min(int arg1, int arg2) {
1478  return (arg1 < arg2) ? arg1 : arg2;
1479 }
1480 __DEVICE__ inline int max(int arg1, int arg2) {
1481  return (arg1 > arg2) ? arg1 : arg2;
1482 }
1483 
1484 __DEVICE__
1485 inline
1486 float max(float x, float y) {
1487  return fmaxf(x, y);
1488 }
1489 
1490 __DEVICE__
1491 inline
1492 double max(double x, double y) {
1493  return fmax(x, y);
1494 }
1495 
1496 __DEVICE__
1497 inline
1498 float min(float x, float y) {
1499  return fminf(x, y);
1500 }
1501 
1502 __DEVICE__
1503 inline
1504 double min(double x, double y) {
1505  return fmin(x, y);
1506 }
1507 
1508 __HIP_OVERLOAD2(double, max)
1509 __HIP_OVERLOAD2(double, min)
1510 
1511 #endif
1512 
1513 __host__ inline static int min(int arg1, int arg2) {
1514  return std::min(arg1, arg2);
1515 }
1516 
1517 __host__ inline static int max(int arg1, int arg2) {
1518  return std::max(arg1, arg2);
1519 }
1520 
1521 __DEVICE__
1522 inline float pow(float base, int iexp) {
1523  return powif(base, iexp);
1524 }
1525 
1526 __DEVICE__
1527 inline double pow(double base, int iexp) {
1528  return powi(base, iexp);
1529 }
1530 
1531 __DEVICE__
1532 inline _Float16 pow(_Float16 base, int iexp) {
1533  return __ocml_pown_f16(base, iexp);
1534 }
1535 
1536 #endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
1537 
1538 #pragma pop_macro("__DEF_FLOAT_FUN")
1539 #pragma pop_macro("__DEF_FLOAT_FUN2")
1540 #pragma pop_macro("__DEF_FLOAT_FUN2I")
1541 #pragma pop_macro("__HIP_OVERLOAD")
1542 #pragma pop_macro("__HIP_OVERLOAD2")
1543 #pragma pop_macro("__DEVICE__")
1544 #pragma pop_macro("__RETURN_TYPE")
1545 
1546 // For backward compatibility.
1547 // There are HIP applications e.g. TensorFlow, expecting __HIP_ARCH_* macros
1548 // defined after including math_functions.h.
hip_runtime.h
Contains definitions of APIs for HIP runtime.
char4
Definition: hip_vector_types.h:1509
hip_vector_types.h
Defines the different newt vector types for HIP runtime.
__host__
#define __host__
Definition: host_defines.h:41
host_defines.h
TODO-doc.
__hip_enable_if
Definition: math_functions.h:1290
uchar4
Definition: hip_vector_types.h:1516
short2
Definition: hip_vector_types.h:1522
ushort2
Definition: hip_vector_types.h:1529