24 #ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_FP16_H 25 #define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_FP16_H 27 #include <hip/hcc_detail/hip_common.h> 31 #if defined(__cplusplus) 33 #include <type_traits> 37 #if __HCC_OR_HIP_CLANG__ 38 typedef _Float16 _Float16_2 __attribute__((ext_vector_type(2)));
42 static_assert(
sizeof(_Float16) ==
sizeof(
unsigned short),
"");
51 static_assert(
sizeof(_Float16_2) ==
sizeof(
unsigned short[2]),
"");
61 #if defined(__cplusplus) 62 #include "hip_fp16_math_fwd.h" 63 #include "hip_vector_types.h" 68 template<>
struct is_floating_point<_Float16> : std::true_type {};
71 template<
bool cond,
typename T =
void>
72 using Enable_if_t =
typename std::enable_if<cond, T>::type;
78 static_assert(
sizeof(_Float16) ==
sizeof(
unsigned short),
"");
89 #if !defined(__HIP_NO_HALF_CONVERSIONS__) 91 __half(decltype(data) x) : data{x} {}
94 Enable_if_t<std::is_floating_point<T>{}>* =
nullptr>
96 __half(T x) : data{
static_cast<_Float16
>(x)} {}
99 __half(
const __half&) =
default;
101 __half(__half&&) =
default;
106 #if !defined(__HIP_NO_HALF_CONVERSIONS__) 108 typename T, Enable_if_t<std::is_integral<T>{}>* =
nullptr>
110 __half(T x) : data{
static_cast<_Float16
>(x)} {}
115 __half& operator=(
const __half&) =
default;
117 __half& operator=(__half&&) =
default;
125 volatile __half& operator=(
const __half_raw& x)
volatile 130 volatile __half& operator=(
const volatile __half_raw& x)
volatile 140 volatile __half& operator=(
__half_raw&& x)
volatile 145 volatile __half& operator=(
volatile __half_raw&& x)
volatile 150 #if !defined(__HIP_NO_HALF_CONVERSIONS__) 153 Enable_if_t<std::is_floating_point<T>{}>* =
nullptr>
155 __half& operator=(T x)
157 data =
static_cast<_Float16
>(x);
163 #if !defined(__HIP_NO_HALF_CONVERSIONS__) 165 typename T, Enable_if_t<std::is_integral<T>{}>* =
nullptr>
167 __half& operator=(T x)
169 data =
static_cast<_Float16
>(x);
174 #if !defined(__HIP_NO_HALF_OPERATORS__) 176 __half& operator+=(
const __half& x)
182 __half& operator-=(
const __half& x)
188 __half& operator*=(
const __half& x)
194 __half& operator/=(
const __half& x)
200 __half& operator++() { ++data;
return *
this; }
202 __half operator++(
int)
209 __half& operator--() { --data;
return *
this; }
211 __half operator--(
int)
220 #if !defined(__HIP_NO_HALF_CONVERSIONS__) 224 std::is_floating_point<T>{} &&
225 !std::is_same<T, double>{}>* =
nullptr>
226 operator T()
const {
return data; }
236 #if !defined(__HIP_NO_HALF_CONVERSIONS__) 238 typename T, Enable_if_t<std::is_integral<T>{}>* =
nullptr>
240 operator T()
const {
return data; }
243 #if !defined(__HIP_NO_HALF_OPERATORS__) 245 __half operator+()
const {
return *
this; }
247 __half operator-()
const 250 tmp.data = -tmp.data;
256 #if !defined(__HIP_NO_HALF_OPERATORS__) 260 __half operator+(
const __half& x,
const __half& y)
262 return __half{x} += y;
267 __half operator-(
const __half& x,
const __half& y)
269 return __half{x} -= y;
274 __half operator*(
const __half& x,
const __half& y)
276 return __half{x} *= y;
281 __half operator/(
const __half& x,
const __half& y)
283 return __half{x} /= y;
288 bool operator==(
const __half& x,
const __half& y)
290 return x.data == y.data;
295 bool operator!=(
const __half& x,
const __half& y)
302 bool operator<(
const __half& x,
const __half& y)
304 return x.data < y.data;
309 bool operator>(
const __half& x,
const __half& y)
311 return y.data < x.data;
316 bool operator<=(
const __half& x,
const __half& y)
323 bool operator>=(
const __half& x,
const __half& y)
327 #endif // !defined(__HIP_NO_HALF_OPERATORS__) 336 sizeof(_Float16_2) ==
sizeof(
unsigned short[2]),
"");
351 __half2(decltype(data) x) : data{x} {}
353 __half2(
const __half& x,
const __half& y)
357 static_cast<__half_raw>(y).data}
360 __half2(
const __half2&) =
default;
362 __half2(__half2&&) =
default;
364 ~__half2() =
default;
368 __half2& operator=(
const __half2&) =
default;
370 __half2& operator=(__half2&&) =
default;
379 #if !defined(__HIP_NO_HALF_OPERATORS__) 381 __half2& operator+=(
const __half2& x)
387 __half2& operator-=(
const __half2& x)
393 __half2& operator*=(
const __half2& x)
399 __half2& operator/=(
const __half2& x)
405 __half2& operator++() {
return *
this += _Float16_2{1, 1}; }
407 __half2 operator++(
int)
414 __half2& operator--() {
return *
this -= _Float16_2{1, 1}; }
416 __half2 operator--(
int)
426 operator decltype(data)()
const {
return data; }
431 #if !defined(__HIP_NO_HALF_OPERATORS__) 433 __half2 operator+()
const {
return *
this; }
435 __half2 operator-()
const 438 tmp.data = -tmp.data;
444 #if !defined(__HIP_NO_HALF_OPERATORS__) 448 __half2 operator+(
const __half2& x,
const __half2& y)
450 return __half2{x} += y;
455 __half2 operator-(
const __half2& x,
const __half2& y)
457 return __half2{x} -= y;
462 __half2 operator*(
const __half2& x,
const __half2& y)
464 return __half2{x} *= y;
469 __half2 operator/(
const __half2& x,
const __half2& y)
471 return __half2{x} /= y;
476 bool operator==(
const __half2& x,
const __half2& y)
478 auto r = x.data == y.data;
479 return r.x != 0 && r.y != 0;
484 bool operator!=(
const __half2& x,
const __half2& y)
491 bool operator<(
const __half2& x,
const __half2& y)
493 auto r = x.data < y.data;
494 return r.x != 0 && r.y != 0;
499 bool operator>(
const __half2& x,
const __half2& y)
506 bool operator<=(
const __half2& x,
const __half2& y)
513 bool operator>=(
const __half2& x,
const __half2& y)
517 #endif // !defined(__HIP_NO_HALF_OPERATORS__) 525 __half2 make_half2(__half x, __half y)
527 return __half2{x, y};
532 __half __low2half(__half2 x)
539 __half __high2half(__half2 x)
546 __half2 __half2half2(__half x)
548 return __half2{x, x};
553 __half2 __halves2half2(__half x, __half y)
555 return __half2{x, y};
560 __half2 __low2half2(__half2 x)
565 static_cast<__half2_raw>(x).data.x}};
570 __half2 __high2half2(__half2 x)
575 static_cast<__half2_raw>(x).data.y}};
580 __half2 __lows2half2(__half2 x, __half2 y)
585 static_cast<__half2_raw>(y).data.x}};
590 __half2 __highs2half2(__half2 x, __half2 y)
595 static_cast<__half2_raw>(y).data.y}};
600 __half2 __lowhigh2highlow(__half2 x)
605 static_cast<__half2_raw>(x).data.x}};
611 short __half_as_short(__half x)
618 unsigned short __half_as_ushort(__half x)
625 __half __short_as_half(
short x)
633 __half __ushort_as_half(
unsigned short x)
643 __half __float2half(
float x)
649 __half __float2half_rn(
float x)
655 __half __float2half_rz(
float x)
661 __half __float2half_rd(
float x)
667 __half __float2half_ru(
float x)
673 __half2 __float2half2_rn(
float x)
677 static_cast<_Float16
>(x), static_cast<_Float16>(x)}};
681 __half2 __floats2half2_rn(
float x,
float y)
684 static_cast<_Float16
>(x), static_cast<_Float16>(y)}};
688 __half2 __float22half2_rn(float2 x)
690 return __floats2half2_rn(x.x, x.y);
696 float __half2float(__half x)
702 float __low2float(__half2 x)
708 float __high2float(__half2 x)
714 float2 __half22float2(__half2 x)
717 static_cast<__half2_raw>(x).data.x,
718 static_cast<__half2_raw>(x).data.y);
724 int __half2int_rn(__half x)
730 int __half2int_rz(__half x)
736 int __half2int_rd(__half x)
742 int __half2int_ru(__half x)
750 __half __int2half_rn(
int x)
756 __half __int2half_rz(
int x)
762 __half __int2half_rd(
int x)
768 __half __int2half_ru(
int x)
776 short __half2short_rn(__half x)
782 short __half2short_rz(__half x)
788 short __half2short_rd(__half x)
794 short __half2short_ru(__half x)
802 __half __short2half_rn(
short x)
808 __half __short2half_rz(
short x)
814 __half __short2half_rd(
short x)
820 __half __short2half_ru(
short x)
828 long long __half2ll_rn(__half x)
834 long long __half2ll_rz(__half x)
840 long long __half2ll_rd(__half x)
846 long long __half2ll_ru(__half x)
854 __half __ll2half_rn(
long long x)
860 __half __ll2half_rz(
long long x)
866 __half __ll2half_rd(
long long x)
872 __half __ll2half_ru(
long long x)
880 unsigned int __half2uint_rn(__half x)
886 unsigned int __half2uint_rz(__half x)
892 unsigned int __half2uint_rd(__half x)
898 unsigned int __half2uint_ru(__half x)
906 __half __uint2half_rn(
unsigned int x)
912 __half __uint2half_rz(
unsigned int x)
918 __half __uint2half_rd(
unsigned int x)
924 __half __uint2half_ru(
unsigned int x)
932 unsigned short __half2ushort_rn(__half x)
938 unsigned short __half2ushort_rz(__half x)
944 unsigned short __half2ushort_rd(__half x)
950 unsigned short __half2ushort_ru(__half x)
958 __half __ushort2half_rn(
unsigned short x)
964 __half __ushort2half_rz(
unsigned short x)
970 __half __ushort2half_rd(
unsigned short x)
976 __half __ushort2half_ru(
unsigned short x)
984 unsigned long long __half2ull_rn(__half x)
990 unsigned long long __half2ull_rz(__half x)
996 unsigned long long __half2ull_rd(__half x)
1002 unsigned long long __half2ull_ru(__half x)
1010 __half __ull2half_rn(
unsigned long long x)
1016 __half __ull2half_rz(
unsigned long long x)
1022 __half __ull2half_rd(
unsigned long long x)
1028 __half __ull2half_ru(
unsigned long long x)
1036 __half __ldg(
const __half* ptr) {
return *ptr; }
1039 __half __ldcg(
const __half* ptr) {
return *ptr; }
1042 __half __ldca(
const __half* ptr) {
return *ptr; }
1045 __half __ldcs(
const __half* ptr) {
return *ptr; }
1049 __half2 __ldg(
const __half2* ptr) {
return *ptr; }
1052 __half2 __ldcg(
const __half2* ptr) {
return *ptr; }
1055 __half2 __ldca(
const __half2* ptr) {
return *ptr; }
1058 __half2 __ldcs(
const __half2* ptr) {
return *ptr; }
1063 bool __heq(__half x, __half y)
1066 static_cast<__half_raw>(y).data;
1070 bool __hne(__half x, __half y)
1073 static_cast<__half_raw>(y).data;
1077 bool __hle(__half x, __half y)
1080 static_cast<__half_raw>(y).data;
1084 bool __hge(__half x, __half y)
1087 static_cast<__half_raw>(y).data;
1091 bool __hlt(__half x, __half y)
1094 static_cast<__half_raw>(y).data;
1098 bool __hgt(__half x, __half y)
1101 static_cast<__half_raw>(y).data;
1105 bool __hequ(__half x, __half y) {
return __heq(x, y); }
1108 bool __hneu(__half x, __half y) {
return __hne(x, y); }
1111 bool __hleu(__half x, __half y) {
return __hle(x, y); }
1114 bool __hgeu(__half x, __half y) {
return __hge(x, y); }
1117 bool __hltu(__half x, __half y) {
return __hlt(x, y); }
1120 bool __hgtu(__half x, __half y) {
return __hgt(x, y); }
1124 __half2 __heq2(__half2 x, __half2 y)
1127 static_cast<__half2_raw>(y).data;
1129 static_cast<_Float16
>(r.x), static_cast<_Float16>(r.y)}};
1133 __half2 __hne2(__half2 x, __half2 y)
1136 static_cast<__half2_raw>(y).data;
1138 static_cast<_Float16
>(r.x), static_cast<_Float16>(r.y)}};
1142 __half2 __hle2(__half2 x, __half2 y)
1145 static_cast<__half2_raw>(y).data;
1147 static_cast<_Float16
>(r.x), static_cast<_Float16>(r.y)}};
1151 __half2 __hge2(__half2 x, __half2 y)
1154 static_cast<__half2_raw>(y).data;
1156 static_cast<_Float16
>(r.x), static_cast<_Float16>(r.y)}};
1160 __half2 __hlt2(__half2 x, __half2 y)
1163 static_cast<__half2_raw>(y).data;
1165 static_cast<_Float16
>(r.x), static_cast<_Float16>(r.y)}};
1169 __half2 __hgt2(__half2 x, __half2 y)
1172 static_cast<__half2_raw>(y).data;
1174 static_cast<_Float16
>(r.x), static_cast<_Float16>(r.y)}};
1178 __half2 __hequ2(__half2 x, __half2 y) {
return __heq2(x, y); }
1181 __half2 __hneu2(__half2 x, __half2 y) {
return __hne2(x, y); }
1184 __half2 __hleu2(__half2 x, __half2 y) {
return __hle2(x, y); }
1187 __half2 __hgeu2(__half2 x, __half2 y) {
return __hge2(x, y); }
1190 __half2 __hltu2(__half2 x, __half2 y) {
return __hlt2(x, y); }
1193 __half2 __hgtu2(__half2 x, __half2 y) {
return __hgt2(x, y); }
1197 bool __hbeq2(__half2 x, __half2 y)
1200 return r.data.x != 0 && r.data.y != 0;
1204 bool __hbne2(__half2 x, __half2 y)
1207 return r.data.x != 0 && r.data.y != 0;
1211 bool __hble2(__half2 x, __half2 y)
1214 return r.data.x != 0 && r.data.y != 0;
1218 bool __hbge2(__half2 x, __half2 y)
1221 return r.data.x != 0 && r.data.y != 0;
1225 bool __hblt2(__half2 x, __half2 y)
1228 return r.data.x != 0 && r.data.y != 0;
1232 bool __hbgt2(__half2 x, __half2 y)
1235 return r.data.x != 0 && r.data.y != 0;
1239 bool __hbequ2(__half2 x, __half2 y) {
return __hbeq2(x, y); }
1242 bool __hbneu2(__half2 x, __half2 y) {
return __hbne2(x, y); }
1245 bool __hbleu2(__half2 x, __half2 y) {
return __hble2(x, y); }
1248 bool __hbgeu2(__half2 x, __half2 y) {
return __hbge2(x, y); }
1251 bool __hbltu2(__half2 x, __half2 y) {
return __hblt2(x, y); }
1254 bool __hbgtu2(__half2 x, __half2 y) {
return __hbgt2(x, y); }
1259 __half __clamp_01(__half x)
1270 __half __hadd(__half x, __half y)
1274 static_cast<__half_raw>(y).data};
1278 __half __hsub(__half x, __half y)
1282 static_cast<__half_raw>(y).data};
1286 __half __hmul(__half x, __half y)
1290 static_cast<__half_raw>(y).data};
1294 __half __hadd_sat(__half x, __half y)
1296 return __clamp_01(__hadd(x, y));
1300 __half __hsub_sat(__half x, __half y)
1302 return __clamp_01(__hsub(x, y));
1306 __half __hmul_sat(__half x, __half y)
1308 return __clamp_01(__hmul(x, y));
1312 __half __hfma(__half x, __half y, __half z)
1315 static_cast<__half_raw>(x).data,
1316 static_cast<__half_raw>(y).data,
1317 static_cast<__half_raw>(z).data)};
1321 __half __hfma_sat(__half x, __half y, __half z)
1323 return __clamp_01(__hfma(x, y, z));
1327 __half __hdiv(__half x, __half y)
1331 static_cast<__half_raw>(y).data};
1336 __half2 __hadd2(__half2 x, __half2 y)
1340 static_cast<__half2_raw>(y).data};
1344 __half2 __hsub2(__half2 x, __half2 y)
1348 static_cast<__half2_raw>(y).data};
1352 __half2 __hmul2(__half2 x, __half2 y)
1356 static_cast<__half2_raw>(y).data};
1360 __half2 __hadd2_sat(__half2 x, __half2 y)
1369 __half2 __hsub2_sat(__half2 x, __half2 y)
1378 __half2 __hmul2_sat(__half2 x, __half2 y)
1387 __half2 __hfma2(__half2 x, __half2 y, __half2 z)
1393 __half2 __hfma2_sat(__half2 x, __half2 y, __half2 z)
1395 auto r =
static_cast<__half2_raw>(__hfma2(x, y, z));
1402 __half2 __h2div(__half2 x, __half2 y)
1406 static_cast<__half2_raw>(y).data};
1410 #if (__hcc_workweek__ >= 19015) || __HIP_CLANG_ONLY__ 1413 float amd_mixed_dot(__half2 a, __half2 b,
float c,
bool saturate) {
1414 return __ockl_fdot2(static_cast<__half2_raw>(a).data,
1415 static_cast<__half2_raw>(b).data,
1421 __half htrunc(__half x)
1424 __ocml_trunc_f16(static_cast<__half_raw>(x).data)};
1428 __half hceil(__half x)
1431 __ocml_ceil_f16(static_cast<__half_raw>(x).data)};
1435 __half hfloor(__half x)
1438 __ocml_floor_f16(static_cast<__half_raw>(x).data)};
1442 __half hrint(__half x)
1445 __ocml_rint_f16(static_cast<__half_raw>(x).data)};
1449 __half hsin(__half x)
1452 __ocml_sin_f16(static_cast<__half_raw>(x).data)};
1456 __half hcos(__half x)
1459 __ocml_cos_f16(static_cast<__half_raw>(x).data)};
1463 __half hexp(__half x)
1466 __ocml_exp_f16(static_cast<__half_raw>(x).data)};
1470 __half hexp2(__half x)
1473 __ocml_exp2_f16(static_cast<__half_raw>(x).data)};
1477 __half hexp10(__half x)
1480 __ocml_exp10_f16(static_cast<__half_raw>(x).data)};
1484 __half hlog2(__half x)
1487 __ocml_log2_f16(static_cast<__half_raw>(x).data)};
1491 __half hlog(__half x)
1494 __ocml_log_f16(static_cast<__half_raw>(x).data)};
1498 __half hlog10(__half x)
1501 __ocml_log10_f16(static_cast<__half_raw>(x).data)};
1505 __half hrcp(__half x)
1508 __llvm_amdgcn_rcp_f16(static_cast<__half_raw>(x).data)};
1512 __half hrsqrt(__half x)
1515 __ocml_rsqrt_f16(static_cast<__half_raw>(x).data)};
1519 __half hsqrt(__half x)
1522 __ocml_sqrt_f16(static_cast<__half_raw>(x).data)};
1526 bool __hisinf(__half x)
1528 return __ocml_isinf_f16(static_cast<__half_raw>(x).data);
1532 bool __hisnan(__half x)
1534 return __ocml_isnan_f16(static_cast<__half_raw>(x).data);
1538 __half __hneg(__half x)
1545 __half2 h2trunc(__half2 x)
1551 __half2 h2ceil(__half2 x)
1557 __half2 h2floor(__half2 x)
1563 __half2 h2rint(__half2 x)
1569 __half2 h2sin(__half2 x)
1575 __half2 h2cos(__half2 x)
1581 __half2 h2exp(__half2 x)
1587 __half2 h2exp2(__half2 x)
1593 __half2 h2exp10(__half2 x)
1599 __half2 h2log2(__half2 x)
1605 __half2 h2log(__half2 x) {
return __ocml_log_2f16(x); }
1608 __half2 h2log10(__half2 x) {
return __ocml_log10_2f16(x); }
1611 __half2 h2rcp(__half2 x) {
return __llvm_amdgcn_rcp_2f16(x); }
1614 __half2 h2rsqrt(__half2 x) {
return __ocml_rsqrt_2f16(x); }
1617 __half2 h2sqrt(__half2 x) {
return __ocml_sqrt_2f16(x); }
1620 __half2 __hisinf2(__half2 x)
1622 auto r = __ocml_isinf_2f16(x);
1624 static_cast<_Float16
>(r.x), static_cast<_Float16>(r.y)}};
1628 __half2 __hisnan2(__half2 x)
1630 auto r = __ocml_isnan_2f16(x);
1632 static_cast<_Float16
>(r.x), static_cast<_Float16>(r.y)}};
1636 __half2 __hneg2(__half2 x)
1642 #if !defined(HIP_NO_HALF) 1643 using half = __half;
1644 using half2 = __half2;
1646 #endif // defined(__cplusplus) 1647 #elif defined(__GNUC__) 1648 #include "hip_fp16_gcc.h" 1649 #endif // !defined(__clang__) && defined(__GNUC__) 1651 #endif // HIP_INCLUDE_HIP_HCC_DETAIL_HIP_FP16_H
Definition: hip_fp16_gcc.h:11
#define __host__
Definition: host_defines.h:41
Definition: hip_fp16_gcc.h:7