#include "device_functions.h"
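// CUDA-style atomic overloads for HIP device code (atomicCAS, atomicAdd,
// atomicSub, atomicExch, atomicMin, atomicMax, atomicInc, atomicDec,
// atomicAnd, atomicOr, atomicXor). Most overloads lower directly to Clang's
// __atomic_* builtins with relaxed memory ordering; the remaining ones are
// emulated with compare-and-swap loops or AMDGCN intrinsics.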
__device__
inline
int atomicCAS(int* address, int compare, int val)
{
    __atomic_compare_exchange_n(
        address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);

    // The builtin updates compare with the value observed at *address.
    return compare;
}
__device__
inline
unsigned int atomicCAS(
    unsigned int* address, unsigned int compare, unsigned int val)
{
    __atomic_compare_exchange_n(
        address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);

    return compare;
}
__device__
inline
unsigned long long atomicCAS(
    unsigned long long* address,
    unsigned long long compare,
    unsigned long long val)
{
    __atomic_compare_exchange_n(
        address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);

    return compare;
}
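// Usage sketch (illustrative only, not part of this header): atomicCAS
// returns the value observed at *address, so comparing the result against the
// expected value tells a thread whether its swap won. A hypothetical
// "run-once" kernel, assuming flag is zero-initialized by the caller:
//
//   __global__ void init_once(int* flag, int* value) {
//       if (atomicCAS(flag, 0, 1) == 0) {
//           *value = 42;  // only the single winning thread executes this
//       }
//   }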
__device__
inline
int atomicAdd(int* address, int val)
{
    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicAdd(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned long long atomicAdd(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}
__device__
inline
float atomicAdd(float* address, float val)
{
    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}
DEPRECATED("use atomicAdd instead")
__device__
inline
void atomicAddNoRet(float* address, float val)
{
    // __ockl_atomic_add_noret_f32 comes from the ROCm device library (OCKL).
    __ockl_atomic_add_noret_f32(address, val);
}
__device__
inline
double atomicAdd(double* address, double val)
{
    // 64-bit floating-point add is emulated with a CAS loop on the double's
    // integer representation.
    unsigned long long* uaddr{reinterpret_cast<unsigned long long*>(address)};
    unsigned long long r{__atomic_load_n(uaddr, __ATOMIC_RELAXED)};

    unsigned long long old;
    do {
        old = __atomic_load_n(uaddr, __ATOMIC_RELAXED);

        if (r != old) { r = old; continue; }

        r = atomicCAS(
            uaddr, r, __double_as_longlong(val + __longlong_as_double(r)));

        if (r == old) break;
    } while (true);

    return __longlong_as_double(r);
}
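// Usage sketch (illustrative only, not part of this header): summing
// per-thread contributions into one double accumulator, assuming *out was
// zero-initialized by the caller:
//
//   __global__ void accumulate(const double* in, double* out, size_t n) {
//       size_t i = blockIdx.x * blockDim.x + threadIdx.x;
//       if (i < n) atomicAdd(out, in[i]);
//   }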
__device__
inline
int atomicSub(int* address, int val)
{
    return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicSub(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED);
}
__device__
inline
int atomicExch(int* address, int val)
{
    return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicExch(unsigned int* address, unsigned int val)
{
    return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned long long atomicExch(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
}
__device__
inline
float atomicExch(float* address, float val)
{
    // Exchange the bit pattern through the unsigned int builtin and
    // reinterpret the result as a float.
    return __uint_as_float(__atomic_exchange_n(
        reinterpret_cast<unsigned int*>(address),
        __float_as_uint(val),
        __ATOMIC_RELAXED));
}
__device__
inline
int atomicMin(int* address, int val)
{
    return __atomic_fetch_min(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicMin(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_min(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned long long atomicMin(
    unsigned long long* address, unsigned long long val)
{
    // Emulated with a CAS loop: keep trying while the stored value is
    // greater than val.
    unsigned long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
    while (val < tmp) {
        const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);

        if (tmp1 != tmp) { tmp = tmp1; continue; }

        tmp = atomicCAS(address, tmp, val);
    }

    return tmp;
}
__device__
inline
int atomicMax(int* address, int val)
{
    return __atomic_fetch_max(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicMax(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_max(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned long long atomicMax(
    unsigned long long* address, unsigned long long val)
{
    // Emulated with a CAS loop: keep trying while the stored value is
    // less than val.
    unsigned long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
    while (tmp < val) {
        const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);

        if (tmp1 != tmp) { tmp = tmp1; continue; }

        tmp = atomicCAS(address, tmp, val);
    }

    return tmp;
}
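// atomicInc and atomicDec follow the CUDA-style wrapping semantics:
// atomicInc stores ((old >= val) ? 0 : (old + 1)) and atomicDec stores
// ((old == 0 || old > val) ? val : (old - 1)), returning old in both cases.
// Both are lowered to the corresponding llvm.amdgcn.atomic.* intrinsics.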
__device__
inline
unsigned int atomicInc(unsigned int* address, unsigned int val)
{
    __device__
    extern
    unsigned int __builtin_amdgcn_atomic_inc(
        unsigned int*,
        unsigned int,
        unsigned int,
        unsigned int,
        bool) __asm("llvm.amdgcn.atomic.inc.i32.p0i32");

    // Arguments: address, value, memory order, scope, volatile.
    return __builtin_amdgcn_atomic_inc(
        address, val, __ATOMIC_RELAXED, 1, false);
}
__device__
inline
unsigned int atomicDec(unsigned int* address, unsigned int val)
{
    __device__
    extern
    unsigned int __builtin_amdgcn_atomic_dec(
        unsigned int*,
        unsigned int,
        unsigned int,
        unsigned int,
        bool) __asm("llvm.amdgcn.atomic.dec.i32.p0i32");

    // Arguments: address, value, memory order, scope, volatile.
    return __builtin_amdgcn_atomic_dec(
        address, val, __ATOMIC_RELAXED, 1, false);
}
__device__
inline
int atomicAnd(int* address, int val)
{
    return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicAnd(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned long long atomicAnd(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
}
__device__
inline
int atomicOr(int* address, int val)
{
    return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicOr(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned long long atomicOr(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
}
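// Usage sketch (illustrative only, not part of this header): marking items
// as "seen" in a shared bitmask with atomicOr, assuming mask[] is
// zero-initialized and every id is smaller than 32 * (number of mask words):
//
//   __global__ void mark_seen(const unsigned int* ids, unsigned int* mask,
//                             size_t n) {
//       size_t i = blockIdx.x * blockDim.x + threadIdx.x;
//       if (i < n) atomicOr(&mask[ids[i] / 32u], 1u << (ids[i] % 32u));
//   }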
__device__
inline
int atomicXor(int* address, int val)
{
    return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicXor(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned long long atomicXor(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
}