3 #include "device_functions.h"
// atomicCAS (int): compare-and-swap on *address.
// Issues a strong (weak argument == false) __atomic_compare_exchange_n with
// relaxed ordering for both the success and the failure case: when *address
// equals `compare`, `val` is stored; otherwise the builtin writes the value
// it observed into the local `compare`.
// NOTE(review): no return statement is visible in this excerpt; presumably
// the (possibly updated) `compare` is returned -- confirm.
7 int atomicCAS(
int* address,
int compare,
int val)
9 __atomic_compare_exchange_n(
10 address, &compare, val,
false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
// atomicCAS (unsigned int): compare-and-swap on *address.
// Issues a strong (weak argument == false) __atomic_compare_exchange_n with
// relaxed ordering for both the success and the failure case: when *address
// equals `compare`, `val` is stored; otherwise the builtin writes the value
// it observed into the local `compare`.
// NOTE(review): no return statement is visible in this excerpt; presumably
// the (possibly updated) `compare` is returned -- confirm.
16 unsigned int atomicCAS(
17 unsigned int* address,
unsigned int compare,
unsigned int val)
19 __atomic_compare_exchange_n(
20 address, &compare, val,
false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
// atomicCAS (unsigned long long): compare-and-swap on *address.
// Issues a strong (weak argument == false) __atomic_compare_exchange_n with
// relaxed ordering for both the success and the failure case: when *address
// equals `compare`, `val` is stored; otherwise the builtin writes the value
// it observed into the local `compare`.
// NOTE(review): no return statement is visible in this excerpt; presumably
// the (possibly updated) `compare` is returned -- confirm.
26 unsigned long long atomicCAS(
27 unsigned long long* address,
28 unsigned long long compare,
29 unsigned long long val)
31 __atomic_compare_exchange_n(
32 address, &compare, val,
false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
// atomicAdd (int): atomically adds `val` to *address with relaxed ordering.
// Returns the result of __atomic_fetch_add, which per the builtin's contract
// is the value *address held before the addition.
39 int atomicAdd(
int* address,
int val)
41 return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
// atomicAdd (unsigned int): atomically adds `val` to *address with relaxed
// ordering. Returns the result of __atomic_fetch_add, which per the builtin's
// contract is the value *address held before the addition.
45 unsigned int atomicAdd(
unsigned int* address,
unsigned int val)
47 return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
// atomicAdd (unsigned long long): atomically adds `val` to *address with
// relaxed ordering. Returns the result of __atomic_fetch_add, which per the
// builtin's contract is the value *address held before the addition.
51 unsigned long long atomicAdd(
52 unsigned long long* address,
unsigned long long val)
54 return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
// atomicAdd (float): adds `val` to *address using a compare-and-swap retry
// on an unsigned int view of the same address.
// NOTE(review): the declaration of `old` and the loop that encloses the
// `continue` below are not visible in this excerpt -- confirm the loop's
// exit condition against the full file.
// NOTE(review): __float_as_uint / __uint_as_float are defined elsewhere;
// presumably they reinterpret the bits without numeric conversion -- confirm.
58 float atomicAdd(
float* address,
float val)
// View the float location as unsigned int* so the integer atomics apply.
60 unsigned int* uaddr{
reinterpret_cast<unsigned int*
>(address)};
// r: the value this attempt expects to find at *uaddr (relaxed load).
61 unsigned int r{__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
// Re-read the location; if it no longer matches r, adopt the fresh value
// and start the attempt over.
65 old = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
67 if (r != old) { r = old;
continue; }
// Try to replace r with the encoding of val + (r viewed as float);
// r receives whatever atomicCAS returns.
69 r = atomicCAS(uaddr, r, __float_as_uint(val + __uint_as_float(r)));
// Hand back r converted through __uint_as_float.
74 return __uint_as_float(r);
// atomicAddNoRet (float): forwards `address` and `val` to
// __ockl_atomic_add_noret_f32 and returns nothing.
// NOTE(review): __ockl_atomic_add_noret_f32 is declared elsewhere; presumably
// it performs an atomic float add without returning the old value -- confirm.
79 void atomicAddNoRet(
float* address,
float val)
81 __ockl_atomic_add_noret_f32(address, val);
// atomicAdd (double): adds `val` to *address using a compare-and-swap retry
// on an unsigned long long view of the same address.
// NOTE(review): the loop that encloses the `continue` below and the callee
// of the argument list `uaddr, r, ...` are not visible in this excerpt;
// presumably the call is `r = atomicCAS(` -- confirm against the full file.
// NOTE(review): __double_as_longlong / __longlong_as_double are defined
// elsewhere; presumably they reinterpret the bits without numeric
// conversion -- confirm.
86 double atomicAdd(
double* address,
double val)
// View the double location as unsigned long long* so the integer atomics apply.
88 unsigned long long* uaddr{
reinterpret_cast<unsigned long long*
>(address)};
// r: the value this attempt expects to find at *uaddr (relaxed load).
89 unsigned long long r{__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
91 unsigned long long old;
// Re-read the location; if it no longer matches r, adopt the fresh value
// and start the attempt over.
93 old = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
95 if (r != old) { r = old;
continue; }
// Arguments of the swap attempt: location, expected value r, and the
// encoding of val + (r viewed as double).
98 uaddr, r, __double_as_longlong(val + __longlong_as_double(r)));
// Hand back r converted through __longlong_as_double.
103 return __longlong_as_double(r);
// atomicSub (int): atomically subtracts `val` from *address with relaxed
// ordering. Returns the result of __atomic_fetch_sub, which per the builtin's
// contract is the value *address held before the subtraction.
108 int atomicSub(
int* address,
int val)
110 return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED);
// atomicSub (unsigned int): atomically subtracts `val` from *address with
// relaxed ordering. Returns the result of __atomic_fetch_sub, which per the
// builtin's contract is the value *address held before the subtraction.
114 unsigned int atomicSub(
unsigned int* address,
unsigned int val)
116 return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED);
// atomicExch (int): atomically stores `val` into *address with relaxed
// ordering. Returns the result of __atomic_exchange_n, which per the
// builtin's contract is the value *address held before the store.
121 int atomicExch(
int* address,
int val)
123 return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
// atomicExch (unsigned int): atomically stores `val` into *address with
// relaxed ordering. Returns the result of __atomic_exchange_n, which per the
// builtin's contract is the value *address held before the store.
127 unsigned int atomicExch(
unsigned int* address,
unsigned int val)
129 return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
// atomicExch (unsigned long long): atomically stores `val` into *address
// with relaxed ordering. Returns the result of __atomic_exchange_n, which per
// the builtin's contract is the value *address held before the store.
133 unsigned long long atomicExch(
unsigned long long* address,
unsigned long long val)
135 return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
// atomicExch (float): exchanges the value at *address with `val` by running
// __atomic_exchange_n on an unsigned int view of the address, passing
// __float_as_uint(val) as the new value and converting the builtin's result
// back through __uint_as_float.
// NOTE(review): the call is cut off after its second argument in this
// excerpt; the memory-order argument and closing parentheses are not
// visible -- presumably __ATOMIC_RELAXED, confirm against the full file.
// NOTE(review): __float_as_uint / __uint_as_float are defined elsewhere;
// presumably they reinterpret the bits without numeric conversion -- confirm.
139 float atomicExch(
float* address,
float val)
141 return __uint_as_float(__atomic_exchange_n(
142 reinterpret_cast<unsigned int*
>(address),
143 __float_as_uint(val),
// atomicMin (int): forwards to __atomic_fetch_min on *address with `val` and
// relaxed ordering, and returns the builtin's result (expected, per the
// Clang builtin, to be the value held before the update).
149 int atomicMin(
int* address,
int val)
151 return __atomic_fetch_min(address, val, __ATOMIC_RELAXED);
// atomicMin (unsigned int): forwards to __atomic_fetch_min on *address with
// `val` and relaxed ordering, and returns the builtin's result (expected,
// per the Clang builtin, to be the value held before the update).
155 unsigned int atomicMin(
unsigned int* address,
unsigned int val)
157 return __atomic_fetch_min(address, val, __ATOMIC_RELAXED);
// atomicMin (unsigned long long): compare-and-swap based update of *address
// with `val`.
// NOTE(review): the loop header that encloses the `continue` and the return
// statement are not visible in this excerpt; presumably the loop runs while
// val < tmp and tmp is returned -- confirm against the full file.
161 unsigned long long atomicMin(
162 unsigned long long* address,
unsigned long long val)
// tmp: the last value observed at *address (relaxed load).
164 unsigned long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
// Re-read the location; if it changed since tmp was taken, refresh tmp and
// re-evaluate before attempting the swap.
166 const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);
168 if (tmp1 != tmp) { tmp = tmp1;
continue; }
// Try to install val while expecting tmp; tmp receives atomicCAS's result.
170 tmp = atomicCAS(address, tmp, val);
// atomicMax (int): forwards to __atomic_fetch_max on *address with `val` and
// relaxed ordering, and returns the builtin's result (expected, per the
// Clang builtin, to be the value held before the update).
178 int atomicMax(
int* address,
int val)
180 return __atomic_fetch_max(address, val, __ATOMIC_RELAXED);
// atomicMax (unsigned int): forwards to __atomic_fetch_max on *address with
// `val` and relaxed ordering, and returns the builtin's result (expected,
// per the Clang builtin, to be the value held before the update).
184 unsigned int atomicMax(
unsigned int* address,
unsigned int val)
186 return __atomic_fetch_max(address, val, __ATOMIC_RELAXED);
// atomicMax (unsigned long long): compare-and-swap based update of *address
// with `val`.
// NOTE(review): the loop header that encloses the `continue` and the return
// statement are not visible in this excerpt; presumably the loop runs while
// tmp < val and tmp is returned -- confirm against the full file.
190 unsigned long long atomicMax(
191 unsigned long long* address,
unsigned long long val)
// tmp: the last value observed at *address (relaxed load).
193 unsigned long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
// Re-read the location; if it changed since tmp was taken, refresh tmp and
// re-evaluate before attempting the swap.
195 const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);
197 if (tmp1 != tmp) { tmp = tmp1;
continue; }
// Try to install val while expecting tmp; tmp receives atomicCAS's result.
199 tmp = atomicCAS(address, tmp, val);
// atomicInc (unsigned int): forwards `address` and `val` to the AMDGCN
// atomic-increment intrinsic and returns its result.
// NOTE(review): the wrap-around rule applied to `val` is defined by the
// intrinsic, not by this code -- consult the intrinsic's documentation.
207 unsigned int atomicInc(
unsigned int* address,
unsigned int val)
// Local declaration that binds the name __builtin_amdgcn_atomic_inc to the
// symbol given in the __asm label.
// NOTE(review): most of the parameter list is not visible in this excerpt;
// only the trailing `bool` parameter can be seen.
211 unsigned int __builtin_amdgcn_atomic_inc(
216 bool) __asm(
"llvm.amdgcn.atomic.inc.i32.p0i32");
// Call arguments: address, val, relaxed ordering, then the literals 1 and
// false.
// NOTE(review): the meaning of `1` and `false` is not shown here; presumably
// a synchronization scope and a volatile flag -- confirm.
218 return __builtin_amdgcn_atomic_inc(
219 address, val, __ATOMIC_RELAXED, 1 ,
false);
// atomicDec (unsigned int): forwards `address` and `val` to the AMDGCN
// atomic-decrement intrinsic and returns its result.
// NOTE(review): the wrap-around rule applied to `val` is defined by the
// intrinsic, not by this code -- consult the intrinsic's documentation.
224 unsigned int atomicDec(
unsigned int* address,
unsigned int val)
// Local declaration that binds the name __builtin_amdgcn_atomic_dec to the
// symbol given in the __asm label.
// NOTE(review): most of the parameter list is not visible in this excerpt;
// only the trailing `bool` parameter can be seen.
228 unsigned int __builtin_amdgcn_atomic_dec(
233 bool) __asm(
"llvm.amdgcn.atomic.dec.i32.p0i32");
// Call arguments: address, val, relaxed ordering, then the literals 1 and
// false.
// NOTE(review): the meaning of `1` and `false` is not shown here; presumably
// a synchronization scope and a volatile flag -- confirm.
235 return __builtin_amdgcn_atomic_dec(
236 address, val, __ATOMIC_RELAXED, 1 ,
false);
// atomicAnd (int): atomically replaces *address with (*address & val) using
// relaxed ordering. Returns the result of __atomic_fetch_and, which per the
// builtin's contract is the value *address held before the operation.
241 int atomicAnd(
int* address,
int val)
243 return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
// atomicAnd (unsigned int): atomically replaces *address with
// (*address & val) using relaxed ordering. Returns the result of
// __atomic_fetch_and, which per the builtin's contract is the value *address
// held before the operation.
247 unsigned int atomicAnd(
unsigned int* address,
unsigned int val)
249 return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
// atomicAnd (unsigned long long): atomically replaces *address with
// (*address & val) using relaxed ordering. Returns the result of
// __atomic_fetch_and, which per the builtin's contract is the value *address
// held before the operation.
253 unsigned long long atomicAnd(
254 unsigned long long* address,
unsigned long long val)
256 return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
// atomicOr (int): atomically replaces *address with (*address | val) using
// relaxed ordering. Returns the result of __atomic_fetch_or, which per the
// builtin's contract is the value *address held before the operation.
261 int atomicOr(
int* address,
int val)
263 return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
// atomicOr (unsigned int): atomically replaces *address with
// (*address | val) using relaxed ordering. Returns the result of
// __atomic_fetch_or, which per the builtin's contract is the value *address
// held before the operation.
267 unsigned int atomicOr(
unsigned int* address,
unsigned int val)
269 return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
// atomicOr (unsigned long long): atomically replaces *address with
// (*address | val) using relaxed ordering. Returns the result of
// __atomic_fetch_or, which per the builtin's contract is the value *address
// held before the operation.
273 unsigned long long atomicOr(
274 unsigned long long* address,
unsigned long long val)
276 return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
// atomicXor (int): atomically replaces *address with (*address ^ val) using
// relaxed ordering. Returns the result of __atomic_fetch_xor, which per the
// builtin's contract is the value *address held before the operation.
281 int atomicXor(
int* address,
int val)
283 return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
// atomicXor (unsigned int): atomically replaces *address with
// (*address ^ val) using relaxed ordering. Returns the result of
// __atomic_fetch_xor, which per the builtin's contract is the value *address
// held before the operation.
287 unsigned int atomicXor(
unsigned int* address,
unsigned int val)
289 return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
// atomicXor (unsigned long long): atomically replaces *address with
// (*address ^ val) using relaxed ordering. Returns the result of
// __atomic_fetch_xor, which per the builtin's contract is the value *address
// held before the operation.
293 unsigned long long atomicXor(
294 unsigned long long* address,
unsigned long long val)
296 return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);