3 #include "device_functions.h" 7 int atomicCAS(
int* address,
int compare,
int val)
9 __atomic_compare_exchange_n(
10 address, &compare, val,
false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
/**
 * Atomic compare-and-swap on an unsigned 32-bit integer.
 *
 * If *address equals `compare`, `val` is stored into *address; otherwise
 * *address is left untouched.
 *
 * @param address  Location to operate on.
 * @param compare  Value *address is expected to hold.
 * @param val      Value stored when the comparison succeeds.
 * @return The value *address held immediately before the operation; it is
 *         equal to `compare` exactly when the swap took place.
 */
unsigned int atomicCAS(
    unsigned int* address, unsigned int compare, unsigned int val)
{
    // Strong compare-exchange, relaxed ordering on both success and failure.
    // The builtin leaves the previously stored value in `compare` whether or
    // not the exchange happened, so that is what gets returned.
    __atomic_compare_exchange_n(
        address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);

    return compare;
}
/**
 * Atomic compare-and-swap on an unsigned 64-bit integer.
 *
 * If *address equals `compare`, `val` is stored into *address; otherwise
 * *address is left untouched.
 *
 * @param address  Location to operate on.
 * @param compare  Value *address is expected to hold.
 * @param val      Value stored when the comparison succeeds.
 * @return The value *address held immediately before the operation; it is
 *         equal to `compare` exactly when the swap took place.
 */
unsigned long long atomicCAS(
    unsigned long long* address,
    unsigned long long compare,
    unsigned long long val)
{
    // Strong compare-exchange, relaxed ordering on both success and failure.
    // The builtin leaves the previously stored value in `compare` whether or
    // not the exchange happened, so that is what gets returned.
    __atomic_compare_exchange_n(
        address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);

    return compare;
}
/**
 * Atomically adds `val` to the signed integer at `address`.
 *
 * @param address  Location to update.
 * @param val      Amount to add.
 * @return The value *address held before the addition.
 */
int atomicAdd(int* address, int val)
{
    // Relaxed ordering: the update is atomic but orders no other accesses.
    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}
/**
 * Atomically adds `val` to the unsigned integer at `address`.
 *
 * @param address  Location to update.
 * @param val      Amount to add.
 * @return The value *address held before the addition.
 */
unsigned int atomicAdd(unsigned int* address, unsigned int val)
{
    // Relaxed ordering: the update is atomic but orders no other accesses.
    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}
/**
 * Atomically adds `val` to the unsigned 64-bit integer at `address`.
 *
 * @param address  Location to update.
 * @param val      Amount to add.
 * @return The value *address held before the addition.
 */
unsigned long long atomicAdd(
    unsigned long long* address, unsigned long long val)
{
    // Relaxed ordering: the update is atomic but orders no other accesses.
    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}
58 float atomicAdd(
float* address,
float val)
60 unsigned int* uaddr{
reinterpret_cast<unsigned int*
>(address)};
61 unsigned int r{__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
65 old = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
67 if (r != old) { r = old;
continue; }
69 r = atomicCAS(uaddr, r, __float_as_uint(val + __uint_as_float(r)));
74 return __uint_as_float(r);
78 double atomicAdd(
double* address,
double val)
80 unsigned long long* uaddr{
reinterpret_cast<unsigned long long*
>(address)};
81 unsigned long long r{__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
83 unsigned long long old;
85 old = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
87 if (r != old) { r = old;
continue; }
90 uaddr, r, __double_as_longlong(val + __longlong_as_double(r)));
95 return __longlong_as_double(r);
/**
 * Atomically subtracts `val` from the signed integer at `address`.
 *
 * @param address  Location to update.
 * @param val      Amount to subtract.
 * @return The value *address held before the subtraction.
 */
int atomicSub(int* address, int val)
{
    // Relaxed ordering: the update is atomic but orders no other accesses.
    return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED);
}
/**
 * Atomically subtracts `val` from the unsigned integer at `address`.
 *
 * @param address  Location to update.
 * @param val      Amount to subtract.
 * @return The value *address held before the subtraction.
 */
unsigned int atomicSub(unsigned int* address, unsigned int val)
{
    // Relaxed ordering: the update is atomic but orders no other accesses.
    return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED);
}
/**
 * Atomically replaces the signed integer at `address` with `val`.
 *
 * @param address  Location to update.
 * @param val      Value to store.
 * @return The value *address held before the store.
 */
int atomicExch(int* address, int val)
{
    // Relaxed ordering: the swap is atomic but orders no other accesses.
    return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
}
/**
 * Atomically replaces the unsigned integer at `address` with `val`.
 *
 * @param address  Location to update.
 * @param val      Value to store.
 * @return The value *address held before the store.
 */
unsigned int atomicExch(unsigned int* address, unsigned int val)
{
    // Relaxed ordering: the swap is atomic but orders no other accesses.
    return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
}
/**
 * Atomically replaces the unsigned 64-bit integer at `address` with `val`.
 *
 * @param address  Location to update.
 * @param val      Value to store.
 * @return The value *address held before the store.
 */
unsigned long long atomicExch(
    unsigned long long* address, unsigned long long val)
{
    // Relaxed ordering: the swap is atomic but orders no other accesses.
    return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
}
131 float atomicExch(
float* address,
float val)
133 return __uint_as_float(__atomic_exchange_n(
134 reinterpret_cast<unsigned int*>(address),
135 __float_as_uint(val),
141 int atomicMin(
int* address,
int val)
143 return __atomic_fetch_min(address, val, __ATOMIC_RELAXED);
147 unsigned int atomicMin(
unsigned int* address,
unsigned int val)
149 return __atomic_fetch_min(address, val, __ATOMIC_RELAXED);
/**
 * Atomically stores min(*address, val) into the unsigned 64-bit integer at
 * `address`.
 *
 * Implemented as a compare-exchange retry loop: `val` is written only while
 * it is strictly smaller than the value last observed at `address`.
 *
 * @param address  Location to update.
 * @param val      Candidate minimum.
 * @return The value *address held before the update (or the value observed
 *         there when no update was necessary).
 */
unsigned long long atomicMin(
    unsigned long long* address, unsigned long long val)
{
    unsigned long long observed{__atomic_load_n(address, __ATOMIC_RELAXED)};

    // A failed exchange refreshes `observed` with the current contents, so
    // the comparison against `val` is re-evaluated on up-to-date data. A
    // successful exchange leaves the replaced value in `observed`.
    while (val < observed &&
           !__atomic_compare_exchange_n(
               address, &observed, val, false,
               __ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
    }

    return observed;
}
170 int atomicMax(
int* address,
int val)
172 return __atomic_fetch_max(address, val, __ATOMIC_RELAXED);
176 unsigned int atomicMax(
unsigned int* address,
unsigned int val)
178 return __atomic_fetch_max(address, val, __ATOMIC_RELAXED);
/**
 * Atomically stores max(*address, val) into the unsigned 64-bit integer at
 * `address`.
 *
 * Implemented as a compare-exchange retry loop: `val` is written only while
 * it is strictly greater than the value last observed at `address`.
 *
 * @param address  Location to update.
 * @param val      Candidate maximum.
 * @return The value *address held before the update (or the value observed
 *         there when no update was necessary).
 */
unsigned long long atomicMax(
    unsigned long long* address, unsigned long long val)
{
    unsigned long long observed{__atomic_load_n(address, __ATOMIC_RELAXED)};

    // A failed exchange refreshes `observed` with the current contents, so
    // the comparison against `val` is re-evaluated on up-to-date data. A
    // successful exchange leaves the replaced value in `observed`.
    while (observed < val &&
           !__atomic_compare_exchange_n(
               address, &observed, val, false,
               __ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
    }

    return observed;
}
// Forwards an increment request on *address to __builtin_amdgcn_atomic_inc
// and returns that builtin's result unchanged.
//
// address, val: passed through as the first and second argument.
//
// NOTE(review): presumably this follows the CUDA atomicInc contract
// (store (old >= val) ? 0 : old + 1 and return old) -- nothing in this view
// states it; confirm against the intrinsic's documentation.
199 unsigned int atomicInc(
unsigned int* address,
unsigned int val)
// Block-scope declaration of the builtin, bound through __asm to the symbol
// "llvm.amdgcn.atomic.inc.i32.p0i32".
// NOTE(review): only the trailing `bool` parameter of this declaration is
// visible here; check the full parameter list against the five arguments
// passed in the call below.
203 unsigned int __builtin_amdgcn_atomic_inc(
208 bool) __asm("llvm.amdgcn.atomic.inc.i32.p0i32");
// The third argument requests relaxed memory ordering.
// NOTE(review): the meaning of the literal 1 and of `false` is not shown in
// this view -- TODO confirm against the intrinsic's signature.
210 return __builtin_amdgcn_atomic_inc(
211 address, val, __ATOMIC_RELAXED, 1 , false);
// Forwards a decrement request on *address to __builtin_amdgcn_atomic_dec
// and returns that builtin's result unchanged.
//
// address, val: passed through as the first and second argument.
//
// NOTE(review): presumably this follows the CUDA atomicDec contract
// (store (old == 0 || old > val) ? val : old - 1 and return old) -- nothing
// in this view states it; confirm against the intrinsic's documentation.
216 unsigned int atomicDec(
unsigned int* address,
unsigned int val)
// Block-scope declaration of the builtin, bound through __asm to the symbol
// "llvm.amdgcn.atomic.dec.i32.p0i32".
// NOTE(review): only the trailing `bool` parameter of this declaration is
// visible here; check the full parameter list against the five arguments
// passed in the call below.
220 unsigned int __builtin_amdgcn_atomic_dec(
225 bool) __asm("llvm.amdgcn.atomic.dec.i32.p0i32");
// The third argument requests relaxed memory ordering.
// NOTE(review): the meaning of the literal 1 and of `false` is not shown in
// this view -- TODO confirm against the intrinsic's signature.
227 return __builtin_amdgcn_atomic_dec(
228 address, val, __ATOMIC_RELAXED, 1 , false);
/**
 * Atomically stores (*address & val) into the signed integer at `address`.
 *
 * @param address  Location to update.
 * @param val      Mask to AND in.
 * @return The value *address held before the update.
 */
int atomicAnd(int* address, int val)
{
    // Relaxed ordering: the update is atomic but orders no other accesses.
    return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
}
/**
 * Atomically stores (*address & val) into the unsigned integer at `address`.
 *
 * @param address  Location to update.
 * @param val      Mask to AND in.
 * @return The value *address held before the update.
 */
unsigned int atomicAnd(unsigned int* address, unsigned int val)
{
    // Relaxed ordering: the update is atomic but orders no other accesses.
    return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
}
/**
 * Atomically stores (*address & val) into the unsigned 64-bit integer at
 * `address`.
 *
 * @param address  Location to update.
 * @param val      Mask to AND in.
 * @return The value *address held before the update.
 */
unsigned long long atomicAnd(
    unsigned long long* address, unsigned long long val)
{
    // Relaxed ordering: the update is atomic but orders no other accesses.
    return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
}
/**
 * Atomically stores (*address | val) into the signed integer at `address`.
 *
 * @param address  Location to update.
 * @param val      Bits to OR in.
 * @return The value *address held before the update.
 */
int atomicOr(int* address, int val)
{
    // Relaxed ordering: the update is atomic but orders no other accesses.
    return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
}
/**
 * Atomically stores (*address | val) into the unsigned integer at `address`.
 *
 * @param address  Location to update.
 * @param val      Bits to OR in.
 * @return The value *address held before the update.
 */
unsigned int atomicOr(unsigned int* address, unsigned int val)
{
    // Relaxed ordering: the update is atomic but orders no other accesses.
    return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
}
/**
 * Atomically stores (*address | val) into the unsigned 64-bit integer at
 * `address`.
 *
 * @param address  Location to update.
 * @param val      Bits to OR in.
 * @return The value *address held before the update.
 */
unsigned long long atomicOr(
    unsigned long long* address, unsigned long long val)
{
    // Relaxed ordering: the update is atomic but orders no other accesses.
    return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
}
/**
 * Atomically stores (*address ^ val) into the signed integer at `address`.
 *
 * @param address  Location to update.
 * @param val      Bits to toggle.
 * @return The value *address held before the update.
 */
int atomicXor(int* address, int val)
{
    // Relaxed ordering: the update is atomic but orders no other accesses.
    return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
}
/**
 * Atomically stores (*address ^ val) into the unsigned integer at `address`.
 *
 * @param address  Location to update.
 * @param val      Bits to toggle.
 * @return The value *address held before the update.
 */
unsigned int atomicXor(unsigned int* address, unsigned int val)
{
    // Relaxed ordering: the update is atomic but orders no other accesses.
    return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
}
/**
 * Atomically stores (*address ^ val) into the unsigned 64-bit integer at
 * `address`.
 *
 * @param address  Location to update.
 * @param val      Bits to toggle.
 * @return The value *address held before the update.
 */
unsigned long long atomicXor(
    unsigned long long* address, unsigned long long val)
{
    // Relaxed ordering: the update is atomic but orders no other accesses.
    return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
}