HIP: Heterogeneous-computing Interface for Portability
hip_atomic.h
#pragma once

#include "device_functions.h"
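
// atomicCAS returns the value held at 'address' before the operation:
// __atomic_compare_exchange_n leaves 'compare' untouched on success (it
// already equals the old value) and overwrites it with the observed value
// on failure, so returning 'compare' is correct in both cases.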
__device__
inline
int atomicCAS(int* address, int compare, int val)
{
    __atomic_compare_exchange_n(
        address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);

    return compare;
}
__device__
inline
unsigned int atomicCAS(
    unsigned int* address, unsigned int compare, unsigned int val)
{
    __atomic_compare_exchange_n(
        address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);

    return compare;
}
__device__
inline
unsigned long long atomicCAS(
    unsigned long long* address,
    unsigned long long compare,
    unsigned long long val)
{
    __atomic_compare_exchange_n(
        address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);

    return compare;
}
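
// atomicAdd: the integer overloads map directly onto __atomic_fetch_add;
// the float and double overloads further below are emulated with an
// atomicCAS loop on the value's integer representation.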
__device__
inline
int atomicAdd(int* address, int val)
{
    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicAdd(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned long long atomicAdd(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}
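// Emulated floating-point add: snapshot the bits, compute the new sum, and
// publish it with atomicCAS; retry until no other thread has intervened.
// The value returned is the one observed at 'address' just before our
// successful update, reinterpreted as floating point.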
__device__
inline
float atomicAdd(float* address, float val)
{
    unsigned int* uaddr{reinterpret_cast<unsigned int*>(address)};
    unsigned int r{__atomic_load_n(uaddr, __ATOMIC_RELAXED)};

    unsigned int old;
    do {
        old = __atomic_load_n(uaddr, __ATOMIC_RELAXED);

        // Another thread updated the value; refresh the snapshot and retry.
        if (r != old) { r = old; continue; }

        r = atomicCAS(uaddr, r, __float_as_uint(val + __uint_as_float(r)));

        // atomicCAS returned our expected value, so the swap succeeded.
        if (r == old) break;
    } while (true);

    return __uint_as_float(r);
}
// Same CAS-loop pattern as the float overload above, on the 64-bit
// representation.
__device__
inline
double atomicAdd(double* address, double val)
{
    unsigned long long* uaddr{reinterpret_cast<unsigned long long*>(address)};
    unsigned long long r{__atomic_load_n(uaddr, __ATOMIC_RELAXED)};

    unsigned long long old;
    do {
        old = __atomic_load_n(uaddr, __ATOMIC_RELAXED);

        if (r != old) { r = old; continue; }

        r = atomicCAS(
            uaddr, r, __double_as_longlong(val + __longlong_as_double(r)));

        if (r == old) break;
    } while (true);

    return __longlong_as_double(r);
}
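
// atomicSub: only the 32-bit overloads exist, matching CUDA's atomicSub.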
__device__
inline
int atomicSub(int* address, int val)
{
    return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicSub(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED);
}
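
// atomicExch: unconditional swap; the float overload reinterprets the bits
// so that no conversion is applied to the stored pattern.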
__device__
inline
int atomicExch(int* address, int val)
{
    return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicExch(unsigned int* address, unsigned int val)
{
    return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned long long atomicExch(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
}
__device__
inline
float atomicExch(float* address, float val)
{
    return __uint_as_float(__atomic_exchange_n(
        reinterpret_cast<unsigned int*>(address),
        __float_as_uint(val),
        __ATOMIC_RELAXED));
}
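
// atomicMin / atomicMax: the 32-bit overloads use the __atomic_fetch_min /
// __atomic_fetch_max builtins; the 64-bit overloads are emulated with an
// atomicCAS loop that only attempts a swap while 'val' would actually
// replace the stored value.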
__device__
inline
int atomicMin(int* address, int val)
{
    return __atomic_fetch_min(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicMin(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_min(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned long long atomicMin(
    unsigned long long* address, unsigned long long val)
{
    unsigned long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
    while (val < tmp) {
        const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);

        if (tmp1 != tmp) { tmp = tmp1; continue; }

        const auto old = atomicCAS(address, tmp, val);

        // atomicCAS returns the pre-swap value; on success that is what
        // this function must return, so stop here rather than re-testing
        // the loop condition, which would replace it with 'val'.
        if (old == tmp) break;
        tmp = old;
    }

    return tmp;
}
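
// atomicMax follows the same pattern, with the comparison reversed.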
__device__
inline
int atomicMax(int* address, int val)
{
    return __atomic_fetch_max(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicMax(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_max(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned long long atomicMax(
    unsigned long long* address, unsigned long long val)
{
    unsigned long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
    while (tmp < val) {
        const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);

        if (tmp1 != tmp) { tmp = tmp1; continue; }

        const auto old = atomicCAS(address, tmp, val);

        // As in atomicMin: break on success so the pre-swap value, not
        // 'val', is returned.
        if (old == tmp) break;
        tmp = old;
    }

    return tmp;
}
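
// atomicInc implements CUDA's wrapping increment,
// ((old >= val) ? 0 : (old + 1)), by lowering to the AMDGCN atomic-inc
// intrinsic. The trailing arguments select relaxed ordering, device scope,
// and non-volatile access.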
__device__
inline
unsigned int atomicInc(unsigned int* address, unsigned int val)
{
    __device__
    extern
    unsigned int __builtin_amdgcn_atomic_inc(
        unsigned int*,
        unsigned int,
        unsigned int,
        unsigned int,
        bool) __asm("llvm.amdgcn.atomic.inc.i32.p0i32");

    return __builtin_amdgcn_atomic_inc(
        address, val, __ATOMIC_RELAXED, 1 /* Device scope */, false);
}
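
// atomicDec is the wrapping decrement,
// (((old == 0) || (old > val)) ? val : (old - 1)), lowered to the matching
// AMDGCN atomic-dec intrinsic.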
__device__
inline
unsigned int atomicDec(unsigned int* address, unsigned int val)
{
    __device__
    extern
    unsigned int __builtin_amdgcn_atomic_dec(
        unsigned int*,
        unsigned int,
        unsigned int,
        unsigned int,
        bool) __asm("llvm.amdgcn.atomic.dec.i32.p0i32");

    return __builtin_amdgcn_atomic_dec(
        address, val, __ATOMIC_RELAXED, 1 /* Device scope */, false);
}
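
// Bitwise atomics (and / or / xor) all map directly onto the corresponding
// __atomic_fetch_* builtins and return the pre-operation value.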
__device__
inline
int atomicAnd(int* address, int val)
{
    return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicAnd(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned long long atomicAnd(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
}

__device__
inline
int atomicOr(int* address, int val)
{
    return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicOr(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned long long atomicOr(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
}

__device__
inline
int atomicXor(int* address, int val)
{
    return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicXor(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned long long atomicXor(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
}

// TODO: add scoped atomics, i.e. atomic{*}_system and atomic{*}_block.
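
// Example usage (a minimal sketch; the kernel and buffer names below are
// illustrative, not part of this header):
//
//   __global__ void count_positive(const float* x, int n, unsigned int* count)
//   {
//       const int i = blockIdx.x * blockDim.x + threadIdx.x;
//       if (i < n && x[i] > 0.0f) atomicAdd(count, 1u);
//   }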