HIP: Heterogeneous-computing Interface for Portability
hip_atomic.h
#pragma once

#include "device_functions.h"

// Each atomicCAS overload wraps __atomic_compare_exchange_n, which on
// failure writes the value it actually observed back into `compare`;
// returning `compare` therefore yields the old contents of `address` in
// both the success and the failure case, matching CUDA's atomicCAS.
__device__
inline
int atomicCAS(int* address, int compare, int val)
{
    __atomic_compare_exchange_n(
        address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);

    return compare;
}
__device__
inline
unsigned int atomicCAS(
    unsigned int* address, unsigned int compare, unsigned int val)
{
    __atomic_compare_exchange_n(
        address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);

    return compare;
}
__device__
inline
unsigned long long atomicCAS(
    unsigned long long* address,
    unsigned long long compare,
    unsigned long long val)
{
    __atomic_compare_exchange_n(
        address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);

    return compare;
}
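
// Illustrative sketch, not part of the original header: atomicCAS is the
// building block for any read-modify-write operation the header does not
// provide. A hypothetical atomicMul could use the same retry loop that
// atomicAdd(double*) below uses:
//
//   __device__
//   inline
//   unsigned int atomicMul(unsigned int* address, unsigned int val)
//   {
//       unsigned int old{__atomic_load_n(address, __ATOMIC_RELAXED)};
//       unsigned int assumed;
//       do {
//           assumed = old;
//           old = atomicCAS(address, assumed, assumed * val);
//       } while (old != assumed); // Retry if another thread intervened.
//       return old;               // Old value, per the atomic{*} convention.
//   }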

__device__
inline
int atomicAdd(int* address, int val)
{
    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicAdd(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned long long atomicAdd(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}
// Note: this overload relies on Clang's extension that accepts
// floating-point operands in __atomic_fetch_add; the standard GCC
// __atomic builtins are integer-only.
__device__
inline
float atomicAdd(float* address, float val)
{
    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}

DEPRECATED("use atomicAdd instead")
__device__
inline
void atomicAddNoRet(float* address, float val)
{
    __ockl_atomic_add_noret_f32(address, val);
}

__device__
inline
double atomicAdd(double* address, double val)
{
    // There is no 64-bit floating-point fetch_add builtin here; the
    // addition is emulated with a CAS retry loop on the type-punned bits.
    unsigned long long* uaddr{reinterpret_cast<unsigned long long*>(address)};
    unsigned long long r{__atomic_load_n(uaddr, __ATOMIC_RELAXED)};

    unsigned long long old;
    do {
        old = __atomic_load_n(uaddr, __ATOMIC_RELAXED);

        // Another thread changed the value since we last looked; retry
        // with the fresh value instead of issuing a doomed CAS.
        if (r != old) { r = old; continue; }

        r = atomicCAS(
            uaddr, r, __double_as_longlong(val + __longlong_as_double(r)));

        // atomicCAS returned what it observed; if that matches our
        // expected value the swap took effect and r holds the old bits.
        if (r == old) break;
    } while (true);

    return __longlong_as_double(r);
}
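
// Usage sketch (illustrative, not from the header): atomicAdd is the
// workhorse for concurrent accumulation, e.g. this hypothetical 256-bin
// histogram kernel:
//
//   __global__
//   void histogram256(const unsigned char* in, unsigned int n,
//                     unsigned int* bins)
//   {
//       unsigned int i = blockIdx.x * blockDim.x + threadIdx.x;
//       if (i < n) atomicAdd(&bins[in[i]], 1u);
//   }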

__device__
inline
int atomicSub(int* address, int val)
{
    return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicSub(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED);
}

__device__
inline
int atomicExch(int* address, int val)
{
    return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicExch(unsigned int* address, unsigned int val)
{
    return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned long long atomicExch(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
}
// The float overload exchanges the raw bits through an unsigned int
// alias; the integer exchange builtin preserves the bit pattern.
__device__
inline
float atomicExch(float* address, float val)
{
    return __uint_as_float(__atomic_exchange_n(
        reinterpret_cast<unsigned int*>(address),
        __float_as_uint(val),
        __ATOMIC_RELAXED));
}
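
// Usage sketch (illustrative, not from the header): atomicExch(p, 1) is
// the classic test-and-set primitive. On GPUs it is only safe when a
// single thread per wavefront contends (intra-wave divergence can
// deadlock the spin), and a production lock would also need
// acquire/release fencing, which these relaxed atomics do not provide.
// Hypothetical helpers:
//
//   __device__ void mutex_lock(int* mutex)
//   {
//       while (atomicExch(mutex, 1) != 0) {} // Spin until we observe 0.
//   }
//   __device__ void mutex_unlock(int* mutex)
//   {
//       atomicExch(mutex, 0);
//   }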

__device__
inline
int atomicMin(int* address, int val)
{
    return __atomic_fetch_min(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicMin(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_min(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned long long atomicMin(
    unsigned long long* address, unsigned long long val)
{
    // No 64-bit fetch_min builtin is used; emulate with a CAS retry loop.
    unsigned long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
    while (val < tmp) {
        const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);

        if (tmp1 != tmp) { tmp = tmp1; continue; }

        // On success atomicCAS returns the expected value, i.e. the old
        // contents; break so the old value, not val, is what we return.
        const auto old = atomicCAS(address, tmp, val);
        if (old == tmp) break;
        tmp = old;
    }

    return tmp;
}

__device__
inline
int atomicMax(int* address, int val)
{
    return __atomic_fetch_max(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicMax(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_max(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned long long atomicMax(
    unsigned long long* address, unsigned long long val)
{
    // Same CAS-based emulation as the 64-bit atomicMin above.
    unsigned long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
    while (tmp < val) {
        const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);

        if (tmp1 != tmp) { tmp = tmp1; continue; }

        // Break on success so the old value, not val, is returned.
        const auto old = atomicCAS(address, tmp, val);
        if (old == tmp) break;
        tmp = old;
    }

    return tmp;
}

__device__
inline
unsigned int atomicInc(unsigned int* address, unsigned int val)
{
    // Clang exposes no builtin for the wrapping-increment intrinsic, so
    // declare llvm.amdgcn.atomic.inc locally. Semantics match CUDA's
    // atomicInc: stores ((old >= val) ? 0 : (old + 1)) and returns old.
    __device__
    extern
    unsigned int __builtin_amdgcn_atomic_inc(
        unsigned int*,
        unsigned int,
        unsigned int,
        unsigned int,
        bool) __asm("llvm.amdgcn.atomic.inc.i32.p0i32");

    return __builtin_amdgcn_atomic_inc(
        address, val, __ATOMIC_RELAXED, 1 /* Device scope */, false);
}

__device__
inline
unsigned int atomicDec(unsigned int* address, unsigned int val)
{
    // Wrapping decrement, matching CUDA's atomicDec: stores
    // (((old == 0) || (old > val)) ? val : (old - 1)) and returns old.
    __device__
    extern
    unsigned int __builtin_amdgcn_atomic_dec(
        unsigned int*,
        unsigned int,
        unsigned int,
        unsigned int,
        bool) __asm("llvm.amdgcn.atomic.dec.i32.p0i32");

    return __builtin_amdgcn_atomic_dec(
        address, val, __ATOMIC_RELAXED, 1 /* Device scope */, false);
}
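
// Usage sketch (illustrative, not from the header): the wrap-around makes
// atomicInc a natural fit for cycling through a fixed pool of slots, e.g.
// a hypothetical ring-buffer producer claiming one of N entries:
//
//   __device__
//   unsigned int claim_slot(unsigned int* next, unsigned int N)
//   {
//       // Returns a slot index in [0, N); wraps back to 0 after N - 1.
//       return atomicInc(next, N - 1);
//   }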

__device__
inline
int atomicAnd(int* address, int val)
{
    return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicAnd(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned long long atomicAnd(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
}

__device__
inline
int atomicOr(int* address, int val)
{
    return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicOr(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned long long atomicOr(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
}

__device__
inline
int atomicXor(int* address, int val)
{
    return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicXor(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned long long atomicXor(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
}

// TODO: add scoped atomics, i.e. atomic{*}_system && atomic{*}_block.
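
// One possible shape for the TODO above (illustrative; assumes a newer
// Clang that provides the __hip_atomic_* builtins and the
// __HIP_MEMORY_SCOPE_* constants, which this header predates):
//
//   __device__
//   inline
//   int atomicAdd_system(int* address, int val)
//   {
//       return __hip_atomic_fetch_add(
//           address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
//   }
//   __device__
//   inline
//   int atomicAdd_block(int* address, int val)
//   {
//       return __hip_atomic_fetch_add(
//           address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
//   }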