HIP: Heterogeneous-computing Interface for Portability
hip_runtime.h
Go to the documentation of this file.
1 /*
2 Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
3 
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 THE SOFTWARE.
21 */
22 
28 //#pragma once
29 #ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_RUNTIME_H
30 #define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_RUNTIME_H
31 
32 #include <hip/hcc_detail/hip_common.h>
33 
34 //---
35 // Top part of file can be compiled with any compiler
36 
37 //#include <cstring>
38 #if __cplusplus
39 #include <cmath>
40 #include <cstdint>
41 #else
42 #include <math.h>
43 #include <string.h>
44 #include <stddef.h>
45 #endif //__cplusplus
46 
47 // __hip_malloc is not working. Disable it by default.
48 #ifndef __HIP_ENABLE_DEVICE_MALLOC__
49 #define __HIP_ENABLE_DEVICE_MALLOC__ 0
50 #endif
51 
52 #if __HCC_OR_HIP_CLANG__
53 
54 #if __HIP__
55 #if !defined(__align__)
56 #define __align__(x) __attribute__((aligned(x)))
57 #endif
58 #endif
59 
60 #define CUDA_SUCCESS hipSuccess
61 
62 #include <hip/hip_runtime_api.h>
63 #endif // __HCC_OR_HIP_CLANG__
64 
65 #if __HCC__
66 // define HIP_ENABLE_PRINTF to enable printf
67 #ifdef HIP_ENABLE_PRINTF
68 #define HCC_ENABLE_ACCELERATOR_PRINTF 1
69 #endif
70 
71 //---
72 // Remainder of this file only compiles with HCC
73 #if defined __HCC__
74 #include "grid_launch.h"
75 #include "hc_printf.hpp"
76 // TODO-HCC-GL - change this to typedef.
77 // typedef grid_launch_parm hipLaunchParm ;
78 
79 #if GENERIC_GRID_LAUNCH == 0
80 #define hipLaunchParm grid_launch_parm
81 #else
82 namespace hip_impl {
83 struct Empty_launch_parm {};
84 } // namespace hip_impl
85 #define hipLaunchParm hip_impl::Empty_launch_parm
86 #endif // GENERIC_GRID_LAUNCH
87 
88 #if defined(GRID_LAUNCH_VERSION) and (GRID_LAUNCH_VERSION >= 20) || GENERIC_GRID_LAUNCH == 1
89 #else // Use field names for grid_launch 2.0 structure, if HCC supports GL 2.0.
90 #error(HCC must support GRID_LAUNCH_20)
91 #endif // GRID_LAUNCH_VERSION
92 
93 #endif // HCC
94 
95 #if GENERIC_GRID_LAUNCH == 1 && defined __HCC__
96 #include "grid_launch_GGL.hpp"
97 #endif // GENERIC_GRID_LAUNCH
98 
99 #endif // HCC
100 
101 #if __HCC_OR_HIP_CLANG__
102 extern int HIP_TRACE_API;
103 
104 #ifdef __cplusplus
105 #include <hip/hcc_detail/hip_ldg.h>
106 #endif
107 #include <hip/hcc_detail/hip_atomic.h>
109 #include <hip/hcc_detail/device_functions.h>
110 #include <hip/hcc_detail/surface_functions.h>
111 #if __HCC__
112  #include <hip/hcc_detail/math_functions.h>
113  #include <hip/hcc_detail/texture_functions.h>
114 #else
115  #include <hip/hcc_detail/texture_fetch_functions.h>
116  #include <hip/hcc_detail/texture_indirect_functions.h>
117 #endif
118 // TODO-HCC remove old definitions ; ~1602 hcc supports __HCC_ACCELERATOR__ define.
119 #if defined(__KALMAR_ACCELERATOR__) && !defined(__HCC_ACCELERATOR__)
120 #define __HCC_ACCELERATOR__ __KALMAR_ACCELERATOR__
121 #endif
122 
123 // TODO-HCC add a dummy implementation of assert, need to replace with a proper kernel exit call.
124 #if defined(__HCC__) && __HIP_DEVICE_COMPILE__ == 1
125 #undef assert
126 #define assert(COND) \
127  { \
128  if (!(COND)) { \
129  abort(); \
130  } \
131  }
132 #endif
133 
134 
135 // Feature tests:
136 #if (defined(__HCC_ACCELERATOR__) && (__HCC_ACCELERATOR__ != 0)) || __HIP_DEVICE_COMPILE__
137 // Device compile and not host compile:
138 
139 // 32-bit Atomics:
140 #define __HIP_ARCH_HAS_GLOBAL_INT32_ATOMICS__ (1)
141 #define __HIP_ARCH_HAS_GLOBAL_FLOAT_ATOMIC_EXCH__ (1)
142 #define __HIP_ARCH_HAS_SHARED_INT32_ATOMICS__ (1)
143 #define __HIP_ARCH_HAS_SHARED_FLOAT_ATOMIC_EXCH__ (1)
144 #define __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__ (1)
145 
146 // 64-bit Atomics:
147 #define __HIP_ARCH_HAS_GLOBAL_INT64_ATOMICS__ (1)
148 #define __HIP_ARCH_HAS_SHARED_INT64_ATOMICS__ (0)
149 
150 // Doubles
151 #define __HIP_ARCH_HAS_DOUBLES__ (1)
152 
153 // warp cross-lane operations:
154 #define __HIP_ARCH_HAS_WARP_VOTE__ (1)
155 #define __HIP_ARCH_HAS_WARP_BALLOT__ (1)
156 #define __HIP_ARCH_HAS_WARP_SHUFFLE__ (1)
157 #define __HIP_ARCH_HAS_WARP_FUNNEL_SHIFT__ (0)
158 
159 // sync
160 #define __HIP_ARCH_HAS_THREAD_FENCE_SYSTEM__ (1)
161 #define __HIP_ARCH_HAS_SYNC_THREAD_EXT__ (0)
162 
163 // misc
164 #define __HIP_ARCH_HAS_SURFACE_FUNCS__ (0)
165 #define __HIP_ARCH_HAS_3DGRID__ (1)
166 #define __HIP_ARCH_HAS_DYNAMIC_PARALLEL__ (0)
167 
168 #endif /* Device feature flags */
169 
170 
171 #define launch_bounds_impl0(requiredMaxThreadsPerBlock) \
172  __attribute__((amdgpu_flat_work_group_size(1, requiredMaxThreadsPerBlock)))
173 #define launch_bounds_impl1(requiredMaxThreadsPerBlock, minBlocksPerMultiprocessor) \
174  __attribute__((amdgpu_flat_work_group_size(1, requiredMaxThreadsPerBlock), \
175  amdgpu_waves_per_eu(minBlocksPerMultiprocessor)))
176 #define select_impl_(_1, _2, impl_, ...) impl_
177 #define __launch_bounds__(...) \
178  select_impl_(__VA_ARGS__, launch_bounds_impl1, launch_bounds_impl0)(__VA_ARGS__)
179 
180 // Detect if we are compiling C++ mode or C mode
181 #if defined(__cplusplus)
182 #define __HCC_CPP__
183 #elif defined(__STDC_VERSION__)
184 #define __HCC_C__
185 #endif
186 
187 __host__ inline void* __get_dynamicgroupbaseptr() { return nullptr; }
188 
189 #if __HIP_ARCH_GFX701__ == 0
190 
// Cross-lane data-movement intrinsics (DS bpermute/permute, swizzle, DPP).
// Declared only when NOT compiling for gfx701 (see the guard above).
191 __device__ unsigned __hip_ds_bpermute(int index, unsigned src);
192 __device__ float __hip_ds_bpermutef(int index, float src);
193 __device__ unsigned __hip_ds_permute(int index, unsigned src);
194 __device__ float __hip_ds_permutef(int index, float src);
195 
// Swizzle pattern is a compile-time template argument (encoded per the ISA's
// ds_swizzle_b32 offset field).
196 template <int pattern>
197 __device__ unsigned __hip_ds_swizzle_N(unsigned int src);
198 template <int pattern>
199 __device__ float __hip_ds_swizzlef_N(float src);
200 
// DPP move with compile-time control word, row/bank masks and bound control.
201 template <int dpp_ctrl, int row_mask, int bank_mask, bool bound_ctrl>
202 __device__ int __hip_move_dpp_N(int src);
203 
204 #endif //__HIP_ARCH_GFX701__ == 0
205 
206 #endif // __HCC_OR_HIP_CLANG__
207 
208 #if defined __HCC__
209 
210 namespace hip_impl {
211  struct GroupId {
212  using R = decltype(hc_get_group_id(0));
213 
214  __device__
215  R operator()(std::uint32_t x) const noexcept { return hc_get_group_id(x); }
216  };
217  struct GroupSize {
218  using R = decltype(hc_get_group_size(0));
219 
220  __device__
221  R operator()(std::uint32_t x) const noexcept {
222  return hc_get_group_size(x);
223  }
224  };
225  struct NumGroups {
226  using R = decltype(hc_get_num_groups(0));
227 
228  __device__
229  R operator()(std::uint32_t x) const noexcept {
230  return hc_get_num_groups(x);
231  }
232  };
233  struct WorkitemId {
234  using R = decltype(hc_get_workitem_id(0));
235 
236  __device__
237  R operator()(std::uint32_t x) const noexcept {
238  return hc_get_workitem_id(x);
239  }
240  };
241 } // Namespace hip_impl.
242 
243 template <typename F>
244 struct Coordinates {
245  using R = decltype(F{}(0));
246 
247  struct X { __device__ operator R() const noexcept { return F{}(0); } };
248  struct Y { __device__ operator R() const noexcept { return F{}(1); } };
249  struct Z { __device__ operator R() const noexcept { return F{}(2); } };
250 
251  static constexpr X x{};
252  static constexpr Y y{};
253  static constexpr Z z{};
254 };
255 
256 inline
257 __device__
258 std::uint32_t operator*(Coordinates<hip_impl::NumGroups>::X,
260  return hc_get_grid_size(0);
261 }
262 inline
263 __device__
264 std::uint32_t operator*(Coordinates<hip_impl::GroupSize>::X,
266  return hc_get_grid_size(0);
267 }
268 inline
269 __device__
270 std::uint32_t operator*(Coordinates<hip_impl::NumGroups>::Y,
272  return hc_get_grid_size(1);
273 }
274 inline
275 __device__
276 std::uint32_t operator*(Coordinates<hip_impl::GroupSize>::Y,
278  return hc_get_grid_size(1);
279 }
280 inline
281 __device__
282 std::uint32_t operator*(Coordinates<hip_impl::NumGroups>::Z,
284  return hc_get_grid_size(2);
285 }
286 inline
287 __device__
288 std::uint32_t operator*(Coordinates<hip_impl::GroupSize>::Z,
290  return hc_get_grid_size(2);
291 }
292 
293 static constexpr Coordinates<hip_impl::GroupSize> blockDim{};
294 static constexpr Coordinates<hip_impl::GroupId> blockIdx{};
295 static constexpr Coordinates<hip_impl::NumGroups> gridDim{};
296 static constexpr Coordinates<hip_impl::WorkitemId> threadIdx{};
297 
298 #define hipThreadIdx_x (hc_get_workitem_id(0))
299 #define hipThreadIdx_y (hc_get_workitem_id(1))
300 #define hipThreadIdx_z (hc_get_workitem_id(2))
301 
302 #define hipBlockIdx_x (hc_get_group_id(0))
303 #define hipBlockIdx_y (hc_get_group_id(1))
304 #define hipBlockIdx_z (hc_get_group_id(2))
305 
306 #define hipBlockDim_x (hc_get_group_size(0))
307 #define hipBlockDim_y (hc_get_group_size(1))
308 #define hipBlockDim_z (hc_get_group_size(2))
309 
310 #define hipGridDim_x (hc_get_num_groups(0))
311 #define hipGridDim_y (hc_get_num_groups(1))
312 #define hipGridDim_z (hc_get_num_groups(2))
313 
314 #endif // defined __HCC__
315 
316 #ifndef __OPENMP_AMDGCN__
317 #if __HCC_OR_HIP_CLANG__
318 #if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
319 #if __HIP_ENABLE_DEVICE_MALLOC__
// Device-side heap enabled: forward malloc/free to the runtime's
// __hip_malloc/__hip_free entry points.
// NOTE(review): free() is declared to return void* (mirroring __hip_free)
// rather than void -- confirm against upstream before using its result.
320 extern "C" __device__ void* __hip_malloc(size_t);
321 extern "C" __device__ void* __hip_free(void* ptr);
322 static inline __device__ void* malloc(size_t size) { return __hip_malloc(size); }
323 static inline __device__ void* free(void* ptr) { return __hip_free(ptr); }
324 #else
// Device malloc disabled (the default; __hip_malloc is noted as not working
// earlier in this file): any call traps immediately rather than returning a
// bogus pointer.
325 static inline __device__ void* malloc(size_t size) { __builtin_trap(); return nullptr; }
326 static inline __device__ void* free(void* ptr) { __builtin_trap(); return nullptr; }
327 #endif
328 #endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
329 #endif //__HCC_OR_HIP_CLANG__
330 #endif // !__OPENMP_AMDGCN__
331 
332 #ifdef __HCC__
333 
334 #define __syncthreads() hc_barrier(CLK_LOCAL_MEM_FENCE)
335 
336 #define HIP_KERNEL_NAME(...) (__VA_ARGS__)
337 #define HIP_SYMBOL(X) #X
338 
339 #if defined __HCC_CPP__
340 extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, dim3 block,
341  grid_launch_parm* lp, const char* kernelNameStr, bool lockAcquired = 0);
342 extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, size_t block,
343  grid_launch_parm* lp, const char* kernelNameStr, bool lockAcquired = 0);
344 extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, dim3 block,
345  grid_launch_parm* lp, const char* kernelNameStr, bool lockAcquired = 0);
346 extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, size_t block,
347  grid_launch_parm* lp, const char* kernelNameStr, bool lockAcquired = 0);
348 extern void ihipPostLaunchKernel(const char* kernelName, hipStream_t stream, grid_launch_parm& lp, bool unlockPostponed = 0);
349 
350 #if GENERIC_GRID_LAUNCH == 0
351 //#warning "Original hipLaunchKernel defined"
352 // Due to multiple overloaded versions of ihipPreLaunchKernel, the numBlocks3D and blockDim3D can be
353 // either size_t or dim3 types
354 #define hipLaunchKernel(_kernelName, _numBlocks3D, _blockDim3D, _groupMemBytes, _stream, ...) \
355  do { \
356  grid_launch_parm lp; \
357  lp.dynamic_group_mem_bytes = _groupMemBytes; \
358  hipStream_t trueStream = \
359  (ihipPreLaunchKernel(_stream, _numBlocks3D, _blockDim3D, &lp, #_kernelName)); \
360  _kernelName(lp, ##__VA_ARGS__); \
361  ihipPostLaunchKernel(#_kernelName, trueStream, lp); \
362  } while (0)
363 #endif // GENERIC_GRID_LAUNCH
364 
365 #elif defined(__HCC_C__)
366 
367 // TODO - develop C interface.
368 
369 #endif //__HCC_CPP__
370 
371 // End doxygen API:
376 //
377 // hip-clang functions
378 //
379 #elif defined(__clang__) && defined(__HIP__)
380 
381 #define HIP_KERNEL_NAME(...) __VA_ARGS__
382 #define HIP_SYMBOL(X) X
383 
384 typedef int hipLaunchParm;
385 
// Recursion terminator: every tuple element has already been packed into the
// argument array, so there is nothing left to do.
template <std::size_t n, typename... Ts,
          typename std::enable_if<n == sizeof...(Ts)>::type* = nullptr>
void pArgs(const std::tuple<Ts...>&, void*) {}

// Stores the address of the n-th element of `formals` into _vargs[n], then
// recurses on the remaining elements. Builds the void** argument array
// expected by the C-style kernel-launch API.
template <std::size_t n, typename... Ts,
          typename std::enable_if<n != sizeof...(Ts)>::type* = nullptr>
void pArgs(const std::tuple<Ts...>& formals, void** _vargs) {
    using Arg = typename std::tuple_element<n, std::tuple<Ts...> >::type;

    // Kernel arguments are copied by value across the launch boundary, so
    // references can never be marshalled this way.
    static_assert(!std::is_reference<Arg>{},
                  "A __global__ function cannot have a reference as one of its "
                  "arguments.");
#if defined(HIP_STRICT)
    static_assert(std::is_trivially_copyable<Arg>{},
                  "Only TriviallyCopyable types can be arguments to a __global__ "
                  "function");
#endif
    // const_cast is safe: the launch machinery only reads through these slots.
    _vargs[n] = const_cast<void*>(reinterpret_cast<const void*>(&std::get<n>(formals)));
    pArgs<n + 1>(formals, _vargs);
}
406 
// Moves the caller-supplied actual arguments into a tuple of the kernel's
// formal parameter types, checking at compile time that the arities match.
// Any implicit conversions (e.g. int -> float) happen here, exactly as they
// would for a direct call of the kernel.
template <typename... Formals, typename... Actuals>
std::tuple<Formals...> validateArgsCountType(void (*kernel)(Formals...), std::tuple<Actuals...>(actuals)) {
    static_assert(sizeof...(Formals) == sizeof...(Actuals), "Argument Count Mismatch");
    return std::tuple<Formals...>{std::move(actuals)};
}
413 
414 #if defined(HIP_TEMPLATE_KERNEL_LAUNCH)
// Template form of the GGL launcher: validates arity/types, packs the kernel
// arguments into a void*[] via pArgs, and dispatches through the C-style
// hipLaunchKernel entry point.
415 template <typename... Args, typename F = void (*)(Args...)>
416 void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
417  std::uint32_t sharedMemBytes, hipStream_t stream, Args... args) {
// NOTE(review): for a zero-argument kernel `count` is 0, making _Args a
// zero-length array (non-standard C++) -- confirm compilers in use accept it.
418  constexpr size_t count = sizeof...(Args);
419  auto tup_ = std::tuple<Args...>{args...};
420  auto tup = validateArgsCountType(kernel, tup_);
421  void* _Args[count];
422  pArgs<0>(tup, _Args);
423 
// The launch API takes an opaque function handle, hence the cast.
424  auto k = reinterpret_cast<void*>(kernel);
425  hipLaunchKernel(k, numBlocks, dimBlocks, _Args, sharedMemBytes, stream);
426 }
427 #else
// Macro form (the default): expands directly to a triple-chevron launch.
428 #define hipLaunchKernelGGLInternal(kernelName, numBlocks, numThreads, memPerBlock, streamId, ...) \
429  do { \
430  kernelName<<<(numBlocks), (numThreads), (memPerBlock), (streamId)>>>(__VA_ARGS__); \
431  } while (0)
432 
433 #define hipLaunchKernelGGL(kernelName, ...) hipLaunchKernelGGLInternal((kernelName), __VA_ARGS__)
434 #endif
435 
436 #include <hip/hip_runtime_api.h>
437 extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_id(uint);
438 extern "C" __device__ __attribute__((const)) size_t __ockl_get_group_id(uint);
439 extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_size(uint);
440 extern "C" __device__ __attribute__((const)) size_t __ockl_get_num_groups(uint);
441 struct __HIP_BlockIdx {
442  __device__
443  std::uint32_t operator()(std::uint32_t x) const noexcept { return __ockl_get_group_id(x); }
444 };
445 struct __HIP_BlockDim {
446  __device__
447  std::uint32_t operator()(std::uint32_t x) const noexcept {
448  return __ockl_get_local_size(x);
449  }
450 };
451 struct __HIP_GridDim {
452  __device__
453  std::uint32_t operator()(std::uint32_t x) const noexcept {
454  return __ockl_get_num_groups(x);
455  }
456 };
457 struct __HIP_ThreadIdx {
458  __device__
459  std::uint32_t operator()(std::uint32_t x) const noexcept {
460  return __ockl_get_local_id(x);
461  }
462 };
463 
464 template <typename F>
465 struct __HIP_Coordinates {
466  using R = decltype(F{}(0));
467 
468  struct X { __device__ operator R() const noexcept { return F{}(0); } };
469  struct Y { __device__ operator R() const noexcept { return F{}(1); } };
470  struct Z { __device__ operator R() const noexcept { return F{}(2); } };
471 
472  static constexpr X x{};
473  static constexpr Y y{};
474  static constexpr Z z{};
475 #ifdef __cplusplus
476  __device__ operator dim3() const { return dim3(x, y, z); }
477 #endif
478 
479 };
480 template <typename F>
481 #if !defined(_MSC_VER)
482 __attribute__((weak))
483 #endif
484 constexpr typename __HIP_Coordinates<F>::X __HIP_Coordinates<F>::x;
485 template <typename F>
486 #if !defined(_MSC_VER)
487 __attribute__((weak))
488 #endif
489 constexpr typename __HIP_Coordinates<F>::Y __HIP_Coordinates<F>::y;
490 template <typename F>
491 #if !defined(_MSC_VER)
492 __attribute__((weak))
493 #endif
494 constexpr typename __HIP_Coordinates<F>::Z __HIP_Coordinates<F>::z;
495 
496 extern "C" __device__ __attribute__((const)) size_t __ockl_get_global_size(uint);
497 inline
498 __device__
499 std::uint32_t operator*(__HIP_Coordinates<__HIP_GridDim>::X,
500  __HIP_Coordinates<__HIP_BlockDim>::X) noexcept {
501  return __ockl_get_global_size(0);
502 }
503 inline
504 __device__
505 std::uint32_t operator*(__HIP_Coordinates<__HIP_BlockDim>::X,
506  __HIP_Coordinates<__HIP_GridDim>::X) noexcept {
507  return __ockl_get_global_size(0);
508 }
509 inline
510 __device__
511 std::uint32_t operator*(__HIP_Coordinates<__HIP_GridDim>::Y,
512  __HIP_Coordinates<__HIP_BlockDim>::Y) noexcept {
513  return __ockl_get_global_size(1);
514 }
515 inline
516 __device__
517 std::uint32_t operator*(__HIP_Coordinates<__HIP_BlockDim>::Y,
518  __HIP_Coordinates<__HIP_GridDim>::Y) noexcept {
519  return __ockl_get_global_size(1);
520 }
521 inline
522 __device__
523 std::uint32_t operator*(__HIP_Coordinates<__HIP_GridDim>::Z,
524  __HIP_Coordinates<__HIP_BlockDim>::Z) noexcept {
525  return __ockl_get_global_size(2);
526 }
527 inline
528 __device__
529 std::uint32_t operator*(__HIP_Coordinates<__HIP_BlockDim>::Z,
530  __HIP_Coordinates<__HIP_GridDim>::Z) noexcept {
531  return __ockl_get_global_size(2);
532 }
533 
534 static constexpr __HIP_Coordinates<__HIP_BlockDim> blockDim{};
535 static constexpr __HIP_Coordinates<__HIP_BlockIdx> blockIdx{};
536 static constexpr __HIP_Coordinates<__HIP_GridDim> gridDim{};
537 static constexpr __HIP_Coordinates<__HIP_ThreadIdx> threadIdx{};
538 
539 extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_id(uint);
540 #define hipThreadIdx_x (__ockl_get_local_id(0))
541 #define hipThreadIdx_y (__ockl_get_local_id(1))
542 #define hipThreadIdx_z (__ockl_get_local_id(2))
543 
544 extern "C" __device__ __attribute__((const)) size_t __ockl_get_group_id(uint);
545 #define hipBlockIdx_x (__ockl_get_group_id(0))
546 #define hipBlockIdx_y (__ockl_get_group_id(1))
547 #define hipBlockIdx_z (__ockl_get_group_id(2))
548 
549 extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_size(uint);
550 #define hipBlockDim_x (__ockl_get_local_size(0))
551 #define hipBlockDim_y (__ockl_get_local_size(1))
552 #define hipBlockDim_z (__ockl_get_local_size(2))
553 
554 extern "C" __device__ __attribute__((const)) size_t __ockl_get_num_groups(uint);
555 #define hipGridDim_x (__ockl_get_num_groups(0))
556 #define hipGridDim_y (__ockl_get_num_groups(1))
557 #define hipGridDim_z (__ockl_get_num_groups(2))
558 
559 #include <hip/hcc_detail/math_functions.h>
560 
561 #if __HIP_HCC_COMPAT_MODE__
562 // Define HCC work item functions in terms of HIP builtin variables.
563 #pragma push_macro("__DEFINE_HCC_FUNC")
564 #define __DEFINE_HCC_FUNC(hc_fun,hip_var) \
565 inline __device__ __attribute__((always_inline)) uint hc_get_##hc_fun(uint i) { \
566  if (i==0) \
567  return hip_var.x; \
568  else if(i==1) \
569  return hip_var.y; \
570  else \
571  return hip_var.z; \
572 }
573 
574 __DEFINE_HCC_FUNC(workitem_id, threadIdx)
575 __DEFINE_HCC_FUNC(group_id, blockIdx)
576 __DEFINE_HCC_FUNC(group_size, blockDim)
577 __DEFINE_HCC_FUNC(num_groups, gridDim)
578 #pragma pop_macro("__DEFINE_HCC_FUNC")
579 
580 extern "C" __device__ __attribute__((const)) size_t __ockl_get_global_id(uint);
581 inline __device__ __attribute__((always_inline)) uint
582 hc_get_workitem_absolute_id(int dim)
583 {
584  return (uint)__ockl_get_global_id(dim);
585 }
586 
587 #endif
588 
589 #if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
590 // Support std::complex.
591 #if !_OPENMP || __HIP_ENABLE_CUDA_WRAPPER_FOR_OPENMP__
592 #pragma push_macro("__CUDA__")
593 #define __CUDA__
594 #include <__clang_cuda_math_forward_declares.h>
595 #include <__clang_cuda_complex_builtins.h>
596 // Workaround for using libc++ with HIP-Clang.
597 // The following headers requires clang include path before standard C++ include path.
598 // However libc++ include path requires to be before clang include path.
599 // To workaround this, we pass -isystem with the parent directory of clang include
600 // path instead of the clang include path itself.
601 #include <include/cuda_wrappers/algorithm>
602 #include <include/cuda_wrappers/complex>
603 #include <include/cuda_wrappers/new>
604 #undef __CUDA__
605 #pragma pop_macro("__CUDA__")
606 #endif // !_OPENMP || __HIP_ENABLE_CUDA_WRAPPER_FOR_OPENMP__
607 #endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
608 #endif // defined(__clang__) && defined(__HIP__)
609 
610 #include <hip/hcc_detail/hip_memory.h>
611 
612 #endif // HIP_INCLUDE_HIP_HCC_DETAIL_HIP_RUNTIME_H
hip_impl::GroupSize
Definition: hip_runtime.h:217
hip_impl::NumGroups
Definition: hip_runtime.h:225
hip_runtime_api.h
Defines the API signatures for HIP runtime. This file can be compiled with a standard compiler.
grid_launch_parm
Definition: grid_launch.h:32
hipLaunchKernel
hipError_t hipLaunchKernel(const void *function_address, dim3 numBlocks, dim3 dimBlocks, void **args, size_t sharedMemBytes __dparm(0), hipStream_t stream __dparm(0))
C compliant kernel launch API.
Coordinates::Y
Definition: hip_runtime.h:248
Coordinates
Definition: hip_runtime.h:244
dim3
struct dim3 dim3
__host__
#define __host__
Definition: host_defines.h:41
Coordinates::X
Definition: hip_runtime.h:247
host_defines.h
TODO-doc.
dim3
Definition: hip_runtime_api.h:330
hip_impl::WorkitemId
Definition: hip_runtime.h:233
hip_impl::GroupId
Definition: hip_runtime.h:211
Coordinates::Z
Definition: hip_runtime.h:249