HIP: Heterogenous-computing Interface for Portability
hip_runtime.h
Go to the documentation of this file.
1 /*
2 Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
3 
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 THE SOFTWARE.
21 */
22 
28 //#pragma once
29 #ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_RUNTIME_H
30 #define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_RUNTIME_H
31 
32 #include <hip/hcc_detail/hip_common.h>
33 
34 //---
35 // Top part of file can be compiled with any compiler
36 
37 //#include <cstring>
38 #if __cplusplus
39 #include <cmath>
40 #include <cstdint>
41 #else
42 #include <math.h>
43 #include <string.h>
44 #include <stddef.h>
45 #endif //__cplusplus
46 
47 // __hip_malloc is not working. Disable it by default.
48 #ifndef __HIP_ENABLE_DEVICE_MALLOC__
49 #define __HIP_ENABLE_DEVICE_MALLOC__ 0
50 #endif
51 
52 #if __HCC_OR_HIP_CLANG__
53 
54 #if __HIP__
55 #if !defined(__align__)
56 #define __align__(x) __attribute__((aligned(x)))
57 #endif
58 #endif
59 
60 #define CUDA_SUCCESS hipSuccess
61 
62 #include <hip/hip_runtime_api.h>
63 #endif // __HCC_OR_HIP_CLANG__
64 
65 #if __HCC__
66 // define HIP_ENABLE_PRINTF to enable printf
67 #ifdef HIP_ENABLE_PRINTF
68 #define HCC_ENABLE_ACCELERATOR_PRINTF 1
69 #endif
70 
71 //---
72 // Remainder of this file only compiles with HCC
73 #if defined __HCC__
74 #include "grid_launch.h"
75 #include "hc_printf.hpp"
76 // TODO-HCC-GL - change this to typedef.
77 // typedef grid_launch_parm hipLaunchParm ;
78 
79 #if GENERIC_GRID_LAUNCH == 0
80 #define hipLaunchParm grid_launch_parm
81 #else
82 namespace hip_impl {
83 struct Empty_launch_parm {};
84 } // namespace hip_impl
85 #define hipLaunchParm hip_impl::Empty_launch_parm
86 #endif // GENERIC_GRID_LAUNCH
87 
88 #if defined(GRID_LAUNCH_VERSION) and (GRID_LAUNCH_VERSION >= 20) || GENERIC_GRID_LAUNCH == 1
89 #else // Use field names for grid_launch 2.0 structure, if HCC supports GL 2.0.
90 #error(HCC must support GRID_LAUNCH_20)
91 #endif // GRID_LAUNCH_VERSION
92 
93 #endif // HCC
94 
95 #if GENERIC_GRID_LAUNCH == 1 && defined __HCC__
96 #include "grid_launch_GGL.hpp"
97 #endif // GENERIC_GRID_LAUNCH
98 
99 #endif // HCC
100 
101 #if __HCC_OR_HIP_CLANG__
102 extern int HIP_TRACE_API;
103 
104 #ifdef __cplusplus
105 #include <hip/hcc_detail/hip_ldg.h>
106 #endif
107 #include <hip/hcc_detail/hip_atomic.h>
109 #include <hip/hcc_detail/device_functions.h>
110 #include <hip/hcc_detail/surface_functions.h>
111 #if __HCC__
112  #include <hip/hcc_detail/math_functions.h>
113  #include <hip/hcc_detail/texture_functions.h>
114 #else
115  #include <hip/hcc_detail/texture_fetch_functions.h>
116  #include <hip/hcc_detail/texture_indirect_functions.h>
117 #endif
118 // TODO-HCC remove old definitions ; ~1602 hcc supports __HCC_ACCELERATOR__ define.
119 #if defined(__KALMAR_ACCELERATOR__) && !defined(__HCC_ACCELERATOR__)
120 #define __HCC_ACCELERATOR__ __KALMAR_ACCELERATOR__
121 #endif
122 
123 // TODO-HCC add a dummy implementation of assert, need to replace with a proper kernel exit call.
124 #if defined(__HCC__) && __HIP_DEVICE_COMPILE__ == 1
125 #undef assert
126 #define assert(COND) \
127  { \
128  if (!(COND)) { \
129  abort(); \
130  } \
131  }
132 #endif
133 
134 
135 // Feature tests:
136 #if (defined(__HCC_ACCELERATOR__) && (__HCC_ACCELERATOR__ != 0)) || __HIP_DEVICE_COMPILE__
137 // Device compile and not host compile:
138 
139 // 32-bit Atomics:
140 #define __HIP_ARCH_HAS_GLOBAL_INT32_ATOMICS__ (1)
141 #define __HIP_ARCH_HAS_GLOBAL_FLOAT_ATOMIC_EXCH__ (1)
142 #define __HIP_ARCH_HAS_SHARED_INT32_ATOMICS__ (1)
143 #define __HIP_ARCH_HAS_SHARED_FLOAT_ATOMIC_EXCH__ (1)
144 #define __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__ (1)
145 
146 // 64-bit Atomics:
147 #define __HIP_ARCH_HAS_GLOBAL_INT64_ATOMICS__ (1)
148 #define __HIP_ARCH_HAS_SHARED_INT64_ATOMICS__ (0)
149 
150 // Doubles
151 #define __HIP_ARCH_HAS_DOUBLES__ (1)
152 
153 // warp cross-lane operations:
154 #define __HIP_ARCH_HAS_WARP_VOTE__ (1)
155 #define __HIP_ARCH_HAS_WARP_BALLOT__ (1)
156 #define __HIP_ARCH_HAS_WARP_SHUFFLE__ (1)
157 #define __HIP_ARCH_HAS_WARP_FUNNEL_SHIFT__ (0)
158 
159 // sync
160 #define __HIP_ARCH_HAS_THREAD_FENCE_SYSTEM__ (1)
161 #define __HIP_ARCH_HAS_SYNC_THREAD_EXT__ (0)
162 
163 // misc
164 #define __HIP_ARCH_HAS_SURFACE_FUNCS__ (0)
165 #define __HIP_ARCH_HAS_3DGRID__ (1)
166 #define __HIP_ARCH_HAS_DYNAMIC_PARALLEL__ (0)
167 
168 #endif /* Device feature flags */
169 
170 
171 #define launch_bounds_impl0(requiredMaxThreadsPerBlock) \
172  __attribute__((amdgpu_flat_work_group_size(1, requiredMaxThreadsPerBlock)))
173 #define launch_bounds_impl1(requiredMaxThreadsPerBlock, minBlocksPerMultiprocessor) \
174  __attribute__((amdgpu_flat_work_group_size(1, requiredMaxThreadsPerBlock), \
175  amdgpu_waves_per_eu(minBlocksPerMultiprocessor)))
176 #define select_impl_(_1, _2, impl_, ...) impl_
177 #define __launch_bounds__(...) \
178  select_impl_(__VA_ARGS__, launch_bounds_impl1, launch_bounds_impl0)(__VA_ARGS__)
179 
180 // Detect if we are compiling C++ mode or C mode
181 #if defined(__cplusplus)
182 #define __HCC_CPP__
183 #elif defined(__STDC_VERSION__)
184 #define __HCC_C__
185 #endif
186 
187 __host__ inline void* __get_dynamicgroupbaseptr() { return nullptr; }
188 
189 #if __HIP_ARCH_GFX701__ == 0
190 
191 __device__ unsigned __hip_ds_bpermute(int index, unsigned src);
192 __device__ float __hip_ds_bpermutef(int index, float src);
193 __device__ unsigned __hip_ds_permute(int index, unsigned src);
194 __device__ float __hip_ds_permutef(int index, float src);
195 
196 template <int pattern>
197 __device__ unsigned __hip_ds_swizzle_N(unsigned int src);
198 template <int pattern>
199 __device__ float __hip_ds_swizzlef_N(float src);
200 
201 template <int dpp_ctrl, int row_mask, int bank_mask, bool bound_ctrl>
202 __device__ int __hip_move_dpp_N(int src);
203 
204 #endif //__HIP_ARCH_GFX803__ == 1
205 
206 #endif // __HCC_OR_HIP_CLANG__
207 
208 #if defined __HCC__
209 
210 namespace hip_impl {
211  struct GroupId {
212  using R = decltype(hc_get_group_id(0));
213 
214  __device__
215  R operator()(std::uint32_t x) const noexcept { return hc_get_group_id(x); }
216  };
217  struct GroupSize {
218  using R = decltype(hc_get_group_size(0));
219 
220  __device__
221  R operator()(std::uint32_t x) const noexcept {
222  return hc_get_group_size(x);
223  }
224  };
225  struct NumGroups {
226  using R = decltype(hc_get_num_groups(0));
227 
228  __device__
229  R operator()(std::uint32_t x) const noexcept {
230  return hc_get_num_groups(x);
231  }
232  };
233  struct WorkitemId {
234  using R = decltype(hc_get_workitem_id(0));
235 
236  __device__
237  R operator()(std::uint32_t x) const noexcept {
238  return hc_get_workitem_id(x);
239  }
240  };
241 } // Namespace hip_impl.
242 
243 template <typename F>
244 struct Coordinates {
245  using R = decltype(F{}(0));
246 
247  struct X { __device__ operator R() const noexcept { return F{}(0); } };
248  struct Y { __device__ operator R() const noexcept { return F{}(1); } };
249  struct Z { __device__ operator R() const noexcept { return F{}(2); } };
250 
251  static constexpr X x{};
252  static constexpr Y y{};
253  static constexpr Z z{};
254 };
255 
256 inline
257 __device__
258 std::uint32_t operator*(Coordinates<hip_impl::NumGroups>::X,
260  return hc_get_grid_size(0);
261 }
262 inline
263 __device__
264 std::uint32_t operator*(Coordinates<hip_impl::GroupSize>::X,
266  return hc_get_grid_size(0);
267 }
268 inline
269 __device__
270 std::uint32_t operator*(Coordinates<hip_impl::NumGroups>::Y,
272  return hc_get_grid_size(1);
273 }
274 inline
275 __device__
276 std::uint32_t operator*(Coordinates<hip_impl::GroupSize>::Y,
278  return hc_get_grid_size(1);
279 }
280 inline
281 __device__
282 std::uint32_t operator*(Coordinates<hip_impl::NumGroups>::Z,
284  return hc_get_grid_size(2);
285 }
286 inline
287 __device__
288 std::uint32_t operator*(Coordinates<hip_impl::GroupSize>::Z,
290  return hc_get_grid_size(2);
291 }
292 
293 static constexpr Coordinates<hip_impl::GroupSize> blockDim{};
294 static constexpr Coordinates<hip_impl::GroupId> blockIdx{};
295 static constexpr Coordinates<hip_impl::NumGroups> gridDim{};
296 static constexpr Coordinates<hip_impl::WorkitemId> threadIdx{};
297 
298 #define hipThreadIdx_x (hc_get_workitem_id(0))
299 #define hipThreadIdx_y (hc_get_workitem_id(1))
300 #define hipThreadIdx_z (hc_get_workitem_id(2))
301 
302 #define hipBlockIdx_x (hc_get_group_id(0))
303 #define hipBlockIdx_y (hc_get_group_id(1))
304 #define hipBlockIdx_z (hc_get_group_id(2))
305 
306 #define hipBlockDim_x (hc_get_group_size(0))
307 #define hipBlockDim_y (hc_get_group_size(1))
308 #define hipBlockDim_z (hc_get_group_size(2))
309 
310 #define hipGridDim_x (hc_get_num_groups(0))
311 #define hipGridDim_y (hc_get_num_groups(1))
312 #define hipGridDim_z (hc_get_num_groups(2))
313 
314 #endif // defined __HCC__
315 #if __HCC_OR_HIP_CLANG__
316 #if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
317 #if __HIP_ENABLE_DEVICE_MALLOC__
318 extern "C" __device__ void* __hip_malloc(size_t);
319 extern "C" __device__ void* __hip_free(void* ptr);
320 static inline __device__ void* malloc(size_t size) { return __hip_malloc(size); }
321 static inline __device__ void* free(void* ptr) { return __hip_free(ptr); }
322 #else
323 static inline __device__ void* malloc(size_t size) { __builtin_trap(); return nullptr; }
324 static inline __device__ void* free(void* ptr) { __builtin_trap(); return nullptr; }
325 #endif
326 #endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
327 #endif //__HCC_OR_HIP_CLANG__
328 
329 #ifdef __HCC__
330 
331 #define __syncthreads() hc_barrier(CLK_LOCAL_MEM_FENCE)
332 
333 #define HIP_KERNEL_NAME(...) (__VA_ARGS__)
334 #define HIP_SYMBOL(X) #X
335 
336 #if defined __HCC_CPP__
337 extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, dim3 block,
338  grid_launch_parm* lp, const char* kernelNameStr, bool lockAcquired = 0);
339 extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, size_t block,
340  grid_launch_parm* lp, const char* kernelNameStr, bool lockAcquired = 0);
341 extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, dim3 block,
342  grid_launch_parm* lp, const char* kernelNameStr, bool lockAcquired = 0);
343 extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, size_t block,
344  grid_launch_parm* lp, const char* kernelNameStr, bool lockAcquired = 0);
345 extern void ihipPostLaunchKernel(const char* kernelName, hipStream_t stream, grid_launch_parm& lp, bool unlockPostponed = 0);
346 
347 #if GENERIC_GRID_LAUNCH == 0
348 //#warning "Original hipLaunchKernel defined"
349 // Due to multiple overloaded versions of ihipPreLaunchKernel, the numBlocks3D and blockDim3D can be
350 // either size_t or dim3 types
351 #define hipLaunchKernel(_kernelName, _numBlocks3D, _blockDim3D, _groupMemBytes, _stream, ...) \
352  do { \
353  grid_launch_parm lp; \
354  lp.dynamic_group_mem_bytes = _groupMemBytes; \
355  hipStream_t trueStream = \
356  (ihipPreLaunchKernel(_stream, _numBlocks3D, _blockDim3D, &lp, #_kernelName)); \
357  _kernelName(lp, ##__VA_ARGS__); \
358  ihipPostLaunchKernel(#_kernelName, trueStream, lp); \
359  } while (0)
360 #endif // GENERIC_GRID_LAUNCH
361 
362 #elif defined(__HCC_C__)
363 
364 // TODO - develop C interface.
365 
366 #endif //__HCC_CPP__
367 
372 // extern int HIP_PRINT_ENV ; ///< Print all HIP-related environment variables.
373 // extern int HIP_TRACE_API; ///< Trace HIP APIs.
374 // extern int HIP_LAUNCH_BLOCKING ; ///< Make all HIP APIs host-synchronous
375 
381 // End doxygen API:
386 //
387 // hip-clang functions
388 //
389 #elif defined(__clang__) && defined(__HIP__)
390 
391 #define HIP_KERNEL_NAME(...) __VA_ARGS__
392 #define HIP_SYMBOL(X) X
393 
394 typedef int hipLaunchParm;
395 
396 template <std::size_t n, typename... Ts,
397  typename std::enable_if<n == sizeof...(Ts)>::type* = nullptr>
398 void pArgs(const std::tuple<Ts...>&, void*) {}
399 
400 template <std::size_t n, typename... Ts,
401  typename std::enable_if<n != sizeof...(Ts)>::type* = nullptr>
402 void pArgs(const std::tuple<Ts...>& formals, void** _vargs) {
403  using T = typename std::tuple_element<n, std::tuple<Ts...> >::type;
404 
405  static_assert(!std::is_reference<T>{},
406  "A __global__ function cannot have a reference as one of its "
407  "arguments.");
408 #if defined(HIP_STRICT)
409  static_assert(std::is_trivially_copyable<T>{},
410  "Only TriviallyCopyable types can be arguments to a __global__ "
411  "function");
412 #endif
413  _vargs[n] = const_cast<void*>(reinterpret_cast<const void*>(&std::get<n>(formals)));
414  return pArgs<n + 1>(formals, _vargs);
415 }
416 
417 template <typename... Formals, typename... Actuals>
418 std::tuple<Formals...> validateArgsCountType(void (*kernel)(Formals...), std::tuple<Actuals...>(actuals)) {
419  static_assert(sizeof...(Formals) == sizeof...(Actuals), "Argument Count Mismatch");
420  std::tuple<Formals...> to_formals{std::move(actuals)};
421  return to_formals;
422 }
423 
424 #if defined(HIP_TEMPLATE_KERNEL_LAUNCH)
425 template <typename... Args, typename F = void (*)(Args...)>
426 void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
427  std::uint32_t sharedMemBytes, hipStream_t stream, Args... args) {
428  constexpr size_t count = sizeof...(Args);
429  auto tup_ = std::tuple<Args...>{args...};
430  auto tup = validateArgsCountType(kernel, tup_);
431  void* _Args[count];
432  pArgs<0>(tup, _Args);
433 
434  auto k = reinterpret_cast<void*>(kernel);
435  hipLaunchKernel(k, numBlocks, dimBlocks, _Args, sharedMemBytes, stream);
436 }
437 #else
438 #define hipLaunchKernelGGLInternal(kernelName, numBlocks, numThreads, memPerBlock, streamId, ...) \
439  do { \
440  kernelName<<<(numBlocks), (numThreads), (memPerBlock), (streamId)>>>(__VA_ARGS__); \
441  } while (0)
442 
443 #define hipLaunchKernelGGL(kernelName, ...) hipLaunchKernelGGLInternal((kernelName), __VA_ARGS__)
444 #endif
445 
446 #include <hip/hip_runtime_api.h>
447 
448 #pragma push_macro("__DEVICE__")
449 #define __DEVICE__ static __device__ __forceinline__
450 
451 extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_id(uint);
452 __DEVICE__ uint __hip_get_thread_idx_x() { return __ockl_get_local_id(0); }
453 __DEVICE__ uint __hip_get_thread_idx_y() { return __ockl_get_local_id(1); }
454 __DEVICE__ uint __hip_get_thread_idx_z() { return __ockl_get_local_id(2); }
455 
456 extern "C" __device__ __attribute__((const)) size_t __ockl_get_group_id(uint);
457 __DEVICE__ uint __hip_get_block_idx_x() { return __ockl_get_group_id(0); }
458 __DEVICE__ uint __hip_get_block_idx_y() { return __ockl_get_group_id(1); }
459 __DEVICE__ uint __hip_get_block_idx_z() { return __ockl_get_group_id(2); }
460 
461 extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_size(uint);
462 __DEVICE__ uint __hip_get_block_dim_x() { return __ockl_get_local_size(0); }
463 __DEVICE__ uint __hip_get_block_dim_y() { return __ockl_get_local_size(1); }
464 __DEVICE__ uint __hip_get_block_dim_z() { return __ockl_get_local_size(2); }
465 
466 extern "C" __device__ __attribute__((const)) size_t __ockl_get_num_groups(uint);
467 __DEVICE__ uint __hip_get_grid_dim_x() { return __ockl_get_num_groups(0); }
468 __DEVICE__ uint __hip_get_grid_dim_y() { return __ockl_get_num_groups(1); }
469 __DEVICE__ uint __hip_get_grid_dim_z() { return __ockl_get_num_groups(2); }
470 
471 #define __HIP_DEVICE_BUILTIN(DIMENSION, FUNCTION) \
472  __declspec(property(get = __get_##DIMENSION)) uint DIMENSION; \
473  __DEVICE__ uint __get_##DIMENSION(void) { \
474  return FUNCTION; \
475  }
476 
477 struct __hip_builtin_threadIdx_t {
478  __HIP_DEVICE_BUILTIN(x,__hip_get_thread_idx_x());
479  __HIP_DEVICE_BUILTIN(y,__hip_get_thread_idx_y());
480  __HIP_DEVICE_BUILTIN(z,__hip_get_thread_idx_z());
481 };
482 
483 struct __hip_builtin_blockIdx_t {
484  __HIP_DEVICE_BUILTIN(x,__hip_get_block_idx_x());
485  __HIP_DEVICE_BUILTIN(y,__hip_get_block_idx_y());
486  __HIP_DEVICE_BUILTIN(z,__hip_get_block_idx_z());
487 };
488 
489 struct __hip_builtin_blockDim_t {
490  __HIP_DEVICE_BUILTIN(x,__hip_get_block_dim_x());
491  __HIP_DEVICE_BUILTIN(y,__hip_get_block_dim_y());
492  __HIP_DEVICE_BUILTIN(z,__hip_get_block_dim_z());
493 };
494 
495 struct __hip_builtin_gridDim_t {
496  __HIP_DEVICE_BUILTIN(x,__hip_get_grid_dim_x());
497  __HIP_DEVICE_BUILTIN(y,__hip_get_grid_dim_y());
498  __HIP_DEVICE_BUILTIN(z,__hip_get_grid_dim_z());
499 };
500 
501 #undef __HIP_DEVICE_BUILTIN
502 #pragma pop_macro("__DEVICE__")
503 
504 extern const __device__ __attribute__((weak)) __hip_builtin_threadIdx_t threadIdx;
505 extern const __device__ __attribute__((weak)) __hip_builtin_blockIdx_t blockIdx;
506 extern const __device__ __attribute__((weak)) __hip_builtin_blockDim_t blockDim;
507 extern const __device__ __attribute__((weak)) __hip_builtin_gridDim_t gridDim;
508 
509 
510 #define hipThreadIdx_x threadIdx.x
511 #define hipThreadIdx_y threadIdx.y
512 #define hipThreadIdx_z threadIdx.z
513 
514 #define hipBlockIdx_x blockIdx.x
515 #define hipBlockIdx_y blockIdx.y
516 #define hipBlockIdx_z blockIdx.z
517 
518 #define hipBlockDim_x blockDim.x
519 #define hipBlockDim_y blockDim.y
520 #define hipBlockDim_z blockDim.z
521 
522 #define hipGridDim_x gridDim.x
523 #define hipGridDim_y gridDim.y
524 #define hipGridDim_z gridDim.z
525 
526 #include <hip/hcc_detail/math_functions.h>
527 
528 #if __HIP_HCC_COMPAT_MODE__
529 // Define HCC work item functions in terms of HIP builtin variables.
530 #pragma push_macro("__DEFINE_HCC_FUNC")
531 #define __DEFINE_HCC_FUNC(hc_fun,hip_var) \
532 inline __device__ __attribute__((always_inline)) uint hc_get_##hc_fun(uint i) { \
533  if (i==0) \
534  return hip_var.x; \
535  else if(i==1) \
536  return hip_var.y; \
537  else \
538  return hip_var.z; \
539 }
540 
541 __DEFINE_HCC_FUNC(workitem_id, threadIdx)
542 __DEFINE_HCC_FUNC(group_id, blockIdx)
543 __DEFINE_HCC_FUNC(group_size, blockDim)
544 __DEFINE_HCC_FUNC(num_groups, gridDim)
545 #pragma pop_macro("__DEFINE_HCC_FUNC")
546 
547 extern "C" __device__ __attribute__((const)) size_t __ockl_get_global_id(uint);
548 inline __device__ __attribute__((always_inline)) uint
549 hc_get_workitem_absolute_id(int dim)
550 {
551  return (uint)__ockl_get_global_id(dim);
552 }
553 
554 #endif
555 
556 #if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
557 // Support std::complex.
558 #if !_OPENMP || __HIP_ENABLE_CUDA_WRAPPER_FOR_OPENMP__
559 #pragma push_macro("__CUDA__")
560 #define __CUDA__
561 #include <__clang_cuda_math_forward_declares.h>
562 #include <__clang_cuda_complex_builtins.h>
563 // Workaround for using libc++ with HIP-Clang.
564 // The following headers requires clang include path before standard C++ include path.
565 // However libc++ include path requires to be before clang include path.
566 // To workaround this, we pass -isystem with the parent directory of clang include
567 // path instead of the clang include path itself.
568 #include <include/cuda_wrappers/algorithm>
569 #include <include/cuda_wrappers/complex>
570 #include <include/cuda_wrappers/new>
571 #undef __CUDA__
572 #pragma pop_macro("__CUDA__")
573 #endif // !_OPENMP || __HIP_ENABLE_CUDA_WRAPPER_FOR_OPENMP__
574 #endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
575 #endif // defined(__clang__) && defined(__HIP__)
576 
577 #include <hip/hcc_detail/hip_memory.h>
578 
579 #endif // HIP_HCC_DETAIL_RUNTIME_H
hip_impl::GroupSize
Definition: hip_runtime.h:217
hip_impl::NumGroups
Definition: hip_runtime.h:225
hip_runtime_api.h
Defines the API signatures for HIP runtime. This file can be compiled with a standard compiler.
grid_launch_parm
Definition: grid_launch.h:32
hipLaunchKernel
hipError_t hipLaunchKernel(const void *function_address, dim3 numBlocks, dim3 dimBlocks, void **args, size_t sharedMemBytes __dparm(0), hipStream_t stream __dparm(0))
C compliant kernel launch API.
Coordinates::Y
Definition: hip_runtime.h:248
Coordinates
Definition: hip_runtime.h:244
__host__
#define __host__
Definition: host_defines.h:41
Coordinates::X
Definition: hip_runtime.h:247
host_defines.h
TODO-doc.
ihipStream_t
Definition: hip_hcc_internal.h:580
dim3
Definition: hip_runtime_api.h:320
hip_impl::WorkitemId
Definition: hip_runtime.h:233
hip_impl::GroupId
Definition: hip_runtime.h:211
Coordinates::Z
Definition: hip_runtime.h:249