29 #ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_RUNTIME_H 30 #define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_RUNTIME_H 32 #include <hip/hcc_detail/hip_common.h> 47 #if __HCC_OR_HIP_CLANG__ 50 #if !defined(__align__) 51 #define __align__(x) __attribute__((aligned(x))) 55 #define CUDA_SUCCESS hipSuccess 57 #include <hip/hip_runtime_api.h> 58 #endif // __HCC_OR_HIP_CLANG__ 62 #ifdef HIP_ENABLE_PRINTF 63 #define HCC_ENABLE_ACCELERATOR_PRINTF 1 69 #include "grid_launch.h" 70 #include "hc_printf.hpp" 74 #if GENERIC_GRID_LAUNCH == 0 75 #define hipLaunchParm grid_launch_parm 78 struct Empty_launch_parm {};
80 #define hipLaunchParm hip_impl::Empty_launch_parm 81 #endif // GENERIC_GRID_LAUNCH 83 #if defined(GRID_LAUNCH_VERSION) and (GRID_LAUNCH_VERSION >= 20) || GENERIC_GRID_LAUNCH == 1 84 #else // Use field names for grid_launch 2.0 structure, if HCC supports GL 2.0. 85 #error(HCC must support GRID_LAUNCH_20) 86 #endif // GRID_LAUNCH_VERSION 90 #if GENERIC_GRID_LAUNCH == 1 && defined __HCC__ 91 #include "grid_launch_GGL.hpp" 92 #endif // GENERIC_GRID_LAUNCH 96 #if __HCC_OR_HIP_CLANG__ 97 extern int HIP_TRACE_API;
100 #include <hip/hcc_detail/hip_ldg.h> 102 #include <hip/hcc_detail/hip_atomic.h> 104 #include <hip/hcc_detail/device_functions.h> 105 #include <hip/hcc_detail/surface_functions.h> 106 #include <hip/hcc_detail/texture_functions.h> 108 #include <hip/hcc_detail/math_functions.h> 111 #if defined(__KALMAR_ACCELERATOR__) && !defined(__HCC_ACCELERATOR__) 112 #define __HCC_ACCELERATOR__ __KALMAR_ACCELERATOR__ 116 #if __HIP_DEVICE_COMPILE__ == 1 118 #define assert(COND) \ 128 #if (defined(__HCC_ACCELERATOR__) && (__HCC_ACCELERATOR__ != 0)) || __HIP_DEVICE_COMPILE__ 132 #define __HIP_ARCH_HAS_GLOBAL_INT32_ATOMICS__ (1) 133 #define __HIP_ARCH_HAS_GLOBAL_FLOAT_ATOMIC_EXCH__ (1) 134 #define __HIP_ARCH_HAS_SHARED_INT32_ATOMICS__ (1) 135 #define __HIP_ARCH_HAS_SHARED_FLOAT_ATOMIC_EXCH__ (1) 136 #define __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__ (1) 139 #define __HIP_ARCH_HAS_GLOBAL_INT64_ATOMICS__ (1) 140 #define __HIP_ARCH_HAS_SHARED_INT64_ATOMICS__ (0) 143 #define __HIP_ARCH_HAS_DOUBLES__ (1) 146 #define __HIP_ARCH_HAS_WARP_VOTE__ (1) 147 #define __HIP_ARCH_HAS_WARP_BALLOT__ (1) 148 #define __HIP_ARCH_HAS_WARP_SHUFFLE__ (1) 149 #define __HIP_ARCH_HAS_WARP_FUNNEL_SHIFT__ (0) 152 #define __HIP_ARCH_HAS_THREAD_FENCE_SYSTEM__ (1) 153 #define __HIP_ARCH_HAS_SYNC_THREAD_EXT__ (0) 156 #define __HIP_ARCH_HAS_SURFACE_FUNCS__ (0) 157 #define __HIP_ARCH_HAS_3DGRID__ (1) 158 #define __HIP_ARCH_HAS_DYNAMIC_PARALLEL__ (0) 163 #define launch_bounds_impl0(requiredMaxThreadsPerBlock) \ 164 __attribute__((amdgpu_flat_work_group_size(1, requiredMaxThreadsPerBlock))) 165 #define launch_bounds_impl1(requiredMaxThreadsPerBlock, minBlocksPerMultiprocessor) \ 166 __attribute__((amdgpu_flat_work_group_size(1, requiredMaxThreadsPerBlock), \ 167 amdgpu_waves_per_eu(minBlocksPerMultiprocessor))) 168 #define select_impl_(_1, _2, impl_, ...) impl_ 169 #define __launch_bounds__(...) 
\ 170 select_impl_(__VA_ARGS__, launch_bounds_impl1, launch_bounds_impl0)(__VA_ARGS__) 173 #if defined(__cplusplus) 175 #elif defined(__STDC_VERSION__) 179 __host__ inline void* __get_dynamicgroupbaseptr() {
return nullptr; }
181 #if __HIP_ARCH_GFX701__ == 0 183 __device__
unsigned __hip_ds_bpermute(
int index,
unsigned src);
184 __device__
float __hip_ds_bpermutef(
int index,
float src);
185 __device__
unsigned __hip_ds_permute(
int index,
unsigned src);
186 __device__
float __hip_ds_permutef(
int index,
float src);
188 template <
int pattern>
189 __device__
unsigned __hip_ds_swizzle_N(
unsigned int src);
190 template <
int pattern>
191 __device__
float __hip_ds_swizzlef_N(
float src);
193 template <
int dpp_ctrl,
int row_mask,
int bank_mask,
bool bound_ctrl>
194 __device__
int __hip_move_dpp_N(
int src);
196 #endif //__HIP_ARCH_GFX803__ == 1 198 #endif // __HCC_OR_HIP_CLANG__ 204 using R = decltype(hc_get_group_id(0));
207 R operator()(std::uint32_t x)
const noexcept {
return hc_get_group_id(x); }
210 using R = decltype(hc_get_group_size(0));
213 R operator()(std::uint32_t x)
const noexcept {
214 return hc_get_group_size(x);
218 using R = decltype(hc_get_num_groups(0));
221 R operator()(std::uint32_t x)
const noexcept {
222 return hc_get_num_groups(x);
226 using R = decltype(hc_get_workitem_id(0));
229 R operator()(std::uint32_t x)
const noexcept {
230 return hc_get_workitem_id(x);
235 template <
typename F>
237 using R = decltype(F{}(0));
239 struct X { __device__
operator R()
const noexcept {
return F{}(0); } };
240 struct Y { __device__
operator R()
const noexcept {
return F{}(1); } };
241 struct Z { __device__
operator R()
const noexcept {
return F{}(2); } };
243 static constexpr
X x{};
244 static constexpr
Y y{};
245 static constexpr
Z z{};
252 return hc_get_grid_size(0);
258 return hc_get_grid_size(0);
264 return hc_get_grid_size(1);
270 return hc_get_grid_size(1);
276 return hc_get_grid_size(2);
282 return hc_get_grid_size(2);
290 #define hipThreadIdx_x (hc_get_workitem_id(0)) 291 #define hipThreadIdx_y (hc_get_workitem_id(1)) 292 #define hipThreadIdx_z (hc_get_workitem_id(2)) 294 #define hipBlockIdx_x (hc_get_group_id(0)) 295 #define hipBlockIdx_y (hc_get_group_id(1)) 296 #define hipBlockIdx_z (hc_get_group_id(2)) 298 #define hipBlockDim_x (hc_get_group_size(0)) 299 #define hipBlockDim_y (hc_get_group_size(1)) 300 #define hipBlockDim_z (hc_get_group_size(2)) 302 #define hipGridDim_x (hc_get_num_groups(0)) 303 #define hipGridDim_y (hc_get_num_groups(1)) 304 #define hipGridDim_z (hc_get_num_groups(2)) 306 #endif // defined __HCC__ 307 #if __HCC_OR_HIP_CLANG__ 308 extern "C" __device__
void* __hip_malloc(
size_t);
309 extern "C" __device__
void* __hip_free(
void* ptr);
311 static inline __device__
void* malloc(
size_t size) {
return __hip_malloc(size); }
312 static inline __device__
void* free(
void* ptr) {
return __hip_free(ptr); }
314 #if defined(__HCC_ACCELERATOR__) && defined(HC_FEATURE_PRINTF) 315 template <
typename... All>
316 static inline __device__
void printf(
const char* format, All... all) {
317 hc::printf(format, all...);
319 #elif defined(__HCC_ACCELERATOR__) || __HIP__ 320 template <
typename... All>
321 static inline __device__
void printf(
const char* format, All... all) {}
324 #endif //__HCC_OR_HIP_CLANG__ 328 #define __syncthreads() hc_barrier(CLK_LOCAL_MEM_FENCE) 330 #define HIP_KERNEL_NAME(...) (__VA_ARGS__) 331 #define HIP_SYMBOL(X) #X 333 #if defined __HCC_CPP__ 344 #if GENERIC_GRID_LAUNCH == 0 348 #define hipLaunchKernel(_kernelName, _numBlocks3D, _blockDim3D, _groupMemBytes, _stream, ...) \ 350 grid_launch_parm lp; \ 351 lp.dynamic_group_mem_bytes = _groupMemBytes; \ 352 hipStream_t trueStream = \ 353 (ihipPreLaunchKernel(_stream, _numBlocks3D, _blockDim3D, &lp, #_kernelName)); \ 354 _kernelName(lp, ##__VA_ARGS__); \ 355 ihipPostLaunchKernel(#_kernelName, trueStream, lp); \ 357 #endif // GENERIC_GRID_LAUNCH 359 #elif defined(__HCC_C__) 386 #elif defined(__clang__) && defined(__HIP__) 388 #define HIP_KERNEL_NAME(...) __VA_ARGS__ 389 #define HIP_SYMBOL(X) #X 391 typedef int hipLaunchParm;
393 #define hipLaunchKernelGGL(kernelName, numblocks, numthreads, memperblock, streamId, ...) \ 395 kernelName<<<(numblocks), (numthreads), (memperblock), (streamId)>>>(__VA_ARGS__); \ 398 #include <hip/hip_runtime_api.h> 400 #pragma push_macro("__DEVICE__") 401 #define __DEVICE__ static __device__ __forceinline__ 403 extern "C" __device__
size_t __ockl_get_local_id(uint);
404 __DEVICE__ uint __hip_get_thread_idx_x() {
return __ockl_get_local_id(0); }
405 __DEVICE__ uint __hip_get_thread_idx_y() {
return __ockl_get_local_id(1); }
406 __DEVICE__ uint __hip_get_thread_idx_z() {
return __ockl_get_local_id(2); }
408 extern "C" __device__
size_t __ockl_get_group_id(uint);
409 __DEVICE__ uint __hip_get_block_idx_x() {
return __ockl_get_group_id(0); }
410 __DEVICE__ uint __hip_get_block_idx_y() {
return __ockl_get_group_id(1); }
411 __DEVICE__ uint __hip_get_block_idx_z() {
return __ockl_get_group_id(2); }
413 extern "C" __device__
size_t __ockl_get_local_size(uint);
414 __DEVICE__ uint __hip_get_block_dim_x() {
return __ockl_get_local_size(0); }
415 __DEVICE__ uint __hip_get_block_dim_y() {
return __ockl_get_local_size(1); }
416 __DEVICE__ uint __hip_get_block_dim_z() {
return __ockl_get_local_size(2); }
418 extern "C" __device__
size_t __ockl_get_num_groups(uint);
419 __DEVICE__ uint __hip_get_grid_dim_x() {
return __ockl_get_num_groups(0); }
420 __DEVICE__ uint __hip_get_grid_dim_y() {
return __ockl_get_num_groups(1); }
421 __DEVICE__ uint __hip_get_grid_dim_z() {
return __ockl_get_num_groups(2); }
423 #define __HIP_DEVICE_BUILTIN(DIMENSION, FUNCTION) \ 424 __declspec(property(get = __get_##DIMENSION)) uint DIMENSION; \ 425 __DEVICE__ uint __get_##DIMENSION(void) { \ 429 struct __hip_builtin_threadIdx_t {
430 __HIP_DEVICE_BUILTIN(x,__hip_get_thread_idx_x());
431 __HIP_DEVICE_BUILTIN(y,__hip_get_thread_idx_y());
432 __HIP_DEVICE_BUILTIN(z,__hip_get_thread_idx_z());
435 struct __hip_builtin_blockIdx_t {
436 __HIP_DEVICE_BUILTIN(x,__hip_get_block_idx_x());
437 __HIP_DEVICE_BUILTIN(y,__hip_get_block_idx_y());
438 __HIP_DEVICE_BUILTIN(z,__hip_get_block_idx_z());
441 struct __hip_builtin_blockDim_t {
442 __HIP_DEVICE_BUILTIN(x,__hip_get_block_dim_x());
443 __HIP_DEVICE_BUILTIN(y,__hip_get_block_dim_y());
444 __HIP_DEVICE_BUILTIN(z,__hip_get_block_dim_z());
447 struct __hip_builtin_gridDim_t {
448 __HIP_DEVICE_BUILTIN(x,__hip_get_grid_dim_x());
449 __HIP_DEVICE_BUILTIN(y,__hip_get_grid_dim_y());
450 __HIP_DEVICE_BUILTIN(z,__hip_get_grid_dim_z());
453 #undef __HIP_DEVICE_BUILTIN 454 #pragma pop_macro("__DEVICE__") 456 extern const __device__ __attribute__((weak)) __hip_builtin_threadIdx_t threadIdx;
457 extern const __device__ __attribute__((weak)) __hip_builtin_blockIdx_t blockIdx;
458 extern const __device__ __attribute__((weak)) __hip_builtin_blockDim_t blockDim;
459 extern const __device__ __attribute__((weak)) __hip_builtin_gridDim_t gridDim;
462 #define hipThreadIdx_x threadIdx.x 463 #define hipThreadIdx_y threadIdx.y 464 #define hipThreadIdx_z threadIdx.z 466 #define hipBlockIdx_x blockIdx.x 467 #define hipBlockIdx_y blockIdx.y 468 #define hipBlockIdx_z blockIdx.z 470 #define hipBlockDim_x blockDim.x 471 #define hipBlockDim_y blockDim.y 472 #define hipBlockDim_z blockDim.z 474 #define hipGridDim_x gridDim.x 475 #define hipGridDim_y gridDim.y 476 #define hipGridDim_z gridDim.z 478 #include <hip/hcc_detail/math_functions.h> 480 #if __HIP_HCC_COMPAT_MODE__ 482 #pragma push_macro("__DEFINE_HCC_FUNC") 483 #define __DEFINE_HCC_FUNC(hc_fun,hip_var) \ 484 inline __device__ __attribute__((always_inline)) uint hc_get_##hc_fun(uint i) { \ 493 __DEFINE_HCC_FUNC(workitem_id, threadIdx)
494 __DEFINE_HCC_FUNC(group_id, blockIdx)
495 __DEFINE_HCC_FUNC(group_size, blockDim)
496 __DEFINE_HCC_FUNC(num_groups, gridDim)
497 #pragma pop_macro("__DEFINE_HCC_FUNC") 499 extern "C" __device__ __attribute__((
const))
size_t __ockl_get_global_id(uint);
500 inline __device__ __attribute__((always_inline)) uint
501 hc_get_workitem_absolute_id(
int dim)
503 return (uint)__ockl_get_global_id(dim);
510 #pragma push_macro("__CUDA__") 512 #include <__clang_cuda_math_forward_declares.h> 513 #include <__clang_cuda_complex_builtins.h> 514 #include <cuda_wrappers/algorithm> 515 #include <cuda_wrappers/complex> 516 #include <cuda_wrappers/new> 518 #pragma pop_macro("__CUDA__") 519 #endif // ndef _OPENMP 521 #endif // defined(__clang__) && defined(__HIP__) 523 #include <hip/hcc_detail/hip_memory.h> 525 #endif // HIP_HCC_DETAIL_RUNTIME_H Definition: hip_runtime.h:225
/* Doxygen extraction residue (cross-reference tooltips), preserved as a
 * comment so the header remains well-formed:
 *   Definition: hip_runtime.h:240 ; hip_runtime.h:241 ; hip_runtime.h:217 ;
 *   hip_runtime_api.h:269 ; #define __host__ (Definition: host_defines.h:41) ;
 *   hip_runtime.h:203 ; grid_launch.h:31 ; hip_runtime.h:202 ;
 *   hip_hcc_internal.h:544 ; hip_runtime.h:239 ; hip_runtime.h:209 ;
 *   hip_runtime.h:236
 */