31 #ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_COOPERATIVE_GROUPS_HELPER_H
32 #define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_COOPERATIVE_GROUPS_HELPER_H
36 #include <hip/hcc_detail/device_functions.h>
38 #if !defined(__align__)
39 #define __align__(x) __attribute__((aligned(x)))
42 #if !defined(__CG_QUALIFIER__)
43 #define __CG_QUALIFIER__ __device__ __forceinline__
46 #if !defined(__CG_STATIC_QUALIFIER__)
47 #define __CG_STATIC_QUALIFIER__ __device__ static __forceinline__
50 #if !defined(WAVEFRONT_SIZE)
51 #define WAVEFRONT_SIZE 64
54 namespace cooperative_groups {
70 namespace multi_grid {
72 __CG_STATIC_QUALIFIER__ uint32_t num_grids() {
73 return (uint32_t)__ockl_multi_grid_num_grids();
76 __CG_STATIC_QUALIFIER__ uint32_t grid_rank() {
77 return (uint32_t)__ockl_multi_grid_grid_rank();
80 __CG_STATIC_QUALIFIER__ uint32_t size() {
81 return (uint32_t)__ockl_multi_grid_size();
84 __CG_STATIC_QUALIFIER__ uint32_t thread_rank() {
85 return (uint32_t)__ockl_multi_grid_thread_rank();
88 __CG_STATIC_QUALIFIER__
bool is_valid() {
89 return (
bool)__ockl_multi_grid_is_valid();
92 __CG_STATIC_QUALIFIER__
void sync() {
93 __ockl_multi_grid_sync();
103 __CG_STATIC_QUALIFIER__ uint32_t size() {
104 return (uint32_t)((hipBlockDim_z * hipGridDim_z) *
105 (hipBlockDim_y * hipGridDim_y) *
106 (hipBlockDim_x * hipGridDim_x));
109 __CG_STATIC_QUALIFIER__ uint32_t thread_rank() {
112 (uint32_t)((hipBlockIdx_z * hipGridDim_y * hipGridDim_x) +
113 (hipBlockIdx_y * hipGridDim_x) +
118 uint32_t num_threads_till_current_workgroup =
119 (uint32_t)(blkIdx * (hipBlockDim_x * hipBlockDim_y * hipBlockDim_z));
122 uint32_t local_thread_rank =
123 (uint32_t)((hipThreadIdx_z * hipBlockDim_y * hipBlockDim_x) +
124 (hipThreadIdx_y * hipBlockDim_x) +
127 return (num_threads_till_current_workgroup + local_thread_rank);
130 __CG_STATIC_QUALIFIER__
bool is_valid() {
131 return (
bool)__ockl_grid_is_valid();
134 __CG_STATIC_QUALIFIER__
void sync() {
144 namespace workgroup {
146 __CG_STATIC_QUALIFIER__
dim3 group_index() {
147 return (
dim3((uint32_t)hipBlockIdx_x, (uint32_t)hipBlockIdx_y,
148 (uint32_t)hipBlockIdx_z));
151 __CG_STATIC_QUALIFIER__
dim3 thread_index() {
152 return (
dim3((uint32_t)hipThreadIdx_x, (uint32_t)hipThreadIdx_y,
153 (uint32_t)hipThreadIdx_z));
156 __CG_STATIC_QUALIFIER__ uint32_t size() {
157 return((uint32_t)(hipBlockDim_x * hipBlockDim_y * hipBlockDim_z));
160 __CG_STATIC_QUALIFIER__ uint32_t thread_rank() {
161 return ((uint32_t)((hipThreadIdx_z * hipBlockDim_y * hipBlockDim_x) +
162 (hipThreadIdx_y * hipBlockDim_x) +
166 __CG_STATIC_QUALIFIER__
bool is_valid() {
171 __CG_STATIC_QUALIFIER__
void sync() {
181 #endif // __cplusplus
182 #endif // HIP_INCLUDE_HIP_HCC_DETAIL_HIP_COOPERATIVE_GROUPS_HELPER_H