HIP: Heterogenous-computing Interface for Portability
HIP Bugs

HIP is more restrictive in enforcing restrictions

The language specification for HIP and CUDA forbid calling a __device__ function in a __host__ context. In practice, you may observe differences in the strictness of this restriction, with HIP exhibiting a tighter adherence to the specification and thus less tolerant of infringing code. The solution is to ensure that all functions which are called in a __device__ context are correctly annotated to reflect it.

The following is an example of codes using the specification,

1 #include <hip/hip_runtime.h>
2 #include <type_traits>
3 #include <random>
4 #include "test_common.h"
5 
6 static std::random_device dev;
7 static std::mt19937 rng(dev());
8 
9 template <typename T, typename M>
10 __host__ __device__ inline constexpr int count() {
11  return sizeof(T) / sizeof(M);
12 }
13 
14 inline float getRandomFloat(float min = 10, float max = 100) {
15  std::uniform_real_distribution<float> gen(min, max);
16  return gen(rng);
17 }
18 
19 template <typename T, typename B>
20 void fillMatrix(T* a, int size) {
21  for (int i = 0; i < size; i++) {
22  T t;
23  t.x = getRandomFloat();
24  if constexpr (count<T, B>() >= 2) t.y = getRandomFloat();
25  if constexpr (count<T, B>() >= 3) t.z = getRandomFloat();
26  if constexpr (count<T, B>() >= 4) t.w = getRandomFloat();
27 
28  a[i] = t;
29  }
30 }
31 
32 // Test operations
33 template <typename T, typename B>
34 __host__ __device__ void testOperations(T& a, T& b) {
35  a.x += b.x;
36  a.x++;
37  b.x++;
38  if constexpr (count<T, B>() >= 2) {
39  a.y = b.x;
40  a.x = b.y;
41  }
42  if constexpr (count<T, B>() >= 3) {
43  if (a.x > 0) b.x /= a.x;
44  a.x *= b.z;
45  a.y--;
46  }
47  if constexpr (count<T, B>() >= 4) {
48  b.w = a.x;
49  a.w += (-b.y);
50  }
51 }
52 
53 template <typename T, typename B>
54 __global__ void testOperationsGPU(T* d_a, T* d_b, int size) {
55  int id = threadIdx.x;
56  if (id > size) return;
57  T &a = d_a[id];
58  T &b = d_b[id];
59 
60  testOperations<T, B>(a, b);
61 }
62 
63 
64 template <typename T>
65 void dcopy(T* a, T* b, int size) {
66  for (int i = 0; i < size; i++) {
67  a[i] = b[i];
68  }
69 }
70 
71 template <typename T>
72 bool isEqual(T* a, T* b, int size) {
73  for (int i = 0; i < size; i++) {
74  if (a[i] != b[i]) {
75  return false;
76  }
77  }
78  return true;
79 }
80 
81 // Main function that tests type
82 // T = what you want to test
83 // D = pack of 1 i.e. float1 int1
84 template <typename T, typename D>
85 void testType(int msize) {
86  T *fa, *fb, *fc, *h_fa, *h_fb;
87  fa = new T[msize];
88  fb = new T[msize];
89  fc = new T[msize];
90  h_fa = new T[msize];
91  h_fb = new T[msize];
92 
93  T *d_fa, *d_fb;
94 
95  constexpr int c = count<T, D>();
96 
97  if (c <= 0 || c >= 5) {
98  failed("Invalid Size\n");
99  }
100 
101  fillMatrix<T, D>(fa, msize);
102  dcopy(fb, fa, msize);
103  dcopy(h_fa, fa, msize);
104  dcopy(h_fb, fa, msize);
105  for (int i = 0; i < msize; i++) testOperations<T, D>(h_fa[i], h_fb[i]);
106 
107  hipMalloc(&d_fa, sizeof(T) * msize);
108  hipMalloc(&d_fb, sizeof(T) * msize);
109 
110  hipMemcpy(d_fa, fa, sizeof(T) * msize, hipMemcpyHostToDevice);
111  hipMemcpy(d_fb, fb, sizeof(T) * msize, hipMemcpyHostToDevice);
112 
113  auto kernel = testOperationsGPU<T, D>;
114  hipLaunchKernelGGL(kernel, 1, msize, 0, 0, d_fa, d_fb, msize);
115 
116  hipMemcpy(fc, d_fa, sizeof(T) * msize, hipMemcpyDeviceToHost);
117 
118  bool pass = true;
119  if (!isEqual<T>(h_fa, fc, msize)) {
120  pass = false;
121  }
122 
123  delete[] fa;
124  delete[] fb;
125  delete[] fc;
126  delete[] h_fa;
127  delete[] h_fb;
128  hipFree(d_fa);
129  hipFree(d_fb);
130 
131  if (!pass) {
132  failed("Failed");
133  }
134 }
135 
136 int main() {
137  const int msize = 100;
138  // double
139  testType<double1, double1>(msize);
140  testType<double2, double1>(msize);
141  testType<double3, double1>(msize);
142  testType<double4, double1>(msize);
143 
144  // floats
145  testType<float1, float1>(msize);
146  testType<float2, float1>(msize);
147  testType<float3, float1>(msize);
148  testType<float4, float1>(msize);
149  ...
150  passed();
151 }

For more details for the complete program, please refer to HIP test application at the link, https://github.com/ROCm-Developer-Tools/HIP/blob/main/tests/src/deviceLib/hip_floatnTM.cpp