HIP: Heterogeneous-computing Interface for Portability
hip_runtime_api.h
1 /*
2 Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
3 
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 THE SOFTWARE.
21 */
22 
23 #ifndef HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_API_H
24 #define HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_API_H
25 
26 #include <cuda_runtime_api.h>
27 #include <cuda.h>
28 #include <cuda_profiler_api.h>
29 
30 #ifdef __cplusplus
31 extern "C" {
32 #endif
33 
// Default-argument helper for the wrapper declarations: expands to "= x" when
// compiled as C++ and to nothing when compiled as C.
#if !defined(__cplusplus)
#define __dparm(x)
#else
#define __dparm(x) = x
#endif
39 
// Deprecation marker for CUDA-mapped HIP APIs.
// Expands to nothing for Doxygen builds or when HIP_ENABLE_DEPRECATED is
// defined; otherwise it selects the MSVC or GNU deprecation attribute when
// one of those compilers is detected, and nothing for any other compiler.
#if defined(__DOXYGEN_ONLY__) || defined(HIP_ENABLE_DEPRECATED)
#define __HIP_DEPRECATED
#else
#if defined(_MSC_VER)
#define __HIP_DEPRECATED __declspec(deprecated)
#elif defined(__GNUC__)
#define __HIP_DEPRECATED __attribute__((deprecated))
#else
#define __HIP_DEPRECATED
#endif
#endif
50 
51 
// TODO: move to include/hip_runtime_api.h as a common implementation.
// Direction selector for the hipMemcpy* wrappers. The enumerators are numbered
// consecutively from zero in declaration order.
typedef enum hipMemcpyKind {
    hipMemcpyHostToHost = 0,
    hipMemcpyHostToDevice = 1,
    hipMemcpyDeviceToHost = 2,
    hipMemcpyDeviceToDevice = 3,
    hipMemcpyDefault = 4
} hipMemcpyKind;
64 
// hipDataType: HIP spellings for the CUDA data-type enum and its enumerators.
#define hipDataType cudaDataType
#define HIP_R_16F   CUDA_R_16F
#define HIP_R_32F   CUDA_R_32F
#define HIP_R_64F   CUDA_R_64F
#define HIP_C_16F   CUDA_C_16F
#define HIP_C_32F   CUDA_C_32F
#define HIP_C_64F   CUDA_C_64F

// hipLibraryPropertyType: HIP spellings for libraryPropertyType and its values.
#define hipLibraryPropertyType    libraryPropertyType
#define HIP_LIBRARY_MAJOR_VERSION MAJOR_VERSION
#define HIP_LIBRARY_MINOR_VERSION MINOR_VERSION
#define HIP_LIBRARY_PATCH_LEVEL   PATCH_LEVEL
79 
80 // hipTextureAddressMode
81 typedef enum cudaTextureAddressMode hipTextureAddressMode;
82 #define hipAddressModeWrap cudaAddressModeWrap
83 #define hipAddressModeClamp cudaAddressModeClamp
84 #define hipAddressModeMirror cudaAddressModeMirror
85 #define hipAddressModeBorder cudaAddressModeBorder
86 
87 // hipTextureFilterMode
88 typedef enum cudaTextureFilterMode hipTextureFilterMode;
89 #define hipFilterModePoint cudaFilterModePoint
90 #define hipFilterModeLinear cudaFilterModeLinear
91 
92 // hipTextureReadMode
93 typedef enum cudaTextureReadMode hipTextureReadMode;
94 #define hipReadModeElementType cudaReadModeElementType
95 #define hipReadModeNormalizedFloat cudaReadModeNormalizedFloat
96 
97 // hipChannelFormatKind
98 typedef enum cudaChannelFormatKind hipChannelFormatKind;
99 #define hipChannelFormatKindSigned cudaChannelFormatKindSigned
100 #define hipChannelFormatKindUnsigned cudaChannelFormatKindUnsigned
101 #define hipChannelFormatKindFloat cudaChannelFormatKindFloat
102 #define hipChannelFormatKindNone cudaChannelFormatKindNone
103 
104 #define hipSurfaceBoundaryMode cudaSurfaceBoundaryMode
105 #define hipBoundaryModeZero cudaBoundaryModeZero
106 #define hipBoundaryModeTrap cudaBoundaryModeTrap
107 #define hipBoundaryModeClamp cudaBoundaryModeClamp
108 
// hipFuncCache: enumerator aliases for the cudaFuncCachePrefer* values.
#define hipFuncCachePreferNone   cudaFuncCachePreferNone
#define hipFuncCachePreferShared cudaFuncCachePreferShared
#define hipFuncCachePreferL1     cudaFuncCachePreferL1
#define hipFuncCachePreferEqual  cudaFuncCachePreferEqual

// hipResourceType: alias of cudaResourceType and its enumerators.
#define hipResourceType               cudaResourceType
#define hipResourceTypeArray          cudaResourceTypeArray
#define hipResourceTypeMipmappedArray cudaResourceTypeMipmappedArray
#define hipResourceTypeLinear         cudaResourceTypeLinear
#define hipResourceTypePitch2D        cudaResourceTypePitch2D
121 //
122 // hipErrorNoDevice.
123 
124 
// Event creation flags (aliases of the cudaEvent* flags).
#define hipEventDefault         cudaEventDefault
#define hipEventBlockingSync    cudaEventBlockingSync
#define hipEventDisableTiming   cudaEventDisableTiming
#define hipEventInterprocess    cudaEventInterprocess
#define hipEventReleaseToDevice 0 /* no-op on CUDA platform */
#define hipEventReleaseToSystem 0 /* no-op on CUDA platform */

// Pinned host allocation flags (aliases of the cudaHostAlloc* flags).
// The coherent / non-coherent selectors are defined as zero in this header.
#define hipHostMallocDefault       cudaHostAllocDefault
#define hipHostMallocPortable      cudaHostAllocPortable
#define hipHostMallocMapped        cudaHostAllocMapped
#define hipHostMallocWriteCombined cudaHostAllocWriteCombined
#define hipHostMallocCoherent      0x0
#define hipHostMallocNonCoherent   0x0

// Managed-memory attach flags.
#define hipMemAttachGlobal cudaMemAttachGlobal
#define hipMemAttachHost   cudaMemAttachHost

// Host registration flags.
#define hipHostRegisterDefault  cudaHostRegisterDefault
#define hipHostRegisterPortable cudaHostRegisterPortable
#define hipHostRegisterMapped   cudaHostRegisterMapped
#define hipHostRegisterIoMemory cudaHostRegisterIoMemory

// Driver-API launch parameter markers, limits and IPC flags.
#define HIP_LAUNCH_PARAM_BUFFER_POINTER CU_LAUNCH_PARAM_BUFFER_POINTER
#define HIP_LAUNCH_PARAM_BUFFER_SIZE    CU_LAUNCH_PARAM_BUFFER_SIZE
#define HIP_LAUNCH_PARAM_END            CU_LAUNCH_PARAM_END
#define hipLimitMallocHeapSize          cudaLimitMallocHeapSize
#define hipIpcMemLazyEnablePeerAccess   cudaIpcMemLazyEnablePeerAccess

// Occupancy calculator flag.
#define hipOccupancyDefault cudaOccupancyDefault

// Multi-device cooperative launch flags.
#define hipCooperativeLaunchMultiDeviceNoPreSync \
    cudaCooperativeLaunchMultiDeviceNoPreSync
#define hipCooperativeLaunchMultiDeviceNoPostSync \
    cudaCooperativeLaunchMultiDeviceNoPostSync
161 
162 
// HIP names for the CUjit_option enumerators of the CUDA driver API.
#define hipJitOptionMaxRegisters            CU_JIT_MAX_REGISTERS
#define hipJitOptionThreadsPerBlock         CU_JIT_THREADS_PER_BLOCK
#define hipJitOptionWallTime                CU_JIT_WALL_TIME
#define hipJitOptionInfoLogBuffer           CU_JIT_INFO_LOG_BUFFER
#define hipJitOptionInfoLogBufferSizeBytes  CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES
#define hipJitOptionErrorLogBuffer          CU_JIT_ERROR_LOG_BUFFER
#define hipJitOptionErrorLogBufferSizeBytes CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES
#define hipJitOptionOptimizationLevel       CU_JIT_OPTIMIZATION_LEVEL
#define hipJitOptionTargetFromContext       CU_JIT_TARGET_FROM_CUCONTEXT
#define hipJitOptionTarget                  CU_JIT_TARGET
#define hipJitOptionFallbackStrategy        CU_JIT_FALLBACK_STRATEGY
#define hipJitOptionGenerateDebugInfo       CU_JIT_GENERATE_DEBUG_INFO
#define hipJitOptionLogVerbose              CU_JIT_LOG_VERBOSE
#define hipJitOptionGenerateLineInfo        CU_JIT_GENERATE_LINE_INFO
#define hipJitOptionCacheMode               CU_JIT_CACHE_MODE
#define hipJitOptionSm3xOpt                 CU_JIT_NEW_SM3X_OPT
#define hipJitOptionFastCompile             CU_JIT_FAST_COMPILE
#define hipJitOptionNumOptions              CU_JIT_NUM_OPTIONS
182 
183 typedef cudaEvent_t hipEvent_t;
184 typedef cudaStream_t hipStream_t;
185 typedef cudaIpcEventHandle_t hipIpcEventHandle_t;
186 typedef cudaIpcMemHandle_t hipIpcMemHandle_t;
187 typedef enum cudaLimit hipLimit_t;
188 typedef enum cudaFuncCache hipFuncCache_t;
189 typedef CUcontext hipCtx_t;
190 typedef enum cudaSharedMemConfig hipSharedMemConfig;
191 typedef CUfunc_cache hipFuncCache;
192 typedef CUjit_option hipJitOption;
193 typedef CUdevice hipDevice_t;
194 typedef enum cudaDeviceP2PAttr hipDeviceP2PAttr;
195 typedef CUmodule hipModule_t;
196 typedef CUfunction hipFunction_t;
197 typedef CUdeviceptr hipDeviceptr_t;
198 typedef struct cudaArray hipArray;
199 typedef struct cudaArray* hipArray_t;
200 typedef struct cudaArray* hipArray_const_t;
201 typedef struct cudaFuncAttributes hipFuncAttributes;
202 typedef struct cudaLaunchParams hipLaunchParams;
203 #define hipFunction_attribute CUfunction_attribute
204 #define hip_Memcpy2D CUDA_MEMCPY2D
205 #define hipMemcpy3DParms cudaMemcpy3DParms
206 #define hipArrayDefault cudaArrayDefault
207 #define hipArrayLayered cudaArrayLayered
208 #define hipArraySurfaceLoadStore cudaArraySurfaceLoadStore
209 #define hipArrayCubemap cudaArrayCubemap
210 #define hipArrayTextureGather cudaArrayTextureGather
211 
212 typedef cudaTextureObject_t hipTextureObject_t;
213 typedef cudaSurfaceObject_t hipSurfaceObject_t;
214 #define hipTextureType1D cudaTextureType1D
215 #define hipTextureType1DLayered cudaTextureType1DLayered
216 #define hipTextureType2D cudaTextureType2D
217 #define hipTextureType2DLayered cudaTextureType2DLayered
218 #define hipTextureType3D cudaTextureType3D
219 #define hipDeviceMapHost cudaDeviceMapHost
220 
221 typedef struct cudaExtent hipExtent;
222 typedef struct cudaPitchedPtr hipPitchedPtr;
223 #define make_hipExtent make_cudaExtent
224 #define make_hipPos make_cudaPos
225 #define make_hipPitchedPtr make_cudaPitchedPtr
226 // Flags that can be used with hipStreamCreateWithFlags
227 #define hipStreamDefault cudaStreamDefault
228 #define hipStreamNonBlocking cudaStreamNonBlocking
229 
230 typedef struct cudaChannelFormatDesc hipChannelFormatDesc;
231 typedef struct cudaResourceDesc hipResourceDesc;
232 typedef struct cudaTextureDesc hipTextureDesc;
233 typedef struct cudaResourceViewDesc hipResourceViewDesc;
234 // adding code for hipmemSharedConfig
235 #define hipSharedMemBankSizeDefault cudaSharedMemBankSizeDefault
236 #define hipSharedMemBankSizeFourByte cudaSharedMemBankSizeFourByte
237 #define hipSharedMemBankSizeEightByte cudaSharedMemBankSizeEightByte
238 
// Function attributes: HIP names for the CU_FUNC_ATTRIBUTE_* driver enumerators.
#define HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK \
    CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK
#define HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES
#define HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES  CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES
#define HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES  CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES
#define HIP_FUNC_ATTRIBUTE_NUM_REGS          CU_FUNC_ATTRIBUTE_NUM_REGS
#define HIP_FUNC_ATTRIBUTE_PTX_VERSION       CU_FUNC_ATTRIBUTE_PTX_VERSION
#define HIP_FUNC_ATTRIBUTE_BINARY_VERSION    CU_FUNC_ATTRIBUTE_BINARY_VERSION
#define HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA     CU_FUNC_ATTRIBUTE_CACHE_MODE_CA
#define HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES \
    CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES
#define HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT \
    CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT
#define HIP_FUNC_ATTRIBUTE_MAX CU_FUNC_ATTRIBUTE_MAX
251 
// For CUDA_VERSION >= 9000 the mask-less shuffle spellings are redirected to
// the *_sync intrinsics, always passing the mask 0xffffffff (all 32 bits set).
#if CUDA_VERSION >= 9000
#define __shfl(...)      __shfl_sync(0xffffffff, __VA_ARGS__)
#define __shfl_up(...)   __shfl_up_sync(0xffffffff, __VA_ARGS__)
#define __shfl_down(...) __shfl_down_sync(0xffffffff, __VA_ARGS__)
#define __shfl_xor(...)  __shfl_xor_sync(0xffffffff, __VA_ARGS__)
#endif  // CUDA_VERSION >= 9000
258 
259 inline static hipError_t hipCUDAErrorTohipError(cudaError_t cuError) {
260  switch (cuError) {
261  case cudaSuccess:
262  return hipSuccess;
263  case cudaErrorProfilerDisabled:
264  return hipErrorProfilerDisabled;
265  case cudaErrorProfilerNotInitialized:
266  return hipErrorProfilerNotInitialized;
267  case cudaErrorProfilerAlreadyStarted:
268  return hipErrorProfilerAlreadyStarted;
269  case cudaErrorProfilerAlreadyStopped:
270  return hipErrorProfilerAlreadyStopped;
271  case cudaErrorInsufficientDriver:
272  return hipErrorInsufficientDriver;
273  case cudaErrorUnsupportedLimit:
274  return hipErrorUnsupportedLimit;
275  case cudaErrorPeerAccessUnsupported:
276  return hipErrorPeerAccessUnsupported;
277  case cudaErrorInvalidGraphicsContext:
278  return hipErrorInvalidGraphicsContext;
279  case cudaErrorSharedObjectSymbolNotFound:
280  return hipErrorSharedObjectSymbolNotFound;
281  case cudaErrorSharedObjectInitFailed:
282  return hipErrorSharedObjectInitFailed;
283  case cudaErrorOperatingSystem:
284  return hipErrorOperatingSystem;
285  case cudaErrorSetOnActiveProcess:
286  return hipErrorSetOnActiveProcess;
287  case cudaErrorIllegalAddress:
288  return hipErrorIllegalAddress;
289  case cudaErrorInvalidSymbol:
290  return hipErrorInvalidSymbol;
291  case cudaErrorMissingConfiguration:
292  return hipErrorMissingConfiguration;
293  case cudaErrorMemoryAllocation:
294  return hipErrorOutOfMemory;
295  case cudaErrorInitializationError:
296  return hipErrorNotInitialized;
297  case cudaErrorLaunchFailure:
298  return hipErrorLaunchFailure;
299  case cudaErrorCooperativeLaunchTooLarge:
301  case cudaErrorPriorLaunchFailure:
302  return hipErrorPriorLaunchFailure;
303  case cudaErrorLaunchOutOfResources:
305  case cudaErrorInvalidDeviceFunction:
306  return hipErrorInvalidDeviceFunction;
307  case cudaErrorInvalidConfiguration:
308  return hipErrorInvalidConfiguration;
309  case cudaErrorInvalidDevice:
310  return hipErrorInvalidDevice;
311  case cudaErrorInvalidValue:
312  return hipErrorInvalidValue;
313  case cudaErrorInvalidDevicePointer:
315  case cudaErrorInvalidMemcpyDirection:
317  case cudaErrorInvalidResourceHandle:
318  return hipErrorInvalidHandle;
319  case cudaErrorNotReady:
320  return hipErrorNotReady;
321  case cudaErrorNoDevice:
322  return hipErrorNoDevice;
323  case cudaErrorPeerAccessAlreadyEnabled:
325  case cudaErrorPeerAccessNotEnabled:
327  case cudaErrorHostMemoryAlreadyRegistered:
329  case cudaErrorHostMemoryNotRegistered:
331  case cudaErrorMapBufferObjectFailed:
332  return hipErrorMapFailed;
333  case cudaErrorAssert:
334  return hipErrorAssert;
335  case cudaErrorNotSupported:
336  return hipErrorNotSupported;
337  case cudaErrorCudartUnloading:
338  return hipErrorDeinitialized;
339  case cudaErrorInvalidKernelImage:
340  return hipErrorInvalidImage;
341  case cudaErrorUnmapBufferObjectFailed:
342  return hipErrorUnmapFailed;
343  case cudaErrorNoKernelImageForDevice:
344  return hipErrorNoBinaryForGpu;
345  case cudaErrorECCUncorrectable:
346  return hipErrorECCNotCorrectable;
347  case cudaErrorDeviceAlreadyInUse:
348  return hipErrorContextAlreadyInUse;
349  case cudaErrorInvalidPtx:
351  case cudaErrorLaunchTimeout:
352  return hipErrorLaunchTimeOut;
353 #if CUDA_VERSION >= 10010
354  case cudaErrorInvalidSource:
355  return hipErrorInvalidSource;
356  case cudaErrorFileNotFound:
357  return hipErrorFileNotFound;
358  case cudaErrorSymbolNotFound:
359  return hipErrorNotFound;
360  case cudaErrorArrayIsMapped:
361  return hipErrorArrayIsMapped;
362  case cudaErrorNotMappedAsPointer:
363  return hipErrorNotMappedAsPointer;
364  case cudaErrorNotMappedAsArray:
365  return hipErrorNotMappedAsArray;
366  case cudaErrorNotMapped:
367  return hipErrorNotMapped;
368  case cudaErrorAlreadyAcquired:
369  return hipErrorAlreadyAcquired;
370  case cudaErrorAlreadyMapped:
371  return hipErrorAlreadyMapped;
372 #endif
373 #if CUDA_VERSION >= 10020
374  case cudaErrorDeviceUninitialized:
375  return hipErrorInvalidContext;
376 #endif
377  case cudaErrorUnknown:
378  default:
379  return hipErrorUnknown; // Note - translated error.
380  }
381 }
382 
383 inline static hipError_t hipCUResultTohipError(CUresult cuError) {
384  switch (cuError) {
385  case CUDA_SUCCESS:
386  return hipSuccess;
387  case CUDA_ERROR_OUT_OF_MEMORY:
388  return hipErrorOutOfMemory;
389  case CUDA_ERROR_INVALID_VALUE:
390  return hipErrorInvalidValue;
391  case CUDA_ERROR_INVALID_DEVICE:
392  return hipErrorInvalidDevice;
393  case CUDA_ERROR_DEINITIALIZED:
394  return hipErrorDeinitialized;
395  case CUDA_ERROR_NO_DEVICE:
396  return hipErrorNoDevice;
397  case CUDA_ERROR_INVALID_CONTEXT:
398  return hipErrorInvalidContext;
399  case CUDA_ERROR_NOT_INITIALIZED:
400  return hipErrorNotInitialized;
401  case CUDA_ERROR_INVALID_HANDLE:
402  return hipErrorInvalidHandle;
403  case CUDA_ERROR_MAP_FAILED:
404  return hipErrorMapFailed;
405  case CUDA_ERROR_PROFILER_DISABLED:
406  return hipErrorProfilerDisabled;
407  case CUDA_ERROR_PROFILER_NOT_INITIALIZED:
408  return hipErrorProfilerNotInitialized;
409  case CUDA_ERROR_PROFILER_ALREADY_STARTED:
410  return hipErrorProfilerAlreadyStarted;
411  case CUDA_ERROR_PROFILER_ALREADY_STOPPED:
412  return hipErrorProfilerAlreadyStopped;
413  case CUDA_ERROR_INVALID_IMAGE:
414  return hipErrorInvalidImage;
415  case CUDA_ERROR_CONTEXT_ALREADY_CURRENT:
416  return hipErrorContextAlreadyCurrent;
417  case CUDA_ERROR_UNMAP_FAILED:
418  return hipErrorUnmapFailed;
419  case CUDA_ERROR_ARRAY_IS_MAPPED:
420  return hipErrorArrayIsMapped;
421  case CUDA_ERROR_ALREADY_MAPPED:
422  return hipErrorAlreadyMapped;
423  case CUDA_ERROR_NO_BINARY_FOR_GPU:
424  return hipErrorNoBinaryForGpu;
425  case CUDA_ERROR_ALREADY_ACQUIRED:
426  return hipErrorAlreadyAcquired;
427  case CUDA_ERROR_NOT_MAPPED:
428  return hipErrorNotMapped;
429  case CUDA_ERROR_NOT_MAPPED_AS_ARRAY:
430  return hipErrorNotMappedAsArray;
431  case CUDA_ERROR_NOT_MAPPED_AS_POINTER:
432  return hipErrorNotMappedAsPointer;
433  case CUDA_ERROR_ECC_UNCORRECTABLE:
434  return hipErrorECCNotCorrectable;
435  case CUDA_ERROR_UNSUPPORTED_LIMIT:
436  return hipErrorUnsupportedLimit;
437  case CUDA_ERROR_CONTEXT_ALREADY_IN_USE:
438  return hipErrorContextAlreadyInUse;
439  case CUDA_ERROR_PEER_ACCESS_UNSUPPORTED:
440  return hipErrorPeerAccessUnsupported;
441  case CUDA_ERROR_INVALID_PTX:
443  case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT:
444  return hipErrorInvalidGraphicsContext;
445  case CUDA_ERROR_INVALID_SOURCE:
446  return hipErrorInvalidSource;
447  case CUDA_ERROR_FILE_NOT_FOUND:
448  return hipErrorFileNotFound;
449  case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND:
450  return hipErrorSharedObjectSymbolNotFound;
451  case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED:
452  return hipErrorSharedObjectInitFailed;
453  case CUDA_ERROR_OPERATING_SYSTEM:
454  return hipErrorOperatingSystem;
455  case CUDA_ERROR_NOT_FOUND:
456  return hipErrorNotFound;
457  case CUDA_ERROR_NOT_READY:
458  return hipErrorNotReady;
459  case CUDA_ERROR_ILLEGAL_ADDRESS:
460  return hipErrorIllegalAddress;
461  case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES:
463  case CUDA_ERROR_LAUNCH_TIMEOUT:
464  return hipErrorLaunchTimeOut;
465  case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED:
467  case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED:
469  case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE:
470  return hipErrorSetOnActiveProcess;
471  case CUDA_ERROR_ASSERT:
472  return hipErrorAssert;
473  case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED:
475  case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED:
477  case CUDA_ERROR_LAUNCH_FAILED:
478  return hipErrorLaunchFailure;
479  case CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE:
481  case CUDA_ERROR_NOT_SUPPORTED:
482  return hipErrorNotSupported;
483  case CUDA_ERROR_UNKNOWN:
484  default:
485  return hipErrorUnknown; // Note - translated error.
486  }
487 }
488 
489 inline static cudaError_t hipErrorToCudaError(hipError_t hError) {
490  switch (hError) {
491  case hipSuccess:
492  return cudaSuccess;
493  case hipErrorOutOfMemory:
494  return cudaErrorMemoryAllocation;
495  case hipErrorProfilerDisabled:
496  return cudaErrorProfilerDisabled;
497  case hipErrorProfilerNotInitialized:
498  return cudaErrorProfilerNotInitialized;
499  case hipErrorProfilerAlreadyStarted:
500  return cudaErrorProfilerAlreadyStarted;
501  case hipErrorProfilerAlreadyStopped:
502  return cudaErrorProfilerAlreadyStopped;
503  case hipErrorInvalidConfiguration:
504  return cudaErrorInvalidConfiguration;
506  return cudaErrorLaunchOutOfResources;
508  return cudaErrorInvalidValue;
509  case hipErrorInvalidHandle:
510  return cudaErrorInvalidResourceHandle;
512  return cudaErrorInvalidDevice;
514  return cudaErrorInvalidMemcpyDirection;
516  return cudaErrorInvalidDevicePointer;
517  case hipErrorNotInitialized:
518  return cudaErrorInitializationError;
519  case hipErrorNoDevice:
520  return cudaErrorNoDevice;
521  case hipErrorNotReady:
522  return cudaErrorNotReady;
524  return cudaErrorPeerAccessNotEnabled;
526  return cudaErrorPeerAccessAlreadyEnabled;
528  return cudaErrorHostMemoryAlreadyRegistered;
530  return cudaErrorHostMemoryNotRegistered;
531  case hipErrorDeinitialized:
532  return cudaErrorCudartUnloading;
533  case hipErrorInvalidSymbol:
534  return cudaErrorInvalidSymbol;
535  case hipErrorInsufficientDriver:
536  return cudaErrorInsufficientDriver;
537  case hipErrorMissingConfiguration:
538  return cudaErrorMissingConfiguration;
539  case hipErrorPriorLaunchFailure:
540  return cudaErrorPriorLaunchFailure;
541  case hipErrorInvalidDeviceFunction:
542  return cudaErrorInvalidDeviceFunction;
543  case hipErrorInvalidImage:
544  return cudaErrorInvalidKernelImage;
546 #if CUDA_VERSION >= 10020
547  return cudaErrorDeviceUninitialized;
548 #else
549  return cudaErrorUnknown;
550 #endif
551  case hipErrorMapFailed:
552  return cudaErrorMapBufferObjectFailed;
553  case hipErrorUnmapFailed:
554  return cudaErrorUnmapBufferObjectFailed;
555  case hipErrorArrayIsMapped:
556 #if CUDA_VERSION >= 10010
557  return cudaErrorArrayIsMapped;
558 #else
559  return cudaErrorUnknown;
560 #endif
561  case hipErrorAlreadyMapped:
562 #if CUDA_VERSION >= 10010
563  return cudaErrorAlreadyMapped;
564 #else
565  return cudaErrorUnknown;
566 #endif
567  case hipErrorNoBinaryForGpu:
568  return cudaErrorNoKernelImageForDevice;
569  case hipErrorAlreadyAcquired:
570 #if CUDA_VERSION >= 10010
571  return cudaErrorAlreadyAcquired;
572 #else
573  return cudaErrorUnknown;
574 #endif
575  case hipErrorNotMapped:
576 #if CUDA_VERSION >= 10010
577  return cudaErrorNotMapped;
578 #else
579  return cudaErrorUnknown;
580 #endif
581  case hipErrorNotMappedAsArray:
582 #if CUDA_VERSION >= 10010
583  return cudaErrorNotMappedAsArray;
584 #else
585  return cudaErrorUnknown;
586 #endif
587  case hipErrorNotMappedAsPointer:
588 #if CUDA_VERSION >= 10010
589  return cudaErrorNotMappedAsPointer;
590 #else
591  return cudaErrorUnknown;
592 #endif
593  case hipErrorECCNotCorrectable:
594  return cudaErrorECCUncorrectable;
595  case hipErrorUnsupportedLimit:
596  return cudaErrorUnsupportedLimit;
597  case hipErrorContextAlreadyInUse:
598  return cudaErrorDeviceAlreadyInUse;
599  case hipErrorPeerAccessUnsupported:
600  return cudaErrorPeerAccessUnsupported;
602  return cudaErrorInvalidPtx;
603  case hipErrorInvalidGraphicsContext:
604  return cudaErrorInvalidGraphicsContext;
605  case hipErrorInvalidSource:
606 #if CUDA_VERSION >= 10010
607  return cudaErrorInvalidSource;
608 #else
609  return cudaErrorUnknown;
610 #endif
611  case hipErrorFileNotFound:
612 #if CUDA_VERSION >= 10010
613  return cudaErrorFileNotFound;
614 #else
615  return cudaErrorUnknown;
616 #endif
617  case hipErrorSharedObjectSymbolNotFound:
618  return cudaErrorSharedObjectSymbolNotFound;
619  case hipErrorSharedObjectInitFailed:
620  return cudaErrorSharedObjectInitFailed;
621  case hipErrorOperatingSystem:
622  return cudaErrorOperatingSystem;
623  case hipErrorNotFound:
624 #if CUDA_VERSION >= 10010
625  return cudaErrorSymbolNotFound;
626 #else
627  return cudaErrorUnknown;
628 #endif
629  case hipErrorIllegalAddress:
630  return cudaErrorIllegalAddress;
631  case hipErrorLaunchTimeOut:
632  return cudaErrorLaunchTimeout;
633  case hipErrorSetOnActiveProcess:
634  return cudaErrorSetOnActiveProcess;
636  return cudaErrorLaunchFailure;
638  return cudaErrorCooperativeLaunchTooLarge;
640  return cudaErrorNotSupported;
641  // HSA: does not exist in CUDA
643  // HSA: does not exist in CUDA
645  case hipErrorUnknown:
646  case hipErrorTbd:
647  default:
648  return cudaErrorUnknown; // Note - translated error.
649  }
650 }
651 
652 inline static enum cudaMemcpyKind hipMemcpyKindToCudaMemcpyKind(hipMemcpyKind kind) {
653  switch (kind) {
654  case hipMemcpyHostToHost:
655  return cudaMemcpyHostToHost;
656  case hipMemcpyHostToDevice:
657  return cudaMemcpyHostToDevice;
658  case hipMemcpyDeviceToHost:
659  return cudaMemcpyDeviceToHost;
660  case hipMemcpyDeviceToDevice:
661  return cudaMemcpyDeviceToDevice;
662  default:
663  return cudaMemcpyDefault;
664  }
665 }
666 
667 inline static enum cudaTextureAddressMode hipTextureAddressModeToCudaTextureAddressMode(
668  hipTextureAddressMode kind) {
669  switch (kind) {
670  case hipAddressModeWrap:
671  return cudaAddressModeWrap;
672  case hipAddressModeClamp:
673  return cudaAddressModeClamp;
674  case hipAddressModeMirror:
675  return cudaAddressModeMirror;
676  case hipAddressModeBorder:
677  return cudaAddressModeBorder;
678  default:
679  return cudaAddressModeWrap;
680  }
681 }
682 
683 inline static enum cudaTextureFilterMode hipTextureFilterModeToCudaTextureFilterMode(
684  hipTextureFilterMode kind) {
685  switch (kind) {
686  case hipFilterModePoint:
687  return cudaFilterModePoint;
688  case hipFilterModeLinear:
689  return cudaFilterModeLinear;
690  default:
691  return cudaFilterModePoint;
692  }
693 }
694 
695 inline static enum cudaTextureReadMode hipTextureReadModeToCudaTextureReadMode(hipTextureReadMode kind) {
696  switch (kind) {
697  case hipReadModeElementType:
698  return cudaReadModeElementType;
699  case hipReadModeNormalizedFloat:
700  return cudaReadModeNormalizedFloat;
701  default:
702  return cudaReadModeElementType;
703  }
704 }
705 
706 inline static enum cudaChannelFormatKind hipChannelFormatKindToCudaChannelFormatKind(
707  hipChannelFormatKind kind) {
708  switch (kind) {
709  case hipChannelFormatKindSigned:
710  return cudaChannelFormatKindSigned;
711  case hipChannelFormatKindUnsigned:
712  return cudaChannelFormatKindUnsigned;
713  case hipChannelFormatKindFloat:
714  return cudaChannelFormatKindFloat;
715  case hipChannelFormatKindNone:
716  return cudaChannelFormatKindNone;
717  default:
718  return cudaChannelFormatKindNone;
719  }
720 }
721 
725 #define HIPRT_CB CUDART_CB
726 typedef void(HIPRT_CB* hipStreamCallback_t)(hipStream_t stream, hipError_t status, void* userData);
727 inline static hipError_t hipInit(unsigned int flags) {
728  return hipCUResultTohipError(cuInit(flags));
729 }
730 
731 inline static hipError_t hipDeviceReset() { return hipCUDAErrorTohipError(cudaDeviceReset()); }
732 
733 inline static hipError_t hipGetLastError() { return hipCUDAErrorTohipError(cudaGetLastError()); }
734 
735 inline static hipError_t hipPeekAtLastError() {
736  return hipCUDAErrorTohipError(cudaPeekAtLastError());
737 }
738 
739 inline static hipError_t hipMalloc(void** ptr, size_t size) {
740  return hipCUDAErrorTohipError(cudaMalloc(ptr, size));
741 }
742 
743 inline static hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height) {
744  return hipCUDAErrorTohipError(cudaMallocPitch(ptr, pitch, width, height));
745 }
746 
747 inline static hipError_t hipMemAllocPitch(hipDeviceptr_t* dptr,size_t* pitch,size_t widthInBytes,size_t height,unsigned int elementSizeBytes){
748  return hipCUResultTohipError(cuMemAllocPitch(dptr,pitch,widthInBytes,height,elementSizeBytes));
749 }
750 
751 inline static hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent) {
752  return hipCUDAErrorTohipError(cudaMalloc3D(pitchedDevPtr, extent));
753 }
754 
755 inline static hipError_t hipFree(void* ptr) { return hipCUDAErrorTohipError(cudaFree(ptr)); }
756 
757 inline static hipError_t hipMallocHost(void** ptr, size_t size)
758  __attribute__((deprecated("use hipHostMalloc instead")));
759 inline static hipError_t hipMallocHost(void** ptr, size_t size) {
760  return hipCUDAErrorTohipError(cudaMallocHost(ptr, size));
761 }
762 
763 inline static hipError_t hipMemAllocHost(void** ptr, size_t size)
764  __attribute__((deprecated("use hipHostMalloc instead")));
765 inline static hipError_t hipMemAllocHost(void** ptr, size_t size) {
766  return hipCUResultTohipError(cuMemAllocHost(ptr, size));
767 }
768 
769 inline static hipError_t hipHostAlloc(void** ptr, size_t size, unsigned int flags)
770  __attribute__((deprecated("use hipHostMalloc instead")));
771 inline static hipError_t hipHostAlloc(void** ptr, size_t size, unsigned int flags) {
772  return hipCUDAErrorTohipError(cudaHostAlloc(ptr, size, flags));
773 }
774 
775 inline static hipError_t hipHostMalloc(void** ptr, size_t size, unsigned int flags) {
776  return hipCUDAErrorTohipError(cudaHostAlloc(ptr, size, flags));
777 }
778 
779 inline static hipError_t hipMallocManaged(void** ptr, size_t size, unsigned int flags) {
780  return hipCUDAErrorTohipError(cudaMallocManaged(ptr, size, flags));
781 }
782 
783 inline static hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc,
784  size_t width, size_t height,
785  unsigned int flags __dparm(hipArrayDefault)) {
786  return hipCUDAErrorTohipError(cudaMallocArray(array, desc, width, height, flags));
787 }
788 
789 inline static hipError_t hipMalloc3DArray(hipArray** array, const hipChannelFormatDesc* desc,
790  hipExtent extent, unsigned int flags) {
791  return hipCUDAErrorTohipError(cudaMalloc3DArray(array, desc, extent, flags));
792 }
793 
794 inline static hipError_t hipFreeArray(hipArray* array) {
795  return hipCUDAErrorTohipError(cudaFreeArray(array));
796 }
797 
798 inline static hipError_t hipHostGetDevicePointer(void** devPtr, void* hostPtr, unsigned int flags) {
799  return hipCUDAErrorTohipError(cudaHostGetDevicePointer(devPtr, hostPtr, flags));
800 }
801 
802 inline static hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) {
803  return hipCUDAErrorTohipError(cudaHostGetFlags(flagsPtr, hostPtr));
804 }
805 
806 inline static hipError_t hipHostRegister(void* ptr, size_t size, unsigned int flags) {
807  return hipCUDAErrorTohipError(cudaHostRegister(ptr, size, flags));
808 }
809 
810 inline static hipError_t hipHostUnregister(void* ptr) {
811  return hipCUDAErrorTohipError(cudaHostUnregister(ptr));
812 }
813 
814 inline static hipError_t hipFreeHost(void* ptr)
815  __attribute__((deprecated("use hipHostFree instead")));
816 inline static hipError_t hipFreeHost(void* ptr) {
817  return hipCUDAErrorTohipError(cudaFreeHost(ptr));
818 }
819 
820 inline static hipError_t hipHostFree(void* ptr) {
821  return hipCUDAErrorTohipError(cudaFreeHost(ptr));
822 }
823 
824 inline static hipError_t hipSetDevice(int device) {
825  return hipCUDAErrorTohipError(cudaSetDevice(device));
826 }
827 
828 inline static hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* prop) {
829  struct cudaDeviceProp cdprop;
830  memset(&cdprop, 0x0, sizeof(struct cudaDeviceProp));
831  cdprop.major = prop->major;
832  cdprop.minor = prop->minor;
833  cdprop.totalGlobalMem = prop->totalGlobalMem;
834  cdprop.sharedMemPerBlock = prop->sharedMemPerBlock;
835  cdprop.regsPerBlock = prop->regsPerBlock;
836  cdprop.warpSize = prop->warpSize;
837  cdprop.maxThreadsPerBlock = prop->maxThreadsPerBlock;
838  cdprop.clockRate = prop->clockRate;
839  cdprop.totalConstMem = prop->totalConstMem;
840  cdprop.multiProcessorCount = prop->multiProcessorCount;
841  cdprop.l2CacheSize = prop->l2CacheSize;
842  cdprop.maxThreadsPerMultiProcessor = prop->maxThreadsPerMultiProcessor;
843  cdprop.computeMode = prop->computeMode;
844  cdprop.canMapHostMemory = prop->canMapHostMemory;
845  cdprop.memoryClockRate = prop->memoryClockRate;
846  cdprop.memoryBusWidth = prop->memoryBusWidth;
847  return hipCUDAErrorTohipError(cudaChooseDevice(device, &cdprop));
848 }
849 
850 inline static hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void* src, size_t size) {
851  return hipCUResultTohipError(cuMemcpyHtoD(dst, src, size));
852 }
853 
854 inline static hipError_t hipMemcpyDtoH(void* dst, hipDeviceptr_t src, size_t size) {
855  return hipCUResultTohipError(cuMemcpyDtoH(dst, src, size));
856 }
857 
858 inline static hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t size) {
859  return hipCUResultTohipError(cuMemcpyDtoD(dst, src, size));
860 }
861 
862 inline static hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, void* src, size_t size,
863  hipStream_t stream) {
864  return hipCUResultTohipError(cuMemcpyHtoDAsync(dst, src, size, stream));
865 }
866 
867 inline static hipError_t hipMemcpyDtoHAsync(void* dst, hipDeviceptr_t src, size_t size,
868  hipStream_t stream) {
869  return hipCUResultTohipError(cuMemcpyDtoHAsync(dst, src, size, stream));
870 }
871 
872 inline static hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t size,
873  hipStream_t stream) {
874  return hipCUResultTohipError(cuMemcpyDtoDAsync(dst, src, size, stream));
875 }
876 
877 inline static hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes,
878  hipMemcpyKind copyKind) {
879  return hipCUDAErrorTohipError(
880  cudaMemcpy(dst, src, sizeBytes, hipMemcpyKindToCudaMemcpyKind(copyKind)));
881 }
882 
883 
884 inline static hipError_t hipMemcpyWithStream(void* dst, const void* src,
885  size_t sizeBytes, hipMemcpyKind copyKind,
886  hipStream_t stream) {
887  cudaError_t error = cudaMemcpyAsync(dst, src, sizeBytes,
888  hipMemcpyKindToCudaMemcpyKind(copyKind),
889  stream);
890 
891  if (error != cudaSuccess) return hipCUDAErrorTohipError(error);
892 
893  return hipCUDAErrorTohipError(cudaStreamSynchronize(stream));
894 }
895 
896 inline static hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes,
897  hipMemcpyKind copyKind, hipStream_t stream __dparm(0)) {
898  return hipCUDAErrorTohipError(
899  cudaMemcpyAsync(dst, src, sizeBytes, hipMemcpyKindToCudaMemcpyKind(copyKind), stream));
900 }
901 
902 inline static hipError_t hipMemcpyToSymbol(const void* symbol, const void* src, size_t sizeBytes,
903  size_t offset __dparm(0),
904  hipMemcpyKind copyType __dparm(hipMemcpyHostToDevice)) {
905  return hipCUDAErrorTohipError(cudaMemcpyToSymbol(symbol, src, sizeBytes, offset,
906  hipMemcpyKindToCudaMemcpyKind(copyType)));
907 }
908 
909 inline static hipError_t hipMemcpyToSymbolAsync(const void* symbol, const void* src,
910  size_t sizeBytes, size_t offset,
911  hipMemcpyKind copyType,
912  hipStream_t stream __dparm(0)) {
913  return hipCUDAErrorTohipError(cudaMemcpyToSymbolAsync(
914  symbol, src, sizeBytes, offset, hipMemcpyKindToCudaMemcpyKind(copyType), stream));
915 }
916 
917 inline static hipError_t hipMemcpyFromSymbol(void* dst, const void* symbolName, size_t sizeBytes,
918  size_t offset __dparm(0),
919  hipMemcpyKind kind __dparm(hipMemcpyDeviceToHost)) {
920  return hipCUDAErrorTohipError(cudaMemcpyFromSymbol(dst, symbolName, sizeBytes, offset,
921  hipMemcpyKindToCudaMemcpyKind(kind)));
922 }
923 
924 inline static hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolName,
925  size_t sizeBytes, size_t offset,
926  hipMemcpyKind kind,
927  hipStream_t stream __dparm(0)) {
928  return hipCUDAErrorTohipError(cudaMemcpyFromSymbolAsync(
929  dst, symbolName, sizeBytes, offset, hipMemcpyKindToCudaMemcpyKind(kind), stream));
930 }
931 
932 inline static hipError_t hipGetSymbolAddress(void** devPtr, const void* symbolName) {
933  return hipCUDAErrorTohipError(cudaGetSymbolAddress(devPtr, symbolName));
934 }
935 
936 inline static hipError_t hipGetSymbolSize(size_t* size, const void* symbolName) {
937  return hipCUDAErrorTohipError(cudaGetSymbolSize(size, symbolName));
938 }
939 
940 inline static hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch,
941  size_t width, size_t height, hipMemcpyKind kind) {
942  return hipCUDAErrorTohipError(
943  cudaMemcpy2D(dst, dpitch, src, spitch, width, height, hipMemcpyKindToCudaMemcpyKind(kind)));
944 }
945 
946 inline static hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy) {
947  return hipCUResultTohipError(cuMemcpy2D(pCopy));
948 }
949 
950 inline static hipError_t hipMemcpyParam2DAsync(const hip_Memcpy2D* pCopy, hipStream_t stream __dparm(0)) {
951  return hipCUResultTohipError(cuMemcpy2DAsync(pCopy, stream));
952 }
953 
954 inline static hipError_t hipMemcpy3D(const struct hipMemcpy3DParms *p)
955 {
956  return hipCUDAErrorTohipError(cudaMemcpy3D(p));
957 }
958 
959 inline static hipError_t hipMemcpy3DAsync(const struct hipMemcpy3DParms *p, hipStream_t stream)
960 {
961  return hipCUDAErrorTohipError(cudaMemcpy3DAsync(p, stream));
962 }
963 
964 inline static hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch,
965  size_t width, size_t height, hipMemcpyKind kind,
966  hipStream_t stream) {
967  return hipCUDAErrorTohipError(cudaMemcpy2DAsync(dst, dpitch, src, spitch, width, height,
968  hipMemcpyKindToCudaMemcpyKind(kind), stream));
969 }
970 
971 inline static hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset,
972  const void* src, size_t spitch, size_t width,
973  size_t height, hipMemcpyKind kind) {
974  return hipCUDAErrorTohipError(cudaMemcpy2DToArray(dst, wOffset, hOffset, src, spitch, width,
975  height, hipMemcpyKindToCudaMemcpyKind(kind)));
976 }
977 
978 __HIP_DEPRECATED inline static hipError_t hipMemcpyToArray(hipArray* dst, size_t wOffset,
979  size_t hOffset, const void* src,
980  size_t count, hipMemcpyKind kind) {
981  return hipCUDAErrorTohipError(
982  cudaMemcpyToArray(dst, wOffset, hOffset, src, count, hipMemcpyKindToCudaMemcpyKind(kind)));
983 }
984 
985 __HIP_DEPRECATED inline static hipError_t hipMemcpyFromArray(void* dst, hipArray_const_t srcArray,
986  size_t wOffset, size_t hOffset,
987  size_t count, hipMemcpyKind kind) {
988  return hipCUDAErrorTohipError(cudaMemcpyFromArray(dst, srcArray, wOffset, hOffset, count,
989  hipMemcpyKindToCudaMemcpyKind(kind)));
990 }
991 
992 inline static hipError_t hipMemcpyAtoH(void* dst, hipArray* srcArray, size_t srcOffset,
993  size_t count) {
994  return hipCUResultTohipError(cuMemcpyAtoH(dst, (CUarray)srcArray, srcOffset, count));
995 }
996 
997 inline static hipError_t hipMemcpyHtoA(hipArray* dstArray, size_t dstOffset, const void* srcHost,
998  size_t count) {
999  return hipCUResultTohipError(cuMemcpyHtoA((CUarray)dstArray, dstOffset, srcHost, count));
1000 }
1001 
1002 inline static hipError_t hipDeviceSynchronize() {
1003  return hipCUDAErrorTohipError(cudaDeviceSynchronize());
1004 }
1005 
1006 inline static hipError_t hipDeviceGetCacheConfig(hipFuncCache_t* pCacheConfig) {
1007  return hipCUDAErrorTohipError(cudaDeviceGetCacheConfig(pCacheConfig));
1008 }
1009 
1010 inline static hipError_t hipDeviceSetCacheConfig(hipFuncCache_t cacheConfig) {
1011  return hipCUDAErrorTohipError(cudaDeviceSetCacheConfig(cacheConfig));
1012 }
1013 
1014 inline static const char* hipGetErrorString(hipError_t error) {
1015  return cudaGetErrorString(hipErrorToCudaError(error));
1016 }
1017 
1018 inline static const char* hipGetErrorName(hipError_t error) {
1019  return cudaGetErrorName(hipErrorToCudaError(error));
1020 }
1021 
1022 inline static hipError_t hipGetDeviceCount(int* count) {
1023  return hipCUDAErrorTohipError(cudaGetDeviceCount(count));
1024 }
1025 
1026 inline static hipError_t hipGetDevice(int* device) {
1027  return hipCUDAErrorTohipError(cudaGetDevice(device));
1028 }
1029 
1030 inline static hipError_t hipIpcCloseMemHandle(void* devPtr) {
1031  return hipCUDAErrorTohipError(cudaIpcCloseMemHandle(devPtr));
1032 }
1033 
1034 inline static hipError_t hipIpcGetEventHandle(hipIpcEventHandle_t* handle, hipEvent_t event) {
1035  return hipCUDAErrorTohipError(cudaIpcGetEventHandle(handle, event));
1036 }
1037 
1038 inline static hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* devPtr) {
1039  return hipCUDAErrorTohipError(cudaIpcGetMemHandle(handle, devPtr));
1040 }
1041 
1042 inline static hipError_t hipIpcOpenEventHandle(hipEvent_t* event, hipIpcEventHandle_t handle) {
1043  return hipCUDAErrorTohipError(cudaIpcOpenEventHandle(event, handle));
1044 }
1045 
1046 inline static hipError_t hipIpcOpenMemHandle(void** devPtr, hipIpcMemHandle_t handle,
1047  unsigned int flags) {
1048  return hipCUDAErrorTohipError(cudaIpcOpenMemHandle(devPtr, handle, flags));
1049 }
1050 
1051 inline static hipError_t hipMemset(void* devPtr, int value, size_t count) {
1052  return hipCUDAErrorTohipError(cudaMemset(devPtr, value, count));
1053 }
1054 
1055 inline static hipError_t hipMemsetD32(hipDeviceptr_t devPtr, int value, size_t count) {
1056  return hipCUResultTohipError(cuMemsetD32(devPtr, value, count));
1057 }
1058 
1059 inline static hipError_t hipMemsetAsync(void* devPtr, int value, size_t count,
1060  hipStream_t stream __dparm(0)) {
1061  return hipCUDAErrorTohipError(cudaMemsetAsync(devPtr, value, count, stream));
1062 }
1063 
1064 inline static hipError_t hipMemsetD32Async(hipDeviceptr_t devPtr, int value, size_t count,
1065  hipStream_t stream __dparm(0)) {
1066  return hipCUResultTohipError(cuMemsetD32Async(devPtr, value, count, stream));
1067 }
1068 
1069 inline static hipError_t hipMemsetD8(hipDeviceptr_t dest, unsigned char value, size_t sizeBytes) {
1070  return hipCUResultTohipError(cuMemsetD8(dest, value, sizeBytes));
1071 }
1072 
1073 inline static hipError_t hipMemsetD8Async(hipDeviceptr_t dest, unsigned char value, size_t sizeBytes,
1074  hipStream_t stream __dparm(0)) {
1075  return hipCUResultTohipError(cuMemsetD8Async(dest, value, sizeBytes, stream));
1076 }
1077 
1078 inline static hipError_t hipMemsetD16(hipDeviceptr_t dest, unsigned short value, size_t sizeBytes) {
1079  return hipCUResultTohipError(cuMemsetD16(dest, value, sizeBytes));
1080 }
1081 
1082 inline static hipError_t hipMemsetD16Async(hipDeviceptr_t dest, unsigned short value, size_t sizeBytes,
1083  hipStream_t stream __dparm(0)) {
1084  return hipCUResultTohipError(cuMemsetD16Async(dest, value, sizeBytes, stream));
1085 }
1086 
1087 inline static hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height) {
1088  return hipCUDAErrorTohipError(cudaMemset2D(dst, pitch, value, width, height));
1089 }
1090 
1091 inline static hipError_t hipMemset2DAsync(void* dst, size_t pitch, int value, size_t width, size_t height, hipStream_t stream __dparm(0)) {
1092  return hipCUDAErrorTohipError(cudaMemset2DAsync(dst, pitch, value, width, height, stream));
1093 }
1094 
1095 inline static hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent ){
1096  return hipCUDAErrorTohipError(cudaMemset3D(pitchedDevPtr, value, extent));
1097 }
1098 
1099 inline static hipError_t hipMemset3DAsync(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent, hipStream_t stream __dparm(0) ){
1100  return hipCUDAErrorTohipError(cudaMemset3DAsync(pitchedDevPtr, value, extent, stream));
1101 }
1102 
1103 inline static hipError_t hipGetDeviceProperties(hipDeviceProp_t* p_prop, int device) {
1104  struct cudaDeviceProp cdprop;
1105  cudaError_t cerror;
1106  cerror = cudaGetDeviceProperties(&cdprop, device);
1107 
1108  strncpy(p_prop->name, cdprop.name, 256);
1109  p_prop->totalGlobalMem = cdprop.totalGlobalMem;
1110  p_prop->sharedMemPerBlock = cdprop.sharedMemPerBlock;
1111  p_prop->regsPerBlock = cdprop.regsPerBlock;
1112  p_prop->warpSize = cdprop.warpSize;
1113  p_prop->maxThreadsPerBlock = cdprop.maxThreadsPerBlock;
1114  for (int i = 0; i < 3; i++) {
1115  p_prop->maxThreadsDim[i] = cdprop.maxThreadsDim[i];
1116  p_prop->maxGridSize[i] = cdprop.maxGridSize[i];
1117  }
1118  p_prop->clockRate = cdprop.clockRate;
1119  p_prop->memoryClockRate = cdprop.memoryClockRate;
1120  p_prop->memoryBusWidth = cdprop.memoryBusWidth;
1121  p_prop->totalConstMem = cdprop.totalConstMem;
1122  p_prop->major = cdprop.major;
1123  p_prop->minor = cdprop.minor;
1124  p_prop->multiProcessorCount = cdprop.multiProcessorCount;
1125  p_prop->l2CacheSize = cdprop.l2CacheSize;
1126  p_prop->maxThreadsPerMultiProcessor = cdprop.maxThreadsPerMultiProcessor;
1127  p_prop->computeMode = cdprop.computeMode;
1128  p_prop->clockInstructionRate = cdprop.clockRate; // Same as clock-rate:
1129 
1130  int ccVers = p_prop->major * 100 + p_prop->minor * 10;
1131  p_prop->arch.hasGlobalInt32Atomics = (ccVers >= 110);
1132  p_prop->arch.hasGlobalFloatAtomicExch = (ccVers >= 110);
1133  p_prop->arch.hasSharedInt32Atomics = (ccVers >= 120);
1134  p_prop->arch.hasSharedFloatAtomicExch = (ccVers >= 120);
1135  p_prop->arch.hasFloatAtomicAdd = (ccVers >= 200);
1136  p_prop->arch.hasGlobalInt64Atomics = (ccVers >= 120);
1137  p_prop->arch.hasSharedInt64Atomics = (ccVers >= 110);
1138  p_prop->arch.hasDoubles = (ccVers >= 130);
1139  p_prop->arch.hasWarpVote = (ccVers >= 120);
1140  p_prop->arch.hasWarpBallot = (ccVers >= 200);
1141  p_prop->arch.hasWarpShuffle = (ccVers >= 300);
1142  p_prop->arch.hasFunnelShift = (ccVers >= 350);
1143  p_prop->arch.hasThreadFenceSystem = (ccVers >= 200);
1144  p_prop->arch.hasSyncThreadsExt = (ccVers >= 200);
1145  p_prop->arch.hasSurfaceFuncs = (ccVers >= 200);
1146  p_prop->arch.has3dGrid = (ccVers >= 200);
1147  p_prop->arch.hasDynamicParallelism = (ccVers >= 350);
1148 
1149  p_prop->concurrentKernels = cdprop.concurrentKernels;
1150  p_prop->pciDomainID = cdprop.pciDomainID;
1151  p_prop->pciBusID = cdprop.pciBusID;
1152  p_prop->pciDeviceID = cdprop.pciDeviceID;
1153  p_prop->maxSharedMemoryPerMultiProcessor = cdprop.sharedMemPerMultiprocessor;
1154  p_prop->isMultiGpuBoard = cdprop.isMultiGpuBoard;
1155  p_prop->canMapHostMemory = cdprop.canMapHostMemory;
1156  p_prop->gcnArch = 0; // Not a GCN arch
1157  p_prop->integrated = cdprop.integrated;
1158  p_prop->cooperativeLaunch = cdprop.cooperativeLaunch;
1159  p_prop->cooperativeMultiDeviceLaunch = cdprop.cooperativeMultiDeviceLaunch;
1164 
1165  p_prop->maxTexture1D = cdprop.maxTexture1D;
1166  p_prop->maxTexture2D[0] = cdprop.maxTexture2D[0];
1167  p_prop->maxTexture2D[1] = cdprop.maxTexture2D[1];
1168  p_prop->maxTexture3D[0] = cdprop.maxTexture3D[0];
1169  p_prop->maxTexture3D[1] = cdprop.maxTexture3D[1];
1170  p_prop->maxTexture3D[2] = cdprop.maxTexture3D[2];
1171 
1172  p_prop->memPitch = cdprop.memPitch;
1173  p_prop->textureAlignment = cdprop.textureAlignment;
1174  p_prop->texturePitchAlignment = cdprop.texturePitchAlignment;
1175  p_prop->kernelExecTimeoutEnabled = cdprop.kernelExecTimeoutEnabled;
1176  p_prop->ECCEnabled = cdprop.ECCEnabled;
1177  p_prop->tccDriver = cdprop.tccDriver;
1178 
1179  return hipCUDAErrorTohipError(cerror);
1180 }
1181 
1182 inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) {
1183  enum cudaDeviceAttr cdattr;
1184  cudaError_t cerror;
1185 
1186  switch (attr) {
1188  cdattr = cudaDevAttrMaxThreadsPerBlock;
1189  break;
1191  cdattr = cudaDevAttrMaxBlockDimX;
1192  break;
1194  cdattr = cudaDevAttrMaxBlockDimY;
1195  break;
1197  cdattr = cudaDevAttrMaxBlockDimZ;
1198  break;
1200  cdattr = cudaDevAttrMaxGridDimX;
1201  break;
1203  cdattr = cudaDevAttrMaxGridDimY;
1204  break;
1206  cdattr = cudaDevAttrMaxGridDimZ;
1207  break;
1209  cdattr = cudaDevAttrMaxSharedMemoryPerBlock;
1210  break;
1212  cdattr = cudaDevAttrTotalConstantMemory;
1213  break;
1215  cdattr = cudaDevAttrWarpSize;
1216  break;
1218  cdattr = cudaDevAttrMaxRegistersPerBlock;
1219  break;
1221  cdattr = cudaDevAttrClockRate;
1222  break;
1224  cdattr = cudaDevAttrMemoryClockRate;
1225  break;
1227  cdattr = cudaDevAttrGlobalMemoryBusWidth;
1228  break;
1230  cdattr = cudaDevAttrMultiProcessorCount;
1231  break;
1233  cdattr = cudaDevAttrComputeMode;
1234  break;
1236  cdattr = cudaDevAttrL2CacheSize;
1237  break;
1239  cdattr = cudaDevAttrMaxThreadsPerMultiProcessor;
1240  break;
1242  cdattr = cudaDevAttrComputeCapabilityMajor;
1243  break;
1245  cdattr = cudaDevAttrComputeCapabilityMinor;
1246  break;
1248  cdattr = cudaDevAttrConcurrentKernels;
1249  break;
1251  cdattr = cudaDevAttrPciBusId;
1252  break;
1254  cdattr = cudaDevAttrPciDeviceId;
1255  break;
1257  cdattr = cudaDevAttrMaxSharedMemoryPerMultiprocessor;
1258  break;
1260  cdattr = cudaDevAttrIsMultiGpuBoard;
1261  break;
1263  cdattr = cudaDevAttrIntegrated;
1264  break;
1266  cdattr = cudaDevAttrMaxTexture1DWidth;
1267  break;
1269  cdattr = cudaDevAttrMaxTexture2DWidth;
1270  break;
1272  cdattr = cudaDevAttrMaxTexture2DHeight;
1273  break;
1275  cdattr = cudaDevAttrMaxTexture3DWidth;
1276  break;
1278  cdattr = cudaDevAttrMaxTexture3DHeight;
1279  break;
1281  cdattr = cudaDevAttrMaxTexture3DDepth;
1282  break;
1284  cdattr = cudaDevAttrMaxPitch;
1285  break;
1287  cdattr = cudaDevAttrTextureAlignment;
1288  break;
1290  cdattr = cudaDevAttrTexturePitchAlignment;
1291  break;
1293  cdattr = cudaDevAttrKernelExecTimeout;
1294  break;
1296  cdattr = cudaDevAttrCanMapHostMemory;
1297  break;
1299  cdattr = cudaDevAttrEccEnabled;
1300  break;
1302  cdattr = cudaDevAttrCooperativeLaunch;
1303  break;
1305  cdattr = cudaDevAttrCooperativeMultiDeviceLaunch;
1306  break;
1307  default:
1308  return hipCUDAErrorTohipError(cudaErrorInvalidValue);
1309  }
1310 
1311  cerror = cudaDeviceGetAttribute(pi, cdattr, device);
1312 
1313  return hipCUDAErrorTohipError(cerror);
1314 }
1315 
1316 inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks,
1317  const void* func,
1318  int blockSize,
1319  size_t dynamicSMemSize) {
1320  return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, func,
1321  blockSize, dynamicSMemSize));
1322 }
1323 
1324 inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks,
1325  const void* func,
1326  int blockSize,
1327  size_t dynamicSMemSize,
1328  unsigned int flags) {
1329  return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks, func,
1330  blockSize, dynamicSMemSize, flags));
1331 }
1332 
1333 inline static hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks,
1334  hipFunction_t f,
1335  int blockSize,
1336  size_t dynamicSMemSize ){
1337  return hipCUResultTohipError(cuOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, f,
1338  blockSize, dynamicSMemSize));
1339 }
1340 
1341 inline static hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks,
1342  hipFunction_t f,
1343  int blockSize,
1344  size_t dynamicSMemSize,
1345  unsigned int flags ) {
1346  return hipCUResultTohipError(cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks,f,
1347  blockSize, dynamicSMemSize, flags));
1348 }
1349 
1350 //TODO - Match CUoccupancyB2DSize
1351 inline static hipError_t hipModuleOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize,
1352  hipFunction_t f, size_t dynSharedMemPerBlk,
1353  int blockSizeLimit){
1354  return hipCUResultTohipError(cuOccupancyMaxPotentialBlockSize(gridSize, blockSize, f, NULL,
1355  dynSharedMemPerBlk, blockSizeLimit));
1356 }
1357 
1358 //TODO - Match CUoccupancyB2DSize
1359 inline static hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int* blockSize,
1360  hipFunction_t f, size_t dynSharedMemPerBlk,
1361  int blockSizeLimit, unsigned int flags){
1362  return hipCUResultTohipError(cuOccupancyMaxPotentialBlockSizeWithFlags(gridSize, blockSize, f, NULL,
1363  dynSharedMemPerBlk, blockSizeLimit, flags));
1364 }
1365 
1366 inline static hipError_t hipPointerGetAttributes(hipPointerAttribute_t* attributes, const void* ptr) {
1367  struct cudaPointerAttributes cPA;
1368  hipError_t err = hipCUDAErrorTohipError(cudaPointerGetAttributes(&cPA, ptr));
1369  if (err == hipSuccess) {
1370 #if (CUDART_VERSION >= 11000)
1371  auto memType = cPA.type;
1372 #else
1373  unsigned memType = cPA.memoryType; // No auto because cuda 10.2 doesnt force c++11
1374 #endif
1375  switch (memType) {
1376  case cudaMemoryTypeDevice:
1377  attributes->memoryType = hipMemoryTypeDevice;
1378  break;
1379  case cudaMemoryTypeHost:
1380  attributes->memoryType = hipMemoryTypeHost;
1381  break;
1382  default:
1383  return hipErrorUnknown;
1384  }
1385  attributes->device = cPA.device;
1386  attributes->devicePointer = cPA.devicePointer;
1387  attributes->hostPointer = cPA.hostPointer;
1388  attributes->isManaged = 0;
1389  attributes->allocationFlags = 0;
1390  }
1391  return err;
1392 }
1393 
1394 inline static hipError_t hipMemGetInfo(size_t* free, size_t* total) {
1395  return hipCUDAErrorTohipError(cudaMemGetInfo(free, total));
1396 }
1397 
1398 inline static hipError_t hipEventCreate(hipEvent_t* event) {
1399  return hipCUDAErrorTohipError(cudaEventCreate(event));
1400 }
1401 
1402 inline static hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream __dparm(NULL)) {
1403  return hipCUDAErrorTohipError(cudaEventRecord(event, stream));
1404 }
1405 
1406 inline static hipError_t hipEventSynchronize(hipEvent_t event) {
1407  return hipCUDAErrorTohipError(cudaEventSynchronize(event));
1408 }
1409 
1410 inline static hipError_t hipEventElapsedTime(float* ms, hipEvent_t start, hipEvent_t stop) {
1411  return hipCUDAErrorTohipError(cudaEventElapsedTime(ms, start, stop));
1412 }
1413 
1414 inline static hipError_t hipEventDestroy(hipEvent_t event) {
1415  return hipCUDAErrorTohipError(cudaEventDestroy(event));
1416 }
1417 
1418 inline static hipError_t hipStreamCreateWithFlags(hipStream_t* stream, unsigned int flags) {
1419  return hipCUDAErrorTohipError(cudaStreamCreateWithFlags(stream, flags));
1420 }
1421 
1422 inline static hipError_t hipStreamCreateWithPriority(hipStream_t* stream, unsigned int flags, int priority) {
1423  return hipCUDAErrorTohipError(cudaStreamCreateWithPriority(stream, flags, priority));
1424 }
1425 
1426 inline static hipError_t hipDeviceGetStreamPriorityRange(int* leastPriority, int* greatestPriority) {
1427  return hipCUDAErrorTohipError(cudaDeviceGetStreamPriorityRange(leastPriority, greatestPriority));
1428 }
1429 
1430 inline static hipError_t hipStreamCreate(hipStream_t* stream) {
1431  return hipCUDAErrorTohipError(cudaStreamCreate(stream));
1432 }
1433 
1434 inline static hipError_t hipStreamSynchronize(hipStream_t stream) {
1435  return hipCUDAErrorTohipError(cudaStreamSynchronize(stream));
1436 }
1437 
1438 inline static hipError_t hipStreamDestroy(hipStream_t stream) {
1439  return hipCUDAErrorTohipError(cudaStreamDestroy(stream));
1440 }
1441 
1442 inline static hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags) {
1443  return hipCUDAErrorTohipError(cudaStreamGetFlags(stream, flags));
1444 }
1445 
1446 inline static hipError_t hipStreamGetPriority(hipStream_t stream, int *priority) {
1447  return hipCUDAErrorTohipError(cudaStreamGetPriority(stream, priority));
1448 }
1449 
1450 inline static hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event,
1451  unsigned int flags) {
1452  return hipCUDAErrorTohipError(cudaStreamWaitEvent(stream, event, flags));
1453 }
1454 
1455 inline static hipError_t hipStreamQuery(hipStream_t stream) {
1456  return hipCUDAErrorTohipError(cudaStreamQuery(stream));
1457 }
1458 
1459 inline static hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback,
1460  void* userData, unsigned int flags) {
1461  return hipCUDAErrorTohipError(
1462  cudaStreamAddCallback(stream, (cudaStreamCallback_t)callback, userData, flags));
1463 }
1464 
1465 inline static hipError_t hipDriverGetVersion(int* driverVersion) {
1466  cudaError_t err = cudaDriverGetVersion(driverVersion);
1467 
1468  // Override driver version to match version reported on HCC side.
1469  *driverVersion = 4;
1470 
1471  return hipCUDAErrorTohipError(err);
1472 }
1473 
1474 inline static hipError_t hipRuntimeGetVersion(int* runtimeVersion) {
1475  return hipCUDAErrorTohipError(cudaRuntimeGetVersion(runtimeVersion));
1476 }
1477 
1478 inline static hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, int device, int peerDevice) {
1479  return hipCUDAErrorTohipError(cudaDeviceCanAccessPeer(canAccessPeer, device, peerDevice));
1480 }
1481 
1482 inline static hipError_t hipDeviceDisablePeerAccess(int peerDevice) {
1483  return hipCUDAErrorTohipError(cudaDeviceDisablePeerAccess(peerDevice));
1484 }
1485 
1486 inline static hipError_t hipDeviceEnablePeerAccess(int peerDevice, unsigned int flags) {
1487  return hipCUDAErrorTohipError(cudaDeviceEnablePeerAccess(peerDevice, flags));
1488 }
1489 
1490 inline static hipError_t hipCtxDisablePeerAccess(hipCtx_t peerCtx) {
1491  return hipCUResultTohipError(cuCtxDisablePeerAccess(peerCtx));
1492 }
1493 
1494 inline static hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags) {
1495  return hipCUResultTohipError(cuCtxEnablePeerAccess(peerCtx, flags));
1496 }
1497 
1498 inline static hipError_t hipDevicePrimaryCtxGetState(hipDevice_t dev, unsigned int* flags,
1499  int* active) {
1500  return hipCUResultTohipError(cuDevicePrimaryCtxGetState(dev, flags, active));
1501 }
1502 
1503 inline static hipError_t hipDevicePrimaryCtxRelease(hipDevice_t dev) {
1504  return hipCUResultTohipError(cuDevicePrimaryCtxRelease(dev));
1505 }
1506 
1507 inline static hipError_t hipDevicePrimaryCtxRetain(hipCtx_t* pctx, hipDevice_t dev) {
1508  return hipCUResultTohipError(cuDevicePrimaryCtxRetain(pctx, dev));
1509 }
1510 
1511 inline static hipError_t hipDevicePrimaryCtxReset(hipDevice_t dev) {
1512  return hipCUResultTohipError(cuDevicePrimaryCtxReset(dev));
1513 }
1514 
1515 inline static hipError_t hipDevicePrimaryCtxSetFlags(hipDevice_t dev, unsigned int flags) {
1516  return hipCUResultTohipError(cuDevicePrimaryCtxSetFlags(dev, flags));
1517 }
1518 
1519 inline static hipError_t hipMemGetAddressRange(hipDeviceptr_t* pbase, size_t* psize,
1520  hipDeviceptr_t dptr) {
1521  return hipCUResultTohipError(cuMemGetAddressRange(pbase, psize, dptr));
1522 }
1523 
1524 inline static hipError_t hipMemcpyPeer(void* dst, int dstDevice, const void* src, int srcDevice,
1525  size_t count) {
1526  return hipCUDAErrorTohipError(cudaMemcpyPeer(dst, dstDevice, src, srcDevice, count));
1527 }
1528 
1529 inline static hipError_t hipMemcpyPeerAsync(void* dst, int dstDevice, const void* src,
1530  int srcDevice, size_t count,
1531  hipStream_t stream __dparm(0)) {
1532  return hipCUDAErrorTohipError(
1533  cudaMemcpyPeerAsync(dst, dstDevice, src, srcDevice, count, stream));
1534 }
1535 
1536 // Profile APIs:
1537 inline static hipError_t hipProfilerStart() { return hipCUDAErrorTohipError(cudaProfilerStart()); }
1538 
1539 inline static hipError_t hipProfilerStop() { return hipCUDAErrorTohipError(cudaProfilerStop()); }
1540 
1541 inline static hipError_t hipGetDeviceFlags(unsigned int* flags) {
1542  return hipCUDAErrorTohipError(cudaGetDeviceFlags(flags));
1543 }
1544 
1545 inline static hipError_t hipSetDeviceFlags(unsigned int flags) {
1546  return hipCUDAErrorTohipError(cudaSetDeviceFlags(flags));
1547 }
1548 
1549 inline static hipError_t hipEventCreateWithFlags(hipEvent_t* event, unsigned int flags) {
1550  return hipCUDAErrorTohipError(cudaEventCreateWithFlags(event, flags));
1551 }
1552 
1553 inline static hipError_t hipEventQuery(hipEvent_t event) {
1554  return hipCUDAErrorTohipError(cudaEventQuery(event));
1555 }
1556 
1557 inline static hipError_t hipCtxCreate(hipCtx_t* ctx, unsigned int flags, hipDevice_t device) {
1558  return hipCUResultTohipError(cuCtxCreate(ctx, flags, device));
1559 }
1560 
1561 inline static hipError_t hipCtxDestroy(hipCtx_t ctx) {
1562  return hipCUResultTohipError(cuCtxDestroy(ctx));
1563 }
1564 
1565 inline static hipError_t hipCtxPopCurrent(hipCtx_t* ctx) {
1566  return hipCUResultTohipError(cuCtxPopCurrent(ctx));
1567 }
1568 
1569 inline static hipError_t hipCtxPushCurrent(hipCtx_t ctx) {
1570  return hipCUResultTohipError(cuCtxPushCurrent(ctx));
1571 }
1572 
1573 inline static hipError_t hipCtxSetCurrent(hipCtx_t ctx) {
1574  return hipCUResultTohipError(cuCtxSetCurrent(ctx));
1575 }
1576 
1577 inline static hipError_t hipCtxGetCurrent(hipCtx_t* ctx) {
1578  return hipCUResultTohipError(cuCtxGetCurrent(ctx));
1579 }
1580 
1581 inline static hipError_t hipCtxGetDevice(hipDevice_t* device) {
1582  return hipCUResultTohipError(cuCtxGetDevice(device));
1583 }
1584 
1585 inline static hipError_t hipCtxGetApiVersion(hipCtx_t ctx, int* apiVersion) {
1586  return hipCUResultTohipError(cuCtxGetApiVersion(ctx, (unsigned int*)apiVersion));
1587 }
1588 
1589 inline static hipError_t hipCtxGetCacheConfig(hipFuncCache* cacheConfig) {
1590  return hipCUResultTohipError(cuCtxGetCacheConfig(cacheConfig));
1591 }
1592 
1593 inline static hipError_t hipCtxSetCacheConfig(hipFuncCache cacheConfig) {
1594  return hipCUResultTohipError(cuCtxSetCacheConfig(cacheConfig));
1595 }
1596 
1597 inline static hipError_t hipCtxSetSharedMemConfig(hipSharedMemConfig config) {
1598  return hipCUResultTohipError(cuCtxSetSharedMemConfig((CUsharedconfig)config));
1599 }
1600 
1601 inline static hipError_t hipCtxGetSharedMemConfig(hipSharedMemConfig* pConfig) {
1602  return hipCUResultTohipError(cuCtxGetSharedMemConfig((CUsharedconfig*)pConfig));
1603 }
1604 
1605 inline static hipError_t hipCtxSynchronize(void) {
1606  return hipCUResultTohipError(cuCtxSynchronize());
1607 }
1608 
1609 inline static hipError_t hipCtxGetFlags(unsigned int* flags) {
1610  return hipCUResultTohipError(cuCtxGetFlags(flags));
1611 }
1612 
1613 inline static hipError_t hipCtxDetach(hipCtx_t ctx) {
1614  return hipCUResultTohipError(cuCtxDetach(ctx));
1615 }
1616 
1617 inline static hipError_t hipDeviceGet(hipDevice_t* device, int ordinal) {
1618  return hipCUResultTohipError(cuDeviceGet(device, ordinal));
1619 }
1620 
1621 inline static hipError_t hipDeviceComputeCapability(int* major, int* minor, hipDevice_t device) {
1622  return hipCUResultTohipError(cuDeviceComputeCapability(major, minor, device));
1623 }
1624 
1625 inline static hipError_t hipDeviceGetName(char* name, int len, hipDevice_t device) {
1626  return hipCUResultTohipError(cuDeviceGetName(name, len, device));
1627 }
1628 
1629 inline static hipError_t hipDeviceGetP2PAttribute(int* value, hipDeviceP2PAttr attr,
1630  int srcDevice, int dstDevice) {
1631  return hipCUDAErrorTohipError(cudaDeviceGetP2PAttribute(value, attr, srcDevice, dstDevice));
1632 }
1633 
1634 inline static hipError_t hipDeviceGetPCIBusId(char* pciBusId, int len, hipDevice_t device) {
1635  return hipCUDAErrorTohipError(cudaDeviceGetPCIBusId(pciBusId, len, device));
1636 }
1637 
1638 inline static hipError_t hipDeviceGetByPCIBusId(int* device, const char* pciBusId) {
1639  return hipCUDAErrorTohipError(cudaDeviceGetByPCIBusId(device, pciBusId));
1640 }
1641 
1642 inline static hipError_t hipDeviceGetSharedMemConfig(hipSharedMemConfig* config) {
1643  return hipCUDAErrorTohipError(cudaDeviceGetSharedMemConfig(config));
1644 }
1645 
1646 inline static hipError_t hipDeviceSetSharedMemConfig(hipSharedMemConfig config) {
1647  return hipCUDAErrorTohipError(cudaDeviceSetSharedMemConfig(config));
1648 }
1649 
1650 inline static hipError_t hipDeviceGetLimit(size_t* pValue, hipLimit_t limit) {
1651  return hipCUDAErrorTohipError(cudaDeviceGetLimit(pValue, limit));
1652 }
1653 
1654 inline static hipError_t hipDeviceTotalMem(size_t* bytes, hipDevice_t device) {
1655  return hipCUResultTohipError(cuDeviceTotalMem(bytes, device));
1656 }
1657 
1658 inline static hipError_t hipModuleLoad(hipModule_t* module, const char* fname) {
1659  return hipCUResultTohipError(cuModuleLoad(module, fname));
1660 }
1661 
1662 inline static hipError_t hipModuleUnload(hipModule_t hmod) {
1663  return hipCUResultTohipError(cuModuleUnload(hmod));
1664 }
1665 
1666 inline static hipError_t hipModuleGetFunction(hipFunction_t* function, hipModule_t module,
1667  const char* kname) {
1668  return hipCUResultTohipError(cuModuleGetFunction(function, module, kname));
1669 }
1670 
1671 inline static hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func) {
1672  return hipCUDAErrorTohipError(cudaFuncGetAttributes(attr, func));
1673 }
1674 
1675 inline static hipError_t hipFuncGetAttribute (int* value, hipFunction_attribute attrib, hipFunction_t hfunc) {
1676  return hipCUResultTohipError(cuFuncGetAttribute(value, attrib, hfunc));
1677 }
1678 
1679 inline static hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod,
1680  const char* name) {
1681  return hipCUResultTohipError(cuModuleGetGlobal(dptr, bytes, hmod, name));
1682 }
1683 
1684 inline static hipError_t hipModuleLoadData(hipModule_t* module, const void* image) {
1685  return hipCUResultTohipError(cuModuleLoadData(module, image));
1686 }
1687 
1688 inline static hipError_t hipModuleLoadDataEx(hipModule_t* module, const void* image,
1689  unsigned int numOptions, hipJitOption* options,
1690  void** optionValues) {
1691  return hipCUResultTohipError(
1692  cuModuleLoadDataEx(module, image, numOptions, options, optionValues));
1693 }
1694 
1695 inline static hipError_t hipLaunchKernel(const void* function_address, dim3 numBlocks,
1696  dim3 dimBlocks, void** args, size_t sharedMemBytes,
1697  hipStream_t stream)
1698 {
1699  return hipCUDAErrorTohipError(cudaLaunchKernel(function_address,numBlocks,dimBlocks,args,sharedMemBytes,stream));
1700 }
1701 
1702 inline static hipError_t hipModuleLaunchKernel(hipFunction_t f, unsigned int gridDimX,
1703  unsigned int gridDimY, unsigned int gridDimZ,
1704  unsigned int blockDimX, unsigned int blockDimY,
1705  unsigned int blockDimZ, unsigned int sharedMemBytes,
1706  hipStream_t stream, void** kernelParams,
1707  void** extra) {
1708  return hipCUResultTohipError(cuLaunchKernel(f, gridDimX, gridDimY, gridDimZ, blockDimX,
1709  blockDimY, blockDimZ, sharedMemBytes, stream,
1710  kernelParams, extra));
1711 }
1712 
1713 inline static hipError_t hipFuncSetCacheConfig(const void* func, hipFuncCache_t cacheConfig) {
1714  return hipCUDAErrorTohipError(cudaFuncSetCacheConfig(func, cacheConfig));
1715 }
1716 
1717 __HIP_DEPRECATED inline static hipError_t hipBindTexture(size_t* offset,
1718  struct textureReference* tex,
1719  const void* devPtr,
1720  const hipChannelFormatDesc* desc,
1721  size_t size __dparm(UINT_MAX)) {
1722  return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, desc, size));
1723 }
1724 
1725 __HIP_DEPRECATED inline static hipError_t hipBindTexture2D(
1726  size_t* offset, struct textureReference* tex, const void* devPtr,
1727  const hipChannelFormatDesc* desc, size_t width, size_t height, size_t pitch) {
1728  return hipCUDAErrorTohipError(cudaBindTexture2D(offset, tex, devPtr, desc, width, height, pitch));
1729 }
1730 
1731 inline static hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w,
1732  hipChannelFormatKind f) {
1733  return cudaCreateChannelDesc(x, y, z, w, hipChannelFormatKindToCudaChannelFormatKind(f));
1734 }
1735 
1736 inline static hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject,
1737  const hipResourceDesc* pResDesc,
1738  const hipTextureDesc* pTexDesc,
1739  const hipResourceViewDesc* pResViewDesc) {
1740  return hipCUDAErrorTohipError(
1741  cudaCreateTextureObject(pTexObject, pResDesc, pTexDesc, pResViewDesc));
1742 }
1743 
1744 inline static hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject) {
1745  return hipCUDAErrorTohipError(cudaDestroyTextureObject(textureObject));
1746 }
1747 
1748 inline static hipError_t hipCreateSurfaceObject(hipSurfaceObject_t* pSurfObject,
1749  const hipResourceDesc* pResDesc) {
1750  return hipCUDAErrorTohipError(cudaCreateSurfaceObject(pSurfObject, pResDesc));
1751 }
1752 
1753 inline static hipError_t hipDestroySurfaceObject(hipSurfaceObject_t surfaceObject) {
1754  return hipCUDAErrorTohipError(cudaDestroySurfaceObject(surfaceObject));
1755 }
1756 
1757 inline static hipError_t hipGetTextureObjectResourceDesc(hipResourceDesc* pResDesc,
1758  hipTextureObject_t textureObject) {
1759  return hipCUDAErrorTohipError(cudaGetTextureObjectResourceDesc( pResDesc, textureObject));
1760 }
1761 
1762 __HIP_DEPRECATED inline static hipError_t hipGetTextureAlignmentOffset(
1763  size_t* offset, const struct textureReference* texref) {
1764  return hipCUDAErrorTohipError(cudaGetTextureAlignmentOffset(offset,texref));
1765 }
1766 
1767 inline static hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, hipArray_const_t array)
1768 {
1769  return hipCUDAErrorTohipError(cudaGetChannelDesc(desc,array));
1770 }
1771 
1772 inline static hipError_t hipLaunchCooperativeKernel(const void* f, dim3 gridDim, dim3 blockDim,
1773  void** kernelParams, unsigned int sharedMemBytes,
1774  hipStream_t stream) {
1775  return hipCUDAErrorTohipError(
1776  cudaLaunchCooperativeKernel(f, gridDim, blockDim, kernelParams, sharedMemBytes, stream));
1777 }
1778 
1779 inline static hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList,
1780  int numDevices, unsigned int flags) {
1781  return hipCUDAErrorTohipError(cudaLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags));
1782 }
1783 
1784 #ifdef __cplusplus
1785 }
1786 #endif
1787 
1788 #ifdef __CUDACC__
1789 
1790 template<class T>
1791 inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks,
1792  T func,
1793  int blockSize,
1794  size_t dynamicSMemSize) {
1795  return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, func,
1796  blockSize, dynamicSMemSize));
1797 }
1798 
1799 template <class T>
1800 inline static hipError_t hipOccupancyMaxPotentialBlockSize(int* minGridSize, int* blockSize, T func,
1801  size_t dynamicSMemSize = 0,
1802  int blockSizeLimit = 0) {
1803  return hipCUDAErrorTohipError(cudaOccupancyMaxPotentialBlockSize(minGridSize, blockSize, func,
1804  dynamicSMemSize, blockSizeLimit));
1805 }
1806 
1807 template <class T>
1808 inline static hipError_t hipOccupancyMaxPotentialBlockSizeWithFlags(int* minGridSize, int* blockSize, T func,
1809  size_t dynamicSMemSize = 0,
1810  int blockSizeLimit = 0, unsigned int flags = 0) {
1811  return hipCUDAErrorTohipError(cudaOccupancyMaxPotentialBlockSize(minGridSize, blockSize, func,
1812  dynamicSMemSize, blockSizeLimit, flags));
1813 }
1814 
1815 template <class T>
1816 inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( int* numBlocks, T func,
1817  int blockSize, size_t dynamicSMemSize,unsigned int flags) {
1818  return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks, func,
1819  blockSize, dynamicSMemSize, flags));
1820 }
1821 
1822 template <class T, int dim, enum cudaTextureReadMode readMode>
1823 inline static hipError_t hipBindTexture(size_t* offset, const struct texture<T, dim, readMode>& tex,
1824  const void* devPtr, size_t size = UINT_MAX) {
1825  return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, size));
1826 }
1827 
1828 template <class T, int dim, enum cudaTextureReadMode readMode>
1829 inline static hipError_t hipBindTexture(size_t* offset, struct texture<T, dim, readMode>& tex,
1830  const void* devPtr, const hipChannelFormatDesc& desc,
1831  size_t size = UINT_MAX) {
1832  return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, desc, size));
1833 }
1834 
1835 template <class T, int dim, enum cudaTextureReadMode readMode>
1836 __HIP_DEPRECATED inline static hipError_t hipUnbindTexture(struct texture<T, dim, readMode>* tex) {
1837  return hipCUDAErrorTohipError(cudaUnbindTexture(tex));
1838 }
1839 
1840 template <class T, int dim, enum cudaTextureReadMode readMode>
1841 __HIP_DEPRECATED inline static hipError_t hipUnbindTexture(struct texture<T, dim, readMode>& tex) {
1842  return hipCUDAErrorTohipError(cudaUnbindTexture(tex));
1843 }
1844 
1845 template <class T, int dim, enum cudaTextureReadMode readMode>
1846 __HIP_DEPRECATED inline static hipError_t hipBindTextureToArray(
1847  struct texture<T, dim, readMode>& tex, hipArray_const_t array,
1848  const hipChannelFormatDesc& desc) {
1849  return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array, desc));
1850 }
1851 
1852 template <class T, int dim, enum cudaTextureReadMode readMode>
1853 __HIP_DEPRECATED inline static hipError_t hipBindTextureToArray(
1854  struct texture<T, dim, readMode>* tex, hipArray_const_t array,
1855  const hipChannelFormatDesc* desc) {
1856  return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array, desc));
1857 }
1858 
1859 template <class T, int dim, enum cudaTextureReadMode readMode>
1860 __HIP_DEPRECATED inline static hipError_t hipBindTextureToArray(
1861  struct texture<T, dim, readMode>& tex, hipArray_const_t array) {
1862  return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array));
1863 }
1864 
1865 template <class T>
1866 inline static hipChannelFormatDesc hipCreateChannelDesc() {
1867  return cudaCreateChannelDesc<T>();
1868 }
1869 
1870 template <class T>
1871 inline static hipError_t hipLaunchCooperativeKernel(T f, dim3 gridDim, dim3 blockDim,
1872  void** kernelParams, unsigned int sharedMemBytes, hipStream_t stream) {
1873  return hipCUDAErrorTohipError(
1874  cudaLaunchCooperativeKernel(f, gridDim, blockDim, kernelParams, sharedMemBytes, stream));
1875 }
1876 
1877 #endif //__CUDACC__
1878 
1879 #endif // HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_API_H
hipFuncAttributes
Definition: hip_runtime_api.h:128
hipCtxSynchronize
hipError_t hipCtxSynchronize(void)
Blocks until the default context has completed all preceding requested tasks.
Definition: hip_context.cpp:249
hipPointerGetAttributes
hipError_t hipPointerGetAttributes(hipPointerAttribute_t *attributes, const void *ptr)
Return attributes for the specified pointer.
Definition: hip_memory.cpp:617
hipDeviceAttributeMaxPitch
@ hipDeviceAttributeMaxPitch
Maximum pitch in bytes allowed by memory copies.
Definition: hip_runtime_api.h:336
hipMemset3DAsync
hipError_t hipMemset3DAsync(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent, hipStream_t stream __dparm(0))
Fills asynchronously the memory area pointed to by pitchedDevPtr with the constant value.
hipMemcpy3D
hipError_t hipMemcpy3D(const struct hipMemcpy3DParms *p)
Copies data between host and device.
Definition: hip_memory.cpp:1712
hipCtxGetCurrent
hipError_t hipCtxGetCurrent(hipCtx_t *ctx)
Get the handle of the current/default context.
Definition: hip_context.cpp:167
hipDeviceProp_t::regsPerBlock
int regsPerBlock
Registers per block.
Definition: hip_runtime_api.h:87
hipMallocPitch
hipError_t hipMallocPitch(void **ptr, size_t *pitch, size_t width, size_t height)
Definition: hip_memory.cpp:851
hipSetDevice
hipError_t hipSetDevice(int deviceId)
Set default device to be used for subsequent hip API calls from this thread.
Definition: hip_device.cpp:132
hipDeviceGetP2PAttribute
hipError_t hipDeviceGetP2PAttribute(int *value, hipDeviceP2PAttr attr, int srcDevice, int dstDevice)
Returns a value for attr of link between two devices.
hipMemsetD16Async
hipError_t hipMemsetD16Async(hipDeviceptr_t dest, unsigned short value, size_t count, hipStream_t stream __dparm(0))
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant short value v...
hipDeviceAttributeMemoryBusWidth
@ hipDeviceAttributeMemoryBusWidth
Global memory bus width in bits.
Definition: hip_runtime_api.h:307
hipGetErrorString
const char * hipGetErrorString(hipError_t hipError)
Return handy text string message to explain the error which occurred.
Definition: hip_error.cpp:54
hipGetDeviceFlags
hipError_t hipGetDeviceFlags(unsigned int *flags)
Gets the flags set for current device.
hipDeviceGetByPCIBusId
hipError_t hipDeviceGetByPCIBusId(int *device, const char *pciBusId)
Returns a handle to a compute device.
Definition: hip_device.cpp:492
hipErrorInvalidMemcpyDirection
hipErrorInvalidMemcpyDirection
Invalid memory copy direction.
Definition: hip_runtime_api.h:213
hipMalloc3DArray
hipError_t hipMalloc3DArray(hipArray **array, const struct hipChannelFormatDesc *desc, struct hipExtent extent, unsigned int flags)
Allocate an array on the device.
Definition: hip_memory.cpp:1091
hipDeviceArch_t::hasGlobalInt64Atomics
unsigned hasGlobalInt64Atomics
64-bit integer atomics for global memory.
Definition: hip_runtime_api.h:53
hipDeviceProp_t::minor
int minor
Definition: hip_runtime_api.h:99
hipDeviceAttributeMaxBlockDimX
@ hipDeviceAttributeMaxBlockDimX
Maximum x-dimension of a block.
Definition: hip_runtime_api.h:291
hipErrorInvalidDevicePointer
hipErrorInvalidDevicePointer
Invalid Device Pointer.
Definition: hip_runtime_api.h:212
hipChooseDevice
hipError_t hipChooseDevice(int *device, const hipDeviceProp_t *prop)
Device which matches hipDeviceProp_t is returned.
Definition: hip_device.cpp:518
hipIpcCloseMemHandle
hipError_t hipIpcCloseMemHandle(void *devPtr)
Close memory mapped with hipIpcOpenMemHandle.
Definition: hip_memory.cpp:2539
hipMemcpy2DAsync
hipError_t hipMemcpy2DAsync(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream __dparm(0))
Copies data between host and device.
hipLaunchKernel
hipError_t hipLaunchKernel(const void *function_address, dim3 numBlocks, dim3 dimBlocks, void **args, size_t sharedMemBytes __dparm(0), hipStream_t stream __dparm(0))
C compliant kernel launch API.
hipMemsetD32
hipError_t hipMemsetD32(hipDeviceptr_t dest, int value, size_t count)
Fills the memory area pointed to by dest with the constant integer value for specified number of time...
Definition: hip_memory.cpp:2281
hipDeviceProp_t::texturePitchAlignment
size_t texturePitchAlignment
Pitch alignment requirement for texture references bound to pitched memory.
Definition: hip_runtime_api.h:128
hipDeviceAttributeMaxGridDimX
@ hipDeviceAttributeMaxGridDimX
Maximum x-dimension of a grid.
Definition: hip_runtime_api.h:294
hipDeviceArch_t::hasThreadFenceSystem
unsigned hasThreadFenceSystem
__threadfence_system.
Definition: hip_runtime_api.h:66
hipStreamCreate
hipError_t hipStreamCreate(hipStream_t *stream)
Create an asynchronous stream.
Definition: hip_stream.cpp:106
hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags
hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, const void *f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags __dparm(hipOccupancyDefault))
Returns occupancy for a device function.
hipDeviceGetStreamPriorityRange
hipError_t hipDeviceGetStreamPriorityRange(int *leastPriority, int *greatestPriority)
Returns numerical values that correspond to the least and greatest stream priority.
Definition: hip_stream.cpp:122
hipIpcEventHandle_st
Definition: hip_runtime_api.h:120
hipDeviceProp_t::maxTexture3D
int maxTexture3D[3]
Maximum dimensions (width, height, depth) of 3D images, in image elements.
Definition: hip_runtime_api.h:123
hipStreamCreateWithPriority
hipError_t hipStreamCreateWithPriority(hipStream_t *stream, unsigned int flags, int priority)
Create an asynchronous stream with the specified priority.
Definition: hip_stream.cpp:113
hipCtxPushCurrent
hipError_t hipCtxPushCurrent(hipCtx_t ctx)
Push the context to be set as current/default context.
Definition: hip_context.cpp:154
hipCtxGetDevice
hipError_t hipCtxGetDevice(hipDevice_t *device)
Get the handle of the device associated with current/default context.
Definition: hip_context.cpp:191
hipFuncCache_t
hipFuncCache_t
Definition: hip_runtime_api.h:297
hipDeviceProp_t::cooperativeMultiDeviceUnmatchedBlockDim
int cooperativeMultiDeviceUnmatchedBlockDim
Definition: hip_runtime_api.h:136
hipPeekAtLastError
hipError_t hipPeekAtLastError(void)
Return last error returned by any HIP runtime API call.
Definition: hip_error.cpp:41
hipMemcpy3DAsync
hipError_t hipMemcpy3DAsync(const struct hipMemcpy3DParms *p, hipStream_t stream __dparm(0))
Copies data between host and device asynchronously.
hipDeviceGetPCIBusId
hipError_t hipDeviceGetPCIBusId(char *pciBusId, int len, int device)
Returns a PCI Bus Id string for the device, overloaded to take int device ID.
Definition: hip_device.cpp:460
hipHostGetFlags
hipError_t hipHostGetFlags(unsigned int *flagsPtr, void *hostPtr)
Return flags associated with host pointer.
Definition: hip_memory.cpp:1133
hipErrorHostMemoryNotRegistered
hipErrorHostMemoryNotRegistered
Produced when trying to unlock a non-page-locked memory.
Definition: hip_runtime_api.h:264
hipErrorRuntimeOther
hipErrorRuntimeOther
Definition: hip_runtime_api.h:277
hipDeviceAttributeClockRate
@ hipDeviceAttributeClockRate
Peak clock frequency in kilohertz.
Definition: hip_runtime_api.h:305
hipMemGetAddressRange
hipError_t hipMemGetAddressRange(hipDeviceptr_t *pbase, size_t *psize, hipDeviceptr_t dptr)
Get information on memory allocations.
Definition: hip_memory.cpp:2437
hipSurfaceObject_t
unsigned long long hipSurfaceObject_t
Definition: hip_surface_types.h:36
hipStreamWaitEvent
hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags)
Make the specified compute stream wait for an event.
Definition: hip_stream.cpp:130
hipDeviceAttributeMaxGridDimZ
@ hipDeviceAttributeMaxGridDimZ
Maximum z-dimension of a grid.
Definition: hip_runtime_api.h:296
hipGetDevice
hipError_t hipGetDevice(int *deviceId)
Return the default device id for the calling host thread.
Definition: hip_device.cpp:32
hipModuleOccupancyMaxPotentialBlockSizeWithFlags
hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int *gridSize, int *blockSize, hipFunction_t f, size_t dynSharedMemPerBlk, int blockSizeLimit, unsigned int flags)
Determine the grid and block sizes that achieve maximum occupancy for a kernel.
Definition: hip_module.cpp:1656
hipMallocArray
hipError_t hipMallocArray(hipArray **array, const hipChannelFormatDesc *desc, size_t width, size_t height __dparm(0), unsigned int flags __dparm(hipArrayDefault))
Allocate an array on the device.
hipMemcpyToArray
hipError_t hipMemcpyToArray(hipArray *dst, size_t wOffset, size_t hOffset, const void *src, size_t count, hipMemcpyKind kind)
Copies data between host and device.
Definition: hip_memory.cpp:1494
hipModuleLoadData
hipError_t hipModuleLoadData(hipModule_t *module, const void *image)
Builds a module from a code object which resides in host memory. Image is a pointer to that location.
Definition: hip_module.cpp:1492
hipMemoryTypeDevice
@ hipMemoryTypeDevice
Definition: hip_runtime_api.h:150
hipDeviceAttributeMaxRegistersPerBlock
@ hipDeviceAttributeMaxRegistersPerBlock
Definition: hip_runtime_api.h:301
hipMemcpyDtoDAsync
hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes, hipStream_t stream)
Copy data from Device to Device asynchronously.
Definition: hip_memory.cpp:1429
hipErrorNoDevice
hipErrorNoDevice
Call to hipGetDeviceCount returned 0 devices.
Definition: hip_runtime_api.h:218
hipDeviceAttributeComputeCapabilityMinor
@ hipDeviceAttributeComputeCapabilityMinor
Minor compute capability version number.
Definition: hip_runtime_api.h:315
hipModuleLaunchKernel
hipError_t hipModuleLaunchKernel(hipFunction_t f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, hipStream_t stream, void **kernelParams, void **extra)
launches kernel f with launch parameters and shared memory on stream with arguments passed to kernelp...
hipDeviceProp_t::l2CacheSize
int l2CacheSize
L2 cache size.
Definition: hip_runtime_api.h:103
hipDevicePrimaryCtxRelease
hipError_t hipDevicePrimaryCtxRelease(hipDevice_t dev)
Release the primary context on the GPU.
Definition: hip_context.cpp:285
hipCtxGetApiVersion
hipError_t hipCtxGetApiVersion(hipCtx_t ctx, int *apiVersion)
Returns the approximate HIP api version.
Definition: hip_context.cpp:207
hipDeviceProp_t::textureAlignment
size_t textureAlignment
Alignment requirement for textures.
Definition: hip_runtime_api.h:127
hipHostMalloc
hipError_t hipHostMalloc(void **ptr, size_t size, unsigned int flags)
Allocate device accessible page locked host memory.
Definition: hip_memory.cpp:762
hipDeviceAttributeKernelExecTimeout
@ hipDeviceAttributeKernelExecTimeout
Run time limit for kernels executed on the device.
Definition: hip_runtime_api.h:339
hipDeviceAttributeL2CacheSize
@ hipDeviceAttributeL2CacheSize
Definition: hip_runtime_api.h:310
hipDeviceGetName
hipError_t hipDeviceGetName(char *name, int len, hipDevice_t device)
Returns an identifier string for the device.
Definition: hip_device.cpp:446
hipDeviceAttributeMaxTexture3DWidth
@ hipDeviceAttributeMaxTexture3DWidth
Maximum dimension width of 3D images in image elements.
Definition: hip_runtime_api.h:329
hipDeviceArch_t::hasSurfaceFuncs
unsigned hasSurfaceFuncs
Surface functions.
Definition: hip_runtime_api.h:70
hipDeviceAttributeIntegrated
@ hipDeviceAttributeIntegrated
iGPU
Definition: hip_runtime_api.h:323
hipDeviceProp_t::isMultiGpuBoard
int isMultiGpuBoard
1 if device is on a multi-GPU board, 0 if not.
Definition: hip_runtime_api.h:114
hipMemcpyParam2DAsync
hipError_t hipMemcpyParam2DAsync(const hip_Memcpy2D *pCopy, hipStream_t stream __dparm(0))
Copies memory for 2D arrays.
hipModuleUnload
hipError_t hipModuleUnload(hipModule_t module)
Frees the module.
Definition: hip_module.cpp:1244
hipDeviceAttributeMaxGridDimY
@ hipDeviceAttributeMaxGridDimY
Maximum y-dimension of a grid.
Definition: hip_runtime_api.h:295
hipMemoryTypeHost
@ hipMemoryTypeHost
Memory is physically located on host.
Definition: hip_runtime_api.h:149
hipDeviceEnablePeerAccess
hipError_t hipDeviceEnablePeerAccess(int peerDeviceId, unsigned int flags)
Enable direct access from current device's virtual address space to memory allocations physically loc...
Definition: hip_peer.cpp:200
hipErrorInvalidContext
hipErrorInvalidContext
Produced when input context is invalid.
Definition: hip_runtime_api.h:221
hipDeviceArch_t::hasSharedInt64Atomics
unsigned hasSharedInt64Atomics
64-bit integer atomics for shared memory.
Definition: hip_runtime_api.h:54
hipDeviceProp_t::computeMode
int computeMode
Compute mode.
Definition: hip_runtime_api.h:105
hipDeviceAttributeIsMultiGpuBoard
@ hipDeviceAttributeIsMultiGpuBoard
Multiple GPU devices.
Definition: hip_runtime_api.h:322
hipSharedMemConfig
hipSharedMemConfig
Definition: hip_runtime_api.h:308
hipDeviceProp_t::clockRate
int clockRate
Max clock frequency of the multiProcessors in khz.
Definition: hip_runtime_api.h:92
hipErrorPeerAccessNotEnabled
hipErrorPeerAccessNotEnabled
Peer access was never enabled from the current device.
Definition: hip_runtime_api.h:258
hipFuncGetAttribute
hipError_t hipFuncGetAttribute(int *value, hipFunction_attribute attrib, hipFunction_t hfunc)
Find out a specific attribute for a given function.
Definition: hip_module.cpp:1411
hipDeviceComputeCapability
hipError_t hipDeviceComputeCapability(int *major, int *minor, hipDevice_t device)
Returns the compute capability of the device.
Definition: hip_device.cpp:434
hipModuleOccupancyMaxPotentialBlockSize
hipError_t hipModuleOccupancyMaxPotentialBlockSize(int *gridSize, int *blockSize, hipFunction_t f, size_t dynSharedMemPerBlk, int blockSizeLimit)
Determine the grid and block sizes that achieve maximum occupancy for a kernel.
Definition: hip_module.cpp:1646
hipStreamCallback_t
void(* hipStreamCallback_t)(hipStream_t stream, hipError_t status, void *userData)
Definition: hip_runtime_api.h:924
hipDeviceArch_t::hasDynamicParallelism
unsigned hasDynamicParallelism
Dynamic parallelism.
Definition: hip_runtime_api.h:72
hip_Memcpy2D
Definition: driver_types.h:91
hipDeviceProp_t::canMapHostMemory
int canMapHostMemory
Check whether HIP can map host memory.
Definition: hip_runtime_api.h:115
hipDeviceProp_t::sharedMemPerBlock
size_t sharedMemPerBlock
Size of shared memory region (in bytes).
Definition: hip_runtime_api.h:86
hipCtxGetFlags
hipError_t hipCtxGetFlags(unsigned int *flags)
Return flags used for creating default context.
Definition: hip_context.cpp:254
hipDevicePrimaryCtxGetState
hipError_t hipDevicePrimaryCtxGetState(hipDevice_t dev, unsigned int *flags, int *active)
Get the state of the primary context.
Definition: hip_context.cpp:263
hipDeviceAttributeCooperativeMultiDeviceLaunch
@ hipDeviceAttributeCooperativeMultiDeviceLaunch
Support cooperative launch on multiple devices.
Definition: hip_runtime_api.h:325
hipDeviceProp_t::maxThreadsPerMultiProcessor
int maxThreadsPerMultiProcessor
Maximum resident threads per multi-processor.
Definition: hip_runtime_api.h:104
hipDeviceSetCacheConfig
hipError_t hipDeviceSetCacheConfig(hipFuncCache_t cacheConfig)
Set L1/Shared cache partition.
Definition: hip_device.cpp:74
hipCtxDestroy
hipError_t hipCtxDestroy(hipCtx_t ctx)
Destroy a HIP context.
Definition: hip_context.cpp:109
hipCtxEnablePeerAccess
hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags)
Enables direct access to memory allocations in a peer context.
Definition: hip_peer.cpp:221
hipDeviceProp_t::major
int major
Definition: hip_runtime_api.h:96
hipDeviceAttributeMaxSharedMemoryPerBlock
@ hipDeviceAttributeMaxSharedMemoryPerBlock
Definition: hip_runtime_api.h:297
hipMemcpyAtoH
hipError_t hipMemcpyAtoH(void *dst, hipArray *srcArray, size_t srcOffset, size_t count)
Copies data between host and device.
Definition: hip_memory.cpp:1544
hipGetDeviceCount
hipError_t hipGetDeviceCount(int *count)
Return number of compute-capable devices.
Definition: hip_device.cpp:69
hipSuccess
hipSuccess
Successful completion.
Definition: hip_runtime_api.h:196
hipHostUnregister
hipError_t hipHostUnregister(void *hostPtr)
Un-register host pointer.
Definition: hip_memory.cpp:1233
hipStreamGetFlags
hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags)
Return flags associated with this stream.
Definition: hip_stream.cpp:223
hipMemsetD8Async
hipError_t hipMemsetD8Async(hipDeviceptr_t dest, unsigned char value, size_t count, hipStream_t stream __dparm(0))
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant byte value va...
hipDeviceAttributeMaxThreadsPerBlock
@ hipDeviceAttributeMaxThreadsPerBlock
Maximum number of threads per block.
Definition: hip_runtime_api.h:290
hipDeviceProp_t::gcnArch
int gcnArch
AMD GCN Arch Value. Eg: 803, 701.
Definition: hip_runtime_api.h:116
hipStreamSynchronize
hipError_t hipStreamSynchronize(hipStream_t stream)
Wait for all commands in stream to complete.
Definition: hip_stream.cpp:184
hipGetErrorName
const char * hipGetErrorName(hipError_t hip_error)
Return name of the specified error code in text form.
Definition: hip_error.cpp:48
hipDeviceProp_t::kernelExecTimeoutEnabled
int kernelExecTimeoutEnabled
Run time limit for kernels executed on the device.
Definition: hip_runtime_api.h:129
hipDeviceGet
hipError_t hipDeviceGet(hipDevice_t *device, int ordinal)
Returns a handle to a compute device.
Definition: hip_context.cpp:70
hipMemcpyDtoD
hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes)
Copy data from Device to Device.
Definition: hip_memory.cpp:1390
hipDeviceProp_t::maxTexture1D
int maxTexture1D
Maximum number of elements in 1D images.
Definition: hip_runtime_api.h:121
hipMemcpy3DParms
Definition: driver_types.h:383
hipDeviceAttributeMaxBlockDimZ
@ hipDeviceAttributeMaxBlockDimZ
Maximum z-dimension of a block.
Definition: hip_runtime_api.h:293
hipMallocManaged
hipError_t hipMallocManaged(void **dev_ptr, size_t size, unsigned int flags __dparm(hipMemAttachGlobal))
Allocates memory that will be automatically managed by AMD HMM.
hipMemcpyHtoD
hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void *src, size_t sizeBytes)
Copy data from Host to Device.
Definition: hip_memory.cpp:1374
hipDriverGetVersion
hipError_t hipDriverGetVersion(int *driverVersion)
Returns the approximate HIP driver version.
Definition: hip_context.cpp:85
hipDeviceArch_t::hasDoubles
unsigned hasDoubles
Double-precision floating point.
Definition: hip_runtime_api.h:57
hipErrorInvalidKernelFile
hipErrorInvalidKernelFile
In CUDA DRV, it is CUDA_ERROR_INVALID_PTX.
Definition: hip_runtime_api.h:238
hipDeviceProp_t::maxThreadsPerBlock
int maxThreadsPerBlock
Max work items per work group or workgroup max size.
Definition: hip_runtime_api.h:89
hipDeviceAttributeMaxBlockDimY
@ hipDeviceAttributeMaxBlockDimY
Maximum y-dimension of a block.
Definition: hip_runtime_api.h:292
hipMemcpy2DToArray
hipError_t hipMemcpy2DToArray(hipArray *dst, size_t wOffset, size_t hOffset, const void *src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind)
Copies data between host and device.
Definition: hip_memory.cpp:1444
hipMemAllocPitch
hipError_t hipMemAllocPitch(hipDeviceptr_t *dptr, size_t *pitch, size_t widthInBytes, size_t height, unsigned int elementSizeBytes)
Definition: hip_memory.cpp:862
hipDeviceProp_t
Definition: hip_runtime_api.h:83
hipMemAllocHost
hipError_t hipMemAllocHost(void **ptr, size_t size)
Allocate pinned host memory [Deprecated].
Definition: hip_runtime_api.h:765
hipMallocHost
hipError_t hipMallocHost(void **ptr, size_t size)
Allocate pinned host memory [Deprecated].
Definition: hip_runtime_api.h:759
hipDeviceAttributeMaxTexture2DHeight
@ hipDeviceAttributeMaxTexture2DHeight
Maximum dimension height of 2D images in image elements.
Definition: hip_runtime_api.h:328
hipDeviceArch_t::hasSharedInt32Atomics
unsigned hasSharedInt32Atomics
32-bit integer atomics for shared memory.
Definition: hip_runtime_api.h:48
hipFuncSetCacheConfig
hipError_t hipFuncSetCacheConfig(const void *func, hipFuncCache_t config)
Set Cache configuration for a specific function.
Definition: hip_device.cpp:108
hipErrorInvalidValue
hipErrorInvalidValue
Definition: hip_runtime_api.h:197
hipDeviceProp_t::memPitch
size_t memPitch
Maximum pitch in bytes allowed by memory copies.
Definition: hip_runtime_api.h:126
hipMemsetD32Async
hipError_t hipMemsetD32Async(hipDeviceptr_t dst, int value, size_t count, hipStream_t stream __dparm(0))
Fills the memory area pointed to by dev with the constant integer value for specified number of times...
hipDeviceProp_t::pciBusID
int pciBusID
PCI Bus ID.
Definition: hip_runtime_api.h:111
hipRuntimeGetVersion
hipError_t hipRuntimeGetVersion(int *runtimeVersion)
Returns the approximate HIP Runtime version.
Definition: hip_context.cpp:97
hipDeviceAttributeComputeCapabilityMajor
@ hipDeviceAttributeComputeCapabilityMajor
Major compute capability version number.
Definition: hip_runtime_api.h:314
hipEventQuery
hipError_t hipEventQuery(hipEvent_t event)
Query event status.
Definition: hip_event.cpp:394
hipDeviceAttributeMaxTexture3DDepth
@ hipDeviceAttributeMaxTexture3DDepth
Maximum dimensions depth of 3D images in image elements.
Definition: hip_runtime_api.h:331
ihipCtx_t
Definition: hip_hcc_internal.h:938
hipErrorRuntimeMemory
hipErrorRuntimeMemory
Definition: hip_runtime_api.h:275
hipDeviceAttributeMaxThreadsPerMultiProcessor
@ hipDeviceAttributeMaxThreadsPerMultiProcessor
Definition: hip_runtime_api.h:312
hipStreamGetPriority
hipError_t hipStreamGetPriority(hipStream_t stream, int *priority)
Query the priority of a stream.
Definition: hip_stream.cpp:238
hipDeviceProp_t::arch
hipDeviceArch_t arch
Architectural feature flags. New for HIP.
Definition: hip_runtime_api.h:108
hipEventSynchronize
hipError_t hipEventSynchronize(hipEvent_t event)
Wait for an event to complete.
Definition: hip_event.cpp:300
hipOccupancyMaxActiveBlocksPerMultiprocessor
hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, const void *f, int blockSize, size_t dynSharedMemPerBlk)
Returns occupancy for a device function.
Definition: hip_module.cpp:1667
hipHostFree
hipError_t hipHostFree(void *ptr)
Free memory allocated by the hcc hip host memory allocation API. This API performs an implicit hipDevi...
Definition: hip_memory.cpp:2396
hipDeviceAttributePciBusId
@ hipDeviceAttributePciBusId
PCI Bus ID.
Definition: hip_runtime_api.h:318
hipIpcOpenMemHandle
hipError_t hipIpcOpenMemHandle(void **devPtr, hipIpcMemHandle_t handle, unsigned int flags)
Opens an interprocess memory handle exported from another process and returns a device pointer usable...
Definition: hip_memory.cpp:2494
hipMemsetD16
hipError_t hipMemsetD16(hipDeviceptr_t dest, unsigned short value, size_t count)
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant short value v...
Definition: hip_memory.cpp:2271
hipDeviceProp_t::tccDriver
int tccDriver
1:If device is Tesla device using TCC driver, else 0
Definition: hip_runtime_api.h:131
ihipEvent_t
Definition: hip_hcc_internal.h:759
hipDeviceGetLimit
hipError_t hipDeviceGetLimit(size_t *pValue, enum hipLimit_t limit)
Get Resource limits of current device.
Definition: hip_device.cpp:94
hipMalloc
hipError_t hipMalloc(void **ptr, size_t size)
Allocate memory on the default accelerator.
Definition: hip_memory.cpp:695
hipIpcMemHandle_st
Definition: hip_runtime_api.h:111
hipEventElapsedTime
hipError_t hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop)
Return the elapsed time between two events.
Definition: hip_event.cpp:344
hipGetLastError
hipError_t hipGetLastError(void)
Return last error returned by any HIP runtime API call and resets the stored error code to hipSuccess...
Definition: hip_error.cpp:32
hipInit
hipError_t hipInit(unsigned int flags)
Explicitly initializes the HIP runtime.
Definition: hip_context.cpp:39
ihipStream_t
Definition: hip_hcc_internal.h:580
hipDeviceAttributeTexturePitchAlignment
@ hipDeviceAttributeTexturePitchAlignment
Pitch alignment requirement for 2D texture references bound to pitched memory.
Definition: hip_runtime_api.h:338
hipDeviceAttributeWarpSize
@ hipDeviceAttributeWarpSize
Warp size in threads.
Definition: hip_runtime_api.h:300
hipDeviceArch_t::hasGlobalInt32Atomics
unsigned hasGlobalInt32Atomics
32-bit integer atomics for global memory.
Definition: hip_runtime_api.h:46
hipArray
Definition: driver_types.h:78
hipDeviceArch_t::hasSyncThreadsExt
unsigned hasSyncThreadsExt
__syncthreads_count, __syncthreads_and, __syncthreads_or.
Definition: hip_runtime_api.h:67
hipIpcGetMemHandle
hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t *handle, void *devPtr)
Gets an interprocess memory handle for an existing device memory allocation.
Definition: hip_memory.cpp:2458
hipErrorInvalidDevice
hipErrorInvalidDevice
DeviceID must be in range 0...#compute-devices.
Definition: hip_runtime_api.h:219
hipDeviceArch_t::hasFunnelShift
unsigned hasFunnelShift
Funnel two words into one with shift&mask caps.
Definition: hip_runtime_api.h:63
hipCtxDisablePeerAccess
hipError_t hipCtxDisablePeerAccess(hipCtx_t peerCtx)
Disable direct access from current context's virtual address space to memory allocations physically l...
Definition: hip_peer.cpp:227
hipDeviceAttributeMaxTexture3DHeight
@ hipDeviceAttributeMaxTexture3DHeight
Maximum dimensions height of 3D images in image elements.
Definition: hip_runtime_api.h:330
hipDeviceAttributeMemoryClockRate
@ hipDeviceAttributeMemoryClockRate
Peak memory clock frequency in kilohertz.
Definition: hip_runtime_api.h:306
hipErrorNotReady
hipErrorNotReady
Definition: hip_runtime_api.h:249
hipHostGetDevicePointer
hipError_t hipHostGetDevicePointer(void **devPtr, void *hstPtr, unsigned int flags)
Get Device pointer from Host Pointer allocated through hipHostMalloc.
hipMemGetInfo
hipError_t hipMemGetInfo(size_t *free, size_t *total)
Query memory info. Return snapshot of free memory, and total allocatable memory on the device.
Definition: hip_memory.cpp:2296
hipEventDestroy
hipError_t hipEventDestroy(hipEvent_t event)
Destroy the specified event.
Definition: hip_event.cpp:278
hipDeviceSetSharedMemConfig
hipError_t hipDeviceSetSharedMemConfig(hipSharedMemConfig config)
The bank width of shared memory on current device is set.
Definition: hip_device.cpp:116
hipDeviceReset
hipError_t hipDeviceReset(void)
The state of current device is discarded and updated to a fresh state.
Definition: hip_device.cpp:148
hipDeviceProp_t::maxGridSize
int maxGridSize[3]
Max grid dimensions (XYZ).
Definition: hip_runtime_api.h:91
hipDeviceAttributeComputeMode
@ hipDeviceAttributeComputeMode
Compute mode that device is currently in.
Definition: hip_runtime_api.h:309
hipSetDeviceFlags
hipError_t hipSetDeviceFlags(unsigned flags)
The current device behavior is changed according to the flags passed.
hipDeviceAttributePciDeviceId
@ hipDeviceAttributePciDeviceId
PCI Device ID.
Definition: hip_runtime_api.h:319
hipDeviceProp_t::maxSharedMemoryPerMultiProcessor
size_t maxSharedMemoryPerMultiProcessor
Maximum Shared Memory Per Multiprocessor.
Definition: hip_runtime_api.h:113
hipDeviceProp_t::clockInstructionRate
int clockInstructionRate
Definition: hip_runtime_api.h:106
dim3
Definition: hip_runtime_api.h:320
hipStreamQuery
hipError_t hipStreamQuery(hipStream_t stream)
Return hipSuccess if all of the operations in the specified stream have completed,...
Definition: hip_stream.cpp:161
hipDevicePrimaryCtxSetFlags
hipError_t hipDevicePrimaryCtxSetFlags(hipDevice_t dev, unsigned int flags)
Set flags for the primary context.
Definition: hip_context.cpp:321
hipPointerAttribute_t
Definition: hip_runtime_api.h:161
hipDeviceAttributeTotalConstantMemory
@ hipDeviceAttributeTotalConstantMemory
Constant memory size in bytes.
Definition: hip_runtime_api.h:299
hipFree
hipError_t hipFree(void *ptr)
Free memory allocated by the hcc hip memory allocation API. This API performs an implicit hipDeviceSy...
Definition: hip_memory.cpp:2344
hipDeviceArch_t::hasWarpShuffle
unsigned hasWarpShuffle
Warp shuffle operations. (__shfl_*).
Definition: hip_runtime_api.h:62
hipArrayDefault
#define hipArrayDefault
Default HIP array allocation flag.
Definition: hip_runtime_api.h:221
hipDevicePrimaryCtxRetain
hipError_t hipDevicePrimaryCtxRetain(hipCtx_t *pctx, hipDevice_t dev)
Retain the primary context on the GPU.
Definition: hip_context.cpp:296
hipOccupancyMaxPotentialBlockSize
hipError_t hipOccupancyMaxPotentialBlockSize(int *gridSize, int *blockSize, const void *f, size_t dynSharedMemPerBlk, int blockSizeLimit)
Determine the grid and block sizes to achieve maximum occupancy for a kernel.
hipModuleLoad
hipError_t hipModuleLoad(hipModule_t *module, const char *fname)
Loads code object from file into a hipModule_t.
Definition: hip_module.cpp:1497
hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags
hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags)
Returns occupancy for a device function.
Definition: hip_module.cpp:1698
hipFreeHost
hipError_t hipFreeHost(void *ptr)
Free memory allocated by the hcc hip host memory allocation API. [Deprecated].
Definition: hip_runtime_api.h:816
hipMemcpyHtoA
hipError_t hipMemcpyHtoA(hipArray *dstArray, size_t dstOffset, const void *srcHost, size_t count)
Copies data between host and device.
Definition: hip_memory.cpp:1528
hipModuleGetFunction
hipError_t hipModuleGetFunction(hipFunction_t *function, hipModule_t module, const char *kname)
Function with kname will be extracted if present in module.
Definition: hip_module.cpp:1309
hipDeviceProp_t::memoryBusWidth
int memoryBusWidth
Global memory bus width in bits.
Definition: hip_runtime_api.h:94
hipStreamAddCallback
hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback, void *userData, unsigned int flags)
Adds a callback to be called on the host after all currently enqueued items in the stream have comple...
Definition: hip_stream.cpp:258
hipDeviceArch_t::hasWarpVote
unsigned hasWarpVote
Warp vote instructions (__any, __all).
Definition: hip_runtime_api.h:60
hipDeviceProp_t::name
char name[256]
Device name.
Definition: hip_runtime_api.h:84
hipMemcpyDtoHAsync
hipError_t hipMemcpyDtoHAsync(void *dst, hipDeviceptr_t src, size_t sizeBytes, hipStream_t stream)
Copy data from Device to Host asynchronously.
Definition: hip_memory.cpp:1437
hipDeviceArch_t::hasGlobalFloatAtomicExch
unsigned hasGlobalFloatAtomicExch
32-bit float atomic exch for global memory.
Definition: hip_runtime_api.h:47
hipDeviceProp_t::concurrentKernels
int concurrentKernels
Device can possibly execute multiple kernels concurrently.
Definition: hip_runtime_api.h:109
hipDeviceArch_t::hasWarpBallot
unsigned hasWarpBallot
Warp ballot instructions (__ballot).
Definition: hip_runtime_api.h:61
hipDeviceProp_t::totalGlobalMem
size_t totalGlobalMem
Size of global memory region (in bytes).
Definition: hip_runtime_api.h:85
hipDeviceAttributeTextureAlignment
@ hipDeviceAttributeTextureAlignment
Alignment requirement for textures.
Definition: hip_runtime_api.h:337
hipFuncGetAttributes
hipError_t hipFuncGetAttributes(struct hipFuncAttributes *attr, const void *func)
Find out attributes for a given function.
Definition: hip_module.cpp:1393
hipEventRecord
hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream)
Record an event in the specified stream.
Definition: hip_event.cpp:213
hipMemcpy2D
hipError_t hipMemcpy2D(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind)
Copies data between host and device.
Definition: hip_memory.cpp:2020
hipExtent
Definition: driver_types.h:370
hipPitchedPtr
Definition: driver_types.h:363
hipModuleGetGlobal
hipError_t hipModuleGetGlobal(void **, size_t *, hipModule_t, const char *)
Returns device memory pointer and size of the kernel present in the module with symbol name.
Definition: hip_module.cpp:1113
hipMemset2D
hipError_t hipMemset2D(void *dst, size_t pitch, int value, size_t width, size_t height)
Fills the memory area pointed to by dst with the constant value.
Definition: hip_memory.cpp:2251
hipMemset3D
hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent)
Fills synchronously the memory area pointed to by pitchedDevPtr with the constant value.
Definition: hip_memory.cpp:2286
hipStreamCreateWithFlags
hipError_t hipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags)
Create an asynchronous stream.
Definition: hip_stream.cpp:97
hipDeviceGetAttribute
hipError_t hipDeviceGetAttribute(int *pi, hipDeviceAttribute_t attr, int deviceId)
Query for a specific device attribute.
Definition: hip_device.cpp:354
hipMemcpyFromArray
hipError_t hipMemcpyFromArray(void *dst, hipArray_const_t srcArray, size_t wOffset, size_t hOffset, size_t count, hipMemcpyKind kind)
Copies data between host and device.
Definition: hip_memory.cpp:1511
hipDeviceAttributeCanMapHostMemory
@ hipDeviceAttributeCanMapHostMemory
Device can map host memory into device address space.
Definition: hip_runtime_api.h:340
hipDeviceProp_t::maxThreadsDim
int maxThreadsDim[3]
Max number of threads in each dimension (XYZ) of a block.
Definition: hip_runtime_api.h:90
ihipModuleSymbol_t
Definition: hip_module.cpp:108
hipMemcpyPeerAsync
hipError_t hipMemcpyPeerAsync(void *dst, int dstDeviceId, const void *src, int srcDevice, size_t sizeBytes, hipStream_t stream __dparm(0))
Copies memory from one device to memory on another device.
hipMemcpyHtoDAsync
hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, void *src, size_t sizeBytes, hipStream_t stream)
Copy data from Host to Device asynchronously.
Definition: hip_memory.cpp:1422
hipDeviceProp_t::cooperativeMultiDeviceLaunch
int cooperativeMultiDeviceLaunch
HIP device supports cooperative launch on multiple devices.
Definition: hip_runtime_api.h:120
hipMemcpyDtoH
hipError_t hipMemcpyDtoH(void *dst, hipDeviceptr_t src, size_t sizeBytes)
Copy data from Device to Host.
Definition: hip_memory.cpp:1382
hipDeviceArch_t::has3dGrid
unsigned has3dGrid
Grid and group dims are 3D (rather than 2D).
Definition: hip_runtime_api.h:71
hipDeviceGetCacheConfig
hipError_t hipDeviceGetCacheConfig(hipFuncCache_t *cacheConfig)
Get cache configuration for the current device.
Definition: hip_device.cpp:82
hipMemcpyPeer
hipError_t hipMemcpyPeer(void *dst, int dstDeviceId, const void *src, int srcDeviceId, size_t sizeBytes)
Copies memory from one device to memory on another device.
Definition: hip_peer.cpp:207
hipDeviceAttributeMaxTexture1DWidth
@ hipDeviceAttributeMaxTexture1DWidth
Maximum number of elements in 1D images.
Definition: hip_runtime_api.h:326
hipDeviceAttributeCooperativeLaunch
@ hipDeviceAttributeCooperativeLaunch
Support cooperative launch.
Definition: hip_runtime_api.h:324
hipDeviceAttributeMultiprocessorCount
@ hipDeviceAttributeMultiprocessorCount
Number of multiprocessors on the device.
Definition: hip_runtime_api.h:308
hipDeviceProp_t::pciDeviceID
int pciDeviceID
PCI Device ID.
Definition: hip_runtime_api.h:112
hipGetDeviceProperties
hipError_t hipGetDeviceProperties(hipDeviceProp_t *prop, int deviceId)
Returns device properties.
Definition: hip_device.cpp:381
hipMemcpy
hipError_t hipMemcpy(void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind)
Copy data from src to dst.
Definition: hip_memory.cpp:1367
hipDeviceProp_t::memoryClockRate
int memoryClockRate
Max global memory clock frequency in kHz.
Definition: hip_runtime_api.h:93
hipEventCreateWithFlags
hipError_t hipEventCreateWithFlags(hipEvent_t *event, unsigned flags)
Create an event with the specified flags.
Definition: hip_event.cpp:201
hipErrorCooperativeLaunchTooLarge
hipErrorCooperativeLaunchTooLarge
Definition: hip_runtime_api.h:268
hipDeviceProp_t::warpSize
int warpSize
Warp size.
Definition: hip_runtime_api.h:88
hipCtxGetSharedMemConfig
hipError_t hipCtxGetSharedMemConfig(hipSharedMemConfig *pConfig)
Get Shared memory bank configuration.
Definition: hip_context.cpp:241
hipDeviceTotalMem
hipError_t hipDeviceTotalMem(size_t *bytes, hipDevice_t device)
Returns the total amount of memory on the device.
Definition: hip_device.cpp:480
hipFreeArray
hipError_t hipFreeArray(hipArray *array)
Frees an array on the device.
Definition: hip_memory.cpp:2409
hipErrorAssert
hipErrorAssert
Produced when the kernel calls assert.
Definition: hip_runtime_api.h:261
textureReference
Definition: texture_types.h:74
hipCtxPopCurrent
hipError_t hipCtxPopCurrent(hipCtx_t *ctx)
Pop the current/default context and return the popped context.
Definition: hip_context.cpp:133
hipDeviceProp_t::cooperativeMultiDeviceUnmatchedFunc
int cooperativeMultiDeviceUnmatchedFunc
Definition: hip_runtime_api.h:132
hipDeviceProp_t::cooperativeMultiDeviceUnmatchedGridDim
int cooperativeMultiDeviceUnmatchedGridDim
Definition: hip_runtime_api.h:134
hipDeviceCanAccessPeer
hipError_t hipDeviceCanAccessPeer(int *canAccessPeer, int deviceId, int peerDeviceId)
Determine if a device can access a peer's memory.
Definition: hip_peer.cpp:186
hipDeviceArch_t::hasFloatAtomicAdd
unsigned hasFloatAtomicAdd
32-bit float atomic add in global and shared memory.
Definition: hip_runtime_api.h:50
hipCtxSetCurrent
hipError_t hipCtxSetCurrent(hipCtx_t ctx)
Set the passed context as current/default.
Definition: hip_context.cpp:178
hipDeviceProp_t::cooperativeLaunch
int cooperativeLaunch
HIP device supports cooperative launch.
Definition: hip_runtime_api.h:119
hipDeviceArch_t::hasSharedFloatAtomicExch
unsigned hasSharedFloatAtomicExch
32-bit float atomic exch for shared memory.
Definition: hip_runtime_api.h:49
hipTextureDesc
Definition: texture_types.h:95
hipResourceViewDesc
Definition: driver_types.h:323
hipDeviceProp_t::multiProcessorCount
int multiProcessorCount
Number of multi-processors (compute units).
Definition: hip_runtime_api.h:102
hipDeviceProp_t::integrated
int integrated
APU vs dGPU.
Definition: hip_runtime_api.h:118
hipMemsetD8
hipError_t hipMemsetD8(hipDeviceptr_t dest, unsigned char value, size_t count)
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant byte value va...
Definition: hip_memory.cpp:2261
hipCtxSetCacheConfig
hipError_t hipCtxSetCacheConfig(hipFuncCache_t cacheConfig)
Set L1/Shared cache partition.
Definition: hip_context.cpp:225
hipMemset2DAsync
hipError_t hipMemset2DAsync(void *dst, size_t pitch, int value, size_t width, size_t height, hipStream_t stream __dparm(0))
Fills asynchronously the memory area pointed to by dst with the constant value.
hipDeviceProp_t::ECCEnabled
int ECCEnabled
Device has ECC support enabled.
Definition: hip_runtime_api.h:130
hipCtxCreate
hipError_t hipCtxCreate(hipCtx_t *ctx, unsigned int flags, hipDevice_t device)
Create a context and set it as current/default context.
Definition: hip_context.cpp:52
hipDeviceProp_t::totalConstMem
size_t totalConstMem
Size of constant memory region (in bytes).
Definition: hip_runtime_api.h:95
hipDeviceProp_t::maxTexture2D
int maxTexture2D[2]
Maximum dimensions (width, height) of 2D images, in image elements.
Definition: hip_runtime_api.h:122
hipLaunchParams_t
Definition: hip_runtime_api.h:329
hipErrorHostMemoryAlreadyRegistered
hipErrorHostMemoryAlreadyRegistered
Produced when trying to lock a page-locked memory.
Definition: hip_runtime_api.h:262
hipCtxSetSharedMemConfig
hipError_t hipCtxSetSharedMemConfig(hipSharedMemConfig config)
Set Shared memory bank configuration.
Definition: hip_context.cpp:233
hipModuleOccupancyMaxActiveBlocksPerMultiprocessor
hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk)
Returns occupancy for a device function.
Definition: hip_module.cpp:1677
hipDeviceAttribute_t
hipDeviceAttribute_t
Definition: hip_runtime_api.h:289
hipResourceDesc
Definition: driver_types.h:262
hipErrorLaunchFailure
hipErrorLaunchFailure
An exception occurred on the device while executing a kernel.
Definition: hip_runtime_api.h:266
ihipModule_t
Definition: hip_hcc_internal.h:415
hipDeviceSynchronize
hipError_t hipDeviceSynchronize(void)
Waits on all active streams on current device.
Definition: hip_device.cpp:143
hipDeviceAttributeConcurrentKernels
@ hipDeviceAttributeConcurrentKernels
Definition: hip_runtime_api.h:316
hipDeviceProp_t::cooperativeMultiDeviceUnmatchedSharedMem
int cooperativeMultiDeviceUnmatchedSharedMem
Definition: hip_runtime_api.h:138
hipProfilerStart
hipError_t hipProfilerStart()
Start recording of profiling information. When using this API, start the profiler with profiling disab...
Definition: hip_hcc.cpp:2496
hipDeviceGetSharedMemConfig
hipError_t hipDeviceGetSharedMemConfig(hipSharedMemConfig *pConfig)
Returns bank width of shared memory for current device.
Definition: hip_device.cpp:124
hipErrorNotSupported
hipErrorNotSupported
Produced when the hip API is not supported/implemented.
Definition: hip_runtime_api.h:272
hipMemcpyAsync
hipError_t hipMemcpyAsync(void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream __dparm(0))
Copies sizeBytes bytes from the memory area pointed to by src to the memory area pointed to by offset...
hipErrorLaunchOutOfResources
hipErrorLaunchOutOfResources
Out of resources error.
Definition: hip_runtime_api.h:254
hipStreamDestroy
hipError_t hipStreamDestroy(hipStream_t stream)
Destroys the specified stream.
Definition: hip_stream.cpp:195
hipHostRegister
hipError_t hipHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags)
Register host memory so it can be accessed from the current device.
Definition: hip_memory.cpp:1158
hipProfilerStop
hipError_t hipProfilerStop()
Stop recording of profiling information. When using this API, start the profiler with profiling disab...
Definition: hip_hcc.cpp:2502
hipModuleLoadDataEx
hipError_t hipModuleLoadDataEx(hipModule_t *module, const void *image, unsigned int numOptions, hipJitOption *options, void **optionValues)
Builds module from code object which resides in host memory. Image is pointer to that location....
Definition: hip_module.cpp:1511
hipEventCreate
hipError_t hipEventCreate(hipEvent_t *event)
Definition: hip_event.cpp:207
hipMemsetAsync
hipError_t hipMemsetAsync(void *dst, int value, size_t sizeBytes, hipStream_t stream __dparm(0))
Fills the first sizeBytes bytes of the memory area pointed to by dst with the constant byte value val...
hipDeviceAttributeMaxTexture2DWidth
@ hipDeviceAttributeMaxTexture2DWidth
Maximum dimension width of 2D images in image elements.
Definition: hip_runtime_api.h:327
hipDeviceProp_t::pciDomainID
int pciDomainID
PCI Domain ID.
Definition: hip_runtime_api.h:110
hipCtxGetCacheConfig
hipError_t hipCtxGetCacheConfig(hipFuncCache_t *cacheConfig)
Get L1/Shared cache partition.
Definition: hip_context.cpp:217
hipMemcpyParam2D
hipError_t hipMemcpyParam2D(const hip_Memcpy2D *pCopy)
Copies memory for 2D arrays.
Definition: hip_memory.cpp:2144
hipHostAlloc
hipError_t hipHostAlloc(void **ptr, size_t size, unsigned int flags)
Allocate device accessible page locked host memory [Deprecated].
Definition: hip_runtime_api.h:771
hipMemset
hipError_t hipMemset(void *dst, int value, size_t sizeBytes)
Fills the first sizeBytes bytes of the memory area pointed to by dst with the constant byte value va...
Definition: hip_memory.cpp:2220
hipDeviceDisablePeerAccess
hipError_t hipDeviceDisablePeerAccess(int peerDeviceId)
Disable direct access from current device's virtual address space to memory allocations physically lo...
Definition: hip_peer.cpp:193
hipDeviceAttributeEccEnabled
@ hipDeviceAttributeEccEnabled
Device has ECC support enabled.
Definition: hip_runtime_api.h:341
hipDeviceAttributeMaxSharedMemoryPerMultiprocessor
@ hipDeviceAttributeMaxSharedMemoryPerMultiprocessor
Definition: hip_runtime_api.h:320
hipDevicePrimaryCtxReset
hipError_t hipDevicePrimaryCtxReset(hipDevice_t dev)
Resets the primary context on the GPU.
Definition: hip_context.cpp:308
hipChannelFormatDesc
Definition: driver_types.h:38
hipErrorPeerAccessAlreadyEnabled
hipErrorPeerAccessAlreadyEnabled
Peer access was already enabled from the current device.
Definition: hip_runtime_api.h:256