HIP: Heterogeneous-Computing Interface for Portability
hip_runtime_api.h
1 /*
2 Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
3 
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 THE SOFTWARE.
21 */
22 
23 #ifndef HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_API_H
24 #define HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_API_H
25 
26 #include <cuda_runtime_api.h>
27 #include <cuda.h>
28 #include <cuda_profiler_api.h>
29 #include <cuda_fp16.h>
30 
31 #ifdef __cplusplus
32 extern "C" {
33 #endif
34 
35 #ifdef __cplusplus
36 #define __dparm(x) = x
37 #else
38 #define __dparm(x)
39 #endif
40 
41 // Add Deprecated Support for CUDA Mapped HIP APIs
42 #if defined(__DOXYGEN_ONLY__) || defined(HIP_ENABLE_DEPRECATED)
43 #define __HIP_DEPRECATED
44 #elif defined(_MSC_VER)
45 #define __HIP_DEPRECATED __declspec(deprecated)
46 #elif defined(__GNUC__)
47 #define __HIP_DEPRECATED __attribute__((deprecated))
48 #else
49 #define __HIP_DEPRECATED
50 #endif
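// --- Illustrative note (not part of the original header) ---------------------
// __dparm() supplies a default argument when the header is consumed as C++ and
// expands to nothing in C, while __HIP_DEPRECATED marks CUDA-style entry points
// kept only for source compatibility. A hypothetical declaration using both:
//
//   __HIP_DEPRECATED hipError_t hipExampleCopy(void* dst, const void* src,
//                                              size_t sizeBytes,
//                                              hipStream_t stream __dparm(0));
//   // C++ callers may omit the stream argument; C callers pass it explicitly.
// -----------------------------------------------------------------------------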
51 
52 
53 // TODO -move to include/hip_runtime_api.h as a common implementation.
58 typedef enum hipMemcpyKind {
59  hipMemcpyHostToHost,
60  hipMemcpyHostToDevice,
61  hipMemcpyDeviceToHost,
62  hipMemcpyDeviceToDevice,
63  hipMemcpyDefault
64 } hipMemcpyKind;
65 
66 // hipDataType
67 #define hipDataType cudaDataType
68 #define HIP_R_16F CUDA_R_16F
69 #define HIP_R_32F CUDA_R_32F
70 #define HIP_R_64F CUDA_R_64F
71 #define HIP_C_16F CUDA_C_16F
72 #define HIP_C_32F CUDA_C_32F
73 #define HIP_C_64F CUDA_C_64F
74 
75 // hipLibraryPropertyType
76 #define hipLibraryPropertyType libraryPropertyType
77 #define HIP_LIBRARY_MAJOR_VERSION MAJOR_VERSION
78 #define HIP_LIBRARY_MINOR_VERSION MINOR_VERSION
79 #define HIP_LIBRARY_PATCH_LEVEL PATCH_LEVEL
80 
81 #define HIP_ARRAY_DESCRIPTOR CUDA_ARRAY_DESCRIPTOR
82 
83 //hipArray_Format
84 #define HIP_AD_FORMAT_UNSIGNED_INT8 CU_AD_FORMAT_UNSIGNED_INT8
85 #define HIP_AD_FORMAT_UNSIGNED_INT16 CU_AD_FORMAT_UNSIGNED_INT16
86 #define HIP_AD_FORMAT_UNSIGNED_INT32 CU_AD_FORMAT_UNSIGNED_INT32
87 #define HIP_AD_FORMAT_SIGNED_INT8 CU_AD_FORMAT_SIGNED_INT8
88 #define HIP_AD_FORMAT_SIGNED_INT16 CU_AD_FORMAT_SIGNED_INT16
89 #define HIP_AD_FORMAT_SIGNED_INT32 CU_AD_FORMAT_SIGNED_INT32
90 #define HIP_AD_FORMAT_HALF CU_AD_FORMAT_HALF
91 #define HIP_AD_FORMAT_FLOAT CU_AD_FORMAT_FLOAT
92 
93 // hipArray_Format
94 #define hipArray_Format CUarray_format
95 
96 inline static CUarray_format hipArray_FormatToCUarray_format(
97  hipArray_Format format) {
98  switch (format) {
99  case HIP_AD_FORMAT_UNSIGNED_INT8:
100  return CU_AD_FORMAT_UNSIGNED_INT8;
101  case HIP_AD_FORMAT_UNSIGNED_INT16:
102  return CU_AD_FORMAT_UNSIGNED_INT16;
103  case HIP_AD_FORMAT_UNSIGNED_INT32:
104  return CU_AD_FORMAT_UNSIGNED_INT32;
105  case HIP_AD_FORMAT_SIGNED_INT8:
106  return CU_AD_FORMAT_SIGNED_INT8;
107  case HIP_AD_FORMAT_SIGNED_INT16:
108  return CU_AD_FORMAT_SIGNED_INT16;
109  case HIP_AD_FORMAT_SIGNED_INT32:
110  return CU_AD_FORMAT_SIGNED_INT32;
111  case HIP_AD_FORMAT_HALF:
112  return CU_AD_FORMAT_HALF;
113  case HIP_AD_FORMAT_FLOAT:
114  return CU_AD_FORMAT_FLOAT;
115  default:
116  return CU_AD_FORMAT_UNSIGNED_INT8;
117  }
118 }
119 
120 #define HIP_TR_ADDRESS_MODE_WRAP CU_TR_ADDRESS_MODE_WRAP
121 #define HIP_TR_ADDRESS_MODE_CLAMP CU_TR_ADDRESS_MODE_CLAMP
122 #define HIP_TR_ADDRESS_MODE_MIRROR CU_TR_ADDRESS_MODE_MIRROR
123 #define HIP_TR_ADDRESS_MODE_BORDER CU_TR_ADDRESS_MODE_BORDER
124 
125 // hipAddress_mode
126 #define hipAddress_mode CUaddress_mode
127 
128 inline static CUaddress_mode hipAddress_modeToCUaddress_mode(
129  hipAddress_mode mode) {
130  switch (mode) {
131  case HIP_TR_ADDRESS_MODE_WRAP:
132  return CU_TR_ADDRESS_MODE_WRAP;
133  case HIP_TR_ADDRESS_MODE_CLAMP:
134  return CU_TR_ADDRESS_MODE_CLAMP;
135  case HIP_TR_ADDRESS_MODE_MIRROR:
136  return CU_TR_ADDRESS_MODE_MIRROR;
137  case HIP_TR_ADDRESS_MODE_BORDER:
138  return CU_TR_ADDRESS_MODE_BORDER;
139  default:
140  return CU_TR_ADDRESS_MODE_WRAP;
141  }
142 }
143 
144 #define HIP_TR_FILTER_MODE_POINT CU_TR_FILTER_MODE_POINT
145 #define HIP_TR_FILTER_MODE_LINEAR CU_TR_FILTER_MODE_LINEAR
146 
147 // hipFilter_mode
148 #define hipFilter_mode CUfilter_mode
149 
150 inline static CUfilter_mode hipFilter_mode_enumToCUfilter_mode(
151  hipFilter_mode mode) {
152  switch (mode) {
153  case HIP_TR_FILTER_MODE_POINT:
154  return CU_TR_FILTER_MODE_POINT;
155  case HIP_TR_FILTER_MODE_LINEAR:
156  return CU_TR_FILTER_MODE_LINEAR;
157  default:
158  return CU_TR_FILTER_MODE_POINT;
159  }
160 }
161 
162 //hipResourcetype
163 #define HIP_RESOURCE_TYPE_ARRAY CU_RESOURCE_TYPE_ARRAY
164 #define HIP_RESOURCE_TYPE_MIPMAPPED_ARRAY CU_RESOURCE_TYPE_MIPMAPPED_ARRAY
165 #define HIP_RESOURCE_TYPE_LINEAR CU_RESOURCE_TYPE_LINEAR
166 #define HIP_RESOURCE_TYPE_PITCH2D CU_RESOURCE_TYPE_PITCH2D
167 
168 // hipResourcetype
169 #define hipResourcetype CUresourcetype
170 
171 inline static CUresourcetype hipResourcetype_enumToCUresourcetype(
172  hipResourcetype resType) {
173  switch (resType) {
174  case HIP_RESOURCE_TYPE_ARRAY:
175  return CU_RESOURCE_TYPE_ARRAY;
176  case HIP_RESOURCE_TYPE_MIPMAPPED_ARRAY:
177  return CU_RESOURCE_TYPE_MIPMAPPED_ARRAY;
178  case HIP_RESOURCE_TYPE_LINEAR:
179  return CU_RESOURCE_TYPE_LINEAR;
180  case HIP_RESOURCE_TYPE_PITCH2D:
181  return CU_RESOURCE_TYPE_PITCH2D;
182  default:
183  return CU_RESOURCE_TYPE_ARRAY;
184  }
185 }
186 
187 #define hipTexRef CUtexref
188 #define hiparray CUarray
189 
190 // hipTextureAddressMode
191 typedef enum cudaTextureAddressMode hipTextureAddressMode;
192 #define hipAddressModeWrap cudaAddressModeWrap
193 #define hipAddressModeClamp cudaAddressModeClamp
194 #define hipAddressModeMirror cudaAddressModeMirror
195 #define hipAddressModeBorder cudaAddressModeBorder
196 
197 // hipTextureFilterMode
198 typedef enum cudaTextureFilterMode hipTextureFilterMode;
199 #define hipFilterModePoint cudaFilterModePoint
200 #define hipFilterModeLinear cudaFilterModeLinear
201 
202 // hipTextureReadMode
203 typedef enum cudaTextureReadMode hipTextureReadMode;
204 #define hipReadModeElementType cudaReadModeElementType
205 #define hipReadModeNormalizedFloat cudaReadModeNormalizedFloat
206 
207 // hipChannelFormatKind
208 typedef enum cudaChannelFormatKind hipChannelFormatKind;
209 #define hipChannelFormatKindSigned cudaChannelFormatKindSigned
210 #define hipChannelFormatKindUnsigned cudaChannelFormatKindUnsigned
211 #define hipChannelFormatKindFloat cudaChannelFormatKindFloat
212 #define hipChannelFormatKindNone cudaChannelFormatKindNone
213 
214 #define hipSurfaceBoundaryMode cudaSurfaceBoundaryMode
215 #define hipBoundaryModeZero cudaBoundaryModeZero
216 #define hipBoundaryModeTrap cudaBoundaryModeTrap
217 #define hipBoundaryModeClamp cudaBoundaryModeClamp
218 
219 // hipFuncCache
220 #define hipFuncCachePreferNone cudaFuncCachePreferNone
221 #define hipFuncCachePreferShared cudaFuncCachePreferShared
222 #define hipFuncCachePreferL1 cudaFuncCachePreferL1
223 #define hipFuncCachePreferEqual cudaFuncCachePreferEqual
224 
225 // hipResourceType
226 #define hipResourceType cudaResourceType
227 #define hipResourceTypeArray cudaResourceTypeArray
228 #define hipResourceTypeMipmappedArray cudaResourceTypeMipmappedArray
229 #define hipResourceTypeLinear cudaResourceTypeLinear
230 #define hipResourceTypePitch2D cudaResourceTypePitch2D
231 //
232 // hipErrorNoDevice.
233 
234 
235 // Flags that can be used with hipEventCreateWithFlags:
236 #define hipEventDefault cudaEventDefault
237 #define hipEventBlockingSync cudaEventBlockingSync
238 #define hipEventDisableTiming cudaEventDisableTiming
239 #define hipEventInterprocess cudaEventInterprocess
240 #define hipEventReleaseToDevice 0 /* no-op on CUDA platform */
241 #define hipEventReleaseToSystem 0 /* no-op on CUDA platform */
242 
243 
244 #define hipHostMallocDefault cudaHostAllocDefault
245 #define hipHostMallocPortable cudaHostAllocPortable
246 #define hipHostMallocMapped cudaHostAllocMapped
247 #define hipHostMallocWriteCombined cudaHostAllocWriteCombined
248 #define hipHostMallocCoherent 0x0
249 #define hipHostMallocNonCoherent 0x0
250 
251 #define hipMemAttachGlobal cudaMemAttachGlobal
252 #define hipMemAttachHost cudaMemAttachHost
253 
254 #define hipHostRegisterDefault cudaHostRegisterDefault
255 #define hipHostRegisterPortable cudaHostRegisterPortable
256 #define hipHostRegisterMapped cudaHostRegisterMapped
257 #define hipHostRegisterIoMemory cudaHostRegisterIoMemory
258 
259 #define HIP_LAUNCH_PARAM_BUFFER_POINTER CU_LAUNCH_PARAM_BUFFER_POINTER
260 #define HIP_LAUNCH_PARAM_BUFFER_SIZE CU_LAUNCH_PARAM_BUFFER_SIZE
261 #define HIP_LAUNCH_PARAM_END CU_LAUNCH_PARAM_END
262 #define hipLimitMallocHeapSize cudaLimitMallocHeapSize
263 #define hipIpcMemLazyEnablePeerAccess cudaIpcMemLazyEnablePeerAccess
264 
265 #define hipOccupancyDefault cudaOccupancyDefault
266 
267 #define hipCooperativeLaunchMultiDeviceNoPreSync \
268  cudaCooperativeLaunchMultiDeviceNoPreSync
269 #define hipCooperativeLaunchMultiDeviceNoPostSync \
270  cudaCooperativeLaunchMultiDeviceNoPostSync
271 
272 
273 // enum CUjit_option redefines
274 #define hipJitOptionMaxRegisters CU_JIT_MAX_REGISTERS
275 #define hipJitOptionThreadsPerBlock CU_JIT_THREADS_PER_BLOCK
276 #define hipJitOptionWallTime CU_JIT_WALL_TIME
277 #define hipJitOptionInfoLogBuffer CU_JIT_INFO_LOG_BUFFER
278 #define hipJitOptionInfoLogBufferSizeBytes CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES
279 #define hipJitOptionErrorLogBuffer CU_JIT_ERROR_LOG_BUFFER
280 #define hipJitOptionErrorLogBufferSizeBytes CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES
281 #define hipJitOptionOptimizationLevel CU_JIT_OPTIMIZATION_LEVEL
282 #define hipJitOptionTargetFromContext CU_JIT_TARGET_FROM_CUCONTEXT
283 #define hipJitOptionTarget CU_JIT_TARGET
284 #define hipJitOptionFallbackStrategy CU_JIT_FALLBACK_STRATEGY
285 #define hipJitOptionGenerateDebugInfo CU_JIT_GENERATE_DEBUG_INFO
286 #define hipJitOptionLogVerbose CU_JIT_LOG_VERBOSE
287 #define hipJitOptionGenerateLineInfo CU_JIT_GENERATE_LINE_INFO
288 #define hipJitOptionCacheMode CU_JIT_CACHE_MODE
289 #define hipJitOptionSm3xOpt CU_JIT_NEW_SM3X_OPT
290 #define hipJitOptionFastCompile CU_JIT_FAST_COMPILE
291 #define hipJitOptionNumOptions CU_JIT_NUM_OPTIONS
292 
293 typedef cudaEvent_t hipEvent_t;
294 typedef cudaStream_t hipStream_t;
295 typedef cudaIpcEventHandle_t hipIpcEventHandle_t;
296 typedef cudaIpcMemHandle_t hipIpcMemHandle_t;
297 typedef enum cudaLimit hipLimit_t;
298 typedef enum cudaFuncAttribute hipFuncAttribute;
299 typedef enum cudaFuncCache hipFuncCache_t;
300 typedef CUcontext hipCtx_t;
301 typedef enum cudaSharedMemConfig hipSharedMemConfig;
302 typedef CUfunc_cache hipFuncCache;
303 typedef CUjit_option hipJitOption;
304 typedef CUdevice hipDevice_t;
305 typedef enum cudaDeviceP2PAttr hipDeviceP2PAttr;
306 #define hipDevP2PAttrPerformanceRank cudaDevP2PAttrPerformanceRank
307 #define hipDevP2PAttrAccessSupported cudaDevP2PAttrAccessSupported
308 #define hipDevP2PAttrNativeAtomicSupported cudaDevP2PAttrNativeAtomicSupported
309 #define hipDevP2PAttrHipArrayAccessSupported cudaDevP2PAttrCudaArrayAccessSupported
310 #define hipFuncAttributeMaxDynamicSharedMemorySize cudaFuncAttributeMaxDynamicSharedMemorySize
311 #define hipFuncAttributePreferredSharedMemoryCarveout cudaFuncAttributePreferredSharedMemoryCarveout
312 
313 typedef CUmodule hipModule_t;
314 typedef CUfunction hipFunction_t;
315 typedef CUdeviceptr hipDeviceptr_t;
316 typedef struct cudaArray hipArray;
317 typedef struct cudaArray* hipArray_t;
318 typedef struct cudaArray* hipArray_const_t;
319 typedef struct cudaFuncAttributes hipFuncAttributes;
320 typedef struct cudaLaunchParams hipLaunchParams;
321 #define hipFunction_attribute CUfunction_attribute
322 #define hip_Memcpy2D CUDA_MEMCPY2D
323 #define hipMemcpy3DParms cudaMemcpy3DParms
324 #define hipArrayDefault cudaArrayDefault
325 #define hipArrayLayered cudaArrayLayered
326 #define hipArraySurfaceLoadStore cudaArraySurfaceLoadStore
327 #define hipArrayCubemap cudaArrayCubemap
328 #define hipArrayTextureGather cudaArrayTextureGather
329 
330 typedef cudaTextureObject_t hipTextureObject_t;
331 typedef cudaSurfaceObject_t hipSurfaceObject_t;
332 #define hipTextureType1D cudaTextureType1D
333 #define hipTextureType1DLayered cudaTextureType1DLayered
334 #define hipTextureType2D cudaTextureType2D
335 #define hipTextureType2DLayered cudaTextureType2DLayered
336 #define hipTextureType3D cudaTextureType3D
337 #define hipDeviceMapHost cudaDeviceMapHost
338 
339 typedef struct cudaExtent hipExtent;
340 typedef struct cudaPitchedPtr hipPitchedPtr;
341 #define make_hipExtent make_cudaExtent
342 #define make_hipPos make_cudaPos
343 #define make_hipPitchedPtr make_cudaPitchedPtr
344 // Flags that can be used with hipStreamCreateWithFlags
345 #define hipStreamDefault cudaStreamDefault
346 #define hipStreamNonBlocking cudaStreamNonBlocking
347 
348 typedef struct cudaChannelFormatDesc hipChannelFormatDesc;
349 typedef struct cudaResourceDesc hipResourceDesc;
350 typedef struct cudaTextureDesc hipTextureDesc;
351 typedef struct cudaResourceViewDesc hipResourceViewDesc;
352 // hipSharedMemConfig
353 #define hipSharedMemBankSizeDefault cudaSharedMemBankSizeDefault
354 #define hipSharedMemBankSizeFourByte cudaSharedMemBankSizeFourByte
355 #define hipSharedMemBankSizeEightByte cudaSharedMemBankSizeEightByte
356 
357 //Function Attributes
358 #define HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK
359 #define HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES
360 #define HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES
361 #define HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES
362 #define HIP_FUNC_ATTRIBUTE_NUM_REGS CU_FUNC_ATTRIBUTE_NUM_REGS
363 #define HIP_FUNC_ATTRIBUTE_PTX_VERSION CU_FUNC_ATTRIBUTE_PTX_VERSION
364 #define HIP_FUNC_ATTRIBUTE_BINARY_VERSION CU_FUNC_ATTRIBUTE_BINARY_VERSION
365 #define HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA CU_FUNC_ATTRIBUTE_CACHE_MODE_CA
366 #define HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES
367 #define HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT
368 #define HIP_FUNC_ATTRIBUTE_MAX CU_FUNC_ATTRIBUTE_MAX
369 
370 #if CUDA_VERSION >= 9000
371 #define __shfl(...) __shfl_sync(0xffffffff, __VA_ARGS__)
372 #define __shfl_up(...) __shfl_up_sync(0xffffffff, __VA_ARGS__)
373 #define __shfl_down(...) __shfl_down_sync(0xffffffff, __VA_ARGS__)
374 #define __shfl_xor(...) __shfl_xor_sync(0xffffffff, __VA_ARGS__)
375 #endif // CUDA_VERSION >= 9000
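// --- Illustrative note (not part of the original header) ---------------------
// On CUDA >= 9.0 the unmasked warp shuffles that HIP device code may still use
// are remapped to the *_sync intrinsics with a full 0xffffffff mask, so a
// hypothetical warp reduction like the one below keeps compiling unchanged:
//
//   __device__ int warpSum(int v) {
//       for (int offset = warpSize / 2; offset > 0; offset /= 2)
//           v += __shfl_down(v, offset);  // expands to __shfl_down_sync(0xffffffff, v, offset)
//       return v;
//   }
// -----------------------------------------------------------------------------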
376 
377 inline static hipError_t hipCUDAErrorTohipError(cudaError_t cuError) {
378  switch (cuError) {
379  case cudaSuccess:
380  return hipSuccess;
381  case cudaErrorProfilerDisabled:
382  return hipErrorProfilerDisabled;
383  case cudaErrorProfilerNotInitialized:
384  return hipErrorProfilerNotInitialized;
385  case cudaErrorProfilerAlreadyStarted:
386  return hipErrorProfilerAlreadyStarted;
387  case cudaErrorProfilerAlreadyStopped:
388  return hipErrorProfilerAlreadyStopped;
389  case cudaErrorInsufficientDriver:
390  return hipErrorInsufficientDriver;
391  case cudaErrorUnsupportedLimit:
392  return hipErrorUnsupportedLimit;
393  case cudaErrorPeerAccessUnsupported:
394  return hipErrorPeerAccessUnsupported;
395  case cudaErrorInvalidGraphicsContext:
396  return hipErrorInvalidGraphicsContext;
397  case cudaErrorSharedObjectSymbolNotFound:
398  return hipErrorSharedObjectSymbolNotFound;
399  case cudaErrorSharedObjectInitFailed:
400  return hipErrorSharedObjectInitFailed;
401  case cudaErrorOperatingSystem:
402  return hipErrorOperatingSystem;
403  case cudaErrorSetOnActiveProcess:
404  return hipErrorSetOnActiveProcess;
405  case cudaErrorIllegalAddress:
406  return hipErrorIllegalAddress;
407  case cudaErrorInvalidSymbol:
408  return hipErrorInvalidSymbol;
409  case cudaErrorMissingConfiguration:
410  return hipErrorMissingConfiguration;
411  case cudaErrorMemoryAllocation:
412  return hipErrorOutOfMemory;
413  case cudaErrorInitializationError:
414  return hipErrorNotInitialized;
415  case cudaErrorLaunchFailure:
416  return hipErrorLaunchFailure;
417 case cudaErrorCooperativeLaunchTooLarge:
418 return hipErrorCooperativeLaunchTooLarge;
419 case cudaErrorPriorLaunchFailure:
420 return hipErrorPriorLaunchFailure;
421 case cudaErrorLaunchOutOfResources:
422 return hipErrorLaunchOutOfResources;
423 case cudaErrorInvalidDeviceFunction:
424  return hipErrorInvalidDeviceFunction;
425  case cudaErrorInvalidConfiguration:
426  return hipErrorInvalidConfiguration;
427  case cudaErrorInvalidDevice:
428  return hipErrorInvalidDevice;
429  case cudaErrorInvalidValue:
430  return hipErrorInvalidValue;
431 case cudaErrorInvalidDevicePointer:
432 return hipErrorInvalidDevicePointer;
433 case cudaErrorInvalidMemcpyDirection:
434 return hipErrorInvalidMemcpyDirection;
435 case cudaErrorInvalidResourceHandle:
436  return hipErrorInvalidHandle;
437  case cudaErrorNotReady:
438  return hipErrorNotReady;
439  case cudaErrorNoDevice:
440  return hipErrorNoDevice;
441 case cudaErrorPeerAccessAlreadyEnabled:
442 return hipErrorPeerAccessAlreadyEnabled;
443 case cudaErrorPeerAccessNotEnabled:
444 return hipErrorPeerAccessNotEnabled;
445 case cudaErrorHostMemoryAlreadyRegistered:
446 return hipErrorHostMemoryAlreadyRegistered;
447 case cudaErrorHostMemoryNotRegistered:
448 return hipErrorHostMemoryNotRegistered;
449 case cudaErrorMapBufferObjectFailed:
450  return hipErrorMapFailed;
451  case cudaErrorAssert:
452  return hipErrorAssert;
453  case cudaErrorNotSupported:
454  return hipErrorNotSupported;
455  case cudaErrorCudartUnloading:
456  return hipErrorDeinitialized;
457  case cudaErrorInvalidKernelImage:
458  return hipErrorInvalidImage;
459  case cudaErrorUnmapBufferObjectFailed:
460  return hipErrorUnmapFailed;
461  case cudaErrorNoKernelImageForDevice:
462  return hipErrorNoBinaryForGpu;
463  case cudaErrorECCUncorrectable:
464  return hipErrorECCNotCorrectable;
465  case cudaErrorDeviceAlreadyInUse:
466  return hipErrorContextAlreadyInUse;
467 case cudaErrorInvalidPtx:
468 return hipErrorInvalidKernelFile;
469 case cudaErrorLaunchTimeout:
470  return hipErrorLaunchTimeOut;
471 #if CUDA_VERSION >= 10010
472  case cudaErrorInvalidSource:
473  return hipErrorInvalidSource;
474  case cudaErrorFileNotFound:
475  return hipErrorFileNotFound;
476  case cudaErrorSymbolNotFound:
477  return hipErrorNotFound;
478  case cudaErrorArrayIsMapped:
479  return hipErrorArrayIsMapped;
480  case cudaErrorNotMappedAsPointer:
481  return hipErrorNotMappedAsPointer;
482  case cudaErrorNotMappedAsArray:
483  return hipErrorNotMappedAsArray;
484  case cudaErrorNotMapped:
485  return hipErrorNotMapped;
486  case cudaErrorAlreadyAcquired:
487  return hipErrorAlreadyAcquired;
488  case cudaErrorAlreadyMapped:
489  return hipErrorAlreadyMapped;
490 #endif
491 #if CUDA_VERSION >= 10020
492  case cudaErrorDeviceUninitialized:
493  return hipErrorInvalidContext;
494 #endif
495  case cudaErrorUnknown:
496  default:
497  return hipErrorUnknown; // Note - translated error.
498  }
499 }
500 
501 inline static hipError_t hipCUResultTohipError(CUresult cuError) {
502  switch (cuError) {
503  case CUDA_SUCCESS:
504  return hipSuccess;
505  case CUDA_ERROR_OUT_OF_MEMORY:
506  return hipErrorOutOfMemory;
507  case CUDA_ERROR_INVALID_VALUE:
508  return hipErrorInvalidValue;
509  case CUDA_ERROR_INVALID_DEVICE:
510  return hipErrorInvalidDevice;
511  case CUDA_ERROR_DEINITIALIZED:
512  return hipErrorDeinitialized;
513  case CUDA_ERROR_NO_DEVICE:
514  return hipErrorNoDevice;
515  case CUDA_ERROR_INVALID_CONTEXT:
516  return hipErrorInvalidContext;
517  case CUDA_ERROR_NOT_INITIALIZED:
518  return hipErrorNotInitialized;
519  case CUDA_ERROR_INVALID_HANDLE:
520  return hipErrorInvalidHandle;
521  case CUDA_ERROR_MAP_FAILED:
522  return hipErrorMapFailed;
523  case CUDA_ERROR_PROFILER_DISABLED:
524  return hipErrorProfilerDisabled;
525  case CUDA_ERROR_PROFILER_NOT_INITIALIZED:
526  return hipErrorProfilerNotInitialized;
527  case CUDA_ERROR_PROFILER_ALREADY_STARTED:
528  return hipErrorProfilerAlreadyStarted;
529  case CUDA_ERROR_PROFILER_ALREADY_STOPPED:
530  return hipErrorProfilerAlreadyStopped;
531  case CUDA_ERROR_INVALID_IMAGE:
532  return hipErrorInvalidImage;
533  case CUDA_ERROR_CONTEXT_ALREADY_CURRENT:
534  return hipErrorContextAlreadyCurrent;
535  case CUDA_ERROR_UNMAP_FAILED:
536  return hipErrorUnmapFailed;
537  case CUDA_ERROR_ARRAY_IS_MAPPED:
538  return hipErrorArrayIsMapped;
539  case CUDA_ERROR_ALREADY_MAPPED:
540  return hipErrorAlreadyMapped;
541  case CUDA_ERROR_NO_BINARY_FOR_GPU:
542  return hipErrorNoBinaryForGpu;
543  case CUDA_ERROR_ALREADY_ACQUIRED:
544  return hipErrorAlreadyAcquired;
545  case CUDA_ERROR_NOT_MAPPED:
546  return hipErrorNotMapped;
547  case CUDA_ERROR_NOT_MAPPED_AS_ARRAY:
548  return hipErrorNotMappedAsArray;
549  case CUDA_ERROR_NOT_MAPPED_AS_POINTER:
550  return hipErrorNotMappedAsPointer;
551  case CUDA_ERROR_ECC_UNCORRECTABLE:
552  return hipErrorECCNotCorrectable;
553  case CUDA_ERROR_UNSUPPORTED_LIMIT:
554  return hipErrorUnsupportedLimit;
555  case CUDA_ERROR_CONTEXT_ALREADY_IN_USE:
556  return hipErrorContextAlreadyInUse;
557  case CUDA_ERROR_PEER_ACCESS_UNSUPPORTED:
558  return hipErrorPeerAccessUnsupported;
559 case CUDA_ERROR_INVALID_PTX:
560 return hipErrorInvalidKernelFile;
561 case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT:
562  return hipErrorInvalidGraphicsContext;
563  case CUDA_ERROR_INVALID_SOURCE:
564  return hipErrorInvalidSource;
565  case CUDA_ERROR_FILE_NOT_FOUND:
566  return hipErrorFileNotFound;
567  case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND:
568  return hipErrorSharedObjectSymbolNotFound;
569  case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED:
570  return hipErrorSharedObjectInitFailed;
571  case CUDA_ERROR_OPERATING_SYSTEM:
572  return hipErrorOperatingSystem;
573  case CUDA_ERROR_NOT_FOUND:
574  return hipErrorNotFound;
575  case CUDA_ERROR_NOT_READY:
576  return hipErrorNotReady;
577  case CUDA_ERROR_ILLEGAL_ADDRESS:
578  return hipErrorIllegalAddress;
579 case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES:
580 return hipErrorLaunchOutOfResources;
581 case CUDA_ERROR_LAUNCH_TIMEOUT:
582  return hipErrorLaunchTimeOut;
583 case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED:
584 return hipErrorPeerAccessAlreadyEnabled;
585 case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED:
586 return hipErrorPeerAccessNotEnabled;
587 case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE:
588  return hipErrorSetOnActiveProcess;
589  case CUDA_ERROR_ASSERT:
590  return hipErrorAssert;
591 case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED:
592 return hipErrorHostMemoryAlreadyRegistered;
593 case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED:
594 return hipErrorHostMemoryNotRegistered;
595 case CUDA_ERROR_LAUNCH_FAILED:
596  return hipErrorLaunchFailure;
597 case CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE:
598 return hipErrorCooperativeLaunchTooLarge;
599 case CUDA_ERROR_NOT_SUPPORTED:
600  return hipErrorNotSupported;
601  case CUDA_ERROR_UNKNOWN:
602  default:
603  return hipErrorUnknown; // Note - translated error.
604  }
605 }
606 
607 inline static cudaError_t hipErrorToCudaError(hipError_t hError) {
608  switch (hError) {
609  case hipSuccess:
610  return cudaSuccess;
611  case hipErrorOutOfMemory:
612  return cudaErrorMemoryAllocation;
613  case hipErrorProfilerDisabled:
614  return cudaErrorProfilerDisabled;
615  case hipErrorProfilerNotInitialized:
616  return cudaErrorProfilerNotInitialized;
617  case hipErrorProfilerAlreadyStarted:
618  return cudaErrorProfilerAlreadyStarted;
619  case hipErrorProfilerAlreadyStopped:
620  return cudaErrorProfilerAlreadyStopped;
621  case hipErrorInvalidConfiguration:
622 return cudaErrorInvalidConfiguration;
623 case hipErrorLaunchOutOfResources:
624 return cudaErrorLaunchOutOfResources;
625 case hipErrorInvalidValue:
626 return cudaErrorInvalidValue;
627 case hipErrorInvalidHandle:
628 return cudaErrorInvalidResourceHandle;
629 case hipErrorInvalidDevice:
630 return cudaErrorInvalidDevice;
631 case hipErrorInvalidMemcpyDirection:
632 return cudaErrorInvalidMemcpyDirection;
633 case hipErrorInvalidDevicePointer:
634 return cudaErrorInvalidDevicePointer;
635  case hipErrorNotInitialized:
636  return cudaErrorInitializationError;
637  case hipErrorNoDevice:
638  return cudaErrorNoDevice;
639  case hipErrorNotReady:
640 return cudaErrorNotReady;
641 case hipErrorPeerAccessNotEnabled:
642 return cudaErrorPeerAccessNotEnabled;
643 case hipErrorPeerAccessAlreadyEnabled:
644 return cudaErrorPeerAccessAlreadyEnabled;
645 case hipErrorHostMemoryAlreadyRegistered:
646 return cudaErrorHostMemoryAlreadyRegistered;
647 case hipErrorHostMemoryNotRegistered:
648 return cudaErrorHostMemoryNotRegistered;
649  case hipErrorDeinitialized:
650  return cudaErrorCudartUnloading;
651  case hipErrorInvalidSymbol:
652  return cudaErrorInvalidSymbol;
653  case hipErrorInsufficientDriver:
654  return cudaErrorInsufficientDriver;
655  case hipErrorMissingConfiguration:
656  return cudaErrorMissingConfiguration;
657  case hipErrorPriorLaunchFailure:
658  return cudaErrorPriorLaunchFailure;
659  case hipErrorInvalidDeviceFunction:
660  return cudaErrorInvalidDeviceFunction;
661  case hipErrorInvalidImage:
662 return cudaErrorInvalidKernelImage;
663 case hipErrorInvalidContext:
664 #if CUDA_VERSION >= 10020
665  return cudaErrorDeviceUninitialized;
666 #else
667  return cudaErrorUnknown;
668 #endif
669  case hipErrorMapFailed:
670  return cudaErrorMapBufferObjectFailed;
671  case hipErrorUnmapFailed:
672  return cudaErrorUnmapBufferObjectFailed;
673  case hipErrorArrayIsMapped:
674 #if CUDA_VERSION >= 10010
675  return cudaErrorArrayIsMapped;
676 #else
677  return cudaErrorUnknown;
678 #endif
679  case hipErrorAlreadyMapped:
680 #if CUDA_VERSION >= 10010
681  return cudaErrorAlreadyMapped;
682 #else
683  return cudaErrorUnknown;
684 #endif
685  case hipErrorNoBinaryForGpu:
686  return cudaErrorNoKernelImageForDevice;
687  case hipErrorAlreadyAcquired:
688 #if CUDA_VERSION >= 10010
689  return cudaErrorAlreadyAcquired;
690 #else
691  return cudaErrorUnknown;
692 #endif
693  case hipErrorNotMapped:
694 #if CUDA_VERSION >= 10010
695  return cudaErrorNotMapped;
696 #else
697  return cudaErrorUnknown;
698 #endif
699  case hipErrorNotMappedAsArray:
700 #if CUDA_VERSION >= 10010
701  return cudaErrorNotMappedAsArray;
702 #else
703  return cudaErrorUnknown;
704 #endif
705  case hipErrorNotMappedAsPointer:
706 #if CUDA_VERSION >= 10010
707  return cudaErrorNotMappedAsPointer;
708 #else
709  return cudaErrorUnknown;
710 #endif
711  case hipErrorECCNotCorrectable:
712  return cudaErrorECCUncorrectable;
713  case hipErrorUnsupportedLimit:
714  return cudaErrorUnsupportedLimit;
715  case hipErrorContextAlreadyInUse:
716  return cudaErrorDeviceAlreadyInUse;
717  case hipErrorPeerAccessUnsupported:
718 return cudaErrorPeerAccessUnsupported;
719 case hipErrorInvalidKernelFile:
720 return cudaErrorInvalidPtx;
721  case hipErrorInvalidGraphicsContext:
722  return cudaErrorInvalidGraphicsContext;
723  case hipErrorInvalidSource:
724 #if CUDA_VERSION >= 10010
725  return cudaErrorInvalidSource;
726 #else
727  return cudaErrorUnknown;
728 #endif
729  case hipErrorFileNotFound:
730 #if CUDA_VERSION >= 10010
731  return cudaErrorFileNotFound;
732 #else
733  return cudaErrorUnknown;
734 #endif
735  case hipErrorSharedObjectSymbolNotFound:
736  return cudaErrorSharedObjectSymbolNotFound;
737  case hipErrorSharedObjectInitFailed:
738  return cudaErrorSharedObjectInitFailed;
739  case hipErrorOperatingSystem:
740  return cudaErrorOperatingSystem;
741  case hipErrorNotFound:
742 #if CUDA_VERSION >= 10010
743  return cudaErrorSymbolNotFound;
744 #else
745  return cudaErrorUnknown;
746 #endif
747  case hipErrorIllegalAddress:
748  return cudaErrorIllegalAddress;
749  case hipErrorLaunchTimeOut:
750  return cudaErrorLaunchTimeout;
751  case hipErrorSetOnActiveProcess:
752 return cudaErrorSetOnActiveProcess;
753 case hipErrorLaunchFailure:
754 return cudaErrorLaunchFailure;
755 case hipErrorCooperativeLaunchTooLarge:
756 return cudaErrorCooperativeLaunchTooLarge;
757 case hipErrorNotSupported:
758 return cudaErrorNotSupported;
759 // HSA: does not exist in CUDA
760 case hipErrorRuntimeMemory:
761 // HSA: does not exist in CUDA
762 case hipErrorRuntimeOther:
763  case hipErrorUnknown:
764  case hipErrorTbd:
765  default:
766  return cudaErrorUnknown; // Note - translated error.
767  }
768 }
769 
770 inline static enum cudaMemcpyKind hipMemcpyKindToCudaMemcpyKind(hipMemcpyKind kind) {
771  switch (kind) {
772  case hipMemcpyHostToHost:
773  return cudaMemcpyHostToHost;
774  case hipMemcpyHostToDevice:
775  return cudaMemcpyHostToDevice;
776  case hipMemcpyDeviceToHost:
777  return cudaMemcpyDeviceToHost;
778  case hipMemcpyDeviceToDevice:
779  return cudaMemcpyDeviceToDevice;
780  default:
781  return cudaMemcpyDefault;
782  }
783 }
784 
785 inline static enum cudaTextureAddressMode hipTextureAddressModeToCudaTextureAddressMode(
786  hipTextureAddressMode kind) {
787  switch (kind) {
788  case hipAddressModeWrap:
789  return cudaAddressModeWrap;
790  case hipAddressModeClamp:
791  return cudaAddressModeClamp;
792  case hipAddressModeMirror:
793  return cudaAddressModeMirror;
794  case hipAddressModeBorder:
795  return cudaAddressModeBorder;
796  default:
797  return cudaAddressModeWrap;
798  }
799 }
800 
801 inline static enum cudaTextureFilterMode hipTextureFilterModeToCudaTextureFilterMode(
802  hipTextureFilterMode kind) {
803  switch (kind) {
804  case hipFilterModePoint:
805  return cudaFilterModePoint;
806  case hipFilterModeLinear:
807  return cudaFilterModeLinear;
808  default:
809  return cudaFilterModePoint;
810  }
811 }
812 
813 inline static enum cudaTextureReadMode hipTextureReadModeToCudaTextureReadMode(hipTextureReadMode kind) {
814  switch (kind) {
815  case hipReadModeElementType:
816  return cudaReadModeElementType;
817  case hipReadModeNormalizedFloat:
818  return cudaReadModeNormalizedFloat;
819  default:
820  return cudaReadModeElementType;
821  }
822 }
823 
824 inline static enum cudaChannelFormatKind hipChannelFormatKindToCudaChannelFormatKind(
825  hipChannelFormatKind kind) {
826  switch (kind) {
827  case hipChannelFormatKindSigned:
828  return cudaChannelFormatKindSigned;
829  case hipChannelFormatKindUnsigned:
830  return cudaChannelFormatKindUnsigned;
831  case hipChannelFormatKindFloat:
832  return cudaChannelFormatKindFloat;
833  case hipChannelFormatKindNone:
834  return cudaChannelFormatKindNone;
835  default:
836  return cudaChannelFormatKindNone;
837  }
838 }
839 
843 #define HIPRT_CB CUDART_CB
844 typedef void(HIPRT_CB* hipStreamCallback_t)(hipStream_t stream, hipError_t status, void* userData);
845 inline static hipError_t hipInit(unsigned int flags) {
846  return hipCUResultTohipError(cuInit(flags));
847 }
848 
849 inline static hipError_t hipDeviceReset() { return hipCUDAErrorTohipError(cudaDeviceReset()); }
850 
851 inline static hipError_t hipGetLastError() { return hipCUDAErrorTohipError(cudaGetLastError()); }
852 
853 inline static hipError_t hipPeekAtLastError() {
854  return hipCUDAErrorTohipError(cudaPeekAtLastError());
855 }
856 
857 inline static hipError_t hipMalloc(void** ptr, size_t size) {
858  return hipCUDAErrorTohipError(cudaMalloc(ptr, size));
859 }
860 
861 inline static hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height) {
862  return hipCUDAErrorTohipError(cudaMallocPitch(ptr, pitch, width, height));
863 }
864 
865 inline static hipError_t hipMemAllocPitch(hipDeviceptr_t* dptr,size_t* pitch,size_t widthInBytes,size_t height,unsigned int elementSizeBytes){
866  return hipCUResultTohipError(cuMemAllocPitch(dptr,pitch,widthInBytes,height,elementSizeBytes));
867 }
868 
869 inline static hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent) {
870  return hipCUDAErrorTohipError(cudaMalloc3D(pitchedDevPtr, extent));
871 }
872 
873 inline static hipError_t hipFree(void* ptr) { return hipCUDAErrorTohipError(cudaFree(ptr)); }
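// --- Illustrative usage sketch (not part of the original header) -------------
// Device allocations flow through cudaMalloc/cudaFree and their cudaError_t
// results are translated to hipError_t, so caller code sees only HIP types:
//
//   void* d_buf = NULL;
//   if (hipMalloc(&d_buf, 1024) == hipSuccess) {
//       /* ... use d_buf ... */
//       hipFree(d_buf);
//   }
// -----------------------------------------------------------------------------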
874 
875 inline static hipError_t hipMallocHost(void** ptr, size_t size)
876  __attribute__((deprecated("use hipHostMalloc instead")));
877 inline static hipError_t hipMallocHost(void** ptr, size_t size) {
878  return hipCUDAErrorTohipError(cudaMallocHost(ptr, size));
879 }
880 
881 inline static hipError_t hipMemAllocHost(void** ptr, size_t size)
882  __attribute__((deprecated("use hipHostMalloc instead")));
883 inline static hipError_t hipMemAllocHost(void** ptr, size_t size) {
884  return hipCUResultTohipError(cuMemAllocHost(ptr, size));
885 }
886 
887 inline static hipError_t hipHostAlloc(void** ptr, size_t size, unsigned int flags)
888  __attribute__((deprecated("use hipHostMalloc instead")));
889 inline static hipError_t hipHostAlloc(void** ptr, size_t size, unsigned int flags) {
890  return hipCUDAErrorTohipError(cudaHostAlloc(ptr, size, flags));
891 }
892 
893 inline static hipError_t hipHostMalloc(void** ptr, size_t size, unsigned int flags) {
894  return hipCUDAErrorTohipError(cudaHostAlloc(ptr, size, flags));
895 }
896 
897 inline static hipError_t hipMallocManaged(void** ptr, size_t size, unsigned int flags) {
898  return hipCUDAErrorTohipError(cudaMallocManaged(ptr, size, flags));
899 }
900 
901 inline static hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc,
902  size_t width, size_t height,
903  unsigned int flags __dparm(hipArrayDefault)) {
904  return hipCUDAErrorTohipError(cudaMallocArray(array, desc, width, height, flags));
905 }
906 
907 inline static hipError_t hipMalloc3DArray(hipArray** array, const hipChannelFormatDesc* desc,
908  hipExtent extent, unsigned int flags) {
909  return hipCUDAErrorTohipError(cudaMalloc3DArray(array, desc, extent, flags));
910 }
911 
912 inline static hipError_t hipFreeArray(hipArray* array) {
913  return hipCUDAErrorTohipError(cudaFreeArray(array));
914 }
915 
916 inline static hipError_t hipHostGetDevicePointer(void** devPtr, void* hostPtr, unsigned int flags) {
917  return hipCUDAErrorTohipError(cudaHostGetDevicePointer(devPtr, hostPtr, flags));
918 }
919 
920 inline static hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) {
921  return hipCUDAErrorTohipError(cudaHostGetFlags(flagsPtr, hostPtr));
922 }
923 
924 inline static hipError_t hipHostRegister(void* ptr, size_t size, unsigned int flags) {
925  return hipCUDAErrorTohipError(cudaHostRegister(ptr, size, flags));
926 }
927 
928 inline static hipError_t hipHostUnregister(void* ptr) {
929  return hipCUDAErrorTohipError(cudaHostUnregister(ptr));
930 }
931 
932 inline static hipError_t hipFreeHost(void* ptr)
933  __attribute__((deprecated("use hipHostFree instead")));
934 inline static hipError_t hipFreeHost(void* ptr) {
935  return hipCUDAErrorTohipError(cudaFreeHost(ptr));
936 }
937 
938 inline static hipError_t hipHostFree(void* ptr) {
939  return hipCUDAErrorTohipError(cudaFreeHost(ptr));
940 }
941 
942 inline static hipError_t hipSetDevice(int device) {
943  return hipCUDAErrorTohipError(cudaSetDevice(device));
944 }
945 
946 inline static hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* prop) {
947  struct cudaDeviceProp cdprop;
948  memset(&cdprop, 0x0, sizeof(struct cudaDeviceProp));
949  cdprop.major = prop->major;
950  cdprop.minor = prop->minor;
951  cdprop.totalGlobalMem = prop->totalGlobalMem;
952  cdprop.sharedMemPerBlock = prop->sharedMemPerBlock;
953  cdprop.regsPerBlock = prop->regsPerBlock;
954  cdprop.warpSize = prop->warpSize;
955  cdprop.maxThreadsPerBlock = prop->maxThreadsPerBlock;
956  cdprop.clockRate = prop->clockRate;
957  cdprop.totalConstMem = prop->totalConstMem;
958  cdprop.multiProcessorCount = prop->multiProcessorCount;
959  cdprop.l2CacheSize = prop->l2CacheSize;
960  cdprop.maxThreadsPerMultiProcessor = prop->maxThreadsPerMultiProcessor;
961  cdprop.computeMode = prop->computeMode;
962  cdprop.canMapHostMemory = prop->canMapHostMemory;
963  cdprop.memoryClockRate = prop->memoryClockRate;
964  cdprop.memoryBusWidth = prop->memoryBusWidth;
965  return hipCUDAErrorTohipError(cudaChooseDevice(device, &cdprop));
966 }
967 
968 inline static hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void* src, size_t size) {
969  return hipCUResultTohipError(cuMemcpyHtoD(dst, src, size));
970 }
971 
972 inline static hipError_t hipMemcpyDtoH(void* dst, hipDeviceptr_t src, size_t size) {
973  return hipCUResultTohipError(cuMemcpyDtoH(dst, src, size));
974 }
975 
976 inline static hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t size) {
977  return hipCUResultTohipError(cuMemcpyDtoD(dst, src, size));
978 }
979 
980 inline static hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, void* src, size_t size,
981  hipStream_t stream) {
982  return hipCUResultTohipError(cuMemcpyHtoDAsync(dst, src, size, stream));
983 }
984 
985 inline static hipError_t hipMemcpyDtoHAsync(void* dst, hipDeviceptr_t src, size_t size,
986  hipStream_t stream) {
987  return hipCUResultTohipError(cuMemcpyDtoHAsync(dst, src, size, stream));
988 }
989 
990 inline static hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t size,
991  hipStream_t stream) {
992  return hipCUResultTohipError(cuMemcpyDtoDAsync(dst, src, size, stream));
993 }
994 
995 inline static hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes,
996  hipMemcpyKind copyKind) {
997  return hipCUDAErrorTohipError(
998  cudaMemcpy(dst, src, sizeBytes, hipMemcpyKindToCudaMemcpyKind(copyKind)));
999 }
1000 
1001 
1002 inline static hipError_t hipMemcpyWithStream(void* dst, const void* src,
1003  size_t sizeBytes, hipMemcpyKind copyKind,
1004  hipStream_t stream) {
1005  cudaError_t error = cudaMemcpyAsync(dst, src, sizeBytes,
1006  hipMemcpyKindToCudaMemcpyKind(copyKind),
1007  stream);
1008 
1009  if (error != cudaSuccess) return hipCUDAErrorTohipError(error);
1010 
1011  return hipCUDAErrorTohipError(cudaStreamSynchronize(stream));
1012 }
1013 
1014 inline static hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes,
1015  hipMemcpyKind copyKind, hipStream_t stream __dparm(0)) {
1016  return hipCUDAErrorTohipError(
1017  cudaMemcpyAsync(dst, src, sizeBytes, hipMemcpyKindToCudaMemcpyKind(copyKind), stream));
1018 }
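// --- Illustrative usage sketch (not part of the original header) -------------
// An asynchronous host-to-device copy on a caller-created stream, followed by
// synchronization; `stream`, `d_dst`, `h_src` and `nBytes` are assumed to have
// been set up by the caller:
//
//   hipMemcpyAsync(d_dst, h_src, nBytes, hipMemcpyHostToDevice, stream);
//   hipStreamSynchronize(stream);
// -----------------------------------------------------------------------------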
1019 
1020 inline static hipError_t hipMemcpyToSymbol(const void* symbol, const void* src, size_t sizeBytes,
1021  size_t offset __dparm(0),
1022  hipMemcpyKind copyType __dparm(hipMemcpyHostToDevice)) {
1023  return hipCUDAErrorTohipError(cudaMemcpyToSymbol(symbol, src, sizeBytes, offset,
1024  hipMemcpyKindToCudaMemcpyKind(copyType)));
1025 }
1026 
1027 inline static hipError_t hipMemcpyToSymbolAsync(const void* symbol, const void* src,
1028  size_t sizeBytes, size_t offset,
1029  hipMemcpyKind copyType,
1030  hipStream_t stream __dparm(0)) {
1031  return hipCUDAErrorTohipError(cudaMemcpyToSymbolAsync(
1032  symbol, src, sizeBytes, offset, hipMemcpyKindToCudaMemcpyKind(copyType), stream));
1033 }
1034 
1035 inline static hipError_t hipMemcpyFromSymbol(void* dst, const void* symbolName, size_t sizeBytes,
1036  size_t offset __dparm(0),
1037  hipMemcpyKind kind __dparm(hipMemcpyDeviceToHost)) {
1038  return hipCUDAErrorTohipError(cudaMemcpyFromSymbol(dst, symbolName, sizeBytes, offset,
1039  hipMemcpyKindToCudaMemcpyKind(kind)));
1040 }
1041 
1042 inline static hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolName,
1043  size_t sizeBytes, size_t offset,
1044  hipMemcpyKind kind,
1045  hipStream_t stream __dparm(0)) {
1046  return hipCUDAErrorTohipError(cudaMemcpyFromSymbolAsync(
1047  dst, symbolName, sizeBytes, offset, hipMemcpyKindToCudaMemcpyKind(kind), stream));
1048 }
1049 
1050 inline static hipError_t hipGetSymbolAddress(void** devPtr, const void* symbolName) {
1051  return hipCUDAErrorTohipError(cudaGetSymbolAddress(devPtr, symbolName));
1052 }
1053 
1054 inline static hipError_t hipGetSymbolSize(size_t* size, const void* symbolName) {
1055  return hipCUDAErrorTohipError(cudaGetSymbolSize(size, symbolName));
1056 }
1057 
1058 inline static hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch,
1059  size_t width, size_t height, hipMemcpyKind kind) {
1060  return hipCUDAErrorTohipError(
1061  cudaMemcpy2D(dst, dpitch, src, spitch, width, height, hipMemcpyKindToCudaMemcpyKind(kind)));
1062 }
1063 
1064 inline static hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy) {
1065  return hipCUResultTohipError(cuMemcpy2D(pCopy));
1066 }
1067 
1068 inline static hipError_t hipMemcpyParam2DAsync(const hip_Memcpy2D* pCopy, hipStream_t stream __dparm(0)) {
1069  return hipCUResultTohipError(cuMemcpy2DAsync(pCopy, stream));
1070 }
1071 
1072 inline static hipError_t hipMemcpy3D(const struct hipMemcpy3DParms *p)
1073 {
1074  return hipCUDAErrorTohipError(cudaMemcpy3D(p));
1075 }
1076 
1077 inline static hipError_t hipMemcpy3DAsync(const struct hipMemcpy3DParms *p, hipStream_t stream)
1078 {
1079  return hipCUDAErrorTohipError(cudaMemcpy3DAsync(p, stream));
1080 }
1081 
1082 inline static hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch,
1083  size_t width, size_t height, hipMemcpyKind kind,
1084  hipStream_t stream) {
1085  return hipCUDAErrorTohipError(cudaMemcpy2DAsync(dst, dpitch, src, spitch, width, height,
1086  hipMemcpyKindToCudaMemcpyKind(kind), stream));
1087 }
1088 
1089 inline static hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset,
1090  const void* src, size_t spitch, size_t width,
1091  size_t height, hipMemcpyKind kind) {
1092  return hipCUDAErrorTohipError(cudaMemcpy2DToArray(dst, wOffset, hOffset, src, spitch, width,
1093  height, hipMemcpyKindToCudaMemcpyKind(kind)));
1094 }
1095 
1096 __HIP_DEPRECATED inline static hipError_t hipMemcpyToArray(hipArray* dst, size_t wOffset,
1097  size_t hOffset, const void* src,
1098  size_t count, hipMemcpyKind kind) {
1099  return hipCUDAErrorTohipError(
1100  cudaMemcpyToArray(dst, wOffset, hOffset, src, count, hipMemcpyKindToCudaMemcpyKind(kind)));
1101 }
1102 
1103 __HIP_DEPRECATED inline static hipError_t hipMemcpyFromArray(void* dst, hipArray_const_t srcArray,
1104  size_t wOffset, size_t hOffset,
1105  size_t count, hipMemcpyKind kind) {
1106  return hipCUDAErrorTohipError(cudaMemcpyFromArray(dst, srcArray, wOffset, hOffset, count,
1107  hipMemcpyKindToCudaMemcpyKind(kind)));
1108 }
1109 
1110 inline static hipError_t hipMemcpyAtoH(void* dst, hipArray* srcArray, size_t srcOffset,
1111  size_t count) {
1112  return hipCUResultTohipError(cuMemcpyAtoH(dst, (CUarray)srcArray, srcOffset, count));
1113 }
1114 
1115 inline static hipError_t hipMemcpyHtoA(hipArray* dstArray, size_t dstOffset, const void* srcHost,
1116  size_t count) {
1117  return hipCUResultTohipError(cuMemcpyHtoA((CUarray)dstArray, dstOffset, srcHost, count));
1118 }
1119 
1120 inline static hipError_t hipDeviceSynchronize() {
1121  return hipCUDAErrorTohipError(cudaDeviceSynchronize());
1122 }
1123 
1124 inline static hipError_t hipDeviceGetCacheConfig(hipFuncCache_t* pCacheConfig) {
1125  return hipCUDAErrorTohipError(cudaDeviceGetCacheConfig(pCacheConfig));
1126 }
1127 
1128 inline static hipError_t hipFuncSetAttribute(const void* func, hipFuncAttribute attr, int value) {
1129  return hipCUDAErrorTohipError(cudaFuncSetAttribute(func, attr, value));
1130 }
1131 
1132 inline static hipError_t hipDeviceSetCacheConfig(hipFuncCache_t cacheConfig) {
1133  return hipCUDAErrorTohipError(cudaDeviceSetCacheConfig(cacheConfig));
1134 }
1135 
1136 inline static hipError_t hipFuncSetSharedMemConfig(const void* func, hipSharedMemConfig config) {
1137  return hipCUDAErrorTohipError(cudaFuncSetSharedMemConfig(func, config));
1138 }
1139 
1140 inline static const char* hipGetErrorString(hipError_t error) {
1141  return cudaGetErrorString(hipErrorToCudaError(error));
1142 }
1143 
1144 inline static const char* hipGetErrorName(hipError_t error) {
1145  return cudaGetErrorName(hipErrorToCudaError(error));
1146 }
1147 
1148 inline static hipError_t hipGetDeviceCount(int* count) {
1149  return hipCUDAErrorTohipError(cudaGetDeviceCount(count));
1150 }
1151 
1152 inline static hipError_t hipGetDevice(int* device) {
1153  return hipCUDAErrorTohipError(cudaGetDevice(device));
1154 }
1155 
1156 inline static hipError_t hipIpcCloseMemHandle(void* devPtr) {
1157  return hipCUDAErrorTohipError(cudaIpcCloseMemHandle(devPtr));
1158 }
1159 
1160 inline static hipError_t hipIpcGetEventHandle(hipIpcEventHandle_t* handle, hipEvent_t event) {
1161  return hipCUDAErrorTohipError(cudaIpcGetEventHandle(handle, event));
1162 }
1163 
1164 inline static hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* devPtr) {
1165  return hipCUDAErrorTohipError(cudaIpcGetMemHandle(handle, devPtr));
1166 }
1167 
1168 inline static hipError_t hipIpcOpenEventHandle(hipEvent_t* event, hipIpcEventHandle_t handle) {
1169  return hipCUDAErrorTohipError(cudaIpcOpenEventHandle(event, handle));
1170 }
1171 
1172 inline static hipError_t hipIpcOpenMemHandle(void** devPtr, hipIpcMemHandle_t handle,
1173  unsigned int flags) {
1174  return hipCUDAErrorTohipError(cudaIpcOpenMemHandle(devPtr, handle, flags));
1175 }
1176 
1177 inline static hipError_t hipMemset(void* devPtr, int value, size_t count) {
1178  return hipCUDAErrorTohipError(cudaMemset(devPtr, value, count));
1179 }
1180 
1181 inline static hipError_t hipMemsetD32(hipDeviceptr_t devPtr, int value, size_t count) {
1182  return hipCUResultTohipError(cuMemsetD32(devPtr, value, count));
1183 }
1184 
1185 inline static hipError_t hipMemsetAsync(void* devPtr, int value, size_t count,
1186  hipStream_t stream __dparm(0)) {
1187  return hipCUDAErrorTohipError(cudaMemsetAsync(devPtr, value, count, stream));
1188 }
1189 
1190 inline static hipError_t hipMemsetD32Async(hipDeviceptr_t devPtr, int value, size_t count,
1191  hipStream_t stream __dparm(0)) {
1192  return hipCUResultTohipError(cuMemsetD32Async(devPtr, value, count, stream));
1193 }
1194 
1195 inline static hipError_t hipMemsetD8(hipDeviceptr_t dest, unsigned char value, size_t sizeBytes) {
1196  return hipCUResultTohipError(cuMemsetD8(dest, value, sizeBytes));
1197 }
1198 
1199 inline static hipError_t hipMemsetD8Async(hipDeviceptr_t dest, unsigned char value, size_t sizeBytes,
1200  hipStream_t stream __dparm(0)) {
1201  return hipCUResultTohipError(cuMemsetD8Async(dest, value, sizeBytes, stream));
1202 }
1203 
1204 inline static hipError_t hipMemsetD16(hipDeviceptr_t dest, unsigned short value, size_t sizeBytes) {
1205  return hipCUResultTohipError(cuMemsetD16(dest, value, sizeBytes));
1206 }
1207 
1208 inline static hipError_t hipMemsetD16Async(hipDeviceptr_t dest, unsigned short value, size_t sizeBytes,
1209  hipStream_t stream __dparm(0)) {
1210  return hipCUResultTohipError(cuMemsetD16Async(dest, value, sizeBytes, stream));
1211 }
1212 
1213 inline static hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height) {
1214  return hipCUDAErrorTohipError(cudaMemset2D(dst, pitch, value, width, height));
1215 }
1216 
1217 inline static hipError_t hipMemset2DAsync(void* dst, size_t pitch, int value, size_t width, size_t height, hipStream_t stream __dparm(0)) {
1218  return hipCUDAErrorTohipError(cudaMemset2DAsync(dst, pitch, value, width, height, stream));
1219 }
1220 
1221 inline static hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent ){
1222  return hipCUDAErrorTohipError(cudaMemset3D(pitchedDevPtr, value, extent));
1223 }
1224 
1225 inline static hipError_t hipMemset3DAsync(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent, hipStream_t stream __dparm(0) ){
1226  return hipCUDAErrorTohipError(cudaMemset3DAsync(pitchedDevPtr, value, extent, stream));
1227 }
1228 
1229 inline static hipError_t hipGetDeviceProperties(hipDeviceProp_t* p_prop, int device) {
1230  struct cudaDeviceProp cdprop;
1231  cudaError_t cerror;
1232  cerror = cudaGetDeviceProperties(&cdprop, device);
1233 
1234  strncpy(p_prop->name, cdprop.name, 256);
1235  p_prop->totalGlobalMem = cdprop.totalGlobalMem;
1236  p_prop->sharedMemPerBlock = cdprop.sharedMemPerBlock;
1237  p_prop->regsPerBlock = cdprop.regsPerBlock;
1238  p_prop->warpSize = cdprop.warpSize;
1239  p_prop->maxThreadsPerBlock = cdprop.maxThreadsPerBlock;
1240  for (int i = 0; i < 3; i++) {
1241  p_prop->maxThreadsDim[i] = cdprop.maxThreadsDim[i];
1242  p_prop->maxGridSize[i] = cdprop.maxGridSize[i];
1243  }
1244  p_prop->clockRate = cdprop.clockRate;
1245  p_prop->memoryClockRate = cdprop.memoryClockRate;
1246  p_prop->memoryBusWidth = cdprop.memoryBusWidth;
1247  p_prop->totalConstMem = cdprop.totalConstMem;
1248  p_prop->major = cdprop.major;
1249  p_prop->minor = cdprop.minor;
1250  p_prop->multiProcessorCount = cdprop.multiProcessorCount;
1251  p_prop->l2CacheSize = cdprop.l2CacheSize;
1252  p_prop->maxThreadsPerMultiProcessor = cdprop.maxThreadsPerMultiProcessor;
1253  p_prop->computeMode = cdprop.computeMode;
1254  p_prop->clockInstructionRate = cdprop.clockRate; // Same as clock-rate:
1255 
1256  int ccVers = p_prop->major * 100 + p_prop->minor * 10;
1257  p_prop->arch.hasGlobalInt32Atomics = (ccVers >= 110);
1258  p_prop->arch.hasGlobalFloatAtomicExch = (ccVers >= 110);
1259  p_prop->arch.hasSharedInt32Atomics = (ccVers >= 120);
1260  p_prop->arch.hasSharedFloatAtomicExch = (ccVers >= 120);
1261  p_prop->arch.hasFloatAtomicAdd = (ccVers >= 200);
1262  p_prop->arch.hasGlobalInt64Atomics = (ccVers >= 120);
1263  p_prop->arch.hasSharedInt64Atomics = (ccVers >= 110);
1264  p_prop->arch.hasDoubles = (ccVers >= 130);
1265  p_prop->arch.hasWarpVote = (ccVers >= 120);
1266  p_prop->arch.hasWarpBallot = (ccVers >= 200);
1267  p_prop->arch.hasWarpShuffle = (ccVers >= 300);
1268  p_prop->arch.hasFunnelShift = (ccVers >= 350);
1269  p_prop->arch.hasThreadFenceSystem = (ccVers >= 200);
1270  p_prop->arch.hasSyncThreadsExt = (ccVers >= 200);
1271  p_prop->arch.hasSurfaceFuncs = (ccVers >= 200);
1272  p_prop->arch.has3dGrid = (ccVers >= 200);
1273  p_prop->arch.hasDynamicParallelism = (ccVers >= 350);
1274 
1275  p_prop->concurrentKernels = cdprop.concurrentKernels;
1276  p_prop->pciDomainID = cdprop.pciDomainID;
1277  p_prop->pciBusID = cdprop.pciBusID;
1278  p_prop->pciDeviceID = cdprop.pciDeviceID;
1279  p_prop->maxSharedMemoryPerMultiProcessor = cdprop.sharedMemPerMultiprocessor;
1280  p_prop->isMultiGpuBoard = cdprop.isMultiGpuBoard;
1281  p_prop->canMapHostMemory = cdprop.canMapHostMemory;
1282  p_prop->gcnArch = 0; // Not a GCN arch
1283  p_prop->integrated = cdprop.integrated;
1284  p_prop->cooperativeLaunch = cdprop.cooperativeLaunch;
1285  p_prop->cooperativeMultiDeviceLaunch = cdprop.cooperativeMultiDeviceLaunch;
1290 
1291  p_prop->maxTexture1D = cdprop.maxTexture1D;
1292  p_prop->maxTexture2D[0] = cdprop.maxTexture2D[0];
1293  p_prop->maxTexture2D[1] = cdprop.maxTexture2D[1];
1294  p_prop->maxTexture3D[0] = cdprop.maxTexture3D[0];
1295  p_prop->maxTexture3D[1] = cdprop.maxTexture3D[1];
1296  p_prop->maxTexture3D[2] = cdprop.maxTexture3D[2];
1297 
1298  p_prop->memPitch = cdprop.memPitch;
1299  p_prop->textureAlignment = cdprop.textureAlignment;
1300  p_prop->texturePitchAlignment = cdprop.texturePitchAlignment;
1301  p_prop->kernelExecTimeoutEnabled = cdprop.kernelExecTimeoutEnabled;
1302  p_prop->ECCEnabled = cdprop.ECCEnabled;
1303  p_prop->tccDriver = cdprop.tccDriver;
1304 
1305  return hipCUDAErrorTohipError(cerror);
1306 }
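// --- Illustrative usage sketch (not part of the original header) -------------
// Reading back the translated properties:
//
//   hipDeviceProp_t props;
//   if (hipGetDeviceProperties(&props, 0) == hipSuccess)
//       printf("%s: %d multiprocessors\n", props.name, props.multiProcessorCount);
// -----------------------------------------------------------------------------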
1307 
1308 inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) {
1309  enum cudaDeviceAttr cdattr;
1310  cudaError_t cerror;
1311 
1312 switch (attr) {
1313 case hipDeviceAttributeMaxThreadsPerBlock:
1314 cdattr = cudaDevAttrMaxThreadsPerBlock;
1315 break;
1316 case hipDeviceAttributeMaxBlockDimX:
1317 cdattr = cudaDevAttrMaxBlockDimX;
1318 break;
1319 case hipDeviceAttributeMaxBlockDimY:
1320 cdattr = cudaDevAttrMaxBlockDimY;
1321 break;
1322 case hipDeviceAttributeMaxBlockDimZ:
1323 cdattr = cudaDevAttrMaxBlockDimZ;
1324 break;
1325 case hipDeviceAttributeMaxGridDimX:
1326 cdattr = cudaDevAttrMaxGridDimX;
1327 break;
1328 case hipDeviceAttributeMaxGridDimY:
1329 cdattr = cudaDevAttrMaxGridDimY;
1330 break;
1331 case hipDeviceAttributeMaxGridDimZ:
1332 cdattr = cudaDevAttrMaxGridDimZ;
1333 break;
1334 case hipDeviceAttributeMaxSharedMemoryPerBlock:
1335 cdattr = cudaDevAttrMaxSharedMemoryPerBlock;
1336 break;
1337 case hipDeviceAttributeTotalConstantMemory:
1338 cdattr = cudaDevAttrTotalConstantMemory;
1339 break;
1340 case hipDeviceAttributeWarpSize:
1341 cdattr = cudaDevAttrWarpSize;
1342 break;
1343 case hipDeviceAttributeMaxRegistersPerBlock:
1344 cdattr = cudaDevAttrMaxRegistersPerBlock;
1345 break;
1346 case hipDeviceAttributeClockRate:
1347 cdattr = cudaDevAttrClockRate;
1348 break;
1349 case hipDeviceAttributeMemoryClockRate:
1350 cdattr = cudaDevAttrMemoryClockRate;
1351 break;
1352 case hipDeviceAttributeMemoryBusWidth:
1353 cdattr = cudaDevAttrGlobalMemoryBusWidth;
1354 break;
1355 case hipDeviceAttributeMultiprocessorCount:
1356 cdattr = cudaDevAttrMultiProcessorCount;
1357 break;
1358 case hipDeviceAttributeComputeMode:
1359 cdattr = cudaDevAttrComputeMode;
1360 break;
1361 case hipDeviceAttributeL2CacheSize:
1362 cdattr = cudaDevAttrL2CacheSize;
1363 break;
1364 case hipDeviceAttributeMaxThreadsPerMultiProcessor:
1365 cdattr = cudaDevAttrMaxThreadsPerMultiProcessor;
1366 break;
1367 case hipDeviceAttributeComputeCapabilityMajor:
1368 cdattr = cudaDevAttrComputeCapabilityMajor;
1369 break;
1370 case hipDeviceAttributeComputeCapabilityMinor:
1371 cdattr = cudaDevAttrComputeCapabilityMinor;
1372 break;
1373 case hipDeviceAttributeConcurrentKernels:
1374 cdattr = cudaDevAttrConcurrentKernels;
1375 break;
1376 case hipDeviceAttributePciBusId:
1377 cdattr = cudaDevAttrPciBusId;
1378 break;
1379 case hipDeviceAttributePciDeviceId:
1380 cdattr = cudaDevAttrPciDeviceId;
1381 break;
1382 case hipDeviceAttributeMaxSharedMemoryPerMultiprocessor:
1383 cdattr = cudaDevAttrMaxSharedMemoryPerMultiprocessor;
1384 break;
1385 case hipDeviceAttributeIsMultiGpuBoard:
1386 cdattr = cudaDevAttrIsMultiGpuBoard;
1387 break;
1388 case hipDeviceAttributeIntegrated:
1389 cdattr = cudaDevAttrIntegrated;
1390 break;
1391 case hipDeviceAttributeMaxTexture1DWidth:
1392 cdattr = cudaDevAttrMaxTexture1DWidth;
1393 break;
1394 case hipDeviceAttributeMaxTexture2DWidth:
1395 cdattr = cudaDevAttrMaxTexture2DWidth;
1396 break;
1397 case hipDeviceAttributeMaxTexture2DHeight:
1398 cdattr = cudaDevAttrMaxTexture2DHeight;
1399 break;
1400 case hipDeviceAttributeMaxTexture3DWidth:
1401 cdattr = cudaDevAttrMaxTexture3DWidth;
1402 break;
1403 case hipDeviceAttributeMaxTexture3DHeight:
1404 cdattr = cudaDevAttrMaxTexture3DHeight;
1405 break;
1406 case hipDeviceAttributeMaxTexture3DDepth:
1407 cdattr = cudaDevAttrMaxTexture3DDepth;
1408 break;
1409 case hipDeviceAttributeMaxPitch:
1410 cdattr = cudaDevAttrMaxPitch;
1411 break;
1412 case hipDeviceAttributeTextureAlignment:
1413 cdattr = cudaDevAttrTextureAlignment;
1414 break;
1415 case hipDeviceAttributeTexturePitchAlignment:
1416 cdattr = cudaDevAttrTexturePitchAlignment;
1417 break;
1418 case hipDeviceAttributeKernelExecTimeout:
1419 cdattr = cudaDevAttrKernelExecTimeout;
1420 break;
1421 case hipDeviceAttributeCanMapHostMemory:
1422 cdattr = cudaDevAttrCanMapHostMemory;
1423 break;
1424 case hipDeviceAttributeEccEnabled:
1425 cdattr = cudaDevAttrEccEnabled;
1426 break;
1427 case hipDeviceAttributeCooperativeLaunch:
1428 cdattr = cudaDevAttrCooperativeLaunch;
1429 break;
1430 case hipDeviceAttributeCooperativeMultiDeviceLaunch:
1431 cdattr = cudaDevAttrCooperativeMultiDeviceLaunch;
1432 break;
1433 default:
1434 return hipCUDAErrorTohipError(cudaErrorInvalidValue);
1435 }
1436 
1437  cerror = cudaDeviceGetAttribute(pi, cdattr, device);
1438 
1439  return hipCUDAErrorTohipError(cerror);
1440 }
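// --- Illustrative usage sketch (not part of the original header) -------------
// Querying a single attribute through the mapping above:
//
//   int maxThreadsPerBlock = 0;
//   hipDeviceGetAttribute(&maxThreadsPerBlock,
//                         hipDeviceAttributeMaxThreadsPerBlock, 0 /* device */);
// -----------------------------------------------------------------------------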
1441 
1442 inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks,
1443  const void* func,
1444  int blockSize,
1445  size_t dynamicSMemSize) {
1446  return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, func,
1447  blockSize, dynamicSMemSize));
1448 }
1449 
1450 inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks,
1451  const void* func,
1452  int blockSize,
1453  size_t dynamicSMemSize,
1454  unsigned int flags) {
1455  return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks, func,
1456  blockSize, dynamicSMemSize, flags));
1457 }
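// --- Illustrative usage sketch (not part of the original header) -------------
// Occupancy query for a hypothetical kernel `myKernel` with 256 threads per
// block and no dynamic shared memory:
//
//   int numBlocks = 0;
//   hipOccupancyMaxActiveBlocksPerMultiprocessor(
//       &numBlocks, (const void*)myKernel, 256, 0);
// -----------------------------------------------------------------------------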
1458 
1459 inline static hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks,
1460  hipFunction_t f,
1461  int blockSize,
1462  size_t dynamicSMemSize ){
1463  return hipCUResultTohipError(cuOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, f,
1464  blockSize, dynamicSMemSize));
1465 }
1466 
1467 inline static hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks,
1468  hipFunction_t f,
1469  int blockSize,
1470  size_t dynamicSMemSize,
1471  unsigned int flags ) {
1472  return hipCUResultTohipError(cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks,f,
1473  blockSize, dynamicSMemSize, flags));
1474 }
1475 
1476 //TODO - Match CUoccupancyB2DSize
1477 inline static hipError_t hipModuleOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize,
1478  hipFunction_t f, size_t dynSharedMemPerBlk,
1479  int blockSizeLimit){
1480  return hipCUResultTohipError(cuOccupancyMaxPotentialBlockSize(gridSize, blockSize, f, NULL,
1481  dynSharedMemPerBlk, blockSizeLimit));
1482 }
1483 
1484 //TODO - Match CUoccupancyB2DSize
1485 inline static hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int* blockSize,
1486  hipFunction_t f, size_t dynSharedMemPerBlk,
1487  int blockSizeLimit, unsigned int flags){
1488  return hipCUResultTohipError(cuOccupancyMaxPotentialBlockSizeWithFlags(gridSize, blockSize, f, NULL,
1489  dynSharedMemPerBlk, blockSizeLimit, flags));
1490 }
1491 
1492 inline static hipError_t hipPointerGetAttributes(hipPointerAttribute_t* attributes, const void* ptr) {
1493  struct cudaPointerAttributes cPA;
1494  hipError_t err = hipCUDAErrorTohipError(cudaPointerGetAttributes(&cPA, ptr));
1495  if (err == hipSuccess) {
1496 #if (CUDART_VERSION >= 11000)
1497  auto memType = cPA.type;
1498 #else
1499 unsigned memType = cPA.memoryType; // No `auto` because CUDA 10.2 doesn't force C++11
1500 #endif
1501  switch (memType) {
1502  case cudaMemoryTypeDevice:
1503  attributes->memoryType = hipMemoryTypeDevice;
1504  break;
1505  case cudaMemoryTypeHost:
1506  attributes->memoryType = hipMemoryTypeHost;
1507  break;
1508  default:
1509  return hipErrorUnknown;
1510  }
1511  attributes->device = cPA.device;
1512  attributes->devicePointer = cPA.devicePointer;
1513  attributes->hostPointer = cPA.hostPointer;
1514  attributes->isManaged = 0;
1515  attributes->allocationFlags = 0;
1516  }
1517  return err;
1518 }
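/*
 * Illustrative sketch (not part of the original header): classifying a pointer
 * with hipPointerGetAttributes. Error handling is trimmed for brevity.
 *
 *   void* p = nullptr;
 *   hipMalloc(&p, 1024);
 *   hipPointerAttribute_t attr;
 *   if (hipPointerGetAttributes(&attr, p) == hipSuccess &&
 *       attr.memoryType == hipMemoryTypeDevice) {
 *       // p is device memory, resident on device attr.device
 *   }
 */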
1519 
1520 inline static hipError_t hipMemGetInfo(size_t* free, size_t* total) {
1521  return hipCUDAErrorTohipError(cudaMemGetInfo(free, total));
1522 }
1523 
1524 inline static hipError_t hipEventCreate(hipEvent_t* event) {
1525  return hipCUDAErrorTohipError(cudaEventCreate(event));
1526 }
1527 
1528 inline static hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream __dparm(NULL)) {
1529  return hipCUDAErrorTohipError(cudaEventRecord(event, stream));
1530 }
1531 
1532 inline static hipError_t hipEventSynchronize(hipEvent_t event) {
1533  return hipCUDAErrorTohipError(cudaEventSynchronize(event));
1534 }
1535 
1536 inline static hipError_t hipEventElapsedTime(float* ms, hipEvent_t start, hipEvent_t stop) {
1537  return hipCUDAErrorTohipError(cudaEventElapsedTime(ms, start, stop));
1538 }
1539 
1540 inline static hipError_t hipEventDestroy(hipEvent_t event) {
1541  return hipCUDAErrorTohipError(cudaEventDestroy(event));
1542 }
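/*
 * Illustrative sketch (not part of the original header): timing asynchronous
 * work with the event wrappers above. The stream and the enqueued work are
 * assumed to exist elsewhere.
 *
 *   hipEvent_t start, stop;
 *   hipEventCreate(&start);
 *   hipEventCreate(&stop);
 *   hipEventRecord(start, stream);
 *   // ... enqueue kernels / copies on stream ...
 *   hipEventRecord(stop, stream);
 *   hipEventSynchronize(stop);
 *   float ms = 0.0f;
 *   hipEventElapsedTime(&ms, start, stop);
 *   hipEventDestroy(start);
 *   hipEventDestroy(stop);
 */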
1543 
1544 inline static hipError_t hipStreamCreateWithFlags(hipStream_t* stream, unsigned int flags) {
1545  return hipCUDAErrorTohipError(cudaStreamCreateWithFlags(stream, flags));
1546 }
1547 
1548 inline static hipError_t hipStreamCreateWithPriority(hipStream_t* stream, unsigned int flags, int priority) {
1549  return hipCUDAErrorTohipError(cudaStreamCreateWithPriority(stream, flags, priority));
1550 }
1551 
1552 inline static hipError_t hipDeviceGetStreamPriorityRange(int* leastPriority, int* greatestPriority) {
1553  return hipCUDAErrorTohipError(cudaDeviceGetStreamPriorityRange(leastPriority, greatestPriority));
1554 }
1555 
1556 inline static hipError_t hipStreamCreate(hipStream_t* stream) {
1557  return hipCUDAErrorTohipError(cudaStreamCreate(stream));
1558 }
1559 
1560 inline static hipError_t hipStreamSynchronize(hipStream_t stream) {
1561  return hipCUDAErrorTohipError(cudaStreamSynchronize(stream));
1562 }
1563 
1564 inline static hipError_t hipStreamDestroy(hipStream_t stream) {
1565  return hipCUDAErrorTohipError(cudaStreamDestroy(stream));
1566 }
1567 
1568 inline static hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags) {
1569  return hipCUDAErrorTohipError(cudaStreamGetFlags(stream, flags));
1570 }
1571 
1572 inline static hipError_t hipStreamGetPriority(hipStream_t stream, int *priority) {
1573  return hipCUDAErrorTohipError(cudaStreamGetPriority(stream, priority));
1574 }
1575 
1576 inline static hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event,
1577  unsigned int flags) {
1578  return hipCUDAErrorTohipError(cudaStreamWaitEvent(stream, event, flags));
1579 }
1580 
1581 inline static hipError_t hipStreamQuery(hipStream_t stream) {
1582  return hipCUDAErrorTohipError(cudaStreamQuery(stream));
1583 }
1584 
1585 inline static hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback,
1586  void* userData, unsigned int flags) {
1587  return hipCUDAErrorTohipError(
1588  cudaStreamAddCallback(stream, (cudaStreamCallback_t)callback, userData, flags));
1589 }
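/*
 * Illustrative sketch (not part of the original header): a host callback that
 * runs once all work previously enqueued on the stream has completed.
 * "myCallback" is a hypothetical user function; the flags argument is reserved
 * and must be 0.
 *
 *   void myCallback(hipStream_t s, hipError_t status, void* userData) {
 *       // executes on a host thread after the preceding stream work finishes
 *   }
 *   ...
 *   hipStreamAddCallback(stream, myCallback, nullptr, 0);
 */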
1590 
1591 inline static hipError_t hipDriverGetVersion(int* driverVersion) {
1592  cudaError_t err = cudaDriverGetVersion(driverVersion);
1593 
1594  // Override driver version to match version reported on HCC side.
1595  *driverVersion = 4;
1596 
1597  return hipCUDAErrorTohipError(err);
1598 }
1599 
1600 inline static hipError_t hipRuntimeGetVersion(int* runtimeVersion) {
1601  return hipCUDAErrorTohipError(cudaRuntimeGetVersion(runtimeVersion));
1602 }
1603 
1604 inline static hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, int device, int peerDevice) {
1605  return hipCUDAErrorTohipError(cudaDeviceCanAccessPeer(canAccessPeer, device, peerDevice));
1606 }
1607 
1608 inline static hipError_t hipDeviceDisablePeerAccess(int peerDevice) {
1609  return hipCUDAErrorTohipError(cudaDeviceDisablePeerAccess(peerDevice));
1610 }
1611 
1612 inline static hipError_t hipDeviceEnablePeerAccess(int peerDevice, unsigned int flags) {
1613  return hipCUDAErrorTohipError(cudaDeviceEnablePeerAccess(peerDevice, flags));
1614 }
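/*
 * Illustrative sketch (not part of the original header): enabling peer-to-peer
 * access from device 0 to allocations on device 1 before a hipMemcpyPeer.
 * Device ordinals 0 and 1 are arbitrary; hipSetDevice is assumed to be declared
 * earlier in this header. The flags argument is reserved and must be 0.
 *
 *   int canAccess = 0;
 *   hipDeviceCanAccessPeer(&canAccess, 0, 1);
 *   if (canAccess) {
 *       hipSetDevice(0);
 *       hipDeviceEnablePeerAccess(1, 0);
 *   }
 */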
1615 
1616 inline static hipError_t hipCtxDisablePeerAccess(hipCtx_t peerCtx) {
1617  return hipCUResultTohipError(cuCtxDisablePeerAccess(peerCtx));
1618 }
1619 
1620 inline static hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags) {
1621  return hipCUResultTohipError(cuCtxEnablePeerAccess(peerCtx, flags));
1622 }
1623 
1624 inline static hipError_t hipDevicePrimaryCtxGetState(hipDevice_t dev, unsigned int* flags,
1625  int* active) {
1626  return hipCUResultTohipError(cuDevicePrimaryCtxGetState(dev, flags, active));
1627 }
1628 
1629 inline static hipError_t hipDevicePrimaryCtxRelease(hipDevice_t dev) {
1630  return hipCUResultTohipError(cuDevicePrimaryCtxRelease(dev));
1631 }
1632 
1633 inline static hipError_t hipDevicePrimaryCtxRetain(hipCtx_t* pctx, hipDevice_t dev) {
1634  return hipCUResultTohipError(cuDevicePrimaryCtxRetain(pctx, dev));
1635 }
1636 
1637 inline static hipError_t hipDevicePrimaryCtxReset(hipDevice_t dev) {
1638  return hipCUResultTohipError(cuDevicePrimaryCtxReset(dev));
1639 }
1640 
1641 inline static hipError_t hipDevicePrimaryCtxSetFlags(hipDevice_t dev, unsigned int flags) {
1642  return hipCUResultTohipError(cuDevicePrimaryCtxSetFlags(dev, flags));
1643 }
1644 
1645 inline static hipError_t hipMemGetAddressRange(hipDeviceptr_t* pbase, size_t* psize,
1646  hipDeviceptr_t dptr) {
1647  return hipCUResultTohipError(cuMemGetAddressRange(pbase, psize, dptr));
1648 }
1649 
1650 inline static hipError_t hipMemcpyPeer(void* dst, int dstDevice, const void* src, int srcDevice,
1651  size_t count) {
1652  return hipCUDAErrorTohipError(cudaMemcpyPeer(dst, dstDevice, src, srcDevice, count));
1653 }
1654 
1655 inline static hipError_t hipMemcpyPeerAsync(void* dst, int dstDevice, const void* src,
1656  int srcDevice, size_t count,
1657  hipStream_t stream __dparm(0)) {
1658  return hipCUDAErrorTohipError(
1659  cudaMemcpyPeerAsync(dst, dstDevice, src, srcDevice, count, stream));
1660 }
1661 
1662 // Profile APIs:
1663 inline static hipError_t hipProfilerStart() { return hipCUDAErrorTohipError(cudaProfilerStart()); }
1664 
1665 inline static hipError_t hipProfilerStop() { return hipCUDAErrorTohipError(cudaProfilerStop()); }
1666 
1667 inline static hipError_t hipGetDeviceFlags(unsigned int* flags) {
1668  return hipCUDAErrorTohipError(cudaGetDeviceFlags(flags));
1669 }
1670 
1671 inline static hipError_t hipSetDeviceFlags(unsigned int flags) {
1672  return hipCUDAErrorTohipError(cudaSetDeviceFlags(flags));
1673 }
1674 
1675 inline static hipError_t hipEventCreateWithFlags(hipEvent_t* event, unsigned int flags) {
1676  return hipCUDAErrorTohipError(cudaEventCreateWithFlags(event, flags));
1677 }
1678 
1679 inline static hipError_t hipEventQuery(hipEvent_t event) {
1680  return hipCUDAErrorTohipError(cudaEventQuery(event));
1681 }
1682 
1683 inline static hipError_t hipCtxCreate(hipCtx_t* ctx, unsigned int flags, hipDevice_t device) {
1684  return hipCUResultTohipError(cuCtxCreate(ctx, flags, device));
1685 }
1686 
1687 inline static hipError_t hipCtxDestroy(hipCtx_t ctx) {
1688  return hipCUResultTohipError(cuCtxDestroy(ctx));
1689 }
1690 
1691 inline static hipError_t hipCtxPopCurrent(hipCtx_t* ctx) {
1692  return hipCUResultTohipError(cuCtxPopCurrent(ctx));
1693 }
1694 
1695 inline static hipError_t hipCtxPushCurrent(hipCtx_t ctx) {
1696  return hipCUResultTohipError(cuCtxPushCurrent(ctx));
1697 }
1698 
1699 inline static hipError_t hipCtxSetCurrent(hipCtx_t ctx) {
1700  return hipCUResultTohipError(cuCtxSetCurrent(ctx));
1701 }
1702 
1703 inline static hipError_t hipCtxGetCurrent(hipCtx_t* ctx) {
1704  return hipCUResultTohipError(cuCtxGetCurrent(ctx));
1705 }
1706 
1707 inline static hipError_t hipCtxGetDevice(hipDevice_t* device) {
1708  return hipCUResultTohipError(cuCtxGetDevice(device));
1709 }
1710 
1711 inline static hipError_t hipCtxGetApiVersion(hipCtx_t ctx, int* apiVersion) {
1712  return hipCUResultTohipError(cuCtxGetApiVersion(ctx, (unsigned int*)apiVersion));
1713 }
1714 
1715 inline static hipError_t hipCtxGetCacheConfig(hipFuncCache* cacheConfig) {
1716  return hipCUResultTohipError(cuCtxGetCacheConfig(cacheConfig));
1717 }
1718 
1719 inline static hipError_t hipCtxSetCacheConfig(hipFuncCache cacheConfig) {
1720  return hipCUResultTohipError(cuCtxSetCacheConfig(cacheConfig));
1721 }
1722 
1723 inline static hipError_t hipCtxSetSharedMemConfig(hipSharedMemConfig config) {
1724  return hipCUResultTohipError(cuCtxSetSharedMemConfig((CUsharedconfig)config));
1725 }
1726 
1727 inline static hipError_t hipCtxGetSharedMemConfig(hipSharedMemConfig* pConfig) {
1728  return hipCUResultTohipError(cuCtxGetSharedMemConfig((CUsharedconfig*)pConfig));
1729 }
1730 
1731 inline static hipError_t hipCtxSynchronize(void) {
1732  return hipCUResultTohipError(cuCtxSynchronize());
1733 }
1734 
1735 inline static hipError_t hipCtxGetFlags(unsigned int* flags) {
1736  return hipCUResultTohipError(cuCtxGetFlags(flags));
1737 }
1738 
1739 inline static hipError_t hipCtxDetach(hipCtx_t ctx) {
1740  return hipCUResultTohipError(cuCtxDetach(ctx));
1741 }
1742 
1743 inline static hipError_t hipDeviceGet(hipDevice_t* device, int ordinal) {
1744  return hipCUResultTohipError(cuDeviceGet(device, ordinal));
1745 }
1746 
1747 inline static hipError_t hipDeviceComputeCapability(int* major, int* minor, hipDevice_t device) {
1748  return hipCUResultTohipError(cuDeviceComputeCapability(major, minor, device));
1749 }
1750 
1751 inline static hipError_t hipDeviceGetName(char* name, int len, hipDevice_t device) {
1752  return hipCUResultTohipError(cuDeviceGetName(name, len, device));
1753 }
1754 
1755 inline static hipError_t hipDeviceGetP2PAttribute(int* value, hipDeviceP2PAttr attr,
1756  int srcDevice, int dstDevice) {
1757  return hipCUDAErrorTohipError(cudaDeviceGetP2PAttribute(value, attr, srcDevice, dstDevice));
1758 }
1759 
1760 inline static hipError_t hipDeviceGetPCIBusId(char* pciBusId, int len, hipDevice_t device) {
1761  return hipCUDAErrorTohipError(cudaDeviceGetPCIBusId(pciBusId, len, device));
1762 }
1763 
1764 inline static hipError_t hipDeviceGetByPCIBusId(int* device, const char* pciBusId) {
1765  return hipCUDAErrorTohipError(cudaDeviceGetByPCIBusId(device, pciBusId));
1766 }
1767 
1768 inline static hipError_t hipDeviceGetSharedMemConfig(hipSharedMemConfig* config) {
1769  return hipCUDAErrorTohipError(cudaDeviceGetSharedMemConfig(config));
1770 }
1771 
1772 inline static hipError_t hipDeviceSetSharedMemConfig(hipSharedMemConfig config) {
1773  return hipCUDAErrorTohipError(cudaDeviceSetSharedMemConfig(config));
1774 }
1775 
1776 inline static hipError_t hipDeviceGetLimit(size_t* pValue, hipLimit_t limit) {
1777  return hipCUDAErrorTohipError(cudaDeviceGetLimit(pValue, limit));
1778 }
1779 
1780 inline static hipError_t hipDeviceTotalMem(size_t* bytes, hipDevice_t device) {
1781  return hipCUResultTohipError(cuDeviceTotalMem(bytes, device));
1782 }
1783 
1784 inline static hipError_t hipModuleLoad(hipModule_t* module, const char* fname) {
1785  return hipCUResultTohipError(cuModuleLoad(module, fname));
1786 }
1787 
1788 inline static hipError_t hipModuleUnload(hipModule_t hmod) {
1789  return hipCUResultTohipError(cuModuleUnload(hmod));
1790 }
1791 
1792 inline static hipError_t hipModuleGetFunction(hipFunction_t* function, hipModule_t module,
1793  const char* kname) {
1794  return hipCUResultTohipError(cuModuleGetFunction(function, module, kname));
1795 }
1796 
1797 inline static hipError_t hipModuleGetTexRef(hipTexRef* pTexRef, hipModule_t hmod, const char* name){
1798  return hipCUResultTohipError(cuModuleGetTexRef(pTexRef, hmod, name));
1799 }
1800 
1801 inline static hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func) {
1802  return hipCUDAErrorTohipError(cudaFuncGetAttributes(attr, func));
1803 }
1804 
1805 inline static hipError_t hipFuncGetAttribute (int* value, hipFunction_attribute attrib, hipFunction_t hfunc) {
1806  return hipCUResultTohipError(cuFuncGetAttribute(value, attrib, hfunc));
1807 }
1808 
1809 inline static hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod,
1810  const char* name) {
1811  return hipCUResultTohipError(cuModuleGetGlobal(dptr, bytes, hmod, name));
1812 }
1813 
1814 inline static hipError_t hipModuleLoadData(hipModule_t* module, const void* image) {
1815  return hipCUResultTohipError(cuModuleLoadData(module, image));
1816 }
1817 
1818 inline static hipError_t hipModuleLoadDataEx(hipModule_t* module, const void* image,
1819  unsigned int numOptions, hipJitOption* options,
1820  void** optionValues) {
1821  return hipCUResultTohipError(
1822  cuModuleLoadDataEx(module, image, numOptions, options, optionValues));
1823 }
1824 
1825 inline static hipError_t hipLaunchKernel(const void* function_address, dim3 numBlocks,
1826  dim3 dimBlocks, void** args, size_t sharedMemBytes,
1827  hipStream_t stream)
1828 {
1829  return hipCUDAErrorTohipError(cudaLaunchKernel(function_address,numBlocks,dimBlocks,args,sharedMemBytes,stream));
1830 }
1831 
1832 inline static hipError_t hipModuleLaunchKernel(hipFunction_t f, unsigned int gridDimX,
1833  unsigned int gridDimY, unsigned int gridDimZ,
1834  unsigned int blockDimX, unsigned int blockDimY,
1835  unsigned int blockDimZ, unsigned int sharedMemBytes,
1836  hipStream_t stream, void** kernelParams,
1837  void** extra) {
1838  return hipCUResultTohipError(cuLaunchKernel(f, gridDimX, gridDimY, gridDimZ, blockDimX,
1839  blockDimY, blockDimZ, sharedMemBytes, stream,
1840  kernelParams, extra));
1841 }
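/*
 * Illustrative sketch (not part of the original header): launching a kernel
 * obtained via hipModuleGetFunction. The kernel's (float*, int) signature is
 * hypothetical; fn is a previously retrieved hipFunction_t.
 *
 *   float* d_buf = nullptr;
 *   int n = 1024;
 *   hipMalloc((void**)&d_buf, n * sizeof(float));
 *   void* args[] = { &d_buf, &n };           // addresses of the kernel arguments
 *   hipModuleLaunchKernel(fn,
 *                         (n + 255) / 256, 1, 1,  // grid dimensions
 *                         256, 1, 1,              // block dimensions
 *                         0,                      // dynamic shared memory bytes
 *                         0,                      // default (NULL) stream
 *                         args, NULL);            // kernelParams, extra
 */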
1842 
1843 inline static hipError_t hipFuncSetCacheConfig(const void* func, hipFuncCache_t cacheConfig) {
1844  return hipCUDAErrorTohipError(cudaFuncSetCacheConfig(func, cacheConfig));
1845 }
1846 
1847 __HIP_DEPRECATED inline static hipError_t hipBindTexture(size_t* offset,
1848  struct textureReference* tex,
1849  const void* devPtr,
1850  const hipChannelFormatDesc* desc,
1851  size_t size __dparm(UINT_MAX)) {
1852  return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, desc, size));
1853 }
1854 
1855 __HIP_DEPRECATED inline static hipError_t hipBindTexture2D(
1856  size_t* offset, struct textureReference* tex, const void* devPtr,
1857  const hipChannelFormatDesc* desc, size_t width, size_t height, size_t pitch) {
1858  return hipCUDAErrorTohipError(cudaBindTexture2D(offset, tex, devPtr, desc, width, height, pitch));
1859 }
1860 
1861 inline static hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w,
1862  hipChannelFormatKind f) {
1863  return cudaCreateChannelDesc(x, y, z, w, hipChannelFormatKindToCudaChannelFormatKind(f));
1864 }
1865 
1866 inline static hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject,
1867  const hipResourceDesc* pResDesc,
1868  const hipTextureDesc* pTexDesc,
1869  const hipResourceViewDesc* pResViewDesc) {
1870  return hipCUDAErrorTohipError(
1871  cudaCreateTextureObject(pTexObject, pResDesc, pTexDesc, pResViewDesc));
1872 }
1873 
1874 inline static hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject) {
1875  return hipCUDAErrorTohipError(cudaDestroyTextureObject(textureObject));
1876 }
1877 
1878 inline static hipError_t hipCreateSurfaceObject(hipSurfaceObject_t* pSurfObject,
1879  const hipResourceDesc* pResDesc) {
1880  return hipCUDAErrorTohipError(cudaCreateSurfaceObject(pSurfObject, pResDesc));
1881 }
1882 
1883 inline static hipError_t hipDestroySurfaceObject(hipSurfaceObject_t surfaceObject) {
1884  return hipCUDAErrorTohipError(cudaDestroySurfaceObject(surfaceObject));
1885 }
1886 
1887 inline static hipError_t hipGetTextureObjectResourceDesc(hipResourceDesc* pResDesc,
1888  hipTextureObject_t textureObject) {
1889  return hipCUDAErrorTohipError(cudaGetTextureObjectResourceDesc( pResDesc, textureObject));
1890 }
1891 
1892 __HIP_DEPRECATED inline static hipError_t hipGetTextureAlignmentOffset(
1893  size_t* offset, const struct textureReference* texref) {
1894  return hipCUDAErrorTohipError(cudaGetTextureAlignmentOffset(offset,texref));
1895 }
1896 
1897 inline static hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, hipArray_const_t array)
1898 {
1899  return hipCUDAErrorTohipError(cudaGetChannelDesc(desc,array));
1900 }
1901 
1902 inline static hipError_t hipLaunchCooperativeKernel(const void* f, dim3 gridDim, dim3 blockDim,
1903  void** kernelParams, unsigned int sharedMemBytes,
1904  hipStream_t stream) {
1905  return hipCUDAErrorTohipError(
1906  cudaLaunchCooperativeKernel(f, gridDim, blockDim, kernelParams, sharedMemBytes, stream));
1907 }
1908 
1909 inline static hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList,
1910  int numDevices, unsigned int flags) {
1911  return hipCUDAErrorTohipError(cudaLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags));
1912 }
1913 
1914 #ifdef __cplusplus
1915 }
1916 #endif
1917 
1918 #ifdef __CUDACC__
1919 
1920 template<class T>
1921 inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks,
1922  T func,
1923  int blockSize,
1924  size_t dynamicSMemSize) {
1925  return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, func,
1926  blockSize, dynamicSMemSize));
1927 }
1928 
1929 template <class T>
1930 inline static hipError_t hipOccupancyMaxPotentialBlockSize(int* minGridSize, int* blockSize, T func,
1931  size_t dynamicSMemSize = 0,
1932  int blockSizeLimit = 0) {
1933  return hipCUDAErrorTohipError(cudaOccupancyMaxPotentialBlockSize(minGridSize, blockSize, func,
1934  dynamicSMemSize, blockSizeLimit));
1935 }
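/*
 * Illustrative sketch (not part of the original header): the templated
 * runtime-API variant above can take a __global__ kernel directly.
 * "myKernel" is a hypothetical kernel.
 *
 *   __global__ void myKernel(float* p);
 *   ...
 *   int minGridSize = 0, blockSize = 0;
 *   hipOccupancyMaxPotentialBlockSize(&minGridSize, &blockSize, myKernel);
 */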
1936 
1937 template <class T>
1938 inline static hipError_t hipOccupancyMaxPotentialBlockSizeWithFlags(int* minGridSize, int* blockSize, T func,
1939  size_t dynamicSMemSize = 0,
1940  int blockSizeLimit = 0, unsigned int flags = 0) {
1941  return hipCUDAErrorTohipError(cudaOccupancyMaxPotentialBlockSizeWithFlags(minGridSize, blockSize, func,
1942  dynamicSMemSize, blockSizeLimit, flags));
1943 }
1944 
1945 template <class T>
1946 inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( int* numBlocks, T func,
1947  int blockSize, size_t dynamicSMemSize,unsigned int flags) {
1948  return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks, func,
1949  blockSize, dynamicSMemSize, flags));
1950 }
1951 
1952 template <class T, int dim, enum cudaTextureReadMode readMode>
1953 inline static hipError_t hipBindTexture(size_t* offset, const struct texture<T, dim, readMode>& tex,
1954  const void* devPtr, size_t size = UINT_MAX) {
1955  return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, size));
1956 }
1957 
1958 template <class T, int dim, enum cudaTextureReadMode readMode>
1959 inline static hipError_t hipBindTexture(size_t* offset, struct texture<T, dim, readMode>& tex,
1960  const void* devPtr, const hipChannelFormatDesc& desc,
1961  size_t size = UINT_MAX) {
1962  return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, desc, size));
1963 }
1964 
1965 template <class T, int dim, enum cudaTextureReadMode readMode>
1966 __HIP_DEPRECATED inline static hipError_t hipUnbindTexture(struct texture<T, dim, readMode>* tex) {
1967  return hipCUDAErrorTohipError(cudaUnbindTexture(tex));
1968 }
1969 
1970 template <class T, int dim, enum cudaTextureReadMode readMode>
1971 __HIP_DEPRECATED inline static hipError_t hipUnbindTexture(struct texture<T, dim, readMode>& tex) {
1972  return hipCUDAErrorTohipError(cudaUnbindTexture(tex));
1973 }
1974 
1975 template <class T, int dim, enum cudaTextureReadMode readMode>
1976 __HIP_DEPRECATED inline static hipError_t hipBindTextureToArray(
1977  struct texture<T, dim, readMode>& tex, hipArray_const_t array,
1978  const hipChannelFormatDesc& desc) {
1979  return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array, desc));
1980 }
1981 
1982 template <class T, int dim, enum cudaTextureReadMode readMode>
1983 __HIP_DEPRECATED inline static hipError_t hipBindTextureToArray(
1984  struct texture<T, dim, readMode>* tex, hipArray_const_t array,
1985  const hipChannelFormatDesc* desc) {
1986  return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array, desc));
1987 }
1988 
1989 template <class T, int dim, enum cudaTextureReadMode readMode>
1990 __HIP_DEPRECATED inline static hipError_t hipBindTextureToArray(
1991  struct texture<T, dim, readMode>& tex, hipArray_const_t array) {
1992  return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array));
1993 }
1994 
1995 template <class T>
1996 inline static hipChannelFormatDesc hipCreateChannelDesc() {
1997  return cudaCreateChannelDesc<T>();
1998 }
1999 
2000 template <class T>
2001 inline static hipError_t hipLaunchCooperativeKernel(T f, dim3 gridDim, dim3 blockDim,
2002  void** kernelParams, unsigned int sharedMemBytes, hipStream_t stream) {
2003  return hipCUDAErrorTohipError(
2004  cudaLaunchCooperativeKernel(reinterpret_cast<const void*>(f), gridDim, blockDim, kernelParams, sharedMemBytes, stream));
2005 }
2006 
2007 inline static hipError_t hipTexRefSetAddressMode(hipTexRef hTexRef, int dim, hipAddress_mode am){
2008  return hipCUResultTohipError(cuTexRefSetAddressMode(hTexRef,dim,am));
2009 }
2010 
2011 inline static hipError_t hipTexRefSetFilterMode(hipTexRef hTexRef, hipFilter_mode fm){
2012  return hipCUResultTohipError(cuTexRefSetFilterMode(hTexRef,fm));
2013 }
2014 
2015 inline static hipError_t hipTexRefSetAddress(size_t *ByteOffset, hipTexRef hTexRef, hipDeviceptr_t dptr, size_t bytes){
2016  return hipCUResultTohipError(cuTexRefSetAddress(ByteOffset,hTexRef,dptr,bytes));
2017 }
2018 
2019 inline static hipError_t hipTexRefSetAddress2D(hipTexRef hTexRef, const CUDA_ARRAY_DESCRIPTOR *desc, hipDeviceptr_t dptr, size_t Pitch){
2020  return hipCUResultTohipError(cuTexRefSetAddress2D(hTexRef,desc,dptr,Pitch));
2021 }
2022 
2023 inline static hipError_t hipTexRefSetFormat(hipTexRef hTexRef, hipArray_Format fmt, int NumPackedComponents){
2024  return hipCUResultTohipError(cuTexRefSetFormat(hTexRef,fmt,NumPackedComponents));
2025 }
2026 
2027 inline static hipError_t hipTexRefSetFlags(hipTexRef hTexRef, unsigned int Flags){
2028  return hipCUResultTohipError(cuTexRefSetFlags(hTexRef,Flags));
2029 }
2030 
2031 inline static hipError_t hipTexRefSetArray(hipTexRef hTexRef, hiparray hArray, unsigned int Flags){
2032  return hipCUResultTohipError(cuTexRefSetArray(hTexRef,hArray,Flags));
2033 }
2034 
2035 inline static hipError_t hipArrayCreate(hiparray* pHandle, const HIP_ARRAY_DESCRIPTOR* pAllocateArray){
2036  return hipCUResultTohipError(cuArrayCreate(pHandle, pAllocateArray));
2037 }
2038 
2039 inline static hipError_t hipArrayDestroy(hiparray hArray){
2040  return hipCUResultTohipError(cuArrayDestroy(hArray));
2041 }
2042 
2043 #endif //__CUDACC__
2044 
2045 #endif // HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_API_H
hipFuncAttributes
Definition: hip_runtime_api.h:128
hipModuleGetGlobal
hipError_t hipModuleGetGlobal(void **, size_t *, hipModule_t, const char *)
returns device memory pointer and size of the kernel present in the module with symbol name
hipPointerGetAttributes
hipError_t hipPointerGetAttributes(hipPointerAttribute_t *attributes, const void *ptr)
Return attributes for the specified pointer.
hipDeviceAttributeMaxPitch
@ hipDeviceAttributeMaxPitch
Maximum pitch in bytes allowed by memory copies.
Definition: hip_runtime_api.h:344
hipMemset3DAsync
hipError_t hipMemset3DAsync(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent, hipStream_t stream __dparm(0))
Fills asynchronously the memory area pointed to by pitchedDevPtr with the constant value.
hipMemcpy3D
hipError_t hipMemcpy3D(const struct hipMemcpy3DParms *p)
Copies data between host and device.
hipIpcOpenMemHandle
hipError_t hipIpcOpenMemHandle(void **devPtr, hipIpcMemHandle_t handle, unsigned int flags)
Opens an interprocess memory handle exported from another process and returns a device pointer usable...
hipCtxEnablePeerAccess
hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags)
Enables direct access to memory allocations in a peer context.
hipDeviceProp_t::regsPerBlock
int regsPerBlock
Registers per block.
Definition: hip_runtime_api.h:88
hipMallocPitch
hipError_t hipMallocPitch(void **ptr, size_t *pitch, size_t width, size_t height)
hipSetDevice
hipError_t hipSetDevice(int deviceId)
Set default device to be used for subsequent hip API calls from this thread.
hipDeviceGetP2PAttribute
hipError_t hipDeviceGetP2PAttribute(int *value, hipDeviceP2PAttr attr, int srcDevice, int dstDevice)
Returns a value for attr of link between two devices.
hipMemsetD16Async
hipError_t hipMemsetD16Async(hipDeviceptr_t dest, unsigned short value, size_t count, hipStream_t stream __dparm(0))
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant short value v...
hipDeviceAttributeMemoryBusWidth
@ hipDeviceAttributeMemoryBusWidth
Global memory bus width in bits.
Definition: hip_runtime_api.h:315
hipGetErrorString
const char * hipGetErrorString(hipError_t hipError)
Return handy text string message to explain the error which occurred.
hipGetDeviceFlags
hipError_t hipGetDeviceFlags(unsigned int *flags)
Gets the flags set for current device.
hipDeviceGetByPCIBusId
hipError_t hipDeviceGetByPCIBusId(int *device, const char *pciBusId)
Returns a handle to a compute device.
hipErrorInvalidMemcpyDirection
hipErrorInvalidMemcpyDirection
Invalid memory copy direction.
Definition: hip_runtime_api.h:221
hipMalloc3DArray
hipError_t hipMalloc3DArray(hipArray **array, const struct hipChannelFormatDesc *desc, struct hipExtent extent, unsigned int flags)
Allocate an array on the device.
hipDeviceArch_t::hasGlobalInt64Atomics
unsigned hasGlobalInt64Atomics
64-bit integer atomics for global memory.
Definition: hip_runtime_api.h:54
hipDeviceProp_t::minor
int minor
Definition: hip_runtime_api.h:100
hipDeviceAttributeMaxBlockDimX
@ hipDeviceAttributeMaxBlockDimX
Maximum x-dimension of a block.
Definition: hip_runtime_api.h:299
hipErrorInvalidDevicePointer
hipErrorInvalidDevicePointer
Invalid Device Pointer.
Definition: hip_runtime_api.h:220
hipChooseDevice
hipError_t hipChooseDevice(int *device, const hipDeviceProp_t *prop)
Device which matches hipDeviceProp_t is returned.
hipMemcpy2DAsync
hipError_t hipMemcpy2DAsync(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream __dparm(0))
Copies data between host and device.
hipLaunchKernel
hipError_t hipLaunchKernel(const void *function_address, dim3 numBlocks, dim3 dimBlocks, void **args, size_t sharedMemBytes __dparm(0), hipStream_t stream __dparm(0))
C compliant kernel launch API.
hipMemsetD32
hipError_t hipMemsetD32(hipDeviceptr_t dest, int value, size_t count)
Fills the memory area pointed to by dest with the constant integer value for specified number of time...
hipDeviceProp_t::texturePitchAlignment
size_t texturePitchAlignment
Pitch alignment requirement for texture references bound to pitched memory.
Definition: hip_runtime_api.h:130
hipDeviceAttributeMaxGridDimX
@ hipDeviceAttributeMaxGridDimX
Maximum x-dimension of a grid.
Definition: hip_runtime_api.h:302
hipDeviceArch_t::hasThreadFenceSystem
unsigned hasThreadFenceSystem
__threadfence_system.
Definition: hip_runtime_api.h:67
hipStreamCreate
hipError_t hipStreamCreate(hipStream_t *stream)
Create an asynchronous stream.
hipDeviceGetStreamPriorityRange
hipError_t hipDeviceGetStreamPriorityRange(int *leastPriority, int *greatestPriority)
Returns numerical values that correspond to the least and greatest stream priority.
hipIpcEventHandle_st
Definition: hip_runtime_api.h:120
hipDeviceProp_t::maxTexture3D
int maxTexture3D[3]
Maximum dimensions (width, height, depth) of 3D images, in image elements.
Definition: hip_runtime_api.h:125
hipStreamCreateWithPriority
hipError_t hipStreamCreateWithPriority(hipStream_t *stream, unsigned int flags, int priority)
Create an asynchronous stream with the specified priority.
hipFuncCache_t
hipFuncCache_t
Definition: hip_runtime_api.h:308
hipDeviceProp_t::cooperativeMultiDeviceUnmatchedBlockDim
int cooperativeMultiDeviceUnmatchedBlockDim
Definition: hip_runtime_api.h:138
hipPeekAtLastError
hipError_t hipPeekAtLastError(void)
Return last error returned by any HIP runtime API call.
hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags
hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, const void *f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags __dparm(hipOccupancyDefault))
Returns occupancy for a device function.
hipModuleGetTexRef
hipError_t hipModuleGetTexRef(textureReference **texRef, hipModule_t hmod, const char *name)
returns the handle of the texture reference with the name from the module.
hipMemcpy3DAsync
hipError_t hipMemcpy3DAsync(const struct hipMemcpy3DParms *p, hipStream_t stream __dparm(0))
Copies data between host and device asynchronously.
hipDeviceGetPCIBusId
hipError_t hipDeviceGetPCIBusId(char *pciBusId, int len, int device)
Returns a PCI Bus Id string for the device, overloaded to take int device ID.
hipHostGetFlags
hipError_t hipHostGetFlags(unsigned int *flagsPtr, void *hostPtr)
Return flags associated with host pointer.
hipErrorHostMemoryNotRegistered
hipErrorHostMemoryNotRegistered
Produced when trying to unlock a non-page-locked memory.
Definition: hip_runtime_api.h:272
hipErrorRuntimeOther
hipErrorRuntimeOther
Definition: hip_runtime_api.h:285
hipDeviceAttributeClockRate
@ hipDeviceAttributeClockRate
Peak clock frequency in kilohertz.
Definition: hip_runtime_api.h:313
hipMemGetAddressRange
hipError_t hipMemGetAddressRange(hipDeviceptr_t *pbase, size_t *psize, hipDeviceptr_t dptr)
Get information on memory allocations.
hipSurfaceObject_t
unsigned long long hipSurfaceObject_t
Definition: hip_surface_types.h:36
hipStreamWaitEvent
hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags)
Make the specified compute stream wait for an event.
hipDeviceAttributeMaxGridDimZ
@ hipDeviceAttributeMaxGridDimZ
Maximum z-dimension of a grid.
Definition: hip_runtime_api.h:304
hipGetDevice
hipError_t hipGetDevice(int *deviceId)
Return the default device id for the calling host thread.
hipMallocArray
hipError_t hipMallocArray(hipArray **array, const hipChannelFormatDesc *desc, size_t width, size_t height __dparm(0), unsigned int flags __dparm(hipArrayDefault))
Allocate an array on the device.
hipCtxSetCurrent
hipError_t hipCtxSetCurrent(hipCtx_t ctx)
Set the passed context as current/default.
hipMemcpyToArray
hipError_t hipMemcpyToArray(hipArray *dst, size_t wOffset, size_t hOffset, const void *src, size_t count, hipMemcpyKind kind)
Copies data between host and device.
hipMemoryTypeDevice
@ hipMemoryTypeDevice
Definition: hip_runtime_api.h:158
hipDeviceAttributeMaxRegistersPerBlock
@ hipDeviceAttributeMaxRegistersPerBlock
Definition: hip_runtime_api.h:309
hipMemcpyDtoDAsync
hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes, hipStream_t stream)
Copy data from Device to Device asynchronously.
hipErrorNoDevice
hipErrorNoDevice
Call to hipGetDeviceCount returned 0 devices.
Definition: hip_runtime_api.h:226
hipDeviceAttributeComputeCapabilityMinor
@ hipDeviceAttributeComputeCapabilityMinor
Minor compute capability version number.
Definition: hip_runtime_api.h:323
hipModuleOccupancyMaxPotentialBlockSizeWithFlags
hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int *gridSize, int *blockSize, hipFunction_t f, size_t dynSharedMemPerBlk, int blockSizeLimit, unsigned int flags)
determine the grid and block sizes to achieves maximum occupancy for a kernel
hipDeviceProp_t::l2CacheSize
int l2CacheSize
L2 cache size.
Definition: hip_runtime_api.h:104
hipDevicePrimaryCtxRelease
hipError_t hipDevicePrimaryCtxRelease(hipDevice_t dev)
Release the primary context on the GPU.
hipDeviceProp_t::textureAlignment
size_t textureAlignment
Alignment requirement for textures.
Definition: hip_runtime_api.h:129
hipHostMalloc
hipError_t hipHostMalloc(void **ptr, size_t size, unsigned int flags)
Allocate device accessible page locked host memory.
hipDeviceAttributeKernelExecTimeout
@ hipDeviceAttributeKernelExecTimeout
Run time limit for kernels executed on the device.
Definition: hip_runtime_api.h:347
hipDeviceAttributeL2CacheSize
@ hipDeviceAttributeL2CacheSize
Definition: hip_runtime_api.h:318
hipDeviceGetName
hipError_t hipDeviceGetName(char *name, int len, hipDevice_t device)
Returns an identifer string for the device.
hipDeviceAttributeMaxTexture3DWidth
@ hipDeviceAttributeMaxTexture3DWidth
Maximum dimension width of 3D images in image elements.
Definition: hip_runtime_api.h:337
hipDeviceArch_t::hasSurfaceFuncs
unsigned hasSurfaceFuncs
Surface functions.
Definition: hip_runtime_api.h:71
hipDeviceAttributeIntegrated
@ hipDeviceAttributeIntegrated
iGPU
Definition: hip_runtime_api.h:331
hipDeviceProp_t::isMultiGpuBoard
int isMultiGpuBoard
1 if device is on a multi-GPU board, 0 if not.
Definition: hip_runtime_api.h:115
hipMemcpyParam2DAsync
hipError_t hipMemcpyParam2DAsync(const hip_Memcpy2D *pCopy, hipStream_t stream __dparm(0))
Copies memory for 2D arrays.
hipDeviceAttributeMaxGridDimY
@ hipDeviceAttributeMaxGridDimY
Maximum y-dimension of a grid.
Definition: hip_runtime_api.h:303
hipMemoryTypeHost
@ hipMemoryTypeHost
Memory is physically located on host.
Definition: hip_runtime_api.h:157
hipDeviceEnablePeerAccess
hipError_t hipDeviceEnablePeerAccess(int peerDeviceId, unsigned int flags)
Enable direct access from current device's virtual address space to memory allocations physically loc...
hipCtxSetCacheConfig
hipError_t hipCtxSetCacheConfig(hipFuncCache_t cacheConfig)
Set L1/Shared cache partition.
hipErrorInvalidContext
hipErrorInvalidContext
Produced when input context is invalid.
Definition: hip_runtime_api.h:229
hipDeviceArch_t::hasSharedInt64Atomics
unsigned hasSharedInt64Atomics
64-bit integer atomics for shared memory.
Definition: hip_runtime_api.h:55
hipDeviceProp_t::computeMode
int computeMode
Compute mode.
Definition: hip_runtime_api.h:106
hipCtxPushCurrent
hipError_t hipCtxPushCurrent(hipCtx_t ctx)
Push the context to be set as current/ default context.
hipDeviceAttributeIsMultiGpuBoard
@ hipDeviceAttributeIsMultiGpuBoard
Multiple GPU devices.
Definition: hip_runtime_api.h:330
hipSharedMemConfig
hipSharedMemConfig
Definition: hip_runtime_api.h:318
hipDeviceProp_t::clockRate
int clockRate
Max clock frequency of the multiProcessors in khz.
Definition: hip_runtime_api.h:93
hipErrorPeerAccessNotEnabled
hipErrorPeerAccessNotEnabled
Peer access was never enabled from the current device.
Definition: hip_runtime_api.h:266
hipDeviceComputeCapability
hipError_t hipDeviceComputeCapability(int *major, int *minor, hipDevice_t device)
Returns the compute capability of the device.
hipStreamCallback_t
void(* hipStreamCallback_t)(hipStream_t stream, hipError_t status, void *userData)
Definition: hip_runtime_api.h:1216
hipDeviceArch_t::hasDynamicParallelism
unsigned hasDynamicParallelism
Dynamic parallelism.
Definition: hip_runtime_api.h:73
hip_Memcpy2D
Definition: driver_types.h:91
hipDeviceProp_t::canMapHostMemory
int canMapHostMemory
Check whether HIP can map host memory.
Definition: hip_runtime_api.h:116
hipDeviceProp_t::sharedMemPerBlock
size_t sharedMemPerBlock
Size of shared memory region (in bytes).
Definition: hip_runtime_api.h:87
hipModuleOccupancyMaxPotentialBlockSize
hipError_t hipModuleOccupancyMaxPotentialBlockSize(int *gridSize, int *blockSize, hipFunction_t f, size_t dynSharedMemPerBlk, int blockSizeLimit)
determine the grid and block sizes to achieves maximum occupancy for a kernel
hipIpcCloseMemHandle
hipError_t hipIpcCloseMemHandle(void *devPtr)
Close memory mapped with hipIpcOpenMemHandle.
hipDevicePrimaryCtxGetState
hipError_t hipDevicePrimaryCtxGetState(hipDevice_t dev, unsigned int *flags, int *active)
Get the state of the primary context.
hipDeviceAttributeCooperativeMultiDeviceLaunch
@ hipDeviceAttributeCooperativeMultiDeviceLaunch
Support cooperative launch on multiple devices.
Definition: hip_runtime_api.h:333
hipDeviceProp_t::maxThreadsPerMultiProcessor
int maxThreadsPerMultiProcessor
Maximum resident threads per multi-processor.
Definition: hip_runtime_api.h:105
hipDeviceSetCacheConfig
hipError_t hipDeviceSetCacheConfig(hipFuncCache_t cacheConfig)
Set L1/Shared cache partition.
hipDeviceProp_t::major
int major
Definition: hip_runtime_api.h:97
hipDeviceAttributeMaxSharedMemoryPerBlock
@ hipDeviceAttributeMaxSharedMemoryPerBlock
Definition: hip_runtime_api.h:305
hipMemcpyAtoH
hipError_t hipMemcpyAtoH(void *dst, hipArray *srcArray, size_t srcOffset, size_t count)
Copies data between host and device.
hipGetDeviceCount
hipError_t hipGetDeviceCount(int *count)
Return number of compute-capable devices.
hipSuccess
hipSuccess
Successful completion.
Definition: hip_runtime_api.h:204
hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags
hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags)
Returns occupancy for a device function.
hipHostUnregister
hipError_t hipHostUnregister(void *hostPtr)
Un-register host pointer.
hipStreamGetFlags
hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags)
Return flags associated with this stream.
hipMemsetD8Async
hipError_t hipMemsetD8Async(hipDeviceptr_t dest, unsigned char value, size_t count, hipStream_t stream __dparm(0))
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant byte value va...
hipDeviceAttributeMaxThreadsPerBlock
@ hipDeviceAttributeMaxThreadsPerBlock
Maximum number of threads per block.
Definition: hip_runtime_api.h:298
hipDeviceProp_t::gcnArch
int gcnArch
DEPRECATED: use gcnArchName instead.
Definition: hip_runtime_api.h:117
hipStreamSynchronize
hipError_t hipStreamSynchronize(hipStream_t stream)
Wait for all commands in stream to complete.
hipGetErrorName
const char * hipGetErrorName(hipError_t hip_error)
Return name of the specified error code in text form.
hipDeviceProp_t::kernelExecTimeoutEnabled
int kernelExecTimeoutEnabled
Run time limit for kernels executed on the device.
Definition: hip_runtime_api.h:131
hipDeviceGet
hipError_t hipDeviceGet(hipDevice_t *device, int ordinal)
Returns a handle to a compute device.
hipMemcpyDtoD
hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes)
Copy data from Device to Device.
hipDeviceProp_t::maxTexture1D
int maxTexture1D
Maximum number of elements in 1D images.
Definition: hip_runtime_api.h:123
hipMemcpy3DParms
Definition: driver_types.h:383
hipDeviceAttributeMaxBlockDimZ
@ hipDeviceAttributeMaxBlockDimZ
Maximum z-dimension of a block.
Definition: hip_runtime_api.h:301
hipIpcGetMemHandle
hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t *handle, void *devPtr)
Gets an interprocess memory handle for an existing device memory allocation.
hipMemcpyHtoD
hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void *src, size_t sizeBytes)
Copy data from Host to Device.
hipDriverGetVersion
hipError_t hipDriverGetVersion(int *driverVersion)
Returns the approximate HIP driver version.
hipDeviceArch_t::hasDoubles
unsigned hasDoubles
Double-precision floating point.
Definition: hip_runtime_api.h:58
hipErrorInvalidKernelFile
hipErrorInvalidKernelFile
In CUDA DRV, it is CUDA_ERROR_INVALID_PTX.
Definition: hip_runtime_api.h:246
hipDeviceProp_t::maxThreadsPerBlock
int maxThreadsPerBlock
Max work items per work group or workgroup max size.
Definition: hip_runtime_api.h:90
hipCtxGetFlags
hipError_t hipCtxGetFlags(unsigned int *flags)
Return flags used for creating default context.
hipDeviceAttributeMaxBlockDimY
@ hipDeviceAttributeMaxBlockDimY
Maximum y-dimension of a block.
Definition: hip_runtime_api.h:300
hipMemcpy2DToArray
hipError_t hipMemcpy2DToArray(hipArray *dst, size_t wOffset, size_t hOffset, const void *src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind)
Copies data between host and device.
hipMemAllocPitch
hipError_t hipMemAllocPitch(hipDeviceptr_t *dptr, size_t *pitch, size_t widthInBytes, size_t height, unsigned int elementSizeBytes)
hipDeviceProp_t
Definition: hip_runtime_api.h:84
hipMemAllocHost
hipError_t hipMemAllocHost(void **ptr, size_t size)
Allocate pinned host memory [Deprecated].
Definition: hip_runtime_api.h:883
hipMallocHost
hipError_t hipMallocHost(void **ptr, size_t size)
Allocate pinned host memory [Deprecated].
Definition: hip_runtime_api.h:877
hipDeviceAttributeMaxTexture2DHeight
@ hipDeviceAttributeMaxTexture2DHeight
Maximum dimension height of 2D images in image elements.
Definition: hip_runtime_api.h:336
hipDeviceArch_t::hasSharedInt32Atomics
unsigned hasSharedInt32Atomics
32-bit integer atomics for shared memory.
Definition: hip_runtime_api.h:49
hipErrorInvalidValue
hipErrorInvalidValue
Definition: hip_runtime_api.h:205
hipDeviceProp_t::memPitch
size_t memPitch
Maximum pitch in bytes allowed by memory copies.
Definition: hip_runtime_api.h:128
hipMemsetD32Async
hipError_t hipMemsetD32Async(hipDeviceptr_t dst, int value, size_t count, hipStream_t stream __dparm(0))
Fills the memory area pointed to by dev with the constant integer value for specified number of times...
hipDeviceProp_t::pciBusID
int pciBusID
PCI Bus ID.
Definition: hip_runtime_api.h:112
hipRuntimeGetVersion
hipError_t hipRuntimeGetVersion(int *runtimeVersion)
Returns the approximate HIP Runtime version.
hipDeviceAttributeComputeCapabilityMajor
@ hipDeviceAttributeComputeCapabilityMajor
Major compute capability version number.
Definition: hip_runtime_api.h:322
hipEventQuery
hipError_t hipEventQuery(hipEvent_t event)
Query event status.
hipDeviceAttributeMaxTexture3DDepth
@ hipDeviceAttributeMaxTexture3DDepth
Maximum dimensions depth of 3D images in image elements.
Definition: hip_runtime_api.h:339
hipErrorRuntimeMemory
hipErrorRuntimeMemory
Definition: hip_runtime_api.h:283
hipDeviceAttributeMaxThreadsPerMultiProcessor
@ hipDeviceAttributeMaxThreadsPerMultiProcessor
Definition: hip_runtime_api.h:320
hipStreamGetPriority
hipError_t hipStreamGetPriority(hipStream_t stream, int *priority)
Query the priority of a stream.
hipOccupancyMaxActiveBlocksPerMultiprocessor
hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, const void *f, int blockSize, size_t dynSharedMemPerBlk)
Returns occupancy for a device function.
hipDeviceProp_t::arch
hipDeviceArch_t arch
Architectural feature flags. New for HIP.
Definition: hip_runtime_api.h:109
hipCtxGetApiVersion
hipError_t hipCtxGetApiVersion(hipCtx_t ctx, int *apiVersion)
Returns the approximate HIP api version.
hipEventSynchronize
hipError_t hipEventSynchronize(hipEvent_t event)
Wait for an event to complete.
hipCtxCreate
hipError_t hipCtxCreate(hipCtx_t *ctx, unsigned int flags, hipDevice_t device)
Create a context and set it as current/ default context.
hipHostFree
hipError_t hipHostFree(void *ptr)
Free memory allocated by the hcc hip host memory allocation API This API performs an implicit hipDevi...
hipDeviceAttributePciBusId
@ hipDeviceAttributePciBusId
PCI Bus ID.
Definition: hip_runtime_api.h:326
hipMemsetD16
hipError_t hipMemsetD16(hipDeviceptr_t dest, unsigned short value, size_t count)
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant short value v...
hipDeviceProp_t::tccDriver
int tccDriver
1:If device is Tesla device using TCC driver, else 0
Definition: hip_runtime_api.h:133
hipDeviceGetLimit
hipError_t hipDeviceGetLimit(size_t *pValue, enum hipLimit_t limit)
Get Resource limits of current device.
hipMalloc
hipError_t hipMalloc(void **ptr, size_t size)
Allocate memory on the default accelerator.
hipIpcMemHandle_st
Definition: hip_runtime_api.h:111
hipEventElapsedTime
hipError_t hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop)
Return the elapsed time between two events.
hipGetLastError
hipError_t hipGetLastError(void)
Return last error returned by any HIP runtime API call and resets the stored error code to hipSuccess...
hipInit
hipError_t hipInit(unsigned int flags)
Explicitly initializes the HIP runtime.
hipDeviceAttributeTexturePitchAlignment
@ hipDeviceAttributeTexturePitchAlignment
Pitch alignment requirement for 2D texture references bound to pitched memory;.
Definition: hip_runtime_api.h:346
hipDeviceAttributeWarpSize
@ hipDeviceAttributeWarpSize
Warp size in threads.
Definition: hip_runtime_api.h:308
hipDeviceArch_t::hasGlobalInt32Atomics
unsigned hasGlobalInt32Atomics
32-bit integer atomics for global memory.
Definition: hip_runtime_api.h:47
hipFuncSetCacheConfig
hipError_t hipFuncSetCacheConfig(const void *func, hipFuncCache_t config)
Set Cache configuration for a specific function.
hipCtxPopCurrent
hipError_t hipCtxPopCurrent(hipCtx_t *ctx)
Pop the current/default context and return the popped context.
hipArray
Definition: driver_types.h:78
hipDeviceArch_t::hasSyncThreadsExt
unsigned hasSyncThreadsExt
__syncthreads_count, syncthreads_and, syncthreads_or.
Definition: hip_runtime_api.h:68
hipErrorInvalidDevice
hipErrorInvalidDevice
DeviceID must be in range 0...#compute-devices.
Definition: hip_runtime_api.h:227
hipDeviceArch_t::hasFunnelShift
unsigned hasFunnelShift
Funnel two words into one with shift&mask caps.
Definition: hip_runtime_api.h:64
hipDeviceAttributeMaxTexture3DHeight
@ hipDeviceAttributeMaxTexture3DHeight
Maximum dimensions height of 3D images in image elements.
Definition: hip_runtime_api.h:338
hipDeviceAttributeMemoryClockRate
@ hipDeviceAttributeMemoryClockRate
Peak memory clock frequency in kilohertz.
Definition: hip_runtime_api.h:314
hipErrorNotReady
hipErrorNotReady
Definition: hip_runtime_api.h:257
hipHostGetDevicePointer
hipError_t hipHostGetDevicePointer(void **devPtr, void *hstPtr, unsigned int flags)
Get Device pointer from Host Pointer allocated through hipHostMalloc.
hipMemGetInfo
hipError_t hipMemGetInfo(size_t *free, size_t *total)
Query memory info. Return snapshot of free memory, and total allocatable memory on the device.
hipEventDestroy
hipError_t hipEventDestroy(hipEvent_t event)
Destroy the specified event.
hipDeviceSetSharedMemConfig
hipError_t hipDeviceSetSharedMemConfig(hipSharedMemConfig config)
The bank width of shared memory on current device is set.
hipDeviceReset
hipError_t hipDeviceReset(void)
The state of current device is discarded and updated to a fresh state.
hipDeviceProp_t::maxGridSize
int maxGridSize[3]
Max grid dimensions (XYZ).
Definition: hip_runtime_api.h:92
hipDeviceAttributeComputeMode
@ hipDeviceAttributeComputeMode
Compute mode that device is currently in.
Definition: hip_runtime_api.h:317
hipSetDeviceFlags
hipError_t hipSetDeviceFlags(unsigned flags)
The current device behavior is changed according the flags passed.
hipCtxGetCurrent
hipError_t hipCtxGetCurrent(hipCtx_t *ctx)
Get the handle of the current/ default context.
hipDeviceAttributePciDeviceId
@ hipDeviceAttributePciDeviceId
PCI Device ID.
Definition: hip_runtime_api.h:327
hipFuncGetAttributes
hipError_t hipFuncGetAttributes(struct hipFuncAttributes *attr, const void *func)
Find out attributes for a given function.
hipFuncGetAttribute
hipError_t hipFuncGetAttribute(int *value, hipFunction_attribute attrib, hipFunction_t hfunc)
Find out a specific attribute for a given function.
hipDeviceProp_t::maxSharedMemoryPerMultiProcessor
size_t maxSharedMemoryPerMultiProcessor
Maximum Shared Memory Per Multiprocessor.
Definition: hip_runtime_api.h:114
hipDeviceProp_t::clockInstructionRate
int clockInstructionRate
Definition: hip_runtime_api.h:107
dim3
Definition: hip_runtime_api.h:330
hipStreamQuery
hipError_t hipStreamQuery(hipStream_t stream)
Return hipSuccess if all of the operations in the specified stream have completed,...
hipDevicePrimaryCtxSetFlags
hipError_t hipDevicePrimaryCtxSetFlags(hipDevice_t dev, unsigned int flags)
Set flags for the primary context.
hipPointerAttribute_t
Definition: hip_runtime_api.h:169
hipDeviceAttributeTotalConstantMemory
@ hipDeviceAttributeTotalConstantMemory
Constant memory size in bytes.
Definition: hip_runtime_api.h:307
hipFree
hipError_t hipFree(void *ptr)
Free memory allocated by the hcc hip memory allocation API. This API performs an implicit hipDeviceSy...
hipDeviceArch_t::hasWarpShuffle
unsigned hasWarpShuffle
Warp shuffle operations. (__shfl_*).
Definition: hip_runtime_api.h:63
hipArrayDefault
#define hipArrayDefault
Default HIP array allocation flag.
Definition: hip_runtime_api.h:221
hipDevicePrimaryCtxRetain
hipError_t hipDevicePrimaryCtxRetain(hipCtx_t *pctx, hipDevice_t dev)
Retain the primary context on the GPU.
hipCtxSynchronize
hipError_t hipCtxSynchronize(void)
Blocks until the default context has completed all preceding requested tasks.
hipFreeHost
hipError_t hipFreeHost(void *ptr)
Free memory allocated by the hcc hip host memory allocation API. [Deprecated].
Definition: hip_runtime_api.h:934
hipMemcpyHtoA
hipError_t hipMemcpyHtoA(hipArray *dstArray, size_t dstOffset, const void *srcHost, size_t count)
Copies data between host and device.
hipDeviceProp_t::memoryBusWidth
int memoryBusWidth
Global memory bus width in bits.
Definition: hip_runtime_api.h:95
hipStreamAddCallback
hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback, void *userData, unsigned int flags)
Adds a callback to be called on the host after all currently enqueued items in the stream have comple...
hipDeviceArch_t::hasWarpVote
unsigned hasWarpVote
Warp vote instructions (__any, __all).
Definition: hip_runtime_api.h:61
hipDeviceProp_t::name
char name[256]
Device name.
Definition: hip_runtime_api.h:85
hipMemcpyDtoHAsync
hipError_t hipMemcpyDtoHAsync(void *dst, hipDeviceptr_t src, size_t sizeBytes, hipStream_t stream)
Copy data from Device to Host asynchronously.
hipDeviceArch_t::hasGlobalFloatAtomicExch
unsigned hasGlobalFloatAtomicExch
32-bit float atomic exch for global memory.
Definition: hip_runtime_api.h:48
hipDeviceProp_t::concurrentKernels
int concurrentKernels
Device can possibly execute multiple kernels concurrently.
Definition: hip_runtime_api.h:110
hipDeviceArch_t::hasWarpBallot
unsigned hasWarpBallot
Warp ballot instructions (__ballot).
Definition: hip_runtime_api.h:62
hipDeviceProp_t::totalGlobalMem
size_t totalGlobalMem
Size of global memory region (in bytes).
Definition: hip_runtime_api.h:86
hipDeviceAttributeTextureAlignment
@ hipDeviceAttributeTextureAlignment
Alignment requirement for textures.
Definition: hip_runtime_api.h:345
hipEventRecord
hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream)
Record an event in the specified stream.
hipMemcpy2D
hipError_t hipMemcpy2D(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind)
Copies data between host and device.
hipExtent
Definition: driver_types.h:370
hipPitchedPtr
Definition: driver_types.h:363
hipMemset2D
hipError_t hipMemset2D(void *dst, size_t pitch, int value, size_t width, size_t height)
Fills the memory area pointed to by dst with the constant value.
hipMemset3D
hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent)
Fills synchronously the memory area pointed to by pitchedDevPtr with the constant value.
hipStreamCreateWithFlags
hipError_t hipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags)
Create an asynchronous stream.
hipDeviceGetAttribute
hipError_t hipDeviceGetAttribute(int *pi, hipDeviceAttribute_t attr, int deviceId)
Query for a specific device attribute.
hipMemcpyFromArray
hipError_t hipMemcpyFromArray(void *dst, hipArray_const_t srcArray, size_t wOffset, size_t hOffset, size_t count, hipMemcpyKind kind)
Copies data between host and device.
hipDeviceAttributeCanMapHostMemory
@ hipDeviceAttributeCanMapHostMemory
Device can map host memory into device address space.
Definition: hip_runtime_api.h:348
hipDeviceProp_t::maxThreadsDim
int maxThreadsDim[3]
Max number of threads in each dimension (XYZ) of a block.
Definition: hip_runtime_api.h:91
hipMemcpyPeerAsync
hipError_t hipMemcpyPeerAsync(void *dst, int dstDeviceId, const void *src, int srcDevice, size_t sizeBytes, hipStream_t stream __dparm(0))
Copies memory from one device to memory on another device.
hipMemcpyHtoDAsync
hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, void *src, size_t sizeBytes, hipStream_t stream)
Copy data from Host to Device asynchronously.
hipDeviceProp_t::cooperativeMultiDeviceLaunch
int cooperativeMultiDeviceLaunch
HIP device supports cooperative launch on multiple devices.
Definition: hip_runtime_api.h:121
hipMemcpyDtoH
hipError_t hipMemcpyDtoH(void *dst, hipDeviceptr_t src, size_t sizeBytes)
Copy data from Device to Host.
hipDeviceArch_t::has3dGrid
unsigned has3dGrid
Grid and group dims are 3D (rather than 2D).
Definition: hip_runtime_api.h:72
hipDeviceGetCacheConfig
hipError_t hipDeviceGetCacheConfig(hipFuncCache_t *cacheConfig)
Set Cache configuration for a specific function.
hipMemcpyPeer
hipError_t hipMemcpyPeer(void *dst, int dstDeviceId, const void *src, int srcDeviceId, size_t sizeBytes)
Copies memory from one device to memory on another device.
hipDeviceAttributeMaxTexture1DWidth
@ hipDeviceAttributeMaxTexture1DWidth
Maximum number of elements in 1D images.
Definition: hip_runtime_api.h:334
hipDeviceAttributeCooperativeLaunch
@ hipDeviceAttributeCooperativeLaunch
Support cooperative launch.
Definition: hip_runtime_api.h:332
hipModuleLoadDataEx
hipError_t hipModuleLoadDataEx(hipModule_t *module, const void *image, unsigned int numOptions, hipJitOption *options, void **optionValues)
builds module from code object which resides in host memory. Image is pointer to that location....
hipDeviceAttributeMultiprocessorCount
@ hipDeviceAttributeMultiprocessorCount
Number of multiprocessors on the device.
Definition: hip_runtime_api.h:316
hipDeviceProp_t::pciDeviceID
int pciDeviceID
PCI Device ID.
Definition: hip_runtime_api.h:113
hipGetDeviceProperties
hipError_t hipGetDeviceProperties(hipDeviceProp_t *prop, int deviceId)
Returns device properties.
hipMemcpy
hipError_t hipMemcpy(void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind)
Copy data from src to dst.
hipDeviceProp_t::memoryClockRate
int memoryClockRate
Max global memory clock frequency in khz.
Definition: hip_runtime_api.h:94
hipEventCreateWithFlags
hipError_t hipEventCreateWithFlags(hipEvent_t *event, unsigned flags)
Create an event with the specified flags.
hipErrorCooperativeLaunchTooLarge
hipErrorCooperativeLaunchTooLarge
Definition: hip_runtime_api.h:276
hipDeviceProp_t::warpSize
int warpSize
Warp size.
Definition: hip_runtime_api.h:89
hipDeviceTotalMem
hipError_t hipDeviceTotalMem(size_t *bytes, hipDevice_t device)
Returns the total amount of memory on the device.
hipFreeArray
hipError_t hipFreeArray(hipArray *array)
Frees an array on the device.
hipMallocManaged
hipError_t hipMallocManaged(void **dev_ptr, size_t size, unsigned int flags __dparm(hipMemAttachGlobal))
Allocates memory that will be automatically managed by AMD HMM.
hipErrorAssert
hipErrorAssert
Produced when the kernel calls assert.
Definition: hip_runtime_api.h:269
textureReference
Definition: texture_types.h:74
hipDeviceProp_t::cooperativeMultiDeviceUnmatchedFunc
int cooperativeMultiDeviceUnmatchedFunc
Definition: hip_runtime_api.h:134
hipCtxGetSharedMemConfig
hipError_t hipCtxGetSharedMemConfig(hipSharedMemConfig *pConfig)
Get Shared memory bank configuration.
hipDeviceProp_t::cooperativeMultiDeviceUnmatchedGridDim
int cooperativeMultiDeviceUnmatchedGridDim
Definition: hip_runtime_api.h:136
hipDeviceCanAccessPeer
hipError_t hipDeviceCanAccessPeer(int *canAccessPeer, int deviceId, int peerDeviceId)
Determine if a device can access a peer's memory.
hipModuleGetFunction
hipError_t hipModuleGetFunction(hipFunction_t *function, hipModule_t module, const char *kname)
Function with kname will be extracted if present in module.
hipDeviceArch_t::hasFloatAtomicAdd
unsigned hasFloatAtomicAdd
32-bit float atomic add in global and shared memory.
Definition: hip_runtime_api.h:51
hipCtxDisablePeerAccess
hipError_t hipCtxDisablePeerAccess(hipCtx_t peerCtx)
Disable direct access from current context's virtual address space to memory allocations physically l...
hipDeviceProp_t::cooperativeLaunch
int cooperativeLaunch
HIP device supports cooperative launch.
Definition: hip_runtime_api.h:120
hipDeviceArch_t::hasSharedFloatAtomicExch
unsigned hasSharedFloatAtomicExch
32-bit float atomic exch for shared memory.
Definition: hip_runtime_api.h:50
hipTextureDesc
Definition: texture_types.h:95
hipResourceViewDesc
Definition: driver_types.h:323
hipDeviceProp_t::multiProcessorCount
int multiProcessorCount
Number of multi-processors (compute units).
Definition: hip_runtime_api.h:103
hipCtxSetSharedMemConfig
hipError_t hipCtxSetSharedMemConfig(hipSharedMemConfig config)
Set Shared memory bank configuration.
hipDeviceProp_t::integrated
int integrated
APU vs dGPU.
Definition: hip_runtime_api.h:119
hipMemsetD8
hipError_t hipMemsetD8(hipDeviceptr_t dest, unsigned char value, size_t count)
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant byte value va...
hipMemset2DAsync
hipError_t hipMemset2DAsync(void *dst, size_t pitch, int value, size_t width, size_t height, hipStream_t stream __dparm(0))
Asynchronously fills the memory area pointed to by dst with the constant value.
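For illustration, a pitched 2D buffer cleared asynchronously on the null stream; the row width and height below are placeholder sizes:

void* img = nullptr;
size_t pitch = 0;
size_t widthBytes = 512, height = 256;                  // illustrative dimensions
hipMallocPitch(&img, &pitch, widthBytes, height);
// Each of 'height' rows has 'widthBytes' bytes set to 0; 'pitch' may exceed widthBytes.
hipMemset2DAsync(img, pitch, 0, widthBytes, height, 0);
hipStreamSynchronize(0);                                // wait on the null stream
hipFree(img);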
hipDeviceProp_t::ECCEnabled
int ECCEnabled
Device has ECC support enabled.
Definition: hip_runtime_api.h:132
HIP_ARRAY_DESCRIPTOR
Definition: driver_types.h:62
hipCtxGetDevice
hipError_t hipCtxGetDevice(hipDevice_t *device)
Get the handle of the device associated with current/default context.
hipDeviceProp_t::totalConstMem
size_t totalConstMem
Size of the constant memory region (in bytes).
Definition: hip_runtime_api.h:96
hipDeviceProp_t::maxTexture2D
int maxTexture2D[2]
Maximum dimensions (width, height) of 2D images, in image elements.
Definition: hip_runtime_api.h:124
hipLaunchParams_t
Definition: hip_runtime_api.h:339
hipErrorHostMemoryAlreadyRegistered
hipErrorHostMemoryAlreadyRegistered
Produced when trying to register host memory that is already page-locked.
Definition: hip_runtime_api.h:270
hipFuncAttribute
hipFuncAttribute
Definition: hip_runtime_api.h:299
hipDeviceAttribute_t
hipDeviceAttribute_t
Definition: hip_runtime_api.h:297
hipFuncSetSharedMemConfig
hipError_t hipFuncSetSharedMemConfig(const void *func, hipSharedMemConfig config)
Set shared memory configuration for a specific function.
hipResourceDesc
Definition: driver_types.h:262
hipErrorLaunchFailure
hipErrorLaunchFailure
An exception occurred on the device while executing a kernel.
Definition: hip_runtime_api.h:274
hipDeviceSynchronize
hipError_t hipDeviceSynchronize(void)
Waits on all active streams on the current device.
hipCtxGetCacheConfig
hipError_t hipCtxGetCacheConfig(hipFuncCache_t *cacheConfig)
Get cache configuration for the current context.
hipCtxDestroy
hipError_t hipCtxDestroy(hipCtx_t ctx)
Destroy a HIP context.
hipModuleLaunchKernel
hipError_t hipModuleLaunchKernel(hipFunction_t f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, hipStream_t stream, void **kernelParams, void **extra)
Launches kernel f with launch parameters and shared memory on stream with arguments passed to kernelp...
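A hedged sketch of the module path these entries describe, from loading a code object to launching a kernel; the file name vec_add.co, the kernel name vec_add, and the argument set are placeholders:

hipModule_t module;
hipFunction_t kernel;
hipModuleLoad(&module, "vec_add.co");              // code object on disk (placeholder name)
hipModuleGetFunction(&kernel, module, "vec_add");  // look the kernel up by name

float* d_out = nullptr;                            // assumed to be allocated elsewhere
int n = 1024;
void* args[] = { &d_out, &n };                     // kernelParams holds addresses of the arguments
hipModuleLaunchKernel(kernel,
                      n / 256, 1, 1,               // grid dimensions
                      256, 1, 1,                   // block dimensions
                      0,                           // dynamic shared memory in bytes
                      0,                           // null stream
                      args, nullptr);
hipModuleUnload(module);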
hipDeviceAttributeConcurrentKernels
@ hipDeviceAttributeConcurrentKernels
Definition: hip_runtime_api.h:324
hipDeviceProp_t::cooperativeMultiDeviceUnmatchedSharedMem
int cooperativeMultiDeviceUnmatchedSharedMem
Definition: hip_runtime_api.h:140
hipProfilerStart
hipError_t hipProfilerStart()
Start recording of profiling information. When using this API, start the profiler with profiling disab...
hipDeviceGetSharedMemConfig
hipError_t hipDeviceGetSharedMemConfig(hipSharedMemConfig *pConfig)
Returns the bank width of shared memory for the current device.
hipErrorNotSupported
hipErrorNotSupported
Produced when the HIP API is not supported/implemented.
Definition: hip_runtime_api.h:280
hipMemcpyAsync
hipError_t hipMemcpyAsync(void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream __dparm(0))
Copies sizeBytes bytes from the memory area pointed to by src to the memory area pointed to by offset...
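As a hedged sketch, an asynchronous host-to-device copy on a dedicated stream using pinned host memory; the buffer size is illustrative:

size_t bytes = 1 << 20;
float* h_buf = nullptr;
float* d_buf = nullptr;
hipHostMalloc(reinterpret_cast<void**>(&h_buf), bytes, hipHostMallocDefault);  // pinned host buffer
hipMalloc(reinterpret_cast<void**>(&d_buf), bytes);

hipStream_t stream;
hipStreamCreate(&stream);
hipMemcpyAsync(d_buf, h_buf, bytes, hipMemcpyHostToDevice, stream);  // returns immediately
hipStreamSynchronize(stream);                                        // wait for the copy to finish

hipStreamDestroy(stream);
hipFree(d_buf);
hipHostFree(h_buf);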
hipErrorLaunchOutOfResources
hipErrorLaunchOutOfResources
Out of resources error.
Definition: hip_runtime_api.h:262
hipOccupancyMaxPotentialBlockSize
hipError_t hipOccupancyMaxPotentialBlockSize(int *gridSize, int *blockSize, const void *f, size_t dynSharedMemPerBlk, int blockSizeLimit)
Determine the grid and block sizes that achieve maximum occupancy for a kernel.
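A rough sketch of using the occupancy helper to choose launch dimensions; myKernel, d_data, and n are placeholders for a real __global__ function and its data:

int minGridSize = 0, blockSize = 0;
// 0 bytes of dynamic shared memory, no upper limit on the block size.
hipOccupancyMaxPotentialBlockSize(&minGridSize, &blockSize,
                                  (const void*)myKernel, 0, 0);
int gridSize = (n + blockSize - 1) / blockSize;          // enough blocks to cover n elements
hipLaunchKernelGGL(myKernel, dim3(gridSize), dim3(blockSize), 0, 0, d_data, n);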
hipStreamDestroy
hipError_t hipStreamDestroy(hipStream_t stream)
Destroys the specified stream.
hipHostRegister
hipError_t hipHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags)
Register host memory so it can be accessed from the current device.
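A brief sketch of registering an existing host buffer so it behaves like pinned memory; d_buf is assumed to be an existing device allocation:

std::vector<float> host(1 << 20);
hipHostRegister(host.data(), host.size() * sizeof(float), hipHostRegisterDefault);
// host.data() is now page-locked and usable as a fast copy source/target.
hipMemcpy(d_buf, host.data(), host.size() * sizeof(float), hipMemcpyHostToDevice);
hipHostUnregister(host.data());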
hipModuleLoad
hipError_t hipModuleLoad(hipModule_t *module, const char *fname)
Loads code object from file into a hipModule_t.
hipProfilerStop
hipError_t hipProfilerStop()
Stop recording of profiling information. When using this API, start the profiler with profiling disab...
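A small sketch of bracketing a region of interest with these two calls; the external profiler is assumed to have been started with collection initially disabled:

hipProfilerStart();                 // begin collecting profiling data
// ... kernels and memory operations to be profiled ...
hipDeviceSynchronize();             // make sure the profiled work has finished
hipProfilerStop();                  // stop collecting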
hipEventCreate
hipError_t hipEventCreate(hipEvent_t *event)
hipMemsetAsync
hipError_t hipMemsetAsync(void *dst, int value, size_t sizeBytes, hipStream_t stream __dparm(0))
Fills the first sizeBytes bytes of the memory area pointed to by dev with the constant byte value val...
hipDeviceAttributeMaxTexture2DWidth
@ hipDeviceAttributeMaxTexture2DWidth
Maximum dimension width of 2D images in image elements.
Definition: hip_runtime_api.h:335
hipDeviceProp_t::pciDomainID
int pciDomainID
PCI Domain ID.
Definition: hip_runtime_api.h:111
hipModuleLoadData
hipError_t hipModuleLoadData(hipModule_t *module, const void *image)
Builds a module from a code object that resides in host memory; image is a pointer to that location.
hipMemcpyParam2D
hipError_t hipMemcpyParam2D(const hip_Memcpy2D *pCopy)
Copies memory for 2D arrays.
hipHostAlloc
hipError_t hipHostAlloc(void **ptr, size_t size, unsigned int flags)
Allocate device-accessible, page-locked host memory [Deprecated].
Definition: hip_runtime_api.h:889
hipMemset
hipError_t hipMemset(void *dst, int value, size_t sizeBytes)
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant byte value va...
hipDeviceDisablePeerAccess
hipError_t hipDeviceDisablePeerAccess(int peerDeviceId)
Disable direct access from current device's virtual address space to memory allocations physically lo...
hipModuleOccupancyMaxActiveBlocksPerMultiprocessor
hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk)
Returns occupancy for a device function.
hipDeviceAttributeEccEnabled
@ hipDeviceAttributeEccEnabled
Device has ECC support enabled.
Definition: hip_runtime_api.h:349
hipFuncSetAttribute
hipError_t hipFuncSetAttribute(const void *func, hipFuncAttribute attr, int value)
Set attribute for a specific function.
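A hedged sketch of raising one kernel's dynamic shared-memory limit with this call; myKernel is a placeholder, and the 64 KiB request must not exceed what the device supports:

hipFuncSetAttribute((const void*)myKernel,
                    hipFuncAttributeMaxDynamicSharedMemorySize,
                    64 * 1024);
// Subsequent launches of myKernel may request up to 64 KiB of dynamic shared memory.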
hipDeviceAttributeMaxSharedMemoryPerMultiprocessor
@ hipDeviceAttributeMaxSharedMemoryPerMultiprocessor
Definition: hip_runtime_api.h:328
hipDevicePrimaryCtxReset
hipError_t hipDevicePrimaryCtxReset(hipDevice_t dev)
Resets the primary context on the GPU.
hipChannelFormatDesc
Definition: driver_types.h:38
hipModuleUnload
hipError_t hipModuleUnload(hipModule_t module)
Frees the module.
hipErrorPeerAccessAlreadyEnabled
hipErrorPeerAccessAlreadyEnabled
Peer access was already enabled from the current device.
Definition: hip_runtime_api.h:264