// HIP → CUDA (nvcc) adapter header: maps HIP runtime-API names onto the CUDA
// runtime and driver APIs so HIP code compiles with nvcc.
// NOTE(review): this copy is a mangled extraction — original file line numbers
// are embedded in each line and some lines are missing; verify against upstream.
23 #ifndef HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_API_H
24 #define HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_API_H
26 #include <cuda_runtime_api.h>
28 #include <cuda_profiler_api.h>
29 #include <cuda_fp16.h>
// __dparm(x) expands to a C++ default-argument initializer (" = x").
36 #define __dparm(x) = x
// __HIP_DEPRECATED: compiler-specific deprecation attribute. Expands to
// nothing for Doxygen runs or when the user opts back into deprecated APIs.
// (Reconstructed: the extracted copy lost the trailing #else/#endif.)
#if defined(__DOXYGEN_ONLY__) || defined(HIP_ENABLE_DEPRECATED)
#define __HIP_DEPRECATED
#elif defined(_MSC_VER)
#define __HIP_DEPRECATED __declspec(deprecated)
#elif defined(__GNUC__)
#define __HIP_DEPRECATED __attribute__((deprecated))
#else
// Unknown compiler: no portable deprecation syntax, so expand to nothing.
#define __HIP_DEPRECATED
#endif
// Transfer direction for hipMemcpy*-style calls; values mirror cudaMemcpyKind
// ordering (HostToHost = 0 ... Default = 4) so the converter below stays a
// simple 1:1 mapping. (Reconstructed: the extracted copy lost the first and
// last enumerators and the closing of the typedef; hipMemcpyHostToHost is
// referenced by hipMemcpyKindToCudaMemcpyKind later in this file.)
typedef enum hipMemcpyKind {
    hipMemcpyHostToHost,      // host → host
    hipMemcpyHostToDevice,    // host → device
    hipMemcpyDeviceToHost,    // device → host
    hipMemcpyDeviceToDevice,  // device → device
    hipMemcpyDefault          // direction inferred from pointer attributes
} hipMemcpyKind;
// Data-type aliases: HIP names resolve directly to cudaDataType values.
67 #define hipDataType cudaDataType
68 #define HIP_R_16F CUDA_R_16F
69 #define HIP_R_32F CUDA_R_32F
70 #define HIP_R_64F CUDA_R_64F
71 #define HIP_C_16F CUDA_C_16F
72 #define HIP_C_32F CUDA_C_32F
73 #define HIP_C_64F CUDA_C_64F
// Library property queries (major/minor/patch version fields).
76 #define hipLibraryPropertyType libraryPropertyType
77 #define HIP_LIBRARY_MAJOR_VERSION MAJOR_VERSION
78 #define HIP_LIBRARY_MINOR_VERSION MINOR_VERSION
79 #define HIP_LIBRARY_PATCH_LEVEL PATCH_LEVEL
// Driver-API array descriptor and element-format aliases.
81 #define HIP_ARRAY_DESCRIPTOR CUDA_ARRAY_DESCRIPTOR
84 #define HIP_AD_FORMAT_UNSIGNED_INT8 CU_AD_FORMAT_UNSIGNED_INT8
85 #define HIP_AD_FORMAT_UNSIGNED_INT16 CU_AD_FORMAT_UNSIGNED_INT16
86 #define HIP_AD_FORMAT_UNSIGNED_INT32 CU_AD_FORMAT_UNSIGNED_INT32
87 #define HIP_AD_FORMAT_SIGNED_INT8 CU_AD_FORMAT_SIGNED_INT8
88 #define HIP_AD_FORMAT_SIGNED_INT16 CU_AD_FORMAT_SIGNED_INT16
89 #define HIP_AD_FORMAT_SIGNED_INT32 CU_AD_FORMAT_SIGNED_INT32
90 #define HIP_AD_FORMAT_HALF CU_AD_FORMAT_HALF
91 #define HIP_AD_FORMAT_FLOAT CU_AD_FORMAT_FLOAT
94 #define hipArray_Format CUarray_format
96 inline static CUarray_format hipArray_FormatToCUarray_format(
97 hipArray_Format format) {
99 case HIP_AD_FORMAT_UNSIGNED_INT8:
100 return CU_AD_FORMAT_UNSIGNED_INT8;
101 case HIP_AD_FORMAT_UNSIGNED_INT16:
102 return CU_AD_FORMAT_UNSIGNED_INT16;
103 case HIP_AD_FORMAT_UNSIGNED_INT32:
104 return CU_AD_FORMAT_UNSIGNED_INT32;
105 case HIP_AD_FORMAT_SIGNED_INT8:
106 return CU_AD_FORMAT_SIGNED_INT8;
107 case HIP_AD_FORMAT_SIGNED_INT16:
108 return CU_AD_FORMAT_SIGNED_INT16;
109 case HIP_AD_FORMAT_SIGNED_INT32:
110 return CU_AD_FORMAT_SIGNED_INT32;
111 case HIP_AD_FORMAT_HALF:
112 return CU_AD_FORMAT_HALF;
113 case HIP_AD_FORMAT_FLOAT:
114 return CU_AD_FORMAT_FLOAT;
116 return CU_AD_FORMAT_UNSIGNED_INT8;
// Driver-API texture addressing-mode aliases.
120 #define HIP_TR_ADDRESS_MODE_WRAP CU_TR_ADDRESS_MODE_WRAP
121 #define HIP_TR_ADDRESS_MODE_CLAMP CU_TR_ADDRESS_MODE_CLAMP
122 #define HIP_TR_ADDRESS_MODE_MIRROR CU_TR_ADDRESS_MODE_MIRROR
123 #define HIP_TR_ADDRESS_MODE_BORDER CU_TR_ADDRESS_MODE_BORDER
126 #define hipAddress_mode CUaddress_mode
128 inline static CUaddress_mode hipAddress_modeToCUaddress_mode(
129 hipAddress_mode mode) {
131 case HIP_TR_ADDRESS_MODE_WRAP:
132 return CU_TR_ADDRESS_MODE_WRAP;
133 case HIP_TR_ADDRESS_MODE_CLAMP:
134 return CU_TR_ADDRESS_MODE_CLAMP;
135 case HIP_TR_ADDRESS_MODE_MIRROR:
136 return CU_TR_ADDRESS_MODE_MIRROR;
137 case HIP_TR_ADDRESS_MODE_BORDER:
138 return CU_TR_ADDRESS_MODE_BORDER;
140 return CU_TR_ADDRESS_MODE_WRAP;
// Driver-API texture filtering-mode aliases.
144 #define HIP_TR_FILTER_MODE_POINT CU_TR_FILTER_MODE_POINT
145 #define HIP_TR_FILTER_MODE_LINEAR CU_TR_FILTER_MODE_LINEAR
148 #define hipFilter_mode CUfilter_mode
150 inline static CUfilter_mode hipFilter_mode_enumToCUfilter_mode(
151 hipFilter_mode mode) {
153 case HIP_TR_FILTER_MODE_POINT:
154 return CU_TR_FILTER_MODE_POINT;
155 case HIP_TR_FILTER_MODE_LINEAR:
156 return CU_TR_FILTER_MODE_LINEAR;
158 return CU_TR_FILTER_MODE_POINT;
// Driver-API resource-type aliases.
163 #define HIP_RESOURCE_TYPE_ARRAY CU_RESOURCE_TYPE_ARRAY
164 #define HIP_RESOURCE_TYPE_MIPMAPPED_ARRAY CU_RESOURCE_TYPE_MIPMAPPED_ARRAY
165 #define HIP_RESOURCE_TYPE_LINEAR CU_RESOURCE_TYPE_LINEAR
166 #define HIP_RESOURCE_TYPE_PITCH2D CU_RESOURCE_TYPE_PITCH2D
169 #define hipResourcetype CUresourcetype
171 inline static CUresourcetype hipResourcetype_enumToCUresourcetype(
172 hipResourcetype resType) {
174 case HIP_RESOURCE_TYPE_ARRAY:
175 return CU_RESOURCE_TYPE_ARRAY;
176 case HIP_RESOURCE_TYPE_MIPMAPPED_ARRAY:
177 return CU_RESOURCE_TYPE_MIPMAPPED_ARRAY;
178 case HIP_RESOURCE_TYPE_LINEAR:
179 return CU_RESOURCE_TYPE_LINEAR;
180 case HIP_RESOURCE_TYPE_PITCH2D:
181 return CU_RESOURCE_TYPE_PITCH2D;
183 return CU_RESOURCE_TYPE_ARRAY;
// Handle and texture/surface enum aliases (runtime-API names).
187 #define hipTexRef CUtexref
188 #define hiparray CUarray
191 typedef enum cudaTextureAddressMode hipTextureAddressMode;
192 #define hipAddressModeWrap cudaAddressModeWrap
193 #define hipAddressModeClamp cudaAddressModeClamp
194 #define hipAddressModeMirror cudaAddressModeMirror
195 #define hipAddressModeBorder cudaAddressModeBorder
198 typedef enum cudaTextureFilterMode hipTextureFilterMode;
199 #define hipFilterModePoint cudaFilterModePoint
200 #define hipFilterModeLinear cudaFilterModeLinear
203 typedef enum cudaTextureReadMode hipTextureReadMode;
204 #define hipReadModeElementType cudaReadModeElementType
205 #define hipReadModeNormalizedFloat cudaReadModeNormalizedFloat
208 typedef enum cudaChannelFormatKind hipChannelFormatKind;
209 #define hipChannelFormatKindSigned cudaChannelFormatKindSigned
210 #define hipChannelFormatKindUnsigned cudaChannelFormatKindUnsigned
211 #define hipChannelFormatKindFloat cudaChannelFormatKindFloat
212 #define hipChannelFormatKindNone cudaChannelFormatKindNone
214 #define hipSurfaceBoundaryMode cudaSurfaceBoundaryMode
215 #define hipBoundaryModeZero cudaBoundaryModeZero
216 #define hipBoundaryModeTrap cudaBoundaryModeTrap
217 #define hipBoundaryModeClamp cudaBoundaryModeClamp
// Function cache and resource-type aliases.
220 #define hipFuncCachePreferNone cudaFuncCachePreferNone
221 #define hipFuncCachePreferShared cudaFuncCachePreferShared
222 #define hipFuncCachePreferL1 cudaFuncCachePreferL1
223 #define hipFuncCachePreferEqual cudaFuncCachePreferEqual
226 #define hipResourceType cudaResourceType
227 #define hipResourceTypeArray cudaResourceTypeArray
228 #define hipResourceTypeMipmappedArray cudaResourceTypeMipmappedArray
229 #define hipResourceTypeLinear cudaResourceTypeLinear
230 #define hipResourceTypePitch2D cudaResourceTypePitch2D
// Event flags. The ReleaseTo{Device,System} flags are AMD-specific and are
// no-ops (0) on the CUDA path.
236 #define hipEventDefault cudaEventDefault
237 #define hipEventBlockingSync cudaEventBlockingSync
238 #define hipEventDisableTiming cudaEventDisableTiming
239 #define hipEventInterprocess cudaEventInterprocess
240 #define hipEventReleaseToDevice 0
241 #define hipEventReleaseToSystem 0
// Host-allocation flags. Coherent/NonCoherent are AMD-specific no-ops here.
244 #define hipHostMallocDefault cudaHostAllocDefault
245 #define hipHostMallocPortable cudaHostAllocPortable
246 #define hipHostMallocMapped cudaHostAllocMapped
247 #define hipHostMallocWriteCombined cudaHostAllocWriteCombined
248 #define hipHostMallocCoherent 0x0
249 #define hipHostMallocNonCoherent 0x0
251 #define hipMemAttachGlobal cudaMemAttachGlobal
252 #define hipMemAttachHost cudaMemAttachHost
254 #define hipHostRegisterDefault cudaHostRegisterDefault
255 #define hipHostRegisterPortable cudaHostRegisterPortable
256 #define hipHostRegisterMapped cudaHostRegisterMapped
257 #define hipHostRegisterIoMemory cudaHostRegisterIoMemory
// Module-launch parameter markers, limits, IPC and occupancy flags.
259 #define HIP_LAUNCH_PARAM_BUFFER_POINTER CU_LAUNCH_PARAM_BUFFER_POINTER
260 #define HIP_LAUNCH_PARAM_BUFFER_SIZE CU_LAUNCH_PARAM_BUFFER_SIZE
261 #define HIP_LAUNCH_PARAM_END CU_LAUNCH_PARAM_END
262 #define hipLimitMallocHeapSize cudaLimitMallocHeapSize
263 #define hipIpcMemLazyEnablePeerAccess cudaIpcMemLazyEnablePeerAccess
265 #define hipOccupancyDefault cudaOccupancyDefault
267 #define hipCooperativeLaunchMultiDeviceNoPreSync \
268 cudaCooperativeLaunchMultiDeviceNoPreSync
269 #define hipCooperativeLaunchMultiDeviceNoPostSync \
270 cudaCooperativeLaunchMultiDeviceNoPostSync
// JIT option aliases (driver API).
274 #define hipJitOptionMaxRegisters CU_JIT_MAX_REGISTERS
275 #define hipJitOptionThreadsPerBlock CU_JIT_THREADS_PER_BLOCK
276 #define hipJitOptionWallTime CU_JIT_WALL_TIME
277 #define hipJitOptionInfoLogBuffer CU_JIT_INFO_LOG_BUFFER
278 #define hipJitOptionInfoLogBufferSizeBytes CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES
279 #define hipJitOptionErrorLogBuffer CU_JIT_ERROR_LOG_BUFFER
280 #define hipJitOptionErrorLogBufferSizeBytes CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES
281 #define hipJitOptionOptimizationLevel CU_JIT_OPTIMIZATION_LEVEL
282 #define hipJitOptionTargetFromContext CU_JIT_TARGET_FROM_CUCONTEXT
283 #define hipJitOptionTarget CU_JIT_TARGET
284 #define hipJitOptionFallbackStrategy CU_JIT_FALLBACK_STRATEGY
285 #define hipJitOptionGenerateDebugInfo CU_JIT_GENERATE_DEBUG_INFO
286 #define hipJitOptionLogVerbose CU_JIT_LOG_VERBOSE
287 #define hipJitOptionGenerateLineInfo CU_JIT_GENERATE_LINE_INFO
288 #define hipJitOptionCacheMode CU_JIT_CACHE_MODE
289 #define hipJitOptionSm3xOpt CU_JIT_NEW_SM3X_OPT
290 #define hipJitOptionFastCompile CU_JIT_FAST_COMPILE
291 #define hipJitOptionNumOptions CU_JIT_NUM_OPTIONS
// Core handle typedefs: runtime-API handles alias cuda* types, driver-API
// handles alias CU* types.
293 typedef cudaEvent_t hipEvent_t;
294 typedef cudaStream_t hipStream_t;
297 typedef enum cudaLimit hipLimit_t;
300 typedef CUcontext hipCtx_t;
302 typedef CUfunc_cache hipFuncCache;
303 typedef CUjit_option hipJitOption;
304 typedef CUdevice hipDevice_t;
305 typedef enum cudaDeviceP2PAttr hipDeviceP2PAttr;
306 #define hipDevP2PAttrPerformanceRank cudaDevP2PAttrPerformanceRank
307 #define hipDevP2PAttrAccessSupported cudaDevP2PAttrAccessSupported
308 #define hipDevP2PAttrNativeAtomicSupported cudaDevP2PAttrNativeAtomicSupported
309 #define hipDevP2PAttrHipArrayAccessSupported cudaDevP2PAttrCudaArrayAccessSupported
310 #define hipFuncAttributeMaxDynamicSharedMemorySize cudaFuncAttributeMaxDynamicSharedMemorySize
311 #define hipFuncAttributePreferredSharedMemoryCarveout cudaFuncAttributePreferredSharedMemoryCarveout
313 typedef CUmodule hipModule_t;
314 typedef CUfunction hipFunction_t;
315 typedef CUdeviceptr hipDeviceptr_t;
321 #define hipFunction_attribute CUfunction_attribute
322 #define hip_Memcpy2D CUDA_MEMCPY2D
323 #define hipMemcpy3DParms cudaMemcpy3DParms
// Array allocation flags, texture object types and helper constructors.
324 #define hipArrayDefault cudaArrayDefault
325 #define hipArrayLayered cudaArrayLayered
326 #define hipArraySurfaceLoadStore cudaArraySurfaceLoadStore
327 #define hipArrayCubemap cudaArrayCubemap
328 #define hipArrayTextureGather cudaArrayTextureGather
330 typedef cudaTextureObject_t hipTextureObject_t;
332 #define hipTextureType1D cudaTextureType1D
333 #define hipTextureType1DLayered cudaTextureType1DLayered
334 #define hipTextureType2D cudaTextureType2D
335 #define hipTextureType2DLayered cudaTextureType2DLayered
336 #define hipTextureType3D cudaTextureType3D
337 #define hipDeviceMapHost cudaDeviceMapHost
341 #define make_hipExtent make_cudaExtent
342 #define make_hipPos make_cudaPos
343 #define make_hipPitchedPtr make_cudaPitchedPtr
345 #define hipStreamDefault cudaStreamDefault
346 #define hipStreamNonBlocking cudaStreamNonBlocking
353 #define hipSharedMemBankSizeDefault cudaSharedMemBankSizeDefault
354 #define hipSharedMemBankSizeFourByte cudaSharedMemBankSizeFourByte
355 #define hipSharedMemBankSizeEightByte cudaSharedMemBankSizeEightByte
// Driver-API function-attribute aliases.
358 #define HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK
359 #define HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES
360 #define HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES
361 #define HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES
362 #define HIP_FUNC_ATTRIBUTE_NUM_REGS CU_FUNC_ATTRIBUTE_NUM_REGS
363 #define HIP_FUNC_ATTRIBUTE_PTX_VERSION CU_FUNC_ATTRIBUTE_PTX_VERSION
364 #define HIP_FUNC_ATTRIBUTE_BINARY_VERSION CU_FUNC_ATTRIBUTE_BINARY_VERSION
365 #define HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA CU_FUNC_ATTRIBUTE_CACHE_MODE_CA
366 #define HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES
367 #define HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT
368 #define HIP_FUNC_ATTRIBUTE_MAX CU_FUNC_ATTRIBUTE_MAX
// CUDA 9+ removed the implicit-sync warp intrinsics; map the legacy names to
// the *_sync variants with a full-warp mask.
370 #if CUDA_VERSION >= 9000
371 #define __shfl(...) __shfl_sync(0xffffffff, __VA_ARGS__)
372 #define __shfl_up(...) __shfl_up_sync(0xffffffff, __VA_ARGS__)
373 #define __shfl_down(...) __shfl_down_sync(0xffffffff, __VA_ARGS__)
374 #define __shfl_xor(...) __shfl_xor_sync(0xffffffff, __VA_ARGS__)
375 #endif // CUDA_VERSION >= 9000
377 inline static hipError_t hipCUDAErrorTohipError(cudaError_t cuError) {
381 case cudaErrorProfilerDisabled:
382 return hipErrorProfilerDisabled;
383 case cudaErrorProfilerNotInitialized:
384 return hipErrorProfilerNotInitialized;
385 case cudaErrorProfilerAlreadyStarted:
386 return hipErrorProfilerAlreadyStarted;
387 case cudaErrorProfilerAlreadyStopped:
388 return hipErrorProfilerAlreadyStopped;
389 case cudaErrorInsufficientDriver:
390 return hipErrorInsufficientDriver;
391 case cudaErrorUnsupportedLimit:
392 return hipErrorUnsupportedLimit;
393 case cudaErrorPeerAccessUnsupported:
394 return hipErrorPeerAccessUnsupported;
395 case cudaErrorInvalidGraphicsContext:
396 return hipErrorInvalidGraphicsContext;
397 case cudaErrorSharedObjectSymbolNotFound:
398 return hipErrorSharedObjectSymbolNotFound;
399 case cudaErrorSharedObjectInitFailed:
400 return hipErrorSharedObjectInitFailed;
401 case cudaErrorOperatingSystem:
402 return hipErrorOperatingSystem;
403 case cudaErrorSetOnActiveProcess:
404 return hipErrorSetOnActiveProcess;
405 case cudaErrorIllegalAddress:
406 return hipErrorIllegalAddress;
407 case cudaErrorInvalidSymbol:
408 return hipErrorInvalidSymbol;
409 case cudaErrorMissingConfiguration:
410 return hipErrorMissingConfiguration;
411 case cudaErrorMemoryAllocation:
412 return hipErrorOutOfMemory;
413 case cudaErrorInitializationError:
414 return hipErrorNotInitialized;
415 case cudaErrorLaunchFailure:
417 case cudaErrorCooperativeLaunchTooLarge:
419 case cudaErrorPriorLaunchFailure:
420 return hipErrorPriorLaunchFailure;
421 case cudaErrorLaunchOutOfResources:
423 case cudaErrorInvalidDeviceFunction:
424 return hipErrorInvalidDeviceFunction;
425 case cudaErrorInvalidConfiguration:
426 return hipErrorInvalidConfiguration;
427 case cudaErrorInvalidDevice:
429 case cudaErrorInvalidValue:
431 case cudaErrorInvalidDevicePointer:
433 case cudaErrorInvalidMemcpyDirection:
435 case cudaErrorInvalidResourceHandle:
436 return hipErrorInvalidHandle;
437 case cudaErrorNotReady:
439 case cudaErrorNoDevice:
441 case cudaErrorPeerAccessAlreadyEnabled:
443 case cudaErrorPeerAccessNotEnabled:
445 case cudaErrorHostMemoryAlreadyRegistered:
447 case cudaErrorHostMemoryNotRegistered:
449 case cudaErrorMapBufferObjectFailed:
450 return hipErrorMapFailed;
451 case cudaErrorAssert:
453 case cudaErrorNotSupported:
455 case cudaErrorCudartUnloading:
456 return hipErrorDeinitialized;
457 case cudaErrorInvalidKernelImage:
458 return hipErrorInvalidImage;
459 case cudaErrorUnmapBufferObjectFailed:
460 return hipErrorUnmapFailed;
461 case cudaErrorNoKernelImageForDevice:
462 return hipErrorNoBinaryForGpu;
463 case cudaErrorECCUncorrectable:
464 return hipErrorECCNotCorrectable;
465 case cudaErrorDeviceAlreadyInUse:
466 return hipErrorContextAlreadyInUse;
467 case cudaErrorInvalidPtx:
469 case cudaErrorLaunchTimeout:
470 return hipErrorLaunchTimeOut;
471 #if CUDA_VERSION >= 10010
472 case cudaErrorInvalidSource:
473 return hipErrorInvalidSource;
474 case cudaErrorFileNotFound:
475 return hipErrorFileNotFound;
476 case cudaErrorSymbolNotFound:
477 return hipErrorNotFound;
478 case cudaErrorArrayIsMapped:
479 return hipErrorArrayIsMapped;
480 case cudaErrorNotMappedAsPointer:
481 return hipErrorNotMappedAsPointer;
482 case cudaErrorNotMappedAsArray:
483 return hipErrorNotMappedAsArray;
484 case cudaErrorNotMapped:
485 return hipErrorNotMapped;
486 case cudaErrorAlreadyAcquired:
487 return hipErrorAlreadyAcquired;
488 case cudaErrorAlreadyMapped:
489 return hipErrorAlreadyMapped;
491 #if CUDA_VERSION >= 10020
492 case cudaErrorDeviceUninitialized:
495 case cudaErrorUnknown:
497 return hipErrorUnknown;
501 inline static hipError_t hipCUResultTohipError(CUresult cuError) {
505 case CUDA_ERROR_OUT_OF_MEMORY:
506 return hipErrorOutOfMemory;
507 case CUDA_ERROR_INVALID_VALUE:
509 case CUDA_ERROR_INVALID_DEVICE:
511 case CUDA_ERROR_DEINITIALIZED:
512 return hipErrorDeinitialized;
513 case CUDA_ERROR_NO_DEVICE:
515 case CUDA_ERROR_INVALID_CONTEXT:
517 case CUDA_ERROR_NOT_INITIALIZED:
518 return hipErrorNotInitialized;
519 case CUDA_ERROR_INVALID_HANDLE:
520 return hipErrorInvalidHandle;
521 case CUDA_ERROR_MAP_FAILED:
522 return hipErrorMapFailed;
523 case CUDA_ERROR_PROFILER_DISABLED:
524 return hipErrorProfilerDisabled;
525 case CUDA_ERROR_PROFILER_NOT_INITIALIZED:
526 return hipErrorProfilerNotInitialized;
527 case CUDA_ERROR_PROFILER_ALREADY_STARTED:
528 return hipErrorProfilerAlreadyStarted;
529 case CUDA_ERROR_PROFILER_ALREADY_STOPPED:
530 return hipErrorProfilerAlreadyStopped;
531 case CUDA_ERROR_INVALID_IMAGE:
532 return hipErrorInvalidImage;
533 case CUDA_ERROR_CONTEXT_ALREADY_CURRENT:
534 return hipErrorContextAlreadyCurrent;
535 case CUDA_ERROR_UNMAP_FAILED:
536 return hipErrorUnmapFailed;
537 case CUDA_ERROR_ARRAY_IS_MAPPED:
538 return hipErrorArrayIsMapped;
539 case CUDA_ERROR_ALREADY_MAPPED:
540 return hipErrorAlreadyMapped;
541 case CUDA_ERROR_NO_BINARY_FOR_GPU:
542 return hipErrorNoBinaryForGpu;
543 case CUDA_ERROR_ALREADY_ACQUIRED:
544 return hipErrorAlreadyAcquired;
545 case CUDA_ERROR_NOT_MAPPED:
546 return hipErrorNotMapped;
547 case CUDA_ERROR_NOT_MAPPED_AS_ARRAY:
548 return hipErrorNotMappedAsArray;
549 case CUDA_ERROR_NOT_MAPPED_AS_POINTER:
550 return hipErrorNotMappedAsPointer;
551 case CUDA_ERROR_ECC_UNCORRECTABLE:
552 return hipErrorECCNotCorrectable;
553 case CUDA_ERROR_UNSUPPORTED_LIMIT:
554 return hipErrorUnsupportedLimit;
555 case CUDA_ERROR_CONTEXT_ALREADY_IN_USE:
556 return hipErrorContextAlreadyInUse;
557 case CUDA_ERROR_PEER_ACCESS_UNSUPPORTED:
558 return hipErrorPeerAccessUnsupported;
559 case CUDA_ERROR_INVALID_PTX:
561 case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT:
562 return hipErrorInvalidGraphicsContext;
563 case CUDA_ERROR_INVALID_SOURCE:
564 return hipErrorInvalidSource;
565 case CUDA_ERROR_FILE_NOT_FOUND:
566 return hipErrorFileNotFound;
567 case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND:
568 return hipErrorSharedObjectSymbolNotFound;
569 case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED:
570 return hipErrorSharedObjectInitFailed;
571 case CUDA_ERROR_OPERATING_SYSTEM:
572 return hipErrorOperatingSystem;
573 case CUDA_ERROR_NOT_FOUND:
574 return hipErrorNotFound;
575 case CUDA_ERROR_NOT_READY:
577 case CUDA_ERROR_ILLEGAL_ADDRESS:
578 return hipErrorIllegalAddress;
579 case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES:
581 case CUDA_ERROR_LAUNCH_TIMEOUT:
582 return hipErrorLaunchTimeOut;
583 case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED:
585 case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED:
587 case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE:
588 return hipErrorSetOnActiveProcess;
589 case CUDA_ERROR_ASSERT:
591 case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED:
593 case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED:
595 case CUDA_ERROR_LAUNCH_FAILED:
597 case CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE:
599 case CUDA_ERROR_NOT_SUPPORTED:
601 case CUDA_ERROR_UNKNOWN:
603 return hipErrorUnknown;
607 inline static cudaError_t hipErrorToCudaError(hipError_t hError) {
611 case hipErrorOutOfMemory:
612 return cudaErrorMemoryAllocation;
613 case hipErrorProfilerDisabled:
614 return cudaErrorProfilerDisabled;
615 case hipErrorProfilerNotInitialized:
616 return cudaErrorProfilerNotInitialized;
617 case hipErrorProfilerAlreadyStarted:
618 return cudaErrorProfilerAlreadyStarted;
619 case hipErrorProfilerAlreadyStopped:
620 return cudaErrorProfilerAlreadyStopped;
621 case hipErrorInvalidConfiguration:
622 return cudaErrorInvalidConfiguration;
624 return cudaErrorLaunchOutOfResources;
626 return cudaErrorInvalidValue;
627 case hipErrorInvalidHandle:
628 return cudaErrorInvalidResourceHandle;
630 return cudaErrorInvalidDevice;
632 return cudaErrorInvalidMemcpyDirection;
634 return cudaErrorInvalidDevicePointer;
635 case hipErrorNotInitialized:
636 return cudaErrorInitializationError;
638 return cudaErrorNoDevice;
640 return cudaErrorNotReady;
642 return cudaErrorPeerAccessNotEnabled;
644 return cudaErrorPeerAccessAlreadyEnabled;
646 return cudaErrorHostMemoryAlreadyRegistered;
648 return cudaErrorHostMemoryNotRegistered;
649 case hipErrorDeinitialized:
650 return cudaErrorCudartUnloading;
651 case hipErrorInvalidSymbol:
652 return cudaErrorInvalidSymbol;
653 case hipErrorInsufficientDriver:
654 return cudaErrorInsufficientDriver;
655 case hipErrorMissingConfiguration:
656 return cudaErrorMissingConfiguration;
657 case hipErrorPriorLaunchFailure:
658 return cudaErrorPriorLaunchFailure;
659 case hipErrorInvalidDeviceFunction:
660 return cudaErrorInvalidDeviceFunction;
661 case hipErrorInvalidImage:
662 return cudaErrorInvalidKernelImage;
664 #if CUDA_VERSION >= 10020
665 return cudaErrorDeviceUninitialized;
667 return cudaErrorUnknown;
669 case hipErrorMapFailed:
670 return cudaErrorMapBufferObjectFailed;
671 case hipErrorUnmapFailed:
672 return cudaErrorUnmapBufferObjectFailed;
673 case hipErrorArrayIsMapped:
674 #if CUDA_VERSION >= 10010
675 return cudaErrorArrayIsMapped;
677 return cudaErrorUnknown;
679 case hipErrorAlreadyMapped:
680 #if CUDA_VERSION >= 10010
681 return cudaErrorAlreadyMapped;
683 return cudaErrorUnknown;
685 case hipErrorNoBinaryForGpu:
686 return cudaErrorNoKernelImageForDevice;
687 case hipErrorAlreadyAcquired:
688 #if CUDA_VERSION >= 10010
689 return cudaErrorAlreadyAcquired;
691 return cudaErrorUnknown;
693 case hipErrorNotMapped:
694 #if CUDA_VERSION >= 10010
695 return cudaErrorNotMapped;
697 return cudaErrorUnknown;
699 case hipErrorNotMappedAsArray:
700 #if CUDA_VERSION >= 10010
701 return cudaErrorNotMappedAsArray;
703 return cudaErrorUnknown;
705 case hipErrorNotMappedAsPointer:
706 #if CUDA_VERSION >= 10010
707 return cudaErrorNotMappedAsPointer;
709 return cudaErrorUnknown;
711 case hipErrorECCNotCorrectable:
712 return cudaErrorECCUncorrectable;
713 case hipErrorUnsupportedLimit:
714 return cudaErrorUnsupportedLimit;
715 case hipErrorContextAlreadyInUse:
716 return cudaErrorDeviceAlreadyInUse;
717 case hipErrorPeerAccessUnsupported:
718 return cudaErrorPeerAccessUnsupported;
720 return cudaErrorInvalidPtx;
721 case hipErrorInvalidGraphicsContext:
722 return cudaErrorInvalidGraphicsContext;
723 case hipErrorInvalidSource:
724 #if CUDA_VERSION >= 10010
725 return cudaErrorInvalidSource;
727 return cudaErrorUnknown;
729 case hipErrorFileNotFound:
730 #if CUDA_VERSION >= 10010
731 return cudaErrorFileNotFound;
733 return cudaErrorUnknown;
735 case hipErrorSharedObjectSymbolNotFound:
736 return cudaErrorSharedObjectSymbolNotFound;
737 case hipErrorSharedObjectInitFailed:
738 return cudaErrorSharedObjectInitFailed;
739 case hipErrorOperatingSystem:
740 return cudaErrorOperatingSystem;
741 case hipErrorNotFound:
742 #if CUDA_VERSION >= 10010
743 return cudaErrorSymbolNotFound;
745 return cudaErrorUnknown;
747 case hipErrorIllegalAddress:
748 return cudaErrorIllegalAddress;
749 case hipErrorLaunchTimeOut:
750 return cudaErrorLaunchTimeout;
751 case hipErrorSetOnActiveProcess:
752 return cudaErrorSetOnActiveProcess;
754 return cudaErrorLaunchFailure;
756 return cudaErrorCooperativeLaunchTooLarge;
758 return cudaErrorNotSupported;
763 case hipErrorUnknown:
766 return cudaErrorUnknown;
770 inline static enum cudaMemcpyKind hipMemcpyKindToCudaMemcpyKind(hipMemcpyKind kind) {
772 case hipMemcpyHostToHost:
773 return cudaMemcpyHostToHost;
774 case hipMemcpyHostToDevice:
775 return cudaMemcpyHostToDevice;
776 case hipMemcpyDeviceToHost:
777 return cudaMemcpyDeviceToHost;
778 case hipMemcpyDeviceToDevice:
779 return cudaMemcpyDeviceToDevice;
781 return cudaMemcpyDefault;
785 inline static enum cudaTextureAddressMode hipTextureAddressModeToCudaTextureAddressMode(
786 hipTextureAddressMode kind) {
788 case hipAddressModeWrap:
789 return cudaAddressModeWrap;
790 case hipAddressModeClamp:
791 return cudaAddressModeClamp;
792 case hipAddressModeMirror:
793 return cudaAddressModeMirror;
794 case hipAddressModeBorder:
795 return cudaAddressModeBorder;
797 return cudaAddressModeWrap;
801 inline static enum cudaTextureFilterMode hipTextureFilterModeToCudaTextureFilterMode(
802 hipTextureFilterMode kind) {
804 case hipFilterModePoint:
805 return cudaFilterModePoint;
806 case hipFilterModeLinear:
807 return cudaFilterModeLinear;
809 return cudaFilterModePoint;
813 inline static enum cudaTextureReadMode hipTextureReadModeToCudaTextureReadMode(hipTextureReadMode kind) {
815 case hipReadModeElementType:
816 return cudaReadModeElementType;
817 case hipReadModeNormalizedFloat:
818 return cudaReadModeNormalizedFloat;
820 return cudaReadModeElementType;
824 inline static enum cudaChannelFormatKind hipChannelFormatKindToCudaChannelFormatKind(
825 hipChannelFormatKind kind) {
827 case hipChannelFormatKindSigned:
828 return cudaChannelFormatKindSigned;
829 case hipChannelFormatKindUnsigned:
830 return cudaChannelFormatKindUnsigned;
831 case hipChannelFormatKindFloat:
832 return cudaChannelFormatKindFloat;
833 case hipChannelFormatKindNone:
834 return cudaChannelFormatKindNone;
836 return cudaChannelFormatKindNone;
843 #define HIPRT_CB CUDART_CB
844 typedef void(HIPRT_CB*
hipStreamCallback_t)(hipStream_t stream, hipError_t status,
void* userData);
845 inline static hipError_t
hipInit(
unsigned int flags) {
846 return hipCUResultTohipError(cuInit(flags));
849 inline static hipError_t
hipDeviceReset() {
return hipCUDAErrorTohipError(cudaDeviceReset()); }
851 inline static hipError_t
hipGetLastError() {
return hipCUDAErrorTohipError(cudaGetLastError()); }
854 return hipCUDAErrorTohipError(cudaPeekAtLastError());
857 inline static hipError_t
hipMalloc(
void** ptr,
size_t size) {
858 return hipCUDAErrorTohipError(cudaMalloc(ptr, size));
861 inline static hipError_t
hipMallocPitch(
void** ptr,
size_t* pitch,
size_t width,
size_t height) {
862 return hipCUDAErrorTohipError(cudaMallocPitch(ptr, pitch, width, height));
865 inline static hipError_t
hipMemAllocPitch(hipDeviceptr_t* dptr,
size_t* pitch,
size_t widthInBytes,
size_t height,
unsigned int elementSizeBytes){
866 return hipCUResultTohipError(cuMemAllocPitch(dptr,pitch,widthInBytes,height,elementSizeBytes));
870 return hipCUDAErrorTohipError(cudaMalloc3D(pitchedDevPtr, extent));
873 inline static hipError_t
hipFree(
void* ptr) {
return hipCUDAErrorTohipError(cudaFree(ptr)); }
875 inline static hipError_t
hipMallocHost(
void** ptr,
size_t size)
876 __attribute__((deprecated(
"use hipHostMalloc instead")));
878 return hipCUDAErrorTohipError(cudaMallocHost(ptr, size));
882 __attribute__((deprecated(
"use hipHostMalloc instead")));
884 return hipCUResultTohipError(cuMemAllocHost(ptr, size));
887 inline static hipError_t
hipHostAlloc(
void** ptr,
size_t size,
unsigned int flags)
888 __attribute__((deprecated(
"use hipHostMalloc instead")));
889 inline static hipError_t
hipHostAlloc(
void** ptr,
size_t size,
unsigned int flags) {
890 return hipCUDAErrorTohipError(cudaHostAlloc(ptr, size, flags));
893 inline static hipError_t
hipHostMalloc(
void** ptr,
size_t size,
unsigned int flags) {
894 return hipCUDAErrorTohipError(cudaHostAlloc(ptr, size, flags));
897 inline static hipError_t
hipMallocManaged(
void** ptr,
size_t size,
unsigned int flags) {
898 return hipCUDAErrorTohipError(cudaMallocManaged(ptr, size, flags));
902 size_t width,
size_t height,
904 return hipCUDAErrorTohipError(cudaMallocArray(array, desc, width, height, flags));
909 return hipCUDAErrorTohipError(cudaMalloc3DArray(array, desc, extent, flags));
913 return hipCUDAErrorTohipError(cudaFreeArray(array));
917 return hipCUDAErrorTohipError(cudaHostGetDevicePointer(devPtr, hostPtr, flags));
920 inline static hipError_t
hipHostGetFlags(
unsigned int* flagsPtr,
void* hostPtr) {
921 return hipCUDAErrorTohipError(cudaHostGetFlags(flagsPtr, hostPtr));
924 inline static hipError_t
hipHostRegister(
void* ptr,
size_t size,
unsigned int flags) {
925 return hipCUDAErrorTohipError(cudaHostRegister(ptr, size, flags));
929 return hipCUDAErrorTohipError(cudaHostUnregister(ptr));
933 __attribute__((deprecated(
"use hipHostFree instead")));
935 return hipCUDAErrorTohipError(cudaFreeHost(ptr));
939 return hipCUDAErrorTohipError(cudaFreeHost(ptr));
943 return hipCUDAErrorTohipError(cudaSetDevice(device));
// NOTE(review): this is the interior of hipChooseDevice(int* device,
// const hipDeviceProp_t* prop). The function signature and most of the
// cdprop field-copy statements (between minor and the final cudaChooseDevice
// call) were lost in extraction — restore from upstream before building.
// Builds a zeroed cudaDeviceProp, copies the requested HIP properties into
// it, and asks the CUDA runtime for the best-matching device.
947 struct cudaDeviceProp cdprop;
948 memset(&cdprop, 0x0,
sizeof(
struct cudaDeviceProp));
949 cdprop.major = prop->
major;
950 cdprop.minor = prop->
minor;
965 return hipCUDAErrorTohipError(cudaChooseDevice(device, &cdprop));
968 inline static hipError_t
hipMemcpyHtoD(hipDeviceptr_t dst,
void* src,
size_t size) {
969 return hipCUResultTohipError(cuMemcpyHtoD(dst, src, size));
972 inline static hipError_t
hipMemcpyDtoH(
void* dst, hipDeviceptr_t src,
size_t size) {
973 return hipCUResultTohipError(cuMemcpyDtoH(dst, src, size));
976 inline static hipError_t
hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src,
size_t size) {
977 return hipCUResultTohipError(cuMemcpyDtoD(dst, src, size));
980 inline static hipError_t
hipMemcpyHtoDAsync(hipDeviceptr_t dst,
void* src,
size_t size,
981 hipStream_t stream) {
982 return hipCUResultTohipError(cuMemcpyHtoDAsync(dst, src, size, stream));
985 inline static hipError_t
hipMemcpyDtoHAsync(
void* dst, hipDeviceptr_t src,
size_t size,
986 hipStream_t stream) {
987 return hipCUResultTohipError(cuMemcpyDtoHAsync(dst, src, size, stream));
990 inline static hipError_t
hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src,
size_t size,
991 hipStream_t stream) {
992 return hipCUResultTohipError(cuMemcpyDtoDAsync(dst, src, size, stream));
995 inline static hipError_t
hipMemcpy(
void* dst,
const void* src,
size_t sizeBytes,
996 hipMemcpyKind copyKind) {
997 return hipCUDAErrorTohipError(
998 cudaMemcpy(dst, src, sizeBytes, hipMemcpyKindToCudaMemcpyKind(copyKind)));
1002 inline static hipError_t hipMemcpyWithStream(
void* dst,
const void* src,
1003 size_t sizeBytes, hipMemcpyKind copyKind,
1004 hipStream_t stream) {
1005 cudaError_t error = cudaMemcpyAsync(dst, src, sizeBytes,
1006 hipMemcpyKindToCudaMemcpyKind(copyKind),
1009 if (error != cudaSuccess)
return hipCUDAErrorTohipError(error);
1011 return hipCUDAErrorTohipError(cudaStreamSynchronize(stream));
1014 inline static hipError_t
hipMemcpyAsync(
void* dst,
const void* src,
size_t sizeBytes,
1015 hipMemcpyKind copyKind, hipStream_t stream __dparm(0)) {
1016 return hipCUDAErrorTohipError(
1017 cudaMemcpyAsync(dst, src, sizeBytes, hipMemcpyKindToCudaMemcpyKind(copyKind), stream));
1020 inline static hipError_t hipMemcpyToSymbol(
const void* symbol,
const void* src,
size_t sizeBytes,
1021 size_t offset __dparm(0),
1022 hipMemcpyKind copyType __dparm(hipMemcpyHostToDevice)) {
1023 return hipCUDAErrorTohipError(cudaMemcpyToSymbol(symbol, src, sizeBytes, offset,
1024 hipMemcpyKindToCudaMemcpyKind(copyType)));
1027 inline static hipError_t hipMemcpyToSymbolAsync(
const void* symbol,
const void* src,
1028 size_t sizeBytes,
size_t offset,
1029 hipMemcpyKind copyType,
1030 hipStream_t stream __dparm(0)) {
1031 return hipCUDAErrorTohipError(cudaMemcpyToSymbolAsync(
1032 symbol, src, sizeBytes, offset, hipMemcpyKindToCudaMemcpyKind(copyType), stream));
1035 inline static hipError_t hipMemcpyFromSymbol(
void* dst,
const void* symbolName,
size_t sizeBytes,
1036 size_t offset __dparm(0),
1037 hipMemcpyKind kind __dparm(hipMemcpyDeviceToHost)) {
1038 return hipCUDAErrorTohipError(cudaMemcpyFromSymbol(dst, symbolName, sizeBytes, offset,
1039 hipMemcpyKindToCudaMemcpyKind(kind)));
1042 inline static hipError_t hipMemcpyFromSymbolAsync(
void* dst,
const void* symbolName,
1043 size_t sizeBytes,
size_t offset,
1045 hipStream_t stream __dparm(0)) {
1046 return hipCUDAErrorTohipError(cudaMemcpyFromSymbolAsync(
1047 dst, symbolName, sizeBytes, offset, hipMemcpyKindToCudaMemcpyKind(kind), stream));
1050 inline static hipError_t hipGetSymbolAddress(
void** devPtr,
const void* symbolName) {
1051 return hipCUDAErrorTohipError(cudaGetSymbolAddress(devPtr, symbolName));
1054 inline static hipError_t hipGetSymbolSize(
size_t* size,
const void* symbolName) {
1055 return hipCUDAErrorTohipError(cudaGetSymbolSize(size, symbolName));
1058 inline static hipError_t
hipMemcpy2D(
void* dst,
size_t dpitch,
const void* src,
size_t spitch,
1059 size_t width,
size_t height, hipMemcpyKind kind) {
1060 return hipCUDAErrorTohipError(
1061 cudaMemcpy2D(dst, dpitch, src, spitch, width, height, hipMemcpyKindToCudaMemcpyKind(kind)));
1065 return hipCUResultTohipError(cuMemcpy2D(pCopy));
1069 return hipCUResultTohipError(cuMemcpy2DAsync(pCopy, stream));
1074 return hipCUDAErrorTohipError(cudaMemcpy3D(p));
1079 return hipCUDAErrorTohipError(cudaMemcpy3DAsync(p, stream));
1082 inline static hipError_t
hipMemcpy2DAsync(
void* dst,
size_t dpitch,
const void* src,
size_t spitch,
1083 size_t width,
size_t height, hipMemcpyKind kind,
1084 hipStream_t stream) {
1085 return hipCUDAErrorTohipError(cudaMemcpy2DAsync(dst, dpitch, src, spitch, width, height,
1086 hipMemcpyKindToCudaMemcpyKind(kind), stream));
1090 const void* src,
size_t spitch,
size_t width,
1091 size_t height, hipMemcpyKind kind) {
1092 return hipCUDAErrorTohipError(cudaMemcpy2DToArray(dst, wOffset, hOffset, src, spitch, width,
1093 height, hipMemcpyKindToCudaMemcpyKind(kind)));
1097 size_t hOffset,
const void* src,
1098 size_t count, hipMemcpyKind kind) {
1099 return hipCUDAErrorTohipError(
1100 cudaMemcpyToArray(dst, wOffset, hOffset, src, count, hipMemcpyKindToCudaMemcpyKind(kind)));
1104 size_t wOffset,
size_t hOffset,
1105 size_t count, hipMemcpyKind kind) {
1106 return hipCUDAErrorTohipError(cudaMemcpyFromArray(dst, srcArray, wOffset, hOffset, count,
1107 hipMemcpyKindToCudaMemcpyKind(kind)));
1112 return hipCUResultTohipError(cuMemcpyAtoH(dst, (CUarray)srcArray, srcOffset, count));
1117 return hipCUResultTohipError(cuMemcpyHtoA((CUarray)dstArray, dstOffset, srcHost, count));
1121 return hipCUDAErrorTohipError(cudaDeviceSynchronize());
1125 return hipCUDAErrorTohipError(cudaDeviceGetCacheConfig(pCacheConfig));
1129 return hipCUDAErrorTohipError(cudaFuncSetAttribute(func, attr, value));
1133 return hipCUDAErrorTohipError(cudaDeviceSetCacheConfig(cacheConfig));
1137 return hipCUDAErrorTohipError(cudaFuncSetSharedMemConfig(func, config));
1141 return cudaGetErrorString(hipErrorToCudaError(error));
1145 return cudaGetErrorName(hipErrorToCudaError(error));
1149 return hipCUDAErrorTohipError(cudaGetDeviceCount(count));
1153 return hipCUDAErrorTohipError(cudaGetDevice(device));
1157 return hipCUDAErrorTohipError(cudaIpcCloseMemHandle(devPtr));
1160 inline static hipError_t hipIpcGetEventHandle(
hipIpcEventHandle_t* handle, hipEvent_t event) {
1161 return hipCUDAErrorTohipError(cudaIpcGetEventHandle(handle, event));
1165 return hipCUDAErrorTohipError(cudaIpcGetMemHandle(handle, devPtr));
1168 inline static hipError_t hipIpcOpenEventHandle(hipEvent_t* event,
hipIpcEventHandle_t handle) {
1169 return hipCUDAErrorTohipError(cudaIpcOpenEventHandle(event, handle));
1173 unsigned int flags) {
1174 return hipCUDAErrorTohipError(cudaIpcOpenMemHandle(devPtr, handle, flags));
1177 inline static hipError_t
hipMemset(
void* devPtr,
int value,
size_t count) {
1178 return hipCUDAErrorTohipError(cudaMemset(devPtr, value, count));
1181 inline static hipError_t
hipMemsetD32(hipDeviceptr_t devPtr,
int value,
size_t count) {
1182 return hipCUResultTohipError(cuMemsetD32(devPtr, value, count));
1185 inline static hipError_t
hipMemsetAsync(
void* devPtr,
int value,
size_t count,
1186 hipStream_t stream __dparm(0)) {
1187 return hipCUDAErrorTohipError(cudaMemsetAsync(devPtr, value, count, stream));
1190 inline static hipError_t
hipMemsetD32Async(hipDeviceptr_t devPtr,
int value,
size_t count,
1191 hipStream_t stream __dparm(0)) {
1192 return hipCUResultTohipError(cuMemsetD32Async(devPtr, value, count, stream));
1195 inline static hipError_t
hipMemsetD8(hipDeviceptr_t dest,
unsigned char value,
size_t sizeBytes) {
1196 return hipCUResultTohipError(cuMemsetD8(dest, value, sizeBytes));
1199 inline static hipError_t
hipMemsetD8Async(hipDeviceptr_t dest,
unsigned char value,
size_t sizeBytes,
1200 hipStream_t stream __dparm(0)) {
1201 return hipCUResultTohipError(cuMemsetD8Async(dest, value, sizeBytes, stream));
1204 inline static hipError_t
hipMemsetD16(hipDeviceptr_t dest,
unsigned short value,
size_t sizeBytes) {
1205 return hipCUResultTohipError(cuMemsetD16(dest, value, sizeBytes));
1208 inline static hipError_t
hipMemsetD16Async(hipDeviceptr_t dest,
unsigned short value,
size_t sizeBytes,
1209 hipStream_t stream __dparm(0)) {
1210 return hipCUResultTohipError(cuMemsetD16Async(dest, value, sizeBytes, stream));
1213 inline static hipError_t
hipMemset2D(
void* dst,
size_t pitch,
int value,
size_t width,
size_t height) {
1214 return hipCUDAErrorTohipError(cudaMemset2D(dst, pitch, value, width, height));
1217 inline static hipError_t
hipMemset2DAsync(
void* dst,
size_t pitch,
int value,
size_t width,
size_t height, hipStream_t stream __dparm(0)) {
1218 return hipCUDAErrorTohipError(cudaMemset2DAsync(dst, pitch, value, width, height, stream));
1222 return hipCUDAErrorTohipError(cudaMemset3D(pitchedDevPtr, value, extent));
1226 return hipCUDAErrorTohipError(cudaMemset3DAsync(pitchedDevPtr, value, extent, stream));
1230 struct cudaDeviceProp cdprop;
1232 cerror = cudaGetDeviceProperties(&cdprop, device);
1234 strncpy(p_prop->
name, cdprop.name, 256);
1238 p_prop->
warpSize = cdprop.warpSize;
1240 for (
int i = 0; i < 3; i++) {
1248 p_prop->
major = cdprop.major;
1249 p_prop->
minor = cdprop.minor;
1256 int ccVers = p_prop->
major * 100 + p_prop->
minor * 10;
1277 p_prop->
pciBusID = cdprop.pciBusID;
1298 p_prop->
memPitch = cdprop.memPitch;
1305 return hipCUDAErrorTohipError(cerror);
1309 enum cudaDeviceAttr cdattr;
1314 cdattr = cudaDevAttrMaxThreadsPerBlock;
1317 cdattr = cudaDevAttrMaxBlockDimX;
1320 cdattr = cudaDevAttrMaxBlockDimY;
1323 cdattr = cudaDevAttrMaxBlockDimZ;
1326 cdattr = cudaDevAttrMaxGridDimX;
1329 cdattr = cudaDevAttrMaxGridDimY;
1332 cdattr = cudaDevAttrMaxGridDimZ;
1335 cdattr = cudaDevAttrMaxSharedMemoryPerBlock;
1338 cdattr = cudaDevAttrTotalConstantMemory;
1341 cdattr = cudaDevAttrWarpSize;
1344 cdattr = cudaDevAttrMaxRegistersPerBlock;
1347 cdattr = cudaDevAttrClockRate;
1350 cdattr = cudaDevAttrMemoryClockRate;
1353 cdattr = cudaDevAttrGlobalMemoryBusWidth;
1356 cdattr = cudaDevAttrMultiProcessorCount;
1359 cdattr = cudaDevAttrComputeMode;
1362 cdattr = cudaDevAttrL2CacheSize;
1365 cdattr = cudaDevAttrMaxThreadsPerMultiProcessor;
1368 cdattr = cudaDevAttrComputeCapabilityMajor;
1371 cdattr = cudaDevAttrComputeCapabilityMinor;
1374 cdattr = cudaDevAttrConcurrentKernels;
1377 cdattr = cudaDevAttrPciBusId;
1380 cdattr = cudaDevAttrPciDeviceId;
1383 cdattr = cudaDevAttrMaxSharedMemoryPerMultiprocessor;
1386 cdattr = cudaDevAttrIsMultiGpuBoard;
1389 cdattr = cudaDevAttrIntegrated;
1392 cdattr = cudaDevAttrMaxTexture1DWidth;
1395 cdattr = cudaDevAttrMaxTexture2DWidth;
1398 cdattr = cudaDevAttrMaxTexture2DHeight;
1401 cdattr = cudaDevAttrMaxTexture3DWidth;
1404 cdattr = cudaDevAttrMaxTexture3DHeight;
1407 cdattr = cudaDevAttrMaxTexture3DDepth;
1410 cdattr = cudaDevAttrMaxPitch;
1413 cdattr = cudaDevAttrTextureAlignment;
1416 cdattr = cudaDevAttrTexturePitchAlignment;
1419 cdattr = cudaDevAttrKernelExecTimeout;
1422 cdattr = cudaDevAttrCanMapHostMemory;
1425 cdattr = cudaDevAttrEccEnabled;
1428 cdattr = cudaDevAttrCooperativeLaunch;
1431 cdattr = cudaDevAttrCooperativeMultiDeviceLaunch;
1434 return hipCUDAErrorTohipError(cudaErrorInvalidValue);
1437 cerror = cudaDeviceGetAttribute(pi, cdattr, device);
1439 return hipCUDAErrorTohipError(cerror);
1445 size_t dynamicSMemSize) {
1446 return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, func,
1447 blockSize, dynamicSMemSize));
1453 size_t dynamicSMemSize,
1454 unsigned int flags) {
1455 return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks, func,
1456 blockSize, dynamicSMemSize, flags));
1462 size_t dynamicSMemSize ){
1463 return hipCUResultTohipError(cuOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, f,
1464 blockSize, dynamicSMemSize));
1470 size_t dynamicSMemSize,
1471 unsigned int flags ) {
1472 return hipCUResultTohipError(cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks,f,
1473 blockSize, dynamicSMemSize, flags));
1478 hipFunction_t f,
size_t dynSharedMemPerBlk,
1479 int blockSizeLimit){
1480 return hipCUResultTohipError(cuOccupancyMaxPotentialBlockSize(gridSize, blockSize, f, NULL,
1481 dynSharedMemPerBlk, blockSizeLimit));
1486 hipFunction_t f,
size_t dynSharedMemPerBlk,
1487 int blockSizeLimit,
unsigned int flags){
1488 return hipCUResultTohipError(cuOccupancyMaxPotentialBlockSizeWithFlags(gridSize, blockSize, f, NULL,
1489 dynSharedMemPerBlk, blockSizeLimit, flags));
1493 struct cudaPointerAttributes cPA;
1494 hipError_t err = hipCUDAErrorTohipError(cudaPointerGetAttributes(&cPA, ptr));
1496 #if (CUDART_VERSION >= 11000)
1497 auto memType = cPA.type;
1499 unsigned memType = cPA.memoryType;
1502 case cudaMemoryTypeDevice:
1505 case cudaMemoryTypeHost:
1509 return hipErrorUnknown;
1511 attributes->device = cPA.device;
1512 attributes->devicePointer = cPA.devicePointer;
1513 attributes->hostPointer = cPA.hostPointer;
1514 attributes->isManaged = 0;
1515 attributes->allocationFlags = 0;
1520 inline static hipError_t
hipMemGetInfo(
size_t* free,
size_t* total) {
1521 return hipCUDAErrorTohipError(cudaMemGetInfo(free, total));
1525 return hipCUDAErrorTohipError(cudaEventCreate(event));
1528 inline static hipError_t
hipEventRecord(hipEvent_t event, hipStream_t stream __dparm(NULL)) {
1529 return hipCUDAErrorTohipError(cudaEventRecord(event, stream));
1533 return hipCUDAErrorTohipError(cudaEventSynchronize(event));
1536 inline static hipError_t
hipEventElapsedTime(
float* ms, hipEvent_t start, hipEvent_t stop) {
1537 return hipCUDAErrorTohipError(cudaEventElapsedTime(ms, start, stop));
1541 return hipCUDAErrorTohipError(cudaEventDestroy(event));
1545 return hipCUDAErrorTohipError(cudaStreamCreateWithFlags(stream, flags));
1549 return hipCUDAErrorTohipError(cudaStreamCreateWithPriority(stream, flags, priority));
1553 return hipCUDAErrorTohipError(cudaDeviceGetStreamPriorityRange(leastPriority, greatestPriority));
1557 return hipCUDAErrorTohipError(cudaStreamCreate(stream));
1561 return hipCUDAErrorTohipError(cudaStreamSynchronize(stream));
1565 return hipCUDAErrorTohipError(cudaStreamDestroy(stream));
1568 inline static hipError_t
hipStreamGetFlags(hipStream_t stream,
unsigned int *flags) {
1569 return hipCUDAErrorTohipError(cudaStreamGetFlags(stream, flags));
1573 return hipCUDAErrorTohipError(cudaStreamGetPriority(stream, priority));
1577 unsigned int flags) {
1578 return hipCUDAErrorTohipError(cudaStreamWaitEvent(stream, event, flags));
1582 return hipCUDAErrorTohipError(cudaStreamQuery(stream));
1586 void* userData,
unsigned int flags) {
1587 return hipCUDAErrorTohipError(
1588 cudaStreamAddCallback(stream, (cudaStreamCallback_t)callback, userData, flags));
1592 cudaError_t err = cudaDriverGetVersion(driverVersion);
1597 return hipCUDAErrorTohipError(err);
1601 return hipCUDAErrorTohipError(cudaRuntimeGetVersion(runtimeVersion));
1605 return hipCUDAErrorTohipError(cudaDeviceCanAccessPeer(canAccessPeer, device, peerDevice));
1609 return hipCUDAErrorTohipError(cudaDeviceDisablePeerAccess(peerDevice));
1613 return hipCUDAErrorTohipError(cudaDeviceEnablePeerAccess(peerDevice, flags));
1617 return hipCUResultTohipError(cuCtxDisablePeerAccess(peerCtx));
1621 return hipCUResultTohipError(cuCtxEnablePeerAccess(peerCtx, flags));
1626 return hipCUResultTohipError(cuDevicePrimaryCtxGetState(dev, flags, active));
1630 return hipCUResultTohipError(cuDevicePrimaryCtxRelease(dev));
1634 return hipCUResultTohipError(cuDevicePrimaryCtxRetain(pctx, dev));
1638 return hipCUResultTohipError(cuDevicePrimaryCtxReset(dev));
1642 return hipCUResultTohipError(cuDevicePrimaryCtxSetFlags(dev, flags));
1646 hipDeviceptr_t dptr) {
1647 return hipCUResultTohipError(cuMemGetAddressRange(pbase, psize, dptr));
1650 inline static hipError_t
hipMemcpyPeer(
void* dst,
int dstDevice,
const void* src,
int srcDevice,
1652 return hipCUDAErrorTohipError(cudaMemcpyPeer(dst, dstDevice, src, srcDevice, count));
1655 inline static hipError_t
hipMemcpyPeerAsync(
void* dst,
int dstDevice,
const void* src,
1656 int srcDevice,
size_t count,
1657 hipStream_t stream __dparm(0)) {
1658 return hipCUDAErrorTohipError(
1659 cudaMemcpyPeerAsync(dst, dstDevice, src, srcDevice, count, stream));
1663 inline static hipError_t
hipProfilerStart() {
return hipCUDAErrorTohipError(cudaProfilerStart()); }
1665 inline static hipError_t
hipProfilerStop() {
return hipCUDAErrorTohipError(cudaProfilerStop()); }
1668 return hipCUDAErrorTohipError(cudaGetDeviceFlags(flags));
1672 return hipCUDAErrorTohipError(cudaSetDeviceFlags(flags));
1676 return hipCUDAErrorTohipError(cudaEventCreateWithFlags(event, flags));
1680 return hipCUDAErrorTohipError(cudaEventQuery(event));
1683 inline static hipError_t
hipCtxCreate(hipCtx_t* ctx,
unsigned int flags, hipDevice_t device) {
1684 return hipCUResultTohipError(cuCtxCreate(ctx, flags, device));
1688 return hipCUResultTohipError(cuCtxDestroy(ctx));
1692 return hipCUResultTohipError(cuCtxPopCurrent(ctx));
1696 return hipCUResultTohipError(cuCtxPushCurrent(ctx));
1700 return hipCUResultTohipError(cuCtxSetCurrent(ctx));
1704 return hipCUResultTohipError(cuCtxGetCurrent(ctx));
1708 return hipCUResultTohipError(cuCtxGetDevice(device));
1712 return hipCUResultTohipError(cuCtxGetApiVersion(ctx, (
unsigned int*)apiVersion));
1716 return hipCUResultTohipError(cuCtxGetCacheConfig(cacheConfig));
1720 return hipCUResultTohipError(cuCtxSetCacheConfig(cacheConfig));
1724 return hipCUResultTohipError(cuCtxSetSharedMemConfig((CUsharedconfig)config));
1728 return hipCUResultTohipError(cuCtxGetSharedMemConfig((CUsharedconfig*)pConfig));
1732 return hipCUResultTohipError(cuCtxSynchronize());
1736 return hipCUResultTohipError(cuCtxGetFlags(flags));
1739 inline static hipError_t hipCtxDetach(hipCtx_t ctx) {
1740 return hipCUResultTohipError(cuCtxDetach(ctx));
1743 inline static hipError_t
hipDeviceGet(hipDevice_t* device,
int ordinal) {
1744 return hipCUResultTohipError(cuDeviceGet(device, ordinal));
1748 return hipCUResultTohipError(cuDeviceComputeCapability(major, minor, device));
1751 inline static hipError_t
hipDeviceGetName(
char* name,
int len, hipDevice_t device) {
1752 return hipCUResultTohipError(cuDeviceGetName(name, len, device));
1756 int srcDevice,
int dstDevice) {
1757 return hipCUDAErrorTohipError(cudaDeviceGetP2PAttribute(value, attr, srcDevice, dstDevice));
1761 return hipCUDAErrorTohipError(cudaDeviceGetPCIBusId(pciBusId, len, device));
1765 return hipCUDAErrorTohipError(cudaDeviceGetByPCIBusId(device, pciBusId));
1769 return hipCUDAErrorTohipError(cudaDeviceGetSharedMemConfig(config));
1773 return hipCUDAErrorTohipError(cudaDeviceSetSharedMemConfig(config));
1777 return hipCUDAErrorTohipError(cudaDeviceGetLimit(pValue, limit));
1781 return hipCUResultTohipError(cuDeviceTotalMem(bytes, device));
1784 inline static hipError_t
hipModuleLoad(hipModule_t* module,
const char* fname) {
1785 return hipCUResultTohipError(cuModuleLoad(module, fname));
1789 return hipCUResultTohipError(cuModuleUnload(hmod));
1793 const char* kname) {
1794 return hipCUResultTohipError(cuModuleGetFunction(
function, module, kname));
1797 inline static hipError_t
hipModuleGetTexRef(hipTexRef* pTexRef, hipModule_t hmod,
const char* name){
1798 hipCUResultTohipError(cuModuleGetTexRef(pTexRef, hmod, name));
1802 return hipCUDAErrorTohipError(cudaFuncGetAttributes(attr, func));
1805 inline static hipError_t
hipFuncGetAttribute (
int* value, hipFunction_attribute attrib, hipFunction_t hfunc) {
1806 return hipCUResultTohipError(cuFuncGetAttribute(value, attrib, hfunc));
1809 inline static hipError_t
hipModuleGetGlobal(hipDeviceptr_t* dptr,
size_t* bytes, hipModule_t hmod,
1811 return hipCUResultTohipError(cuModuleGetGlobal(dptr, bytes, hmod, name));
1814 inline static hipError_t
hipModuleLoadData(hipModule_t* module,
const void* image) {
1815 return hipCUResultTohipError(cuModuleLoadData(module, image));
1819 unsigned int numOptions, hipJitOption* options,
1820 void** optionValues) {
1821 return hipCUResultTohipError(
1822 cuModuleLoadDataEx(module, image, numOptions, options, optionValues));
1826 dim3 dimBlocks,
void** args,
size_t sharedMemBytes,
1829 return hipCUDAErrorTohipError(cudaLaunchKernel(function_address,numBlocks,dimBlocks,args,sharedMemBytes,stream));
1833 unsigned int gridDimY,
unsigned int gridDimZ,
1834 unsigned int blockDimX,
unsigned int blockDimY,
1835 unsigned int blockDimZ,
unsigned int sharedMemBytes,
1836 hipStream_t stream,
void** kernelParams,
1838 return hipCUResultTohipError(cuLaunchKernel(f, gridDimX, gridDimY, gridDimZ, blockDimX,
1839 blockDimY, blockDimZ, sharedMemBytes, stream,
1840 kernelParams, extra));
1844 return hipCUDAErrorTohipError(cudaFuncSetCacheConfig(func, cacheConfig));
1847 __HIP_DEPRECATED
inline static hipError_t hipBindTexture(
size_t* offset,
1851 size_t size __dparm(UINT_MAX)) {
1852 return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, desc, size));
1855 __HIP_DEPRECATED
inline static hipError_t hipBindTexture2D(
1858 return hipCUDAErrorTohipError(cudaBindTexture2D(offset, tex, devPtr, desc, width, height, pitch));
1862 hipChannelFormatKind f) {
1863 return cudaCreateChannelDesc(x, y, z, w, hipChannelFormatKindToCudaChannelFormatKind(f));
1866 inline static hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject,
1870 return hipCUDAErrorTohipError(
1871 cudaCreateTextureObject(pTexObject, pResDesc, pTexDesc, pResViewDesc));
1874 inline static hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject) {
1875 return hipCUDAErrorTohipError(cudaDestroyTextureObject(textureObject));
1880 return hipCUDAErrorTohipError(cudaCreateSurfaceObject(pSurfObject, pResDesc));
1884 return hipCUDAErrorTohipError(cudaDestroySurfaceObject(surfaceObject));
1887 inline static hipError_t hipGetTextureObjectResourceDesc(
hipResourceDesc* pResDesc,
1888 hipTextureObject_t textureObject) {
1889 return hipCUDAErrorTohipError(cudaGetTextureObjectResourceDesc( pResDesc, textureObject));
1892 __HIP_DEPRECATED
inline static hipError_t hipGetTextureAlignmentOffset(
1894 return hipCUDAErrorTohipError(cudaGetTextureAlignmentOffset(offset,texref));
1899 return hipCUDAErrorTohipError(cudaGetChannelDesc(desc,array));
1902 inline static hipError_t hipLaunchCooperativeKernel(
const void* f,
dim3 gridDim,
dim3 blockDim,
1903 void** kernelParams,
unsigned int sharedMemBytes,
1904 hipStream_t stream) {
1905 return hipCUDAErrorTohipError(
1906 cudaLaunchCooperativeKernel(f, gridDim, blockDim, kernelParams, sharedMemBytes, stream));
1909 inline static hipError_t hipLaunchCooperativeKernelMultiDevice(
hipLaunchParams* launchParamsList,
1910 int numDevices,
unsigned int flags) {
1911 return hipCUDAErrorTohipError(cudaLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags));
1924 size_t dynamicSMemSize) {
1925 return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, func,
1926 blockSize, dynamicSMemSize));
1931 size_t dynamicSMemSize = 0,
1932 int blockSizeLimit = 0) {
1933 return hipCUDAErrorTohipError(cudaOccupancyMaxPotentialBlockSize(minGridSize, blockSize, func,
1934 dynamicSMemSize, blockSizeLimit));
1938 inline static hipError_t hipOccupancyMaxPotentialBlockSizeWithFlags(
int* minGridSize,
int* blockSize, T func,
1939 size_t dynamicSMemSize = 0,
1940 int blockSizeLimit = 0,
unsigned int flags = 0) {
1941 return hipCUDAErrorTohipError(cudaOccupancyMaxPotentialBlockSize(minGridSize, blockSize, func,
1942 dynamicSMemSize, blockSizeLimit, flags));
1947 int blockSize,
size_t dynamicSMemSize,
unsigned int flags) {
1948 return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks, func,
1949 blockSize, dynamicSMemSize, flags));
1952 template <
class T,
int dim, enum cudaTextureReadMode readMode>
1953 inline static hipError_t hipBindTexture(
size_t* offset,
const struct texture<T, dim, readMode>& tex,
1954 const void* devPtr,
size_t size = UINT_MAX) {
1955 return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, size));
1958 template <
class T,
int dim, enum cudaTextureReadMode readMode>
1959 inline static hipError_t hipBindTexture(
size_t* offset,
struct texture<T, dim, readMode>& tex,
1961 size_t size = UINT_MAX) {
1962 return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, desc, size));
1965 template <
class T,
int dim, enum cudaTextureReadMode readMode>
1966 __HIP_DEPRECATED
inline static hipError_t hipUnbindTexture(
struct texture<T, dim, readMode>* tex) {
1967 return hipCUDAErrorTohipError(cudaUnbindTexture(tex));
1970 template <
class T,
int dim, enum cudaTextureReadMode readMode>
1971 __HIP_DEPRECATED
inline static hipError_t hipUnbindTexture(
struct texture<T, dim, readMode>& tex) {
1972 return hipCUDAErrorTohipError(cudaUnbindTexture(tex));
1975 template <
class T,
int dim, enum cudaTextureReadMode readMode>
1976 __HIP_DEPRECATED
inline static hipError_t hipBindTextureToArray(
1979 return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array, desc));
1982 template <
class T,
int dim, enum cudaTextureReadMode readMode>
1983 __HIP_DEPRECATED
inline static hipError_t hipBindTextureToArray(
1986 return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array, desc));
1989 template <
class T,
int dim, enum cudaTextureReadMode readMode>
1990 __HIP_DEPRECATED
inline static hipError_t hipBindTextureToArray(
1992 return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array));
1997 return cudaCreateChannelDesc<T>();
2001 inline static hipError_t hipLaunchCooperativeKernel(T f,
dim3 gridDim,
dim3 blockDim,
2002 void** kernelParams,
unsigned int sharedMemBytes, hipStream_t stream) {
2003 return hipCUDAErrorTohipError(
2004 cudaLaunchCooperativeKernel(
reinterpret_cast<const void*
>(f), gridDim, blockDim, kernelParams, sharedMemBytes, stream));
2007 inline static hipError_t hipTexRefSetAddressMode(hipTexRef hTexRef,
int dim, hipAddress_mode am){
2008 return hipCUResultTohipError(cuTexRefSetAddressMode(hTexRef,dim,am));
2011 inline static hipError_t hipTexRefSetFilterMode(hipTexRef hTexRef, hipFilter_mode fm){
2012 return hipCUResultTohipError(cuTexRefSetFilterMode(hTexRef,fm));
2015 inline static hipError_t hipTexRefSetAddress(
size_t *ByteOffset, hipTexRef hTexRef, hipDeviceptr_t dptr,
size_t bytes){
2016 return hipCUResultTohipError(cuTexRefSetAddress(ByteOffset,hTexRef,dptr,bytes));
2019 inline static hipError_t hipTexRefSetAddress2D(hipTexRef hTexRef,
const CUDA_ARRAY_DESCRIPTOR *desc, hipDeviceptr_t dptr,
size_t Pitch){
2020 return hipCUResultTohipError(cuTexRefSetAddress2D(hTexRef,desc,dptr,Pitch));
2023 inline static hipError_t hipTexRefSetFormat(hipTexRef hTexRef, hipArray_Format fmt,
int NumPackedComponents){
2024 return hipCUResultTohipError(cuTexRefSetFormat(hTexRef,fmt,NumPackedComponents));
2027 inline static hipError_t hipTexRefSetFlags(hipTexRef hTexRef,
unsigned int Flags){
2028 return hipCUResultTohipError(cuTexRefSetFlags(hTexRef,Flags));
2031 inline static hipError_t hipTexRefSetArray(hipTexRef hTexRef,
hiparray hArray,
unsigned int Flags){
2032 return hipCUResultTohipError(cuTexRefSetArray(hTexRef,hArray,Flags));
2036 return hipCUResultTohipError(cuArrayCreate(pHandle, pAllocateArray));
2039 inline static hipError_t hipArrayDestroy(
hiparray hArray){
2040 return hipCUResultTohipError(cuArrayDestroy(hArray));
2045 #endif // HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_API_H