HIP: Heterogeneous-computing Interface for Portability
|
23 #ifndef HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_API_H
24 #define HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_API_H
26 #include <cuda_runtime_api.h>
28 #include <cuda_profiler_api.h>
// Default-argument helper: `T name __dparm(v)` expands to `T name = v`.
35 #define __dparm(x) = x
// __HIP_DEPRECATED marks deprecated entry points. It expands to nothing when
// __DOXYGEN_ONLY__ or HIP_ENABLE_DEPRECATED is defined, to __declspec(deprecated)
// under _MSC_VER, and to __attribute__((deprecated)) under __GNUC__.
// NOTE(review): the embedded listing numbers skip 47 and 49, so the `#else` in front
// of the last definition and the closing `#endif` are presumably lost - confirm
// against the original header.
41 #if defined(__DOXYGEN_ONLY__) || defined(HIP_ENABLE_DEPRECATED)
42 #define __HIP_DEPRECATED
43 #elif defined(_MSC_VER)
44 #define __HIP_DEPRECATED __declspec(deprecated)
45 #elif defined(__GNUC__)
46 #define __HIP_DEPRECATED __attribute__((deprecated))
48 #define __HIP_DEPRECATED
// Copy-direction selector taken by the hipMemcpy* wrappers in this header.
// NOTE(review): the listing skips numbers 58 and 62 onwards, so at least one
// enumerator (hipMemcpyHostToHost is referenced by hipMemcpyKindToCudaMemcpyKind)
// and the closing of the enum are not visible here - confirm against the original.
57 typedef enum hipMemcpyKind {
59 hipMemcpyHostToDevice,
60 hipMemcpyDeviceToHost,
61 hipMemcpyDeviceToDevice,
// Data-type tags: hipDataType and each HIP_* tag are textual aliases of the
// CUDA name with the same suffix.
66 #define hipDataType cudaDataType
67 #define HIP_R_16F CUDA_R_16F
68 #define HIP_R_32F CUDA_R_32F
69 #define HIP_R_64F CUDA_R_64F
70 #define HIP_C_16F CUDA_C_16F
71 #define HIP_C_32F CUDA_C_32F
72 #define HIP_C_64F CUDA_C_64F
// Library-property selectors, aliased to the unprefixed CUDA identifiers.
75 #define hipLibraryPropertyType libraryPropertyType
76 #define HIP_LIBRARY_MAJOR_VERSION MAJOR_VERSION
77 #define HIP_LIBRARY_MINOR_VERSION MINOR_VERSION
78 #define HIP_LIBRARY_PATCH_LEVEL PATCH_LEVEL
// Texture address modes: the HIP type is a typedef of the CUDA enum and every
// hipAddressMode* name expands to the CUDA enumerator.
81 typedef enum cudaTextureAddressMode hipTextureAddressMode;
82 #define hipAddressModeWrap cudaAddressModeWrap
83 #define hipAddressModeClamp cudaAddressModeClamp
84 #define hipAddressModeMirror cudaAddressModeMirror
85 #define hipAddressModeBorder cudaAddressModeBorder
// Texture filter modes (same typedef-plus-alias scheme).
88 typedef enum cudaTextureFilterMode hipTextureFilterMode;
89 #define hipFilterModePoint cudaFilterModePoint
90 #define hipFilterModeLinear cudaFilterModeLinear
// Texture read modes.
93 typedef enum cudaTextureReadMode hipTextureReadMode;
94 #define hipReadModeElementType cudaReadModeElementType
95 #define hipReadModeNormalizedFloat cudaReadModeNormalizedFloat
// Channel format kinds.
98 typedef enum cudaChannelFormatKind hipChannelFormatKind;
99 #define hipChannelFormatKindSigned cudaChannelFormatKindSigned
100 #define hipChannelFormatKindUnsigned cudaChannelFormatKindUnsigned
101 #define hipChannelFormatKindFloat cudaChannelFormatKindFloat
102 #define hipChannelFormatKindNone cudaChannelFormatKindNone
// Surface boundary modes, aliased to the CUDA names.
104 #define hipSurfaceBoundaryMode cudaSurfaceBoundaryMode
105 #define hipBoundaryModeZero cudaBoundaryModeZero
106 #define hipBoundaryModeTrap cudaBoundaryModeTrap
107 #define hipBoundaryModeClamp cudaBoundaryModeClamp
// Cache-configuration preferences, aliased to the CUDA runtime enumerators.
110 #define hipFuncCachePreferNone cudaFuncCachePreferNone
111 #define hipFuncCachePreferShared cudaFuncCachePreferShared
112 #define hipFuncCachePreferL1 cudaFuncCachePreferL1
113 #define hipFuncCachePreferEqual cudaFuncCachePreferEqual
// Resource types, aliased to the CUDA names.
116 #define hipResourceType cudaResourceType
117 #define hipResourceTypeArray cudaResourceTypeArray
118 #define hipResourceTypeMipmappedArray cudaResourceTypeMipmappedArray
119 #define hipResourceTypeLinear cudaResourceTypeLinear
120 #define hipResourceTypePitch2D cudaResourceTypePitch2D
// Event creation flags, aliased to the CUDA flags.
126 #define hipEventDefault cudaEventDefault
127 #define hipEventBlockingSync cudaEventBlockingSync
128 #define hipEventDisableTiming cudaEventDisableTiming
129 #define hipEventInterprocess cudaEventInterprocess
// These two are defined as literal 0 instead of a CUDA flag, so OR-ing them into
// a flag word changes nothing.
130 #define hipEventReleaseToDevice 0
131 #define hipEventReleaseToSystem 0
// Pinned host allocation flags, aliased to the cudaHostAlloc* flags.
134 #define hipHostMallocDefault cudaHostAllocDefault
135 #define hipHostMallocPortable cudaHostAllocPortable
136 #define hipHostMallocMapped cudaHostAllocMapped
137 #define hipHostMallocWriteCombined cudaHostAllocWriteCombined
// Coherency flags are literal 0x0 here (no CUDA flag is referenced).
138 #define hipHostMallocCoherent 0x0
139 #define hipHostMallocNonCoherent 0x0
// Managed-memory attach flags.
141 #define hipMemAttachGlobal cudaMemAttachGlobal
142 #define hipMemAttachHost cudaMemAttachHost
// Host registration flags.
144 #define hipHostRegisterDefault cudaHostRegisterDefault
145 #define hipHostRegisterPortable cudaHostRegisterPortable
146 #define hipHostRegisterMapped cudaHostRegisterMapped
147 #define hipHostRegisterIoMemory cudaHostRegisterIoMemory
// Launch-parameter markers, aliased to the CU_LAUNCH_PARAM_* driver macros.
149 #define HIP_LAUNCH_PARAM_BUFFER_POINTER CU_LAUNCH_PARAM_BUFFER_POINTER
150 #define HIP_LAUNCH_PARAM_BUFFER_SIZE CU_LAUNCH_PARAM_BUFFER_SIZE
151 #define HIP_LAUNCH_PARAM_END CU_LAUNCH_PARAM_END
// Limit, IPC and occupancy names, aliased to the CUDA runtime names.
152 #define hipLimitMallocHeapSize cudaLimitMallocHeapSize
153 #define hipIpcMemLazyEnablePeerAccess cudaIpcMemLazyEnablePeerAccess
155 #define hipOccupancyDefault cudaOccupancyDefault
// Multi-device cooperative launch flags (each definition continues onto the next line).
157 #define hipCooperativeLaunchMultiDeviceNoPreSync \
158 cudaCooperativeLaunchMultiDeviceNoPreSync
159 #define hipCooperativeLaunchMultiDeviceNoPostSync \
160 cudaCooperativeLaunchMultiDeviceNoPostSync
// JIT option names: each hipJitOption* expands to a CU_JIT_* driver enumerator.
164 #define hipJitOptionMaxRegisters CU_JIT_MAX_REGISTERS
165 #define hipJitOptionThreadsPerBlock CU_JIT_THREADS_PER_BLOCK
166 #define hipJitOptionWallTime CU_JIT_WALL_TIME
167 #define hipJitOptionInfoLogBuffer CU_JIT_INFO_LOG_BUFFER
168 #define hipJitOptionInfoLogBufferSizeBytes CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES
169 #define hipJitOptionErrorLogBuffer CU_JIT_ERROR_LOG_BUFFER
170 #define hipJitOptionErrorLogBufferSizeBytes CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES
171 #define hipJitOptionOptimizationLevel CU_JIT_OPTIMIZATION_LEVEL
172 #define hipJitOptionTargetFromContext CU_JIT_TARGET_FROM_CUCONTEXT
173 #define hipJitOptionTarget CU_JIT_TARGET
174 #define hipJitOptionFallbackStrategy CU_JIT_FALLBACK_STRATEGY
175 #define hipJitOptionGenerateDebugInfo CU_JIT_GENERATE_DEBUG_INFO
176 #define hipJitOptionLogVerbose CU_JIT_LOG_VERBOSE
177 #define hipJitOptionGenerateLineInfo CU_JIT_GENERATE_LINE_INFO
178 #define hipJitOptionCacheMode CU_JIT_CACHE_MODE
// Note the differing spelling on the CUDA side: CU_JIT_NEW_SM3X_OPT.
179 #define hipJitOptionSm3xOpt CU_JIT_NEW_SM3X_OPT
180 #define hipJitOptionFastCompile CU_JIT_FAST_COMPILE
181 #define hipJitOptionNumOptions CU_JIT_NUM_OPTIONS
// HIP type names defined directly as typedefs of CUDA runtime enums (cudaLimit,
// cudaDeviceP2PAttr) and CUDA driver types (CUfunc_cache, CUjit_option, CUdevice,
// CUdeviceptr).
187 typedef enum cudaLimit hipLimit_t;
191 typedef CUfunc_cache hipFuncCache;
192 typedef CUjit_option hipJitOption;
193 typedef CUdevice hipDevice_t;
194 typedef enum cudaDeviceP2PAttr hipDeviceP2PAttr;
197 typedef CUdeviceptr hipDeviceptr_t;
// Function-attribute, 2D-copy descriptor and 3D-copy parameter type aliases.
203 #define hipFunction_attribute CUfunction_attribute
204 #define hip_Memcpy2D CUDA_MEMCPY2D
205 #define hipMemcpy3DParms cudaMemcpy3DParms
// Array creation flags, aliased to the CUDA flags.
206 #define hipArrayDefault cudaArrayDefault
207 #define hipArrayLayered cudaArrayLayered
208 #define hipArraySurfaceLoadStore cudaArraySurfaceLoadStore
209 #define hipArrayCubemap cudaArrayCubemap
210 #define hipArrayTextureGather cudaArrayTextureGather
// Texture object handle is the CUDA handle type.
212 typedef cudaTextureObject_t hipTextureObject_t;
// Texture dimensionality tags and the map-host device flag.
214 #define hipTextureType1D cudaTextureType1D
215 #define hipTextureType1DLayered cudaTextureType1DLayered
216 #define hipTextureType2D cudaTextureType2D
217 #define hipTextureType2DLayered cudaTextureType2DLayered
218 #define hipTextureType3D cudaTextureType3D
219 #define hipDeviceMapHost cudaDeviceMapHost
// Helper constructors: the make_hip* names expand to the make_cuda* functions.
223 #define make_hipExtent make_cudaExtent
224 #define make_hipPos make_cudaPos
225 #define make_hipPitchedPtr make_cudaPitchedPtr
// Stream creation flags.
227 #define hipStreamDefault cudaStreamDefault
228 #define hipStreamNonBlocking cudaStreamNonBlocking
// Shared-memory bank-size settings.
235 #define hipSharedMemBankSizeDefault cudaSharedMemBankSizeDefault
236 #define hipSharedMemBankSizeFourByte cudaSharedMemBankSizeFourByte
237 #define hipSharedMemBankSizeEightByte cudaSharedMemBankSizeEightByte
// Function attribute selectors: each HIP_FUNC_ATTRIBUTE_* expands to the
// CU_FUNC_ATTRIBUTE_* driver enumerator with the same suffix.
240 #define HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK
241 #define HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES
242 #define HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES
243 #define HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES
244 #define HIP_FUNC_ATTRIBUTE_NUM_REGS CU_FUNC_ATTRIBUTE_NUM_REGS
245 #define HIP_FUNC_ATTRIBUTE_PTX_VERSION CU_FUNC_ATTRIBUTE_PTX_VERSION
246 #define HIP_FUNC_ATTRIBUTE_BINARY_VERSION CU_FUNC_ATTRIBUTE_BINARY_VERSION
247 #define HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA CU_FUNC_ATTRIBUTE_CACHE_MODE_CA
248 #define HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES
249 #define HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT
250 #define HIP_FUNC_ATTRIBUTE_MAX CU_FUNC_ATTRIBUTE_MAX
// When CUDA_VERSION >= 9000, the mask-less shuffle names are redefined as variadic
// macros that forward to the *_sync intrinsics, always passing 0xffffffff as the
// participant mask ahead of the caller's arguments.
252 #if CUDA_VERSION >= 9000
253 #define __shfl(...) __shfl_sync(0xffffffff, __VA_ARGS__)
254 #define __shfl_up(...) __shfl_up_sync(0xffffffff, __VA_ARGS__)
255 #define __shfl_down(...) __shfl_down_sync(0xffffffff, __VA_ARGS__)
256 #define __shfl_xor(...) __shfl_xor_sync(0xffffffff, __VA_ARGS__)
257 #endif // CUDA_VERSION >= 9000
// Translates a CUDA runtime error code (cudaError_t) into a hipError_t by matching
// it against a list of case labels; the last visible statement returns
// hipErrorUnknown.
// NOTE(review): this listing is incomplete. The embedded numbers skip lines (for
// example 260-262, 298, 300, 304, 310), so the `switch` header, a number of
// `return` statements, the `#endif`s of the CUDA_VERSION guards and the closing
// braces are not visible. Case labels that look like fall-through here (such as
// cudaErrorLaunchFailure) most likely had their own return on a dropped line -
// verify against the original header before relying on this text.
259 inline static hipError_t hipCUDAErrorTohipError(cudaError_t cuError) {
263 case cudaErrorProfilerDisabled:
264 return hipErrorProfilerDisabled;
265 case cudaErrorProfilerNotInitialized:
266 return hipErrorProfilerNotInitialized;
267 case cudaErrorProfilerAlreadyStarted:
268 return hipErrorProfilerAlreadyStarted;
269 case cudaErrorProfilerAlreadyStopped:
270 return hipErrorProfilerAlreadyStopped;
271 case cudaErrorInsufficientDriver:
272 return hipErrorInsufficientDriver;
273 case cudaErrorUnsupportedLimit:
274 return hipErrorUnsupportedLimit;
275 case cudaErrorPeerAccessUnsupported:
276 return hipErrorPeerAccessUnsupported;
277 case cudaErrorInvalidGraphicsContext:
278 return hipErrorInvalidGraphicsContext;
279 case cudaErrorSharedObjectSymbolNotFound:
280 return hipErrorSharedObjectSymbolNotFound;
281 case cudaErrorSharedObjectInitFailed:
282 return hipErrorSharedObjectInitFailed;
283 case cudaErrorOperatingSystem:
284 return hipErrorOperatingSystem;
285 case cudaErrorSetOnActiveProcess:
286 return hipErrorSetOnActiveProcess;
287 case cudaErrorIllegalAddress:
288 return hipErrorIllegalAddress;
289 case cudaErrorInvalidSymbol:
290 return hipErrorInvalidSymbol;
291 case cudaErrorMissingConfiguration:
292 return hipErrorMissingConfiguration;
293 case cudaErrorMemoryAllocation:
294 return hipErrorOutOfMemory;
295 case cudaErrorInitializationError:
296 return hipErrorNotInitialized;
297 case cudaErrorLaunchFailure:
299 case cudaErrorCooperativeLaunchTooLarge:
301 case cudaErrorPriorLaunchFailure:
302 return hipErrorPriorLaunchFailure;
303 case cudaErrorLaunchOutOfResources:
305 case cudaErrorInvalidDeviceFunction:
306 return hipErrorInvalidDeviceFunction;
307 case cudaErrorInvalidConfiguration:
308 return hipErrorInvalidConfiguration;
309 case cudaErrorInvalidDevice:
311 case cudaErrorInvalidValue:
313 case cudaErrorInvalidDevicePointer:
315 case cudaErrorInvalidMemcpyDirection:
317 case cudaErrorInvalidResourceHandle:
318 return hipErrorInvalidHandle;
319 case cudaErrorNotReady:
321 case cudaErrorNoDevice:
323 case cudaErrorPeerAccessAlreadyEnabled:
325 case cudaErrorPeerAccessNotEnabled:
327 case cudaErrorHostMemoryAlreadyRegistered:
329 case cudaErrorHostMemoryNotRegistered:
331 case cudaErrorMapBufferObjectFailed:
332 return hipErrorMapFailed;
333 case cudaErrorAssert:
335 case cudaErrorNotSupported:
337 case cudaErrorCudartUnloading:
338 return hipErrorDeinitialized;
339 case cudaErrorInvalidKernelImage:
340 return hipErrorInvalidImage;
341 case cudaErrorUnmapBufferObjectFailed:
342 return hipErrorUnmapFailed;
343 case cudaErrorNoKernelImageForDevice:
344 return hipErrorNoBinaryForGpu;
345 case cudaErrorECCUncorrectable:
346 return hipErrorECCNotCorrectable;
347 case cudaErrorDeviceAlreadyInUse:
348 return hipErrorContextAlreadyInUse;
349 case cudaErrorInvalidPtx:
351 case cudaErrorLaunchTimeout:
352 return hipErrorLaunchTimeOut;
// The following cases are only compiled when CUDA_VERSION >= 10010.
353 #if CUDA_VERSION >= 10010
354 case cudaErrorInvalidSource:
355 return hipErrorInvalidSource;
356 case cudaErrorFileNotFound:
357 return hipErrorFileNotFound;
358 case cudaErrorSymbolNotFound:
359 return hipErrorNotFound;
360 case cudaErrorArrayIsMapped:
361 return hipErrorArrayIsMapped;
362 case cudaErrorNotMappedAsPointer:
363 return hipErrorNotMappedAsPointer;
364 case cudaErrorNotMappedAsArray:
365 return hipErrorNotMappedAsArray;
366 case cudaErrorNotMapped:
367 return hipErrorNotMapped;
368 case cudaErrorAlreadyAcquired:
369 return hipErrorAlreadyAcquired;
370 case cudaErrorAlreadyMapped:
371 return hipErrorAlreadyMapped;
// cudaErrorDeviceUninitialized is guarded separately by CUDA_VERSION >= 10020.
373 #if CUDA_VERSION >= 10020
374 case cudaErrorDeviceUninitialized:
377 case cudaErrorUnknown:
379 return hipErrorUnknown;
// Translates a CUDA driver result code (CUresult) into a hipError_t by matching it
// against a list of case labels; the last visible statement returns hipErrorUnknown.
// NOTE(review): this listing is incomplete. The embedded numbers skip lines (for
// example 384-386, 390, 392, 396, 398), so the `switch` header, a number of `return`
// statements and the closing braces are not visible. Case labels without a visible
// return (such as CUDA_ERROR_INVALID_VALUE) most likely had one on a dropped line -
// verify against the original header before relying on this text.
383 inline static hipError_t hipCUResultTohipError(CUresult cuError) {
387 case CUDA_ERROR_OUT_OF_MEMORY:
388 return hipErrorOutOfMemory;
389 case CUDA_ERROR_INVALID_VALUE:
391 case CUDA_ERROR_INVALID_DEVICE:
393 case CUDA_ERROR_DEINITIALIZED:
394 return hipErrorDeinitialized;
395 case CUDA_ERROR_NO_DEVICE:
397 case CUDA_ERROR_INVALID_CONTEXT:
399 case CUDA_ERROR_NOT_INITIALIZED:
400 return hipErrorNotInitialized;
401 case CUDA_ERROR_INVALID_HANDLE:
402 return hipErrorInvalidHandle;
403 case CUDA_ERROR_MAP_FAILED:
404 return hipErrorMapFailed;
405 case CUDA_ERROR_PROFILER_DISABLED:
406 return hipErrorProfilerDisabled;
407 case CUDA_ERROR_PROFILER_NOT_INITIALIZED:
408 return hipErrorProfilerNotInitialized;
409 case CUDA_ERROR_PROFILER_ALREADY_STARTED:
410 return hipErrorProfilerAlreadyStarted;
411 case CUDA_ERROR_PROFILER_ALREADY_STOPPED:
412 return hipErrorProfilerAlreadyStopped;
413 case CUDA_ERROR_INVALID_IMAGE:
414 return hipErrorInvalidImage;
415 case CUDA_ERROR_CONTEXT_ALREADY_CURRENT:
416 return hipErrorContextAlreadyCurrent;
417 case CUDA_ERROR_UNMAP_FAILED:
418 return hipErrorUnmapFailed;
419 case CUDA_ERROR_ARRAY_IS_MAPPED:
420 return hipErrorArrayIsMapped;
421 case CUDA_ERROR_ALREADY_MAPPED:
422 return hipErrorAlreadyMapped;
423 case CUDA_ERROR_NO_BINARY_FOR_GPU:
424 return hipErrorNoBinaryForGpu;
425 case CUDA_ERROR_ALREADY_ACQUIRED:
426 return hipErrorAlreadyAcquired;
427 case CUDA_ERROR_NOT_MAPPED:
428 return hipErrorNotMapped;
429 case CUDA_ERROR_NOT_MAPPED_AS_ARRAY:
430 return hipErrorNotMappedAsArray;
431 case CUDA_ERROR_NOT_MAPPED_AS_POINTER:
432 return hipErrorNotMappedAsPointer;
433 case CUDA_ERROR_ECC_UNCORRECTABLE:
434 return hipErrorECCNotCorrectable;
435 case CUDA_ERROR_UNSUPPORTED_LIMIT:
436 return hipErrorUnsupportedLimit;
437 case CUDA_ERROR_CONTEXT_ALREADY_IN_USE:
438 return hipErrorContextAlreadyInUse;
439 case CUDA_ERROR_PEER_ACCESS_UNSUPPORTED:
440 return hipErrorPeerAccessUnsupported;
441 case CUDA_ERROR_INVALID_PTX:
443 case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT:
444 return hipErrorInvalidGraphicsContext;
445 case CUDA_ERROR_INVALID_SOURCE:
446 return hipErrorInvalidSource;
447 case CUDA_ERROR_FILE_NOT_FOUND:
448 return hipErrorFileNotFound;
449 case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND:
450 return hipErrorSharedObjectSymbolNotFound;
451 case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED:
452 return hipErrorSharedObjectInitFailed;
453 case CUDA_ERROR_OPERATING_SYSTEM:
454 return hipErrorOperatingSystem;
455 case CUDA_ERROR_NOT_FOUND:
456 return hipErrorNotFound;
457 case CUDA_ERROR_NOT_READY:
459 case CUDA_ERROR_ILLEGAL_ADDRESS:
460 return hipErrorIllegalAddress;
461 case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES:
463 case CUDA_ERROR_LAUNCH_TIMEOUT:
464 return hipErrorLaunchTimeOut;
465 case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED:
467 case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED:
// CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE is reported as hipErrorSetOnActiveProcess.
469 case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE:
470 return hipErrorSetOnActiveProcess;
471 case CUDA_ERROR_ASSERT:
473 case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED:
475 case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED:
477 case CUDA_ERROR_LAUNCH_FAILED:
479 case CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE:
481 case CUDA_ERROR_NOT_SUPPORTED:
483 case CUDA_ERROR_UNKNOWN:
485 return hipErrorUnknown;
// Translates a hipError_t back into a CUDA runtime error code (cudaError_t); the
// last visible statement returns cudaErrorUnknown. Several CUDA codes are only
// used when CUDA_VERSION >= 10010 (or >= 10020 for cudaErrorDeviceUninitialized);
// each such guard is followed by a `return cudaErrorUnknown;` line.
// NOTE(review): this listing is incomplete. The embedded numbers skip lines (for
// example 490-492, 505, 507, 511, 545), so the `switch` header, a number of `case`
// labels, the `#else`/`#endif` lines of the version guards and the closing braces
// are not visible. A `return` that appears with no label in front of it (such as
// `return cudaErrorLaunchOutOfResources;`) most likely had its label on a dropped
// line - verify against the original header before relying on this text.
489 inline static cudaError_t hipErrorToCudaError(hipError_t hError) {
493 case hipErrorOutOfMemory:
494 return cudaErrorMemoryAllocation;
495 case hipErrorProfilerDisabled:
496 return cudaErrorProfilerDisabled;
497 case hipErrorProfilerNotInitialized:
498 return cudaErrorProfilerNotInitialized;
499 case hipErrorProfilerAlreadyStarted:
500 return cudaErrorProfilerAlreadyStarted;
501 case hipErrorProfilerAlreadyStopped:
502 return cudaErrorProfilerAlreadyStopped;
503 case hipErrorInvalidConfiguration:
504 return cudaErrorInvalidConfiguration;
506 return cudaErrorLaunchOutOfResources;
508 return cudaErrorInvalidValue;
509 case hipErrorInvalidHandle:
510 return cudaErrorInvalidResourceHandle;
512 return cudaErrorInvalidDevice;
514 return cudaErrorInvalidMemcpyDirection;
516 return cudaErrorInvalidDevicePointer;
517 case hipErrorNotInitialized:
518 return cudaErrorInitializationError;
520 return cudaErrorNoDevice;
522 return cudaErrorNotReady;
524 return cudaErrorPeerAccessNotEnabled;
526 return cudaErrorPeerAccessAlreadyEnabled;
528 return cudaErrorHostMemoryAlreadyRegistered;
530 return cudaErrorHostMemoryNotRegistered;
531 case hipErrorDeinitialized:
532 return cudaErrorCudartUnloading;
533 case hipErrorInvalidSymbol:
534 return cudaErrorInvalidSymbol;
535 case hipErrorInsufficientDriver:
536 return cudaErrorInsufficientDriver;
537 case hipErrorMissingConfiguration:
538 return cudaErrorMissingConfiguration;
539 case hipErrorPriorLaunchFailure:
540 return cudaErrorPriorLaunchFailure;
541 case hipErrorInvalidDeviceFunction:
542 return cudaErrorInvalidDeviceFunction;
543 case hipErrorInvalidImage:
544 return cudaErrorInvalidKernelImage;
546 #if CUDA_VERSION >= 10020
547 return cudaErrorDeviceUninitialized;
549 return cudaErrorUnknown;
551 case hipErrorMapFailed:
552 return cudaErrorMapBufferObjectFailed;
553 case hipErrorUnmapFailed:
554 return cudaErrorUnmapBufferObjectFailed;
555 case hipErrorArrayIsMapped:
556 #if CUDA_VERSION >= 10010
557 return cudaErrorArrayIsMapped;
559 return cudaErrorUnknown;
561 case hipErrorAlreadyMapped:
562 #if CUDA_VERSION >= 10010
563 return cudaErrorAlreadyMapped;
565 return cudaErrorUnknown;
567 case hipErrorNoBinaryForGpu:
568 return cudaErrorNoKernelImageForDevice;
569 case hipErrorAlreadyAcquired:
570 #if CUDA_VERSION >= 10010
571 return cudaErrorAlreadyAcquired;
573 return cudaErrorUnknown;
575 case hipErrorNotMapped:
576 #if CUDA_VERSION >= 10010
577 return cudaErrorNotMapped;
579 return cudaErrorUnknown;
581 case hipErrorNotMappedAsArray:
582 #if CUDA_VERSION >= 10010
583 return cudaErrorNotMappedAsArray;
585 return cudaErrorUnknown;
587 case hipErrorNotMappedAsPointer:
588 #if CUDA_VERSION >= 10010
589 return cudaErrorNotMappedAsPointer;
591 return cudaErrorUnknown;
593 case hipErrorECCNotCorrectable:
594 return cudaErrorECCUncorrectable;
595 case hipErrorUnsupportedLimit:
596 return cudaErrorUnsupportedLimit;
597 case hipErrorContextAlreadyInUse:
598 return cudaErrorDeviceAlreadyInUse;
599 case hipErrorPeerAccessUnsupported:
600 return cudaErrorPeerAccessUnsupported;
602 return cudaErrorInvalidPtx;
603 case hipErrorInvalidGraphicsContext:
604 return cudaErrorInvalidGraphicsContext;
605 case hipErrorInvalidSource:
606 #if CUDA_VERSION >= 10010
607 return cudaErrorInvalidSource;
609 return cudaErrorUnknown;
611 case hipErrorFileNotFound:
612 #if CUDA_VERSION >= 10010
613 return cudaErrorFileNotFound;
615 return cudaErrorUnknown;
617 case hipErrorSharedObjectSymbolNotFound:
618 return cudaErrorSharedObjectSymbolNotFound;
619 case hipErrorSharedObjectInitFailed:
620 return cudaErrorSharedObjectInitFailed;
621 case hipErrorOperatingSystem:
622 return cudaErrorOperatingSystem;
623 case hipErrorNotFound:
624 #if CUDA_VERSION >= 10010
625 return cudaErrorSymbolNotFound;
627 return cudaErrorUnknown;
629 case hipErrorIllegalAddress:
630 return cudaErrorIllegalAddress;
631 case hipErrorLaunchTimeOut:
632 return cudaErrorLaunchTimeout;
633 case hipErrorSetOnActiveProcess:
634 return cudaErrorSetOnActiveProcess;
636 return cudaErrorLaunchFailure;
638 return cudaErrorCooperativeLaunchTooLarge;
640 return cudaErrorNotSupported;
645 case hipErrorUnknown:
648 return cudaErrorUnknown;
652 inline static enum cudaMemcpyKind hipMemcpyKindToCudaMemcpyKind(hipMemcpyKind kind) {
654 case hipMemcpyHostToHost:
655 return cudaMemcpyHostToHost;
656 case hipMemcpyHostToDevice:
657 return cudaMemcpyHostToDevice;
658 case hipMemcpyDeviceToHost:
659 return cudaMemcpyDeviceToHost;
660 case hipMemcpyDeviceToDevice:
661 return cudaMemcpyDeviceToDevice;
663 return cudaMemcpyDefault;
667 inline static enum cudaTextureAddressMode hipTextureAddressModeToCudaTextureAddressMode(
668 hipTextureAddressMode kind) {
670 case hipAddressModeWrap:
671 return cudaAddressModeWrap;
672 case hipAddressModeClamp:
673 return cudaAddressModeClamp;
674 case hipAddressModeMirror:
675 return cudaAddressModeMirror;
676 case hipAddressModeBorder:
677 return cudaAddressModeBorder;
679 return cudaAddressModeWrap;
683 inline static enum cudaTextureFilterMode hipTextureFilterModeToCudaTextureFilterMode(
684 hipTextureFilterMode kind) {
686 case hipFilterModePoint:
687 return cudaFilterModePoint;
688 case hipFilterModeLinear:
689 return cudaFilterModeLinear;
691 return cudaFilterModePoint;
695 inline static enum cudaTextureReadMode hipTextureReadModeToCudaTextureReadMode(hipTextureReadMode kind) {
697 case hipReadModeElementType:
698 return cudaReadModeElementType;
699 case hipReadModeNormalizedFloat:
700 return cudaReadModeNormalizedFloat;
702 return cudaReadModeElementType;
706 inline static enum cudaChannelFormatKind hipChannelFormatKindToCudaChannelFormatKind(
707 hipChannelFormatKind kind) {
709 case hipChannelFormatKindSigned:
710 return cudaChannelFormatKindSigned;
711 case hipChannelFormatKindUnsigned:
712 return cudaChannelFormatKindUnsigned;
713 case hipChannelFormatKindFloat:
714 return cudaChannelFormatKindFloat;
715 case hipChannelFormatKindNone:
716 return cudaChannelFormatKindNone;
718 return cudaChannelFormatKindNone;
// HIPRT_CB expands to the CUDA runtime's CUDART_CB macro.
725 #define HIPRT_CB CUDART_CB
727 inline static hipError_t
hipInit(
unsigned int flags) {
728 return hipCUResultTohipError(cuInit(flags));
731 inline static hipError_t
hipDeviceReset() {
return hipCUDAErrorTohipError(cudaDeviceReset()); }
733 inline static hipError_t
hipGetLastError() {
return hipCUDAErrorTohipError(cudaGetLastError()); }
736 return hipCUDAErrorTohipError(cudaPeekAtLastError());
739 inline static hipError_t
hipMalloc(
void** ptr,
size_t size) {
740 return hipCUDAErrorTohipError(cudaMalloc(ptr, size));
743 inline static hipError_t
hipMallocPitch(
void** ptr,
size_t* pitch,
size_t width,
size_t height) {
744 return hipCUDAErrorTohipError(cudaMallocPitch(ptr, pitch, width, height));
747 inline static hipError_t
hipMemAllocPitch(hipDeviceptr_t* dptr,
size_t* pitch,
size_t widthInBytes,
size_t height,
unsigned int elementSizeBytes){
748 return hipCUResultTohipError(cuMemAllocPitch(dptr,pitch,widthInBytes,height,elementSizeBytes));
752 return hipCUDAErrorTohipError(cudaMalloc3D(pitchedDevPtr, extent));
755 inline static hipError_t
hipFree(
void* ptr) {
return hipCUDAErrorTohipError(cudaFree(ptr)); }
757 inline static hipError_t
hipMallocHost(
void** ptr,
size_t size)
758 __attribute__((deprecated(
"use hipHostMalloc instead")));
760 return hipCUDAErrorTohipError(cudaMallocHost(ptr, size));
764 __attribute__((deprecated(
"use hipHostMalloc instead")));
766 return hipCUResultTohipError(cuMemAllocHost(ptr, size));
769 inline static hipError_t
hipHostAlloc(
void** ptr,
size_t size,
unsigned int flags)
770 __attribute__((deprecated(
"use hipHostMalloc instead")));
771 inline static hipError_t
hipHostAlloc(
void** ptr,
size_t size,
unsigned int flags) {
772 return hipCUDAErrorTohipError(cudaHostAlloc(ptr, size, flags));
775 inline static hipError_t
hipHostMalloc(
void** ptr,
size_t size,
unsigned int flags) {
776 return hipCUDAErrorTohipError(cudaHostAlloc(ptr, size, flags));
779 inline static hipError_t
hipMallocManaged(
void** ptr,
size_t size,
unsigned int flags) {
780 return hipCUDAErrorTohipError(cudaMallocManaged(ptr, size, flags));
784 size_t width,
size_t height,
786 return hipCUDAErrorTohipError(cudaMallocArray(array, desc, width, height, flags));
791 return hipCUDAErrorTohipError(cudaMalloc3DArray(array, desc, extent, flags));
795 return hipCUDAErrorTohipError(cudaFreeArray(array));
799 return hipCUDAErrorTohipError(cudaHostGetDevicePointer(devPtr, hostPtr, flags));
802 inline static hipError_t
hipHostGetFlags(
unsigned int* flagsPtr,
void* hostPtr) {
803 return hipCUDAErrorTohipError(cudaHostGetFlags(flagsPtr, hostPtr));
806 inline static hipError_t
hipHostRegister(
void* ptr,
size_t size,
unsigned int flags) {
807 return hipCUDAErrorTohipError(cudaHostRegister(ptr, size, flags));
811 return hipCUDAErrorTohipError(cudaHostUnregister(ptr));
815 __attribute__((deprecated(
"use hipHostFree instead")));
817 return hipCUDAErrorTohipError(cudaFreeHost(ptr));
821 return hipCUDAErrorTohipError(cudaFreeHost(ptr));
825 return hipCUDAErrorTohipError(cudaSetDevice(device));
829 struct cudaDeviceProp cdprop;
830 memset(&cdprop, 0x0,
sizeof(
struct cudaDeviceProp));
831 cdprop.major = prop->
major;
832 cdprop.minor = prop->
minor;
847 return hipCUDAErrorTohipError(cudaChooseDevice(device, &cdprop));
850 inline static hipError_t
hipMemcpyHtoD(hipDeviceptr_t dst,
void* src,
size_t size) {
851 return hipCUResultTohipError(cuMemcpyHtoD(dst, src, size));
854 inline static hipError_t
hipMemcpyDtoH(
void* dst, hipDeviceptr_t src,
size_t size) {
855 return hipCUResultTohipError(cuMemcpyDtoH(dst, src, size));
858 inline static hipError_t
hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src,
size_t size) {
859 return hipCUResultTohipError(cuMemcpyDtoD(dst, src, size));
862 inline static hipError_t
hipMemcpyHtoDAsync(hipDeviceptr_t dst,
void* src,
size_t size,
864 return hipCUResultTohipError(cuMemcpyHtoDAsync(dst, src, size, stream));
867 inline static hipError_t
hipMemcpyDtoHAsync(
void* dst, hipDeviceptr_t src,
size_t size,
869 return hipCUResultTohipError(cuMemcpyDtoHAsync(dst, src, size, stream));
872 inline static hipError_t
hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src,
size_t size,
874 return hipCUResultTohipError(cuMemcpyDtoDAsync(dst, src, size, stream));
877 inline static hipError_t
hipMemcpy(
void* dst,
const void* src,
size_t sizeBytes,
878 hipMemcpyKind copyKind) {
879 return hipCUDAErrorTohipError(
880 cudaMemcpy(dst, src, sizeBytes, hipMemcpyKindToCudaMemcpyKind(copyKind)));
884 inline static hipError_t hipMemcpyWithStream(
void* dst,
const void* src,
885 size_t sizeBytes, hipMemcpyKind copyKind,
887 cudaError_t error = cudaMemcpyAsync(dst, src, sizeBytes,
888 hipMemcpyKindToCudaMemcpyKind(copyKind),
891 if (error != cudaSuccess)
return hipCUDAErrorTohipError(error);
893 return hipCUDAErrorTohipError(cudaStreamSynchronize(stream));
896 inline static hipError_t
hipMemcpyAsync(
void* dst,
const void* src,
size_t sizeBytes,
897 hipMemcpyKind copyKind,
hipStream_t stream __dparm(0)) {
898 return hipCUDAErrorTohipError(
899 cudaMemcpyAsync(dst, src, sizeBytes, hipMemcpyKindToCudaMemcpyKind(copyKind), stream));
902 inline static hipError_t hipMemcpyToSymbol(
const void* symbol,
const void* src,
size_t sizeBytes,
903 size_t offset __dparm(0),
904 hipMemcpyKind copyType __dparm(hipMemcpyHostToDevice)) {
905 return hipCUDAErrorTohipError(cudaMemcpyToSymbol(symbol, src, sizeBytes, offset,
906 hipMemcpyKindToCudaMemcpyKind(copyType)));
909 inline static hipError_t hipMemcpyToSymbolAsync(
const void* symbol,
const void* src,
910 size_t sizeBytes,
size_t offset,
911 hipMemcpyKind copyType,
913 return hipCUDAErrorTohipError(cudaMemcpyToSymbolAsync(
914 symbol, src, sizeBytes, offset, hipMemcpyKindToCudaMemcpyKind(copyType), stream));
917 inline static hipError_t hipMemcpyFromSymbol(
void* dst,
const void* symbolName,
size_t sizeBytes,
918 size_t offset __dparm(0),
919 hipMemcpyKind kind __dparm(hipMemcpyDeviceToHost)) {
920 return hipCUDAErrorTohipError(cudaMemcpyFromSymbol(dst, symbolName, sizeBytes, offset,
921 hipMemcpyKindToCudaMemcpyKind(kind)));
924 inline static hipError_t hipMemcpyFromSymbolAsync(
void* dst,
const void* symbolName,
925 size_t sizeBytes,
size_t offset,
928 return hipCUDAErrorTohipError(cudaMemcpyFromSymbolAsync(
929 dst, symbolName, sizeBytes, offset, hipMemcpyKindToCudaMemcpyKind(kind), stream));
932 inline static hipError_t hipGetSymbolAddress(
void** devPtr,
const void* symbolName) {
933 return hipCUDAErrorTohipError(cudaGetSymbolAddress(devPtr, symbolName));
936 inline static hipError_t hipGetSymbolSize(
size_t* size,
const void* symbolName) {
937 return hipCUDAErrorTohipError(cudaGetSymbolSize(size, symbolName));
940 inline static hipError_t
hipMemcpy2D(
void* dst,
size_t dpitch,
const void* src,
size_t spitch,
941 size_t width,
size_t height, hipMemcpyKind kind) {
942 return hipCUDAErrorTohipError(
943 cudaMemcpy2D(dst, dpitch, src, spitch, width, height, hipMemcpyKindToCudaMemcpyKind(kind)));
947 return hipCUResultTohipError(cuMemcpy2D(pCopy));
951 return hipCUResultTohipError(cuMemcpy2DAsync(pCopy, stream));
956 return hipCUDAErrorTohipError(cudaMemcpy3D(p));
961 return hipCUDAErrorTohipError(cudaMemcpy3DAsync(p, stream));
964 inline static hipError_t
hipMemcpy2DAsync(
void* dst,
size_t dpitch,
const void* src,
size_t spitch,
965 size_t width,
size_t height, hipMemcpyKind kind,
967 return hipCUDAErrorTohipError(cudaMemcpy2DAsync(dst, dpitch, src, spitch, width, height,
968 hipMemcpyKindToCudaMemcpyKind(kind), stream));
972 const void* src,
size_t spitch,
size_t width,
973 size_t height, hipMemcpyKind kind) {
974 return hipCUDAErrorTohipError(cudaMemcpy2DToArray(dst, wOffset, hOffset, src, spitch, width,
975 height, hipMemcpyKindToCudaMemcpyKind(kind)));
979 size_t hOffset,
const void* src,
980 size_t count, hipMemcpyKind kind) {
981 return hipCUDAErrorTohipError(
982 cudaMemcpyToArray(dst, wOffset, hOffset, src, count, hipMemcpyKindToCudaMemcpyKind(kind)));
986 size_t wOffset,
size_t hOffset,
987 size_t count, hipMemcpyKind kind) {
988 return hipCUDAErrorTohipError(cudaMemcpyFromArray(dst, srcArray, wOffset, hOffset, count,
989 hipMemcpyKindToCudaMemcpyKind(kind)));
994 return hipCUResultTohipError(cuMemcpyAtoH(dst, (CUarray)srcArray, srcOffset, count));
999 return hipCUResultTohipError(cuMemcpyHtoA((CUarray)dstArray, dstOffset, srcHost, count));
1003 return hipCUDAErrorTohipError(cudaDeviceSynchronize());
1007 return hipCUDAErrorTohipError(cudaDeviceGetCacheConfig(pCacheConfig));
1011 return hipCUDAErrorTohipError(cudaDeviceSetCacheConfig(cacheConfig));
1015 return cudaGetErrorString(hipErrorToCudaError(error));
1019 return cudaGetErrorName(hipErrorToCudaError(error));
1023 return hipCUDAErrorTohipError(cudaGetDeviceCount(count));
1027 return hipCUDAErrorTohipError(cudaGetDevice(device));
1031 return hipCUDAErrorTohipError(cudaIpcCloseMemHandle(devPtr));
1035 return hipCUDAErrorTohipError(cudaIpcGetEventHandle(handle, event));
1039 return hipCUDAErrorTohipError(cudaIpcGetMemHandle(handle, devPtr));
1043 return hipCUDAErrorTohipError(cudaIpcOpenEventHandle(event, handle));
1047 unsigned int flags) {
1048 return hipCUDAErrorTohipError(cudaIpcOpenMemHandle(devPtr, handle, flags));
1051 inline static hipError_t
hipMemset(
void* devPtr,
int value,
size_t count) {
1052 return hipCUDAErrorTohipError(cudaMemset(devPtr, value, count));
1055 inline static hipError_t
hipMemsetD32(hipDeviceptr_t devPtr,
int value,
size_t count) {
1056 return hipCUResultTohipError(cuMemsetD32(devPtr, value, count));
1059 inline static hipError_t
hipMemsetAsync(
void* devPtr,
int value,
size_t count,
1061 return hipCUDAErrorTohipError(cudaMemsetAsync(devPtr, value, count, stream));
1064 inline static hipError_t
hipMemsetD32Async(hipDeviceptr_t devPtr,
int value,
size_t count,
1066 return hipCUResultTohipError(cuMemsetD32Async(devPtr, value, count, stream));
1069 inline static hipError_t
hipMemsetD8(hipDeviceptr_t dest,
unsigned char value,
size_t sizeBytes) {
1070 return hipCUResultTohipError(cuMemsetD8(dest, value, sizeBytes));
1073 inline static hipError_t
hipMemsetD8Async(hipDeviceptr_t dest,
unsigned char value,
size_t sizeBytes,
1075 return hipCUResultTohipError(cuMemsetD8Async(dest, value, sizeBytes, stream));
1078 inline static hipError_t
hipMemsetD16(hipDeviceptr_t dest,
unsigned short value,
size_t sizeBytes) {
1079 return hipCUResultTohipError(cuMemsetD16(dest, value, sizeBytes));
1082 inline static hipError_t
hipMemsetD16Async(hipDeviceptr_t dest,
unsigned short value,
size_t sizeBytes,
1084 return hipCUResultTohipError(cuMemsetD16Async(dest, value, sizeBytes, stream));
1087 inline static hipError_t
hipMemset2D(
void* dst,
size_t pitch,
int value,
size_t width,
size_t height) {
1088 return hipCUDAErrorTohipError(cudaMemset2D(dst, pitch, value, width, height));
1091 inline static hipError_t
hipMemset2DAsync(
void* dst,
size_t pitch,
int value,
size_t width,
size_t height,
hipStream_t stream __dparm(0)) {
1092 return hipCUDAErrorTohipError(cudaMemset2DAsync(dst, pitch, value, width, height, stream));
1096 return hipCUDAErrorTohipError(cudaMemset3D(pitchedDevPtr, value, extent));
1100 return hipCUDAErrorTohipError(cudaMemset3DAsync(pitchedDevPtr, value, extent, stream));
1104 struct cudaDeviceProp cdprop;
1106 cerror = cudaGetDeviceProperties(&cdprop, device);
1108 strncpy(p_prop->
name, cdprop.name, 256);
1112 p_prop->
warpSize = cdprop.warpSize;
1114 for (
int i = 0; i < 3; i++) {
1122 p_prop->
major = cdprop.major;
1123 p_prop->
minor = cdprop.minor;
1130 int ccVers = p_prop->
major * 100 + p_prop->
minor * 10;
1151 p_prop->
pciBusID = cdprop.pciBusID;
1172 p_prop->
memPitch = cdprop.memPitch;
1179 return hipCUDAErrorTohipError(cerror);
1183 enum cudaDeviceAttr cdattr;
1188 cdattr = cudaDevAttrMaxThreadsPerBlock;
1191 cdattr = cudaDevAttrMaxBlockDimX;
1194 cdattr = cudaDevAttrMaxBlockDimY;
1197 cdattr = cudaDevAttrMaxBlockDimZ;
1200 cdattr = cudaDevAttrMaxGridDimX;
1203 cdattr = cudaDevAttrMaxGridDimY;
1206 cdattr = cudaDevAttrMaxGridDimZ;
1209 cdattr = cudaDevAttrMaxSharedMemoryPerBlock;
1212 cdattr = cudaDevAttrTotalConstantMemory;
1215 cdattr = cudaDevAttrWarpSize;
1218 cdattr = cudaDevAttrMaxRegistersPerBlock;
1221 cdattr = cudaDevAttrClockRate;
1224 cdattr = cudaDevAttrMemoryClockRate;
1227 cdattr = cudaDevAttrGlobalMemoryBusWidth;
1230 cdattr = cudaDevAttrMultiProcessorCount;
1233 cdattr = cudaDevAttrComputeMode;
1236 cdattr = cudaDevAttrL2CacheSize;
1239 cdattr = cudaDevAttrMaxThreadsPerMultiProcessor;
1242 cdattr = cudaDevAttrComputeCapabilityMajor;
1245 cdattr = cudaDevAttrComputeCapabilityMinor;
1248 cdattr = cudaDevAttrConcurrentKernels;
1251 cdattr = cudaDevAttrPciBusId;
1254 cdattr = cudaDevAttrPciDeviceId;
1257 cdattr = cudaDevAttrMaxSharedMemoryPerMultiprocessor;
1260 cdattr = cudaDevAttrIsMultiGpuBoard;
1263 cdattr = cudaDevAttrIntegrated;
1266 cdattr = cudaDevAttrMaxTexture1DWidth;
1269 cdattr = cudaDevAttrMaxTexture2DWidth;
1272 cdattr = cudaDevAttrMaxTexture2DHeight;
1275 cdattr = cudaDevAttrMaxTexture3DWidth;
1278 cdattr = cudaDevAttrMaxTexture3DHeight;
1281 cdattr = cudaDevAttrMaxTexture3DDepth;
1284 cdattr = cudaDevAttrMaxPitch;
1287 cdattr = cudaDevAttrTextureAlignment;
1290 cdattr = cudaDevAttrTexturePitchAlignment;
1293 cdattr = cudaDevAttrKernelExecTimeout;
1296 cdattr = cudaDevAttrCanMapHostMemory;
1299 cdattr = cudaDevAttrEccEnabled;
1302 cdattr = cudaDevAttrCooperativeLaunch;
1305 cdattr = cudaDevAttrCooperativeMultiDeviceLaunch;
1308 return hipCUDAErrorTohipError(cudaErrorInvalidValue);
1311 cerror = cudaDeviceGetAttribute(pi, cdattr, device);
1313 return hipCUDAErrorTohipError(cerror);
1319 size_t dynamicSMemSize) {
1320 return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, func,
1321 blockSize, dynamicSMemSize));
1327 size_t dynamicSMemSize,
1328 unsigned int flags) {
1329 return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks, func,
1330 blockSize, dynamicSMemSize, flags));
1336 size_t dynamicSMemSize ){
1337 return hipCUResultTohipError(cuOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, f,
1338 blockSize, dynamicSMemSize));
1344 size_t dynamicSMemSize,
1345 unsigned int flags ) {
1346 return hipCUResultTohipError(cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks,f,
1347 blockSize, dynamicSMemSize, flags));
1353 int blockSizeLimit){
1354 return hipCUResultTohipError(cuOccupancyMaxPotentialBlockSize(gridSize, blockSize, f, NULL,
1355 dynSharedMemPerBlk, blockSizeLimit));
1361 int blockSizeLimit,
unsigned int flags){
1362 return hipCUResultTohipError(cuOccupancyMaxPotentialBlockSizeWithFlags(gridSize, blockSize, f, NULL,
1363 dynSharedMemPerBlk, blockSizeLimit, flags));
1367 struct cudaPointerAttributes cPA;
1368 hipError_t err = hipCUDAErrorTohipError(cudaPointerGetAttributes(&cPA, ptr));
1370 #if (CUDART_VERSION >= 11000)
1371 auto memType = cPA.type;
1373 unsigned memType = cPA.memoryType;
1376 case cudaMemoryTypeDevice:
1379 case cudaMemoryTypeHost:
1383 return hipErrorUnknown;
1385 attributes->device = cPA.device;
1386 attributes->devicePointer = cPA.devicePointer;
1387 attributes->hostPointer = cPA.hostPointer;
1388 attributes->isManaged = 0;
1389 attributes->allocationFlags = 0;
1394 inline static hipError_t
hipMemGetInfo(
size_t* free,
size_t* total) {
1395 return hipCUDAErrorTohipError(cudaMemGetInfo(free, total));
1399 return hipCUDAErrorTohipError(cudaEventCreate(event));
1403 return hipCUDAErrorTohipError(cudaEventRecord(event, stream));
1407 return hipCUDAErrorTohipError(cudaEventSynchronize(event));
1411 return hipCUDAErrorTohipError(cudaEventElapsedTime(ms, start, stop));
1415 return hipCUDAErrorTohipError(cudaEventDestroy(event));
1419 return hipCUDAErrorTohipError(cudaStreamCreateWithFlags(stream, flags));
1423 return hipCUDAErrorTohipError(cudaStreamCreateWithPriority(stream, flags, priority));
1427 return hipCUDAErrorTohipError(cudaDeviceGetStreamPriorityRange(leastPriority, greatestPriority));
1431 return hipCUDAErrorTohipError(cudaStreamCreate(stream));
1435 return hipCUDAErrorTohipError(cudaStreamSynchronize(stream));
1439 return hipCUDAErrorTohipError(cudaStreamDestroy(stream));
1443 return hipCUDAErrorTohipError(cudaStreamGetFlags(stream, flags));
1447 return hipCUDAErrorTohipError(cudaStreamGetPriority(stream, priority));
1451 unsigned int flags) {
1452 return hipCUDAErrorTohipError(cudaStreamWaitEvent(stream, event, flags));
1456 return hipCUDAErrorTohipError(cudaStreamQuery(stream));
1460 void* userData,
unsigned int flags) {
1461 return hipCUDAErrorTohipError(
1462 cudaStreamAddCallback(stream, (cudaStreamCallback_t)callback, userData, flags));
1466 cudaError_t err = cudaDriverGetVersion(driverVersion);
1471 return hipCUDAErrorTohipError(err);
1475 return hipCUDAErrorTohipError(cudaRuntimeGetVersion(runtimeVersion));
1479 return hipCUDAErrorTohipError(cudaDeviceCanAccessPeer(canAccessPeer, device, peerDevice));
1483 return hipCUDAErrorTohipError(cudaDeviceDisablePeerAccess(peerDevice));
1487 return hipCUDAErrorTohipError(cudaDeviceEnablePeerAccess(peerDevice, flags));
1491 return hipCUResultTohipError(cuCtxDisablePeerAccess(peerCtx));
1495 return hipCUResultTohipError(cuCtxEnablePeerAccess(peerCtx, flags));
1500 return hipCUResultTohipError(cuDevicePrimaryCtxGetState(dev, flags, active));
1504 return hipCUResultTohipError(cuDevicePrimaryCtxRelease(dev));
1508 return hipCUResultTohipError(cuDevicePrimaryCtxRetain(pctx, dev));
1512 return hipCUResultTohipError(cuDevicePrimaryCtxReset(dev));
1516 return hipCUResultTohipError(cuDevicePrimaryCtxSetFlags(dev, flags));
1520 hipDeviceptr_t dptr) {
1521 return hipCUResultTohipError(cuMemGetAddressRange(pbase, psize, dptr));
1524 inline static hipError_t
hipMemcpyPeer(
void* dst,
int dstDevice,
const void* src,
int srcDevice,
1526 return hipCUDAErrorTohipError(cudaMemcpyPeer(dst, dstDevice, src, srcDevice, count));
1529 inline static hipError_t
hipMemcpyPeerAsync(
void* dst,
int dstDevice,
const void* src,
1530 int srcDevice,
size_t count,
1532 return hipCUDAErrorTohipError(
1533 cudaMemcpyPeerAsync(dst, dstDevice, src, srcDevice, count, stream));
1537 inline static hipError_t
hipProfilerStart() {
return hipCUDAErrorTohipError(cudaProfilerStart()); }
1539 inline static hipError_t
hipProfilerStop() {
return hipCUDAErrorTohipError(cudaProfilerStop()); }
1542 return hipCUDAErrorTohipError(cudaGetDeviceFlags(flags));
1546 return hipCUDAErrorTohipError(cudaSetDeviceFlags(flags));
1550 return hipCUDAErrorTohipError(cudaEventCreateWithFlags(event, flags));
1554 return hipCUDAErrorTohipError(cudaEventQuery(event));
1558 return hipCUResultTohipError(cuCtxCreate(ctx, flags, device));
1562 return hipCUResultTohipError(cuCtxDestroy(ctx));
1566 return hipCUResultTohipError(cuCtxPopCurrent(ctx));
1570 return hipCUResultTohipError(cuCtxPushCurrent(ctx));
1574 return hipCUResultTohipError(cuCtxSetCurrent(ctx));
1578 return hipCUResultTohipError(cuCtxGetCurrent(ctx));
1582 return hipCUResultTohipError(cuCtxGetDevice(device));
1586 return hipCUResultTohipError(cuCtxGetApiVersion(ctx, (
unsigned int*)apiVersion));
1590 return hipCUResultTohipError(cuCtxGetCacheConfig(cacheConfig));
1594 return hipCUResultTohipError(cuCtxSetCacheConfig(cacheConfig));
1598 return hipCUResultTohipError(cuCtxSetSharedMemConfig((CUsharedconfig)config));
1602 return hipCUResultTohipError(cuCtxGetSharedMemConfig((CUsharedconfig*)pConfig));
1606 return hipCUResultTohipError(cuCtxSynchronize());
1610 return hipCUResultTohipError(cuCtxGetFlags(flags));
1613 inline static hipError_t hipCtxDetach(
hipCtx_t ctx) {
1614 return hipCUResultTohipError(cuCtxDetach(ctx));
1617 inline static hipError_t
hipDeviceGet(hipDevice_t* device,
int ordinal) {
1618 return hipCUResultTohipError(cuDeviceGet(device, ordinal));
1622 return hipCUResultTohipError(cuDeviceComputeCapability(major, minor, device));
1625 inline static hipError_t
hipDeviceGetName(
char* name,
int len, hipDevice_t device) {
1626 return hipCUResultTohipError(cuDeviceGetName(name, len, device));
1630 int srcDevice,
int dstDevice) {
1631 return hipCUDAErrorTohipError(cudaDeviceGetP2PAttribute(value, attr, srcDevice, dstDevice));
1635 return hipCUDAErrorTohipError(cudaDeviceGetPCIBusId(pciBusId, len, device));
1639 return hipCUDAErrorTohipError(cudaDeviceGetByPCIBusId(device, pciBusId));
1643 return hipCUDAErrorTohipError(cudaDeviceGetSharedMemConfig(config));
1647 return hipCUDAErrorTohipError(cudaDeviceSetSharedMemConfig(config));
1651 return hipCUDAErrorTohipError(cudaDeviceGetLimit(pValue, limit));
1655 return hipCUResultTohipError(cuDeviceTotalMem(bytes, device));
1659 return hipCUResultTohipError(cuModuleLoad(module, fname));
1663 return hipCUResultTohipError(cuModuleUnload(hmod));
1667 const char* kname) {
1668 return hipCUResultTohipError(cuModuleGetFunction(
function, module, kname));
1672 return hipCUDAErrorTohipError(cudaFuncGetAttributes(attr, func));
1676 return hipCUResultTohipError(cuFuncGetAttribute(value, attrib, hfunc));
1681 return hipCUResultTohipError(cuModuleGetGlobal(dptr, bytes, hmod, name));
1685 return hipCUResultTohipError(cuModuleLoadData(module, image));
1689 unsigned int numOptions, hipJitOption* options,
1690 void** optionValues) {
1691 return hipCUResultTohipError(
1692 cuModuleLoadDataEx(module, image, numOptions, options, optionValues));
1696 dim3 dimBlocks,
void** args,
size_t sharedMemBytes,
1699 return hipCUDAErrorTohipError(cudaLaunchKernel(function_address,numBlocks,dimBlocks,args,sharedMemBytes,stream));
1703 unsigned int gridDimY,
unsigned int gridDimZ,
1704 unsigned int blockDimX,
unsigned int blockDimY,
1705 unsigned int blockDimZ,
unsigned int sharedMemBytes,
1708 return hipCUResultTohipError(cuLaunchKernel(f, gridDimX, gridDimY, gridDimZ, blockDimX,
1709 blockDimY, blockDimZ, sharedMemBytes, stream,
1710 kernelParams, extra));
1714 return hipCUDAErrorTohipError(cudaFuncSetCacheConfig(func, cacheConfig));
1717 __HIP_DEPRECATED
inline static hipError_t hipBindTexture(
size_t* offset,
1721 size_t size __dparm(UINT_MAX)) {
1722 return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, desc, size));
1725 __HIP_DEPRECATED
inline static hipError_t hipBindTexture2D(
1728 return hipCUDAErrorTohipError(cudaBindTexture2D(offset, tex, devPtr, desc, width, height, pitch));
1732 hipChannelFormatKind f) {
1733 return cudaCreateChannelDesc(x, y, z, w, hipChannelFormatKindToCudaChannelFormatKind(f));
1736 inline static hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject,
1740 return hipCUDAErrorTohipError(
1741 cudaCreateTextureObject(pTexObject, pResDesc, pTexDesc, pResViewDesc));
1744 inline static hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject) {
1745 return hipCUDAErrorTohipError(cudaDestroyTextureObject(textureObject));
1750 return hipCUDAErrorTohipError(cudaCreateSurfaceObject(pSurfObject, pResDesc));
1754 return hipCUDAErrorTohipError(cudaDestroySurfaceObject(surfaceObject));
1757 inline static hipError_t hipGetTextureObjectResourceDesc(
hipResourceDesc* pResDesc,
1758 hipTextureObject_t textureObject) {
1759 return hipCUDAErrorTohipError(cudaGetTextureObjectResourceDesc( pResDesc, textureObject));
1762 __HIP_DEPRECATED
inline static hipError_t hipGetTextureAlignmentOffset(
1764 return hipCUDAErrorTohipError(cudaGetTextureAlignmentOffset(offset,texref));
1769 return hipCUDAErrorTohipError(cudaGetChannelDesc(desc,array));
1772 inline static hipError_t hipLaunchCooperativeKernel(
const void* f,
dim3 gridDim,
dim3 blockDim,
1773 void** kernelParams,
unsigned int sharedMemBytes,
1775 return hipCUDAErrorTohipError(
1776 cudaLaunchCooperativeKernel(f, gridDim, blockDim, kernelParams, sharedMemBytes, stream));
1779 inline static hipError_t hipLaunchCooperativeKernelMultiDevice(
hipLaunchParams* launchParamsList,
1780 int numDevices,
unsigned int flags) {
1781 return hipCUDAErrorTohipError(cudaLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags));
1794 size_t dynamicSMemSize) {
1795 return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, func,
1796 blockSize, dynamicSMemSize));
1801 size_t dynamicSMemSize = 0,
1802 int blockSizeLimit = 0) {
1803 return hipCUDAErrorTohipError(cudaOccupancyMaxPotentialBlockSize(minGridSize, blockSize, func,
1804 dynamicSMemSize, blockSizeLimit));
1808 inline static hipError_t hipOccupancyMaxPotentialBlockSizeWithFlags(
int* minGridSize,
int* blockSize, T func,
1809 size_t dynamicSMemSize = 0,
1810 int blockSizeLimit = 0,
unsigned int flags = 0) {
1811 return hipCUDAErrorTohipError(cudaOccupancyMaxPotentialBlockSize(minGridSize, blockSize, func,
1812 dynamicSMemSize, blockSizeLimit, flags));
1817 int blockSize,
size_t dynamicSMemSize,
unsigned int flags) {
1818 return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks, func,
1819 blockSize, dynamicSMemSize, flags));
1822 template <
class T,
int dim, enum cudaTextureReadMode readMode>
1823 inline static hipError_t hipBindTexture(
size_t* offset,
const struct texture<T, dim, readMode>& tex,
1824 const void* devPtr,
size_t size = UINT_MAX) {
1825 return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, size));
1828 template <
class T,
int dim, enum cudaTextureReadMode readMode>
1829 inline static hipError_t hipBindTexture(
size_t* offset,
struct texture<T, dim, readMode>& tex,
1831 size_t size = UINT_MAX) {
1832 return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, desc, size));
1835 template <
class T,
int dim, enum cudaTextureReadMode readMode>
1836 __HIP_DEPRECATED
inline static hipError_t hipUnbindTexture(
struct texture<T, dim, readMode>* tex) {
1837 return hipCUDAErrorTohipError(cudaUnbindTexture(tex));
1840 template <
class T,
int dim, enum cudaTextureReadMode readMode>
1841 __HIP_DEPRECATED
inline static hipError_t hipUnbindTexture(
struct texture<T, dim, readMode>& tex) {
1842 return hipCUDAErrorTohipError(cudaUnbindTexture(tex));
1845 template <
class T,
int dim, enum cudaTextureReadMode readMode>
1846 __HIP_DEPRECATED
inline static hipError_t hipBindTextureToArray(
1849 return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array, desc));
1852 template <
class T,
int dim, enum cudaTextureReadMode readMode>
1853 __HIP_DEPRECATED
inline static hipError_t hipBindTextureToArray(
1856 return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array, desc));
1859 template <
class T,
int dim, enum cudaTextureReadMode readMode>
1860 __HIP_DEPRECATED
inline static hipError_t hipBindTextureToArray(
1862 return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array));
1867 return cudaCreateChannelDesc<T>();
1871 inline static hipError_t hipLaunchCooperativeKernel(T f,
dim3 gridDim,
dim3 blockDim,
1872 void** kernelParams,
unsigned int sharedMemBytes,
hipStream_t stream) {
1873 return hipCUDAErrorTohipError(
1874 cudaLaunchCooperativeKernel(f, gridDim, blockDim, kernelParams, sharedMemBytes, stream));
1879 #endif // HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_API_H
Definition: hip_runtime_api.h:128
hipError_t hipCtxSynchronize(void)
Blocks until the default context has completed all preceding requested tasks.
Definition: hip_context.cpp:249
hipError_t hipPointerGetAttributes(hipPointerAttribute_t *attributes, const void *ptr)
Return attributes for the specified pointer.
Definition: hip_memory.cpp:617
@ hipDeviceAttributeMaxPitch
Maximum pitch in bytes allowed by memory copies.
Definition: hip_runtime_api.h:336
hipError_t hipMemset3DAsync(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent, hipStream_t stream __dparm(0))
Fills asynchronously the memory area pointed to by pitchedDevPtr with the constant value.
hipError_t hipMemcpy3D(const struct hipMemcpy3DParms *p)
Copies data between host and device.
Definition: hip_memory.cpp:1712
hipError_t hipCtxGetCurrent(hipCtx_t *ctx)
Get the handle of the current/default context.
Definition: hip_context.cpp:167
int regsPerBlock
Registers per block.
Definition: hip_runtime_api.h:87
hipError_t hipMallocPitch(void **ptr, size_t *pitch, size_t width, size_t height)
Definition: hip_memory.cpp:851
hipError_t hipSetDevice(int deviceId)
Set default device to be used for subsequent hip API calls from this thread.
Definition: hip_device.cpp:132
hipError_t hipDeviceGetP2PAttribute(int *value, hipDeviceP2PAttr attr, int srcDevice, int dstDevice)
Returns a value for attr of link between two devices.
hipError_t hipMemsetD16Async(hipDeviceptr_t dest, unsigned short value, size_t count, hipStream_t stream __dparm(0))
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant short value value.
@ hipDeviceAttributeMemoryBusWidth
Global memory bus width in bits.
Definition: hip_runtime_api.h:307
const char * hipGetErrorString(hipError_t hipError)
Return a handy text string message to explain the error which occurred.
Definition: hip_error.cpp:54
hipError_t hipGetDeviceFlags(unsigned int *flags)
Gets the flags set for current device.
hipError_t hipDeviceGetByPCIBusId(int *device, const char *pciBusId)
Returns a handle to a compute device.
Definition: hip_device.cpp:492
hipErrorInvalidMemcpyDirection
Invalid memory copy direction.
Definition: hip_runtime_api.h:213
hipError_t hipMalloc3DArray(hipArray **array, const struct hipChannelFormatDesc *desc, struct hipExtent extent, unsigned int flags)
Allocate an array on the device.
Definition: hip_memory.cpp:1091
unsigned hasGlobalInt64Atomics
64-bit integer atomics for global memory.
Definition: hip_runtime_api.h:53
int minor
Definition: hip_runtime_api.h:99
@ hipDeviceAttributeMaxBlockDimX
Maximum x-dimension of a block.
Definition: hip_runtime_api.h:291
hipErrorInvalidDevicePointer
Invalid Device Pointer.
Definition: hip_runtime_api.h:212
hipError_t hipChooseDevice(int *device, const hipDeviceProp_t *prop)
Device which matches hipDeviceProp_t is returned.
Definition: hip_device.cpp:518
hipError_t hipIpcCloseMemHandle(void *devPtr)
Close memory mapped with hipIpcOpenMemHandle.
Definition: hip_memory.cpp:2539
hipError_t hipMemcpy2DAsync(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream __dparm(0))
Copies data between host and device.
hipError_t hipLaunchKernel(const void *function_address, dim3 numBlocks, dim3 dimBlocks, void **args, size_t sharedMemBytes __dparm(0), hipStream_t stream __dparm(0))
C compliant kernel launch API.
hipError_t hipMemsetD32(hipDeviceptr_t dest, int value, size_t count)
Fills the memory area pointed to by dest with the constant integer value for the specified number of times.
Definition: hip_memory.cpp:2281
size_t texturePitchAlignment
Pitch alignment requirement for texture references bound to pitched memory.
Definition: hip_runtime_api.h:128
@ hipDeviceAttributeMaxGridDimX
Maximum x-dimension of a grid.
Definition: hip_runtime_api.h:294
unsigned hasThreadFenceSystem
__threadfence_system.
Definition: hip_runtime_api.h:66
hipError_t hipStreamCreate(hipStream_t *stream)
Create an asynchronous stream.
Definition: hip_stream.cpp:106
hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, const void *f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags __dparm(hipOccupancyDefault))
Returns occupancy for a device function.
hipError_t hipDeviceGetStreamPriorityRange(int *leastPriority, int *greatestPriority)
Returns numerical values that correspond to the least and greatest stream priority.
Definition: hip_stream.cpp:122
Definition: hip_runtime_api.h:120
int maxTexture3D[3]
Maximum dimensions (width, height, depth) of 3D images, in image elements.
Definition: hip_runtime_api.h:123
hipError_t hipStreamCreateWithPriority(hipStream_t *stream, unsigned int flags, int priority)
Create an asynchronous stream with the specified priority.
Definition: hip_stream.cpp:113
hipError_t hipCtxPushCurrent(hipCtx_t ctx)
Push the context to be set as current/default context.
Definition: hip_context.cpp:154
hipError_t hipCtxGetDevice(hipDevice_t *device)
Get the handle of the device associated with current/default context.
Definition: hip_context.cpp:191
hipFuncCache_t
Definition: hip_runtime_api.h:297
int cooperativeMultiDeviceUnmatchedBlockDim
Definition: hip_runtime_api.h:136
hipError_t hipPeekAtLastError(void)
Return last error returned by any HIP runtime API call.
Definition: hip_error.cpp:41
hipError_t hipMemcpy3DAsync(const struct hipMemcpy3DParms *p, hipStream_t stream __dparm(0))
Copies data between host and device asynchronously.
hipError_t hipDeviceGetPCIBusId(char *pciBusId, int len, int device)
Returns a PCI Bus Id string for the device, overloaded to take int device ID.
Definition: hip_device.cpp:460
hipError_t hipHostGetFlags(unsigned int *flagsPtr, void *hostPtr)
Return flags associated with host pointer.
Definition: hip_memory.cpp:1133
hipErrorHostMemoryNotRegistered
Produced when trying to unlock non-page-locked memory.
Definition: hip_runtime_api.h:264
hipErrorRuntimeOther
Definition: hip_runtime_api.h:277
@ hipDeviceAttributeClockRate
Peak clock frequency in kilohertz.
Definition: hip_runtime_api.h:305
hipError_t hipMemGetAddressRange(hipDeviceptr_t *pbase, size_t *psize, hipDeviceptr_t dptr)
Get information on memory allocations.
Definition: hip_memory.cpp:2437
unsigned long long hipSurfaceObject_t
Definition: hip_surface_types.h:36
hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags)
Make the specified compute stream wait for an event.
Definition: hip_stream.cpp:130
@ hipDeviceAttributeMaxGridDimZ
Maximum z-dimension of a grid.
Definition: hip_runtime_api.h:296
hipError_t hipGetDevice(int *deviceId)
Return the default device id for the calling host thread.
Definition: hip_device.cpp:32
hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int *gridSize, int *blockSize, hipFunction_t f, size_t dynSharedMemPerBlk, int blockSizeLimit, unsigned int flags)
Determine the grid and block sizes to achieve maximum occupancy for a kernel.
Definition: hip_module.cpp:1656
hipError_t hipMallocArray(hipArray **array, const hipChannelFormatDesc *desc, size_t width, size_t height __dparm(0), unsigned int flags __dparm(hipArrayDefault))
Allocate an array on the device.
hipError_t hipMemcpyToArray(hipArray *dst, size_t wOffset, size_t hOffset, const void *src, size_t count, hipMemcpyKind kind)
Copies data between host and device.
Definition: hip_memory.cpp:1494
hipError_t hipModuleLoadData(hipModule_t *module, const void *image)
Builds a module from a code object which resides in host memory. Image is a pointer to that location.
Definition: hip_module.cpp:1492
@ hipMemoryTypeDevice
Definition: hip_runtime_api.h:150
@ hipDeviceAttributeMaxRegistersPerBlock
Definition: hip_runtime_api.h:301
hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes, hipStream_t stream)
Copy data from Device to Device asynchronously.
Definition: hip_memory.cpp:1429
hipErrorNoDevice
Call to hipGetDeviceCount returned 0 devices.
Definition: hip_runtime_api.h:218
@ hipDeviceAttributeComputeCapabilityMinor
Minor compute capability version number.
Definition: hip_runtime_api.h:315
hipError_t hipModuleLaunchKernel(hipFunction_t f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, hipStream_t stream, void **kernelParams, void **extra)
Launches kernel f with launch parameters and shared memory on stream, with arguments passed via kernelParams or extra.
int l2CacheSize
L2 cache size.
Definition: hip_runtime_api.h:103
hipError_t hipDevicePrimaryCtxRelease(hipDevice_t dev)
Release the primary context on the GPU.
Definition: hip_context.cpp:285
hipError_t hipCtxGetApiVersion(hipCtx_t ctx, int *apiVersion)
Returns the approximate HIP api version.
Definition: hip_context.cpp:207
size_t textureAlignment
Alignment requirement for textures.
Definition: hip_runtime_api.h:127
hipError_t hipHostMalloc(void **ptr, size_t size, unsigned int flags)
Allocate device accessible page locked host memory.
Definition: hip_memory.cpp:762
@ hipDeviceAttributeKernelExecTimeout
Run time limit for kernels executed on the device.
Definition: hip_runtime_api.h:339
@ hipDeviceAttributeL2CacheSize
Definition: hip_runtime_api.h:310
hipError_t hipDeviceGetName(char *name, int len, hipDevice_t device)
Returns an identifier string for the device.
Definition: hip_device.cpp:446
@ hipDeviceAttributeMaxTexture3DWidth
Maximum dimension width of 3D images in image elements.
Definition: hip_runtime_api.h:329
unsigned hasSurfaceFuncs
Surface functions.
Definition: hip_runtime_api.h:70
@ hipDeviceAttributeIntegrated
iGPU
Definition: hip_runtime_api.h:323
int isMultiGpuBoard
1 if device is on a multi-GPU board, 0 if not.
Definition: hip_runtime_api.h:114
hipError_t hipMemcpyParam2DAsync(const hip_Memcpy2D *pCopy, hipStream_t stream __dparm(0))
Copies memory for 2D arrays.
hipError_t hipModuleUnload(hipModule_t module)
Frees the module.
Definition: hip_module.cpp:1244
@ hipDeviceAttributeMaxGridDimY
Maximum y-dimension of a grid.
Definition: hip_runtime_api.h:295
@ hipMemoryTypeHost
Memory is physically located on host.
Definition: hip_runtime_api.h:149
hipError_t hipDeviceEnablePeerAccess(int peerDeviceId, unsigned int flags)
Enable direct access from current device's virtual address space to memory allocations physically located on a peer device.
Definition: hip_peer.cpp:200
hipErrorInvalidContext
Produced when input context is invalid.
Definition: hip_runtime_api.h:221
unsigned hasSharedInt64Atomics
64-bit integer atomics for shared memory.
Definition: hip_runtime_api.h:54
int computeMode
Compute mode.
Definition: hip_runtime_api.h:105
@ hipDeviceAttributeIsMultiGpuBoard
Multiple GPU devices.
Definition: hip_runtime_api.h:322
hipSharedMemConfig
Definition: hip_runtime_api.h:308
int clockRate
Max clock frequency of the multiProcessors in kHz.
Definition: hip_runtime_api.h:92
hipErrorPeerAccessNotEnabled
Peer access was never enabled from the current device.
Definition: hip_runtime_api.h:258
hipError_t hipFuncGetAttribute(int *value, hipFunction_attribute attrib, hipFunction_t hfunc)
Find out a specific attribute for a given function.
Definition: hip_module.cpp:1411
hipError_t hipDeviceComputeCapability(int *major, int *minor, hipDevice_t device)
Returns the compute capability of the device.
Definition: hip_device.cpp:434
hipError_t hipModuleOccupancyMaxPotentialBlockSize(int *gridSize, int *blockSize, hipFunction_t f, size_t dynSharedMemPerBlk, int blockSizeLimit)
Determine the grid and block sizes to achieve maximum occupancy for a kernel.
Definition: hip_module.cpp:1646
void(* hipStreamCallback_t)(hipStream_t stream, hipError_t status, void *userData)
Definition: hip_runtime_api.h:924
unsigned hasDynamicParallelism
Dynamic parallelism.
Definition: hip_runtime_api.h:72
Definition: driver_types.h:91
int canMapHostMemory
Check whether HIP can map host memory.
Definition: hip_runtime_api.h:115
size_t sharedMemPerBlock
Size of shared memory region (in bytes).
Definition: hip_runtime_api.h:86
hipError_t hipCtxGetFlags(unsigned int *flags)
Return flags used for creating default context.
Definition: hip_context.cpp:254
hipError_t hipDevicePrimaryCtxGetState(hipDevice_t dev, unsigned int *flags, int *active)
Get the state of the primary context.
Definition: hip_context.cpp:263
@ hipDeviceAttributeCooperativeMultiDeviceLaunch
Support cooperative launch on multiple devices.
Definition: hip_runtime_api.h:325
int maxThreadsPerMultiProcessor
Maximum resident threads per multi-processor.
Definition: hip_runtime_api.h:104
hipError_t hipDeviceSetCacheConfig(hipFuncCache_t cacheConfig)
Set L1/Shared cache partition.
Definition: hip_device.cpp:74
hipError_t hipCtxDestroy(hipCtx_t ctx)
Destroy a HIP context.
Definition: hip_context.cpp:109
hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags)
Enables direct access to memory allocations in a peer context.
Definition: hip_peer.cpp:221
int major
Definition: hip_runtime_api.h:96
@ hipDeviceAttributeMaxSharedMemoryPerBlock
Definition: hip_runtime_api.h:297
hipError_t hipMemcpyAtoH(void *dst, hipArray *srcArray, size_t srcOffset, size_t count)
Copies data between host and device.
Definition: hip_memory.cpp:1544
hipError_t hipGetDeviceCount(int *count)
Return number of compute-capable devices.
Definition: hip_device.cpp:69
hipSuccess
Successful completion.
Definition: hip_runtime_api.h:196
hipError_t hipHostUnregister(void *hostPtr)
Un-register host pointer.
Definition: hip_memory.cpp:1233
hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags)
Return flags associated with this stream.
Definition: hip_stream.cpp:223
hipError_t hipMemsetD8Async(hipDeviceptr_t dest, unsigned char value, size_t count, hipStream_t stream __dparm(0))
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant byte value value.
@ hipDeviceAttributeMaxThreadsPerBlock
Maximum number of threads per block.
Definition: hip_runtime_api.h:290
int gcnArch
AMD GCN Arch Value. Eg: 803, 701.
Definition: hip_runtime_api.h:116
hipError_t hipStreamSynchronize(hipStream_t stream)
Wait for all commands in stream to complete.
Definition: hip_stream.cpp:184
const char * hipGetErrorName(hipError_t hip_error)
Return name of the specified error code in text form.
Definition: hip_error.cpp:48
int kernelExecTimeoutEnabled
Run time limit for kernels executed on the device.
Definition: hip_runtime_api.h:129
hipError_t hipDeviceGet(hipDevice_t *device, int ordinal)
Returns a handle to a compute device.
Definition: hip_context.cpp:70
hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes)
Copy data from Device to Device.
Definition: hip_memory.cpp:1390
int maxTexture1D
Maximum number of elements in 1D images.
Definition: hip_runtime_api.h:121
Definition: driver_types.h:383
@ hipDeviceAttributeMaxBlockDimZ
Maximum z-dimension of a block.
Definition: hip_runtime_api.h:293
hipError_t hipMallocManaged(void **dev_ptr, size_t size, unsigned int flags __dparm(hipMemAttachGlobal))
Allocates memory that will be automatically managed by AMD HMM.
hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void *src, size_t sizeBytes)
Copy data from Host to Device.
Definition: hip_memory.cpp:1374
hipError_t hipDriverGetVersion(int *driverVersion)
Returns the approximate HIP driver version.
Definition: hip_context.cpp:85
unsigned hasDoubles
Double-precision floating point.
Definition: hip_runtime_api.h:57
hipErrorInvalidKernelFile
In CUDA DRV, it is CUDA_ERROR_INVALID_PTX.
Definition: hip_runtime_api.h:238
int maxThreadsPerBlock
Max work items per work group or workgroup max size.
Definition: hip_runtime_api.h:89
@ hipDeviceAttributeMaxBlockDimY
Maximum y-dimension of a block.
Definition: hip_runtime_api.h:292
hipError_t hipMemcpy2DToArray(hipArray *dst, size_t wOffset, size_t hOffset, const void *src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind)
Copies data between host and device.
Definition: hip_memory.cpp:1444
hipError_t hipMemAllocPitch(hipDeviceptr_t *dptr, size_t *pitch, size_t widthInBytes, size_t height, unsigned int elementSizeBytes)
Definition: hip_memory.cpp:862
Definition: hip_runtime_api.h:83
hipError_t hipMemAllocHost(void **ptr, size_t size)
Allocate pinned host memory [Deprecated].
Definition: hip_runtime_api.h:765
hipError_t hipMallocHost(void **ptr, size_t size)
Allocate pinned host memory [Deprecated].
Definition: hip_runtime_api.h:759
@ hipDeviceAttributeMaxTexture2DHeight
Maximum dimension height of 2D images in image elements.
Definition: hip_runtime_api.h:328
unsigned hasSharedInt32Atomics
32-bit integer atomics for shared memory.
Definition: hip_runtime_api.h:48
hipError_t hipFuncSetCacheConfig(const void *func, hipFuncCache_t config)
Set Cache configuration for a specific function.
Definition: hip_device.cpp:108
hipErrorInvalidValue
Definition: hip_runtime_api.h:197
size_t memPitch
Maximum pitch in bytes allowed by memory copies.
Definition: hip_runtime_api.h:126
hipError_t hipMemsetD32Async(hipDeviceptr_t dst, int value, size_t count, hipStream_t stream __dparm(0))
Fills the memory area pointed to by dev with the constant integer value for specified number of times...
int pciBusID
PCI Bus ID.
Definition: hip_runtime_api.h:111
hipError_t hipRuntimeGetVersion(int *runtimeVersion)
Returns the approximate HIP Runtime version.
Definition: hip_context.cpp:97
@ hipDeviceAttributeComputeCapabilityMajor
Major compute capability version number.
Definition: hip_runtime_api.h:314
hipError_t hipEventQuery(hipEvent_t event)
Query event status.
Definition: hip_event.cpp:394
@ hipDeviceAttributeMaxTexture3DDepth
Maximum dimension depth of 3D images in image elements.
Definition: hip_runtime_api.h:331
Definition: hip_hcc_internal.h:938
hipErrorRuntimeMemory
Definition: hip_runtime_api.h:275
@ hipDeviceAttributeMaxThreadsPerMultiProcessor
Definition: hip_runtime_api.h:312
hipError_t hipStreamGetPriority(hipStream_t stream, int *priority)
Query the priority of a stream.
Definition: hip_stream.cpp:238
hipDeviceArch_t arch
Architectural feature flags. New for HIP.
Definition: hip_runtime_api.h:108
hipError_t hipEventSynchronize(hipEvent_t event)
Wait for an event to complete.
Definition: hip_event.cpp:300
hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, const void *f, int blockSize, size_t dynSharedMemPerBlk)
Returns occupancy for a device function.
Definition: hip_module.cpp:1667
hipError_t hipHostFree(void *ptr)
Free memory allocated by the hcc hip host memory allocation API. This API performs an implicit hipDevi...
Definition: hip_memory.cpp:2396
@ hipDeviceAttributePciBusId
PCI Bus ID.
Definition: hip_runtime_api.h:318
hipError_t hipIpcOpenMemHandle(void **devPtr, hipIpcMemHandle_t handle, unsigned int flags)
Opens an interprocess memory handle exported from another process and returns a device pointer usable...
Definition: hip_memory.cpp:2494
hipError_t hipMemsetD16(hipDeviceptr_t dest, unsigned short value, size_t count)
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant short value v...
Definition: hip_memory.cpp:2271
int tccDriver
1 if the device is a Tesla device using the TCC driver, else 0.
Definition: hip_runtime_api.h:131
Definition: hip_hcc_internal.h:759
hipError_t hipDeviceGetLimit(size_t *pValue, enum hipLimit_t limit)
Get Resource limits of current device.
Definition: hip_device.cpp:94
hipError_t hipMalloc(void **ptr, size_t size)
Allocate memory on the default accelerator.
Definition: hip_memory.cpp:695
Definition: hip_runtime_api.h:111
hipError_t hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop)
Return the elapsed time between two events.
Definition: hip_event.cpp:344
hipError_t hipGetLastError(void)
Return last error returned by any HIP runtime API call and resets the stored error code to hipSuccess...
Definition: hip_error.cpp:32
hipError_t hipInit(unsigned int flags)
Explicitly initializes the HIP runtime.
Definition: hip_context.cpp:39
Definition: hip_hcc_internal.h:580
@ hipDeviceAttributeTexturePitchAlignment
Pitch alignment requirement for 2D texture references bound to pitched memory.
Definition: hip_runtime_api.h:338
@ hipDeviceAttributeWarpSize
Warp size in threads.
Definition: hip_runtime_api.h:300
unsigned hasGlobalInt32Atomics
32-bit integer atomics for global memory.
Definition: hip_runtime_api.h:46
Definition: driver_types.h:78
unsigned hasSyncThreadsExt
__syncthreads_count, __syncthreads_and, __syncthreads_or.
Definition: hip_runtime_api.h:67
hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t *handle, void *devPtr)
Gets an interprocess memory handle for an existing device memory allocation.
Definition: hip_memory.cpp:2458
hipErrorInvalidDevice
DeviceID must be in range 0...#compute-devices.
Definition: hip_runtime_api.h:219
unsigned hasFunnelShift
Funnel two words into one with shift&mask caps.
Definition: hip_runtime_api.h:63
hipError_t hipCtxDisablePeerAccess(hipCtx_t peerCtx)
Disable direct access from current context's virtual address space to memory allocations physically l...
Definition: hip_peer.cpp:227
@ hipDeviceAttributeMaxTexture3DHeight
Maximum dimension height of 3D images in image elements.
Definition: hip_runtime_api.h:330
@ hipDeviceAttributeMemoryClockRate
Peak memory clock frequency in kilohertz.
Definition: hip_runtime_api.h:306
hipErrorNotReady
Definition: hip_runtime_api.h:249
hipError_t hipHostGetDevicePointer(void **devPtr, void *hstPtr, unsigned int flags)
Get Device pointer from Host Pointer allocated through hipHostMalloc.
hipError_t hipMemGetInfo(size_t *free, size_t *total)
Query memory info. Return snapshot of free memory, and total allocatable memory on the device.
Definition: hip_memory.cpp:2296
hipError_t hipEventDestroy(hipEvent_t event)
Destroy the specified event.
Definition: hip_event.cpp:278
hipError_t hipDeviceSetSharedMemConfig(hipSharedMemConfig config)
The bank width of shared memory on current device is set.
Definition: hip_device.cpp:116
hipError_t hipDeviceReset(void)
The state of current device is discarded and updated to a fresh state.
Definition: hip_device.cpp:148
int maxGridSize[3]
Max grid dimensions (XYZ).
Definition: hip_runtime_api.h:91
@ hipDeviceAttributeComputeMode
Compute mode that device is currently in.
Definition: hip_runtime_api.h:309
hipError_t hipSetDeviceFlags(unsigned flags)
The current device behavior is changed according to the flags passed.
@ hipDeviceAttributePciDeviceId
PCI Device ID.
Definition: hip_runtime_api.h:319
size_t maxSharedMemoryPerMultiProcessor
Maximum Shared Memory Per Multiprocessor.
Definition: hip_runtime_api.h:113
int clockInstructionRate
Definition: hip_runtime_api.h:106
Definition: hip_runtime_api.h:320
hipError_t hipStreamQuery(hipStream_t stream)
Return hipSuccess if all of the operations in the specified stream have completed,...
Definition: hip_stream.cpp:161
hipError_t hipDevicePrimaryCtxSetFlags(hipDevice_t dev, unsigned int flags)
Set flags for the primary context.
Definition: hip_context.cpp:321
Definition: hip_runtime_api.h:161
@ hipDeviceAttributeTotalConstantMemory
Constant memory size in bytes.
Definition: hip_runtime_api.h:299
hipError_t hipFree(void *ptr)
Free memory allocated by the hcc hip memory allocation API. This API performs an implicit hipDeviceSy...
Definition: hip_memory.cpp:2344
unsigned hasWarpShuffle
Warp shuffle operations. (__shfl_*).
Definition: hip_runtime_api.h:62
#define hipArrayDefault
Default HIP array allocation flag.
Definition: hip_runtime_api.h:221
hipError_t hipDevicePrimaryCtxRetain(hipCtx_t *pctx, hipDevice_t dev)
Retain the primary context on the GPU.
Definition: hip_context.cpp:296
hipError_t hipOccupancyMaxPotentialBlockSize(int *gridSize, int *blockSize, const void *f, size_t dynSharedMemPerBlk, int blockSizeLimit)
Determine the grid and block sizes to achieve maximum occupancy for a kernel.
hipError_t hipModuleLoad(hipModule_t *module, const char *fname)
Loads code object from file into a hipModule_t.
Definition: hip_module.cpp:1497
hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags)
Returns occupancy for a device function.
Definition: hip_module.cpp:1698
hipError_t hipFreeHost(void *ptr)
Free memory allocated by the hcc hip host memory allocation API. [Deprecated].
Definition: hip_runtime_api.h:816
hipError_t hipMemcpyHtoA(hipArray *dstArray, size_t dstOffset, const void *srcHost, size_t count)
Copies data between host and device.
Definition: hip_memory.cpp:1528
hipError_t hipModuleGetFunction(hipFunction_t *function, hipModule_t module, const char *kname)
Function with kname will be extracted if present in module.
Definition: hip_module.cpp:1309
int memoryBusWidth
Global memory bus width in bits.
Definition: hip_runtime_api.h:94
hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback, void *userData, unsigned int flags)
Adds a callback to be called on the host after all currently enqueued items in the stream have comple...
Definition: hip_stream.cpp:258
unsigned hasWarpVote
Warp vote instructions (__any, __all).
Definition: hip_runtime_api.h:60
char name[256]
Device name.
Definition: hip_runtime_api.h:84
hipError_t hipMemcpyDtoHAsync(void *dst, hipDeviceptr_t src, size_t sizeBytes, hipStream_t stream)
Copy data from Device to Host asynchronously.
Definition: hip_memory.cpp:1437
unsigned hasGlobalFloatAtomicExch
32-bit float atomic exch for global memory.
Definition: hip_runtime_api.h:47
int concurrentKernels
Device can possibly execute multiple kernels concurrently.
Definition: hip_runtime_api.h:109
unsigned hasWarpBallot
Warp ballot instructions (__ballot).
Definition: hip_runtime_api.h:61
size_t totalGlobalMem
Size of global memory region (in bytes).
Definition: hip_runtime_api.h:85
@ hipDeviceAttributeTextureAlignment
Alignment requirement for textures.
Definition: hip_runtime_api.h:337
hipError_t hipFuncGetAttributes(struct hipFuncAttributes *attr, const void *func)
Find out attributes for a given function.
Definition: hip_module.cpp:1393
hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream)
Record an event in the specified stream.
Definition: hip_event.cpp:213
hipError_t hipMemcpy2D(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind)
Copies data between host and device.
Definition: hip_memory.cpp:2020
Definition: driver_types.h:370
Definition: driver_types.h:363
hipError_t hipModuleGetGlobal(void **, size_t *, hipModule_t, const char *)
Returns the device memory pointer and size of the kernel present in the module with the given symbol name.
Definition: hip_module.cpp:1113
hipError_t hipMemset2D(void *dst, size_t pitch, int value, size_t width, size_t height)
Fills the memory area pointed to by dst with the constant value.
Definition: hip_memory.cpp:2251
hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent)
Fills synchronously the memory area pointed to by pitchedDevPtr with the constant value.
Definition: hip_memory.cpp:2286
hipError_t hipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags)
Create an asynchronous stream.
Definition: hip_stream.cpp:97
hipError_t hipDeviceGetAttribute(int *pi, hipDeviceAttribute_t attr, int deviceId)
Query for a specific device attribute.
Definition: hip_device.cpp:354
hipError_t hipMemcpyFromArray(void *dst, hipArray_const_t srcArray, size_t wOffset, size_t hOffset, size_t count, hipMemcpyKind kind)
Copies data between host and device.
Definition: hip_memory.cpp:1511
@ hipDeviceAttributeCanMapHostMemory
Device can map host memory into device address space.
Definition: hip_runtime_api.h:340
int maxThreadsDim[3]
Max number of threads in each dimension (XYZ) of a block.
Definition: hip_runtime_api.h:90
Definition: hip_module.cpp:108
hipError_t hipMemcpyPeerAsync(void *dst, int dstDeviceId, const void *src, int srcDevice, size_t sizeBytes, hipStream_t stream __dparm(0))
Copies memory from one device to memory on another device.
hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, void *src, size_t sizeBytes, hipStream_t stream)
Copy data from Host to Device asynchronously.
Definition: hip_memory.cpp:1422
int cooperativeMultiDeviceLaunch
HIP device supports cooperative launch on multiple devices.
Definition: hip_runtime_api.h:120
hipError_t hipMemcpyDtoH(void *dst, hipDeviceptr_t src, size_t sizeBytes)
Copy data from Device to Host.
Definition: hip_memory.cpp:1382
unsigned has3dGrid
Grid and group dims are 3D (rather than 2D).
Definition: hip_runtime_api.h:71
hipError_t hipDeviceGetCacheConfig(hipFuncCache_t *cacheConfig)
Get the cache configuration of the current device.
Definition: hip_device.cpp:82
hipError_t hipMemcpyPeer(void *dst, int dstDeviceId, const void *src, int srcDeviceId, size_t sizeBytes)
Copies memory from one device to memory on another device.
Definition: hip_peer.cpp:207
@ hipDeviceAttributeMaxTexture1DWidth
Maximum number of elements in 1D images.
Definition: hip_runtime_api.h:326
@ hipDeviceAttributeCooperativeLaunch
Support cooperative launch.
Definition: hip_runtime_api.h:324
@ hipDeviceAttributeMultiprocessorCount
Number of multiprocessors on the device.
Definition: hip_runtime_api.h:308
int pciDeviceID
PCI Device ID.
Definition: hip_runtime_api.h:112
hipError_t hipGetDeviceProperties(hipDeviceProp_t *prop, int deviceId)
Returns device properties.
Definition: hip_device.cpp:381
hipError_t hipMemcpy(void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind)
Copy data from src to dst.
Definition: hip_memory.cpp:1367
int memoryClockRate
Max global memory clock frequency in kHz.
Definition: hip_runtime_api.h:93
hipError_t hipEventCreateWithFlags(hipEvent_t *event, unsigned flags)
Create an event with the specified flags.
Definition: hip_event.cpp:201
hipErrorCooperativeLaunchTooLarge
Definition: hip_runtime_api.h:268
int warpSize
Warp size.
Definition: hip_runtime_api.h:88
hipError_t hipCtxGetSharedMemConfig(hipSharedMemConfig *pConfig)
Get Shared memory bank configuration.
Definition: hip_context.cpp:241
hipError_t hipDeviceTotalMem(size_t *bytes, hipDevice_t device)
Returns the total amount of memory on the device.
Definition: hip_device.cpp:480
hipError_t hipFreeArray(hipArray *array)
Frees an array on the device.
Definition: hip_memory.cpp:2409
hipErrorAssert
Produced when the kernel calls assert.
Definition: hip_runtime_api.h:261
Definition: texture_types.h:74
hipError_t hipCtxPopCurrent(hipCtx_t *ctx)
Pop the current/default context and return the popped context.
Definition: hip_context.cpp:133
int cooperativeMultiDeviceUnmatchedFunc
Definition: hip_runtime_api.h:132
int cooperativeMultiDeviceUnmatchedGridDim
Definition: hip_runtime_api.h:134
hipError_t hipDeviceCanAccessPeer(int *canAccessPeer, int deviceId, int peerDeviceId)
Determine if a device can access a peer's memory.
Definition: hip_peer.cpp:186
unsigned hasFloatAtomicAdd
32-bit float atomic add in global and shared memory.
Definition: hip_runtime_api.h:50
hipError_t hipCtxSetCurrent(hipCtx_t ctx)
Set the passed context as current/default.
Definition: hip_context.cpp:178
int cooperativeLaunch
HIP device supports cooperative launch.
Definition: hip_runtime_api.h:119
unsigned hasSharedFloatAtomicExch
32-bit float atomic exch for shared memory.
Definition: hip_runtime_api.h:49
Definition: texture_types.h:95
Definition: driver_types.h:323
int multiProcessorCount
Number of multi-processors (compute units).
Definition: hip_runtime_api.h:102
int integrated
APU vs dGPU.
Definition: hip_runtime_api.h:118
hipError_t hipMemsetD8(hipDeviceptr_t dest, unsigned char value, size_t count)
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant byte value va...
Definition: hip_memory.cpp:2261
hipError_t hipCtxSetCacheConfig(hipFuncCache_t cacheConfig)
Set L1/Shared cache partition.
Definition: hip_context.cpp:225
hipError_t hipMemset2DAsync(void *dst, size_t pitch, int value, size_t width, size_t height, hipStream_t stream __dparm(0))
Fills asynchronously the memory area pointed to by dst with the constant value.
int ECCEnabled
Device has ECC support enabled.
Definition: hip_runtime_api.h:130
hipError_t hipCtxCreate(hipCtx_t *ctx, unsigned int flags, hipDevice_t device)
Create a context and set it as current/ default context.
Definition: hip_context.cpp:52
size_t totalConstMem
Size of constant memory region (in bytes).
Definition: hip_runtime_api.h:95
int maxTexture2D[2]
Maximum dimensions (width, height) of 2D images, in image elements.
Definition: hip_runtime_api.h:122
Definition: hip_runtime_api.h:329
hipErrorHostMemoryAlreadyRegistered
Produced when trying to lock a page-locked memory.
Definition: hip_runtime_api.h:262
hipError_t hipCtxSetSharedMemConfig(hipSharedMemConfig config)
Set Shared memory bank configuration.
Definition: hip_context.cpp:233
hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk)
Returns occupancy for a device function.
Definition: hip_module.cpp:1677
hipDeviceAttribute_t
Definition: hip_runtime_api.h:289
Definition: driver_types.h:262
hipErrorLaunchFailure
An exception occurred on the device while executing a kernel.
Definition: hip_runtime_api.h:266
Definition: hip_hcc_internal.h:415
hipError_t hipDeviceSynchronize(void)
Waits on all active streams on current device.
Definition: hip_device.cpp:143
@ hipDeviceAttributeConcurrentKernels
Definition: hip_runtime_api.h:316
int cooperativeMultiDeviceUnmatchedSharedMem
Definition: hip_runtime_api.h:138
hipError_t hipProfilerStart()
Start recording of profiling information. When using this API, start the profiler with profiling disab...
Definition: hip_hcc.cpp:2496
hipError_t hipDeviceGetSharedMemConfig(hipSharedMemConfig *pConfig)
Returns bank width of shared memory for current device.
Definition: hip_device.cpp:124
hipErrorNotSupported
Produced when the hip API is not supported/implemented.
Definition: hip_runtime_api.h:272
hipError_t hipMemcpyAsync(void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream __dparm(0))
Copies sizeBytes bytes from the memory area pointed to by src to the memory area pointed to by offset...
hipErrorLaunchOutOfResources
Out of resources error.
Definition: hip_runtime_api.h:254
hipError_t hipStreamDestroy(hipStream_t stream)
Destroys the specified stream.
Definition: hip_stream.cpp:195
hipError_t hipHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags)
Register host memory so it can be accessed from the current device.
Definition: hip_memory.cpp:1158
hipError_t hipProfilerStop()
Stop recording of profiling information. When using this API, start the profiler with profiling disab...
Definition: hip_hcc.cpp:2502
hipError_t hipModuleLoadDataEx(hipModule_t *module, const void *image, unsigned int numOptions, hipJitOption *options, void **optionValues)
Builds a module from a code object which resides in host memory. Image is a pointer to that location....
Definition: hip_module.cpp:1511
hipError_t hipEventCreate(hipEvent_t *event)
Definition: hip_event.cpp:207
hipError_t hipMemsetAsync(void *dst, int value, size_t sizeBytes, hipStream_t stream __dparm(0))
Fills the first sizeBytes bytes of the memory area pointed to by dev with the constant byte value val...
@ hipDeviceAttributeMaxTexture2DWidth
Maximum dimension width of 2D images in image elements.
Definition: hip_runtime_api.h:327
int pciDomainID
PCI Domain ID.
Definition: hip_runtime_api.h:110
hipError_t hipCtxGetCacheConfig(hipFuncCache_t *cacheConfig)
Get the cache configuration of the current context.
Definition: hip_context.cpp:217
hipError_t hipMemcpyParam2D(const hip_Memcpy2D *pCopy)
Copies memory for 2D arrays.
Definition: hip_memory.cpp:2144
hipError_t hipHostAlloc(void **ptr, size_t size, unsigned int flags)
Allocate device accessible page locked host memory [Deprecated].
Definition: hip_runtime_api.h:771
hipError_t hipMemset(void *dst, int value, size_t sizeBytes)
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant byte value va...
Definition: hip_memory.cpp:2220
hipError_t hipDeviceDisablePeerAccess(int peerDeviceId)
Disable direct access from current device's virtual address space to memory allocations physically lo...
Definition: hip_peer.cpp:193
@ hipDeviceAttributeEccEnabled
Device has ECC support enabled.
Definition: hip_runtime_api.h:341
@ hipDeviceAttributeMaxSharedMemoryPerMultiprocessor
Definition: hip_runtime_api.h:320
hipError_t hipDevicePrimaryCtxReset(hipDevice_t dev)
Resets the primary context on the GPU.
Definition: hip_context.cpp:308
hipErrorPeerAccessAlreadyEnabled
Peer access was already enabled from the current device.
Definition: hip_runtime_api.h:256