23 #ifndef HIP_SRC_HIP_HCC_INTERNAL_H 24 #define HIP_SRC_HIP_HCC_INTERNAL_H 28 #include <unordered_map> 31 #include "hsa/hsa_ext_amd.h" 32 #include "hip/hip_runtime.h" 33 #include "hip_prof_api.h" 38 #if (__hcc_workweek__ < 16354) 39 #error("This version of HIP requires a newer version of HCC."); 44 #if defined(__HCC_HAS_EXTENDED_AM_MEMTRACKER_UPDATE) and \ 45 (__HCC_HAS_EXTENDED_AM_MEMTRACKER_UPDATE != 0) 46 #define USE_APP_PTR_FOR_CTX 1 58 extern const int release;
62 extern int HIP_LAUNCH_BLOCKING;
63 extern int HIP_API_BLOCKING;
65 extern int HIP_PRINT_ENV;
66 extern int HIP_PROFILE_API;
70 extern int HIP_STAGING_SIZE;
71 extern int HIP_STREAM_SIGNALS;
72 extern int HIP_VISIBLE_DEVICES;
73 extern int HIP_FORCE_P2P_HOST;
75 extern int HIP_HOST_COHERENT;
77 extern int HIP_HIDDEN_FREE_MEM;
80 extern int HIP_SYNC_HOST_ALLOC;
81 extern int HIP_SYNC_STREAM_WAIT;
83 extern int HIP_SYNC_NULL_STREAM;
84 extern int HIP_INIT_ALLOC;
85 extern int HIP_FORCE_NULL_STREAM;
87 extern int HIP_SYNC_FREE;
89 extern int HIP_DUMP_CODE_OBJECT;
92 extern int HCC_OPT_FLUSH;
99 int tid()
const {
return _shortTid; };
100 pid_t pid()
const {
return _pid; };
101 uint64_t incApiSeqNum() {
return ++_apiSeqNum; };
102 uint64_t apiSeqNum()
const {
return _apiSeqNum; };
113 static const uint64_t MAX_TRIGGER = std::numeric_limits<uint64_t>::max();
115 void print(
int tid) {
116 std::cout <<
"Enabling tracing for ";
117 for (
auto iter = _profTrigger.begin(); iter != _profTrigger.end(); iter++) {
118 std::cout <<
"tid:" << tid <<
"." << *iter <<
",";
123 uint64_t nextTrigger() {
return _profTrigger.empty() ? MAX_TRIGGER : _profTrigger.back(); };
124 void add(uint64_t trigger) { _profTrigger.push_back(trigger); };
125 void sort() { std::sort(_profTrigger.begin(), _profTrigger.end(), std::greater<int>()); };
128 std::vector<uint64_t> _profTrigger;
137 lastHipError = hipSuccess;
138 getPrimaryCtx =
true;
139 defaultCtx =
nullptr;
142 hipError_t lastHipError;
148 std::stack<ihipCtx_t*> ctxStack;
152 #define GET_TLS() TlsData *tls = tls_get_ptr() 154 extern std::vector<ProfTrigger> g_dbStartTriggers;
155 extern std::vector<ProfTrigger> g_dbStopTriggers;
165 #define KNRM "\x1B[0m" 166 #define KRED "\x1B[31m" 167 #define KGRN "\x1B[32m" 168 #define KYEL "\x1B[33m" 169 #define KBLU "\x1B[34m" 170 #define KMAG "\x1B[35m" 171 #define KCYN "\x1B[36m" 172 #define KWHT "\x1B[37m" 174 extern const char* API_COLOR;
175 extern const char* API_COLOR_END;
181 #define EVENT_THREAD_SAFE 1 183 #define STREAM_THREAD_SAFE 1 185 #define CTX_THREAD_SAFE 1 187 #define DEVICE_THREAD_SAFE 1 192 #define COMPILE_HIP_DB 1 200 #define COMPILE_HIP_TRACE_API 0x3 205 #ifndef COMPILE_HIP_ATP_MARKER 206 #define COMPILE_HIP_ATP_MARKER 0 215 #if COMPILE_HIP_ATP_MARKER 216 #include "CXLActivityLogger.h" 217 #define MARKER_BEGIN(markerName, group) amdtBeginMarker(markerName, group, nullptr); 218 #define MARKER_END() amdtEndMarker(); 219 #define RESUME_PROFILING amdtResumeProfiling(AMDT_ALL_PROFILING); 220 #define STOP_PROFILING amdtStopProfiling(AMDT_ALL_PROFILING); 223 #define MARKER_BEGIN(markerName, group) 225 #define RESUME_PROFILING 226 #define STOP_PROFILING 232 #define TRACE_ALL 0 // 0x01 233 #define TRACE_KCMD 1 // 0x02, kernel command 234 #define TRACE_MCMD 2 // 0x04, memory command 235 #define TRACE_MEM 3 // 0x08, memory allocation or deallocation. 236 #define TRACE_SYNC 4 // 0x10, synchronization (host or hipStreamWaitEvent) 237 #define TRACE_QUERY 5 // 0x20, hipEventRecord, hipEventQuery, hipStreamQuery 248 #define DB_MAX_FLAG 6 255 const char* _shortName;
259 static const DbName dbName[] = {
261 {KYEL,
"sync"}, {KCYN,
"mem"}, {KMAG,
"copy"}, {KRED,
"warn"},
267 #define tprintf(trace_level, ...) \ 269 if (HIP_DB & (1 << (trace_level))) { \ 272 snprintf(msgStr, sizeof(msgStr), __VA_ARGS__); \ 273 fprintf(stderr, " %ship-%s pid:%d tid:%d:%s%s", dbName[trace_level]._color, \ 274 dbName[trace_level]._shortName, tls->tidInfo.pid(), tls->tidInfo.tid(), msgStr, KNRM); \ 279 #define tprintf(trace_level, ...) 283 static inline uint64_t getTicks() {
return hc::get_system_ticks(); }
286 extern uint64_t recordApiTrace(
TlsData *tls, std::string* fullStr,
const std::string& apiStr);
288 #if COMPILE_HIP_ATP_MARKER || (COMPILE_HIP_TRACE_API & 0x1) 289 #define API_TRACE(forceTrace, ...) \ 291 uint64_t hipApiStartTick = 0; \ 293 tls->tidInfo.incApiSeqNum(); \ 295 (HIP_PROFILE_API || (COMPILE_HIP_DB && (HIP_TRACE_API & (1 << TRACE_ALL))))) { \ 296 std::string apiStr = std::string(__func__) + " (" + ToString(__VA_ARGS__) + ')'; \ 297 std::string fullStr; \ 298 hipApiStartTick = recordApiTrace(tls, &fullStr, apiStr); \ 299 if (HIP_PROFILE_API == 0x1) { \ 300 MARKER_BEGIN(__func__, "HIP") \ 301 } else if (HIP_PROFILE_API == 0x2) { \ 302 MARKER_BEGIN(fullStr.c_str(), "HIP"); \ 309 #define API_TRACE(IS_CMD, ...) GET_TLS(); tls->tidInfo.incApiSeqNum(); 312 #define ihipGetTlsDefaultCtx() iihipGetTlsDefaultCtx(tls) 313 #define ihipSetTlsDefaultCtx(ctx) tls->defaultCtx = ctx 315 #define HIP_SET_DEVICE() ihipDeviceSetState(tls); 320 #define HIP_INIT_API(cid, ...) \ 321 hip_impl::hip_init(); \ 322 API_TRACE(0, __VA_ARGS__); \ 323 HIP_CB_SPAWNER_OBJECT(cid); 329 #define HIP_INIT_SPECIAL_API(cid, tbit, ...) \ 330 hip_impl::hip_init(); \ 331 API_TRACE((HIP_TRACE_API & (1 << tbit)), __VA_ARGS__); \ 332 HIP_CB_SPAWNER_OBJECT(cid); 338 #define ihipLogStatus(hipStatus) \ 340 hipError_t localHipStatus = hipStatus; \ 341 tls->lastHipError = localHipStatus; \ 343 if ((COMPILE_HIP_TRACE_API & 0x2) && HIP_TRACE_API & (1 << TRACE_ALL)) { \ 344 auto ticks = getTicks() - hipApiStartTick; \ 345 fprintf(stderr, " %ship-api pid:%d tid:%d.%lu %-30s ret=%2d (%s)>> +%lu ns%s\n", \ 346 (localHipStatus == 0) ? API_COLOR : KRED, tls->tidInfo.pid(), tls->tidInfo.tid(), \ 347 tls->tidInfo.apiSeqNum(), __func__, localHipStatus, \ 348 ihipErrorString(localHipStatus), ticks, API_COLOR_END); \ 350 if (HIP_PROFILE_API) { \ 380 #define HIP_IPC_RESERVED_SIZE 24 387 char reserved[HIP_IPC_RESERVED_SIZE];
392 std::string fileName;
393 hsa_executable_t executable = {};
394 hsa_code_object_reader_t coReader = {};
397 std::string, std::vector<std::pair<std::size_t, std::size_t>>> kernargs;
400 if (executable.handle) hsa_executable_destroy(executable);
401 if (coReader.handle) hsa_code_object_reader_destroy(coReader);
// Unconditionally reports success; no state is read or modified.
bool try_lock() {
    return true;
}
415 #if EVENT_THREAD_SAFE 416 typedef std::mutex EventMutex;
418 #warning "Stream thread-safe disabled" 422 #if STREAM_THREAD_SAFE 423 typedef std::mutex StreamMutex;
425 #warning "Stream thread-safe disabled" 431 typedef std::mutex CtxMutex;
434 #warning "Ctx thread-safe disabled" 437 #if DEVICE_THREAD_SAFE 438 typedef std::mutex DeviceMutex;
441 #warning "Device thread-safe disabled" 448 template <
typename T>
452 : _criticalData(&criticalData),
453 _autoUnlock(autoUnlock)
456 tprintf(DB_SYNC,
"locking criticalData=%p for %s..\n", _criticalData,
457 ToString(_criticalData->_parent).c_str());
458 _criticalData->_mutex.lock();
463 tprintf(DB_SYNC,
"auto-unlocking criticalData=%p for %s...\n", _criticalData,
464 ToString(_criticalData->_parent).c_str());
465 _criticalData->_mutex.unlock();
470 tprintf(DB_SYNC,
"unlocking criticalData=%p for %s...\n", _criticalData,
471 ToString(_criticalData->_parent).c_str());
472 _criticalData->_mutex.unlock();
476 T* operator->() {
return _criticalData; };
484 template <
typename MUTEX_TYPE>
488 void lock() { _mutex.lock(); }
489 void unlock() { _mutex.unlock(); }
490 bool try_lock() {
return _mutex.try_lock(); }
496 template <
typename MUTEX_TYPE>
500 : _av(av), _parent(parentStream){};
510 tprintf(DB_SYNC,
"munlocking criticalData=%p for %s...\n",
this,
511 ToString(this->_parent).c_str());
517 tprintf(DB_SYNC,
"mtry_locking=%d criticalData=%p for %s...\n", gotLock,
this,
518 ToString(this->_parent).c_str());
519 return gotLock ?
this :
nullptr;
525 hc::accelerator_view _av;
546 enum ScheduleMode { Auto, Spin, Yield };
547 typedef uint64_t SeqNum_t;
554 void locked_copySync(
void* dst,
const void* src,
size_t sizeBytes,
unsigned kind,
555 bool resolveOn =
true);
557 bool locked_copy2DSync(
void* dst,
const void* src,
size_t width,
size_t height,
size_t srcPitch,
size_t dstPitch,
unsigned kind,
558 bool resolveOn =
true);
560 void locked_copyAsync(
void* dst,
const void* src,
size_t sizeBytes,
unsigned kind);
562 bool locked_copy2DAsync(
void* dst,
const void* src,
size_t width,
size_t height,
size_t srcPitch,
size_t dstPitch,
unsigned kind);
564 void lockedSymbolCopySync(hc::accelerator& acc,
void* dst,
void* src,
size_t sizeBytes,
565 size_t offset,
unsigned kind);
566 void lockedSymbolCopyAsync(hc::accelerator& acc,
void* dst,
void* src,
size_t sizeBytes,
567 size_t offset,
unsigned kind);
573 void lockclose_postKernelCommand(
const char* kernelName, hc::accelerator_view* av,
bool unlockNotNeeded = 0);
578 hc::accelerator_view* locked_getAv() {
584 hc::completion_future locked_recordEvent(
hipEvent_t event);
591 hc::hcWaitMode waitMode()
const;
596 void launchModuleKernel(hc::accelerator_view av, hsa_signal_t signal, uint32_t blockDimX,
597 uint32_t blockDimY, uint32_t blockDimZ, uint32_t gridDimX,
598 uint32_t gridDimY, uint32_t gridDimZ, uint32_t groupSegmentSize,
599 uint32_t sharedMemBytes,
void* kernarg,
size_t kernSize,
610 bool isDefaultStream()
const {
return _id == 0; };
621 unsigned resolveMemcpyDirection(
bool srcInDeviceMem,
bool dstInDeviceMem);
622 void resolveHcMemcpyDirection(
unsigned hipMemKind,
const hc::AmPointerInfo* dstPtrInfo,
623 const hc::AmPointerInfo* srcPtrInfo, hc::hcCommandKind* hcCopyDir,
624 ihipCtx_t** copyDevice,
bool* forceUnpinnedCopy);
626 bool canSeeMemory(
const ihipCtx_t* thisCtx,
const hc::AmPointerInfo* dstInfo,
627 const hc::AmPointerInfo* srcInfo);
629 void addSymbolPtrToTracker(hc::accelerator& acc,
void* ptr,
size_t sizeBytes);
635 std::mutex _hasQueueLock;
640 friend std::ostream& operator<<(std::ostream& os,
const ihipStream_t& s);
643 ScheduleMode _scheduleMode;
652 : _stream(stream), _callback(callback), _userData(userData) {
662 enum hipEventStatus_t {
663 hipEventStatusUnitialized = 0,
664 hipEventStatusCreated = 1,
665 hipEventStatusRecording = 2,
666 hipEventStatusComplete = 3,
670 enum ihipEventType_t {
671 hipEventTypeIndependent,
672 hipEventTypeStartCommand,
673 hipEventTypeStopCommand,
679 _state = hipEventStatusCreated;
682 _type = hipEventTypeIndependent;
685 void marker(
const hc::completion_future& marker) { _marker = marker; };
686 hc::completion_future& marker() {
return _marker; }
687 uint64_t timestamp()
const {
return _timestamp; };
688 ihipEventType_t type()
const {
return _type; };
690 ihipEventType_t _type;
691 hipEventStatus_t _state;
696 hc::completion_future _marker;
702 template <
typename MUTEX_TYPE>
725 void attachToCompletionFuture(
const hc::completion_future* cf,
hipStream_t stream,
726 ihipEventType_t eventType);
727 std::pair<hipEventStatus_t, uint64_t> refreshEventStatus();
733 return _criticalData._eventData;
750 template <
typename MUTEX_TYPE>
754 : _parent(parentDevice), _ctxCount(0){};
761 std::list<ihipCtx_t*>& ctxs() {
return _ctxs; };
762 const std::list<ihipCtx_t*>& const_ctxs()
const {
return _ctxs; };
763 int getcount() {
return _ctxCount; };
770 std::list<ihipCtx_t*> _ctxs;
784 ihipDevice_t(
unsigned deviceId,
unsigned deviceCnt, hc::accelerator& acc);
788 ihipCtx_t* getPrimaryCtx()
const {
return _primaryCtx; };
796 hc::accelerator _acc;
797 hsa_agent_t _hsaAgent;
826 std::vector<char> _arguments;
831 template <
typename MUTEX_TYPE>
835 : _parent(parentCtx), _peerCnt(0) {
836 _peerAgents =
new hsa_agent_t[deviceCnt];
840 if (_peerAgents !=
nullptr) {
842 _peerAgents =
nullptr;
849 std::list<ihipStream_t*>& streams() {
return _streams; };
850 const std::list<ihipStream_t*>& const_streams()
const {
return _streams; };
854 bool isPeerWatcher(
const ihipCtx_t* peer);
858 void resetPeerWatchers(
ihipCtx_t* thisDevice);
859 void printPeerWatchers(FILE* f)
const;
861 uint32_t peerCnt()
const {
return _peerCnt; };
862 hsa_agent_t* peerAgents()
const {
return _peerAgents; };
866 std::list<ihipCtx_t*> _peers;
868 std::stack<ihipExec_t> _execStack;
876 std::list<ihipStream_t*> _streams;
884 hsa_agent_t* _peerAgents;
886 void recomputePeerAgents();
917 void locked_waitAllStreams();
918 void locked_syncDefaultStream(
bool waitOnSelf,
bool syncHost);
922 const ihipDevice_t* getDevice()
const {
return _device; };
923 int getDeviceNum()
const {
return _device->_deviceId; };
926 ihipDevice_t* getWriteableDevice()
const {
return _device; };
928 std::string toString()
const;
952 extern unsigned g_deviceCnt;
953 extern hsa_agent_t g_cpu_agent;
954 extern hsa_agent_t* g_allAgents;
958 extern void ihipInit();
959 extern const char* ihipErrorString(hipError_t);
960 extern hipError_t ihipSynchronize(
TlsData *tls);
961 extern void ihipCtxStackUpdate();
962 extern hipError_t ihipDeviceSetState(
TlsData *tls);
965 ihipCtx_t* ihipGetPrimaryCtx(
unsigned deviceIndex);
967 const char* name, hsa_agent_t *agent);
975 inline std::ostream& operator<<(std::ostream& os,
const ihipStream_t& s) {
977 os << s.getDevice()->_deviceId;
984 inline std::ostream& operator<<(std::ostream& os,
const dim3& s) {
995 inline std::ostream& operator<<(std::ostream& os,
const gl_dim3& s) {
1007 inline std::ostream& operator<<(std::ostream& os,
const hipEvent_t& e) {
1008 os <<
"event:" << std::hex << static_cast<void*>(e);
1012 inline std::ostream& operator<<(std::ostream& os,
const ihipCtx_t* c) {
1013 os <<
"ctx:" <<
static_cast<const void*
>(c) <<
".dev:" << c->getDevice()->_deviceId;
1020 hipError_t memcpyAsync(
void* dst,
const void* src,
size_t sizeBytes, hipMemcpyKind kind,
1032 if ((tls->defaultCtx ==
nullptr) && (g_deviceCnt > 0)) {
1033 tls->defaultCtx = ihipGetPrimaryCtx(0);
1035 return tls->defaultCtx;
Definition: hip_hcc_internal.h:253
Definition: hip_hcc_internal.h:782
Definition: hip_hcc_internal.h:485
Definition: hip_hcc_internal.h:112
Definition: hip_hcc_internal.h:408
Definition: hip_hcc_internal.h:381
uint32_t x
x
Definition: hip_runtime_api.h:271
Definition: grid_launch.h:17
Definition: hip_module.cpp:107
Definition: hip_hcc_internal.h:135
Definition: hip_hcc_internal.h:821
Definition: hip_hcc_internal.h:902
Definition: hip_runtime_api.h:270
uint32_t y
y
Definition: hip_runtime_api.h:272
void(* hipStreamCallback_t)(hipStream_t stream, hipError_t status, void *userData)
Definition: hip_runtime_api.h:844
Definition: hip_hcc_internal.h:649
Definition: hip_hcc_internal.h:751
unsigned _computeUnits
Number of compute units supported by the device:
Definition: hip_hcc_internal.h:800
uint32_t z
z
Definition: hip_runtime_api.h:273
Definition: hip_runtime_api.h:83
Definition: hip_hcc_internal.h:677
Definition: hip_hcc_internal.h:703
Definition: hip_hcc_internal.h:1019
Definition: hip_hcc_internal.h:357
Definition: hip_hcc_internal.h:722
Definition: hip_hcc_internal.h:832
Definition: hip_hcc_internal.h:544
Definition: hip_hcc_internal.h:497
hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream)
Record an event in the specified stream.
Definition: hip_event.cpp:110
hsa_amd_ipc_memory_t ipc_handle
ipc memory handle on ROCr
Definition: hip_hcc_internal.h:384
Definition: hip_hcc_internal.h:449
Definition: hip_hcc_internal.h:391
hipError_t hipStreamQuery(hipStream_t stream)
Return #hipSuccess if all of the operations in the specified stream have completed, or #hipErrorNotReady if not.
Definition: hip_stream.cpp:160
Definition: hip_hcc_internal.h:95