88IF UNAME_SYSNAME == " Windows" :
99 import win32api
1010 import struct
11+ from pywintypes import error
1112ELSE :
1213 cimport cuda._lib.dlfcn as dlfcn
14+ import sys
15+ cimport cuda._cuda.loader as loader
1316cdef bint __cuPythonInit = False
1417cdef void * __cuGetErrorString = NULL
1518cdef void * __cuGetErrorName = NULL
@@ -28,6 +31,7 @@ cdef void *__cuDeviceGetNvSciSyncAttributes = NULL
2831cdef void * __cuDeviceSetMemPool = NULL
2932cdef void * __cuDeviceGetMemPool = NULL
3033cdef void * __cuDeviceGetDefaultMemPool = NULL
34+ cdef void * __cuFlushGPUDirectRDMAWrites = NULL
3135cdef void * __cuDeviceGetProperties = NULL
3236cdef void * __cuDeviceComputeCapability = NULL
3337cdef void * __cuDevicePrimaryCtxRetain = NULL
@@ -360,29 +364,34 @@ cdef void *__cuGraphicsMapResources = NULL
360364cdef void * __cuGraphicsUnmapResources = NULL
361365cdef void * __cuGetProcAddress = NULL
362366cdef void * __cuGetExportTable = NULL
363- cdef void * __cuFlushGPUDirectRDMAWrites = NULL
364367
365368cdef int cuPythonInit() nogil except - 1 :
366369 global __cuPythonInit
367370 if __cuPythonInit:
368371 return 0
369372 __cuPythonInit = True
370- IF UNAME_SYSNAME == " Windows" :
371- LOAD_LIBRARY_SEARCH_SYSTEM32 = 0x00000800
372- with gil:
373- if 8 * struct .calcsize(" P" ) == 32 :
374- try :
375- handle = win32api.LoadLibraryEx(' nvcuda32.dll' , 0 , LOAD_LIBRARY_SEARCH_SYSTEM32)
376- except :
377- handle = win32api.LoadLibraryEx(' nvcuda.dll' , 0 , LOAD_LIBRARY_SEARCH_SYSTEM32)
378- else :
379- handle = win32api.LoadLibraryEx(' nvcuda.dll' , 0 , LOAD_LIBRARY_SEARCH_SYSTEM32)
380- ELSE :
381- handle = dlfcn.dlopen(' libcuda.so' , dlfcn.RTLD_NOW)
382- if (handle == NULL ):
383- with gil:
373+ cdef char libPath[260 ]
374+ libPath[0 ] = 0
375+ with gil:
376+ status = loader.getCUDALibraryPath(libPath, sys.maxsize > 2 ** 32 )
377+ if status == 0 and len (libPath) != 0 :
378+ path = libPath.decode(' utf-8' )
379+ else :
380+ IF UNAME_SYSNAME == " Windows" :
381+ path = ' nvcuda.dll'
382+ ELSE :
383+ path = ' libcuda.so'
384+
385+ IF UNAME_SYSNAME == " Windows" :
386+ LOAD_LIBRARY_SEARCH_SYSTEM32 = 0x00000800
387+ try :
388+ handle = win32api.LoadLibraryEx(path, 0 , LOAD_LIBRARY_SEARCH_SYSTEM32)
389+ except error as e:
390+ raise RuntimeError (' Failed to LoadLibraryEx ' + path)
391+ ELSE :
392+ handle = dlfcn.dlopen(bytes(path, encoding = ' utf-8' ), dlfcn.RTLD_NOW)
393+ if (handle == NULL ):
384394 raise RuntimeError (' Failed to dlopen libcuda.so' )
385-
386395 # All Globals
387396 global __cuGetErrorString
388397 global __cuGetErrorName
@@ -401,6 +410,7 @@ cdef int cuPythonInit() nogil except -1:
401410 global __cuDeviceSetMemPool
402411 global __cuDeviceGetMemPool
403412 global __cuDeviceGetDefaultMemPool
413+ global __cuFlushGPUDirectRDMAWrites
404414 global __cuDeviceGetProperties
405415 global __cuDeviceComputeCapability
406416 global __cuDevicePrimaryCtxRetain
@@ -733,7 +743,6 @@ cdef int cuPythonInit() nogil except -1:
733743 global __cuGraphicsUnmapResources
734744 global __cuGetProcAddress
735745 global __cuGetExportTable
736- global __cuFlushGPUDirectRDMAWrites
737746 # Get latest __cuGetProcAddress
738747 IF UNAME_SYSNAME == " Windows" :
739748 with gil:
@@ -763,6 +772,7 @@ cdef int cuPythonInit() nogil except -1:
763772 _cuGetProcAddress(' cuDeviceSetMemPool' , & __cuDeviceSetMemPool, 11020 , 0 )
764773 _cuGetProcAddress(' cuDeviceGetMemPool' , & __cuDeviceGetMemPool, 11020 , 0 )
765774 _cuGetProcAddress(' cuDeviceGetDefaultMemPool' , & __cuDeviceGetDefaultMemPool, 11020 , 0 )
775+ _cuGetProcAddress(' cuFlushGPUDirectRDMAWrites' , & __cuFlushGPUDirectRDMAWrites, 11030 , 0 )
766776 _cuGetProcAddress(' cuDeviceGetProperties' , & __cuDeviceGetProperties, 2000 , 0 )
767777 _cuGetProcAddress(' cuDeviceComputeCapability' , & __cuDeviceComputeCapability, 2000 , 0 )
768778 _cuGetProcAddress(' cuDevicePrimaryCtxRetain' , & __cuDevicePrimaryCtxRetain, 7000 , 0 )
@@ -1095,7 +1105,6 @@ cdef int cuPythonInit() nogil except -1:
10951105 _cuGetProcAddress(' cuGraphicsUnmapResources' , & __cuGraphicsUnmapResources, 3000 , 0 )
10961106 _cuGetProcAddress(' cuGetProcAddress' , & __cuGetProcAddress, 11030 , 0 )
10971107 _cuGetProcAddress(' cuGetExportTable' , & __cuGetExportTable, 3000 , 0 )
1098- _cuGetProcAddress(' cuFlushGPUDirectRDMAWrites' , & __cuFlushGPUDirectRDMAWrites, 11030 , 0 )
10991108 return 0
11001109 # dlsym calls
11011110 IF UNAME_SYSNAME == " Windows" :
@@ -1168,6 +1177,10 @@ cdef int cuPythonInit() nogil except -1:
11681177 __cuDeviceGetDefaultMemPool = < void * >< unsigned long long > win32api.GetProcAddress(handle, ' cuDeviceGetDefaultMemPool' )
11691178 except :
11701179 pass
1180+ try :
1181+ __cuFlushGPUDirectRDMAWrites = < void * >< unsigned long long > win32api.GetProcAddress(handle, ' cuFlushGPUDirectRDMAWrites' )
1182+ except :
1183+ pass
11711184 try :
11721185 __cuDeviceGetProperties = < void * >< unsigned long long > win32api.GetProcAddress(handle, ' cuDeviceGetProperties' )
11731186 except :
@@ -2496,10 +2509,6 @@ cdef int cuPythonInit() nogil except -1:
24962509 __cuGetExportTable = < void * >< unsigned long long > win32api.GetProcAddress(handle, ' cuGetExportTable' )
24972510 except :
24982511 pass
2499- try :
2500- __cuFlushGPUDirectRDMAWrites = < void * >< unsigned long long > win32api.GetProcAddress(handle, ' cuFlushGPUDirectRDMAWrites' )
2501- except :
2502- pass
25032512 ELSE :
25042513 __cuGetErrorString = dlfcn.dlsym(handle, ' cuGetErrorString' )
25052514 __cuGetErrorName = dlfcn.dlsym(handle, ' cuGetErrorName' )
@@ -2518,6 +2527,7 @@ cdef int cuPythonInit() nogil except -1:
25182527 __cuDeviceSetMemPool = dlfcn.dlsym(handle, ' cuDeviceSetMemPool' )
25192528 __cuDeviceGetMemPool = dlfcn.dlsym(handle, ' cuDeviceGetMemPool' )
25202529 __cuDeviceGetDefaultMemPool = dlfcn.dlsym(handle, ' cuDeviceGetDefaultMemPool' )
2530+ __cuFlushGPUDirectRDMAWrites = dlfcn.dlsym(handle, ' cuFlushGPUDirectRDMAWrites' )
25212531 __cuDeviceGetProperties = dlfcn.dlsym(handle, ' cuDeviceGetProperties' )
25222532 __cuDeviceComputeCapability = dlfcn.dlsym(handle, ' cuDeviceComputeCapability' )
25232533 __cuDevicePrimaryCtxRetain = dlfcn.dlsym(handle, ' cuDevicePrimaryCtxRetain' )
@@ -2850,7 +2860,6 @@ cdef int cuPythonInit() nogil except -1:
28502860 __cuGraphicsUnmapResources = dlfcn.dlsym(handle, ' cuGraphicsUnmapResources' )
28512861 __cuGetProcAddress = dlfcn.dlsym(handle, ' cuGetProcAddress' )
28522862 __cuGetExportTable = dlfcn.dlsym(handle, ' cuGetExportTable' )
2853- __cuFlushGPUDirectRDMAWrites = dlfcn.dlsym(handle, ' cuFlushGPUDirectRDMAWrites' )
28542863
28552864cdef CUresult _cuGetErrorString(CUresult error, const char ** pStr) nogil except ?CUDA_ERROR_NOT_FOUND:
28562865 global __cuGetErrorString
@@ -3005,6 +3014,15 @@ cdef CUresult _cuDeviceGetDefaultMemPool(CUmemoryPool* pool_out, CUdevice dev) n
30053014 err = (< CUresult (* )(CUmemoryPool* , CUdevice) nogil> __cuDeviceGetDefaultMemPool)(pool_out, dev)
30063015 return err
30073016
3017+ cdef CUresult _cuFlushGPUDirectRDMAWrites(CUflushGPUDirectRDMAWritesTarget target, CUflushGPUDirectRDMAWritesScope scope) nogil except ?CUDA_ERROR_NOT_FOUND:
3018+ global __cuFlushGPUDirectRDMAWrites
3019+ cuPythonInit()
3020+ if __cuFlushGPUDirectRDMAWrites == NULL :
3021+ with gil:
3022+ raise RuntimeError (' Function "cuFlushGPUDirectRDMAWrites" not found' )
3023+ err = (< CUresult (* )(CUflushGPUDirectRDMAWritesTarget, CUflushGPUDirectRDMAWritesScope) nogil> __cuFlushGPUDirectRDMAWrites)(target, scope)
3024+ return err
3025+
30083026cdef CUresult _cuDeviceGetProperties(CUdevprop* prop, CUdevice dev) nogil except ?CUDA_ERROR_NOT_FOUND:
30093027 global __cuDeviceGetProperties
30103028 cuPythonInit()
@@ -5992,12 +6010,3 @@ cdef CUresult _cuGetExportTable(const void** ppExportTable, const CUuuid* pExpor
59926010 raise RuntimeError (' Function "cuGetExportTable" not found' )
59936011 err = (< CUresult (* )(const void ** , const CUuuid* ) nogil> __cuGetExportTable)(ppExportTable, pExportTableId)
59946012 return err
5995-
5996- cdef CUresult _cuFlushGPUDirectRDMAWrites(CUflushGPUDirectRDMAWritesTarget target, CUflushGPUDirectRDMAWritesScope scope) nogil except ?CUDA_ERROR_NOT_FOUND:
5997- global __cuFlushGPUDirectRDMAWrites
5998- cuPythonInit()
5999- if __cuFlushGPUDirectRDMAWrites == NULL :
6000- with gil:
6001- raise RuntimeError (' Function "cuFlushGPUDirectRDMAWrites" not found' )
6002- err = (< CUresult (* )(CUflushGPUDirectRDMAWritesTarget, CUflushGPUDirectRDMAWritesScope) nogil> __cuFlushGPUDirectRDMAWrites)(target, scope)
6003- return err
0 commit comments