1 #include <cuda_runtime.h>
2 #include <cuda_profiler_api.h>
23 static bool initialized =
false;
27 if (initialized)
return;
31 cudaDriverGetVersion(&driver_version);
32 printfQuda(
"CUDA Driver version = %d\n", driver_version);
35 cudaRuntimeGetVersion(&runtime_version);
36 printfQuda(
"CUDA Runtime version = %d\n", runtime_version);
39 nvmlReturn_t result = nvmlInit();
40 if (NVML_SUCCESS != result)
errorQuda(
"NVML Init failed with error %d", result);
42 char graphics_version[
length];
43 result = nvmlSystemGetDriverVersion(graphics_version,
length);
44 if (NVML_SUCCESS != result)
errorQuda(
"nvmlSystemGetDriverVersion failed with error %d", result);
45 printfQuda(
"Graphic driver version = %s\n", graphics_version);
46 result = nvmlShutdown();
47 if (NVML_SUCCESS != result)
errorQuda(
"NVML Shutdown failed with error %d", result);
51 cudaGetDeviceCount(&deviceCount);
52 if (deviceCount == 0) {
errorQuda(
"No CUDA devices found"); }
54 for (
int i = 0; i < deviceCount; i++) {
71 const int my_major = __COMPUTE_CAPABILITY__ / 100;
72 const int my_minor = (__COMPUTE_CAPABILITY__ - my_major * 100) / 10;
75 errorQuda(
"** Running on a device with compute capability %i.%i but QUDA was compiled for %i.%i. ** \n --- "
76 "Please set the correct QUDA_GPU_ARCH when running cmake.\n",
81 char *allow_jit_env = getenv(
"QUDA_ALLOW_JIT");
82 if (allow_jit_env && strcmp(allow_jit_env,
"1") == 0) {
84 warningQuda(
"** Running on a device with compute capability %i.%i but QUDA was compiled for %i.%i. **\n -- "
85 "Jitting the PTX since QUDA_ALLOW_JIT=1 was set. Note that this will take some time.\n",
88 errorQuda(
"** Running on a device with compute capability %i.%i but QUDA was compiled for %i.%i. **\n --- "
89 "Please set the correct QUDA_GPU_ARCH when running cmake.\n If you want the PTX to be jitted for "
90 "your current GPU arch please set the enviroment variable QUDA_ALLOW_JIT=1.",
97 "** Running on a device with compute capability %i.%i but QUDA was compiled for %i.%i. **\n -- This might "
98 "result in a lower performance. Please consider adjusting QUDA_GPU_ARCH when running cmake.\n",
109 char *enable_numa_env = getenv(
"QUDA_ENABLE_NUMA");
110 if (enable_numa_env && strcmp(enable_numa_env,
"0") == 0) {
117 cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
126 int greatestPriority;
128 cudaDeviceGetStreamPriorityRange(&leastPriority, &greatestPriority);
129 for (
int i = 0; i <
Nstream - 1; i++) {
130 cudaStreamCreateWithPriority(&
streams[i], cudaStreamDefault, greatestPriority);
132 cudaStreamCreateWithPriority(&
streams[
Nstream - 1], cudaStreamDefault, leastPriority);
145 char *device_reset_env = getenv(
"QUDA_DEVICE_RESET");
146 if (device_reset_env && strcmp(device_reset_env,
"1") == 0) {
155 void start() { cudaProfilerStart(); }
157 void stop() { cudaProfilerStop(); }
cudaDeviceProp deviceProp
void stop()
Stop profiling.
void start()
Start profiling.
void create_context()
Create the streams associated with parallel execution.
void init(int dev)
Create the device context. Called by initQuda when initializing the library.
void destroy()
Free any persistent context state. Called by endQuda when tearing down the library.
int setNumaAffinityNVML(int deviceid)
cudaStream_t qudaStream_t
#define checkCudaErrorNoSync()
QudaVerbosity getVerbosity()