QUDA v0.4.0
A library for QCD on GPUs
|
00001 00002 /* Originally from Galen Arnold, NCSA arnoldg@ncsa.illinois.edu 00003 * modified by Guochun Shi 00004 * 00005 */ 00006 #undef _GNU_SOURCE 00007 #define _GNU_SOURCE 00008 #include <stdio.h> 00009 #include <stdlib.h> 00010 #include <sched.h> 00011 #include <unistd.h> 00012 #include <string.h> 00013 #include <numa_affinity.h> 00014 #include <util_quda.h> 00015 00016 static int 00017 process_core_string_item(const char* str, int* sub_list, int* sub_ncores) 00018 { 00019 /* assume the input format is one of the following two 00020 * 1. a number only, e.g. 5 00021 * 2. a range, e.g 4-6, which means three numbers 4,5,6 00022 * return a list of numbers in @sub_list and and the total numbers 00023 * in @sub_ncores 00024 */ 00025 int i; 00026 if(str == NULL || sub_list == NULL || sub_ncores == NULL || 00027 *sub_ncores <= 0){ 00028 printfQuda("Warning: Wrong parameters in function %s!\n", __FUNCTION__); 00029 return -1; 00030 } 00031 00032 if(strstr(str, "-") != NULL){ 00033 //a range 00034 int low_core, high_core; 00035 if (sscanf(str,"%d-%d",&low_core, &high_core) != 2){ 00036 printfQuda("Warning: range scan failed\n"); 00037 return -1; 00038 } 00039 if(*sub_ncores < high_core-low_core +1){ 00040 printfQuda("Warning: not enough space in sub_list\n"); 00041 return -1; 00042 } 00043 00044 for(i = 0; i < high_core-low_core +1; i++){ 00045 sub_list[i] = i + low_core; 00046 } 00047 *sub_ncores = high_core - low_core +1; 00048 00049 }else{ 00050 //a number 00051 int core; 00052 if (sscanf(str, "%d", &core) != 1){ 00053 printfQuda("Warning: wrong format for core number\n"); 00054 return -1; 00055 } 00056 sub_list[0] = core; 00057 *sub_ncores =1; 00058 } 00059 return 0; 00060 } 00061 00062 static int 00063 process_core_string_list(const char* _str, int* list, int* ncores) 00064 { 00065 /* The input string @str should be separated by comma, and each item can be 00066 * either a number or a range (see the comments in process_core_string_item 00067 * function) 00068 * 00069 */ 00070 00071 if(_str == NULL || list == NULL || ncores == NULL 00072 || *ncores <= 0){ 00073 printfQuda("Warning: Invalid arguments in function %s\n", __FUNCTION__ ); 00074 return -1; 00075 } 00076 00077 char str[256]; 00078 strncpy(str, _str, sizeof(str)); 00079 00080 int left_space = *ncores; 00081 int tot_cores = 0; 00082 00083 char* item = strtok(str, ","); 00084 if(item == NULL){ 00085 printfQuda("ERROR: Invalid string format(%s)\n", str); 00086 return -1; 00087 } 00088 00089 do { 00090 int sub_ncores = left_space; 00091 int* sub_list = list + tot_cores; 00092 00093 int rc = process_core_string_item(item, sub_list, &sub_ncores); 00094 if(rc <0){ 00095 printfQuda("Warning: processing item(%s) failed\n", item); 00096 return -1; 00097 } 00098 00099 tot_cores += sub_ncores; 00100 left_space -= sub_ncores; 00101 00102 item = strtok(NULL, ","); 00103 }while( item != NULL); 00104 00105 *ncores = tot_cores; 00106 return 0; 00107 } 00108 00109 00110 static int 00111 getNumaAffinity(int my_gpu, int *cpu_cores, int* ncores) 00112 { 00113 FILE *nvidia_info, *pci_bus_info; 00114 size_t nbytes = 255; 00115 char *my_line; 00116 char nvidia_info_path[255], pci_bus_info_path[255]; 00117 char bus_info[255]; 00118 00119 // the nvidia driver populates this path for each gpu 00120 sprintf(nvidia_info_path,"/proc/driver/nvidia/gpus/%d/information", my_gpu); 00121 nvidia_info= fopen(nvidia_info_path,"r"); 00122 if (nvidia_info == NULL){ 00123 return -1; 00124 } 00125 00126 my_line= (char *) malloc(nbytes +1); 00127 if (my_line == NULL){ 00128 errorQuda("Error: allocating memory for my_line failed"); 00129 } 00130 00131 while (!feof(nvidia_info)){ 00132 if ( -1 == getline(&my_line, &nbytes, nvidia_info)){ 00133 break; 00134 }else{ 00135 // the first 7 char of the Bus Location will lead to the corresponding 00136 // path under /sys/class/pci_bus/ , cpulistaffinity showing cores on that 00137 // bus is located there 00138 if ( 1 == sscanf(my_line,"Bus Location: %s", bus_info )){ 00139 sprintf(pci_bus_info_path,"/sys/class/pci_bus/%.7s/cpulistaffinity", 00140 bus_info); 00141 } 00142 } 00143 } 00144 // open the cpulistaffinity file on the pci_bus for "my_gpu" 00145 pci_bus_info= fopen(pci_bus_info_path,"r"); 00146 if (pci_bus_info == NULL){ 00147 //printfQuda("Warning: opening file %s failed\n", pci_bus_info_path); 00148 free(my_line); 00149 fclose(nvidia_info); 00150 return -1; 00151 } 00152 00153 while (!feof(pci_bus_info)){ 00154 if ( -1 == getline(&my_line, &nbytes, pci_bus_info)){ 00155 break; 00156 } else{ 00157 int rc = process_core_string_list(my_line, cpu_cores, ncores); 00158 if(rc < 0){ 00159 printfQuda("Warning:%s: processing the line (%s) failed\n", __FUNCTION__, my_line); 00160 free(my_line); 00161 fclose(nvidia_info); 00162 return -1; 00163 } 00164 } 00165 } 00166 00167 free(my_line); 00168 return(0); 00169 } 00170 00171 int 00172 setNumaAffinity(int devid) 00173 { 00174 int cpu_cores[128]; 00175 int ncores=128; 00176 int rc = getNumaAffinity(devid, cpu_cores, &ncores); 00177 if(rc != 0){ 00178 printfQuda("Warning: quda getting affinity for device %d failed\n", devid); 00179 return 1; 00180 } 00181 int which = devid % ncores; 00182 printfQuda("GPU: %d, Setting to affinity cpu cores: %d\n", devid, cpu_cores[which]); 00183 /* 00184 for(int i=0;i < ncores;i++){ 00185 if (i != which ) continue; 00186 printfQuda("%d", cpu_cores[i]); 00187 if((i+1) < ncores){ 00188 printfQuda(","); 00189 } 00190 } 00191 printfQuda("\n"); 00192 */ 00193 00194 cpu_set_t cpu_set; 00195 CPU_ZERO(&cpu_set); 00196 00197 for(int i=0;i < ncores;i++){ 00198 if( i != which) continue; 00199 CPU_SET(cpu_cores[i], &cpu_set); 00200 } 00201 00202 rc = sched_setaffinity(0, sizeof(cpu_set_t), &cpu_set); 00203 if (rc != 0){ 00204 printfQuda("Warning: quda settting affinity failed\n"); 00205 return -1; 00206 } 00207 00208 00209 return 0; 00210 } 00211