QUDA  v0.7.0
A library for QCD on GPUs
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
numa_affinity.cpp
Go to the documentation of this file.
1 
2 /* Originally from Galen Arnold, NCSA arnoldg@ncsa.illinois.edu
3  * modified by Guochun Shi
4  *
5  */
6 #undef _GNU_SOURCE
7 #define _GNU_SOURCE
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <sched.h>
11 #include <unistd.h>
12 #include <string.h>
13 #include <numa_affinity.h>
14 #include <quda_internal.h>
15 
16 static int
17 process_core_string_item(const char* str, int* sub_list, int* sub_ncores)
18 {
19  /* assume the input format is one of the following two
20  * 1. a number only, e.g. 5
21  * 2. a range, e.g 4-6, which means three numbers 4,5,6
22  * return a list of numbers in @sub_list and and the total numbers
23  * in @sub_ncores
24  */
25  int i;
26  if(str == NULL || sub_list == NULL || sub_ncores == NULL ||
27  *sub_ncores <= 0){
28  warningQuda("Bad argument");
29  return -1;
30  }
31 
32  if(strstr(str, "-") != NULL){
33  //a range
34  int low_core, high_core;
35  if (sscanf(str,"%d-%d",&low_core, &high_core) != 2){
36  warningQuda("Range scan failed");
37  return -1;
38  }
39  if(*sub_ncores < high_core-low_core +1){
40  warningQuda("Not enough space in sub_list");
41  return -1;
42  }
43 
44  for(i = 0; i < high_core-low_core +1; i++){
45  sub_list[i] = i + low_core;
46  }
47  *sub_ncores = high_core - low_core +1;
48 
49  }else{
50  //a number
51  int core;
52  if (sscanf(str, "%d", &core) != 1){
53  warningQuda("Wrong format for core number");
54  return -1;
55  }
56  sub_list[0] = core;
57  *sub_ncores =1;
58  }
59  return 0;
60 }
61 
62 static int
63 process_core_string_list(const char* _str, int* list, int* ncores)
64 {
65  /* The input string @str should be separated by comma, and each item can be
66  * either a number or a range (see the comments in process_core_string_item
67  * function)
68  *
69  */
70 
71  if(_str == NULL || list == NULL || ncores == NULL
72  || *ncores <= 0){
73  warningQuda("Bad argument");
74  return -1;
75  }
76 
77  char str[256];
78  strncpy(str, _str, sizeof(str));
79 
80  int left_space = *ncores;
81  int tot_cores = 0;
82 
83  char* item = strtok(str, ",");
84  if(item == NULL){
85  warningQuda("Invalid string format (%s)", str);
86  return -1;
87  }
88 
89  do {
90  int sub_ncores = left_space;
91  int* sub_list = list + tot_cores;
92 
93  int rc = process_core_string_item(item, sub_list, &sub_ncores);
94  if(rc <0){
95  warningQuda("Processing item (%s) failed", item);
96  return -1;
97  }
98 
99  tot_cores += sub_ncores;
100  left_space -= sub_ncores;
101 
102  item = strtok(NULL, ",");
103  }while( item != NULL);
104 
105  *ncores = tot_cores;
106  return 0;
107 }
108 
109 
110 static int
111 getNumaAffinity(int my_gpu, int *cpu_cores, int* ncores)
112 {
113  FILE *nvidia_info, *pci_bus_info;
114  size_t nbytes = 255;
115  char *my_line;
116  char nvidia_info_path[255], pci_bus_info_path[255];
117  char bus_info[255];
118 
119  // the nvidia driver populates this path for each gpu
120  sprintf(nvidia_info_path,"/proc/driver/nvidia/gpus/%d/information", my_gpu);
121  nvidia_info= fopen(nvidia_info_path,"r");
122  if (nvidia_info == NULL){
123  return -1;
124  }
125 
126  my_line= (char *) safe_malloc(nbytes +1);
127 
128  while (!feof(nvidia_info)){
129  if ( -1 == getline(&my_line, &nbytes, nvidia_info)){
130  break;
131  }else{
132  // the first 7 char of the Bus Location will lead to the corresponding
133  // path under /sys/class/pci_bus/ , cpulistaffinity showing cores on that
134  // bus is located there
135  if ( 1 == sscanf(my_line,"Bus Location: %s", bus_info )){
136  sprintf(pci_bus_info_path,"/sys/class/pci_bus/%.7s/cpulistaffinity",
137  bus_info);
138  }
139  }
140  }
141  // open the cpulistaffinity file on the pci_bus for "my_gpu"
142  pci_bus_info= fopen(pci_bus_info_path,"r");
143  if (pci_bus_info == NULL){
144  //printfQuda("Warning: opening file %s failed\n", pci_bus_info_path);
145  host_free(my_line);
146  fclose(nvidia_info);
147  return -1;
148  }
149 
150  while (!feof(pci_bus_info)){
151  if ( -1 == getline(&my_line, &nbytes, pci_bus_info)){
152  break;
153  } else{
154  int rc = process_core_string_list(my_line, cpu_cores, ncores);
155  if(rc < 0){
156  warningQuda("Failed to process the line \"%s\"", my_line);
157  host_free(my_line);
158  fclose(nvidia_info);
159  return -1;
160  }
161  }
162  }
163 
164  host_free(my_line);
165  return 0;
166 }
167 
168 int
169 setNumaAffinity(int devid)
170 {
171  int cpu_cores[128];
172  int ncores=128;
173  int rc = getNumaAffinity(devid, cpu_cores, &ncores);
174  if(rc != 0){
175  warningQuda("Failed to determine NUMA affinity for device %d (possibly not applicable)", devid);
176  return 1;
177  }
178  int which = devid % ncores;
179  printfQuda("Setting NUMA affinity for device %d to CPU core %d\n", devid, cpu_cores[which]);
180 /*
181  for(int i=0;i < ncores;i++){
182  if (i != which ) continue;
183  printfQuda("%d", cpu_cores[i]);
184  if((i+1) < ncores){
185  printfQuda(",");
186  }
187  }
188  printfQuda("\n");
189  */
190 
191  cpu_set_t cpu_set;
192  CPU_ZERO(&cpu_set);
193 
194  for(int i=0;i < ncores;i++){
195  if( i != which) continue;
196  CPU_SET(cpu_cores[i], &cpu_set);
197  }
198 
199  rc = sched_setaffinity(0, sizeof(cpu_set_t), &cpu_set);
200  if (rc != 0){
201  warningQuda("Failed to enforce NUMA affinity (probably due to lack of kernel support)");
202  return -1;
203  }
204 
205 
206  return 0;
207 }
208 
#define host_free(ptr)
Definition: malloc_quda.h:29
#define warningQuda(...)
Definition: util_quda.h:84
#define safe_malloc(size)
Definition: malloc_quda.h:25
#define printfQuda(...)
Definition: util_quda.h:67
int setNumaAffinity(int devid)