// Hash identifying this build, taken from the QUDA_HASH macro; it is compared
// against the hash token read from the cache file header when the cache is loaded.
14 static const std::string quda_hash = QUDA_HASH;
// Directory that holds the tune cache and its lock file. The save path returns
// early when this string is empty.
15 static std::string resource_path;
// In-memory table mapping a tuning key to the launch parameters chosen for it.
16 static std::map<TuneKey, TuneParam> tunecache;
// Size of tunecache as of the last load or save; the save path compares against
// it to decide whether there is anything new to write.
17 static size_t initial_cache_size = 0;
// Two-level macro helper that forwards to STR_, which is defined outside this
// excerpt (presumably a stringification idiom -- TODO confirm).
20 #define STR(x) STR_(x)
// Parse tune-cache entries from the stream in and store them in the global
// tunecache map. The visible code extracts, in this order and using the
// stream extraction operator: key.volume, key.name, key.aux, block.x, block.y,
// block.z, grid.x, grid.y, grid.z, shared_bytes, and finally the remainder of
// the line as the free-form comment.
// NOTE(review): the loop header and the declarations of line, ls, key and
// param fall outside this excerpt.
28 static void deserializeTuneCache(std::istream &
in)
// Empty lines carry no entry and are skipped.
37 if (!line.length())
continue;
// Key fields followed by the three block dimensions.
40 ls >> key.volume >> key.name >> key.aux >> param.block.x >> param.block.y >> param.block.z;
// Three grid dimensions followed by the shared memory size.
41 ls >> param.grid.x >> param.grid.y >> param.grid.z >> param.shared_bytes;
// Whatever is left on the line becomes the comment.
43 getline(ls, param.comment);
// getline drops the line terminator, so a newline is appended again here;
// the serializer writes the comment without adding a line break of its own.
44 param.comment +=
"\n";
// Insert the entry, or overwrite an existing entry with the same key.
45 tunecache[key] =
param;
// Write every entry of the global tunecache map to the stream out as
// tab-separated fields: volume, name, aux, block.x, block.y, block.z, grid.x,
// grid.y, grid.z, shared_bytes, comment. No line terminator is written after
// the comment in the visible code, so each record ends with whatever
// param.comment ends with.
// NOTE(review): the closing braces of the loop and function fall outside this
// excerpt.
53 static void serializeTuneCache(std::ostream &
out)
55 std::map<TuneKey, TuneParam>::iterator entry;
// Visit the entries in the iteration order of the map.
57 for (entry = tunecache.begin(); entry != tunecache.end(); entry++) {
// Local copies of the key and the parameters of the current entry.
58 TuneKey key = entry->first;
59 TuneParam param = entry->second;
// Key fields.
61 out << key.volume <<
"\t" << key.name <<
"\t" << key.aux <<
"\t";
// Block dimensions.
62 out << param.block.x <<
"\t" << param.block.y <<
"\t" << param.block.z <<
"\t";
// Grid dimensions.
63 out << param.grid.x <<
"\t" << param.grid.y <<
"\t" << param.grid.z <<
"\t";
// Shared memory size and the trailing comment.
64 out << param.shared_bytes <<
"\t" << param.comment;
// Share the tune cache through comm_broadcast: the cache is serialized to
// text, the text is broadcast, and the text is parsed back into tunecache.
// NOTE(review): the conditionals that decide which process runs which of the
// statements below, and the declaration of size, are outside this excerpt.
72 static void broadcastTuneCache()
76 std::stringstream serialized;
// Serialize the cache and record the length of the text in characters.
80 serializeTuneCache(serialized);
81 size = serialized.str().length();
// Broadcast straight out of the temporary string returned by str(); the
// const_cast is needed because comm_broadcast is handed a non-const pointer.
87 comm_broadcast(const_cast<char *>(serialized.str().c_str()), size);
// Buffer with one extra byte beyond the payload.
// NOTE(review): neither the code that fills and null-terminates this buffer
// nor a matching delete[] is visible in this excerpt -- confirm both exist.
89 char *serstr =
new char[size+1];
// Replace the contents of the stream with the buffer text and parse it.
92 serialized.str(serstr);
93 deserializeTuneCache(serialized);
// Body of the routine that locates the resource directory, reads and
// validates the cache file found there, fills tunecache, and finally calls
// broadcastTuneCache.
// NOTE(review): the function signature, several conditionals and the code
// that extracts token from each header line fall outside this excerpt.
108 std::string cache_path, line, token;
109 std::ifstream cache_file;
110 std::stringstream ls;
// The resource directory is taken from the environment.
112 path = getenv(
"QUDA_RESOURCE_PATH");
// Warnings issued when the variable is unset (the guarding condition is not
// visible here).
114 warningQuda(
"Environment variable QUDA_RESOURCE_PATH is not set.");
115 warningQuda(
"Caching of tuned parameters will be disabled.");
117 }
// stat returns nonzero on failure; otherwise the path must be a directory.
else if (stat(path, &pstat) || !S_ISDIR(pstat.st_mode)) {
118 warningQuda(
"The path \"%s\" specified by QUDA_RESOURCE_PATH does not exist or is not a directory.", path);
119 warningQuda(
"Caching of tuned parameters will be disabled.");
// Remember the directory for later use.
122 resource_path = path;
// The cache file lives directly inside the resource directory.
129 cache_path = resource_path;
130 cache_path +=
"/tunecache.tsv";
131 cache_file.open(cache_path.c_str());
// First line of the file: the header that is checked token by token below.
135 if (!cache_file.good())
errorQuda(
"Bad format in %s", cache_path.c_str());
136 getline(cache_file, line);
// compare returns nonzero when the strings differ, so each check below fires
// on a mismatch. A wrong leading tag is reported through errorQuda.
139 if (token.compare(
"tunecache"))
errorQuda(
"Bad format in %s", cache_path.c_str());
// A version mismatch is reported through errorQuda.
141 if (token.compare(quda_version))
errorQuda(
"Cache file %s does not match current QUDA version", cache_path.c_str());
// A build-hash mismatch is reported only as a warning.
143 if (token.compare(quda_hash))
warningQuda(
"Cache file %s does not match current QUDA build", cache_path.c_str());
// Two further lines are read before the entries are parsed, and the visible
// code does not inspect them. The writer in this file emits a blank line
// (ctime newline followed by std::endl) and a column-title line after the
// header, which is presumably what these two reads consume -- TODO confirm.
145 if (!cache_file.good())
errorQuda(
"Bad format in %s", cache_path.c_str());
146 getline(cache_file, line);
148 if (!cache_file.good())
errorQuda(
"Bad format in %s", cache_path.c_str());
149 getline(cache_file, line);
// Everything after the header lines is parsed as cache entries.
151 deserializeTuneCache(cache_file);
// Record how many entries came from disk so that the save path can tell
// whether new ones were added since.
153 initial_cache_size = tunecache.size();
156 printfQuda(
"Loaded %d sets of cached parameters from %s\n", static_cast<int>(initial_cache_size), cache_path.c_str());
// Reported when no cache file is present (the guarding condition is not
// visible here).
160 warningQuda(
"Cache file not found. All kernels will be re-tuned (if tuning is enabled).");
// Distribute the loaded cache through the communication layer.
167 broadcastTuneCache();
// Body of the routine that writes tunecache to the cache file in the resource
// directory, using an exclusively created lock file to keep concurrent
// writers out.
// NOTE(review): the function signature, the declarations of lock_handle and
// now, and several braces and conditionals fall outside this excerpt.
178 std::string lock_path, cache_path;
179 std::ofstream cache_file;
// Nothing to do when no resource directory has been set.
181 if (resource_path.empty())
return;
// Nothing to do when the cache has the same number of entries as at the
// last load or save.
191 if (tunecache.size() == initial_cache_size)
return;
// With O_CREAT and O_EXCL together, open fails if the file already exists,
// so a successful open means this process holds the lock.
195 lock_path = resource_path +
"/tunecache.lock";
196 lock_handle = open(lock_path.c_str(), O_WRONLY | O_CREAT | O_EXCL, 0666);
197 if (lock_handle == -1) {
198 warningQuda(
"Unable to lock cache file. Tuned launch parameters will not be cached to disk. "
199 "If you are certain that no other instances of QUDA are accessing this filesystem, "
200 "please manually remove %s", lock_path.c_str());
// Human-readable explanation stored inside the lock file.
203 char msg[] =
"If no instances of applications using QUDA are running,\n"
204 "this lock file shouldn't be here and is safe to delete.";
// NOTE(review): the local variable stat shadows the stat function for the
// rest of its scope, and sizeof(msg) counts the terminating NUL, so a NUL
// byte is written to the lock file as well.
205 int stat = write(lock_handle, msg,
sizeof(msg));
206 if (stat == -1)
warningQuda(
"Unable to write to lock file for some bizarre reason");
// Open the cache file for writing.
208 cache_path = resource_path +
"/tunecache.tsv";
209 cache_file.open(cache_path.c_str());
212 printfQuda(
"Saving %d sets of cached parameters to %s\n", static_cast<int>(tunecache.size()), cache_path.c_str());
// Header line: tag, version, build hash and a timestamp. The string returned
// by ctime already ends with a newline, so together with std::endl this
// leaves an empty line after the header.
216 cache_file <<
"tunecache\t" << quda_version <<
"\t" << quda_hash <<
"\t# Last updated " << ctime(&now) << std::endl;
// Column titles for the entries that follow.
217 cache_file <<
"volume\tname\taux\tblock.x\tblock.y\tblock.z\tgrid.x\tgrid.y\tgrid.z\tshared_bytes\tcomment" << std::endl;
218 serializeTuneCache(cache_file);
// Release the lock by deleting the lock file.
// NOTE(review): closing lock_handle and cache_file is not visible in this
// excerpt -- confirm it happens before the lock file is removed.
223 remove(lock_path.c_str());
// Everything currently in the cache is now on disk.
225 initial_cache_size = tunecache.size();
// Body of tuneLaunch (the name appears in the error message at the end of
// this block). The visible code chooses launch parameters for tunable in one
// of several ways: default parameters, a cached entry, or a timing search
// over candidate parameters whose best result is stored in tunecache.
// NOTE(review): the function signature, the condition of the first branch,
// the timed loop body, the declarations of error and now, and the
// initialisation of best_time fall outside this excerpt. None of the CUDA
// runtime calls visible below have their return value checked.
// State kept across calls: whether a search is in progress, which tunable
// started it, and the parameter set currently in use.
242 static bool tuning =
false;
243 static const Tunable *active_tunable;
244 static TuneParam
param;
246 TuneParam best_param;
248 cudaEvent_t start,
end;
249 float elapsed_time, best_time;
// Lookup key for this tunable.
252 const TuneKey key = tunable.tuneKey();
// First branch: use the default parameters of the tunable.
255 tunable.defaultTuneParam(param);
256 tunable.checkLaunchParam(param);
257 }
// Second branch: a cached entry exists for this key, so reuse it.
else if (tunecache.count(key)) {
258 param = tunecache[key];
259 tunable.checkLaunchParam(param);
260 }
// Third branch: no search is running yet, so start one.
else if (!tuning) {
// Remember which tunable started the search; the final branch compares
// against this pointer.
263 active_tunable = &tunable;
269 cudaEventCreate(&start);
270 cudaEventCreate(&end);
273 printfQuda(
"Tuning %s with %s at vol=%s\n", key.name.c_str(), key.aux.c_str(), key.volume.c_str());
// Start from the first candidate parameter set.
276 tunable.initTuneParam(param);
// Wait for all outstanding device work before taking timings.
278 cudaDeviceSynchronize();
280 tunable.checkLaunchParam(param);
// Bracket tuningIter() iterations with two events to time them.
281 cudaEventRecord(start, 0);
282 for (
int i=0; i<tunable.tuningIter(); i++) {
285 cudaEventRecord(end, 0);
286 cudaEventSynchronize(end);
287 cudaEventElapsedTime(&elapsed_time, start, end);
288 cudaDeviceSynchronize();
// cudaGetLastError returns the most recent error and resets the error state.
289 error = cudaGetLastError();
// cudaEventElapsedTime reports milliseconds; dividing by 1e3 and by the
// iteration count gives seconds per iteration.
290 elapsed_time /= (1e3 * tunable.tuningIter());
// Only a candidate that ran without error can become the new best.
291 if ((elapsed_time < best_time) && (error == cudaSuccess)) {
292 best_time = elapsed_time;
// Report the performance of a successful candidate.
296 if (error == cudaSuccess)
297 printfQuda(
" %s gives %s\n", tunable.paramString(param).c_str(),
298 tunable.perfString(elapsed_time).c_str());
// Report the CUDA error string instead (the guarding else is not visible).
300 printfQuda(
" %s gives %s\n", tunable.paramString(param).c_str(), cudaGetErrorString(error));
// Move on to the next candidate; the returned flag is stored in tuning
// (presumably false once the candidates are exhausted -- TODO confirm).
302 tuning = tunable.advanceTuneParam(param);
// best_time still equal to FLT_MAX is treated as no candidate having
// succeeded; this relies on best_time having been initialised to FLT_MAX.
305 if (best_time == FLT_MAX) {
306 errorQuda(
"Auto-tuning failed for %s with %s at vol=%s", key.name.c_str(), key.aux.c_str(), key.volume.c_str());
309 printfQuda(
"Tuned %s giving %s for %s with %s\n", tunable.paramString(best_param).c_str(),
310 tunable.perfString(best_time).c_str(), key.name.c_str(), key.aux.c_str());
// Annotate the winning entry with its performance and a timestamp; the
// string returned by ctime ends with a newline.
313 best_param.comment =
"# " + tunable.perfString(best_time) +
", tuned ";
314 best_param.comment += ctime(&now);
316 cudaEventDestroy(start);
317 cudaEventDestroy(end);
// Store the winning parameters in the cache under this key.
322 tunecache[key] = best_param;
324 }
// Final branch: reached when none of the branches above applied and the
// caller is not the tunable recorded in active_tunable.
else if (&tunable != active_tunable) {
325 errorQuda(
"Unexpected call to tuneLaunch() in %s::apply()",
typeid(tunable).name());