18 namespace quda {
static TuneKey last_key; }
24 typedef std::map<TuneKey, TuneParam>
map;
29 static map::iterator it;
30 static size_t initial_cache_size = 0;
34 #define STR(x) STR_(x)
42 static void deserializeTuneCache(std::istream &
in)
58 if (!line.length())
continue;
61 ls >> v >> n >> a >> param.block.x >> param.block.y >> param.block.z;
62 check = snprintf(key.volume, key.volume_n,
"%s", v.c_str());
63 if (check < 0 || check >= key.volume_n)
errorQuda(
"Error writing volume string");
64 check = snprintf(key.name, key.name_n,
"%s", n.c_str());
65 if (check < 0 || check >= key.name_n)
errorQuda(
"Error writing name string");
66 check = snprintf(key.aux, key.aux_n,
"%s", a.c_str());
67 if (check < 0 || check >= key.aux_n)
errorQuda(
"Error writing aux string");
68 ls >> param.grid.x >> param.grid.y >> param.grid.z >> param.shared_bytes;
70 getline(ls, param.comment);
71 param.comment +=
"\n";
72 tunecache[key] =
param;
80 static void serializeTuneCache(std::ostream &
out)
84 for (entry = tunecache.begin(); entry != tunecache.end(); entry++) {
85 TuneKey key = entry->first;
86 TuneParam param = entry->second;
88 out << key.volume <<
"\t" << key.name <<
"\t" << key.aux <<
"\t";
89 out << param.block.x <<
"\t" << param.block.y <<
"\t" << param.block.z <<
"\t";
90 out << param.grid.x <<
"\t" << param.grid.y <<
"\t" << param.grid.z <<
"\t";
91 out << param.shared_bytes <<
"\t" << param.comment;
99 static void broadcastTuneCache()
103 std::stringstream serialized;
107 serializeTuneCache(serialized);
108 size = serialized.str().length();
114 comm_broadcast(const_cast<char *>(serialized.str().c_str()), size);
116 char *serstr =
new char[size+1];
119 serialized.str(serstr);
120 deserializeTuneCache(serialized);
136 std::ifstream cache_file;
137 std::stringstream ls;
139 path = getenv(
"QUDA_RESOURCE_PATH");
141 warningQuda(
"Environment variable QUDA_RESOURCE_PATH is not set.");
142 warningQuda(
"Caching of tuned parameters will be disabled.");
144 }
else if (stat(path, &pstat) || !S_ISDIR(pstat.st_mode)) {
145 warningQuda(
"The path \"%s\" specified by QUDA_RESOURCE_PATH does not exist or is not a directory.", path);
146 warningQuda(
"Caching of tuned parameters will be disabled.");
149 resource_path = path;
156 cache_path = resource_path;
157 cache_path +=
"/tunecache.tsv";
158 cache_file.open(cache_path.c_str());
162 if (!cache_file.good())
errorQuda(
"Bad format in %s", cache_path.c_str());
163 getline(cache_file, line);
166 if (token.compare(
"tunecache"))
errorQuda(
"Bad format in %s", cache_path.c_str());
168 if (token.compare(quda_version))
errorQuda(
"Cache file %s does not match current QUDA version. \nPlease delete this file or set the QUDA_RESOURCE_PATH environment variable to point to a new path.", cache_path.c_str());
170 if (token.compare(quda_hash))
errorQuda(
"Cache file %s does not match current QUDA build. \nPlease delete this file or set the QUDA_RESOURCE_PATH environment variable to point to a new path.", cache_path.c_str());
173 if (!cache_file.good())
errorQuda(
"Bad format in %s", cache_path.c_str());
174 getline(cache_file, line);
176 if (!cache_file.good())
errorQuda(
"Bad format in %s", cache_path.c_str());
177 getline(cache_file, line);
179 deserializeTuneCache(cache_file);
182 initial_cache_size = tunecache.size();
185 printfQuda(
"Loaded %d sets of cached parameters from %s\n", static_cast<int>(initial_cache_size), cache_path.c_str());
190 warningQuda(
"Cache file not found. All kernels will be re-tuned (if tuning is enabled).");
198 broadcastTuneCache();
210 std::ofstream cache_file;
212 if (resource_path.empty())
return;
222 if (tunecache.size() == initial_cache_size)
return;
226 lock_path = resource_path +
"/tunecache.lock";
227 lock_handle = open(lock_path.c_str(), O_WRONLY | O_CREAT | O_EXCL, 0666);
228 if (lock_handle == -1) {
229 warningQuda(
"Unable to lock cache file. Tuned launch parameters will not be cached to disk. "
230 "If you are certain that no other instances of QUDA are accessing this filesystem, "
231 "please manually remove %s", lock_path.c_str());
234 char msg[] =
"If no instances of applications using QUDA are running,\n"
235 "this lock file shouldn't be here and is safe to delete.";
236 int stat = write(lock_handle, msg,
sizeof(msg));
237 if (stat == -1)
warningQuda(
"Unable to write to lock file for some bizarre reason");
239 cache_path = resource_path +
"/tunecache.tsv";
240 cache_file.open(cache_path.c_str());
243 printfQuda(
"Saving %d sets of cached parameters to %s\n", static_cast<int>(tunecache.size()), cache_path.c_str());
247 cache_file <<
"tunecache\t" << quda_version <<
"\t" << quda_hash <<
"\t# Last updated " << ctime(&now) << std::endl;
248 cache_file <<
"volume\tname\taux\tblock.x\tblock.y\tblock.z\tgrid.x\tgrid.y\tgrid.z\tshared_bytes\tcomment" << std::endl;
249 serializeTuneCache(cache_file);
254 remove(lock_path.c_str());
256 initial_cache_size = tunecache.size();
263 static TimeProfile launchTimer(
"tuneLaunch");
273 #ifdef PTHREADS // tuning should be performed serially
295 it = tunecache.find(key);
336 static bool tuning =
false;
337 static const Tunable *active_tunable;
342 }
else if (!tuning) {
346 cudaEvent_t start,
end;
347 float elapsed_time, best_time;
351 active_tunable = &tunable;
357 cudaEventCreate(&start);
358 cudaEventCreate(&end);
366 cudaDeviceSynchronize();
369 cudaEventRecord(start, 0);
376 cudaEventRecord(end, 0);
377 cudaEventSynchronize(end);
378 cudaEventElapsedTime(&elapsed_time, start, end);
379 cudaDeviceSynchronize();
380 error = cudaGetLastError();
383 cudaDeviceSynchronize();
384 cudaError_t error = cudaGetLastError();
385 if (error != cudaSuccess)
errorQuda(
"Failed to clear error state %s\n", cudaGetErrorString(error));
389 if ((elapsed_time < best_time) && (error == cudaSuccess)) {
390 best_time = elapsed_time;
394 if (error == cudaSuccess)
403 if (best_time == FLT_MAX) {
412 best_param.
comment += ctime(&now);
414 cudaEventDestroy(start);
415 cudaEventDestroy(end);
420 tunecache[key] = best_param;
422 }
else if (&tunable != active_tunable) {
423 errorQuda(
"Unexpected call to tuneLaunch() in %s::apply()",
typeid(tunable).name());
#define QUDA_VERSION_MINOR
virtual void initTuneParam(TuneParam ¶m) const
quda::TuneKey getLastTuneKey()
virtual TuneKey tuneKey() const =0
std::map< TuneKey, TuneParam > map
virtual std::string paramString(const TuneParam ¶m) const
TuneParam & tuneLaunch(Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity)
#define QUDA_VERSION_SUBMINOR
virtual std::string perfString(float time) const
virtual void defaultTuneParam(TuneParam ¶m) const
virtual int tuningIter() const
void comm_broadcast(void *data, size_t nbytes)
cpuColorSpinorField * out
void Stop(QudaProfileType idx)
void loadTuneCache(QudaVerbosity verbosity)
Main header file for the QUDA library.
void Start(QudaProfileType idx)
void checkLaunchParam(TuneParam ¶m)
virtual bool advanceTuneParam(TuneParam ¶m) const
enum QudaVerbosity_s QudaVerbosity
#define QUDA_VERSION_MAJOR
void saveTuneCache(QudaVerbosity verbosity)
virtual void apply(const cudaStream_t &stream)=0