// Function-like maximum. Each argument appears twice in the expansion, so an
// argument with side effects (e.g. i++) may be evaluated more than once.
27 #define MAX(a,b) ((a)>(b)?(a):(b))
// Per-site size constant for staggered spinors, fixed at 6.
// NOTE(review): presumably 3 colors x (real, imag) -- confirm.
28 #define staggeredSpinorSiteSize 6
// Declared here only; the definition is not in this listing.
31 extern void usage(
char** argv );
// Pointer-to-pointer handles for the fat-link and long-link ghost data; the
// data reached through them is const.
54 const void **ghost_fatlink, **ghost_longlink;
// tmpint ends up as the largest product of any three of the four extents
// X[0..3], built up by three successive MAX comparisons.
129 int tmpint =
MAX(
X[1]*
X[2]*
X[3], X[0]*X[2]*X[3]);
130 tmpint =
MAX(tmpint, X[0]*X[1]*X[3]);
131 tmpint =
MAX(tmpint, X[0]*X[1]*X[2]);
// Loops over the four directions; their bodies are not part of this listing.
141 for(
int d = 0; d < 4; d++) {
173 for (
int dir = 0; dir < 4; dir++) {
// Reports a failed allocation of the fat-link / long-link buffers.
178 errorQuda(
"ERROR: malloc failed for fatlink/longlink");
// Multiply every complex entry of longlink[dir] by the phase exp(i*pi/3):
//   (re, im) -> (re*cos - im*sin, re*sin + im*cos)
// using cos(pi/3) = 0.5 and sin(pi/3) = sqrt(0.75).
183 const double cos_pi_3 = 0.5;
184 const double sin_pi_3 =
sqrt(0.75);
// Outer loop over the 4 directions, inner loop over the V sites. The loop
// over j (offset of the real part within a site) is not shown in this listing.
185 for(
int dir=0; dir<4; ++dir){
186 for(
int i=0; i<
V; ++i){
// Variant that accesses the buffer as double. Both components are read into
// locals first so the second store still sees the original real part.
189 const double real = ((
double*)
longlink[dir])[i*gaugeSiteSize + j];
190 const double imag = ((
double*)
longlink[dir])[i*gaugeSiteSize + j + 1];
191 ((
double*)
longlink[dir])[i*gaugeSiteSize + j] = real*cos_pi_3 - imag*sin_pi_3;
192 ((
double*)
longlink[dir])[i*gaugeSiteSize + j + 1] = real*sin_pi_3 + imag*cos_pi_3;
// Variant that accesses the buffer as float. The constants are double, so the
// products are formed in double and narrowed to float on the store.
194 const float real = ((
float*)
longlink[dir])[i*gaugeSiteSize + j];
195 const float imag = ((
float*)
longlink[dir])[i*gaugeSiteSize + j + 1];
196 ((
float*)
longlink[dir])[i*gaugeSiteSize + j] = real*cos_pi_3 - imag*sin_pi_3;
197 ((
float*)
longlink[dir])[i*gaugeSiteSize + j + 1] = real*sin_pi_3 + imag*cos_pi_3;
// Each face size is the product of the other three extents, halved with
// integer division (an odd product is truncated).
218 int x_face_size = X[1]*X[2]*X[3]/2;
219 int y_face_size = X[0]*X[2]*X[3]/2;
220 int z_face_size = X[0]*X[1]*X[3]/2;
221 int t_face_size = X[0]*X[1]*X[2]/2;
// pad_size is the largest of the four half-face sizes.
222 int pad_size =
MAX(x_face_size, y_face_size);
223 pad_size =
MAX(pad_size, z_face_size);
224 pad_size =
MAX(pad_size, t_face_size);
// Block the host until previously issued device work has completed.
// NOTE(review): the returned cudaError_t is discarded.
267 cudaDeviceSynchronize();
// Print the host-side and device-side source norms side by side.
272 printfQuda(
"Source CPU = %f, CUDA=%f\n", spinor_norm2, cuda_spinor_norm2);
// Loop over the four directions; the body is not part of this listing.
297 for (
int dir = 0; dir < 4; dir++) {
// Time the niter-iteration loop with a pair of CUDA events recorded on
// stream 0. The start event is recorded and waited on before the loop begins.
// NOTE(review): none of the cudaEvent* return codes are checked.
321 cudaEvent_t start,
end;
322 cudaEventCreate(&start);
323 cudaEventRecord(start, 0);
324 cudaEventSynchronize(start);
326 for (
int i = 0; i <
niter; i++) {
// Reached on a path (not shown) where the operator is requested on a
// full-site field, which this test does not support.
345 errorQuda(
"Staggered operator acting on full-site not supported");
// The end event is created only after the loop, then recorded and waited on
// so that all work queued on stream 0 before it has finished.
354 cudaEventCreate(&end);
355 cudaEventRecord(end, 0);
356 cudaEventSynchronize(end);
// cudaEventElapsedTime writes the start-to-end interval in milliseconds.
358 cudaEventElapsedTime(&runTime, start, end);
359 cudaEventDestroy(start);
360 cudaEventDestroy(end);
// Milliseconds to seconds.
362 double secs = runTime / 1000;
// Read (and clear) the last CUDA runtime error and report it if one is set.
365 cudaError_t stat = cudaGetLastError();
366 if (stat != cudaSuccess)
367 errorQuda(
"with ERROR: %s\n", cudaGetErrorString(stat));
// Progress message printed before the reference computation.
379 printfQuda(
"Calculating reference implementation...");
// gtest assertion: fails with the streamed message when deviation > tol.
423 ASSERT_LE(deviation, tol) <<
"CPU and CUDA implementations do not agree";
// Test driver; returns accuracy_level. In the lines visible here
// accuracy_level is only initialised to 0 -- whatever updates it is not part
// of this listing.
426 static int dslashTest(
int argc,
char **argv)
428 int accuracy_level = 0;
// Per-site float counts and element sizes used for the byte estimate below.
451 int spinor_floats = 8*6*2 + 6;
// NOTE(review): assumes the numeric value of prec is the element size in
// bytes -- confirm against the QudaPrecision definition.
452 int link_float_size =
prec;
// NOTE(review): 0 here; presumably assigned in lines not shown, otherwise the
// spinor term in bytes_for_one_site is always 0 -- confirm.
453 int spinor_float_size = 0;
// Both float counts are doubled when test_type is non-zero.
455 link_floats =
test_type ? (2*link_floats) : link_floats;
456 spinor_floats =
test_type ? (2*spinor_floats) : spinor_floats;
458 int bytes_for_one_site = link_floats * link_float_size + spinor_floats * spinor_float_size;
// flops per second scaled by 1e-9.
461 printfQuda(
"GFLOPS = %f\n", 1.0e-9*flops/secs);
// Two alternative result reports; the condition choosing between them is not
// shown in this listing.
468 printfQuda(
"Results: CPU = %f, CUDA=%f, CPU-CUDA = %f\n", norm2_cpu, norm2_cuda, norm2_cpu_cuda);
470 printfQuda(
"Result: CPU = %f, CPU-QUDA = %f\n", norm2_cpu, norm2_cpu_cuda);
480 return accuracy_level;
// Column header line for the test's parameter summary.
488 printfQuda(
"prec recon test_type dagger S_dim T_dimension\n");
// Program entry point.
514 int main(
int argc,
char **argv)
// Walk the command-line arguments, skipping argv[0].
518 for (i =1;i < argc; i++){
// Report an argument that was not recognised (the check itself is not shown).
524 fprintf(stderr,
"ERROR: Invalid option:%s\n", argv[i]);
533 int accuracy_level = dslashTest(argc, argv);
535 printfQuda(
"accuracy_level =%d\n", accuracy_level);
// ret becomes 0 once accuracy_level is at least 1; its initial value is set
// in lines not shown here.
537 if (accuracy_level >= 1) ret = 0;
int dimPartitioned(int dim)
QudaDiracFieldOrder dirac_order
QudaDslashType dslash_type
QudaReconstructType reconstruct_sloppy
cpuColorSpinorField * spinorOut
cudaColorSpinorField * tmp1
const void ** Ghost() const
enum QudaPrecision_s QudaPrecision
#define ASSERT_LE(val1, val2)
int main(int argc, char **argv)
cpuColorSpinorField * spinorRef
QudaReconstructType link_recon
QudaDslashType dslash_type
QudaGaugeParam gaugeParam
__host__ __device__ ValueType sqrt(ValueType x)
void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param)
void staggered_dslash(void *res, void **fatlink, void **longlink, void *spinorField, int oddBit, int daggerBit, QudaPrecision sPrecision, QudaPrecision gPrecision)
int process_command_line_option(int argc, char **argv, int *idx)
void Source(const QudaSourceType sourceType, const int st=0, const int s=0, const int c=0)
cpuColorSpinorField * spinor
cudaColorSpinorField * cudaSpinor
QudaGaugeFieldOrder gauge_order
const char * get_prec_str(QudaPrecision prec)
QudaSiteSubset siteSubset
QudaFieldLocation input_location
void setDiracParam(DiracParam &diracParam, QudaInvertParam *inv_param, bool pc)
QudaSolutionType solution_type
double dslashCUDA(int niter)
virtual void Dslash(cudaColorSpinorField &out, const cudaColorSpinorField &in, const QudaParity parity) const =0
void setTuning(QudaTune tune)
void usage_extra(char **argv)
void initQuda(int device)
cudaColorSpinorField * tmp
QudaFieldLocation output_location
QudaFieldOrder fieldOrder
unsigned long long Flops() const
void setSpinorSiteSize(int n)
QudaInvertParam newQudaInvertParam(void)
const char * get_recon_str(QudaReconstructType recon)
GTEST_API_ void InitGoogleTest(int *argc, char **argv)
QudaGammaBasis gammaBasis
__host__ __device__ ValueType pow(ValueType x, ExponentType e)
QudaGammaBasis gamma_basis
cudaColorSpinorField * cudaSpinorOut
QudaPrecision cuda_prec_sloppy
enum QudaDagType_s QudaDagType
enum QudaParity_s QudaParity
QudaReconstructType reconstruct
static int Compare(const cpuColorSpinorField &a, const cpuColorSpinorField &b, const int resolution=1)
void construct_fat_long_gauge_field(void **fatlink, void **longlink, int type, QudaPrecision precision, QudaGaugeParam *param, QudaDslashType dslash_type)
enum QudaReconstructType_s QudaReconstructType
Main header file for the QUDA library.
virtual void M(cudaColorSpinorField &out, const cudaColorSpinorField &in) const =0
QudaInvertParam inv_param
int RUN_ALL_TESTS() GTEST_MUST_USE_RESULT_
void staggered_dslash_mg4dir(cpuColorSpinorField *out, void **fatlink, void **longlink, void **ghost_fatlink, void **ghost_longlink, cpuColorSpinorField *in, int oddBit, int daggerBit, QudaPrecision sPrecision, QudaPrecision gPrecision)
enum QudaDslashType_s QudaDslashType
void setKernelPackT(bool pack)
void staggeredDslashRef()
static Dirac * create(const DiracParam ¶m)
double norm2(const ColorSpinorField &)
void initComms(int argc, char **argv, const int *commDims)
int gridsize_from_cmdline[]
void setVerbosity(const QudaVerbosity verbosity)
QudaGaugeParam newQudaGaugeParam(void)