25 #define MAX(a,b) ((a)>(b)?(a):(b)) 26 #define staggeredSpinorSiteSize 6 29 extern void usage(
char** argv );
50 void **ghost_fatlink, **ghost_longlink;
134 int tmpint =
MAX(
X[1]*
X[2]*
X[3],
X[0]*
X[2]*
X[3]);
135 tmpint =
MAX(tmpint,
X[0]*
X[1]*
X[3]);
136 tmpint =
MAX(tmpint,
X[0]*
X[1]*
X[2]);
146 for(
int d = 0;
d < 4;
d++) {
181 for (
int dir = 0; dir < 4; dir++) {
186 errorQuda(
"ERROR: malloc failed for fatlink/longlink");
206 int x_face_size =
X[1]*
X[2]*
X[3]/2;
207 int y_face_size =
X[0]*
X[2]*
X[3]/2;
208 int z_face_size =
X[0]*
X[1]*
X[3]/2;
209 int t_face_size =
X[0]*
X[1]*
X[2]/2;
210 int pad_size =
MAX(x_face_size, y_face_size);
211 pad_size =
MAX(pad_size, z_face_size);
212 pad_size =
MAX(pad_size, t_face_size);
261 cudaDeviceSynchronize();
266 printfQuda(
"Source CPU = %f, CUDA=%f\n", spinor_norm2, cuda_spinor_norm2);
290 for (
int dir = 0; dir < 4; dir++) {
319 DslashTime() : event_time(0.0), cpu_time(0.0), cpu_min(DBL_MAX), cpu_max(0.0) {}
328 cudaEventCreate(&
start);
329 cudaEventRecord(
start, 0);
330 cudaEventSynchronize(
start);
333 cudaEventRecord(
start, 0);
337 gettimeofday(&tstart, NULL);
355 errorQuda(
"Staggered operator acting on full-site not supported");
363 gettimeofday(&tstop, NULL);
366 double elapsed = ds + 0.000001*dus;
376 cudaEventCreate(&
end);
377 cudaEventRecord(
end, 0);
378 cudaEventSynchronize(
end);
380 cudaEventElapsedTime(&runTime,
start,
end);
381 cudaEventDestroy(
start);
382 cudaEventDestroy(
end);
387 cudaError_t stat = cudaGetLastError();
388 if (stat != cudaSuccess)
389 errorQuda(
"with ERROR: %s\n", cudaGetErrorString(stat));
398 printfQuda(
"Calculating reference implementation...");
433 ASSERT_LE(deviation,
tol) <<
"CPU and CUDA implementations do not agree";
444 for (
int i=0;
i<attempts;
i++) {
465 printfQuda(
"Effective halo bi-directional bandwidth (GB/s) GPU = %f ( CPU = %f, min = %f , max = %f ) for aggregate message size %lu bytes\n",
475 printfQuda(
"Results: CPU=%f, CUDA=%f, CPU-CUDA=%f\n", spinor_ref_norm2, cuda_spinor_out_norm2,
478 printfQuda(
"Result: CPU=%f , CPU-CUDA=%f", spinor_ref_norm2, spinor_out_norm2);
482 test_rc = RUN_ALL_TESTS();
496 printfQuda(
"prec recon test_type dagger S_dim T_dimension\n");
522 int main(
int argc,
char **argv)
525 ::testing::InitGoogleTest(&argc, argv);
526 for (
int i=1 ;
i < argc;
i++){
532 fprintf(stderr,
"ERROR: Invalid option:%s\n", argv[
i]);
int dimPartitioned(int dim)
QudaDiracFieldOrder dirac_order
QudaDslashType dslash_type
QudaReconstructType reconstruct_sloppy
cpuColorSpinorField * spinorOut
QudaGhostExchange ghostExchange
enum QudaPrecision_s QudaPrecision
void dw_setDims(int *X, const int L5)
int main(int argc, char **argv)
DslashTime dslashCUDA(int niter)
cpuColorSpinorField * spinorRef
QudaReconstructType link_recon
double norm2(const ColorSpinorField &a)
QudaDslashType dslash_type
QudaGaugeParam gaugeParam
void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param)
void staggered_dslash(void *res, void **fatlink, void **longlink, void *spinorField, int oddBit, int daggerBit, QudaPrecision sPrecision, QudaPrecision gPrecision)
int process_command_line_option(int argc, char **argv, int *idx)
void Source(const QudaSourceType sourceType, const int st=0, const int s=0, const int c=0)
void matdagmat_mg4dir(cpuColorSpinorField *out, void **link, void **ghostLink, cpuColorSpinorField *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision, cpuColorSpinorField *tmp, QudaParity parity)
cpuColorSpinorField * spinor
cudaColorSpinorField * cudaSpinor
QudaGaugeFieldOrder gauge_order
const char * get_prec_str(QudaPrecision prec)
unsigned long long Flops() const
virtual void MdagM(ColorSpinorField &out, const ColorSpinorField &in) const =0
QudaSiteSubset siteSubset
QudaFieldLocation input_location
__darwin_suseconds_t tv_usec
void setDiracParam(DiracParam &diracParam, QudaInvertParam *inv_param, bool pc)
QudaSolutionType solution_type
else return(__swbuf(_c, _p))
cpuColorSpinorField * tmpCpu
void usage_extra(char **argv)
void initQuda(int device)
cudaColorSpinorField * tmp
QudaFieldLocation output_location
void * malloc(size_t __size) __attribute__((__warn_unused_result__)) __attribute__((alloc_size(1)))
QudaFieldOrder fieldOrder
void setSpinorSiteSize(int n)
QudaInvertParam newQudaInvertParam(void)
const char * get_recon_str(QudaReconstructType recon)
QudaGammaBasis gammaBasis
__host__ __device__ ValueType pow(ValueType x, ExponentType e)
void matdagmat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision, void *tmp, QudaParity parity)
QudaGammaBasis gamma_basis
cudaColorSpinorField * cudaSpinorOut
QudaPrecision cuda_prec_sloppy
const void ** Ghost() const
enum QudaDagType_s QudaDagType
enum QudaParity_s QudaParity
QudaReconstructType reconstruct
int fprintf(FILE *, const char *,...) __attribute__((__format__(__printf__
static int Compare(const cpuColorSpinorField &a, const cpuColorSpinorField &b, const int resolution=1)
void construct_fat_long_gauge_field(void **fatlink, void **longlink, int type, QudaPrecision precision, QudaGaugeParam *param, QudaDslashType dslash_type)
virtual void M(ColorSpinorField &out, const ColorSpinorField &in) const =0
enum QudaReconstructType_s QudaReconstructType
Main header file for the QUDA library.
QudaInvertParam inv_param
void staggered_dslash_mg4dir(cpuColorSpinorField *out, void **fatlink, void **longlink, void **ghost_fatlink, void **ghost_longlink, cpuColorSpinorField *in, int oddBit, int daggerBit, QudaPrecision sPrecision, QudaPrecision gPrecision)
enum QudaDslashType_s QudaDslashType
void setKernelPackT(bool pack)
void staggeredDslashRef()
static Dirac * create(const DiracParam ¶m)
static __inline__ size_t size_t d
virtual void Dslash(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const =0
size_t GhostBytes() const
void initComms(int argc, char **argv, const int *commDims)
int gridsize_from_cmdline[]
void setVerbosity(const QudaVerbosity verbosity)
QudaGaugeParam newQudaGaugeParam(void)