25 #define MAX(a,b) ((a)>(b)?(a):(b)) 27 extern void usage(
char** argv );
102 printfQuda (
"The covariant derivative doesn't support 5-d indexing, only source 0 will be tested.\n");
136 int tmpint =
MAX(
X[1]*
X[2]*
X[3],
X[0]*
X[2]*
X[3]);
137 tmpint =
MAX(tmpint,
X[0]*
X[1]*
X[3]);
138 tmpint =
MAX(tmpint,
X[0]*
X[1]*
X[2]);
148 for(
int d = 0;
d < 4;
d++) {
177 for (
int dir = 0; dir < 4; dir++) {
180 if (
links[dir] == NULL) {
181 errorQuda(
"ERROR: malloc failed for gauge links");
195 int x_face_size =
X[1]*
X[2]*
X[3]/2;
196 int y_face_size =
X[0]*
X[2]*
X[3]/2;
197 int z_face_size =
X[0]*
X[1]*
X[3]/2;
198 int t_face_size =
X[0]*
X[1]*
X[2]/2;
199 int pad_size =
MAX(x_face_size, y_face_size);
200 pad_size =
MAX(pad_size, z_face_size);
201 pad_size =
MAX(pad_size, t_face_size);
234 cudaDeviceSynchronize();
239 printfQuda(
"Source CPU = %f, CUDA=%f\n", spinor_norm2, cuda_spinor_norm2);
260 for (
int dir = 0; dir < 4; dir++) {
284 cudaEventCreate(&
start);
285 cudaEventRecord(
start, 0);
286 cudaEventSynchronize(
start);
296 cudaEventCreate(&
end);
297 cudaEventRecord(
end, 0);
298 cudaEventSynchronize(
end);
300 cudaEventElapsedTime(&runTime,
start,
end);
301 cudaEventDestroy(
start);
302 cudaEventDestroy(
end);
304 double secs = runTime / 1000;
307 cudaError_t stat = cudaGetLastError();
308 if (stat != cudaSuccess)
309 errorQuda(
"with ERROR: %s\n", cudaGetErrorString(stat));
318 printfQuda(
"Calculating reference implementation...");
333 ASSERT_LE(deviation,
tol) <<
"CPU and CUDA implementations do not agree";
344 for (
int i=0;
i<attempts;
i++) {
345 for (
int mu=0;
mu<4;
mu++) {
347 int muCpu =
mu*2 + (
dagger ? 1 : 0);
364 printfQuda(
"Effective halo bi-directional bandwidth = %f for aggregate message size %lu bytes\n",
372 printfQuda(
"Results mu = %d: CPU=%f, CUDA=%f, CPU-CUDA=%f\n",
mu, spinor_ref_norm2, cuda_spinor_out_norm2,
375 printfQuda(
"Result mu = %d: CPU=%f , CPU-CUDA=%f",
mu, spinor_ref_norm2, spinor_out_norm2);
379 test_rc = RUN_ALL_TESTS();
395 printfQuda(
"prec recon test_type dagger S_dim T_dimension\n");
417 int main(
int argc,
char **argv)
420 ::testing::InitGoogleTest(&argc, argv);
421 for (
int i=1 ;
i < argc;
i++){
427 fprintf(stderr,
"ERROR: Invalid option:%s\n", argv[
i]);
int dimPartitioned(int dim)
QudaDiracFieldOrder dirac_order
QudaReconstructType reconstruct_sloppy
QudaGhostExchange ghostExchange
QudaGaugeParam gaugeParam
void construct_gauge_field(void **gauge, int type, QudaPrecision precision, QudaGaugeParam *param)
enum QudaPrecision_s QudaPrecision
double norm2(const ColorSpinorField &a)
QudaDslashType dslash_type
cpuColorSpinorField * tmpCpu
void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param)
int process_command_line_option(int argc, char **argv, int *idx)
void Source(const QudaSourceType sourceType, const int st=0, const int s=0, const int c=0)
cudaColorSpinorField * tmp
QudaGaugeFieldOrder gauge_order
void usage_extra(char **argv)
const char * get_prec_str(QudaPrecision prec)
QudaDslashType dslash_type
QudaSiteSubset siteSubset
QudaFieldLocation input_location
void setDiracParam(DiracParam &diracParam, QudaInvertParam *inv_param, bool pc)
QudaSolutionType solution_type
else return(__swbuf(_c, _p))
void initQuda(int device)
QudaFieldLocation output_location
void * malloc(size_t __size) __attribute__((__warn_unused_result__)) __attribute__((alloc_size(1)))
QudaInvertParam inv_param
QudaFieldOrder fieldOrder
QudaReconstructType link_recon
void setSpinorSiteSize(int n)
QudaInvertParam newQudaInvertParam(void)
const char * get_recon_str(QudaReconstructType recon)
QudaGammaBasis gammaBasis
cpuColorSpinorField * spinorOut
__host__ __device__ ValueType pow(ValueType x, ExponentType e)
QudaGammaBasis gamma_basis
QudaPrecision cuda_prec_sloppy
const void ** Ghost() const
cudaColorSpinorField * cudaSpinor
int main(int argc, char **argv)
cudaColorSpinorField * cudaSpinorOut
enum QudaDagType_s QudaDagType
enum QudaParity_s QudaParity
QudaReconstructType reconstruct
int fprintf(FILE *, const char *,...) __attribute__((__format__(__printf__
static int Compare(const cpuColorSpinorField &a, const cpuColorSpinorField &b, const int resolution=1)
enum QudaReconstructType_s QudaReconstructType
Main header file for the QUDA library.
enum QudaDslashType_s QudaDslashType
void setKernelPackT(bool pack)
cpuColorSpinorField * spinorRef
int gridsize_from_cmdline[]
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
void mat_mg4dir(cpuColorSpinorField *out, void **link, void **ghostLink, cpuColorSpinorField *in, int daggerBit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
static __inline__ size_t size_t d
size_t GhostBytes() const
double dslashCUDA(int niter, int mu)
Full Covariant Derivative operator. Although not a Dirac operator per se, it's a linear operator so i...
void initComms(int argc, char **argv, const int *commDims)
void setVerbosity(const QudaVerbosity verbosity)
cpuColorSpinorField * spinor
virtual void MCD(ColorSpinorField &out, const ColorSpinorField &in, const int mu) const
QudaGaugeParam newQudaGaugeParam(void)