21 #include <gtest/gtest.h> 25 #define MAX(a,b) ((a)>(b)?(a):(b)) 27 extern void usage(
char** argv );
77 void init(
int argc,
char **argv)
89 gaugeParam.
X[0] =
X[0] =
xdim;
90 gaugeParam.
X[1] =
X[1] =
ydim;
91 gaugeParam.
X[2] =
X[2] =
zdim;
92 gaugeParam.
X[3] =
X[3] =
tdim;
98 printfQuda (
"The covariant derivative doesn't support 5-d indexing, only source 0 will be tested.\n");
113 gaugeParam.
scale = 1.0;
131 int tmpint =
MAX(
X[1]*
X[2]*
X[3], X[0]*X[2]*X[3]);
132 tmpint =
MAX(tmpint, X[0]*X[1]*X[3]);
133 tmpint =
MAX(tmpint, X[0]*X[1]*X[2]);
136 gaugeParam.
ga_pad = tmpint;
137 inv_param.
sp_pad = tmpint;
143 for(
int d = 0; d < 4; d++) {
144 csParam.
x[d] = gaugeParam.
X[d];
163 csParam.
x[0] = gaugeParam.
X[0];
171 for (
int dir = 0; dir < 4; dir++) {
174 if (
links[dir] == NULL) {
175 errorQuda(
"ERROR: malloc failed for gauge links");
187 ghostLink = cpuLink->
Ghost();
189 int x_face_size = X[1]*X[2]*X[3]/2;
190 int y_face_size = X[0]*X[2]*X[3]/2;
191 int z_face_size = X[0]*X[1]*X[3]/2;
192 int t_face_size = X[0]*X[1]*X[2]/2;
193 int pad_size =
MAX(x_face_size, y_face_size);
194 pad_size =
MAX(pad_size, z_face_size);
195 pad_size =
MAX(pad_size, t_face_size);
196 gaugeParam.
ga_pad = pad_size;
228 cudaDeviceSynchronize();
232 double cuda_spinor_norm2=
blas::norm2(*cudaSpinor);
233 printfQuda(
"Source CPU = %f, CUDA=%f\n", spinor_norm2, cuda_spinor_norm2);
254 for (
int dir = 0; dir < 4; dir++) {
276 cudaEvent_t start,
end;
277 cudaEventCreate(&start);
278 cudaEventRecord(start, 0);
279 cudaEventSynchronize(start);
281 for (
int i = 0; i <
niter; i++) {
285 dirac->
MCD(*cudaSpinorOut, *cudaSpinor, mu);
289 cudaEventCreate(&end);
290 cudaEventRecord(end, 0);
291 cudaEventSynchronize(end);
293 cudaEventElapsedTime(&runTime, start, end);
294 cudaEventDestroy(start);
295 cudaEventDestroy(end);
297 double secs = runTime / 1000;
300 cudaError_t stat = cudaGetLastError();
301 if (stat != cudaSuccess)
302 errorQuda(
"with ERROR: %s\n", cudaGetErrorString(stat));
311 printfQuda(
"Calculating reference implementation...");
326 ASSERT_LE(deviation, tol) <<
"CPU and CUDA implementations do not agree";
334 printfQuda(
"prec recon test_type dagger S_dim T_dimension\n");
351 int main(
int argc,
char **argv)
354 ::testing::InitGoogleTest(&argc, argv);
357 for (
int i = 1; i < argc; i++) {
362 fprintf(stderr,
"ERROR: Invalid option:%s\n", argv[i]);
373 for (
int i = 0; i < attempts; i++) {
376 for (
int dag = 0; dag < 2; dag++) {
379 for (
int mu = 0;
mu < 4;
mu++) {
381 int muCpu =
mu * 2 + (
dagger ? 1 : 0);
385 printfQuda(
"\n\nChecking muQuda = %d\n", muCuda);
398 unsigned long long flops 400 printfQuda(
"GFLOPS = %f\n", 1.0e-9 * flops / secs);
406 double cuda_spinor_out_norm2 =
blas::norm2(*cudaSpinorOut);
407 printfQuda(
"Results mu = %d: CPU=%f, CUDA=%f, CPU-CUDA=%f\n", muCuda, spinor_ref_norm2, cuda_spinor_out_norm2,
410 printfQuda(
"Result mu = %d: CPU=%f , CPU-CUDA=%f",
mu, spinor_ref_norm2, spinor_out_norm2);
414 ::testing::TestEventListeners &listeners = ::testing::UnitTest::GetInstance()->listeners();
415 if (
comm_rank() != 0) {
delete listeners.Release(listeners.default_result_printer()); }
417 test_rc = RUN_ALL_TESTS();
int dimPartitioned(int dim)
QudaDiracFieldOrder dirac_order
QudaReconstructType reconstruct_sloppy
QudaGhostExchange ghostExchange
QudaGaugeParam gaugeParam
void setPrecision(QudaPrecision precision, QudaPrecision ghost_precision=QUDA_INVALID_PRECISION, bool force_native=false)
void construct_gauge_field(void **gauge, int type, QudaPrecision precision, QudaGaugeParam *param)
enum QudaPrecision_s QudaPrecision
double norm2(const ColorSpinorField &a)
QudaDslashType dslash_type
void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param)
int process_command_line_option(int argc, char **argv, int *idx)
void Source(const QudaSourceType sourceType, const int st=0, const int s=0, const int c=0)
cudaColorSpinorField * tmp
QudaGaugeFieldOrder gauge_order
void usage_extra(char **argv)
const char * get_prec_str(QudaPrecision prec)
QudaDslashType dslash_type
QudaSiteSubset siteSubset
QudaFieldLocation input_location
void setDiracParam(DiracParam &diracParam, QudaInvertParam *inv_param, bool pc)
QudaSolutionType solution_type
void initQuda(int device)
QudaFieldLocation output_location
QudaInvertParam inv_param
QudaFieldOrder fieldOrder
QudaReconstructType link_recon
void setSpinorSiteSize(int n)
QudaInvertParam newQudaInvertParam(void)
const char * get_recon_str(QudaReconstructType recon)
QudaGammaBasis gammaBasis
cpuColorSpinorField * spinorOut
__host__ __device__ ValueType pow(ValueType x, ExponentType e)
QudaGammaBasis gamma_basis
QudaPrecision cuda_prec_sloppy
const void ** Ghost() const
cudaColorSpinorField * cudaSpinor
int main(int argc, char **argv)
cudaColorSpinorField * cudaSpinorOut
enum QudaDagType_s QudaDagType
enum QudaParity_s QudaParity
QudaReconstructType reconstruct
static int Compare(const cpuColorSpinorField &a, const cpuColorSpinorField &b, const int resolution=1)
Perform a component by component comparison of two color-spinor fields. In doing we normalize with re...
enum QudaReconstructType_s QudaReconstructType
Main header file for the QUDA library.
QudaPrecision Precision() const
enum QudaDslashType_s QudaDslashType
cpuColorSpinorField * spinorRef
int gridsize_from_cmdline[]
void initComms(int argc, char **argv, int *const commDims)
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
void mat_mg4dir(cpuColorSpinorField *out, void **link, void **ghostLink, cpuColorSpinorField *in, int daggerBit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
double dslashCUDA(int niter, int mu)
Full Covariant Derivative operator. Although not a Dirac operator per se, it's a linear operator so i...
void setVerbosity(QudaVerbosity verbosity)
cpuColorSpinorField * spinor
void init(int argc, char **argv)
virtual void MCD(ColorSpinorField &out, const ColorSpinorField &in, const int mu) const
QudaGaugeParam newQudaGaugeParam(void)