QUDA v0.3.2
A library for QCD on GPUs

quda/lib/interface_quda.cpp

Go to the documentation of this file.
00001 #include <iostream>
00002 #include <stdio.h>
00003 #include <stdlib.h>
00004 #include <math.h>
00005 
00006 #include <quda.h>
00007 #include <quda_internal.h>
00008 #include <blas_quda.h>
00009 #include <gauge_quda.h>
00010 #include <dirac_quda.h>
00011 #include <dslash_quda.h>
00012 #include <clover_quda.h>
00013 #include <invert_quda.h>
00014 
00015 #include <color_spinor_field.h>
00016 
00017 #define spinorSiteSize 24 // real numbers per spinor
00018 
00019 FullGauge cudaGaugePrecise;      // Wilson links
00020 FullGauge cudaGaugeSloppy;
00021 
00022 FullGauge cudaFatLinkPrecise;    // asqtad fat links
00023 FullGauge cudaFatLinkSloppy;
00024 
00025 FullGauge cudaLongLinkPrecise;   // asqtad long links
00026 FullGauge cudaLongLinkSloppy;
00027 
00028 FullClover cudaCloverPrecise;    // clover term
00029 FullClover cudaCloverSloppy;
00030 
00031 FullClover cudaCloverInvPrecise; // inverted clover term
00032 FullClover cudaCloverInvSloppy;
00033 
00034 // define newQudaGaugeParam() and newQudaInvertParam()
00035 #define INIT_PARAM
00036 #include "check_params.h"
00037 #undef INIT_PARAM
00038 
00039 // define (static) checkGaugeParam() and checkInvertParam()
00040 #define CHECK_PARAM
00041 #include "check_params.h"
00042 #undef CHECK_PARAM
00043 
00044 // define printQudaGaugeParam() and printQudaInvertParam()
00045 #define PRINT_PARAM
00046 #include "check_params.h"
00047 #undef PRINT_PARAM
00048 
00049 
00050 void initQuda(int dev)
00051 {
00052   static int initialized = 0;
00053   if (initialized) {
00054     return;
00055   }
00056   initialized = 1;
00057 
00058   int deviceCount;
00059   cudaGetDeviceCount(&deviceCount);
00060   if (deviceCount == 0) {
00061     errorQuda("No devices supporting CUDA");
00062   }
00063 
00064   for(int i=0; i<deviceCount; i++) {
00065     cudaDeviceProp deviceProp;
00066     cudaGetDeviceProperties(&deviceProp, i);
00067     fprintf(stderr, "QUDA: Found device %d: %s\n", i, deviceProp.name);
00068   }
00069 
00070   if (dev < 0) {
00071     dev = deviceCount - 1;
00072   }
00073 
00074   cudaDeviceProp deviceProp;
00075   cudaGetDeviceProperties(&deviceProp, dev);
00076   if (deviceProp.major < 1) {
00077     errorQuda("Device %d does not support CUDA", dev);
00078   }
00079 
00080   fprintf(stderr, "QUDA: Using device %d: %s\n", dev, deviceProp.name);
00081   cudaSetDevice(dev);
00082 
00083   cudaGaugePrecise.even = NULL;
00084   cudaGaugePrecise.odd = NULL;
00085   cudaGaugeSloppy.even = NULL;
00086   cudaGaugeSloppy.odd = NULL;
00087 
00088   cudaFatLinkPrecise.even = NULL;
00089   cudaFatLinkPrecise.odd = NULL;
00090   cudaFatLinkSloppy.even = NULL;
00091   cudaFatLinkSloppy.odd = NULL;
00092 
00093   cudaLongLinkPrecise.even = NULL;
00094   cudaLongLinkPrecise.odd = NULL;
00095   cudaLongLinkSloppy.even = NULL;
00096   cudaLongLinkSloppy.odd = NULL;
00097 
00098   cudaCloverPrecise.even.clover = NULL;
00099   cudaCloverPrecise.odd.clover = NULL;
00100   cudaCloverSloppy.even.clover = NULL;
00101   cudaCloverSloppy.odd.clover = NULL;
00102 
00103   cudaCloverInvPrecise.even.clover = NULL;
00104   cudaCloverInvPrecise.odd.clover = NULL;
00105   cudaCloverInvSloppy.even.clover = NULL;
00106   cudaCloverInvSloppy.odd.clover = NULL;
00107 
00108   //initCache();
00109   initBlas();
00110 }
00111 
00112 
00113 void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param)
00114 {
00115   int packed_size;
00116   double anisotropy;
00117   FullGauge *precise, *sloppy;
00118 
00119   checkGaugeParam(param);
00120 
00121   switch (param->reconstruct) {
00122   case QUDA_RECONSTRUCT_8:
00123     packed_size = 8;
00124     break;
00125   case QUDA_RECONSTRUCT_12:
00126     packed_size = 12;
00127     break;
00128   case QUDA_RECONSTRUCT_NO:
00129     packed_size = 18;
00130     break;
00131   default:
00132     errorQuda("Invalid reconstruct type");
00133   }
00134   param->packed_size = packed_size;
00135 
00136   switch (param->type) {
00137   case QUDA_WILSON_LINKS:
00138     precise = &cudaGaugePrecise;
00139     sloppy = &cudaGaugeSloppy;
00140     break;
00141   case QUDA_ASQTAD_FAT_LINKS:
00142     precise = &cudaFatLinkPrecise;
00143     sloppy = &cudaFatLinkSloppy;
00144     break;
00145   case QUDA_ASQTAD_LONG_LINKS:
00146     precise = &cudaLongLinkPrecise;
00147     sloppy = &cudaLongLinkSloppy;
00148     break;
00149   default:
00150     errorQuda("Invalid gauge type");   
00151   }
00152 
00153   if (param->type == QUDA_WILSON_LINKS) {
00154     anisotropy = param->anisotropy;
00155   } else {
00156     anisotropy = 1.0;
00157   }
00158 
00159   if (param->type != QUDA_WILSON_LINKS &&
00160       param->gauge_fix == QUDA_GAUGE_FIXED_YES) {
00161     errorQuda("Temporal gauge fixing not supported for staggered");
00162   }
00163 
00164   if ((param->cuda_prec == QUDA_HALF_PRECISION && param->reconstruct == QUDA_RECONSTRUCT_NO) ||
00165       (param->cuda_prec_sloppy == QUDA_HALF_PRECISION && param->reconstruct_sloppy == QUDA_RECONSTRUCT_NO)) {
00166     warningQuda("Loading gauge field in half precision may give wrong results "
00167                 "unless all elements have magnitude bounded by 1");
00168   }
00169 
00170   createGaugeField(precise, h_gauge, param->cuda_prec, param->cpu_prec, param->gauge_order, param->reconstruct, param->gauge_fix,
00171                    param->t_boundary, param->X, anisotropy, param->tadpole_coeff, param->ga_pad);
00172   param->gaugeGiB += 2.0 * precise->bytes / (1 << 30);
00173 
00174   if (param->cuda_prec_sloppy != param->cuda_prec ||
00175       param->reconstruct_sloppy != param->reconstruct) {
00176     createGaugeField(sloppy, h_gauge, param->cuda_prec_sloppy, param->cpu_prec, param->gauge_order,
00177                      param->reconstruct_sloppy, param->gauge_fix, param->t_boundary,
00178                      param->X, anisotropy, param->tadpole_coeff, param->ga_pad);
00179     param->gaugeGiB += 2.0 * sloppy->bytes / (1 << 30);
00180   } else {
00181     *sloppy = *precise;
00182   }
00183 }
00184 
00185 
00186 void saveGaugeQuda(void *h_gauge, QudaGaugeParam *param)
00187 {
00188   FullGauge *gauge;
00189 
00190   switch (param->type) {
00191   case QUDA_WILSON_LINKS:
00192     gauge = &cudaGaugePrecise;
00193     break;
00194   case QUDA_ASQTAD_FAT_LINKS:
00195     gauge = &cudaFatLinkPrecise;
00196     break;
00197   case QUDA_ASQTAD_LONG_LINKS:
00198     gauge = &cudaLongLinkPrecise;
00199     break;
00200   default:
00201     errorQuda("Invalid gauge type");   
00202   }
00203 
00204   restoreGaugeField(h_gauge, gauge, param->cpu_prec, param->gauge_order);
00205 }
00206 
00207 
00208 void loadCloverQuda(void *h_clover, void *h_clovinv, QudaInvertParam *inv_param)
00209 {
00210 
00211   if (!h_clover && !h_clovinv) {
00212     errorQuda("loadCloverQuda() called with neither clover term nor inverse");
00213   }
00214   if (inv_param->clover_cpu_prec == QUDA_HALF_PRECISION) {
00215     errorQuda("Half precision not supported on CPU");
00216   }
00217   if (cudaGaugePrecise.even == NULL) {
00218     errorQuda("Gauge field must be loaded before clover");
00219   }
00220   if (inv_param->dslash_type != QUDA_CLOVER_WILSON_DSLASH) {
00221     errorQuda("Wrong dslash_type in loadCloverQuda()");
00222   }
00223 
00224   // determines whether operator is preconditioned when calling invertQuda()
00225   bool pc_solve = (inv_param->solve_type == QUDA_DIRECT_PC_SOLVE ||
00226                    inv_param->solve_type == QUDA_NORMEQ_PC_SOLVE);
00227 
00228   // determines whether operator is preconditioned when calling MatQuda() or MatDagMatQuda()
00229   bool pc_solution = (inv_param->solution_type == QUDA_MATPC_SOLUTION ||
00230                       inv_param->solution_type == QUDA_MATPCDAG_MATPC_SOLUTION);
00231 
00232   bool asymmetric = (inv_param->matpc_type == QUDA_MATPC_EVEN_EVEN_ASYMMETRIC ||
00233                      inv_param->matpc_type == QUDA_MATPC_ODD_ODD_ASYMMETRIC);
00234 
00235   // We issue a warning only when it seems likely that the user is screwing up:
00236 
00237   // inverted clover term is required when applying preconditioned operator
00238   if (!h_clovinv && pc_solve && pc_solution) {
00239     warningQuda("Inverted clover term not loaded");
00240   }
00241 
00242   // uninverted clover term is required when applying unpreconditioned operator,
00243   // but note that dslashQuda() is always preconditioned
00244   if (!h_clover && !pc_solve && !pc_solution) {
00245     //warningQuda("Uninverted clover term not loaded");
00246   }
00247 
00248   // uninverted clover term is also required for "asymmetric" preconditioning
00249   if (!h_clover && pc_solve && pc_solution && asymmetric) {
00250     warningQuda("Uninverted clover term not loaded");
00251   }
00252 
00253   int X[4];
00254   for (int i=0; i<4; i++) {
00255     X[i] = cudaGaugePrecise.X[i];
00256   }
00257 
00258   inv_param->cloverGiB = 0;
00259 
00260   if (h_clover) {
00261     allocateCloverField(&cudaCloverPrecise, X, inv_param->cl_pad, inv_param->clover_cuda_prec);
00262     loadCloverField(cudaCloverPrecise, h_clover, inv_param->clover_cpu_prec, inv_param->clover_order);
00263     inv_param->cloverGiB += 2.0*cudaCloverPrecise.even.bytes / (1<<30);
00264 
00265     if (inv_param->clover_cuda_prec != inv_param->clover_cuda_prec_sloppy) {
00266       allocateCloverField(&cudaCloverSloppy, X, inv_param->cl_pad, inv_param->clover_cuda_prec_sloppy);
00267       loadCloverField(cudaCloverSloppy, h_clover, inv_param->clover_cpu_prec, inv_param->clover_order);
00268       inv_param->cloverGiB += 2.0*cudaCloverInvSloppy.even.bytes / (1<<30);
00269     } else {
00270       cudaCloverSloppy = cudaCloverPrecise;
00271     }
00272   }
00273 
00274   if (h_clovinv) {
00275     allocateCloverField(&cudaCloverInvPrecise, X, inv_param->cl_pad, inv_param->clover_cuda_prec);
00276     loadCloverField(cudaCloverInvPrecise, h_clovinv, inv_param->clover_cpu_prec, inv_param->clover_order);
00277     inv_param->cloverGiB += 2.0*cudaCloverInvPrecise.even.bytes / (1<<30);
00278     
00279     if (inv_param->clover_cuda_prec != inv_param->clover_cuda_prec_sloppy) {
00280       allocateCloverField(&cudaCloverInvSloppy, X, inv_param->cl_pad, inv_param->clover_cuda_prec_sloppy);
00281       loadCloverField(cudaCloverInvSloppy, h_clovinv, inv_param->clover_cpu_prec, inv_param->clover_order);
00282       inv_param->cloverGiB += 2.0*cudaCloverInvSloppy.even.bytes / (1<<30);
00283     } else {
00284       cudaCloverInvSloppy = cudaCloverInvPrecise;
00285     }
00286   }
00287 
00288 }
00289 
00290 
00291 #if 0
00292 // discard clover term but keep the inverse
00293 void discardCloverQuda(QudaInvertParam *inv_param)
00294 {
00295   inv_param->cloverGiB -= 2.0*cudaCloverPrecise.even.bytes / (1<<30);
00296   freeCloverField(&cudaCloverPrecise);
00297   if (cudaCloverSloppy.even.clover) {
00298     inv_param->cloverGiB -= 2.0*cudaCloverSloppy.even.bytes / (1<<30);
00299     freeCloverField(&cudaCloverSloppy);
00300   }
00301 }
00302 #endif
00303 
00304 
00305 void endQuda(void)
00306 {
00307   cudaColorSpinorField::freeBuffer();
00308   freeGaugeField(&cudaGaugePrecise);
00309   freeGaugeField(&cudaGaugeSloppy);
00310   freeGaugeField(&cudaFatLinkPrecise);
00311   freeGaugeField(&cudaFatLinkSloppy);
00312   freeGaugeField(&cudaLongLinkPrecise);
00313   freeGaugeField(&cudaLongLinkSloppy);
00314   if (cudaCloverPrecise.even.clover) freeCloverField(&cudaCloverPrecise);
00315   if (cudaCloverSloppy.even.clover) freeCloverField(&cudaCloverSloppy);
00316   if (cudaCloverInvPrecise.even.clover) freeCloverField(&cudaCloverInvPrecise);
00317   if (cudaCloverInvSloppy.even.clover) freeCloverField(&cudaCloverInvSloppy);
00318   endBlas();
00319 }
00320 
00321 
00322 void setDiracParam(DiracParam &diracParam, QudaInvertParam *inv_param, bool pc)
00323 {
00324   double kappa = inv_param->kappa;
00325   if (inv_param->dirac_order == QUDA_CPS_WILSON_DIRAC_ORDER) {
00326     kappa *= cudaGaugePrecise.anisotropy;
00327   }
00328 
00329   switch (inv_param->dslash_type) {
00330   case QUDA_WILSON_DSLASH:
00331     diracParam.type = pc ? QUDA_WILSONPC_DIRAC : QUDA_WILSON_DIRAC;
00332     break;
00333   case QUDA_CLOVER_WILSON_DSLASH:
00334     diracParam.type = pc ? QUDA_CLOVERPC_DIRAC : QUDA_CLOVER_DIRAC;
00335     break;
00336   case QUDA_DOMAIN_WALL_DSLASH:
00337     diracParam.type = pc ? QUDA_DOMAIN_WALLPC_DIRAC : QUDA_DOMAIN_WALL_DIRAC;
00338     break;
00339   case QUDA_ASQTAD_DSLASH:
00340     diracParam.type = pc ? QUDA_ASQTADPC_DIRAC : QUDA_ASQTAD_DIRAC;
00341     break;
00342   case QUDA_TWISTED_MASS_DSLASH:
00343     diracParam.type = pc ? QUDA_TWISTED_MASSPC_DIRAC : QUDA_TWISTED_MASS_DIRAC;
00344     break;
00345   default:
00346     errorQuda("Unsupported dslash_type");
00347   }
00348 
00349   diracParam.matpcType = inv_param->matpc_type;
00350   diracParam.dagger = inv_param->dagger;
00351   diracParam.gauge = &cudaGaugePrecise;
00352   diracParam.clover = &cudaCloverPrecise;
00353   diracParam.cloverInv = &cudaCloverInvPrecise;
00354   diracParam.kappa = kappa;
00355   diracParam.mass = inv_param->mass;
00356   diracParam.m5 = inv_param->m5;
00357   diracParam.mu = inv_param->mu;
00358   diracParam.verbose = inv_param->verbosity;
00359 }
00360 
00361 
00362 void setDiracSloppyParam(DiracParam &diracParam, QudaInvertParam *inv_param, bool pc)
00363 {
00364   setDiracParam(diracParam, inv_param, pc);
00365 
00366   diracParam.gauge = &cudaGaugeSloppy;
00367   diracParam.clover = &cudaCloverSloppy;
00368   diracParam.cloverInv = &cudaCloverInvSloppy;
00369 }
00370 
00371 
00372 static void massRescale(QudaDslashType dslash_type, double &kappa, QudaSolutionType solution_type, 
00373                         QudaMassNormalization mass_normalization, cudaColorSpinorField &b)
00374 {    
00375   if (dslash_type == QUDA_ASQTAD_DSLASH) {
00376     if (mass_normalization != QUDA_MASS_NORMALIZATION) {
00377       errorQuda("Staggered code only supports QUDA_MASS_NORMALIZATION");
00378     }
00379     return;
00380   }
00381 
00382   // multiply the source to compensate for normalization of the Dirac operator, if necessary
00383   switch (solution_type) {
00384   case QUDA_MAT_SOLUTION:
00385     if (mass_normalization == QUDA_MASS_NORMALIZATION ||
00386         mass_normalization == QUDA_ASYMMETRIC_MASS_NORMALIZATION) {
00387       axCuda(2.0*kappa, b);
00388     }
00389     break;
00390   case QUDA_MATDAG_MAT_SOLUTION:
00391     if (mass_normalization == QUDA_MASS_NORMALIZATION ||
00392         mass_normalization == QUDA_ASYMMETRIC_MASS_NORMALIZATION) {
00393       axCuda(4.0*kappa*kappa, b);
00394     }
00395     break;
00396   case QUDA_MATPC_SOLUTION:
00397     if (mass_normalization == QUDA_MASS_NORMALIZATION) {
00398         axCuda(4.0*kappa*kappa, b);
00399     } else if (mass_normalization == QUDA_ASYMMETRIC_MASS_NORMALIZATION) {
00400         axCuda(2.0*kappa, b);
00401     }
00402     break;
00403   case QUDA_MATPCDAG_MATPC_SOLUTION:
00404     if (mass_normalization == QUDA_MASS_NORMALIZATION) {
00405         axCuda(16.0*pow(kappa,4), b);
00406     } else if (mass_normalization == QUDA_ASYMMETRIC_MASS_NORMALIZATION) {
00407         axCuda(4.0*kappa*kappa, b);
00408     }
00409     break;
00410   default:
00411     errorQuda("Solution type %d not supported", solution_type);
00412   }
00413 }
00414 
00415 
00416 void dslashQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaParity parity)
00417 {
00418   ColorSpinorParam cpuParam(h_in, *inv_param, cudaGaugePrecise.X);
00419   cpuParam.siteSubset = QUDA_PARITY_SITE_SUBSET;
00420   ColorSpinorParam cudaParam(cpuParam, *inv_param);
00421 
00422   cpuColorSpinorField hIn(cpuParam);
00423 
00424   cudaColorSpinorField in(hIn, cudaParam);
00425 
00426   cudaParam.create = QUDA_NULL_FIELD_CREATE;
00427   cudaColorSpinorField out(in, cudaParam);
00428 
00429   if (inv_param->dirac_order == QUDA_CPS_WILSON_DIRAC_ORDER) {
00430     if (parity == QUDA_EVEN_PARITY) {
00431       parity = QUDA_ODD_PARITY;
00432     } else {
00433       parity = QUDA_EVEN_PARITY;
00434     }
00435     axCuda(cudaGaugePrecise.anisotropy, in);
00436   }
00437   bool pc = true;
00438 
00439   DiracParam diracParam;
00440   setDiracParam(diracParam, inv_param, pc);
00441 
00442   Dirac *dirac = Dirac::create(diracParam); // create the Dirac operator
00443   dirac->Dslash(out, in, parity); // apply the operator
00444   delete dirac; // clean up
00445 
00446   cpuParam.v = h_out;
00447   cpuColorSpinorField hOut(cpuParam);
00448   out.saveCPUSpinorField(hOut); // since this is a reference, this won't work: hOut = out;
00449 }
00450 
00451 
00452 void MatQuda(void *h_out, void *h_in, QudaInvertParam *inv_param)
00453 {
00454   bool pc = (inv_param->solution_type == QUDA_MATPC_SOLUTION ||
00455              inv_param->solution_type == QUDA_MATPCDAG_MATPC_SOLUTION);
00456 
00457   if (!pc) cudaGaugePrecise.X[0] *= 2;
00458   ColorSpinorParam cpuParam(h_in, *inv_param, cudaGaugePrecise.X);
00459   if (!pc) {
00460     cudaGaugePrecise.X[0] /= 2;
00461     cpuParam.siteSubset = QUDA_FULL_SITE_SUBSET;;
00462   } else {
00463     cpuParam.siteSubset = QUDA_PARITY_SITE_SUBSET;
00464   }
00465   ColorSpinorParam cudaParam(cpuParam, *inv_param);
00466 
00467   cpuColorSpinorField hIn(cpuParam);
00468   cudaColorSpinorField in(hIn, cudaParam);
00469   cudaParam.create = QUDA_NULL_FIELD_CREATE;
00470   cudaColorSpinorField out(in, cudaParam);
00471 
00472   DiracParam diracParam;
00473   setDiracParam(diracParam, inv_param, pc);
00474 
00475   Dirac *dirac = Dirac::create(diracParam); // create the Dirac operator
00476   dirac->M(out, in); // apply the operator
00477   delete dirac; // clean up
00478 
00479   double kappa = inv_param->kappa;
00480   if (pc) {
00481     if (inv_param->mass_normalization == QUDA_MASS_NORMALIZATION) {
00482       axCuda(0.25/(kappa*kappa), out);
00483     } else if (inv_param->mass_normalization == QUDA_ASYMMETRIC_MASS_NORMALIZATION) {
00484       axCuda(0.5/kappa, out);
00485     }
00486   } else {
00487     if (inv_param->mass_normalization == QUDA_MASS_NORMALIZATION ||
00488         inv_param->mass_normalization == QUDA_ASYMMETRIC_MASS_NORMALIZATION) {
00489       axCuda(0.5/kappa, out);
00490     }
00491   }
00492 
00493   cpuParam.v = h_out;
00494   cpuColorSpinorField hOut(cpuParam);
00495   out.saveCPUSpinorField(hOut); // since this is a reference, this won't work: hOut = out;
00496 }
00497 
00498 
00499 void MatDagMatQuda(void *h_out, void *h_in, QudaInvertParam *inv_param)
00500 {
00501   bool pc = (inv_param->solution_type == QUDA_MATPC_SOLUTION ||
00502              inv_param->solution_type == QUDA_MATPCDAG_MATPC_SOLUTION);
00503 
00504   if (!pc) cudaGaugePrecise.X[0] *= 2;
00505   ColorSpinorParam cpuParam(h_in, *inv_param, cudaGaugePrecise.X);
00506   if (!pc) {
00507     cudaGaugePrecise.X[0] /= 2;
00508     cpuParam.siteSubset = QUDA_FULL_SITE_SUBSET;;
00509   } else {
00510     cpuParam.siteSubset = QUDA_PARITY_SITE_SUBSET;
00511   }
00512   ColorSpinorParam cudaParam(cpuParam, *inv_param);
00513 
00514   cpuColorSpinorField hIn(cpuParam);
00515   cudaColorSpinorField in(hIn, cudaParam);
00516   cudaParam.create = QUDA_NULL_FIELD_CREATE;
00517   cudaColorSpinorField out(in, cudaParam);
00518 
00519   //  double kappa = inv_param->kappa;
00520   //  if (inv_param->dirac_order == QUDA_CPS_WILSON_DIRAC_ORDER) kappa *= cudaGaugePrecise.anisotropy;
00521 
00522   DiracParam diracParam;
00523   setDiracParam(diracParam, inv_param, pc);
00524 
00525   Dirac *dirac = Dirac::create(diracParam); // create the Dirac operator
00526   dirac->MdagM(out, in); // apply the operator
00527   delete dirac; // clean up
00528 
00529   double kappa = inv_param->kappa;
00530   if (pc) {
00531     if (inv_param->mass_normalization == QUDA_MASS_NORMALIZATION) {
00532       axCuda(1.0/pow(2.0*kappa,4), out);
00533     } else if (inv_param->mass_normalization == QUDA_ASYMMETRIC_MASS_NORMALIZATION) {
00534       axCuda(0.25/(kappa*kappa), out);
00535     }
00536   } else {
00537     if (inv_param->mass_normalization == QUDA_MASS_NORMALIZATION ||
00538         inv_param->mass_normalization == QUDA_ASYMMETRIC_MASS_NORMALIZATION) {
00539       axCuda(0.25/(kappa*kappa), out);
00540     }
00541   }
00542 
00543   cpuParam.v = h_out;
00544   cpuColorSpinorField hOut(cpuParam);
00545   out.saveCPUSpinorField(hOut); // since this is a reference, this won't work: hOut = out;
00546 }
00547 
00548 
00549 void invertQuda(void *hp_x, void *hp_b, QudaInvertParam *param)
00550 {
00551   checkInvertParam(param);
00552 
00553   bool pc_solve = (param->solve_type == QUDA_DIRECT_PC_SOLVE ||
00554                    param->solve_type == QUDA_NORMEQ_PC_SOLVE);
00555 
00556   bool pc_solution = (param->solution_type == QUDA_MATPC_SOLUTION ||
00557                       param->solution_type == QUDA_MATPCDAG_MATPC_SOLUTION);
00558 
00559   param->spinorGiB = cudaGaugePrecise.volume * spinorSiteSize;
00560   if (!pc_solve) param->spinorGiB *= 2;
00561   param->spinorGiB *= (param->cuda_prec == QUDA_DOUBLE_PRECISION ? sizeof(double) : sizeof(float));
00562   if (param->preserve_source == QUDA_PRESERVE_SOURCE_NO) {
00563     param->spinorGiB *= (param->inv_type == QUDA_CG_INVERTER ? 5 : 7)/(double)(1<<30);
00564   } else {
00565     param->spinorGiB *= (param->inv_type == QUDA_CG_INVERTER ? 8 : 9)/(double)(1<<30);
00566   }
00567 
00568   param->secs = 0;
00569   param->gflops = 0;
00570   param->iter = 0;
00571 
00572   Dirac *dirac;
00573   Dirac *diracSloppy;
00574 
00575   // set the Dirac operator parameters
00576   DiracParam diracParam;
00577   setDiracParam(diracParam, param, pc_solve);
00578 
00579   cpuColorSpinorField* h_b;
00580   cpuColorSpinorField* h_x;
00581   cudaColorSpinorField* b;
00582   cudaColorSpinorField* x;
00583   cudaColorSpinorField *in, *out;
00584 
00585   if (param->dslash_type == QUDA_ASQTAD_DSLASH) {
00586 
00587     if(param->solution_type != QUDA_MATPCDAG_MATPC_SOLUTION  
00588         && param->solution_type != QUDA_MATDAG_MAT_SOLUTION){
00589         errorQuda("Your solution type not supported for staggered. "
00590         "Only QUDA_MATPCDAG_MATPC_SOLUTION and QUDA_MATDAG_MAT_SOLUTION is supported.");
00591      }
00592 
00593     ColorSpinorParam csParam;
00594     csParam.precision = param->cpu_prec;
00595     csParam.fieldLocation = QUDA_CPU_FIELD_LOCATION;
00596     csParam.gammaBasis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS;  
00597     csParam.fieldOrder = QUDA_SPACE_SPIN_COLOR_FIELD_ORDER;
00598     csParam.nColor=3;
00599     csParam.nSpin=1;
00600     csParam.nDim=4;
00601     csParam.siteOrder = QUDA_EVEN_ODD_SITE_ORDER;
00602     
00603     if (param->solve_type == QUDA_NORMEQ_SOLVE) {
00604       csParam.siteSubset = QUDA_FULL_SITE_SUBSET;
00605       csParam.x[0] = 2*cudaFatLinkPrecise.X[0];
00606     } else if (param->solve_type == QUDA_NORMEQ_PC_SOLVE) {
00607       csParam.siteSubset = QUDA_PARITY_SITE_SUBSET;
00608       csParam.x[0] = cudaFatLinkPrecise.X[0];
00609     } else {
00610       errorQuda("Direct solve_type not supported for staggered");
00611     }
00612     csParam.x[1] = cudaFatLinkPrecise.X[1];
00613     csParam.x[2] = cudaFatLinkPrecise.X[2];
00614     csParam.x[3] = cudaFatLinkPrecise.X[3];
00615     csParam.create = QUDA_REFERENCE_FIELD_CREATE;
00616     csParam.v = hp_b;  
00617     h_b = new cpuColorSpinorField(csParam);
00618     
00619     csParam.v = hp_x;
00620     h_x = new cpuColorSpinorField(csParam);
00621     
00622     csParam.fieldLocation = QUDA_CUDA_FIELD_LOCATION;
00623     csParam.fieldOrder = QUDA_FLOAT2_FIELD_ORDER;
00624     csParam.gammaBasis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS;
00625     
00626     csParam.pad = param->sp_pad;
00627     csParam.precision = param->cuda_prec;
00628     csParam.create = QUDA_ZERO_FIELD_CREATE;
00629     
00630     b = new cudaColorSpinorField(csParam);
00631     
00632     diracParam.fatGauge = &cudaFatLinkPrecise;
00633     diracParam.longGauge = &cudaLongLinkPrecise;
00634     
00635     dirac = Dirac::create(diracParam); // create the Dirac operator
00636     
00637     diracParam.fatGauge = &cudaFatLinkSloppy;
00638     diracParam.longGauge = &cudaLongLinkSloppy;
00639     
00640     setDiracSloppyParam(diracParam, param, pc_solve);
00641     diracSloppy = Dirac::create(diracParam);
00642     
00643     *b = *h_b; // send data from CPU to GPU
00644     
00645     csParam.create = QUDA_COPY_FIELD_CREATE;  
00646     x = new cudaColorSpinorField(*h_x, csParam); // solution  
00647     csParam.create = QUDA_ZERO_FIELD_CREATE;
00648         
00649   } else if (param->dslash_type == QUDA_WILSON_DSLASH ||
00650              param->dslash_type == QUDA_CLOVER_WILSON_DSLASH ||
00651              param->dslash_type == QUDA_DOMAIN_WALL_DSLASH ||
00652              param->dslash_type == QUDA_TWISTED_MASS_DSLASH) {
00653 
00654     // temporary hack
00655     if (!pc_solution) cudaGaugePrecise.X[0] *= 2;
00656     ColorSpinorParam cpuParam(hp_b, *param, cudaGaugePrecise.X);
00657     if (!pc_solution) {
00658       cudaGaugePrecise.X[0] /= 2;
00659       cpuParam.siteSubset = QUDA_FULL_SITE_SUBSET;;
00660     } else {
00661       cpuParam.siteSubset = QUDA_PARITY_SITE_SUBSET;
00662     }
00663     
00664     ColorSpinorParam cudaParam(cpuParam, *param);
00665     
00666     h_b = new cpuColorSpinorField(cpuParam);
00667     cpuParam.v = hp_x;
00668     h_x = new cpuColorSpinorField(cpuParam);
00669     
00670     b = new cudaColorSpinorField(*h_b, cudaParam); // download source
00671     
00672     if (param->verbosity >= QUDA_VERBOSE) {
00673       printfQuda("Source: CPU = %f, CUDA copy = %f\n", norm2(*h_b),norm2(*b));
00674     }
00675     cudaParam.create = QUDA_ZERO_FIELD_CREATE;
00676     x = new cudaColorSpinorField(cudaParam); // solution
00677     
00678     // if using preconditioning but solving the full system
00679     if (pc_solve && !pc_solution) {
00680       cudaParam.x[0] /= 2;
00681       cudaParam.siteSubset = QUDA_PARITY_SITE_SUBSET;
00682     }
00683     
00684     dirac = Dirac::create(diracParam); // create the Dirac operator
00685     
00686     setDiracSloppyParam(diracParam, param, pc_solve);
00687     diracSloppy = Dirac::create(diracParam);
00688     
00689   } else {
00690     errorQuda("Invalid dslash_type");
00691   }
00692   
00693   dirac->prepare(in, out, *x, *b, param->solution_type);
00694   if (param->verbosity >= QUDA_VERBOSE) printfQuda("Prepared source = %f\n", norm2(*in));   
00695 
00696   massRescale(param->dslash_type, diracParam.kappa, param->solution_type, param->mass_normalization, *in);
00697   if (param->verbosity >= QUDA_VERBOSE) printfQuda("Mass rescale done\n");   
00698   
00699   switch (param->inv_type) {
00700   case QUDA_CG_INVERTER:
00701     if (param->solution_type != QUDA_MATDAG_MAT_SOLUTION && param->solution_type != QUDA_MATPCDAG_MATPC_SOLUTION) {
00702       copyCuda(*out, *in);
00703       dirac->Mdag(*in, *out);
00704     }
00705     invertCgCuda(DiracMdagM(dirac), DiracMdagM(diracSloppy), *out, *in, param);
00706     break;
00707   case QUDA_BICGSTAB_INVERTER:
00708     if (param->solution_type == QUDA_MATDAG_MAT_SOLUTION || param->solution_type == QUDA_MATPCDAG_MATPC_SOLUTION) {
00709       invertBiCGstabCuda(DiracMdag(dirac), DiracMdag(diracSloppy), *out, *in, param);
00710       copyCuda(*in, *out);
00711     }
00712     invertBiCGstabCuda(DiracM(dirac), DiracM(diracSloppy), *out, *in, param);
00713     break;
00714   default:
00715     errorQuda("Inverter type %d not implemented", param->inv_type);
00716   }
00717   
00718   if (param->verbosity >= QUDA_VERBOSE){
00719     printfQuda("Solution = %f\n",norm2(*x));
00720   }
00721   dirac->reconstruct(*x, *b, param->solution_type);
00722   
00723   x->saveCPUSpinorField(*h_x); // since this is a reference, this won't work: h_x = x;
00724   
00725   if (param->verbosity >= QUDA_VERBOSE){
00726     printfQuda("Reconstructed: CUDA solution = %f, CPU copy = %f\n", norm2(*x), norm2(*h_x));
00727   }
00728   
00729   delete diracSloppy;
00730   delete dirac;
00731   
00732   delete h_b;
00733   delete h_x;
00734   delete b;
00735   delete x;
00736   
00737   return;
00738 }
00739 
00740 
00741 void invertMultiShiftQuda(void **_hp_x, void *_hp_b, QudaInvertParam *param,
00742                           double* offsets, int num_offsets, double* residue_sq)
00743 {
00744   checkInvertParam(param);
00745 
00746   if (param->dslash_type != QUDA_ASQTAD_DSLASH) {
00747     errorQuda("Multi-shift solver only supports staggered");
00748   }
00749   if (param->solve_type != QUDA_NORMEQ_SOLVE &&
00750       param->solve_type != QUDA_NORMEQ_PC_SOLVE) {
00751     errorQuda("Direct solve_type not supported for staggered");
00752   }
00753   if (num_offsets <= 0){
00754     warningQuda("invertMultiShiftQuda() called with no offsets");
00755     return;
00756   }
00757 
00758   if(param->solution_type != QUDA_MATPCDAG_MATPC_SOLUTION
00759       && param->solution_type != QUDA_MATDAG_MAT_SOLUTION){
00760       errorQuda("Your solution type not supported for staggered. "
00761       "Only QUDA_MATPCDAG_MATPC_SOLUTION and QUDA_MATDAG_MAT_SOLUTION is supported.");
00762   }
00763 
00764 
00765 
00766   bool pc_solve = (param->solve_type == QUDA_NORMEQ_PC_SOLVE);
00767 
00768   param->secs = 0;
00769   param->gflops = 0;
00770   param->iter = 0;
00771   
00772   double low_offset = offsets[0];
00773   int low_index = 0;
00774   for (int i=1;i < num_offsets;i++){
00775     if (offsets[i] < low_offset){
00776       low_offset = offsets[i];
00777       low_index = i;
00778     }
00779   }
00780   
00781   void* hp_x[num_offsets];
00782   void* hp_b = _hp_b;
00783   for(int i=0;i < num_offsets;i++){
00784     hp_x[i] = _hp_x[i];
00785   }
00786   
00787   if (low_index != 0){
00788     void* tmp = hp_x[0];
00789     hp_x[0] = hp_x[low_index] ;
00790     hp_x[low_index] = tmp;
00791     
00792     double tmp1 = offsets[0];
00793     offsets[0]= offsets[low_index];
00794     offsets[low_index] =tmp1;
00795   }
00796     
00797   ColorSpinorParam csParam;
00798   csParam.precision = param->cpu_prec;
00799   csParam.fieldLocation = QUDA_CPU_FIELD_LOCATION;
00800   csParam.gammaBasis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS;  
00801   csParam.fieldOrder = QUDA_SPACE_SPIN_COLOR_FIELD_ORDER;
00802   csParam.nColor=3;
00803   csParam.nSpin=1;
00804   csParam.nDim=4;
00805   csParam.siteOrder = QUDA_EVEN_ODD_SITE_ORDER;
00806 
00807   if (param->solve_type == QUDA_NORMEQ_SOLVE) {
00808     csParam.siteSubset = QUDA_FULL_SITE_SUBSET;
00809     csParam.x[0] = 2*cudaFatLinkPrecise.X[0];
00810   } else if (param->solve_type == QUDA_NORMEQ_PC_SOLVE) {
00811     csParam.siteSubset = QUDA_PARITY_SITE_SUBSET;
00812     csParam.x[0] = cudaFatLinkPrecise.X[0];
00813   } else {
00814     errorQuda("Direct solve_type not supported for staggered");
00815   }
00816   csParam.x[1] = cudaFatLinkPrecise.X[1];
00817   csParam.x[2] = cudaFatLinkPrecise.X[2];
00818   csParam.x[3] = cudaFatLinkPrecise.X[3];
00819   csParam.create = QUDA_REFERENCE_FIELD_CREATE;
00820   csParam.v = hp_b;  
00821   cpuColorSpinorField h_b(csParam);
00822   
00823   cpuColorSpinorField* h_x[num_offsets];
00824   
00825   for (int i=0; i<num_offsets; i++) {
00826     csParam.v = hp_x[i];
00827     h_x[i] = new cpuColorSpinorField(csParam);
00828   }
00829   
00830   csParam.fieldLocation = QUDA_CUDA_FIELD_LOCATION;
00831   csParam.fieldOrder = QUDA_FLOAT2_FIELD_ORDER;
00832   csParam.gammaBasis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS;
00833 
00834   csParam.pad = param->sp_pad;
00835   csParam.precision = param->cuda_prec;
00836   csParam.create = QUDA_ZERO_FIELD_CREATE;
00837   
00838   cudaColorSpinorField b(csParam);
00839   
00840   // set the mass in the invert_param
00841   param->mass = sqrt(offsets[0]/4);
00842   
00843   // set the Dirac operator parameters
00844   DiracParam diracParam;
00845   setDiracParam(diracParam, param, pc_solve);
00846   diracParam.fatGauge = &cudaFatLinkPrecise;
00847   diracParam.longGauge = &cudaLongLinkPrecise;
00848   
00849   Dirac *dirac = Dirac::create(diracParam); // create the Dirac operator
00850 
00851   diracParam.fatGauge = &cudaFatLinkSloppy;
00852   diracParam.longGauge = &cudaLongLinkSloppy;
00853   
00854   setDiracSloppyParam(diracParam, param, pc_solve);
00855   Dirac *diracSloppy = Dirac::create(diracParam);
00856   
00857   b = h_b; //send data from CPU to GPU
00858   
00859   csParam.create = QUDA_ZERO_FIELD_CREATE;
00860   cudaColorSpinorField* x[num_offsets]; // solution  
00861   for (int i=0; i < num_offsets; i++) {
00862     x[i] = new cudaColorSpinorField(csParam);
00863   }
00864   cudaColorSpinorField tmp(csParam); // temporary
00865   invertMultiShiftCgCuda(DiracMdagM(dirac), DiracMdagM(diracSloppy), x, b, param, offsets, num_offsets, residue_sq);    
00866   
00867   for (int i=0; i < num_offsets; i++) {
00868     x[i]->saveCPUSpinorField(*h_x[i]);
00869   }
00870   
00871   for(int i=0; i<num_offsets; i++) {
00872     delete h_x[i];
00873     delete x[i];
00874   }
00875   delete diracSloppy;
00876   delete dirac;
00877   
00878   return;
00879 }
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Friends Defines