12 #define stSpinorSiteSize 6 13 template<
typename Float>
16 printf(
"(%f,%f) (%f,%f) (%f,%f) \t",
17 spinor[0], spinor[1], spinor[2],
18 spinor[3], spinor[4], spinor[5]);
31 double* myspinor = (
double*)spinor;
32 for (i = 0;i < len; i++){
36 float* myspinor = (
float*)spinor;
37 for (i=0;i < len ;i++){
46 template<
typename Float>
51 for (i = 0;i < 3; i++){
53 printf(
"(%.10f,%.10f) \t", link[i*3*2 + j*2], link[i*3*2 + j*2 + 1]);
68 double* mylink = (
double*)link;
69 for (i = 0;i < len; i++){
73 float* mylink = (
float*)link;
74 for (i=0;i < len ;i++){
83 template <
typename Float>
85 a[0] += sign * (b[0]*c[0] - b[1]*c[1]);
86 a[1] -= sign * (b[0]*c[1] + b[1]*c[0]);
90 template<
typename Float>
98 Float* refc = &refc_buf[0];
100 memset((
void*)refc, 0,
sizeof(refc_buf));
104 Float* c = link + 12;
113 int X1h=gaugeParam->
X[0]/2;
114 int X1 =gaugeParam->
X[0];
115 int X2 =gaugeParam->
X[1];
116 int X3 =gaugeParam->
X[2];
117 int X4 =gaugeParam->
X[3];
121 double coff= -u0*u0*24;
131 int i4 = index /(X3*X2*
X1);
132 int i3 = (index - i4*(X3*X2*
X1))/(X2*
X1);
133 int i2 = (index - i4*(X3*X2*
X1) - i3*(X2*X1))/X1;
134 int i1 = index - i4*(X3*X2*
X1) - i3*(X2*X1) - i2*
X1;
143 if ((i1+i4) % 2 == 1){
148 if ( (i4+i1+i2) % 2 == 1){
153 if (ga_idx >= (X4-3)*X1h*X2*
X3 ){
164 refc[0]*=coff; refc[1]*=coff; refc[2]*=coff; refc[3]*=coff; refc[4]*=coff; refc[5]*=coff;
167 double delta = 0.0001;
169 for (i =0;i < 6; i++){
170 double diff = refc[i] - c[i];
171 double absdiff = diff > 0? diff: (-diff);
172 if (absdiff > delta){
173 printf(
"ERROR: sanity check failed for link\n");
175 printf(
"refc = (%.10f,%.10f) (%.10f,%.10f) (%.10f,%.10f)\n",
176 refc[0], refc[1], refc[2], refc[3], refc[4], refc[5]);
177 printf(
"dir=%d, ga_idx=%d, coff=%f, t_boundary=%f\n",dir, ga_idx,coff, t_boundary);
178 printf(
"X=%d %d %d %d, X1h=%d\n", gaugeParam->
X[0], X2, X3, X4, X1h);
189 template<
typename Float>
195 Float* refc = &refc_buf[0];
197 memset((
void*)refc, 0,
sizeof(refc_buf));
201 Float* c = link + 12;
210 int X1h=gaugeParam->
X[0]/2;
211 int X1 =gaugeParam->
X[0];
212 int X2 =gaugeParam->
X[1];
213 int X3 =gaugeParam->
X[2];
214 int X4 =gaugeParam->
X[3];
221 int i4 = index /(X3*X2*
X1);
222 int i3 = (index - i4*(X3*X2*
X1))/(X2*
X1);
223 int i2 = (index - i4*(X3*X2*
X1) - i3*(X2*X1))/X1;
224 int i1 = index - i4*(X3*X2*
X1) - i3*(X2*X1) - i2*
X1;
233 if ((i1+i4) % 2 == 1){
238 if ( (i4+i1+i2) % 2 == 1){
250 refc[0]*=coeff; refc[1]*=coeff; refc[2]*=coeff; refc[3]*=coeff; refc[4]*=coeff; refc[5]*=coeff;
254 double delta = 0.0001;
256 for (i =0;i < 6; i++){
257 double diff = refc[i] - c[i];
258 double absdiff = diff > 0? diff: (-diff);
259 if (absdiff > delta){
260 printf(
"ERROR: sanity check failed for site link\n");
262 printf(
"refc = (%.10f,%.10f) (%.10f,%.10f) (%.10f,%.10f)\n",
263 refc[0], refc[1], refc[2], refc[3], refc[4], refc[5]);
264 printf(
"X=%d %d %d %d, X1h=%d\n", gaugeParam->
X[0], X2, X3, X4, X1h);
280 template <
typename Float>
287 template <
typename Float>
289 a[0] = b[0]*c[0] - b[1]*c[1];
290 a[1] = b[0]*c[1] + b[1]*c[0];
294 template <
typename Float>
296 a[0] = b[0]*c[0] - b[1]*c[1];
297 a[1] = -b[0]*c[1] - b[1]*c[0];
301 template <
typename Float>
303 a[0] = b[0]*c[0] + b[1]*c[1];
304 a[1] = b[0]*c[1] - b[1]*c[0];
308 template <
typename Float>
310 a[0] += sign*(b[0]*c[0] - b[1]*c[1]);
311 a[1] += sign*(b[0]*c[1] + b[1]*c[0]);
315 template <
typename Float>
317 a[0] += b[0]*c[0] + b[1]*c[1];
318 a[1] += b[0]*c[1] - b[1]*c[0];
322 template<
typename Float>
327 Float ref_link_buf[18];
328 Float*
ref = & ref_link_buf[0];
329 memset(ref, 0,
sizeof(ref_link_buf));
331 ref[0] =
atan2(link[1], link[0]);
332 ref[1] =
atan2(link[13], link[12]);
333 for (
int i=2; i<7; i++) {
337 int X1h=gaugeParam->
X[0]/2;
338 int X2 =gaugeParam->
X[1];
339 int X3 =gaugeParam->
X[2];
340 int X4 =gaugeParam->
X[3];
346 row_sum += ref[2]*ref[2];
347 row_sum += ref[3]*ref[3];
348 row_sum += ref[4]*ref[4];
349 row_sum += ref[5]*ref[5];
351 #define SMALL_NUM 1e-24 352 row_sum = (row_sum != 0)?row_sum:
SMALL_NUM;
356 int X1h=gaugeParam->
X[0]/2;
357 int X1 =gaugeParam->
X[0];
358 int X2 =gaugeParam->
X[1];
359 int X3 =gaugeParam->
X[2];
360 int X4 =gaugeParam->
X[3];
363 int i4 = index /(X3*X2*
X1);
364 int i3 = (index - i4*(X3*X2*
X1))/(X2*
X1);
365 int i2 = (index - i4*(X3*X2*
X1) - i3*(X2*X1))/X1;
366 int i1 = index - i4*(X3*X2*
X1) - i3*(X2*X1) - i2*
X1;
375 if ((i1+i4) % 2 == 1){
380 if ( (i4+i1+i2) % 2 == 1){
385 if (ga_idx >= (X4-3)*X1h*X2*
X3 ){
396 Float U00_mag =
sqrt( (1.f/(u0*u0) - row_sum)>0? (1.f/(u0*u0)-row_sum):0);
401 ref[0] = U00_mag *
cos(ref[14]);
402 ref[1] = U00_mag *
sin(ref[14]);
404 Float column_sum = 0.0;
405 for (
int i=0; i<2; i++) column_sum += ref[i]*ref[i];
406 for (
int i=6; i<8; i++) column_sum += ref[i]*ref[i];
407 Float U20_mag =
sqrt( (1.f/(u0*u0) - column_sum) > 0? (1.f/(u0*u0)-column_sum) : 0);
409 ref[12] = U20_mag *
cos(ref[15]);
410 ref[13] = U20_mag *
sin(ref[15]);
415 Float r_inv2 = 1.0/(u0*row_sum);
444 double delta = 0.0001;
446 for (i =0;i < 18; i++){
448 double diff = ref[i] - link[i];
449 double absdiff = diff > 0? diff: (-diff);
450 if ( (ref[i] != ref[i]) || (absdiff > delta)){
451 printf(
"ERROR: sanity check failed for link\n");
453 printf(
"reconstructed link is\n");
455 printf(
"dir=%d, ga_idx=%d, u0=%f, t_boundary=%f\n",dir, ga_idx, u0, t_boundary);
456 printf(
"X=%d %d %d %d, X1h=%d\n", gaugeParam->
X[0], X2, X3, X4, X1h);
475 double* mylink = (
double*)link;
477 for (i = 0;i < len/2; i++){
480 printf(
"ERROR: even link sanity check failed, i=%d\n",i);
488 for (i = 0;i < len/2; i++){
491 printf(
"ERROR: odd link sanity check failed, i=%d\n",i);
498 float* mylink = (
float*)link;
501 for (i=0;i < len/2 ;i++){
504 printf(
"ERROR: even link sanity check 12 failed, i=%d\n",i);
518 for (i=0;i < len/2 ;i++){
521 printf(
"ERROR: odd link sanity check 12 failed, i=%d\n", i);
549 double* mylink = (
double*)link;
551 for (i = 0;i < len/2; i++){
552 for(dir=
XUP;dir <=
TUP; dir++){
555 printf(
"ERROR: even link sanity check failed, i=%d, function %s\n",i, __FUNCTION__);
564 for (i = 0;i < len/2; i++){
565 for(dir=
XUP;dir <=
TUP; dir++){
568 printf(
"ERROR: odd link sanity check failed, i=%d, function %s\n",i, __FUNCTION__);
576 float* mylink = (
float*)link;
579 for (i=0;i < len/2 ;i++){
580 for(dir=
XUP;dir <=
TUP; dir++){
583 printf(
"ERROR: even link sanity check 12 failed, i=%d, function %s\n",i, __FUNCTION__);
590 for (i=0;i < len/2 ;i++){
591 for(dir=
XUP;dir <=
TUP; dir++){
594 printf(
"ERROR: odd link sanity check 12 failed, i=%d, function %s\n", i, __FUNCTION__);
610 if (strcmp(s,
"silent") == 0){
612 }
else if (strcmp(s,
"summarize") == 0){
614 }
else if (strcmp(s,
"verbose") == 0){
616 }
else if (strcmp(s,
"debug") == 0){
619 fprintf(stderr,
"Error: invalid verbosity type %s\n", s);
645 fprintf(stderr,
"Error: invalid verbosity type %d\n", type);
657 if (strcmp(s,
"8") == 0){
659 }
else if (strcmp(s,
"9") == 0){
661 }
else if (strcmp(s,
"12") == 0){
663 }
else if (strcmp(s,
"13") == 0){
665 }
else if (strcmp(s,
"18") == 0){
668 fprintf(stderr,
"Error: invalid reconstruct type\n");
682 if (strcmp(s,
"double") == 0) {
684 }
else if (strcmp(s,
"single") == 0) {
686 }
else if (strcmp(s,
"half") == 0) {
688 }
else if (strcmp(s,
"half") == 0) {
690 }
else if (strcmp(s,
"quarter") == 0) {
693 fprintf(stderr,
"Error: invalid precision type\n");
735 ret =
"Cayley-Hamilton/SVD";
835 ret =
"full_ee_prec";
838 ret =
"full_oo_prec";
864 if (strcmp(s,
"col") == 0) {
866 }
else if (strcmp(s,
"row") == 0) {
869 fprintf(stderr,
"Error: invalid rank order type\n");
881 if (strcmp(s,
"wilson") == 0){
883 }
else if (strcmp(s,
"clover") == 0){
885 }
else if (strcmp(s,
"twisted-mass") == 0){
887 }
else if (strcmp(s,
"twisted-clover") == 0){
889 }
else if (strcmp(s,
"staggered") == 0){
891 }
else if (strcmp(s,
"asqtad") == 0){
893 }
else if (strcmp(s,
"domain-wall") == 0){
895 }
else if (strcmp(s,
"domain-wall-4d") == 0){
897 }
else if (strcmp(s,
"mobius") == 0){
899 }
else if (strcmp(s,
"laplace") == 0){
902 fprintf(stderr,
"Error: invalid dslash type\n");
925 ret=
"twisted-clover";
937 ret =
"domain_wall_4d";
959 if (strcmp(s,
"open") == 0 || strcmp(s,
"OPEN") == 0 || strcmp(s,
"Open") == 0) {
961 }
else if (strcmp(s,
"dr") == 0 || strcmp(s,
"DR") == 0) {
964 fprintf(stderr,
"Error: invalid contract type\n");
977 default: ret =
"unknown";
break;
988 if (strcmp(s,
"SR") == 0) {
990 }
else if (strcmp(s,
"LR") == 0) {
992 }
else if (strcmp(s,
"SM") == 0) {
994 }
else if (strcmp(s,
"LM") == 0) {
996 }
else if (strcmp(s,
"SI") == 0) {
998 }
else if (strcmp(s,
"LI") == 0) {
1001 fprintf(stderr,
"Error: invalid eigen spectrum type\n");
1019 default: ret =
"unknown eigenspectrum";
break;
1030 if (strcmp(s,
"trlm") == 0) {
1032 }
else if (strcmp(s,
"irlm") == 0) {
1034 }
else if (strcmp(s,
"iram") == 0) {
1037 fprintf(stderr,
"Error: invalid quda eigensolver type\n");
1052 default: ret =
"unknown eigensolver";
break;
1062 if (strcmp(s,
"kappa") == 0) {
1064 }
else if (strcmp(s,
"mass") == 0) {
1066 }
else if (strcmp(s,
"asym-mass") == 0) {
1069 fprintf(stderr,
"Error: invalid mass normalization\n");
1092 fprintf(stderr,
"Error: invalid mass normalization\n");
1104 if (strcmp(s,
"even-even") == 0){
1106 }
else if (strcmp(s,
"odd-odd") == 0){
1108 }
else if (strcmp(s,
"even-even-asym") == 0){
1110 }
else if (strcmp(s,
"odd-odd-asym") == 0){
1113 fprintf(stderr,
"Error: invalid matpc type %s\n", s);
1133 ret =
"even-even-asym";
1136 ret =
"odd-odd-asym";
1139 fprintf(stderr,
"Error: invalid matpc type %d\n", type);
1150 if (strcmp(s,
"direct") == 0) {
1152 }
else if (strcmp(s,
"direct-pc") == 0) {
1154 }
else if (strcmp(s,
"normop") == 0) {
1156 }
else if (strcmp(s,
"normop-pc") == 0) {
1158 }
else if (strcmp(s,
"normerr") == 0) {
1160 }
else if (strcmp(s,
"normerr-pc") == 0) {
1163 fprintf(stderr,
"Error: invalid matpc type %s\n", s);
1195 fprintf(stderr,
"Error: invalid solve type %d\n", type);
1206 if (strcmp(s,
"mat") == 0) {
1208 }
else if (strcmp(s,
"mat-dag-mat") == 0) {
1210 }
else if (strcmp(s,
"mat-pc") == 0) {
1212 }
else if (strcmp(s,
"mat-pc-dag") == 0) {
1214 }
else if (strcmp(s,
"mat-pc-dag-mat-pc") == 0) {
1217 fprintf(stderr,
"Error: invalid solution type %s\n", s);
1228 if (strcmp(s,
"false") == 0) {
1230 }
else if (strcmp(s,
"add") == 0) {
1232 }
else if (strcmp(s,
"mul") == 0) {
1235 fprintf(stderr,
"Error: invalid Schwarz type %s\n", s);
1247 if (strcmp(s,
"singlet") == 0){
1249 }
else if (strcmp(s,
"deg-doublet") == 0){
1251 }
else if (strcmp(s,
"nondeg-doublet") == 0){
1253 }
else if (strcmp(s,
"no") == 0){
1256 fprintf(stderr,
"Error: invalid flavor type\n");
1273 ret =
"deg-doublet";
1276 ret =
"nondeg-doublet";
1294 if (strcmp(s,
"cg") == 0){
1296 }
else if (strcmp(s,
"bicgstab") == 0){
1298 }
else if (strcmp(s,
"gcr") == 0){
1300 }
else if (strcmp(s,
"pcg") == 0){
1302 }
else if (strcmp(s,
"mpcg") == 0){
1304 }
else if (strcmp(s,
"mpbicgstab") == 0){
1306 }
else if (strcmp(s,
"mr") == 0){
1308 }
else if (strcmp(s,
"sd") == 0){
1310 }
else if (strcmp(s,
"eigcg") == 0){
1312 }
else if (strcmp(s,
"inc-eigcg") == 0){
1314 }
else if (strcmp(s,
"gmresdr") == 0){
1316 }
else if (strcmp(s,
"gmresdr-proj") == 0){
1318 }
else if (strcmp(s,
"gmresdr-sh") == 0){
1320 }
else if (strcmp(s,
"fgmresdr") == 0){
1322 }
else if (strcmp(s,
"mg") == 0){
1324 }
else if (strcmp(s,
"bicgstab-l") == 0){
1326 }
else if (strcmp(s,
"cgne") == 0){
1328 }
else if (strcmp(s,
"cgnr") == 0){
1330 }
else if (strcmp(s,
"cg3") == 0){
1332 }
else if (strcmp(s,
"cg3ne") == 0){
1334 }
else if (strcmp(s,
"cg3nr") == 0){
1336 }
else if (strcmp(s,
"ca-cg") == 0){
1338 }
else if (strcmp(s,
"ca-cgne") == 0){
1340 }
else if (strcmp(s,
"ca-cgnr") == 0){
1342 }
else if (strcmp(s,
"ca-gcr") == 0){
1345 fprintf(stderr,
"Error: invalid solver type %s\n", s);
1392 ret =
"gmresdr-proj";
1435 errorQuda(
"Error: invalid solver type %d\n", type);
1445 static char vstr[32];
1449 sprintf(vstr,
"%1d.%1d.%1d",
1462 if (strcmp(s,
"eigen") == 0) {
1464 }
else if (strcmp(s,
"magma") == 0) {
1467 fprintf(stderr,
"Error: invalid external library type %s\n", s);
1479 if (strcmp(s,
"cpu") == 0 || strcmp(s,
"host") == 0) {
1481 }
else if (strcmp(s,
"gpu") == 0 || strcmp(s,
"cuda") == 0) {
1484 fprintf(stderr,
"Error: invalid location %s\n", s);
1498 default: fprintf(stderr,
"Error: invalid location\n"); exit(1);
1509 if (strcmp(s,
"device") == 0) {
1511 }
else if (strcmp(s,
"pinned") == 0) {
1513 }
else if (strcmp(s,
"mapped") == 0) {
1516 fprintf(stderr,
"Error: invalid memory type %s\n", s);
1531 default: fprintf(stderr,
"Error: invalid memory type\n"); exit(1);
enum QudaMassNormalization_s QudaMassNormalization
QudaGaugeParam gaugeParam
QudaSolutionType get_solution_type(char *s)
void complexDotProduct(Float *a, Float *b, Float *c)
enum QudaPrecision_s QudaPrecision
QudaTwistFlavorType get_flavor_type(char *s)
QudaPrecision get_prec(char *s)
void accumulateComplexDotProduct(Float *a, Float *b, Float *c)
int site_link_sanity_check(void *link, int len, int precision, QudaGaugeParam *gaugeParam)
const char * get_memory_type_str(QudaMemoryType type)
enum QudaSolveType_s QudaSolveType
__host__ __device__ ValueType sqrt(ValueType x)
const char * get_eig_spectrum_str(QudaEigSpectrumType type)
#define QUDA_VERSION_MINOR
const char * get_matpc_str(QudaMatPCType type)
const char * get_contract_str(QudaContractType type)
const char * get_ritz_location_str(QudaFieldLocation type)
const char * get_gauge_order_str(QudaGaugeFieldOrder order)
const char * get_staggered_test_type(int t)
const char * get_prec_str(QudaPrecision prec)
int get_rank_order(char *s)
enum QudaEigType_s QudaEigType
QudaReconstructType get_recon(char *s)
void accumulateConjugateProduct(Float *a, Float *b, Float *c, int sign)
QudaFieldLocation get_location(char *s)
void accumulateComplexProduct(Float *a, Float *b, Float *c, Float sign)
const char * get_flavor_str(QudaTwistFlavorType type)
const char * get_test_type(int t)
QudaSchwarzType get_schwarz_type(char *s)
const char * get_solver_str(QudaInverterType type)
int fullLatticeIndex(int dim[4], int index, int oddBit)
QudaSolveType get_solve_type(char *s)
void complexProduct(Float *a, Float *b, Float *c)
QudaExtLibType get_solve_ext_lib_type(char *s)
const char * get_eig_type_str(QudaEigType type)
const char * get_solve_str(QudaSolveType type)
const char * get_unitarization_str(bool svd_only)
QudaInverterType get_solver_type(char *s)
void display_link_internal(Float *link)
__host__ __device__ ValueType sin(ValueType x)
void display_spinor(void *spinor, int len, int precision)
const char * get_mass_normalization_str(QudaMassNormalization type)
QudaDslashType get_dslash_type(char *s)
const char * get_recon_str(QudaReconstructType recon)
QudaEigSpectrumType get_eig_spectrum_type(char *s)
__host__ __device__ ValueType atan2(ValueType x, ValueType y)
enum QudaMatPCType_s QudaMatPCType
int link_sanity_check(void *link, int len, int precision, int dir, QudaGaugeParam *gaugeParam)
enum QudaSolutionType_s QudaSolutionType
#define QUDA_VERSION_SUBMINOR
enum QudaSchwarzType_s QudaSchwarzType
QudaMatPCType get_matpc_type(char *s)
const char * get_dslash_str(QudaDslashType type)
enum QudaGaugeFieldOrder_s QudaGaugeFieldOrder
int link_sanity_check_internal_8(Float *link, int dir, int ga_idx, QudaGaugeParam *gaugeParam, int oddBit)
QudaEigType get_eig_type(char *s)
QudaMemoryType get_df_mem_type_ritz(char *s)
void display_link(void *link, int len, int precision)
void * memset(void *s, int c, size_t n)
static int index(int ndim, const int *dims, const int *x)
enum QudaFieldLocation_s QudaFieldLocation
int link_sanity_check_internal_12(Float *link, int dir, int ga_idx, QudaGaugeParam *gaugeParam, int oddBit)
cpuColorSpinorField * ref
enum QudaEigSpectrumType_s QudaEigSpectrumType
enum QudaReconstructType_s QudaReconstructType
Main header file for the QUDA library.
const char * get_verbosity_str(QudaVerbosity type)
void complexConjugateProduct(Float *a, Float *b, Float *c)
QudaMassNormalization get_mass_normalization_type(char *s)
enum QudaDslashType_s QudaDslashType
void display_spinor_internal(Float *spinor)
enum QudaContractType_s QudaContractType
int site_link_sanity_check_internal_12(Float *link, int dir, int ga_idx, QudaGaugeParam *gaugeParam, int oddBit)
__host__ __device__ ValueType cos(ValueType x)
enum QudaVerbosity_s QudaVerbosity
QudaVerbosity get_verbosity_type(char *s)
const char * get_quda_ver_str()
QudaContractType get_contract_type(char *s)
#define QUDA_VERSION_MAJOR
void complexAddTo(Float *a, Float *b)
enum QudaInverterType_s QudaInverterType
enum QudaMemoryType_s QudaMemoryType
cpuColorSpinorField * spinor
enum QudaExtLibType_s QudaExtLibType
enum QudaTwistFlavorType_s QudaTwistFlavorType