18 #define MAX(a, b) ((a) > (b) ? (a) : (b))
38 cpuFat = GaugeField::Create(cpuFatParam);
43 cpuLong = GaugeField::Create(cpuLongParam);
72 for (
int dir = 0; dir < 4; dir++) {
85 for (
int dir = 0; dir < 4; dir++) {
118 for (
int dir = 0; dir < 4; dir++) {
127 for (
int dir = 0; dir < 4; dir++) {
184 const double phase = (M_PI * rand()) / RAND_MAX;
186 for (
int dir = 0; dir < 4; ++dir) {
187 for (
int i = 0; i <
V; ++i) {
201 if (type == 3)
return;
207 for (
int dir = 0; dir < 4; ++dir) {
208 for (
int i = 0; i <
V; ++i) {
232 pad_size =
MAX(x_face_size, y_face_size);
233 pad_size =
MAX(pad_size, z_face_size);
234 pad_size =
MAX(pad_size, t_face_size);
237 int fat_pad = pad_size;
238 int link_pad = 3 * pad_size;
271 template <
typename su3_matrix,
typename Float>
276 for (
int dir =
XUP; dir <=
TUP; ++dir) {
277 int dx[4] = {0, 0, 0, 0};
278 for (
int i = 0; i <
V; ++i) {
294 template <
typename su3_matrix,
typename Float>
298 for (
int dir = 0; dir < 4; ++dir)
E[dir] =
Z[dir] + 4;
299 const int extended_volume =
E[3] *
E[2] *
E[1] *
E[0];
302 for (
int t = 0; t <
Z[3]; ++t) {
303 for (
int z = 0; z <
Z[2]; ++z) {
304 for (
int y = 0; y <
Z[1]; ++y) {
305 for (
int x = 0; x <
Z[0]; ++x) {
306 const int oddBit = (x + y + z + t) & 1;
307 int little_index = ((((t *
Z[2] + z) *
Z[1] + y) *
Z[0] + x) / 2) + oddBit *
Vh;
309 = (((((t + 2) *
E[2] + (z + 2)) *
E[1] + (y + 2)) *
E[0] + x + 2) / 2) + oddBit * (extended_volume / 2);
311 for (
int dir =
XUP; dir <=
TUP; ++dir) {
312 int dx[4] = {0, 0, 0, 0};
342 fprintf(stderr,
"ERROR: unsupported precision(%d)\n",
prec);
352 void computeHISQLinksCPU(
void **fatlink,
void **longlink,
void **fatlink_eps,
void **longlink_eps,
void **sitelink,
353 void *qudaGaugeParamPtr,
double **act_path_coeffs,
double eps_naik)
365 const size_t gSize =
prec;
374 void *sitelink_ex[4];
378 void *ghost_sitelink[4];
379 void *ghost_sitelink_diag[16];
387 for (
int i = 0; i <
V_ex; i++) {
396 int x1h = sid - za *
E1h;
398 int x2 = za - zb *
E2;
400 int x3 = zb - x4 *
E3;
401 int x1odd = (x2 + x3 + x4 + oddBit) & 1;
402 int x1 = 2 * x1h + x1odd;
404 if (x1 < 2 || x1 >= X1 + 2 || x2 < 2 || x2 >= X2 + 2 || x3 < 2 || x3 >= X3 + 2 || x4 < 2 || x4 >= X4 + 2) {
410 x1 = (x1 - 2 + X1) % X1;
411 x2 = (x2 - 2 + X2) % X2;
412 x3 = (x3 - 2 + X3) % X3;
413 x4 = (x4 - 2 + X4) % X4;
415 int idx = (x4 * X3 * X2 * X1 + x3 * X2 * X1 + x2 * X1 + x1) >> 1;
416 if (oddBit) { idx +=
Vh; }
417 for (
int dir = 0; dir < 4; dir++) {
418 char *src = (
char *)sitelink[dir];
419 char *dst = (
char *)sitelink_ex[dir];
430 void *w_reflink_ex[4];
431 for (
int i = 0; i < 4; i++) {
438 void *ghost_wlink[4];
439 void *ghost_wlink_diag[16];
455 for (
int i = 0; i < 6; i++) coeff_sp[i] = coeff_dp[i] = act_path_coeffs[0][i];
470 for (
int nu = 0; nu < 4; nu++) {
471 for (
int mu = 0;
mu < 4;
mu++) {
473 ghost_sitelink_diag[nu * 4 +
mu] = NULL;
477 for (dir1 = 0; dir1 < 4; dir1++) {
478 if (dir1 != nu && dir1 !=
mu) {
break; }
480 for (dir2 = 0; dir2 < 4; dir2++) {
481 if (dir2 != nu && dir2 !=
mu && dir2 != dir1) {
break; }
526 for (
int i = 0; i <
V_ex; i++) {
535 int x1h = sid - za *
E1h;
537 int x2 = za - zb *
E2;
539 int x3 = zb - x4 *
E3;
540 int x1odd = (x2 + x3 + x4 + oddBit) & 1;
541 int x1 = 2 * x1h + x1odd;
543 if (x1 < 2 || x1 >= X1 + 2 || x2 < 2 || x2 >= X2 + 2 || x3 < 2 || x3 >= X3 + 2 || x4 < 2 || x4 >= X4 + 2) {
549 x1 = (x1 - 2 + X1) % X1;
550 x2 = (x2 - 2 + X2) % X2;
551 x3 = (x3 - 2 + X3) % X3;
552 x4 = (x4 - 2 + X4) % X4;
554 int idx = (x4 * X3 * X2 * X1 + x3 * X2 * X1 + x2 * X1 + x1) >> 1;
555 if (oddBit) { idx +=
Vh; }
556 for (
int dir = 0; dir < 4; dir++) {
557 char *src = (
char *)w_reflink[dir];
558 char *dst = (
char *)w_reflink_ex[dir];
577 for (
int nu = 0; nu < 4; nu++) {
578 for (
int mu = 0;
mu < 4;
mu++) {
580 ghost_wlink_diag[nu * 4 +
mu] = NULL;
584 for (dir1 = 0; dir1 < 4; dir1++) {
585 if (dir1 != nu && dir1 !=
mu) {
break; }
587 for (dir2 = 0; dir2 < 4; dir2++) {
588 if (dir2 != nu && dir2 !=
mu && dir2 != dir1) {
break; }
603 for (
int i = 0; i < 6; i++) coeff_sp[i] = coeff_dp[i] = act_path_coeffs[2][i];
609 &qudaGaugeParam, optflag);
613 int R[4] = {2, 2, 2, 2};
623 for (
int i = 0; i < 4; i++) {
633 for (
int i = 0; i < 6; i++) coeff_sp[i] = coeff_dp[i] = act_path_coeffs[1][i];
642 &qudaGaugeParam, optflag);
646 int R[4] = {2, 2, 2, 2};
657 for (
int i = 0; i < 4; i++) {
667 for (
int i = 0; i < 4; i++) {
676 for (
int i = 0; i < 4; i++) {
679 for (
int j = 0; j < 4; j++) {
680 if (i == j)
continue;
681 host_free(ghost_sitelink_diag[i * 4 + j]);
695 for (
int d = 0; d < 4; d++) cs_param->
x[d] =
gauge_param->
X[d];
697 if (pc) cs_param->
x[0] /= 2;
712 template <
typename Out,
typename In>
void reorderQDPtoMILC(Out *milc_out, In **qdp_in,
int V,
int siteSize)
714 for (
int i = 0; i <
V; i++) {
715 for (
int dir = 0; dir < 4; dir++) {
716 for (
int j = 0; j < siteSize; j++) {
717 milc_out[(i * 4 + dir) * siteSize + j] =
static_cast<Out
>(qdp_in[dir][i * siteSize + j]);
728 reorderQDPtoMILC<float, float>((
float *)milc_out, (
float **)qdp_in,
V, siteSize);
730 reorderQDPtoMILC<float, double>((
float *)milc_out, (
double **)qdp_in,
V, siteSize);
734 reorderQDPtoMILC<double, float>((
double *)milc_out, (
float **)qdp_in,
V, siteSize);
736 reorderQDPtoMILC<double, double>((
double *)milc_out, (
double **)qdp_in,
V, siteSize);
741 template <
typename Out,
typename In>
void reorderMILCtoQDP(Out **qdp_out, In *milc_in,
int V,
int siteSize)
743 for (
int i = 0; i <
V; i++) {
744 for (
int dir = 0; dir < 4; dir++) {
745 for (
int j = 0; j < siteSize; j++) {
746 qdp_out[dir][i * siteSize + j] =
static_cast<Out
>(milc_in[(i * 4 + dir) * siteSize + j]);
757 reorderMILCtoQDP<float, float>((
float **)qdp_out, (
float *)milc_in,
V, siteSize);
759 reorderMILCtoQDP<float, double>((
float **)qdp_out, (
double *)milc_in,
V, siteSize);
763 reorderMILCtoQDP<double, float>((
double **)qdp_out, (
float *)milc_in,
V, siteSize);
765 reorderMILCtoQDP<double, double>((
double **)qdp_out, (
double *)milc_in,
V, siteSize);
778 template <
typename Float>
781 int X1h =
param->
X[0] / 2;
789 for (
int d = 0; d < 4; d++) {
797 for (
int d = 0; d < 3; d++) {
800 for (
int i = 0; i <
Vh; i++) {
803 int i4 = index / (X3 * X2 * X1);
804 int i3 = (index - i4 * (X3 * X2 * X1)) / (X2 * X1);
805 int i2 = (index - i4 * (X3 * X2 * X1) - i3 * (X2 * X1)) / X1;
806 int i1 = index - i4 * (X3 * X2 * X1) - i3 * (X2 * X1) - i2 * X1;
810 if (i4 % 2 == 1) { sign = -1; }
814 if ((i4 + i1) % 2 == 1) { sign = -1; }
817 if ((i4 + i1 + i2) % 2 == 1) { sign = -1; }
820 for (
int j = 0; j < 18; j++) { gauge[d][i *
gauge_site_size + j] *= sign; }
823 for (
int i = 0; i <
Vh; i++) {
825 int i4 = index / (X3 * X2 * X1);
826 int i3 = (index - i4 * (X3 * X2 * X1)) / (X2 * X1);
827 int i2 = (index - i4 * (X3 * X2 * X1) - i3 * (X2 * X1)) / X1;
828 int i1 = index - i4 * (X3 * X2 * X1) - i3 * (X2 * X1) - i2 * X1;
832 if (i4 % 2 == 1) { sign = -1; }
836 if ((i4 + i1) % 2 == 1) { sign = -1; }
839 if ((i4 + i1 + i2) % 2 == 1) { sign = -1; }
842 for (
int j = 0; j < 18; j++) { gauge[d][(
Vh + i) *
gauge_site_size + j] *= sign; }
848 for (
int j = 0; j <
Vh; j++) {
851 if (j >= (X4 - 3) * X1h * X2 * X3) { sign = -1; }
853 if (j >= (X4 - 1) * X1h * X2 * X3) { sign = -1; }
856 for (
int i = 0; i < 18; i++) {
QudaGammaBasis gammaBasis
QudaFieldLocation location
void setPrecision(QudaPrecision precision, QudaPrecision ghost_precision=QUDA_INVALID_PRECISION, bool force_native=false)
QudaFieldOrder fieldOrder
static GaugeField * Create(const GaugeFieldParam ¶m)
Create the gauge field, with meta data specified in the parameter struct.
QudaReconstructType link_recon_sloppy
QudaReconstructType link_recon
QudaReconstructType link_recon_precondition
QudaDslashType dslash_type
void * memset(void *s, int c, size_t n)
QudaGaugeParam gauge_param
QudaInvertParam inv_param
enum QudaPrecision_s QudaPrecision
@ QUDA_STAGGERED_PHASE_NO
@ QUDA_CPU_FIELD_LOCATION
@ QUDA_PARITY_SITE_SUBSET
enum QudaDslashType_s QudaDslashType
@ QUDA_GHOST_EXCHANGE_PAD
@ QUDA_EVEN_ODD_SITE_ORDER
@ QUDA_SPACE_SPIN_COLOR_FIELD_ORDER
@ QUDA_REFERENCE_FIELD_CREATE
void exchange_cpu_sitelink_ex(int *X, int *R, void **sitelink, QudaGaugeFieldOrder cpu_order, QudaPrecision gPrecision, int optflag, int geometry)
void exchange_cpu_sitelink(int *X, void **sitelink, void **ghost_sitelink, void **ghost_sitelink_diag, QudaPrecision gPrecision, QudaGaugeParam *param, int optflag)
int neighborIndexFullLattice(int i, int dx4, int dx3, int dx2, int dx1)
void cpu_axy(QudaPrecision prec, double a, void *x, void *y, int size)
void cpu_xpy(QudaPrecision prec, void *x, void *y, int size)
size_t host_gauge_data_type_size
void constructUnitGaugeField(Float **res, QudaGaugeParam *param)
void constructRandomGaugeField(Float **res, QudaGaugeParam *param, QudaDslashType dslash_type)
int fullLatticeIndex(int dim[4], int index, int oddBit)
void constructQudaGaugeField(void **gauge, int type, QudaPrecision precision, QudaGaugeParam *param)
bool isPCSolution(QudaSolutionType solution_type)
void llfat_reference(void **fatlink, void **sitelink, QudaPrecision prec, void *act_path_coeff)
void llfat_mult_su3_nn(su3_matrix *a, su3_matrix *b, su3_matrix *c)
void llfat_scalar_mult_su3_matrix(su3_matrix *a, Real s, su3_matrix *b)
void llfat_reference_mg(void **fatlink, void **sitelink, void **ghost_sitelink, void **ghost_sitelink_diag, QudaPrecision prec, void *act_path_coeff)
#define safe_malloc(size)
#define pinned_malloc(size)
__host__ __device__ complex< ValueType > polar(const ValueType &m, const ValueType &theta=0)
Returns the complex with magnitude m and angle theta in radians.
void unitarizeLinksCPU(GaugeField &outfield, const GaugeField &infield)
FloatingPoint< float > Float
void read_gauge_field(const char *filename, void *gauge[], QudaPrecision prec, const int *X, int argc, char *argv[])
void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param)
void computeFatLongGPU(void **qdp_fatlink, void **qdp_longlink, void **qdp_inlink, QudaGaugeParam &gauge_param, size_t gauge_data_type_size, int n_naiks, double eps_naik)
void computeFatLongGPUandCPU(void **qdp_fatlink_gpu, void **qdp_longlink_gpu, void **qdp_fatlink_cpu, void **qdp_longlink_cpu, void **qdp_inlink, QudaGaugeParam &gauge_param, size_t gauge_data_type_size, int n_naiks, double eps_naik)
void applyStaggeredScaling(Float **res, QudaGaugeParam *param, int type)
void constructStaggeredHostGhostGaugeField(quda::GaugeField *cpuFat, quda::GaugeField *cpuLong, void *milc_fatlink, void *milc_longlink, QudaGaugeParam &gauge_param)
void constructFatLongGaugeField(void **fatlink, void **longlink, int type, QudaPrecision precision, QudaGaugeParam *param, QudaDslashType dslash_type)
void constructStaggeredHostDeviceGaugeField(void **qdp_inlink, void **qdp_longlink_cpu, void **qdp_longlink_gpu, void **qdp_fatlink_cpu, void **qdp_fatlink_gpu, QudaGaugeParam &gauge_param, int argc, char **argv, bool &gauge_loaded)
void computeLongLinkCPU(void **longlink, su3_matrix **sitelink, Float *act_path_coeff)
void reorderQDPtoMILC(Out *milc_out, In **qdp_in, int V, int siteSize)
void computeHISQLinksCPU(void **fatlink, void **longlink, void **fatlink_eps, void **longlink_eps, void **sitelink, void *qudaGaugeParamPtr, double **act_path_coeffs, double eps_naik)
void applyGaugeFieldScaling_long(Float **gauge, int Vh, QudaGaugeParam *param, QudaDslashType dslash_type)
void constructStaggeredTestSpinorParam(quda::ColorSpinorParam *cs_param, const QudaInvertParam *inv_param, const QudaGaugeParam *gauge_param)
void constructStaggeredHostGaugeField(void **qdp_inlink, void **qdp_longlink, void **qdp_fatlink, QudaGaugeParam &gauge_param, int argc, char **argv)
void loadFatLongGaugeQuda(void *milc_fatlink, void *milc_longlink, QudaGaugeParam &gauge_param)
void reorderMILCtoQDP(Out **qdp_out, In *milc_in, int V, int siteSize)
QudaReconstructType reconstruct_precondition
QudaReconstructType reconstruct
QudaFieldLocation location
QudaReconstructType reconstruct_sloppy
QudaStaggeredPhase staggered_phase_type
QudaReconstructType reconstruct_refinement_sloppy
QudaSolutionType solution_type
QudaGammaBasis gamma_basis
QudaGaugeFieldOrder order
QudaFieldLocation location
QudaGhostExchange ghostExchange
QudaSiteSubset siteSubset