4 #define SHARED_TMNDEG_FLOATS_PER_THREAD 0 8 #if ((CUDA_VERSION >= 4010) && (__COMPUTE_CAPABILITY__ >= 200)) // NVVM compiler 10 #else // Open64 compiler 11 #define VOLATILE volatile 15 #define spinorFloat double 41 #define spinorFloat float 66 #endif // SPINOR_DOUBLE 109 #endif // GAUGE_DOUBLE 112 #define gT00_re (+g00_re) 113 #define gT00_im (-g00_im) 114 #define gT01_re (+g10_re) 115 #define gT01_im (-g10_im) 116 #define gT02_re (+g20_re) 117 #define gT02_im (-g20_im) 118 #define gT10_re (+g01_re) 119 #define gT10_im (-g01_im) 120 #define gT11_re (+g11_re) 121 #define gT11_im (-g11_im) 122 #define gT12_re (+g21_re) 123 #define gT12_im (-g21_im) 124 #define gT20_re (+g02_re) 125 #define gT20_im (-g02_im) 126 #define gT21_re (+g12_re) 127 #define gT21_im (-g12_im) 128 #define gT22_re (+g22_re) 129 #define gT22_im (-g22_im) 295 face_idx +
param.ghostOffset[
static_cast<int>(kernel_type)][1];
296 #if (DD_PREC==2) // half precision 297 const int sp_norm_idx =
face_idx +
param.ghostNormOffset[
static_cast<int>(kernel_type)][1];
343 const int sp_stride_pad =
FLAVORS*
param.dc.ghostFace[
static_cast<int>(kernel_type)];
501 const int sp_stride_pad =
FLAVORS*
param.dc.ghostFace[
static_cast<int>(kernel_type)];
504 const int fl_idx =
sp_idx +
param.dc.ghostFace[
static_cast<int>(kernel_type)];
650 face_idx +
param.ghostOffset[static_cast<int>(kernel_type)][0];
651 #if (DD_PREC==2) // half precision 652 const int sp_norm_idx =
face_idx +
param.ghostNormOffset[
static_cast<int>(kernel_type)][0];
702 const int sp_stride_pad =
FLAVORS*
param.dc.ghostFace[
static_cast<int>(kernel_type)];
860 const int sp_stride_pad =
FLAVORS*
param.dc.ghostFace[
static_cast<int>(kernel_type)];
863 const int fl_idx =
sp_idx +
param.dc.ghostFace[
static_cast<int>(kernel_type)];
1009 face_idx +
param.ghostOffset[static_cast<int>(kernel_type)][1];
1010 #if (DD_PREC==2) // half precision 1011 const int sp_norm_idx =
face_idx +
param.ghostNormOffset[
static_cast<int>(kernel_type)][1];
1057 const int sp_stride_pad =
FLAVORS*
param.dc.ghostFace[
static_cast<int>(kernel_type)];
1215 const int sp_stride_pad =
FLAVORS*
param.dc.ghostFace[
static_cast<int>(kernel_type)];
1218 const int fl_idx =
sp_idx +
param.dc.ghostFace[
static_cast<int>(kernel_type)];
1364 face_idx +
param.ghostOffset[
static_cast<int>(kernel_type)][0];
1365 #if (DD_PREC==2) // half precision 1366 const int sp_norm_idx =
face_idx +
param.ghostNormOffset[
static_cast<int>(kernel_type)][0];
1416 const int sp_stride_pad =
FLAVORS*
param.dc.ghostFace[
static_cast<int>(kernel_type)];
1574 const int sp_stride_pad =
FLAVORS*
param.dc.ghostFace[
static_cast<int>(kernel_type)];
1577 const int fl_idx =
sp_idx +
param.dc.ghostFace[
static_cast<int>(kernel_type)];
1723 face_idx +
param.ghostOffset[static_cast<int>(kernel_type)][1];
1724 #if (DD_PREC==2) // half precision 1725 const int sp_norm_idx =
face_idx +
param.ghostNormOffset[
static_cast<int>(kernel_type)][1];
1771 const int sp_stride_pad =
FLAVORS*
param.dc.ghostFace[
static_cast<int>(kernel_type)];
1929 const int sp_stride_pad =
FLAVORS*
param.dc.ghostFace[
static_cast<int>(kernel_type)];
1932 const int fl_idx =
sp_idx +
param.dc.ghostFace[
static_cast<int>(kernel_type)];
2078 face_idx +
param.ghostOffset[
static_cast<int>(kernel_type)][0];
2079 #if (DD_PREC==2) // half precision 2080 const int sp_norm_idx =
face_idx +
param.ghostNormOffset[
static_cast<int>(kernel_type)][0];
2130 const int sp_stride_pad =
FLAVORS*
param.dc.ghostFace[
static_cast<int>(kernel_type)];
2288 const int sp_stride_pad =
FLAVORS*
param.dc.ghostFace[
static_cast<int>(kernel_type)];
2291 const int fl_idx =
sp_idx +
param.dc.ghostFace[
static_cast<int>(kernel_type)];
2437 face_idx +
param.ghostOffset[static_cast<int>(kernel_type)][1];
2438 #if (DD_PREC==2) // half precision 2439 const int sp_norm_idx =
face_idx +
param.ghostNormOffset[
static_cast<int>(kernel_type)][1];
2481 const int sp_stride_pad =
FLAVORS*
param.dc.ghostFace[
static_cast<int>(kernel_type)];
2546 const int sp_stride_pad =
FLAVORS*
param.dc.ghostFace[
static_cast<int>(kernel_type)];
2550 const int fl_idx =
sp_idx +
param.dc.ghostFace[
static_cast<int>(kernel_type)];
2619 const int sp_stride_pad =
FLAVORS*
param.dc.ghostFace[
static_cast<int>(kernel_type)];
2766 const int sp_stride_pad =
FLAVORS*
param.dc.ghostFace[
static_cast<int>(kernel_type)];
2770 const int fl_idx =
sp_idx +
param.dc.ghostFace[
static_cast<int>(kernel_type)];
2905 face_idx +
param.ghostOffset[
static_cast<int>(kernel_type)][0];
2906 #if (DD_PREC==2) // half precision 2907 const int sp_norm_idx =
face_idx +
param.ghostNormOffset[
static_cast<int>(kernel_type)][0];
2953 const int sp_stride_pad =
FLAVORS*
param.dc.ghostFace[
static_cast<int>(kernel_type)];
3018 const int sp_stride_pad =
FLAVORS*
param.dc.ghostFace[
static_cast<int>(kernel_type)];
3022 const int fl_idx =
sp_idx +
param.dc.ghostFace[
static_cast<int>(kernel_type)];
3091 const int sp_stride_pad =
FLAVORS*
param.dc.ghostFace[
static_cast<int>(kernel_type)];
3238 const int sp_stride_pad =
FLAVORS*
param.dc.ghostFace[
static_cast<int>(kernel_type)];
3242 const int fl_idx =
sp_idx +
param.dc.ghostFace[
static_cast<int>(kernel_type)];
3368 switch(kernel_type) {
3387 #ifdef SPINOR_DOUBLE 3399 x1_re = 0.0, x1_im = 0.0;
3400 y1_re = 0.0, y1_im = 0.0;
3401 x2_re = 0.0, x2_im = 0.0;
3402 y2_re = 0.0, y2_im = 0.0;
3588 #if !defined(DSLASH_XPAY) || defined(DSLASH_TWIST) 3589 #ifdef SPINOR_DOUBLE 3648 #ifdef SPINOR_DOUBLE 3650 #define acc_00_re accum0.x 3651 #define acc_00_im accum0.y 3652 #define acc_01_re accum1.x 3653 #define acc_01_im accum1.y 3654 #define acc_02_re accum2.x 3655 #define acc_02_im accum2.y 3656 #define acc_10_re accum3.x 3657 #define acc_10_im accum3.y 3658 #define acc_11_re accum4.x 3659 #define acc_11_im accum4.y 3660 #define acc_12_re accum5.x 3661 #define acc_12_im accum5.y 3662 #define acc_20_re accum6.x 3663 #define acc_20_im accum6.y 3664 #define acc_21_re accum7.x 3665 #define acc_21_im accum7.y 3666 #define acc_22_re accum8.x 3667 #define acc_22_im accum8.y 3668 #define acc_30_re accum9.x 3669 #define acc_30_im accum9.y 3670 #define acc_31_re accum10.x 3671 #define acc_31_im accum10.y 3672 #define acc_32_re accum11.x 3673 #define acc_32_im accum11.y 3676 #define acc_00_re accum0.x 3677 #define acc_00_im accum0.y 3678 #define acc_01_re accum0.z 3679 #define acc_01_im accum0.w 3680 #define acc_02_re accum1.x 3681 #define acc_02_im accum1.y 3682 #define acc_10_re accum1.z 3683 #define acc_10_im accum1.w 3684 #define acc_11_re accum2.x 3685 #define acc_11_im accum2.y 3686 #define acc_12_re accum2.z 3687 #define acc_12_im accum2.w 3688 #define acc_20_re accum3.x 3689 #define acc_20_im accum3.y 3690 #define acc_21_re accum3.z 3691 #define acc_21_im accum3.w 3692 #define acc_22_re accum4.x 3693 #define acc_22_im accum4.y 3694 #define acc_30_re accum4.z 3695 #define acc_30_im accum4.w 3696 #define acc_31_re accum5.x 3697 #define acc_31_im accum5.y 3698 #define acc_32_re accum5.z 3699 #define acc_32_im accum5.w 3701 #endif // SPINOR_DOUBLE 3704 READ_ACCUM(ACCUMTEX,
param.sp_stride)
3731 ASSN_ACCUM(ACCUMTEX,
param.sp_stride,
param.fl_stride)
3786 #ifdef SPINOR_DOUBLE 3788 #define acc1_00_re flv1_accum0.x 3789 #define acc1_00_im flv1_accum0.y 3790 #define acc1_01_re flv1_accum1.x 3791 #define acc1_01_im flv1_accum1.y 3792 #define acc1_02_re flv1_accum2.x 3793 #define acc1_02_im flv1_accum2.y 3794 #define acc1_10_re flv1_accum3.x 3795 #define acc1_10_im flv1_accum3.y 3796 #define acc1_11_re flv1_accum4.x 3797 #define acc1_11_im flv1_accum4.y 3798 #define acc1_12_re flv1_accum5.x 3799 #define acc1_12_im flv1_accum5.y 3800 #define acc1_20_re flv1_accum6.x 3801 #define acc1_20_im flv1_accum6.y 3802 #define acc1_21_re flv1_accum7.x 3803 #define acc1_21_im flv1_accum7.y 3804 #define acc1_22_re flv1_accum8.x 3805 #define acc1_22_im flv1_accum8.y 3806 #define acc1_30_re flv1_accum9.x 3807 #define acc1_30_im flv1_accum9.y 3808 #define acc1_31_re flv1_accum10.x 3809 #define acc1_31_im flv1_accum10.y 3810 #define acc1_32_re flv1_accum11.x 3811 #define acc1_32_im flv1_accum11.y 3813 #define acc2_00_re flv2_accum0.x 3814 #define acc2_00_im flv2_accum0.y 3815 #define acc2_01_re flv2_accum1.x 3816 #define acc2_01_im flv2_accum1.y 3817 #define acc2_02_re flv2_accum2.x 3818 #define acc2_02_im flv2_accum2.y 3819 #define acc2_10_re flv2_accum3.x 3820 #define acc2_10_im flv2_accum3.y 3821 #define acc2_11_re flv2_accum4.x 3822 #define acc2_11_im flv2_accum4.y 3823 #define acc2_12_re flv2_accum5.x 3824 #define acc2_12_im flv2_accum5.y 3825 #define acc2_20_re flv2_accum6.x 3826 #define acc2_20_im flv2_accum6.y 3827 #define acc2_21_re flv2_accum7.x 3828 #define acc2_21_im flv2_accum7.y 3829 #define acc2_22_re flv2_accum8.x 3830 #define acc2_22_im flv2_accum8.y 3831 #define acc2_30_re flv2_accum9.x 3832 #define acc2_30_im flv2_accum9.y 3833 #define acc2_31_re flv2_accum10.x 3834 #define acc2_31_im flv2_accum10.y 3835 #define acc2_32_re flv2_accum11.x 3836 #define acc2_32_im flv2_accum11.y 3840 #define acc1_00_re flv1_accum0.x 3841 #define acc1_00_im flv1_accum0.y 3842 #define acc1_01_re flv1_accum0.z 3843 #define acc1_01_im flv1_accum0.w 3844 #define acc1_02_re flv1_accum1.x 3845 #define acc1_02_im flv1_accum1.y 3846 #define acc1_10_re flv1_accum1.z 3847 #define acc1_10_im flv1_accum1.w 3848 #define acc1_11_re flv1_accum2.x 3849 #define acc1_11_im flv1_accum2.y 3850 #define acc1_12_re flv1_accum2.z 3851 #define acc1_12_im flv1_accum2.w 3852 #define acc1_20_re flv1_accum3.x 3853 #define acc1_20_im flv1_accum3.y 3854 #define acc1_21_re flv1_accum3.z 3855 #define acc1_21_im flv1_accum3.w 3856 #define acc1_22_re flv1_accum4.x 3857 #define acc1_22_im flv1_accum4.y 3858 #define acc1_30_re flv1_accum4.z 3859 #define acc1_30_im flv1_accum4.w 3860 #define acc1_31_re flv1_accum5.x 3861 #define acc1_31_im flv1_accum5.y 3862 #define acc1_32_re flv1_accum5.z 3863 #define acc1_32_im flv1_accum5.w 3865 #define acc2_00_re flv2_accum0.x 3866 #define acc2_00_im flv2_accum0.y 3867 #define acc2_01_re flv2_accum0.z 3868 #define acc2_01_im flv2_accum0.w 3869 #define acc2_02_re flv2_accum1.x 3870 #define acc2_02_im flv2_accum1.y 3871 #define acc2_10_re flv2_accum1.z 3872 #define acc2_10_im flv2_accum1.w 3873 #define acc2_11_re flv2_accum2.x 3874 #define acc2_11_im flv2_accum2.y 3875 #define acc2_12_re flv2_accum2.z 3876 #define acc2_12_im flv2_accum2.w 3877 #define acc2_20_re flv2_accum3.x 3878 #define acc2_20_im flv2_accum3.y 3879 #define acc2_21_re flv2_accum3.z 3880 #define acc2_21_im flv2_accum3.w 3881 #define acc2_22_re flv2_accum4.x 3882 #define acc2_22_im flv2_accum4.y 3883 #define acc2_30_re flv2_accum4.z 3884 #define acc2_30_im flv2_accum4.w 3885 #define acc2_31_re flv2_accum5.x 3886 #define acc2_31_im flv2_accum5.y 3887 #define acc2_32_re flv2_accum5.z 3888 #define acc2_32_im flv2_accum5.w 3890 #endif // SPINOR_DOUBLE 3893 READ_ACCUM_FLAVOR(ACCUMTEX,
param.sp_stride,
param.fl_stride)
3895 #ifdef SPINOR_DOUBLE 3907 x1_re = 0.0, x1_im = 0.0;
3908 y1_re = 0.0, y1_im = 0.0;
3909 x2_re = 0.0, x2_im = 0.0;
3910 y2_re = 0.0, y2_im = 0.0;
3914 x1_re = acc1_00_re +
a *acc1_20_im;
3915 x1_im = acc1_00_im -
a *acc1_20_re;
3916 x2_re =
b * acc1_00_re;
3917 x2_im =
b * acc1_00_im;
3919 y1_re = acc1_20_re +
a *acc1_00_im;
3920 y1_im = acc1_20_im -
a *acc1_00_re;
3921 y2_re =
b * acc1_20_re;
3922 y2_im =
b * acc1_20_im;
3926 x2_re += acc2_00_re -
a *acc2_20_im;
3927 x2_im += acc2_00_im +
a *acc2_20_re;
3928 x1_re +=
b * acc2_00_re;
3929 x1_im +=
b * acc2_00_im;
3931 y2_re += acc2_20_re -
a *acc2_00_im;
3932 y2_im += acc2_20_im +
a *acc2_00_re;
3933 y1_re +=
b * acc2_20_re;
3934 y1_im +=
b * acc2_20_im;
3937 acc1_00_re = x1_re; acc1_00_im = x1_im;
3938 acc1_20_re = y1_re; acc1_20_im = y1_im;
3940 acc2_00_re = x2_re; acc2_00_im = x2_im;
3941 acc2_20_re = y2_re; acc2_20_im = y2_im;
3944 x1_re = acc1_10_re +
a *acc1_30_im;
3945 x1_im = acc1_10_im -
a *acc1_30_re;
3946 x2_re =
b * acc1_10_re;
3947 x2_im =
b * acc1_10_im;
3949 y1_re = acc1_30_re +
a *acc1_10_im;
3950 y1_im = acc1_30_im -
a *acc1_10_re;
3951 y2_re =
b * acc1_30_re;
3952 y2_im =
b * acc1_30_im;
3956 x2_re += acc2_10_re -
a *acc2_30_im;
3957 x2_im += acc2_10_im +
a *acc2_30_re;
3958 x1_re +=
b * acc2_10_re;
3959 x1_im +=
b * acc2_10_im;
3961 y2_re += acc2_30_re -
a *acc2_10_im;
3962 y2_im += acc2_30_im +
a *acc2_10_re;
3963 y1_re +=
b * acc2_30_re;
3964 y1_im +=
b * acc2_30_im;
3967 acc1_10_re = x1_re; acc1_10_im = x1_im;
3968 acc1_30_re = y1_re; acc1_30_im = y1_im;
3970 acc2_10_re = x2_re; acc2_10_im = x2_im;
3971 acc2_30_re = y2_re; acc2_30_im = y2_im;
3974 x1_re = acc1_01_re +
a *acc1_21_im;
3975 x1_im = acc1_01_im -
a *acc1_21_re;
3976 x2_re =
b * acc1_01_re;
3977 x2_im =
b * acc1_01_im;
3979 y1_re = acc1_21_re +
a *acc1_01_im;
3980 y1_im = acc1_21_im -
a *acc1_01_re;
3981 y2_re =
b * acc1_21_re;
3982 y2_im =
b * acc1_21_im;
3986 x2_re += acc2_01_re -
a *acc2_21_im;
3987 x2_im += acc2_01_im +
a *acc2_21_re;
3988 x1_re +=
b * acc2_01_re;
3989 x1_im +=
b * acc2_01_im;
3991 y2_re += acc2_21_re -
a *acc2_01_im;
3992 y2_im += acc2_21_im +
a *acc2_01_re;
3993 y1_re +=
b * acc2_21_re;
3994 y1_im +=
b * acc2_21_im;
3997 acc1_01_re = x1_re; acc1_01_im = x1_im;
3998 acc1_21_re = y1_re; acc1_21_im = y1_im;
4000 acc2_01_re = x2_re; acc2_01_im = x2_im;
4001 acc2_21_re = y2_re; acc2_21_im = y2_im;
4004 x1_re = acc1_11_re +
a *acc1_31_im;
4005 x1_im = acc1_11_im -
a *acc1_31_re;
4006 x2_re =
b * acc1_11_re;
4007 x2_im =
b * acc1_11_im;
4009 y1_re = acc1_31_re +
a *acc1_11_im;
4010 y1_im = acc1_31_im -
a *acc1_11_re;
4011 y2_re =
b * acc1_31_re;
4012 y2_im =
b * acc1_31_im;
4016 x2_re += acc2_11_re -
a *acc2_31_im;
4017 x2_im += acc2_11_im +
a *acc2_31_re;
4018 x1_re +=
b * acc2_11_re;
4019 x1_im +=
b * acc2_11_im;
4021 y2_re += acc2_31_re -
a *acc2_11_im;
4022 y2_im += acc2_31_im +
a *acc2_11_re;
4023 y1_re +=
b * acc2_31_re;
4024 y1_im +=
b * acc2_31_im;
4027 acc1_11_re = x1_re; acc1_11_im = x1_im;
4028 acc1_31_re = y1_re; acc1_31_im = y1_im;
4030 acc2_11_re = x2_re; acc2_11_im = x2_im;
4031 acc2_31_re = y2_re; acc2_31_im = y2_im;
4034 x1_re = acc1_02_re +
a *acc1_22_im;
4035 x1_im = acc1_02_im -
a *acc1_22_re;
4036 x2_re =
b * acc1_02_re;
4037 x2_im =
b * acc1_02_im;
4039 y1_re = acc1_22_re +
a *acc1_02_im;
4040 y1_im = acc1_22_im -
a *acc1_02_re;
4041 y2_re =
b * acc1_22_re;
4042 y2_im =
b * acc1_22_im;
4046 x2_re += acc2_02_re -
a *acc2_22_im;
4047 x2_im += acc2_02_im +
a *acc2_22_re;
4048 x1_re +=
b * acc2_02_re;
4049 x1_im +=
b * acc2_02_im;
4051 y2_re += acc2_22_re -
a *acc2_02_im;
4052 y2_im += acc2_22_im +
a *acc2_02_re;
4053 y1_re +=
b * acc2_22_re;
4054 y1_im +=
b * acc2_22_im;
4057 acc1_02_re = x1_re; acc1_02_im = x1_im;
4058 acc1_22_re = y1_re; acc1_22_im = y1_im;
4060 acc2_02_re = x2_re; acc2_02_im = x2_im;
4061 acc2_22_re = y2_re; acc2_22_im = y2_im;
4064 x1_re = acc1_12_re +
a *acc1_32_im;
4065 x1_im = acc1_12_im -
a *acc1_32_re;
4066 x2_re =
b * acc1_12_re;
4067 x2_im =
b * acc1_12_im;
4069 y1_re = acc1_32_re +
a *acc1_12_im;
4070 y1_im = acc1_32_im -
a *acc1_12_re;
4071 y2_re =
b * acc1_32_re;
4072 y2_im =
b * acc1_32_im;
4076 x2_re += acc2_12_re -
a *acc2_32_im;
4077 x2_im += acc2_12_im +
a *acc2_32_re;
4078 x1_re +=
b * acc2_12_re;
4079 x1_im +=
b * acc2_12_im;
4081 y2_re += acc2_32_re -
a *acc2_12_im;
4082 y2_im += acc2_32_im +
a *acc2_12_re;
4083 y1_re +=
b * acc2_32_re;
4084 y1_im +=
b * acc2_32_im;
4087 acc1_12_re = x1_re; acc1_12_im = x1_im;
4088 acc1_32_re = y1_re; acc1_32_im = y1_im;
4090 acc2_12_re = x2_re; acc2_12_im = x2_im;
4091 acc2_32_re = y2_re; acc2_32_im = y2_im;
4093 #ifdef SPINOR_DOUBLE 4199 #endif//DSLASH_TWIST 4201 #endif // DSLASH_XPAY RECONSTRUCT_GAUGE_MATRIX(0)
VOLATILE spinorFloat o2_01_re
VOLATILE spinorFloat o1_30_re
VOLATILE spinorFloat o1_02_im
VOLATILE spinorFloat o1_32_re
VOLATILE spinorFloat o1_22_im
VOLATILE spinorFloat o2_32_im
VOLATILE spinorFloat o2_02_im
VOLATILE spinorFloat o2_31_re
VOLATILE spinorFloat o1_31_re
VOLATILE spinorFloat o2_32_re
VOLATILE spinorFloat o1_20_re
VOLATILE spinorFloat o1_02_re
VOLATILE spinorFloat o2_22_re
VOLATILE spinorFloat o1_10_im
VOLATILE spinorFloat o2_30_re
VOLATILE spinorFloat o2_31_im
VOLATILE spinorFloat o1_31_im
VOLATILE spinorFloat o1_22_re
VOLATILE spinorFloat o1_00_re
VOLATILE spinorFloat o1_12_re
o1_00_re *o1_00_im *o1_01_re *o1_01_im *o1_02_re *o1_02_im *o1_10_re *o1_10_im *o1_11_re *o1_11_im *o1_12_re *o1_12_im *o1_20_re *o1_20_im *o1_21_re *o1_21_im *o1_22_re *o1_22_im *o1_30_re *o1_30_im *o1_31_re *o1_31_im *o1_32_re *o1_32_im *o2_00_re *o2_00_im *o2_01_re *o2_01_im *o2_02_re *o2_02_im *o2_10_re *o2_10_im *o2_11_re *o2_11_im *o2_12_re *o2_12_im *o2_20_re *o2_20_im *o2_21_re *o2_21_im *o2_22_re *o2_22_im *o2_30_re *o2_30_im *o2_31_re *o2_31_im *o2_32_re *o2_32_im * WRITE_FLAVOR_SPINOR()
VOLATILE spinorFloat o2_12_re
VOLATILE spinorFloat o1_11_re
VOLATILE spinorFloat o2_22_im
VOLATILE spinorFloat o2_00_im
VOLATILE spinorFloat o2_21_im
VOLATILE spinorFloat o1_32_im
VOLATILE spinorFloat o1_21_im
VOLATILE spinorFloat o2_10_im
VOLATILE spinorFloat o1_01_re
VOLATILE spinorFloat o2_01_im
VOLATILE spinorFloat o1_01_im
#define READ_INTERMEDIATE_SPINOR
VOLATILE spinorFloat o1_11_im
VOLATILE spinorFloat o1_12_im
VOLATILE spinorFloat o2_10_re
VOLATILE spinorFloat o2_02_re
coordsFromIndex< 4, QUDA_4D_PC, EVEN_X >(X, coord, sid, param)
#define READ_SPINOR_GHOST
READ_GAUGE_MATRIX(G, GAUGE0TEX, 0, ga_idx, param.gauge_stride)
VOLATILE spinorFloat o1_20_im
VOLATILE spinorFloat o2_00_re
VOLATILE spinorFloat o2_11_im
VOLATILE spinorFloat o1_10_re
VOLATILE spinorFloat o1_30_im
VOLATILE spinorFloat o2_20_im
VOLATILE spinorFloat o2_30_im
VOLATILE spinorFloat o2_12_im
VOLATILE spinorFloat o2_21_re
VOLATILE spinorFloat o1_21_re
VOLATILE spinorFloat o2_20_re
VOLATILE spinorFloat o2_11_re
VOLATILE spinorFloat o1_00_im