/* QUDA v0.4.0 - A library for QCD on GPUs */
/*
 * Clover-term read macros.
 *
 * Each macro DECLARES the local variables C0..Cn in the scope where it is
 * expanded and initialises them from `clover`.  Because the declarations must
 * stay visible to the code that follows, the bodies are intentionally not
 * wrapped in do { ... } while (0), and each body already ends with ';'.
 *
 * Names taken from the expansion site (not macro parameters):
 *   sid        - base index added to every element offset
 *   cl_stride  - multiplier applied to the per-element offset
 *
 * Parameters:
 *   clover - array (or, for the *_TEX variants, the object passed as the first
 *            argument of fetch_double2 / tex1Dfetch) to read from
 *   chi    - block selector; element k is read at
 *            sid + (N*(chi) + k)*cl_stride with N = 18 (double) or 9 (single/half)
 *
 * Every use of a macro parameter is parenthesised so that an expression
 * argument (e.g. `a + b`) yields the intended index instead of being split by
 * operator precedence.
 */

/* Declares double2 C0..C17, read directly from clover[]. */
#define READ_CLOVER_DOUBLE(clover, chi)                         \
  double2 C0 = (clover)[sid + (18*(chi)+0)*cl_stride];          \
  double2 C1 = (clover)[sid + (18*(chi)+1)*cl_stride];          \
  double2 C2 = (clover)[sid + (18*(chi)+2)*cl_stride];          \
  double2 C3 = (clover)[sid + (18*(chi)+3)*cl_stride];          \
  double2 C4 = (clover)[sid + (18*(chi)+4)*cl_stride];          \
  double2 C5 = (clover)[sid + (18*(chi)+5)*cl_stride];          \
  double2 C6 = (clover)[sid + (18*(chi)+6)*cl_stride];          \
  double2 C7 = (clover)[sid + (18*(chi)+7)*cl_stride];          \
  double2 C8 = (clover)[sid + (18*(chi)+8)*cl_stride];          \
  double2 C9 = (clover)[sid + (18*(chi)+9)*cl_stride];          \
  double2 C10 = (clover)[sid + (18*(chi)+10)*cl_stride];        \
  double2 C11 = (clover)[sid + (18*(chi)+11)*cl_stride];        \
  double2 C12 = (clover)[sid + (18*(chi)+12)*cl_stride];        \
  double2 C13 = (clover)[sid + (18*(chi)+13)*cl_stride];        \
  double2 C14 = (clover)[sid + (18*(chi)+14)*cl_stride];        \
  double2 C15 = (clover)[sid + (18*(chi)+15)*cl_stride];        \
  double2 C16 = (clover)[sid + (18*(chi)+16)*cl_stride];        \
  double2 C17 = (clover)[sid + (18*(chi)+17)*cl_stride];

/* Declares float4 C0..C8, read directly from clover[]. */
#define READ_CLOVER_SINGLE(clover, chi)                         \
  float4 C0 = (clover)[sid + (9*(chi)+0)*cl_stride];            \
  float4 C1 = (clover)[sid + (9*(chi)+1)*cl_stride];            \
  float4 C2 = (clover)[sid + (9*(chi)+2)*cl_stride];            \
  float4 C3 = (clover)[sid + (9*(chi)+3)*cl_stride];            \
  float4 C4 = (clover)[sid + (9*(chi)+4)*cl_stride];            \
  float4 C5 = (clover)[sid + (9*(chi)+5)*cl_stride];            \
  float4 C6 = (clover)[sid + (9*(chi)+6)*cl_stride];            \
  float4 C7 = (clover)[sid + (9*(chi)+7)*cl_stride];            \
  float4 C8 = (clover)[sid + (9*(chi)+8)*cl_stride];

/*
 * Declares float4 C0..C8 by passing each clover[] element through
 * short42float4(), then multiplies every component by
 * K = cloverNorm[sid + (chi)*cl_stride].
 * `cloverNorm` is taken from the expansion site; it is not a parameter.
 * Also declares the local `K`.
 */
#define READ_CLOVER_HALF(clover, chi)                                   \
  float4 C0 = short42float4((clover)[sid + (9*(chi)+0)*cl_stride]);     \
  float4 C1 = short42float4((clover)[sid + (9*(chi)+1)*cl_stride]);     \
  float4 C2 = short42float4((clover)[sid + (9*(chi)+2)*cl_stride]);     \
  float4 C3 = short42float4((clover)[sid + (9*(chi)+3)*cl_stride]);     \
  float4 C4 = short42float4((clover)[sid + (9*(chi)+4)*cl_stride]);     \
  float4 C5 = short42float4((clover)[sid + (9*(chi)+5)*cl_stride]);     \
  float4 C6 = short42float4((clover)[sid + (9*(chi)+6)*cl_stride]);     \
  float4 C7 = short42float4((clover)[sid + (9*(chi)+7)*cl_stride]);     \
  float4 C8 = short42float4((clover)[sid + (9*(chi)+8)*cl_stride]);     \
  float K = cloverNorm[sid + (chi)*cl_stride];                          \
  C0.x *= K; C0.y *= K; C0.z *= K; C0.w *= K;                           \
  C1.x *= K; C1.y *= K; C1.z *= K; C1.w *= K;                           \
  C2.x *= K; C2.y *= K; C2.z *= K; C2.w *= K;                           \
  C3.x *= K; C3.y *= K; C3.z *= K; C3.w *= K;                           \
  C4.x *= K; C4.y *= K; C4.z *= K; C4.w *= K;                           \
  C5.x *= K; C5.y *= K; C5.z *= K; C5.w *= K;                           \
  C6.x *= K; C6.y *= K; C6.z *= K; C6.w *= K;                           \
  C7.x *= K; C7.y *= K; C7.z *= K; C7.w *= K;                           \
  C8.x *= K; C8.y *= K; C8.z *= K; C8.w *= K;

/* Same layout as READ_CLOVER_DOUBLE, but each element is obtained through
 * fetch_double2(clover, index) instead of array indexing. */
#define READ_CLOVER_DOUBLE_TEX(clover, chi)                                 \
  double2 C0 = fetch_double2((clover), sid + (18*(chi)+0)*cl_stride);       \
  double2 C1 = fetch_double2((clover), sid + (18*(chi)+1)*cl_stride);       \
  double2 C2 = fetch_double2((clover), sid + (18*(chi)+2)*cl_stride);       \
  double2 C3 = fetch_double2((clover), sid + (18*(chi)+3)*cl_stride);       \
  double2 C4 = fetch_double2((clover), sid + (18*(chi)+4)*cl_stride);       \
  double2 C5 = fetch_double2((clover), sid + (18*(chi)+5)*cl_stride);       \
  double2 C6 = fetch_double2((clover), sid + (18*(chi)+6)*cl_stride);       \
  double2 C7 = fetch_double2((clover), sid + (18*(chi)+7)*cl_stride);       \
  double2 C8 = fetch_double2((clover), sid + (18*(chi)+8)*cl_stride);       \
  double2 C9 = fetch_double2((clover), sid + (18*(chi)+9)*cl_stride);       \
  double2 C10 = fetch_double2((clover), sid + (18*(chi)+10)*cl_stride);     \
  double2 C11 = fetch_double2((clover), sid + (18*(chi)+11)*cl_stride);     \
  double2 C12 = fetch_double2((clover), sid + (18*(chi)+12)*cl_stride);     \
  double2 C13 = fetch_double2((clover), sid + (18*(chi)+13)*cl_stride);     \
  double2 C14 = fetch_double2((clover), sid + (18*(chi)+14)*cl_stride);     \
  double2 C15 = fetch_double2((clover), sid + (18*(chi)+15)*cl_stride);     \
  double2 C16 = fetch_double2((clover), sid + (18*(chi)+16)*cl_stride);     \
  double2 C17 = fetch_double2((clover), sid + (18*(chi)+17)*cl_stride);

/* Same layout as READ_CLOVER_SINGLE, but each element is obtained through
 * tex1Dfetch(clover, index) instead of array indexing. */
#define READ_CLOVER_SINGLE_TEX(clover, chi)                             \
  float4 C0 = tex1Dfetch((clover), sid + (9*(chi)+0)*cl_stride);        \
  float4 C1 = tex1Dfetch((clover), sid + (9*(chi)+1)*cl_stride);        \
  float4 C2 = tex1Dfetch((clover), sid + (9*(chi)+2)*cl_stride);        \
  float4 C3 = tex1Dfetch((clover), sid + (9*(chi)+3)*cl_stride);        \
  float4 C4 = tex1Dfetch((clover), sid + (9*(chi)+4)*cl_stride);        \
  float4 C5 = tex1Dfetch((clover), sid + (9*(chi)+5)*cl_stride);        \
  float4 C6 = tex1Dfetch((clover), sid + (9*(chi)+6)*cl_stride);        \
  float4 C7 = tex1Dfetch((clover), sid + (9*(chi)+7)*cl_stride);        \
  float4 C8 = tex1Dfetch((clover), sid + (9*(chi)+8)*cl_stride);

/*
 * Declares float4 C0..C8 via tex1Dfetch(clover, index), then multiplies every
 * component by K = tex1Dfetch(cloverTexNorm, sid + (chi)*cl_stride).
 * `cloverTexNorm` is taken from the expansion site; it is not a parameter.
 * Unlike READ_CLOVER_HALF, no short42float4() conversion is applied here.
 * NOTE(review): presumably the texture fetch already yields float4 values for
 * the half-precision field - confirm against the texture declaration.
 * Also declares the local `K`.
 */
#define READ_CLOVER_HALF_TEX(clover, chi)                               \
  float4 C0 = tex1Dfetch((clover), sid + (9*(chi)+0)*cl_stride);        \
  float4 C1 = tex1Dfetch((clover), sid + (9*(chi)+1)*cl_stride);        \
  float4 C2 = tex1Dfetch((clover), sid + (9*(chi)+2)*cl_stride);        \
  float4 C3 = tex1Dfetch((clover), sid + (9*(chi)+3)*cl_stride);        \
  float4 C4 = tex1Dfetch((clover), sid + (9*(chi)+4)*cl_stride);        \
  float4 C5 = tex1Dfetch((clover), sid + (9*(chi)+5)*cl_stride);        \
  float4 C6 = tex1Dfetch((clover), sid + (9*(chi)+6)*cl_stride);        \
  float4 C7 = tex1Dfetch((clover), sid + (9*(chi)+7)*cl_stride);        \
  float4 C8 = tex1Dfetch((clover), sid + (9*(chi)+8)*cl_stride);        \
  float K = tex1Dfetch((cloverTexNorm), sid + (chi)*cl_stride);         \
  C0.x *= K; C0.y *= K; C0.z *= K; C0.w *= K;                           \
  C1.x *= K; C1.y *= K; C1.z *= K; C1.w *= K;                           \
  C2.x *= K; C2.y *= K; C2.z *= K; C2.w *= K;                           \
  C3.x *= K; C3.y *= K; C3.z *= K; C3.w *= K;                           \
  C4.x *= K; C4.y *= K; C4.z *= K; C4.w *= K;                           \
  C5.x *= K; C5.y *= K; C5.z *= K; C5.w *= K;                           \
  C6.x *= K; C6.y *= K; C6.z *= K; C6.w *= K;                           \
  C7.x *= K; C7.y *= K; C7.z *= K; C7.w *= K;                           \
  C8.x *= K; C8.y *= K; C8.z *= K; C8.w *= K;