QUDA  v0.7.0
A library for QCD on GPUs
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Macros | Functions | Variables
asym_wilson_clover_dslash_dagger_fermi_core.h File Reference
#include "read_gauge.h"
#include "read_clover.h"
#include "io_spinor.h"

Go to the source code of this file.

Macros

#define DSLASH_SHARED_FLOATS_PER_THREAD   24
 
#define VOLATILE   volatile
 
#define spinorFloat   float
 
#define WRITE_SPINOR_SHARED   WRITE_SPINOR_SHARED_FLOAT4
 
#define READ_SPINOR_SHARED   READ_SPINOR_SHARED_FLOAT4
 
#define i00_re   I0.x
 
#define i00_im   I0.y
 
#define i01_re   I0.z
 
#define i01_im   I0.w
 
#define i02_re   I1.x
 
#define i02_im   I1.y
 
#define i10_re   I1.z
 
#define i10_im   I1.w
 
#define i11_re   I2.x
 
#define i11_im   I2.y
 
#define i12_re   I2.z
 
#define i12_im   I2.w
 
#define i20_re   I3.x
 
#define i20_im   I3.y
 
#define i21_re   I3.z
 
#define i21_im   I3.w
 
#define i22_re   I4.x
 
#define i22_im   I4.y
 
#define i30_re   I4.z
 
#define i30_im   I4.w
 
#define i31_re   I5.x
 
#define i31_im   I5.y
 
#define i32_re   I5.z
 
#define i32_im   I5.w
 
#define acc00_re   accum0.x
 
#define acc00_im   accum0.y
 
#define acc01_re   accum0.z
 
#define acc01_im   accum0.w
 
#define acc02_re   accum1.x
 
#define acc02_im   accum1.y
 
#define acc10_re   accum1.z
 
#define acc10_im   accum1.w
 
#define acc11_re   accum2.x
 
#define acc11_im   accum2.y
 
#define acc12_re   accum2.z
 
#define acc12_im   accum2.w
 
#define acc20_re   accum3.x
 
#define acc20_im   accum3.y
 
#define acc21_re   accum3.z
 
#define acc21_im   accum3.w
 
#define acc22_re   accum4.x
 
#define acc22_im   accum4.y
 
#define acc30_re   accum4.z
 
#define acc30_im   accum4.w
 
#define acc31_re   accum5.x
 
#define acc31_im   accum5.y
 
#define acc32_re   accum5.z
 
#define acc32_im   accum5.w
 
#define g00_re   G0.x
 
#define g00_im   G0.y
 
#define g01_re   G0.z
 
#define g01_im   G0.w
 
#define g02_re   G1.x
 
#define g02_im   G1.y
 
#define g10_re   G1.z
 
#define g10_im   G1.w
 
#define g11_re   G2.x
 
#define g11_im   G2.y
 
#define g12_re   G2.z
 
#define g12_im   G2.w
 
#define g20_re   G3.x
 
#define g20_im   G3.y
 
#define g21_re   G3.z
 
#define g21_im   G3.w
 
#define g22_re   G4.x
 
#define g22_im   G4.y
 
#define gT00_re   (+g00_re)
 
#define gT00_im   (-g00_im)
 
#define gT01_re   (+g10_re)
 
#define gT01_im   (-g10_im)
 
#define gT02_re   (+g20_re)
 
#define gT02_im   (-g20_im)
 
#define gT10_re   (+g01_re)
 
#define gT10_im   (-g01_im)
 
#define gT11_re   (+g11_re)
 
#define gT11_im   (-g11_im)
 
#define gT12_re   (+g21_re)
 
#define gT12_im   (-g21_im)
 
#define gT20_re   (+g02_re)
 
#define gT20_im   (-g02_im)
 
#define gT21_re   (+g12_re)
 
#define gT21_im   (-g12_im)
 
#define gT22_re   (+g22_re)
 
#define gT22_im   (-g22_im)
 
#define c00_00_re   C0.x
 
#define c01_01_re   C0.y
 
#define c02_02_re   C0.z
 
#define c10_10_re   C0.w
 
#define c11_11_re   C1.x
 
#define c12_12_re   C1.y
 
#define c01_00_re   C1.z
 
#define c01_00_im   C1.w
 
#define c02_00_re   C2.x
 
#define c02_00_im   C2.y
 
#define c10_00_re   C2.z
 
#define c10_00_im   C2.w
 
#define c11_00_re   C3.x
 
#define c11_00_im   C3.y
 
#define c12_00_re   C3.z
 
#define c12_00_im   C3.w
 
#define c02_01_re   C4.x
 
#define c02_01_im   C4.y
 
#define c10_01_re   C4.z
 
#define c10_01_im   C4.w
 
#define c11_01_re   C5.x
 
#define c11_01_im   C5.y
 
#define c12_01_re   C5.z
 
#define c12_01_im   C5.w
 
#define c10_02_re   C6.x
 
#define c10_02_im   C6.y
 
#define c11_02_re   C6.z
 
#define c11_02_im   C6.w
 
#define c12_02_re   C7.x
 
#define c12_02_im   C7.y
 
#define c11_10_re   C7.z
 
#define c11_10_im   C7.w
 
#define c12_10_re   C8.x
 
#define c12_10_im   C8.y
 
#define c12_11_re   C8.z
 
#define c12_11_im   C8.w
 
#define c00_01_re   (+c01_00_re)
 
#define c00_01_im   (-c01_00_im)
 
#define c00_02_re   (+c02_00_re)
 
#define c00_02_im   (-c02_00_im)
 
#define c01_02_re   (+c02_01_re)
 
#define c01_02_im   (-c02_01_im)
 
#define c00_10_re   (+c10_00_re)
 
#define c00_10_im   (-c10_00_im)
 
#define c01_10_re   (+c10_01_re)
 
#define c01_10_im   (-c10_01_im)
 
#define c02_10_re   (+c10_02_re)
 
#define c02_10_im   (-c10_02_im)
 
#define c00_11_re   (+c11_00_re)
 
#define c00_11_im   (-c11_00_im)
 
#define c01_11_re   (+c11_01_re)
 
#define c01_11_im   (-c11_01_im)
 
#define c02_11_re   (+c11_02_re)
 
#define c02_11_im   (-c11_02_im)
 
#define c10_11_re   (+c11_10_re)
 
#define c10_11_im   (-c11_10_im)
 
#define c00_12_re   (+c12_00_re)
 
#define c00_12_im   (-c12_00_im)
 
#define c01_12_re   (+c12_01_re)
 
#define c01_12_im   (-c12_01_im)
 
#define c02_12_re   (+c12_02_re)
 
#define c02_12_im   (-c12_02_im)
 
#define c10_12_re   (+c12_10_re)
 
#define c10_12_im   (-c12_10_im)
 
#define c11_12_re   (+c12_11_re)
 
#define c11_12_im   (-c12_11_im)
 
#define c20_20_re   c00_00_re
 
#define c21_20_re   c01_00_re
 
#define c21_20_im   c01_00_im
 
#define c22_20_re   c02_00_re
 
#define c22_20_im   c02_00_im
 
#define c30_20_re   c10_00_re
 
#define c30_20_im   c10_00_im
 
#define c31_20_re   c11_00_re
 
#define c31_20_im   c11_00_im
 
#define c32_20_re   c12_00_re
 
#define c32_20_im   c12_00_im
 
#define c20_21_re   c00_01_re
 
#define c20_21_im   c00_01_im
 
#define c21_21_re   c01_01_re
 
#define c22_21_re   c02_01_re
 
#define c22_21_im   c02_01_im
 
#define c30_21_re   c10_01_re
 
#define c30_21_im   c10_01_im
 
#define c31_21_re   c11_01_re
 
#define c31_21_im   c11_01_im
 
#define c32_21_re   c12_01_re
 
#define c32_21_im   c12_01_im
 
#define c20_22_re   c00_02_re
 
#define c20_22_im   c00_02_im
 
#define c21_22_re   c01_02_re
 
#define c21_22_im   c01_02_im
 
#define c22_22_re   c02_02_re
 
#define c30_22_re   c10_02_re
 
#define c30_22_im   c10_02_im
 
#define c31_22_re   c11_02_re
 
#define c31_22_im   c11_02_im
 
#define c32_22_re   c12_02_re
 
#define c32_22_im   c12_02_im
 
#define c20_30_re   c00_10_re
 
#define c20_30_im   c00_10_im
 
#define c21_30_re   c01_10_re
 
#define c21_30_im   c01_10_im
 
#define c22_30_re   c02_10_re
 
#define c22_30_im   c02_10_im
 
#define c30_30_re   c10_10_re
 
#define c31_30_re   c11_10_re
 
#define c31_30_im   c11_10_im
 
#define c32_30_re   c12_10_re
 
#define c32_30_im   c12_10_im
 
#define c20_31_re   c00_11_re
 
#define c20_31_im   c00_11_im
 
#define c21_31_re   c01_11_re
 
#define c21_31_im   c01_11_im
 
#define c22_31_re   c02_11_re
 
#define c22_31_im   c02_11_im
 
#define c30_31_re   c10_11_re
 
#define c30_31_im   c10_11_im
 
#define c31_31_re   c11_11_re
 
#define c32_31_re   c12_11_re
 
#define c32_31_im   c12_11_im
 
#define c20_32_re   c00_12_re
 
#define c20_32_im   c00_12_im
 
#define c21_32_re   c01_12_re
 
#define c21_32_im   c01_12_im
 
#define c22_32_re   c02_12_re
 
#define c22_32_im   c02_12_im
 
#define c30_32_re   c10_12_re
 
#define c30_32_im   c10_12_im
 
#define c31_32_re   c11_12_re
 
#define c31_32_im   c11_12_im
 
#define c32_32_re   c12_12_re
 
#define SHARED_STRIDE   32
 

Functions

 coordsFromIndex3D< EVEN_X > (X, x1, x2, x3, x4, sid, param.parity, dims)
 
 if (x2 >=X2) return
 
 if (x3 >=X3) return
 
 READ_SPINOR (SPINORTEX, param.sp_stride, sp_idx, sp_idx)
 
 WRITE_SPINOR_SHARED (threadIdx.x, threadIdx.y, threadIdx.z, i)
 
 READ_GAUGE_MATRIX (G, GAUGE0TEX, 0, ga_idx, ga_stride)
 
 RECONSTRUCT_GAUGE_MATRIX (0)
 
 __syncthreads ()
 
 READ_SPINOR_SHARED (tx, threadIdx.y, threadIdx.z)
 
 READ_GAUGE_MATRIX (G, GAUGE1TEX, 1, ga_idx, ga_stride)
 
 RECONSTRUCT_GAUGE_MATRIX (1)
 
 if (threadIdx.y == blockDim.y-1 && blockDim.y < X2)
 
 READ_SPINOR_SHARED (tx, ty, threadIdx.z)
 
 READ_GAUGE_MATRIX (G, GAUGE0TEX, 2, ga_idx, ga_stride)
 
 RECONSTRUCT_GAUGE_MATRIX (2)
 
 READ_GAUGE_MATRIX (G, GAUGE1TEX, 3, ga_idx, ga_stride)
 
 RECONSTRUCT_GAUGE_MATRIX (3)
 
 READ_SPINOR_SHARED (tx, threadIdx.y, tz)
 
 READ_GAUGE_MATRIX (G, GAUGE0TEX, 4, ga_idx, ga_stride)
 
 RECONSTRUCT_GAUGE_MATRIX (4)
 
 READ_GAUGE_MATRIX (G, GAUGE1TEX, 5, ga_idx, ga_stride)
 
 RECONSTRUCT_GAUGE_MATRIX (5)
 
 READ_SPINOR_UP (SPINORTEX, param.sp_stride, sp_idx, sp_idx)
 
 READ_GAUGE_MATRIX (G, GAUGE0TEX, 6, ga_idx, ga_stride)
 
 RECONSTRUCT_GAUGE_MATRIX (6)
 
 READ_SPINOR_DOWN (SPINORTEX, param.sp_stride, sp_idx, sp_idx)
 
 READ_GAUGE_MATRIX (G, GAUGE1TEX, 7, ga_idx, ga_stride)
 
 RECONSTRUCT_GAUGE_MATRIX (7)
 
 WRITE_SPINOR (param.sp_stride)
 

Variables

VOLATILE spinorFloat o00_re = 0
 
VOLATILE spinorFloat o00_im = 0
 
VOLATILE spinorFloat o01_re = 0
 
VOLATILE spinorFloat o01_im = 0
 
VOLATILE spinorFloat o02_re = 0
 
VOLATILE spinorFloat o02_im = 0
 
VOLATILE spinorFloat o10_re = 0
 
VOLATILE spinorFloat o10_im = 0
 
VOLATILE spinorFloat o11_re = 0
 
VOLATILE spinorFloat o11_im = 0
 
VOLATILE spinorFloat o12_re = 0
 
VOLATILE spinorFloat o12_im = 0
 
VOLATILE spinorFloat o20_re = 0
 
VOLATILE spinorFloat o20_im = 0
 
VOLATILE spinorFloat o21_re = 0
 
VOLATILE spinorFloat o21_im = 0
 
VOLATILE spinorFloat o22_re = 0
 
VOLATILE spinorFloat o22_im = 0
 
VOLATILE spinorFloat o30_re = 0
 
VOLATILE spinorFloat o30_im = 0
 
VOLATILE spinorFloat o31_re = 0
 
VOLATILE spinorFloat o31_im = 0
 
VOLATILE spinorFloat o32_re = 0
 
VOLATILE spinorFloat o32_im = 0
 
int x1 = 2*x1h + x1odd
 
int x2 = z1 - z2*X2
 
int x3 = z2 - x4*X3
 
int x4 = z2 / X3
 
int X = 2*mem_idx + x1odd
 
int sid = blockIdx.x * blockDim.x + threadIdx.x
 
const int dims [] = {X1, X2, X3, X4}
 
const int ga_idx = sid
 
spinorFloat a0_re = +i00_re-i30_im
 
spinorFloat a0_im = +i00_im+i30_re
 
spinorFloat a1_re = +i01_re-i31_im
 
spinorFloat a1_im = +i01_im+i31_re
 
spinorFloat a2_re = +i02_re-i32_im
 
spinorFloat a2_im = +i02_im+i32_re
 
spinorFloat b0_re = +i10_re-i20_im
 
spinorFloat b0_im = +i10_im+i20_re
 
spinorFloat b1_re = +i11_re-i21_im
 
spinorFloat b1_im = +i11_im+i21_re
 
spinorFloat b2_re = +i12_re-i22_im
 
spinorFloat b2_im = +i12_im+i22_re
 
spinorFloat A0_re = 0
 
spinorFloat A0_im = 0
 
spinorFloat B0_re = 0
 
spinorFloat B0_im = 0
 
spinorFloat A1_re = 0
 
spinorFloat A1_im = 0
 
spinorFloat B1_re = 0
 
spinorFloat B1_im = 0
 
spinorFloat A2_re = 0
 
spinorFloat A2_im = 0
 
spinorFloat B2_re = 0
 
spinorFloat B2_im = 0
 
int tx = (threadIdx.x > 0) ? threadIdx.x-1 : blockDim.x-1
 
 else
 
int ty = (threadIdx.y < blockDim.y - 1) ? threadIdx.y + 1 : 0
 
int tz = (threadIdx.z < blockDim.z - 1) ? threadIdx.z + 1 : 0
 

Macro Definition Documentation

#define acc00_im   accum0.y
#define acc00_re   accum0.x
#define acc01_im   accum0.w
#define acc01_re   accum0.z
#define acc02_im   accum1.y
#define acc02_re   accum1.x
#define acc10_im   accum1.w
#define acc10_re   accum1.z
#define acc11_im   accum2.y
#define acc11_re   accum2.x
#define acc12_im   accum2.w
#define acc12_re   accum2.z
#define acc20_im   accum3.y
#define acc20_re   accum3.x
#define acc21_im   accum3.w
#define acc21_re   accum3.z
#define acc22_im   accum4.y
#define acc22_re   accum4.x
#define acc30_im   accum4.w
#define acc30_re   accum4.z
#define acc31_im   accum5.y
#define acc31_re   accum5.x
#define acc32_im   accum5.w
#define acc32_re   accum5.z
#define c00_00_re   C0.x
#define c00_01_im   (-c01_00_im)
#define c00_01_re   (+c01_00_re)
#define c00_02_im   (-c02_00_im)
#define c00_02_re   (+c02_00_re)
#define c00_10_im   (-c10_00_im)
#define c00_10_re   (+c10_00_re)
#define c00_11_im   (-c11_00_im)
#define c00_11_re   (+c11_00_re)
#define c00_12_im   (-c12_00_im)
#define c00_12_re   (+c12_00_re)
#define c01_00_im   C1.w
#define c01_00_re   C1.z
#define c01_01_re   C0.y
#define c01_02_im   (-c02_01_im)
#define c01_02_re   (+c02_01_re)
#define c01_10_im   (-c10_01_im)
#define c01_10_re   (+c10_01_re)
#define c01_11_im   (-c11_01_im)
#define c01_11_re   (+c11_01_re)
#define c01_12_im   (-c12_01_im)
#define c01_12_re   (+c12_01_re)
#define c02_00_im   C2.y
#define c02_00_re   C2.x
#define c02_01_im   C4.y
#define c02_01_re   C4.x
#define c02_02_re   C0.z
#define c02_10_im   (-c10_02_im)
#define c02_10_re   (+c10_02_re)
#define c02_11_im   (-c11_02_im)
#define c02_11_re   (+c11_02_re)
#define c02_12_im   (-c12_02_im)
#define c02_12_re   (+c12_02_re)
#define c10_00_im   C2.w
#define c10_00_re   C2.z
#define c10_01_im   C4.w
#define c10_01_re   C4.z
#define c10_02_im   C6.y
#define c10_02_re   C6.x
#define c10_10_re   C0.w
#define c10_11_im   (-c11_10_im)
#define c10_11_re   (+c11_10_re)
#define c10_12_im   (-c12_10_im)
#define c10_12_re   (+c12_10_re)
#define c11_00_im   C3.y
#define c11_00_re   C3.x
#define c11_01_im   C5.y
#define c11_01_re   C5.x
#define c11_02_im   C6.w
#define c11_02_re   C6.z
#define c11_10_im   C7.w
#define c11_10_re   C7.z
#define c11_11_re   C1.x
#define c11_12_im   (-c12_11_im)
#define c11_12_re   (+c12_11_re)
#define c12_00_im   C3.w
#define c12_00_re   C3.z
#define c12_01_im   C5.w
#define c12_01_re   C5.z
#define c12_02_im   C7.y
#define c12_02_re   C7.x
#define c12_10_im   C8.y
#define c12_10_re   C8.x
#define c12_11_im   C8.w
#define c12_11_re   C8.z
#define c12_12_re   C1.y
#define c20_20_re   c00_00_re
#define c20_21_im   c00_01_im
#define c20_21_re   c00_01_re
#define c20_22_im   c00_02_im
#define c20_22_re   c00_02_re
#define c20_30_im   c00_10_im
#define c20_30_re   c00_10_re
#define c20_31_im   c00_11_im
#define c20_31_re   c00_11_re
#define c20_32_im   c00_12_im
#define c20_32_re   c00_12_re
#define c21_20_im   c01_00_im
#define c21_20_re   c01_00_re
#define c21_21_re   c01_01_re
#define c21_22_im   c01_02_im
#define c21_22_re   c01_02_re
#define c21_30_im   c01_10_im
#define c21_30_re   c01_10_re
#define c21_31_im   c01_11_im
#define c21_31_re   c01_11_re
#define c21_32_im   c01_12_im
#define c21_32_re   c01_12_re
#define c22_20_im   c02_00_im
#define c22_20_re   c02_00_re
#define c22_21_im   c02_01_im
#define c22_21_re   c02_01_re
#define c22_22_re   c02_02_re
#define c22_30_im   c02_10_im
#define c22_30_re   c02_10_re
#define c22_31_im   c02_11_im
#define c22_31_re   c02_11_re
#define c22_32_im   c02_12_im
#define c22_32_re   c02_12_re
#define c30_20_im   c10_00_im
#define c30_20_re   c10_00_re
#define c30_21_im   c10_01_im
#define c30_21_re   c10_01_re
#define c30_22_im   c10_02_im
#define c30_22_re   c10_02_re
#define c30_30_re   c10_10_re
#define c30_31_im   c10_11_im
#define c30_31_re   c10_11_re
#define c30_32_im   c10_12_im
#define c30_32_re   c10_12_re
#define c31_20_im   c11_00_im
#define c31_20_re   c11_00_re
#define c31_21_im   c11_01_im
#define c31_21_re   c11_01_re
#define c31_22_im   c11_02_im
#define c31_22_re   c11_02_re
#define c31_30_im   c11_10_im
#define c31_30_re   c11_10_re
#define c31_31_re   c11_11_re
#define c31_32_im   c11_12_im
#define c31_32_re   c11_12_re
#define c32_20_im   c12_00_im
#define c32_20_re   c12_00_re
#define c32_21_im   c12_01_im
#define c32_21_re   c12_01_re
#define c32_22_im   c12_02_im
#define c32_22_re   c12_02_re
#define c32_30_im   c12_10_im
#define c32_30_re   c12_10_re
#define c32_31_im   c12_11_im
#define c32_31_re   c12_11_re
#define c32_32_re   c12_12_re
#define DSLASH_SHARED_FLOATS_PER_THREAD   24

Definition at line 3 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

#define g00_im   G0.y
#define g00_re   G0.x
#define g01_im   G0.w
#define g01_re   G0.z
#define g02_im   G1.y
#define g02_re   G1.x
#define g10_im   G1.w
#define g10_re   G1.z
#define g11_im   G2.y
#define g11_re   G2.x
#define g12_im   G2.w
#define g12_re   G2.z
#define g20_im   G3.y
#define g20_re   G3.x
#define g21_im   G3.w
#define g21_re   G3.z
#define g22_im   G4.y
#define g22_re   G4.x
#define gT00_im   (-g00_im)
#define gT00_re   (+g00_re)
#define gT01_im   (-g10_im)
#define gT01_re   (+g10_re)
#define gT02_im   (-g20_im)
#define gT02_re   (+g20_re)
#define gT10_im   (-g01_im)
#define gT10_re   (+g01_re)
#define gT11_im   (-g11_im)
#define gT11_re   (+g11_re)
#define gT12_im   (-g21_im)
#define gT12_re   (+g21_re)
#define gT20_im   (-g02_im)
#define gT20_re   (+g02_re)
#define gT21_im   (-g12_im)
#define gT21_re   (+g12_re)
#define gT22_im   (-g22_im)
#define gT22_re   (+g22_re)
#define i00_im   I0.y
#define i00_re   I0.x
#define i01_im   I0.w
#define i01_re   I0.z
#define i02_im   I1.y
#define i02_re   I1.x
#define i10_im   I1.w
#define i10_re   I1.z
#define i11_im   I2.y
#define i11_re   I2.x
#define i12_im   I2.w
#define i12_re   I2.z
#define i20_im   I3.y
#define i20_re   I3.x
#define i21_im   I3.w
#define i21_re   I3.z
#define i22_im   I4.y
#define i22_re   I4.x
#define i30_im   I4.w
#define i30_re   I4.z
#define i31_im   I5.y
#define i31_re   I5.x
#define i32_im   I5.w
#define i32_re   I5.z
#define READ_SPINOR_SHARED   READ_SPINOR_SHARED_FLOAT4
#define SHARED_STRIDE   32
#define spinorFloat   float
#define VOLATILE   volatile

Definition at line 9 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

#define WRITE_SPINOR_SHARED   WRITE_SPINOR_SHARED_FLOAT4

Function Documentation

__syncthreads ( )
coordsFromIndex3D< EVEN_X > ( X  ,
x1  ,
x2  ,
x3  ,
x4  ,
sid  ,
param.parity  ,
dims   
)
if ( x2 >=  X2)
if ( x3 >=  X3)
if ( threadIdx.y == blockDim.y-1 && blockDim.y < X2)
READ_GAUGE_MATRIX ( G  ,
GAUGE0TEX  ,
0  ,
ga_idx  ,
ga_stride   
)
READ_GAUGE_MATRIX ( G  ,
GAUGE1TEX  ,
1  ,
ga_idx  ,
ga_stride   
)
READ_GAUGE_MATRIX ( G  ,
GAUGE0TEX  ,
2  ,
ga_idx  ,
ga_stride   
)
READ_GAUGE_MATRIX ( G  ,
GAUGE1TEX  ,
3  ,
ga_idx  ,
ga_stride   
)
READ_GAUGE_MATRIX ( G  ,
GAUGE0TEX  ,
4  ,
ga_idx  ,
ga_stride   
)
READ_GAUGE_MATRIX ( G  ,
GAUGE1TEX  ,
5  ,
ga_idx  ,
ga_stride   
)
READ_GAUGE_MATRIX ( G  ,
GAUGE0TEX  ,
6  ,
ga_idx  ,
ga_stride   
)
READ_GAUGE_MATRIX ( G  ,
GAUGE1TEX  ,
7  ,
ga_idx  ,
ga_stride   
)
READ_SPINOR ( SPINORTEX  ,
param.sp_stride  ,
sp_idx  ,
sp_idx   
)
READ_SPINOR_DOWN ( SPINORTEX  ,
param.sp_stride  ,
sp_idx  ,
sp_idx   
)
READ_SPINOR_SHARED ( tx  ,
threadIdx.y  ,
threadIdx.z   
)
READ_SPINOR_SHARED ( tx  ,
ty  ,
threadIdx.z   
)
READ_SPINOR_SHARED ( tx  ,
threadIdx.y  ,
tz   
)
READ_SPINOR_UP ( SPINORTEX  ,
param.sp_stride  ,
sp_idx  ,
sp_idx   
)
RECONSTRUCT_GAUGE_MATRIX ( 0 )
RECONSTRUCT_GAUGE_MATRIX ( 1 )
RECONSTRUCT_GAUGE_MATRIX ( 2 )
RECONSTRUCT_GAUGE_MATRIX ( 3 )
RECONSTRUCT_GAUGE_MATRIX ( 4 )
RECONSTRUCT_GAUGE_MATRIX ( 5 )
RECONSTRUCT_GAUGE_MATRIX ( 6 )
RECONSTRUCT_GAUGE_MATRIX ( 7 )
WRITE_SPINOR ( param.sp_stride )
WRITE_SPINOR_SHARED ( threadIdx.x  ,
threadIdx.y  ,
threadIdx.z  ,
i   
)

Variable Documentation

spinorFloat A0_im = 0
spinorFloat A0_re = 0
spinorFloat A1_im = 0
spinorFloat A1_re = 0
spinorFloat A2_im = 0
spinorFloat A2_re = 0
spinorFloat B0_im = 0
spinorFloat B0_re = 0
spinorFloat B1_im = 0
spinorFloat B1_re = 0
spinorFloat B2_im = 0
spinorFloat B2_re = 0
const int dims[] = {X1, X2, X3, X4}
else
Initial value:
{
int tx = (threadIdx.x + blockDim.x - ((x1+1)&1) ) % blockDim.x

Definition at line 1355 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

const int ga_idx = sid
o00_im = 0
o00_re = 0
o01_im = 0
o01_re = 0
o02_im = 0
o02_re = 0
o10_im = 0
o10_re = 0
o11_im = 0
o11_re = 0
o12_im = 0
o12_re = 0
o20_im = 0
o20_re = 0
o21_im = 0
o21_re = 0
o22_im = 0
o22_re = 0
o30_im = 0
o30_re = 0
o31_im = 0
o31_re = 0
o32_im = 0
o32_re = 0
int tx = (threadIdx.x > 0) ? threadIdx.x-1 : blockDim.x-1
int ty = (threadIdx.y < blockDim.y - 1) ? threadIdx.y + 1 : 0
int tz = (threadIdx.z < blockDim.z - 1) ? threadIdx.z + 1 : 0
int X = 2*mem_idx + x1odd
short x1 = 2*x1h + x1odd
short x2 = z1 - z2*X2
short x3 = z2 - x4*X3
short x4 = z2 / X3