QUDA  0.9.0
Macros | Functions | Variables
asym_wilson_clover_dslash_dagger_fermi_core.h File Reference
#include "read_gauge.h"
#include "read_clover.h"
#include "io_spinor.h"
Include dependency graph for asym_wilson_clover_dslash_dagger_fermi_core.h:

Go to the source code of this file.

Macros

#define DSLASH_SHARED_FLOATS_PER_THREAD   24
 
#define VOLATILE   volatile
 
#define spinorFloat   float
 
#define WRITE_SPINOR_SHARED   WRITE_SPINOR_SHARED_FLOAT4
 
#define READ_SPINOR_SHARED   READ_SPINOR_SHARED_FLOAT4
 
#define i00_re   I0.x
 
#define i00_im   I0.y
 
#define i01_re   I0.z
 
#define i01_im   I0.w
 
#define i02_re   I1.x
 
#define i02_im   I1.y
 
#define i10_re   I1.z
 
#define i10_im   I1.w
 
#define i11_re   I2.x
 
#define i11_im   I2.y
 
#define i12_re   I2.z
 
#define i12_im   I2.w
 
#define i20_re   I3.x
 
#define i20_im   I3.y
 
#define i21_re   I3.z
 
#define i21_im   I3.w
 
#define i22_re   I4.x
 
#define i22_im   I4.y
 
#define i30_re   I4.z
 
#define i30_im   I4.w
 
#define i31_re   I5.x
 
#define i31_im   I5.y
 
#define i32_re   I5.z
 
#define i32_im   I5.w
 
#define acc00_re   accum0.x
 
#define acc00_im   accum0.y
 
#define acc01_re   accum0.z
 
#define acc01_im   accum0.w
 
#define acc02_re   accum1.x
 
#define acc02_im   accum1.y
 
#define acc10_re   accum1.z
 
#define acc10_im   accum1.w
 
#define acc11_re   accum2.x
 
#define acc11_im   accum2.y
 
#define acc12_re   accum2.z
 
#define acc12_im   accum2.w
 
#define acc20_re   accum3.x
 
#define acc20_im   accum3.y
 
#define acc21_re   accum3.z
 
#define acc21_im   accum3.w
 
#define acc22_re   accum4.x
 
#define acc22_im   accum4.y
 
#define acc30_re   accum4.z
 
#define acc30_im   accum4.w
 
#define acc31_re   accum5.x
 
#define acc31_im   accum5.y
 
#define acc32_re   accum5.z
 
#define acc32_im   accum5.w
 
#define g00_re   G0.x
 
#define g00_im   G0.y
 
#define g01_re   G0.z
 
#define g01_im   G0.w
 
#define g02_re   G1.x
 
#define g02_im   G1.y
 
#define g10_re   G1.z
 
#define g10_im   G1.w
 
#define g11_re   G2.x
 
#define g11_im   G2.y
 
#define g12_re   G2.z
 
#define g12_im   G2.w
 
#define g20_re   G3.x
 
#define g20_im   G3.y
 
#define g21_re   G3.z
 
#define g21_im   G3.w
 
#define g22_re   G4.x
 
#define g22_im   G4.y
 
#define gT00_re   (+g00_re)
 
#define gT00_im   (-g00_im)
 
#define gT01_re   (+g10_re)
 
#define gT01_im   (-g10_im)
 
#define gT02_re   (+g20_re)
 
#define gT02_im   (-g20_im)
 
#define gT10_re   (+g01_re)
 
#define gT10_im   (-g01_im)
 
#define gT11_re   (+g11_re)
 
#define gT11_im   (-g11_im)
 
#define gT12_re   (+g21_re)
 
#define gT12_im   (-g21_im)
 
#define gT20_re   (+g02_re)
 
#define gT20_im   (-g02_im)
 
#define gT21_re   (+g12_re)
 
#define gT21_im   (-g12_im)
 
#define gT22_re   (+g22_re)
 
#define gT22_im   (-g22_im)
 
#define c00_00_re   C0.x
 
#define c01_01_re   C0.y
 
#define c02_02_re   C0.z
 
#define c10_10_re   C0.w
 
#define c11_11_re   C1.x
 
#define c12_12_re   C1.y
 
#define c01_00_re   C1.z
 
#define c01_00_im   C1.w
 
#define c02_00_re   C2.x
 
#define c02_00_im   C2.y
 
#define c10_00_re   C2.z
 
#define c10_00_im   C2.w
 
#define c11_00_re   C3.x
 
#define c11_00_im   C3.y
 
#define c12_00_re   C3.z
 
#define c12_00_im   C3.w
 
#define c02_01_re   C4.x
 
#define c02_01_im   C4.y
 
#define c10_01_re   C4.z
 
#define c10_01_im   C4.w
 
#define c11_01_re   C5.x
 
#define c11_01_im   C5.y
 
#define c12_01_re   C5.z
 
#define c12_01_im   C5.w
 
#define c10_02_re   C6.x
 
#define c10_02_im   C6.y
 
#define c11_02_re   C6.z
 
#define c11_02_im   C6.w
 
#define c12_02_re   C7.x
 
#define c12_02_im   C7.y
 
#define c11_10_re   C7.z
 
#define c11_10_im   C7.w
 
#define c12_10_re   C8.x
 
#define c12_10_im   C8.y
 
#define c12_11_re   C8.z
 
#define c12_11_im   C8.w
 
#define c00_01_re   (+c01_00_re)
 
#define c00_01_im   (-c01_00_im)
 
#define c00_02_re   (+c02_00_re)
 
#define c00_02_im   (-c02_00_im)
 
#define c01_02_re   (+c02_01_re)
 
#define c01_02_im   (-c02_01_im)
 
#define c00_10_re   (+c10_00_re)
 
#define c00_10_im   (-c10_00_im)
 
#define c01_10_re   (+c10_01_re)
 
#define c01_10_im   (-c10_01_im)
 
#define c02_10_re   (+c10_02_re)
 
#define c02_10_im   (-c10_02_im)
 
#define c00_11_re   (+c11_00_re)
 
#define c00_11_im   (-c11_00_im)
 
#define c01_11_re   (+c11_01_re)
 
#define c01_11_im   (-c11_01_im)
 
#define c02_11_re   (+c11_02_re)
 
#define c02_11_im   (-c11_02_im)
 
#define c10_11_re   (+c11_10_re)
 
#define c10_11_im   (-c11_10_im)
 
#define c00_12_re   (+c12_00_re)
 
#define c00_12_im   (-c12_00_im)
 
#define c01_12_re   (+c12_01_re)
 
#define c01_12_im   (-c12_01_im)
 
#define c02_12_re   (+c12_02_re)
 
#define c02_12_im   (-c12_02_im)
 
#define c10_12_re   (+c12_10_re)
 
#define c10_12_im   (-c12_10_im)
 
#define c11_12_re   (+c12_11_re)
 
#define c11_12_im   (-c12_11_im)
 
#define c20_20_re   c00_00_re
 
#define c21_20_re   c01_00_re
 
#define c21_20_im   c01_00_im
 
#define c22_20_re   c02_00_re
 
#define c22_20_im   c02_00_im
 
#define c30_20_re   c10_00_re
 
#define c30_20_im   c10_00_im
 
#define c31_20_re   c11_00_re
 
#define c31_20_im   c11_00_im
 
#define c32_20_re   c12_00_re
 
#define c32_20_im   c12_00_im
 
#define c20_21_re   c00_01_re
 
#define c20_21_im   c00_01_im
 
#define c21_21_re   c01_01_re
 
#define c22_21_re   c02_01_re
 
#define c22_21_im   c02_01_im
 
#define c30_21_re   c10_01_re
 
#define c30_21_im   c10_01_im
 
#define c31_21_re   c11_01_re
 
#define c31_21_im   c11_01_im
 
#define c32_21_re   c12_01_re
 
#define c32_21_im   c12_01_im
 
#define c20_22_re   c00_02_re
 
#define c20_22_im   c00_02_im
 
#define c21_22_re   c01_02_re
 
#define c21_22_im   c01_02_im
 
#define c22_22_re   c02_02_re
 
#define c30_22_re   c10_02_re
 
#define c30_22_im   c10_02_im
 
#define c31_22_re   c11_02_re
 
#define c31_22_im   c11_02_im
 
#define c32_22_re   c12_02_re
 
#define c32_22_im   c12_02_im
 
#define c20_30_re   c00_10_re
 
#define c20_30_im   c00_10_im
 
#define c21_30_re   c01_10_re
 
#define c21_30_im   c01_10_im
 
#define c22_30_re   c02_10_re
 
#define c22_30_im   c02_10_im
 
#define c30_30_re   c10_10_re
 
#define c31_30_re   c11_10_re
 
#define c31_30_im   c11_10_im
 
#define c32_30_re   c12_10_re
 
#define c32_30_im   c12_10_im
 
#define c20_31_re   c00_11_re
 
#define c20_31_im   c00_11_im
 
#define c21_31_re   c01_11_re
 
#define c21_31_im   c01_11_im
 
#define c22_31_re   c02_11_re
 
#define c22_31_im   c02_11_im
 
#define c30_31_re   c10_11_re
 
#define c30_31_im   c10_11_im
 
#define c31_31_re   c11_11_re
 
#define c32_31_re   c12_11_re
 
#define c32_31_im   c12_11_im
 
#define c20_32_re   c00_12_re
 
#define c20_32_im   c00_12_im
 
#define c21_32_re   c01_12_re
 
#define c21_32_im   c01_12_im
 
#define c22_32_re   c02_12_re
 
#define c22_32_im   c02_12_im
 
#define c30_32_re   c10_12_re
 
#define c30_32_im   c10_12_im
 
#define c31_32_re   c11_12_re
 
#define c31_32_im   c11_12_im
 
#define c32_32_re   c12_12_re
 
#define SHARED_STRIDE   32
 

Functions

 coordsFromIndex3D< EVEN_X > (X, x, sid, param.parity, param.dc.X)
 
 if (coord[1] >=param.dc.X[1]) return
 
 if (coord[2] >=param.dc.X[2]) return
 
 READ_SPINOR (SPINORTEX, param.sp_stride, sp_idx, sp_idx)
 
 WRITE_SPINOR_SHARED (threadIdx.x, threadIdx.y, threadIdx.z, i)
 
 READ_GAUGE_MATRIX (G, GAUGE0TEX, 0, ga_idx, param.gauge_stride)
 
 RECONSTRUCT_GAUGE_MATRIX (0)
 
 __syncthreads ()
 
 READ_SPINOR_SHARED (tx, threadIdx.y, threadIdx.z)
 
 READ_GAUGE_MATRIX (G, GAUGE1TEX, 1, ga_idx, param.gauge_stride)
 
 RECONSTRUCT_GAUGE_MATRIX (1)
 
 if (threadIdx.y==blockDim.y-1 &&blockDim.y< param.dc.X[1])
 
 READ_SPINOR_SHARED (tx, ty, threadIdx.z)
 
 READ_GAUGE_MATRIX (G, GAUGE0TEX, 2, ga_idx, param.gauge_stride)
 
 RECONSTRUCT_GAUGE_MATRIX (2)
 
 READ_GAUGE_MATRIX (G, GAUGE1TEX, 3, ga_idx, param.gauge_stride)
 
 RECONSTRUCT_GAUGE_MATRIX (3)
 
 READ_SPINOR_SHARED (tx, threadIdx.y, tz)
 
 READ_GAUGE_MATRIX (G, GAUGE0TEX, 4, ga_idx, param.gauge_stride)
 
 RECONSTRUCT_GAUGE_MATRIX (4)
 
 READ_GAUGE_MATRIX (G, GAUGE1TEX, 5, ga_idx, param.gauge_stride)
 
 RECONSTRUCT_GAUGE_MATRIX (5)
 
 READ_SPINOR_UP (SPINORTEX, param.sp_stride, sp_idx, sp_idx)
 
 READ_GAUGE_MATRIX (G, GAUGE0TEX, 6, ga_idx, param.gauge_stride)
 
 RECONSTRUCT_GAUGE_MATRIX (6)
 
 READ_SPINOR_DOWN (SPINORTEX, param.sp_stride, sp_idx, sp_idx)
 
 READ_GAUGE_MATRIX (G, GAUGE1TEX, 7, ga_idx, param.gauge_stride)
 
 RECONSTRUCT_GAUGE_MATRIX (7)
 
 WRITE_SPINOR (param.sp_stride)
 

Variables

VOLATILE spinorFloat o00_re = 0
 
VOLATILE spinorFloat o00_im = 0
 
VOLATILE spinorFloat o01_re = 0
 
VOLATILE spinorFloat o01_im = 0
 
VOLATILE spinorFloat o02_re = 0
 
VOLATILE spinorFloat o02_im = 0
 
VOLATILE spinorFloat o10_re = 0
 
VOLATILE spinorFloat o10_im = 0
 
VOLATILE spinorFloat o11_re = 0
 
VOLATILE spinorFloat o11_im = 0
 
VOLATILE spinorFloat o12_re = 0
 
VOLATILE spinorFloat o12_im = 0
 
VOLATILE spinorFloat o20_re = 0
 
VOLATILE spinorFloat o20_im = 0
 
VOLATILE spinorFloat o21_re = 0
 
VOLATILE spinorFloat o21_im = 0
 
VOLATILE spinorFloat o22_re = 0
 
VOLATILE spinorFloat o22_im = 0
 
VOLATILE spinorFloat o30_re = 0
 
VOLATILE spinorFloat o30_im = 0
 
VOLATILE spinorFloat o31_re = 0
 
VOLATILE spinorFloat o31_im = 0
 
VOLATILE spinorFloat o32_re = 0
 
VOLATILE spinorFloat o32_im = 0
 
int coord [5]
 
int X
 
int sid
 
const int ga_idx = sid
 
spinorFloat a0_re = +i00_re-i30_im
 
spinorFloat a0_im = +i00_im+i30_re
 
spinorFloat a1_re = +i01_re-i31_im
 
spinorFloat a1_im = +i01_im+i31_re
 
spinorFloat a2_re = +i02_re-i32_im
 
spinorFloat a2_im = +i02_im+i32_re
 
spinorFloat b0_re = +i10_re-i20_im
 
spinorFloat b0_im = +i10_im+i20_re
 
spinorFloat b1_re = +i11_re-i21_im
 
spinorFloat b1_im = +i11_im+i21_re
 
spinorFloat b2_re = +i12_re-i22_im
 
spinorFloat b2_im = +i12_im+i22_re
 
spinorFloat A0_re = 0
 
spinorFloat A0_im = 0
 
spinorFloat B0_re = 0
 
spinorFloat B0_im = 0
 
spinorFloat A1_re = 0
 
spinorFloat A1_im = 0
 
spinorFloat B1_re = 0
 
spinorFloat B1_im = 0
 
spinorFloat A2_re = 0
 
spinorFloat A2_im = 0
 
spinorFloat B2_re = 0
 
spinorFloat B2_im = 0
 
spinorFloat a = param.a_f
 
int tx = (threadIdx.x > 0) ? threadIdx.x-1 : blockDim.x-1
 
 else
 
int ty = (threadIdx.y < blockDim.y - 1) ? threadIdx.y + 1 : 0
 
int tz = (threadIdx.z < blockDim.z - 1) ? threadIdx.z + 1 : 0
 

Macro Definition Documentation

◆ acc00_im

#define acc00_im   accum0.y

◆ acc00_re

#define acc00_re   accum0.x

◆ acc01_im

#define acc01_im   accum0.w

◆ acc01_re

#define acc01_re   accum0.z

◆ acc02_im

#define acc02_im   accum1.y

◆ acc02_re

#define acc02_re   accum1.x

◆ acc10_im

#define acc10_im   accum1.w

◆ acc10_re

#define acc10_re   accum1.z

◆ acc11_im

#define acc11_im   accum2.y

◆ acc11_re

#define acc11_re   accum2.x

◆ acc12_im

#define acc12_im   accum2.w

◆ acc12_re

#define acc12_re   accum2.z

◆ acc20_im

#define acc20_im   accum3.y

◆ acc20_re

#define acc20_re   accum3.x

◆ acc21_im

#define acc21_im   accum3.w

◆ acc21_re

#define acc21_re   accum3.z

◆ acc22_im

#define acc22_im   accum4.y

◆ acc22_re

#define acc22_re   accum4.x

◆ acc30_im

#define acc30_im   accum4.w

◆ acc30_re

#define acc30_re   accum4.z

◆ acc31_im

#define acc31_im   accum5.y

◆ acc31_re

#define acc31_re   accum5.x

◆ acc32_im

#define acc32_im   accum5.w

◆ acc32_re

#define acc32_re   accum5.z

◆ c00_00_re

#define c00_00_re   C0.x

◆ c00_01_im

#define c00_01_im   (-c01_00_im)

◆ c00_01_re

#define c00_01_re   (+c01_00_re)

◆ c00_02_im

#define c00_02_im   (-c02_00_im)

◆ c00_02_re

#define c00_02_re   (+c02_00_re)

◆ c00_10_im

#define c00_10_im   (-c10_00_im)

◆ c00_10_re

#define c00_10_re   (+c10_00_re)

◆ c00_11_im

#define c00_11_im   (-c11_00_im)

◆ c00_11_re

#define c00_11_re   (+c11_00_re)

◆ c00_12_im

#define c00_12_im   (-c12_00_im)

◆ c00_12_re

#define c00_12_re   (+c12_00_re)

◆ c01_00_im

#define c01_00_im   C1.w

◆ c01_00_re

#define c01_00_re   C1.z

◆ c01_01_re

#define c01_01_re   C0.y

◆ c01_02_im

#define c01_02_im   (-c02_01_im)

◆ c01_02_re

#define c01_02_re   (+c02_01_re)

◆ c01_10_im

#define c01_10_im   (-c10_01_im)

◆ c01_10_re

#define c01_10_re   (+c10_01_re)

◆ c01_11_im

#define c01_11_im   (-c11_01_im)

◆ c01_11_re

#define c01_11_re   (+c11_01_re)

◆ c01_12_im

#define c01_12_im   (-c12_01_im)

◆ c01_12_re

#define c01_12_re   (+c12_01_re)

◆ c02_00_im

#define c02_00_im   C2.y

◆ c02_00_re

#define c02_00_re   C2.x

◆ c02_01_im

#define c02_01_im   C4.y

◆ c02_01_re

#define c02_01_re   C4.x

◆ c02_02_re

#define c02_02_re   C0.z

◆ c02_10_im

#define c02_10_im   (-c10_02_im)

◆ c02_10_re

#define c02_10_re   (+c10_02_re)

◆ c02_11_im

#define c02_11_im   (-c11_02_im)

◆ c02_11_re

#define c02_11_re   (+c11_02_re)

◆ c02_12_im

#define c02_12_im   (-c12_02_im)

◆ c02_12_re

#define c02_12_re   (+c12_02_re)

◆ c10_00_im

#define c10_00_im   C2.w

◆ c10_00_re

#define c10_00_re   C2.z

◆ c10_01_im

#define c10_01_im   C4.w

◆ c10_01_re

#define c10_01_re   C4.z

◆ c10_02_im

#define c10_02_im   C6.y

◆ c10_02_re

#define c10_02_re   C6.x

◆ c10_10_re

#define c10_10_re   C0.w

◆ c10_11_im

#define c10_11_im   (-c11_10_im)

◆ c10_11_re

#define c10_11_re   (+c11_10_re)

◆ c10_12_im

#define c10_12_im   (-c12_10_im)

◆ c10_12_re

#define c10_12_re   (+c12_10_re)

◆ c11_00_im

#define c11_00_im   C3.y

◆ c11_00_re

#define c11_00_re   C3.x

◆ c11_01_im

#define c11_01_im   C5.y

◆ c11_01_re

#define c11_01_re   C5.x

◆ c11_02_im

#define c11_02_im   C6.w

◆ c11_02_re

#define c11_02_re   C6.z

◆ c11_10_im

#define c11_10_im   C7.w

◆ c11_10_re

#define c11_10_re   C7.z

◆ c11_11_re

#define c11_11_re   C1.x

◆ c11_12_im

#define c11_12_im   (-c12_11_im)

◆ c11_12_re

#define c11_12_re   (+c12_11_re)

◆ c12_00_im

#define c12_00_im   C3.w

◆ c12_00_re

#define c12_00_re   C3.z

◆ c12_01_im

#define c12_01_im   C5.w

◆ c12_01_re

#define c12_01_re   C5.z

◆ c12_02_im

#define c12_02_im   C7.y

◆ c12_02_re

#define c12_02_re   C7.x

◆ c12_10_im

#define c12_10_im   C8.y

◆ c12_10_re

#define c12_10_re   C8.x

◆ c12_11_im

#define c12_11_im   C8.w

◆ c12_11_re

#define c12_11_re   C8.z

◆ c12_12_re

#define c12_12_re   C1.y

◆ c20_20_re

#define c20_20_re   c00_00_re

◆ c20_21_im

#define c20_21_im   c00_01_im

◆ c20_21_re

#define c20_21_re   c00_01_re

◆ c20_22_im

#define c20_22_im   c00_02_im

◆ c20_22_re

#define c20_22_re   c00_02_re

◆ c20_30_im

#define c20_30_im   c00_10_im

◆ c20_30_re

#define c20_30_re   c00_10_re

◆ c20_31_im

#define c20_31_im   c00_11_im

◆ c20_31_re

#define c20_31_re   c00_11_re

◆ c20_32_im

#define c20_32_im   c00_12_im

◆ c20_32_re

#define c20_32_re   c00_12_re

◆ c21_20_im

#define c21_20_im   c01_00_im

◆ c21_20_re

#define c21_20_re   c01_00_re

◆ c21_21_re

#define c21_21_re   c01_01_re

◆ c21_22_im

#define c21_22_im   c01_02_im

◆ c21_22_re

#define c21_22_re   c01_02_re

◆ c21_30_im

#define c21_30_im   c01_10_im

◆ c21_30_re

#define c21_30_re   c01_10_re

◆ c21_31_im

#define c21_31_im   c01_11_im

◆ c21_31_re

#define c21_31_re   c01_11_re

◆ c21_32_im

#define c21_32_im   c01_12_im

◆ c21_32_re

#define c21_32_re   c01_12_re

◆ c22_20_im

#define c22_20_im   c02_00_im

◆ c22_20_re

#define c22_20_re   c02_00_re

◆ c22_21_im

#define c22_21_im   c02_01_im

◆ c22_21_re

#define c22_21_re   c02_01_re

◆ c22_22_re

#define c22_22_re   c02_02_re

◆ c22_30_im

#define c22_30_im   c02_10_im

◆ c22_30_re

#define c22_30_re   c02_10_re

◆ c22_31_im

#define c22_31_im   c02_11_im

◆ c22_31_re

#define c22_31_re   c02_11_re

◆ c22_32_im

#define c22_32_im   c02_12_im

◆ c22_32_re

#define c22_32_re   c02_12_re

◆ c30_20_im

#define c30_20_im   c10_00_im

◆ c30_20_re

#define c30_20_re   c10_00_re

◆ c30_21_im

#define c30_21_im   c10_01_im

◆ c30_21_re

#define c30_21_re   c10_01_re

◆ c30_22_im

#define c30_22_im   c10_02_im

◆ c30_22_re

#define c30_22_re   c10_02_re

◆ c30_30_re

#define c30_30_re   c10_10_re

◆ c30_31_im

#define c30_31_im   c10_11_im

◆ c30_31_re

#define c30_31_re   c10_11_re

◆ c30_32_im

#define c30_32_im   c10_12_im

◆ c30_32_re

#define c30_32_re   c10_12_re

◆ c31_20_im

#define c31_20_im   c11_00_im

◆ c31_20_re

#define c31_20_re   c11_00_re

◆ c31_21_im

#define c31_21_im   c11_01_im

◆ c31_21_re

#define c31_21_re   c11_01_re

◆ c31_22_im

#define c31_22_im   c11_02_im

◆ c31_22_re

#define c31_22_re   c11_02_re

◆ c31_30_im

#define c31_30_im   c11_10_im

◆ c31_30_re

#define c31_30_re   c11_10_re

◆ c31_31_re

#define c31_31_re   c11_11_re

◆ c31_32_im

#define c31_32_im   c11_12_im

◆ c31_32_re

#define c31_32_re   c11_12_re

◆ c32_20_im

#define c32_20_im   c12_00_im

◆ c32_20_re

#define c32_20_re   c12_00_re

◆ c32_21_im

#define c32_21_im   c12_01_im

◆ c32_21_re

#define c32_21_re   c12_01_re

◆ c32_22_im

#define c32_22_im   c12_02_im

◆ c32_22_re

#define c32_22_re   c12_02_re

◆ c32_30_im

#define c32_30_im   c12_10_im

◆ c32_30_re

#define c32_30_re   c12_10_re

◆ c32_31_im

#define c32_31_im   c12_11_im

◆ c32_31_re

#define c32_31_re   c12_11_re

◆ c32_32_re

#define c32_32_re   c12_12_re

◆ DSLASH_SHARED_FLOATS_PER_THREAD

#define DSLASH_SHARED_FLOATS_PER_THREAD   24

Definition at line 3 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

◆ g00_im

#define g00_im   G0.y

◆ g00_re

#define g00_re   G0.x

◆ g01_im

#define g01_im   G0.w

◆ g01_re

#define g01_re   G0.z

◆ g02_im

#define g02_im   G1.y

◆ g02_re

#define g02_re   G1.x

◆ g10_im

#define g10_im   G1.w

◆ g10_re

#define g10_re   G1.z

◆ g11_im

#define g11_im   G2.y

◆ g11_re

#define g11_re   G2.x

◆ g12_im

#define g12_im   G2.w

◆ g12_re

#define g12_re   G2.z

◆ g20_im

#define g20_im   G3.y

◆ g20_re

#define g20_re   G3.x

◆ g21_im

#define g21_im   G3.w

◆ g21_re

#define g21_re   G3.z

◆ g22_im

#define g22_im   G4.y

◆ g22_re

#define g22_re   G4.x

◆ gT00_im

#define gT00_im   (-g00_im)

◆ gT00_re

#define gT00_re   (+g00_re)

◆ gT01_im

#define gT01_im   (-g10_im)

◆ gT01_re

#define gT01_re   (+g10_re)

◆ gT02_im

#define gT02_im   (-g20_im)

◆ gT02_re

#define gT02_re   (+g20_re)

◆ gT10_im

#define gT10_im   (-g01_im)

◆ gT10_re

#define gT10_re   (+g01_re)

◆ gT11_im

#define gT11_im   (-g11_im)

◆ gT11_re

#define gT11_re   (+g11_re)

◆ gT12_im

#define gT12_im   (-g21_im)

◆ gT12_re

#define gT12_re   (+g21_re)

◆ gT20_im

#define gT20_im   (-g02_im)

◆ gT20_re

#define gT20_re   (+g02_re)

◆ gT21_im

#define gT21_im   (-g12_im)

◆ gT21_re

#define gT21_re   (+g12_re)

◆ gT22_im

#define gT22_im   (-g22_im)

◆ gT22_re

#define gT22_re   (+g22_re)

◆ i00_im

#define i00_im   I0.y

Definition at line 69 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

Referenced by if().

◆ i00_re

#define i00_re   I0.x

Definition at line 68 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

Referenced by if().

◆ i01_im

#define i01_im   I0.w

Definition at line 71 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

Referenced by if().

◆ i01_re

#define i01_re   I0.z

Definition at line 70 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

Referenced by if().

◆ i02_im

#define i02_im   I1.y

Definition at line 73 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

Referenced by if().

◆ i02_re

#define i02_re   I1.x

Definition at line 72 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

Referenced by if().

◆ i10_im

#define i10_im   I1.w

Definition at line 75 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

Referenced by if().

◆ i10_re

#define i10_re   I1.z

Definition at line 74 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

Referenced by if().

◆ i11_im

#define i11_im   I2.y

Definition at line 77 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

Referenced by if().

◆ i11_re

#define i11_re   I2.x

Definition at line 76 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

Referenced by if().

◆ i12_im

#define i12_im   I2.w

Definition at line 79 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

Referenced by if().

◆ i12_re

#define i12_re   I2.z

Definition at line 78 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

Referenced by if().

◆ i20_im

#define i20_im   I3.y

Definition at line 81 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

Referenced by if().

◆ i20_re

#define i20_re   I3.x

Definition at line 80 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

Referenced by if().

◆ i21_im

#define i21_im   I3.w

Definition at line 83 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

Referenced by if().

◆ i21_re

#define i21_re   I3.z

Definition at line 82 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

Referenced by if().

◆ i22_im

#define i22_im   I4.y

Definition at line 85 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

Referenced by if().

◆ i22_re

#define i22_re   I4.x

Definition at line 84 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

Referenced by if().

◆ i30_im

#define i30_im   I4.w

Definition at line 87 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

Referenced by if().

◆ i30_re

#define i30_re   I4.z

Definition at line 86 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

Referenced by if().

◆ i31_im

#define i31_im   I5.y

Definition at line 89 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

Referenced by if().

◆ i31_re

#define i31_re   I5.x

Definition at line 88 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

Referenced by if().

◆ i32_im

#define i32_im   I5.w

Definition at line 91 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

Referenced by if().

◆ i32_re

#define i32_re   I5.z

Definition at line 90 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

Referenced by if().

◆ READ_SPINOR_SHARED

#define READ_SPINOR_SHARED   READ_SPINOR_SHARED_FLOAT4

◆ SHARED_STRIDE

#define SHARED_STRIDE   32

◆ spinorFloat

#define spinorFloat   float

Definition at line 65 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

Referenced by bindSpinorTex(), and unbindSpinorTex().

◆ VOLATILE

#define VOLATILE   volatile

Definition at line 9 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

◆ WRITE_SPINOR_SHARED

#define WRITE_SPINOR_SHARED   WRITE_SPINOR_SHARED_FLOAT4

Function Documentation

◆ __syncthreads()

__syncthreads ( )

◆ coordsFromIndex3D< EVEN_X >()

coordsFromIndex3D< EVEN_X > ( X  ,
x  ,
sid  ,
param.parity  ,
param.dc.X 
)

◆ if() [1/3]

if ( coord[1] >= param.dc.X[1] )

◆ if() [2/3]

if ( coord[2] >= param.dc.X[2] )

◆ if() [3/3]

if ( threadIdx.y==blockDim.y-1 && blockDim.y < param.dc.X[1] )

◆ READ_GAUGE_MATRIX() [1/8]

READ_GAUGE_MATRIX ( G  ,
GAUGE0TEX  ,
0  ,
ga_idx  ,
param.gauge_stride 
)

◆ READ_GAUGE_MATRIX() [2/8]

READ_GAUGE_MATRIX ( G  ,
GAUGE1TEX  ,
1  ,
ga_idx  ,
param.gauge_stride 
)

◆ READ_GAUGE_MATRIX() [3/8]

READ_GAUGE_MATRIX ( G  ,
GAUGE0TEX  ,
2  ,
ga_idx  ,
param.gauge_stride 
)

◆ READ_GAUGE_MATRIX() [4/8]

READ_GAUGE_MATRIX ( G  ,
GAUGE1TEX  ,
3  ,
ga_idx  ,
param.gauge_stride 
)

◆ READ_GAUGE_MATRIX() [5/8]

READ_GAUGE_MATRIX ( G  ,
GAUGE0TEX  ,
4  ,
ga_idx  ,
param.gauge_stride 
)

◆ READ_GAUGE_MATRIX() [6/8]

READ_GAUGE_MATRIX ( G  ,
GAUGE1TEX  ,
5  ,
ga_idx  ,
param.gauge_stride 
)

◆ READ_GAUGE_MATRIX() [7/8]

READ_GAUGE_MATRIX ( G  ,
GAUGE0TEX  ,
6  ,
ga_idx  ,
param.gauge_stride 
)

◆ READ_GAUGE_MATRIX() [8/8]

READ_GAUGE_MATRIX ( G  ,
GAUGE1TEX  ,
7  ,
ga_idx  ,
param.gauge_stride 
)

◆ READ_SPINOR()

READ_SPINOR ( SPINORTEX  ,
param.sp_stride  ,
sp_idx  ,
sp_idx   
)

Referenced by if().

Here is the caller graph for this function:

◆ READ_SPINOR_DOWN()

READ_SPINOR_DOWN ( SPINORTEX  ,
param.sp_stride  ,
sp_idx  ,
sp_idx   
)

◆ READ_SPINOR_SHARED() [1/3]

READ_SPINOR_SHARED ( tx  ,
threadIdx.y  ,
threadIdx.z 
)

◆ READ_SPINOR_SHARED() [2/3]

READ_SPINOR_SHARED ( tx  ,
ty  ,
threadIdx.z 
)

◆ READ_SPINOR_SHARED() [3/3]

READ_SPINOR_SHARED ( tx  ,
threadIdx.y  ,
tz   
)

◆ READ_SPINOR_UP()

READ_SPINOR_UP ( SPINORTEX  ,
param.sp_stride  ,
sp_idx  ,
sp_idx   
)

◆ RECONSTRUCT_GAUGE_MATRIX() [1/8]

RECONSTRUCT_GAUGE_MATRIX ( 0 )

◆ RECONSTRUCT_GAUGE_MATRIX() [2/8]

RECONSTRUCT_GAUGE_MATRIX ( 1 )

◆ RECONSTRUCT_GAUGE_MATRIX() [3/8]

RECONSTRUCT_GAUGE_MATRIX ( 2 )

◆ RECONSTRUCT_GAUGE_MATRIX() [4/8]

RECONSTRUCT_GAUGE_MATRIX ( 3 )

◆ RECONSTRUCT_GAUGE_MATRIX() [5/8]

RECONSTRUCT_GAUGE_MATRIX ( 4 )

◆ RECONSTRUCT_GAUGE_MATRIX() [6/8]

RECONSTRUCT_GAUGE_MATRIX ( 5 )

◆ RECONSTRUCT_GAUGE_MATRIX() [7/8]

RECONSTRUCT_GAUGE_MATRIX ( 6 )

◆ RECONSTRUCT_GAUGE_MATRIX() [8/8]

RECONSTRUCT_GAUGE_MATRIX ( 7 )

◆ WRITE_SPINOR()

WRITE_SPINOR ( param.sp_stride )

◆ WRITE_SPINOR_SHARED()

WRITE_SPINOR_SHARED ( threadIdx.x  ,
threadIdx.y  ,
threadIdx.z  ,
i 
)

Variable Documentation

◆ a

spinorFloat a = param.a_f

◆ a0_im

Definition at line 928 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

Referenced by if().

◆ A0_im

spinorFloat A0_im = 0

Definition at line 991 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

Referenced by if().

◆ a0_re

Definition at line 928 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

Referenced by if().

◆ A0_re

spinorFloat A0_re = 0

Definition at line 984 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

Referenced by if().

◆ a1_im

Definition at line 929 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

Referenced by if().

◆ A1_im

spinorFloat A1_im = 0

Definition at line 1021 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

Referenced by if().

◆ a1_re

Definition at line 929 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

Referenced by if().

◆ A1_re

spinorFloat A1_re = 0

Definition at line 1014 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

Referenced by if().

◆ a2_im

Definition at line 930 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

Referenced by if().

◆ A2_im

spinorFloat A2_im = 0

Definition at line 1051 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

Referenced by if().

◆ a2_re

Definition at line 930 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

Referenced by if().

◆ A2_re

spinorFloat A2_re = 0

Definition at line 1044 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

Referenced by if().

◆ b0_im

Definition at line 931 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

Referenced by if().

◆ B0_im

spinorFloat B0_im = 0

Definition at line 1005 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

Referenced by if().

◆ b0_re

Definition at line 931 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

Referenced by if().

◆ B0_re

spinorFloat B0_re = 0

Definition at line 998 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

Referenced by if().

◆ b1_im

Definition at line 932 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

Referenced by if().

◆ B1_im

spinorFloat B1_im = 0

Definition at line 1035 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

Referenced by if().

◆ b1_re

Definition at line 932 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

Referenced by if().

◆ B1_re

spinorFloat B1_re = 0

Definition at line 1028 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

Referenced by if().

◆ b2_im

Definition at line 933 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

Referenced by if().

◆ B2_im

spinorFloat B2_im = 0

Definition at line 1065 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

Referenced by if().

◆ b2_re

Definition at line 933 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

Referenced by if().

◆ B2_re

spinorFloat B2_re = 0

Definition at line 1058 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

Referenced by if().

◆ coord

int coord[5]

◆ else

else
Initial value:
{
int tx = (threadIdx.x + blockDim.x - ((coord[0]+1)&1) ) % blockDim.x

Definition at line 1363 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

◆ ga_idx

const int ga_idx = sid

◆ o00_im

o00_im = 0

◆ o00_re

o00_re = 0

◆ o01_im

o01_im = 0

◆ o01_re

o01_re = 0

◆ o02_im

o02_im = 0

◆ o02_re

o02_re = 0

◆ o10_im

o10_im = 0

◆ o10_re

o10_re = 0

◆ o11_im

o11_im = 0

◆ o11_re

o11_re = 0

◆ o12_im

o12_im = 0

◆ o12_re

o12_re = 0

◆ o20_im

o20_im = 0

◆ o20_re

o20_re = 0

◆ o21_im

o21_im = 0

◆ o21_re

o21_re = 0

◆ o22_im

o22_im = 0

◆ o22_re

o22_re = 0

◆ o30_im

o30_im = 0

◆ o30_re

o30_re = 0

◆ o31_im

o31_im = 0

◆ o31_re

o31_re = 0

◆ o32_im

o32_im = 0

◆ o32_re

o32_re = 0

◆ sid

int sid

◆ tx

int tx = (threadIdx.x > 0) ? threadIdx.x-1 : blockDim.x-1

◆ ty

int ty = (threadIdx.y < blockDim.y - 1) ? threadIdx.y + 1 : 0

◆ tz

int tz = (threadIdx.z < blockDim.z - 1) ? threadIdx.z + 1 : 0

◆ X

int X

Definition at line 394 of file asym_wilson_clover_dslash_dagger_fermi_core.h.

Referenced by quda::ShiftUpdate::apply(), quda::ApplyCoarse(), blasCuda(), quda::calculateY(), quda::CoarseCoarseOp(), quda::CoarseOp(), quda::ColorSpinorParam::ColorSpinorParam(), quda::completeKSForceCore(), computeAllLinkSite(), quda::computeCoarseClover(), computeLongLinkSite(), computeMiddleLinkSite(), quda::computeOvrImpSTOUTStep(), computeSideLinkSite(), computeStaggeredForceQuda(), quda::computeStapleRectangle(), coordsFromFaceIndex(), coordsFromFaceIndexStaggered(), coordsFromIndex(), coordsFromIndex3D(), quda::copyGaugeEx(), quda::copyGaugeExKernel(), quda::copyInterior(), quda::blas::copy_ns::copyKernel(), quda::copySpinorEx(), quda::CopySpinorExArg< OutOrder, InOrder, Basis >::CopySpinorExArg(), quda::DiracWilson::createCoarseOp(), quda::DiracClover::createCoarseOp(), quda::DiracCloverPC::createCoarseOp(), quda::DiracTwistedMass::createCoarseOp(), quda::DiracTwistedMassPC::createCoarseOp(), quda::DiracTwistedClover::createCoarseOp(), quda::DiracTwistedCloverPC::createCoarseOp(), quda::DiracCoarse::createCoarseOp(), quda::DiracCoarsePC::createCoarseOp(), dslashReference_5th(), dw_setDims(), quda::extendedCopyColorSpinor(), quda::extractGhost(), quda::extractGhostEx(), quda::ExtractGhostExArg< Order, nDim, dim >::ExtractGhostExArg(), quda::extractGhostExKernel(), quda::extractGhostKernel(), fullLatticeIndex(), fullLatticeIndex_4d(), genericBlas(), genericMultiBlas(), genericReduce(), quda::getCoords(), quda::getCoords5(), quda::getCoordsExtended(), quda::getIndexFull(), quda::ghostFaceIndex(), inBoundary(), indexFromFaceIndexExtended(), indexFromFaceIndexExtendedStaggered(), indexFromFaceIndexStaggered(), invert_multishift_quda_(), invertMultiShiftQuda(), invertMultiSrcQuda(), invertQuda(), isActive(), quda::KSForceArg< Oprod, Gauge, Mom >::KSForceArg(), quda::KSLongLinkArg< Result, Oprod, Gauge >::KSLongLinkArg(), lanczosQuda(), quda::launch_kernel_random(), quda::linkIndex(), quda::linkIndexM1(), quda::linkIndexP1(), quda::linkIndexShift(), 
quda::linkNormalIndexP1(), multiblasCuda(), multiReduceCuda(), neighborIndex_4d(), neighborIndex_5d(), quda::MinResExt::operator()(), printGaugeElement(), printLinkElement(), printMomElement(), printQudaGaugeParam(), printSpinorElement(), read_gauge_field(), read_spinor_field(), reduceCuda(), set_layout(), setDims(), quda::sin(), quda::gauge::timeBoundary(), quda::Transfer::Transfer(), quda::gauge::Reconstruct< 12, Float >::Unpack(), quda::gauge::Reconstruct< 8, Float >::Unpack(), quda::gauge::Reconstruct< 9, Float >::Unpack(), quda::updateSolution(), and write_spinor_field().