QUDA  0.9.0
Macros | Functions | Variables
dw_dslash4_core.h File Reference
#include "read_gauge.h"
#include "io_spinor.h"
Include dependency graph for dw_dslash4_core.h:

Go to the source code of this file.

Macros

#define DSLASH_SHARED_FLOATS_PER_THREAD   0
 
#define VOLATILE   volatile
 
#define spinorFloat   float
 
#define POW(a, b)   __fast_pow(a, b)
 
#define i00_re   I0.x
 
#define i00_im   I0.y
 
#define i01_re   I0.z
 
#define i01_im   I0.w
 
#define i02_re   I1.x
 
#define i02_im   I1.y
 
#define i10_re   I1.z
 
#define i10_im   I1.w
 
#define i11_re   I2.x
 
#define i11_im   I2.y
 
#define i12_re   I2.z
 
#define i12_im   I2.w
 
#define i20_re   I3.x
 
#define i20_im   I3.y
 
#define i21_re   I3.z
 
#define i21_im   I3.w
 
#define i22_re   I4.x
 
#define i22_im   I4.y
 
#define i30_re   I4.z
 
#define i30_im   I4.w
 
#define i31_re   I5.x
 
#define i31_im   I5.y
 
#define i32_re   I5.z
 
#define i32_im   I5.w
 
#define m5   param.m5_f
 
#define mdwf_b5   param.mdwf_b5_f
 
#define mdwf_c5   param.mdwf_c5_f
 
#define mferm   param.mferm_f
 
#define a   param.a
 
#define b   param.b
 
#define g00_re   G0.x
 
#define g00_im   G0.y
 
#define g01_re   G0.z
 
#define g01_im   G0.w
 
#define g02_re   G1.x
 
#define g02_im   G1.y
 
#define g10_re   G1.z
 
#define g10_im   G1.w
 
#define g11_re   G2.x
 
#define g11_im   G2.y
 
#define g12_re   G2.z
 
#define g12_im   G2.w
 
#define g20_re   G3.x
 
#define g20_im   G3.y
 
#define g21_re   G3.z
 
#define g21_im   G3.w
 
#define g22_re   G4.x
 
#define g22_im   G4.y
 
#define gT00_re   (+g00_re)
 
#define gT00_im   (-g00_im)
 
#define gT01_re   (+g10_re)
 
#define gT01_im   (-g10_im)
 
#define gT02_re   (+g20_re)
 
#define gT02_im   (-g20_im)
 
#define gT10_re   (+g01_re)
 
#define gT10_im   (-g01_im)
 
#define gT11_re   (+g11_re)
 
#define gT11_im   (-g11_im)
 
#define gT12_re   (+g21_re)
 
#define gT12_im   (-g21_im)
 
#define gT20_re   (+g02_re)
 
#define gT20_im   (-g02_im)
 
#define gT21_re   (+g12_re)
 
#define gT21_im   (-g12_im)
 
#define gT22_re   (+g22_re)
 
#define gT22_im   (-g22_im)
 
#define SHARED_STRIDE   16
 

Functions

 if (sid >= param.threads * param.dc.Ls) return
 
 if (kernel_type==INTERIOR_KERNEL)
 
 coordsFromFaceIndex< 5, QUDA_4D_PC, kernel_type, 1 > (X, sid, coord, face_idx, face_num, param)
 
 READ_INTERMEDIATE_SPINOR (INTERTEX, param.sp_stride, sid, sid)
 
 ASSN_GAUGE_MATRIX (G, GAUGE0TEX, 0, ga_idx, param.gauge_stride)
 
 READ_SPINOR (SPINORTEX, param.sp_stride, sp_idx, sp_idx)
 
 RECONSTRUCT_GAUGE_MATRIX (0)
 
 ASSN_GAUGE_MATRIX (G, GAUGE1TEX, 1, ga_idx, param.gauge_stride)
 
 RECONSTRUCT_GAUGE_MATRIX (1)
 
 ASSN_GAUGE_MATRIX (G, GAUGE0TEX, 2, ga_idx, param.gauge_stride)
 
 RECONSTRUCT_GAUGE_MATRIX (2)
 
 ASSN_GAUGE_MATRIX (G, GAUGE1TEX, 3, ga_idx, param.gauge_stride)
 
 RECONSTRUCT_GAUGE_MATRIX (3)
 
 ASSN_GAUGE_MATRIX (G, GAUGE0TEX, 4, ga_idx, param.gauge_stride)
 
 RECONSTRUCT_GAUGE_MATRIX (4)
 
 ASSN_GAUGE_MATRIX (G, GAUGE1TEX, 5, ga_idx, param.gauge_stride)
 
 RECONSTRUCT_GAUGE_MATRIX (5)
 
 if (param.gauge_fixed && ga_idx < param.dc.X4X3X2X1hmX3X2X1h)
 
 READ_SPINOR_DOWN (SPINORTEX, param.sp_stride, sp_idx, sp_idx)
 
 RECONSTRUCT_GAUGE_MATRIX (6)
 
 READ_SPINOR_UP (SPINORTEX, param.sp_stride, sp_idx, sp_idx)
 
 RECONSTRUCT_GAUGE_MATRIX (7)
 

Variables

VOLATILE spinorFloat o00_re = i00_re
 
VOLATILE spinorFloat o00_im = i00_im
 
VOLATILE spinorFloat o01_re = i01_re
 
VOLATILE spinorFloat o01_im = i01_im
 
VOLATILE spinorFloat o02_re = i02_re
 
VOLATILE spinorFloat o02_im = i02_im
 
VOLATILE spinorFloat o10_re = i10_re
 
VOLATILE spinorFloat o10_im = i10_im
 
VOLATILE spinorFloat o11_re = i11_re
 
VOLATILE spinorFloat o11_im = i11_im
 
VOLATILE spinorFloat o12_re = i12_re
 
VOLATILE spinorFloat o12_im = i12_im
 
VOLATILE spinorFloat o20_re = i20_re
 
VOLATILE spinorFloat o20_im = i20_im
 
VOLATILE spinorFloat o21_re = i21_re
 
VOLATILE spinorFloat o21_im = i21_im
 
VOLATILE spinorFloat o22_re = i22_re
 
VOLATILE spinorFloat o22_im = i22_im
 
VOLATILE spinorFloat o30_re = i30_re
 
VOLATILE spinorFloat o30_im = i30_im
 
VOLATILE spinorFloat o31_re = i31_re
 
VOLATILE spinorFloat o31_im = i31_im
 
VOLATILE spinorFloat o32_re = i32_re
 
VOLATILE spinorFloat o32_im = i32_im
 
int sid = ((blockIdx.y*blockDim.y + threadIdx.y)*gridDim.x + blockIdx.x)*blockDim.x + threadIdx.x
 
int X
 
int coord [5]
 
int face_idx = sid - face_num*face_volume
 
 else
 
const int face_num = (sid >= face_volume)
 
float4 G0
 
float4 G1
 
float4 G2
 
float4 G3
 
float4 G4
 
const int ga_idx = sid % param.dc.volume_4d_cb
 
spinorFloat a0_re = +i00_re+i30_im
 
spinorFloat a0_im = +i00_im-i30_re
 
spinorFloat a1_re = +i01_re+i31_im
 
spinorFloat a1_im = +i01_im-i31_re
 
spinorFloat a2_re = +i02_re+i32_im
 
spinorFloat a2_im = +i02_im-i32_re
 
spinorFloat b0_re = +i10_re+i20_im
 
spinorFloat b0_im = +i10_im-i20_re
 
spinorFloat b1_re = +i11_re+i21_im
 
spinorFloat b1_im = +i11_im-i21_re
 
spinorFloat b2_re = +i12_re+i22_im
 
spinorFloat b2_im = +i12_im-i22_re
 
spinorFloat A0_re = 0
 
spinorFloat A0_im = 0
 
spinorFloat B0_re = 0
 
spinorFloat B0_im = 0
 
spinorFloat A1_re = 0
 
spinorFloat A1_im = 0
 
spinorFloat B1_re = 0
 
spinorFloat B1_im = 0
 
spinorFloat A2_re = 0
 
spinorFloat A2_im = 0
 
spinorFloat B2_re = 0
 
spinorFloat B2_im = 0
 

Macro Definition Documentation

◆ a

#define a   param.a

Definition at line 82 of file dw_dslash4_core.h.

Referenced by __fast_pow(), quda::blas::_caxpby(), quda::blas::_caxpy(), quda::blas::_cxpaypbz(), quda::gauge::Accessor< Float, nColor, QUDA_QDP_GAUGE_ORDER >::Accessor(), quda::clover::Accessor< Float, nColor, nSpin, QUDA_PACKED_CLOVER_ORDER >::Accessor(), accumulateComplexDotProduct(), accumulateComplexProduct(), accumulateConjugateProduct(), add_dbldbl(), add_double_to_dbldbl(), add_double_to_doubledouble(), adjoint_su3_matrix(), quda::aligned_malloc(), applyTwist(), quda::asymCloverDslashCuda(), quda::Trig< isHalf, T >::Atan2(), quda::Trig< false, float >::Atan2(), quda::Trig< true, float >::Atan2(), ax(), aX(), quda::blas::ax(), quda::ax(), quda::blas::axCpu(), axmy(), quda::blas::axpby(), axpby(), quda::blas::axpbyCpu(), aXpY(), axpy(), quda::blas::axpy(), quda::axpy(), quda::blas::axpyBzpcx(), quda::blas::axpyBzpcxCpu(), quda::blas::axpyCGNorm(), quda::blas::axpyCpu(), quda::blas::axpyNorm(), quda::blas::axpyNormCpu(), quda::blas::axpyReDot(), quda::blas::axpyZpbx(), quda::blas::axpyZpbxCpu(), benchmark(), mixed::blasCuda(), blasCuda(), quda::blas::cabxpyAx(), quda::blas::cabxpyAxCpu(), quda::blas::cabxpyAxNorm(), quda::blas::cabxpyAxNormCpu(), quda::blas::caxpby(), quda::blas::caxpbyCpu(), quda::blas::caxpbypcz(), quda::blas::caxpbypczpw(), quda::blas::caxpbypczpwCpu(), quda::blas::caxpbypz(), quda::blas::caxpbypzCpu(), quda::blas::caxpbypzYmbw(), quda::blas::caxpbypzYmbwcDotProductUYNormY(), quda::blas::caxpbypzYmbwcDotProductUYNormYCpu(), quda::blas::caxpbypzYmbwCpu(), quda::blas::caxpy(), quda::blas::Caxpy_(), quda::blas::caxpy_L(), quda::blas::caxpy_recurse(), quda::blas::caxpy_U(), quda::blas::caxpyBxpz(), quda::blas::caxpyBzpx(), quda::blas::caxpyCpu(), quda::blas::caxpyDotzy(), quda::blas::caxpyDotzyCpu(), quda::blas::caxpyNorm(), quda::blas::caxpyNormCpu(), quda::blas::caxpyXmaz(), quda::blas::caxpyXmazCpu(), quda::blas::caxpyXmazMR(), quda::blas::caxpyXmazNormX(), quda::blas::caxpyXmazNormXCpu(), quda::blas::caxpyz(), quda::blas::caxpyz_L(), 
quda::blas::caxpyz_recurse(), quda::blas::caxpyz_U(), quda::blas::cdot_(), quda::blas::cdotNormA_(), quda::blas::cdotNormB_(), quda::blas::cDotProduct(), quda::blas::cDotProductCpu(), quda::blas::cDotProductNormACpu(), quda::blas::cDotProductNormBCpu(), quda::ColorSpinorField::checkField(), quda::LatticeField::checkField(), checkLength(), checkSpinor(), quda::Dirac::checkSpinorAlias(), quda::cloverDslashCuda(), quda::CloverFieldParam::CloverFieldParam(), quda::ColorSpinor< Float, Nc, Ns >::ColorSpinor(), quda::ColorSpinor< Float, Nc, 4 >::ColorSpinor(), quda::ColorSpinor< Float, Nc, 2 >::ColorSpinor(), quda::colorSpinorParam(), comm_drand(), quda::cpuColorSpinorField::Compare(), compare_floats(), compareFloats(), GaugeAlgTest::comparePlaquette(), complexAddTo(), complexConjugateProduct(), complexDotProduct(), complexProduct(), quda::computeBeta(), quda::BiCGstabL::computeTau(), quda::copy(), copyFloatN(), quda::Trig< isHalf, T >::Cos(), quda::Trig< false, float >::Cos(), quda::Trig< true, float >::Cos(), quda::DiracWilson::createCoarseOp(), quda::DiracClover::createCoarseOp(), quda::DiracCloverPC::createCoarseOp(), quda::DiracTwistedMass::createCoarseOp(), quda::DiracTwistedMassPC::createCoarseOp(), quda::DiracTwistedClover::createCoarseOp(), quda::DiracTwistedCloverPC::createCoarseOp(), quda::DiracCoarse::createCoarseOp(), quda::DiracCoarsePC::createCoarseOp(), quda::blas::cxpaypbz(), quda::blas::cxpaypbzCpu(), quda::RitzMat::deleteTmp(), quda::Dirac::deleteTmp(), quda::deserializeTuneCache(), quda::device_malloc_(), quda::device_pinned_malloc_(), div_dbldbl(), quda::domainWallDslashCuda(), dot(), quda::blas::dot_(), quda::blas::dotNormA_(), dsadd(), quda::DiracTwistedMassPC::Dslash(), quda::DiracTwistedCloverPC::Dslash(), quda::DiracDomainWall4DPC::Dslash5invXpay(), quda::DiracTwistedMassPC::DslashXpay(), quda::DiracTwistedCloverPC::DslashXpay(), quda::extractGhost(), quda::extractGhostEx(), quda::extractGhostExKernel(), quda::extractGhostKernel(), 
quda::extractor(), quda::colorspinor::FloatNOrder< Float, Ns, Nc, N, huge_alloc >::FloatNOrder(), FloatToShort(), quda::ColorSpinor< Float, Nc, 4 >::gamma(), quda::genericCompare(), quda::genericPackGhost(), quda::genericPrintVector(), quda::genericSource(), get_dbldbl_head(), get_dbldbl_tail(), quda::getDeterminant(), getNorm(), quda::gauge::Reconstruct< 13, Float >::getPhase(), quda::gauge::Reconstruct< 9, Float >::getPhase(), quda::getRealTraceUVdagger(), quda::getSubTraceUnit(), quda::getTrace(), quda::gauge::GhostAccessor< Float, nColor, QUDA_QDP_GAUGE_ORDER, native_ghost >::GhostAccessor(), quda::gauge::GhostAccessor< Float, nColor, QUDA_MILC_GAUGE_ORDER, native_ghost >::GhostAccessor(), quda::gauge::GhostAccessor< Float, nColor, QUDA_FLOAT2_GAUGE_ORDER, native_ghost >::GhostAccessor(), quda::colorspinor::GhostAccessorCB< Float, nSpin, nColor, nVec, QUDA_SPACE_SPIN_COLOR_FIELD_ORDER >::GhostAccessorCB(), quda::colorspinor::GhostAccessorCB< Float, nSpin, nColor, nVec, QUDA_FLOAT2_FIELD_ORDER >::GhostAccessorCB(), quda::colorspinor::GhostAccessorCB< Float, nSpin, nColor, nVec, QUDA_FLOAT4_FIELD_ORDER >::GhostAccessorCB(), quda::HMatrix< T, N >::HMatrix(), if(), quda::ColorSpinor< Float, Nc, 4 >::igamma(), quda::injector(), l2(), link_sanity_check_internal_12(), llfat_add_su3_matrix(), llfat_mult_su3_an(), llfat_mult_su3_na(), llfat_mult_su3_nn(), llfat_scalar_mult_add_su3_matrix(), llfat_scalar_mult_su3_matrix(), quda::load_global_float4(), quda::load_streaming_double2(), quda::load_streaming_float4(), quda::Location_(), quda::DiracTwistedMass::M(), quda::DiracTwistedMassPC::M(), quda::DiracTwistedClover::M(), quda::DiracTwistedCloverPC::M(), quda::make_Complex(), quda::make_Float2(), quda::make_FloatN(), quda::make_shortN(), quda::mapped_malloc_(), quda::Matrix< T, N >::Matrix(), matrix_mult_aa(), matrix_mult_an(), matrix_mult_na(), matrix_mult_nn(), quda::max_fabs(), mod(), mul_dbldbl(), mul_double_to_dbldbl(), mul_double_to_doubledouble(), 
mult_adj_su3_mat_vec(), mult_su3_an(), mult_su3_mat_vec(), mult_su3_na(), mult_su3_nn(), mixed::multiblasCuda(), multiblasCuda(), quda::blas::multiReduce_recurse(), mixed::multiReduceCuda(), multiReduceCuda(), quda::DiracTwistedMass::NdegTwistedDslash(), quda::DiracTwistedMass::NdegTwistedDslashXpay(), ndegTwistGamma5(), neg_dbldbl(), quda::RitzMat::newTmp(), quda::Dirac::newTmp(), quda::blas::norm(), quda::norm1(), quda::blas::norm1_(), quda::norm2(), quda::blas::norm2_(), normalize(), quda::blas::normCpu(), quda::vector< float, 2 >::operator float2(), quda::vector< double, 2 >::operator type(), quda::Summ< T >::operator()(), quda::Summ< double2 >::operator()(), quda::Summ< double3 >::operator()(), quda::Summ< double4 >::operator()(), quda::blas::multicaxpy_< NXZ, Float2, FloatN >::operator()(), quda::clover::Accessor< Float, nColor, nSpin, QUDA_FLOAT4_CLOVER_ORDER >::operator()(), quda::reduce_vector< T >::operator()(), quda::clover::Accessor< Float, nColor, nSpin, QUDA_PACKED_CLOVER_ORDER >::operator()(), quda::blas::multicaxpyz_< NXZ, Float2, FloatN >::operator()(), quda::MPBiCGstab::operator()(), quda::blas::multi_caxpyBxpz_< NXZ, Float2, FloatN >::operator()(), operator*(), quda::operator*(), quda::operator*=(), quda::operator+(), operator+(), operator+=(), quda::ColorSpinor< Float, Nc, Ns >::operator+=(), quda::vector_type< scalar, n >::operator+=(), quda::ColorSpinor< Float, Nc, 4 >::operator+=(), quda::HMatrix_wrapper< T, Hmat >::operator+=(), quda::operator+=(), doubledouble2::operator+=(), doubledouble3::operator+=(), quda::ColorSpinor< Float, Nc, 2 >::operator+=(), operator-(), quda::operator-(), quda::operator-=(), operator/(), quda::Int2::operator<(), quda::operator<<(), quda::MemAlloc::operator=(), doublesingle2::operator=(), quda::ColorSpinor< Float, Nc, Ns >::operator=(), quda::clover_wrapper< Float, T >::operator=(), quda::gauge_wrapper< Float, T >::operator=(), quda::colorspinor_wrapper< Float, T >::operator=(), quda::gauge_ghost_wrapper< Float, 
T >::operator=(), quda::Matrix< T, N >::operator=(), quda::ColorSpinor< Float, Nc, 4 >::operator=(), quda::colorspinor_ghost_wrapper< Float, T >::operator=(), quda::HMatrix_wrapper< T, Hmat >::operator=(), quda::HMatrix< T, N >::operator=(), quda::ColorSpinor< Float, Nc, 2 >::operator=(), operator>(), orthogonalize(), quda::outerProd(), quda::outerProdSpinTrace(), quda::cudaColorSpinorField::pack(), quda::cudaColorSpinorField::packGhost(), quda::PackGhostArg< Field >::PackGhostArg(), quda::colorspinor::PaddedSpaceSpinorColorOrder< Float, Ns, Nc >::PaddedSpaceSpinorColorOrder(), quda::pinned_malloc_(), quda::Precision_(), quda::print_alloc(), print_m(), print_su3_matrix(), quda::colorspinor::QDPJITDiracOrder< Float, Ns, Nc >::QDPJITDiracOrder(), quadSum(), quda::Random< double >(), quda::Random< float >(), quda::blas::reDotProduct(), quda::blas::reDotProductCpu(), mixed::reduceCuda(), reduceCuda(), rsqrt_dbldbl(), s2d(), quda::s2d(), s2f(), quda::s2f(), quda::safe_malloc_(), scalar_mult_add_su3_matrix(), scalar_mult_add_su3_vector(), scalar_mult_sub_su3_matrix(), scale_su3_matrix(), shortToFloat(), quda::ColorSpinor< Float, Nc, 4 >::sigma(), quda::Trig< isHalf, T >::Sin(), quda::Trig< false, float >::Sin(), quda::Trig< true, float >::Sin(), quda::Trig< isHalf, T >::SinCos(), quda::Trig< false, float >::SinCos(), site_link_sanity_check_internal_12(), quda::colorspinor::SpaceColorSpinorOrder< Float, Ns, Nc >::SpaceColorSpinorOrder(), quda::colorspinor::SpaceSpinorColorOrder< Float, Ns, Nc >::SpaceSpinorColorOrder(), sqrt_dbldbl(), su3_adjoint(), su3_projector(), sub(), sub_dbldbl(), quda::SubTraceUnit(), sum(), swap(), test(), quda::ColorSpinor< Float, Nc, 4 >::toNonRel(), quda::ColorSpinor< Float, Nc, 4 >::toRel(), quda::track_malloc(), quda::blas::tripleCGUpdate(), quda::TuneKey::TuneKey(), twistCloverGamma5(), quda::DiracTwistedMass::TwistedDslash(), quda::DiracTwistedMass::TwistedDslashXpay(), twistGamma5(), quda::u32toa(), quda::u64toa(), ucopy(), quda::vector< 
double, 2 >::vector(), quda::vector< float, 2 >::vector(), vfill_m(), quda::blas::xpay(), xpay(), quda::blas::xpaycDotzy(), quda::blas::xpaycDotzyCpu(), quda::blas::xpayCpu(), quda::blas::xpayz(), quda::zero(), and quda::blas::zero().

◆ b

#define b   param.b

Definition at line 83 of file dw_dslash4_core.h.

Referenced by __fast_pow(), quda::blas::_caxpby(), quda::blas::_cxpaypbz(), accumulateComplexDotProduct(), accumulateComplexProduct(), accumulateConjugateProduct(), add_dbldbl(), add_double_to_dbldbl(), add_double_to_doubledouble(), adjoint_su3_matrix(), quda::LatticeField::allocateGhostBuffer(), quda::Trig< isHalf, T >::Atan2(), quda::Trig< false, float >::Atan2(), quda::Trig< true, float >::Atan2(), quda::ax(), quda::blas::axpby(), axpby(), quda::blas::axpbyCpu(), quda::blas::axpyBzpcx(), quda::blas::axpyBzpcxCpu(), quda::blas::axpyZpbx(), quda::blas::axpyZpbxCpu(), quda::linalg::Cholesky< Mat, T, N, fast >::backward(), bdSVD(), benchmark(), mixed::blasCuda(), blasCuda(), quda::blas::cabxpyAx(), quda::blas::cabxpyAxCpu(), quda::blas::cabxpyAxNorm(), quda::blas::cabxpyAxNormCpu(), quda::blas::caxpby(), quda::blas::caxpbyCpu(), quda::blas::caxpbypcz(), quda::blas::caxpbypczpw(), quda::blas::caxpbypczpwCpu(), quda::blas::caxpbypz(), quda::blas::caxpbypzCpu(), quda::blas::caxpbypzYmbw(), quda::blas::caxpbypzYmbwcDotProductUYNormY(), quda::blas::caxpbypzYmbwcDotProductUYNormYCpu(), quda::blas::caxpbypzYmbwCpu(), quda::blas::caxpy_recurse(), quda::blas::caxpyBxpz(), quda::blas::caxpyBzpx(), quda::blas::caxpyz_recurse(), quda::blas::cdot_(), quda::blas::cdotNormA_(), quda::blas::cdotNormB_(), quda::blas::cDotProduct(), quda::blas::cDotProductCpu(), quda::blas::cDotProductNormACpu(), quda::blas::cDotProductNormBCpu(), quda::ColorSpinorField::checkField(), checkLength(), checkSpinor(), quda::Dirac::checkSpinorAlias(), quda::cpuColorSpinorField::Compare(), compare_floats(), compareFloats(), GaugeAlgTest::comparePlaquette(), complexAddTo(), complexConjugateProduct(), complexDotProduct(), complexProduct(), quda::computeBeta(), quda::BiCGstabL::computeTau(), quda::copy(), copyFloatN(), quda::LatticeField::createComms(), quda::cudaColorSpinorField::createComms(), quda::LatticeField::createIPCComms(), quda::blas::cxpaypbz(), quda::blas::cxpaypbzCpu(), 
quda::LatticeField::destroyComms(), quda::LatticeField::destroyIPCComms(), div_dbldbl(), quda::domainWallDslashCuda(), dot(), quda::blas::dot_(), quda::blas::dotNormA_(), dsadd(), quda::DiracTwistedMassPC::Dslash(), quda::DiracTwistedCloverPC::Dslash(), quda::DiracDomainWall4DPC::Dslash5invXpay(), quda::DiracTwistedMassPC::DslashXpay(), quda::DiracTwistedCloverPC::DslashXpay(), quda::IncEigCG::eigCGsolve(), quda::cudaGaugeField::exchangeExtendedGhost(), quda::extractGhost(), quda::extractGhostEx(), quda::extractGhostExKernel(), quda::extractGhostKernel(), quda::extractor(), quda::linalg::Cholesky< Mat, T, N, fast >::forward(), quda::LatticeField::freeGhostBuffer(), quda::MG::generateNullVectors(), quda::genericCompare(), getLambdaMax(), quda::getRealTraceUVdagger(), if(), quda::IncEigCG::initCGsolve(), quda::injector(), invertMultiShiftQuda(), invertMultiSrcQuda(), invertQuda(), l2(), link_sanity_check_internal_12(), llfat_add_su3_matrix(), llfat_mult_su3_an(), llfat_mult_su3_na(), llfat_mult_su3_nn(), llfat_scalar_mult_add_su3_matrix(), llfat_scalar_mult_su3_matrix(), quda::Location_(), quda::DiracTwistedMass::M(), quda::DiracTwistedMassPC::M(), quda::massRescale(), matrix_mult_aa(), matrix_mult_an(), matrix_mult_na(), matrix_mult_nn(), quda::max_fabs(), mod(), mul_dbldbl(), mul_double_to_dbldbl(), mul_double_to_doubledouble(), mult_adj_su3_mat_vec(), mult_su3_an(), mult_su3_mat_vec(), mult_su3_na(), mult_su3_nn(), mixed::multiblasCuda(), multiblasCuda(), quda::blas::multiReduce_recurse(), mixed::multiReduceCuda(), multiReduceCuda(), quda::DiracTwistedMass::NdegTwistedDslash(), quda::DiracTwistedMass::NdegTwistedDslashXpay(), ndegTwistGamma5(), quda::norm1(), quda::norm2(), quda::Summ< T >::operator()(), quda::Summ< double2 >::operator()(), quda::Summ< double3 >::operator()(), quda::Summ< double4 >::operator()(), quda::Deflation::operator()(), quda::reduce_vector< T >::operator()(), quda::MG::operator()(), quda::CG::operator()(), quda::CGNE::operator()(), 
quda::CGNR::operator()(), quda::MPCG::operator()(), quda::PreconCG::operator()(), quda::BiCGstab::operator()(), quda::SimpleBiCGstab::operator()(), quda::MPBiCGstab::operator()(), quda::BiCGstabL::operator()(), quda::GCR::operator()(), quda::MR::operator()(), quda::SD::operator()(), quda::XSD::operator()(), quda::PreconditionedSolver::operator()(), quda::MultiShiftCG::operator()(), quda::blas::multi_caxpyBxpz_< NXZ, Float2, FloatN >::operator()(), quda::MinResExt::operator()(), quda::GMResDR::operator()(), operator*(), quda::operator*(), quda::operator*=(), quda::operator+(), operator+(), doublesingle::operator+=(), operator+=(), doublesingle2::operator+=(), doublesingle3::operator+=(), quda::operator+=(), operator-(), quda::operator-(), quda::operator-=(), operator/(), doublesingle::operator=(), quda::Matrix< T, N >::operator=(), quda::HMatrix< T, N >::operator=(), operator>(), orthogonalize(), quda::outerProd(), quda::outerProdSpinTrace(), quda::cudaColorSpinorField::pack(), quda::cudaColorSpinorField::packGhost(), quda::Precision_(), quda::DiracWilson::prepare(), quda::DiracWilsonPC::prepare(), quda::DiracClover::prepare(), quda::DiracCloverPC::prepare(), quda::DiracDomainWall::prepare(), quda::DiracDomainWallPC::prepare(), quda::DiracDomainWall4DPC::prepare(), quda::DiracMobius::prepare(), quda::DiracMobiusPC::prepare(), quda::DiracTwistedMass::prepare(), quda::DiracTwistedMassPC::prepare(), quda::DiracTwistedClover::prepare(), quda::DiracTwistedCloverPC::prepare(), quda::DiracStaggered::prepare(), quda::DiracStaggeredPC::prepare(), quda::DiracImprovedStaggered::prepare(), quda::DiracImprovedStaggeredPC::prepare(), quda::DiracCoarse::prepare(), quda::DiracCoarsePC::prepare(), quda::GaugeLaplace::prepare(), quda::GaugeLaplacePC::prepare(), quadSum(), quda::Random< double >(), quda::Random< float >(), quda::DiracWilsonPC::reconstruct(), quda::DiracCloverPC::reconstruct(), quda::DiracDomainWallPC::reconstruct(), quda::DiracDomainWall4DPC::reconstruct(), 
quda::DiracMobiusPC::reconstruct(), quda::DiracTwistedMassPC::reconstruct(), quda::DiracTwistedCloverPC::reconstruct(), quda::DiracCoarsePC::reconstruct(), quda::GaugeLaplacePC::reconstruct(), quda::blas::reDotProduct(), quda::blas::reDotProductCpu(), mixed::reduceCuda(), reduceCuda(), scalar_mult_add_su3_matrix(), scalar_mult_add_su3_vector(), scalar_mult_sub_su3_matrix(), quda::ColorSpinor< Float, Nc, 4 >::sigma(), site_link_sanity_check_internal_12(), quda::solve(), quda::CG::solve(), su3_adjoint(), su3_projector(), sub(), sub_dbldbl(), sum(), swap(), test(), quda::blas::tripleCGUpdate(), quda::DiracTwistedMass::TwistedDslash(), quda::DiracTwistedMass::TwistedDslashXpay(), twistGamma5(), quda::u32toa(), quda::u64toa(), and ucopy().

◆ DSLASH_SHARED_FLOATS_PER_THREAD

#define DSLASH_SHARED_FLOATS_PER_THREAD   0

Definition at line 3 of file dw_dslash4_core.h.

◆ g00_im

#define g00_im   G0.y

Definition at line 109 of file dw_dslash4_core.h.

◆ g00_re

#define g00_re   G0.x

Definition at line 108 of file dw_dslash4_core.h.

◆ g01_im

#define g01_im   G0.w

Definition at line 111 of file dw_dslash4_core.h.

◆ g01_re

#define g01_re   G0.z

Definition at line 110 of file dw_dslash4_core.h.

◆ g02_im

#define g02_im   G1.y

Definition at line 113 of file dw_dslash4_core.h.

◆ g02_re

#define g02_re   G1.x

Definition at line 112 of file dw_dslash4_core.h.

◆ g10_im

#define g10_im   G1.w

Definition at line 115 of file dw_dslash4_core.h.

◆ g10_re

#define g10_re   G1.z

Definition at line 114 of file dw_dslash4_core.h.

◆ g11_im

#define g11_im   G2.y

Definition at line 117 of file dw_dslash4_core.h.

◆ g11_re

#define g11_re   G2.x

Definition at line 116 of file dw_dslash4_core.h.

◆ g12_im

#define g12_im   G2.w

Definition at line 119 of file dw_dslash4_core.h.

◆ g12_re

#define g12_re   G2.z

Definition at line 118 of file dw_dslash4_core.h.

◆ g20_im

#define g20_im   G3.y

Definition at line 121 of file dw_dslash4_core.h.

◆ g20_re

#define g20_re   G3.x

Definition at line 120 of file dw_dslash4_core.h.

◆ g21_im

#define g21_im   G3.w

Definition at line 123 of file dw_dslash4_core.h.

◆ g21_re

#define g21_re   G3.z

Definition at line 122 of file dw_dslash4_core.h.

◆ g22_im

#define g22_im   G4.y

Definition at line 125 of file dw_dslash4_core.h.

◆ g22_re

#define g22_re   G4.x

Definition at line 124 of file dw_dslash4_core.h.

◆ gT00_im

#define gT00_im   (-g00_im)

Definition at line 131 of file dw_dslash4_core.h.

◆ gT00_re

#define gT00_re   (+g00_re)

Definition at line 130 of file dw_dslash4_core.h.

◆ gT01_im

#define gT01_im   (-g10_im)

Definition at line 133 of file dw_dslash4_core.h.

◆ gT01_re

#define gT01_re   (+g10_re)

Definition at line 132 of file dw_dslash4_core.h.

◆ gT02_im

#define gT02_im   (-g20_im)

Definition at line 135 of file dw_dslash4_core.h.

◆ gT02_re

#define gT02_re   (+g20_re)

Definition at line 134 of file dw_dslash4_core.h.

◆ gT10_im

#define gT10_im   (-g01_im)

Definition at line 137 of file dw_dslash4_core.h.

◆ gT10_re

#define gT10_re   (+g01_re)

Definition at line 136 of file dw_dslash4_core.h.

◆ gT11_im

#define gT11_im   (-g11_im)

Definition at line 139 of file dw_dslash4_core.h.

◆ gT11_re

#define gT11_re   (+g11_re)

Definition at line 138 of file dw_dslash4_core.h.

◆ gT12_im

#define gT12_im   (-g21_im)

Definition at line 141 of file dw_dslash4_core.h.

◆ gT12_re

#define gT12_re   (+g21_re)

Definition at line 140 of file dw_dslash4_core.h.

◆ gT20_im

#define gT20_im   (-g02_im)

Definition at line 143 of file dw_dslash4_core.h.

◆ gT20_re

#define gT20_re   (+g02_re)

Definition at line 142 of file dw_dslash4_core.h.

◆ gT21_im

#define gT21_im   (-g12_im)

Definition at line 145 of file dw_dslash4_core.h.

◆ gT21_re

#define gT21_re   (+g12_re)

Definition at line 144 of file dw_dslash4_core.h.

◆ gT22_im

#define gT22_im   (-g22_im)

Definition at line 147 of file dw_dslash4_core.h.

◆ gT22_re

#define gT22_re   (+g22_re)

Definition at line 146 of file dw_dslash4_core.h.

◆ i00_im

#define i00_im   I0.y

Definition at line 55 of file dw_dslash4_core.h.

Referenced by if().

◆ i00_re

#define i00_re   I0.x

Definition at line 54 of file dw_dslash4_core.h.

Referenced by if().

◆ i01_im

#define i01_im   I0.w

Definition at line 57 of file dw_dslash4_core.h.

Referenced by if().

◆ i01_re

#define i01_re   I0.z

Definition at line 56 of file dw_dslash4_core.h.

Referenced by if().

◆ i02_im

#define i02_im   I1.y

Definition at line 59 of file dw_dslash4_core.h.

Referenced by if().

◆ i02_re

#define i02_re   I1.x

Definition at line 58 of file dw_dslash4_core.h.

Referenced by if().

◆ i10_im

#define i10_im   I1.w

Definition at line 61 of file dw_dslash4_core.h.

Referenced by if().

◆ i10_re

#define i10_re   I1.z

Definition at line 60 of file dw_dslash4_core.h.

Referenced by if().

◆ i11_im

#define i11_im   I2.y

Definition at line 63 of file dw_dslash4_core.h.

Referenced by if().

◆ i11_re

#define i11_re   I2.x

Definition at line 62 of file dw_dslash4_core.h.

Referenced by if().

◆ i12_im

#define i12_im   I2.w

Definition at line 65 of file dw_dslash4_core.h.

Referenced by if().

◆ i12_re

#define i12_re   I2.z

Definition at line 64 of file dw_dslash4_core.h.

Referenced by if().

◆ i20_im

#define i20_im   I3.y

Definition at line 67 of file dw_dslash4_core.h.

Referenced by if().

◆ i20_re

#define i20_re   I3.x

Definition at line 66 of file dw_dslash4_core.h.

Referenced by if().

◆ i21_im

#define i21_im   I3.w

Definition at line 69 of file dw_dslash4_core.h.

Referenced by if().

◆ i21_re

#define i21_re   I3.z

Definition at line 68 of file dw_dslash4_core.h.

Referenced by if().

◆ i22_im

#define i22_im   I4.y

Definition at line 71 of file dw_dslash4_core.h.

Referenced by if().

◆ i22_re

#define i22_re   I4.x

Definition at line 70 of file dw_dslash4_core.h.

Referenced by if().

◆ i30_im

#define i30_im   I4.w

Definition at line 73 of file dw_dslash4_core.h.

Referenced by if().

◆ i30_re

#define i30_re   I4.z

Definition at line 72 of file dw_dslash4_core.h.

Referenced by if().

◆ i31_im

#define i31_im   I5.y

Definition at line 75 of file dw_dslash4_core.h.

Referenced by if().

◆ i31_re

#define i31_re   I5.x

Definition at line 74 of file dw_dslash4_core.h.

Referenced by if().

◆ i32_im

#define i32_im   I5.w

Definition at line 77 of file dw_dslash4_core.h.

Referenced by if().

◆ i32_re

#define i32_re   I5.z

Definition at line 76 of file dw_dslash4_core.h.

Referenced by if().

◆ m5

#define m5   param.m5_f

Definition at line 78 of file dw_dslash4_core.h.

Referenced by quda::MDWFDslashCuda(), and printQudaInvertParam().

◆ mdwf_b5

#define mdwf_b5   param.mdwf_b5_f

Definition at line 79 of file dw_dslash4_core.h.

◆ mdwf_c5

#define mdwf_c5   param.mdwf_c5_f

Definition at line 80 of file dw_dslash4_core.h.

◆ mferm

#define mferm   param.mferm_f

◆ POW

#define POW (   a,
  b 
)    __fast_pow(a, b)

Definition at line 53 of file dw_dslash4_core.h.

◆ SHARED_STRIDE

#define SHARED_STRIDE   16

Definition at line 185 of file dw_dslash4_core.h.

◆ spinorFloat

#define spinorFloat   float

Definition at line 52 of file dw_dslash4_core.h.

Referenced by if().

◆ VOLATILE

#define VOLATILE   volatile

Definition at line 9 of file dw_dslash4_core.h.

Function Documentation

◆ ASSN_GAUGE_MATRIX() [1/6]

ASSN_GAUGE_MATRIX ( G  ,
GAUGE0TEX  ,
0  ,
ga_idx  ,
param.gauge_stride 
)

◆ ASSN_GAUGE_MATRIX() [2/6]

ASSN_GAUGE_MATRIX ( G  ,
GAUGE1TEX  ,
1  ,
ga_idx  ,
param.gauge_stride 
)

◆ ASSN_GAUGE_MATRIX() [3/6]

ASSN_GAUGE_MATRIX ( G  ,
GAUGE0TEX  ,
2  ,
ga_idx  ,
param.gauge_stride 
)

◆ ASSN_GAUGE_MATRIX() [4/6]

ASSN_GAUGE_MATRIX ( G  ,
GAUGE1TEX  ,
3  ,
ga_idx  ,
param.gauge_stride 
)

◆ ASSN_GAUGE_MATRIX() [5/6]

ASSN_GAUGE_MATRIX ( G  ,
GAUGE0TEX  ,
4  ,
ga_idx  ,
param.gauge_stride 
)

◆ ASSN_GAUGE_MATRIX() [6/6]

ASSN_GAUGE_MATRIX ( G  ,
GAUGE1TEX  ,
5  ,
ga_idx  ,
param.gauge_stride 
)

◆ coordsFromFaceIndex< 5, QUDA_4D_PC, kernel_type, 1 >()

coordsFromFaceIndex< 5, QUDA_4D_PC, kernel_type, 1 > ( X  ,
sid  ,
coord  ,
face_idx  ,
face_num  ,
param   
)

◆ if() [1/3]

if (sid >= param.threads * param.dc.Ls)

◆ if() [2/3]

if (kernel_type == INTERIOR_KERNEL)

◆ if() [3/3]

if (param.gauge_fixed && ga_idx < param.dc.X4X3X2X1hmX3X2X1h)

◆ READ_INTERMEDIATE_SPINOR()

READ_INTERMEDIATE_SPINOR ( INTERTEX  ,
param.sp_stride  ,
sid  ,
sid   
)

◆ READ_SPINOR()

READ_SPINOR ( SPINORTEX  ,
param.sp_stride  ,
sp_idx  ,
sp_idx   
)

◆ READ_SPINOR_DOWN()

READ_SPINOR_DOWN ( SPINORTEX  ,
param.sp_stride  ,
sp_idx  ,
sp_idx   
)

Referenced by if().

Here is the caller graph for this function:

◆ READ_SPINOR_UP()

READ_SPINOR_UP ( SPINORTEX  ,
param.sp_stride  ,
sp_idx  ,
sp_idx   
)

◆ RECONSTRUCT_GAUGE_MATRIX() [1/8]

RECONSTRUCT_GAUGE_MATRIX (0)

◆ RECONSTRUCT_GAUGE_MATRIX() [2/8]

RECONSTRUCT_GAUGE_MATRIX (1)

◆ RECONSTRUCT_GAUGE_MATRIX() [3/8]

RECONSTRUCT_GAUGE_MATRIX (2)

◆ RECONSTRUCT_GAUGE_MATRIX() [4/8]

RECONSTRUCT_GAUGE_MATRIX (3)

◆ RECONSTRUCT_GAUGE_MATRIX() [5/8]

RECONSTRUCT_GAUGE_MATRIX (4)

◆ RECONSTRUCT_GAUGE_MATRIX() [6/8]

RECONSTRUCT_GAUGE_MATRIX (5)

◆ RECONSTRUCT_GAUGE_MATRIX() [7/8]

RECONSTRUCT_GAUGE_MATRIX (6)

◆ RECONSTRUCT_GAUGE_MATRIX() [8/8]

RECONSTRUCT_GAUGE_MATRIX (7)

Variable Documentation

◆ a0_im

a0_im = +i00_im-i30_re

Definition at line 306 of file dw_dslash4_core.h.

Referenced by if().

◆ A0_im

A0_im = 0

Definition at line 363 of file dw_dslash4_core.h.

Referenced by if().

◆ a0_re

a0_re = +i00_re+i30_im

Definition at line 306 of file dw_dslash4_core.h.

Referenced by if().

◆ A0_re

A0_re = 0

Definition at line 356 of file dw_dslash4_core.h.

Referenced by if().

◆ a1_im

a1_im = +i01_im-i31_re

Definition at line 307 of file dw_dslash4_core.h.

Referenced by if().

◆ A1_im

A1_im = 0

Definition at line 393 of file dw_dslash4_core.h.

Referenced by if().

◆ a1_re

a1_re = +i01_re+i31_im

Definition at line 307 of file dw_dslash4_core.h.

Referenced by if().

◆ A1_re

A1_re = 0

Definition at line 386 of file dw_dslash4_core.h.

Referenced by if().

◆ a2_im

a2_im = +i02_im-i32_re

Definition at line 308 of file dw_dslash4_core.h.

Referenced by if().

◆ A2_im

A2_im = 0

Definition at line 423 of file dw_dslash4_core.h.

Referenced by if().

◆ a2_re

a2_re = +i02_re+i32_im

Definition at line 308 of file dw_dslash4_core.h.

Referenced by if().

◆ A2_re

A2_re = 0

Definition at line 416 of file dw_dslash4_core.h.

Referenced by if().

◆ b0_im

b0_im = +i10_im-i20_re

Definition at line 309 of file dw_dslash4_core.h.

Referenced by if().

◆ B0_im

B0_im = 0

Definition at line 377 of file dw_dslash4_core.h.

Referenced by if().

◆ b0_re

b0_re = +i10_re+i20_im

Definition at line 309 of file dw_dslash4_core.h.

Referenced by if().

◆ B0_re

B0_re = 0

Definition at line 370 of file dw_dslash4_core.h.

Referenced by if().

◆ b1_im

b1_im = +i11_im-i21_re

Definition at line 310 of file dw_dslash4_core.h.

Referenced by if().

◆ B1_im

B1_im = 0

Definition at line 407 of file dw_dslash4_core.h.

Referenced by if().

◆ b1_re

b1_re = +i11_re+i21_im

Definition at line 310 of file dw_dslash4_core.h.

Referenced by if().

◆ B1_re

B1_re = 0

Definition at line 400 of file dw_dslash4_core.h.

Referenced by if().

◆ b2_im

b2_im = +i12_im-i22_re

Definition at line 311 of file dw_dslash4_core.h.

Referenced by if().

◆ B2_im

B2_im = 0

Definition at line 437 of file dw_dslash4_core.h.

Referenced by if().

◆ b2_re

b2_re = +i12_re+i22_im

Definition at line 311 of file dw_dslash4_core.h.

Referenced by if().

◆ B2_re

B2_re = 0

Definition at line 430 of file dw_dslash4_core.h.

Referenced by if().

◆ coord

int coord[5]

Definition at line 196 of file dw_dslash4_core.h.

Referenced by if().

◆ else

else
Initial value:
{
const int face_volume = (param.threads*param.dc.Ls >> 1)
(Cross-reference for param, as resolved by Doxygen: QudaGaugeParam param — Definition: pack_test.cpp:17)

Definition at line 220 of file dw_dslash4_core.h.

◆ face_idx

face_idx = sid - face_num*face_volume

◆ face_num

const int face_num = (sid >= face_volume)

◆ G0

float4 G0

Definition at line 271 of file dw_dslash4_core.h.

◆ G1

float4 G1

Definition at line 272 of file dw_dslash4_core.h.

◆ G2

float4 G2

Definition at line 273 of file dw_dslash4_core.h.

◆ G3

float4 G3

Definition at line 274 of file dw_dslash4_core.h.

◆ G4

float4 G4

Definition at line 275 of file dw_dslash4_core.h.

◆ ga_idx

const int ga_idx = sid % param.dc.volume_4d_cb

Definition at line 301 of file dw_dslash4_core.h.

◆ o00_im

o00_im = i00_im

Definition at line 151 of file dw_dslash4_core.h.

Referenced by if().

◆ o00_re

o00_re = i00_re

Definition at line 150 of file dw_dslash4_core.h.

Referenced by if().

◆ o01_im

o01_im = i01_im

Definition at line 153 of file dw_dslash4_core.h.

Referenced by if().

◆ o01_re

o01_re = i01_re

Definition at line 152 of file dw_dslash4_core.h.

Referenced by if().

◆ o02_im

o02_im = i02_im

Definition at line 155 of file dw_dslash4_core.h.

Referenced by if().

◆ o02_re

o02_re = i02_re

Definition at line 154 of file dw_dslash4_core.h.

Referenced by if().

◆ o10_im

o10_im = i10_im

Definition at line 157 of file dw_dslash4_core.h.

Referenced by if().

◆ o10_re

o10_re = i10_re

Definition at line 156 of file dw_dslash4_core.h.

Referenced by if().

◆ o11_im

o11_im = i11_im

Definition at line 159 of file dw_dslash4_core.h.

Referenced by if().

◆ o11_re

o11_re = i11_re

Definition at line 158 of file dw_dslash4_core.h.

Referenced by if().

◆ o12_im

o12_im = i12_im

Definition at line 161 of file dw_dslash4_core.h.

Referenced by if().

◆ o12_re

o12_re = i12_re

Definition at line 160 of file dw_dslash4_core.h.

Referenced by if().

◆ o20_im

o20_im = i20_im

Definition at line 163 of file dw_dslash4_core.h.

Referenced by if().

◆ o20_re

o20_re = i20_re

Definition at line 162 of file dw_dslash4_core.h.

Referenced by if().

◆ o21_im

o21_im = i21_im

Definition at line 165 of file dw_dslash4_core.h.

Referenced by if().

◆ o21_re

o21_re = i21_re

Definition at line 164 of file dw_dslash4_core.h.

Referenced by if().

◆ o22_im

o22_im = i22_im

Definition at line 167 of file dw_dslash4_core.h.

Referenced by if().

◆ o22_re

o22_re = i22_re

Definition at line 166 of file dw_dslash4_core.h.

Referenced by if().

◆ o30_im

o30_im = i30_im

Definition at line 169 of file dw_dslash4_core.h.

Referenced by if().

◆ o30_re

o30_re = i30_re

Definition at line 168 of file dw_dslash4_core.h.

Referenced by if().

◆ o31_im

o31_im = i31_im

Definition at line 171 of file dw_dslash4_core.h.

Referenced by if().

◆ o31_re

o31_re = i31_re

Definition at line 170 of file dw_dslash4_core.h.

Referenced by if().

◆ o32_im

o32_im = i32_im

Definition at line 173 of file dw_dslash4_core.h.

Referenced by if().

◆ o32_re

o32_re = i32_re

Definition at line 172 of file dw_dslash4_core.h.

Referenced by if().

◆ sid

Definition at line 192 of file dw_dslash4_core.h.

Referenced by if().

◆ X

int X

Definition at line 196 of file dw_dslash4_core.h.

Referenced by if().