|
QUDA
0.9.0
|

Go to the source code of this file.
Macros | |
| #define | DSLASH_SHARED_FLOATS_PER_THREAD 0 |
| #define | VOLATILE volatile |
| #define | spinorFloat float |
| #define | POW(a, b) __fast_pow(a, b) |
| #define | i00_re I0.x |
| #define | i00_im I0.y |
| #define | i01_re I0.z |
| #define | i01_im I0.w |
| #define | i02_re I1.x |
| #define | i02_im I1.y |
| #define | i10_re I1.z |
| #define | i10_im I1.w |
| #define | i11_re I2.x |
| #define | i11_im I2.y |
| #define | i12_re I2.z |
| #define | i12_im I2.w |
| #define | i20_re I3.x |
| #define | i20_im I3.y |
| #define | i21_re I3.z |
| #define | i21_im I3.w |
| #define | i22_re I4.x |
| #define | i22_im I4.y |
| #define | i30_re I4.z |
| #define | i30_im I4.w |
| #define | i31_re I5.x |
| #define | i31_im I5.y |
| #define | i32_re I5.z |
| #define | i32_im I5.w |
| #define | m5 param.m5_f |
| #define | mdwf_b5 param.mdwf_b5_f |
| #define | mdwf_c5 param.mdwf_c5_f |
| #define | mferm param.mferm_f |
| #define | a param.a |
| #define | b param.b |
| #define | g00_re G0.x |
| #define | g00_im G0.y |
| #define | g01_re G0.z |
| #define | g01_im G0.w |
| #define | g02_re G1.x |
| #define | g02_im G1.y |
| #define | g10_re G1.z |
| #define | g10_im G1.w |
| #define | g11_re G2.x |
| #define | g11_im G2.y |
| #define | g12_re G2.z |
| #define | g12_im G2.w |
| #define | g20_re G3.x |
| #define | g20_im G3.y |
| #define | g21_re G3.z |
| #define | g21_im G3.w |
| #define | g22_re G4.x |
| #define | g22_im G4.y |
| #define | gT00_re (+g00_re) |
| #define | gT00_im (-g00_im) |
| #define | gT01_re (+g10_re) |
| #define | gT01_im (-g10_im) |
| #define | gT02_re (+g20_re) |
| #define | gT02_im (-g20_im) |
| #define | gT10_re (+g01_re) |
| #define | gT10_im (-g01_im) |
| #define | gT11_re (+g11_re) |
| #define | gT11_im (-g11_im) |
| #define | gT12_re (+g21_re) |
| #define | gT12_im (-g21_im) |
| #define | gT20_re (+g02_re) |
| #define | gT20_im (-g02_im) |
| #define | gT21_re (+g12_re) |
| #define | gT21_im (-g12_im) |
| #define | gT22_re (+g22_re) |
| #define | gT22_im (-g22_im) |
| #define | SHARED_STRIDE 16 |
| #define a param.a |
Definition at line 82 of file dw_dslash4_core.h.
Referenced by __fast_pow(), quda::blas::_caxpby(), quda::blas::_caxpy(), quda::blas::_cxpaypbz(), quda::gauge::Accessor< Float, nColor, QUDA_QDP_GAUGE_ORDER >::Accessor(), quda::clover::Accessor< Float, nColor, nSpin, QUDA_PACKED_CLOVER_ORDER >::Accessor(), accumulateComplexDotProduct(), accumulateComplexProduct(), accumulateConjugateProduct(), add_dbldbl(), add_double_to_dbldbl(), add_double_to_doubledouble(), adjoint_su3_matrix(), quda::aligned_malloc(), applyTwist(), quda::asymCloverDslashCuda(), quda::Trig< isHalf, T >::Atan2(), quda::Trig< false, float >::Atan2(), quda::Trig< true, float >::Atan2(), ax(), aX(), quda::blas::ax(), quda::ax(), quda::blas::axCpu(), axmy(), quda::blas::axpby(), axpby(), quda::blas::axpbyCpu(), aXpY(), axpy(), quda::blas::axpy(), quda::axpy(), quda::blas::axpyBzpcx(), quda::blas::axpyBzpcxCpu(), quda::blas::axpyCGNorm(), quda::blas::axpyCpu(), quda::blas::axpyNorm(), quda::blas::axpyNormCpu(), quda::blas::axpyReDot(), quda::blas::axpyZpbx(), quda::blas::axpyZpbxCpu(), benchmark(), mixed::blasCuda(), blasCuda(), quda::blas::cabxpyAx(), quda::blas::cabxpyAxCpu(), quda::blas::cabxpyAxNorm(), quda::blas::cabxpyAxNormCpu(), quda::blas::caxpby(), quda::blas::caxpbyCpu(), quda::blas::caxpbypcz(), quda::blas::caxpbypczpw(), quda::blas::caxpbypczpwCpu(), quda::blas::caxpbypz(), quda::blas::caxpbypzCpu(), quda::blas::caxpbypzYmbw(), quda::blas::caxpbypzYmbwcDotProductUYNormY(), quda::blas::caxpbypzYmbwcDotProductUYNormYCpu(), quda::blas::caxpbypzYmbwCpu(), quda::blas::caxpy(), quda::blas::Caxpy_(), quda::blas::caxpy_L(), quda::blas::caxpy_recurse(), quda::blas::caxpy_U(), quda::blas::caxpyBxpz(), quda::blas::caxpyBzpx(), quda::blas::caxpyCpu(), quda::blas::caxpyDotzy(), quda::blas::caxpyDotzyCpu(), quda::blas::caxpyNorm(), quda::blas::caxpyNormCpu(), quda::blas::caxpyXmaz(), quda::blas::caxpyXmazCpu(), quda::blas::caxpyXmazMR(), quda::blas::caxpyXmazNormX(), quda::blas::caxpyXmazNormXCpu(), quda::blas::caxpyz(), quda::blas::caxpyz_L(), 
quda::blas::caxpyz_recurse(), quda::blas::caxpyz_U(), quda::blas::cdot_(), quda::blas::cdotNormA_(), quda::blas::cdotNormB_(), quda::blas::cDotProduct(), quda::blas::cDotProductCpu(), quda::blas::cDotProductNormACpu(), quda::blas::cDotProductNormBCpu(), quda::ColorSpinorField::checkField(), quda::LatticeField::checkField(), checkLength(), checkSpinor(), quda::Dirac::checkSpinorAlias(), quda::cloverDslashCuda(), quda::CloverFieldParam::CloverFieldParam(), quda::ColorSpinor< Float, Nc, Ns >::ColorSpinor(), quda::ColorSpinor< Float, Nc, 4 >::ColorSpinor(), quda::ColorSpinor< Float, Nc, 2 >::ColorSpinor(), quda::colorSpinorParam(), comm_drand(), quda::cpuColorSpinorField::Compare(), compare_floats(), compareFloats(), GaugeAlgTest::comparePlaquette(), complexAddTo(), complexConjugateProduct(), complexDotProduct(), complexProduct(), quda::computeBeta(), quda::BiCGstabL::computeTau(), quda::copy(), copyFloatN(), quda::Trig< isHalf, T >::Cos(), quda::Trig< false, float >::Cos(), quda::Trig< true, float >::Cos(), quda::DiracWilson::createCoarseOp(), quda::DiracClover::createCoarseOp(), quda::DiracCloverPC::createCoarseOp(), quda::DiracTwistedMass::createCoarseOp(), quda::DiracTwistedMassPC::createCoarseOp(), quda::DiracTwistedClover::createCoarseOp(), quda::DiracTwistedCloverPC::createCoarseOp(), quda::DiracCoarse::createCoarseOp(), quda::DiracCoarsePC::createCoarseOp(), quda::blas::cxpaypbz(), quda::blas::cxpaypbzCpu(), quda::RitzMat::deleteTmp(), quda::Dirac::deleteTmp(), quda::deserializeTuneCache(), quda::device_malloc_(), quda::device_pinned_malloc_(), div_dbldbl(), quda::domainWallDslashCuda(), dot(), quda::blas::dot_(), quda::blas::dotNormA_(), dsadd(), quda::DiracTwistedMassPC::Dslash(), quda::DiracTwistedCloverPC::Dslash(), quda::DiracDomainWall4DPC::Dslash5invXpay(), quda::DiracTwistedMassPC::DslashXpay(), quda::DiracTwistedCloverPC::DslashXpay(), quda::extractGhost(), quda::extractGhostEx(), quda::extractGhostExKernel(), quda::extractGhostKernel(), 
quda::extractor(), quda::colorspinor::FloatNOrder< Float, Ns, Nc, N, huge_alloc >::FloatNOrder(), FloatToShort(), quda::ColorSpinor< Float, Nc, 4 >::gamma(), quda::genericCompare(), quda::genericPackGhost(), quda::genericPrintVector(), quda::genericSource(), get_dbldbl_head(), get_dbldbl_tail(), quda::getDeterminant(), getNorm(), quda::gauge::Reconstruct< 13, Float >::getPhase(), quda::gauge::Reconstruct< 9, Float >::getPhase(), quda::getRealTraceUVdagger(), quda::getSubTraceUnit(), quda::getTrace(), quda::gauge::GhostAccessor< Float, nColor, QUDA_QDP_GAUGE_ORDER, native_ghost >::GhostAccessor(), quda::gauge::GhostAccessor< Float, nColor, QUDA_MILC_GAUGE_ORDER, native_ghost >::GhostAccessor(), quda::gauge::GhostAccessor< Float, nColor, QUDA_FLOAT2_GAUGE_ORDER, native_ghost >::GhostAccessor(), quda::colorspinor::GhostAccessorCB< Float, nSpin, nColor, nVec, QUDA_SPACE_SPIN_COLOR_FIELD_ORDER >::GhostAccessorCB(), quda::colorspinor::GhostAccessorCB< Float, nSpin, nColor, nVec, QUDA_FLOAT2_FIELD_ORDER >::GhostAccessorCB(), quda::colorspinor::GhostAccessorCB< Float, nSpin, nColor, nVec, QUDA_FLOAT4_FIELD_ORDER >::GhostAccessorCB(), quda::HMatrix< T, N >::HMatrix(), if(), quda::ColorSpinor< Float, Nc, 4 >::igamma(), quda::injector(), l2(), link_sanity_check_internal_12(), llfat_add_su3_matrix(), llfat_mult_su3_an(), llfat_mult_su3_na(), llfat_mult_su3_nn(), llfat_scalar_mult_add_su3_matrix(), llfat_scalar_mult_su3_matrix(), quda::load_global_float4(), quda::load_streaming_double2(), quda::load_streaming_float4(), quda::Location_(), quda::DiracTwistedMass::M(), quda::DiracTwistedMassPC::M(), quda::DiracTwistedClover::M(), quda::DiracTwistedCloverPC::M(), quda::make_Complex(), quda::make_Float2(), quda::make_FloatN(), quda::make_shortN(), quda::mapped_malloc_(), quda::Matrix< T, N >::Matrix(), matrix_mult_aa(), matrix_mult_an(), matrix_mult_na(), matrix_mult_nn(), quda::max_fabs(), mod(), mul_dbldbl(), mul_double_to_dbldbl(), mul_double_to_doubledouble(), 
mult_adj_su3_mat_vec(), mult_su3_an(), mult_su3_mat_vec(), mult_su3_na(), mult_su3_nn(), mixed::multiblasCuda(), multiblasCuda(), quda::blas::multiReduce_recurse(), mixed::multiReduceCuda(), multiReduceCuda(), quda::DiracTwistedMass::NdegTwistedDslash(), quda::DiracTwistedMass::NdegTwistedDslashXpay(), ndegTwistGamma5(), neg_dbldbl(), quda::RitzMat::newTmp(), quda::Dirac::newTmp(), quda::blas::norm(), quda::norm1(), quda::blas::norm1_(), quda::norm2(), quda::blas::norm2_(), normalize(), quda::blas::normCpu(), quda::vector< float, 2 >::operator float2(), quda::vector< double, 2 >::operator type(), quda::Summ< T >::operator()(), quda::Summ< double2 >::operator()(), quda::Summ< double3 >::operator()(), quda::Summ< double4 >::operator()(), quda::blas::multicaxpy_< NXZ, Float2, FloatN >::operator()(), quda::clover::Accessor< Float, nColor, nSpin, QUDA_FLOAT4_CLOVER_ORDER >::operator()(), quda::reduce_vector< T >::operator()(), quda::clover::Accessor< Float, nColor, nSpin, QUDA_PACKED_CLOVER_ORDER >::operator()(), quda::blas::multicaxpyz_< NXZ, Float2, FloatN >::operator()(), quda::MPBiCGstab::operator()(), quda::blas::multi_caxpyBxpz_< NXZ, Float2, FloatN >::operator()(), operator*(), quda::operator*(), quda::operator*=(), quda::operator+(), operator+(), operator+=(), quda::ColorSpinor< Float, Nc, Ns >::operator+=(), quda::vector_type< scalar, n >::operator+=(), quda::ColorSpinor< Float, Nc, 4 >::operator+=(), quda::HMatrix_wrapper< T, Hmat >::operator+=(), quda::operator+=(), doubledouble2::operator+=(), doubledouble3::operator+=(), quda::ColorSpinor< Float, Nc, 2 >::operator+=(), operator-(), quda::operator-(), quda::operator-=(), operator/(), quda::Int2::operator<(), quda::operator<<(), quda::MemAlloc::operator=(), doublesingle2::operator=(), quda::ColorSpinor< Float, Nc, Ns >::operator=(), quda::clover_wrapper< Float, T >::operator=(), quda::gauge_wrapper< Float, T >::operator=(), quda::colorspinor_wrapper< Float, T >::operator=(), quda::gauge_ghost_wrapper< Float, 
T >::operator=(), quda::Matrix< T, N >::operator=(), quda::ColorSpinor< Float, Nc, 4 >::operator=(), quda::colorspinor_ghost_wrapper< Float, T >::operator=(), quda::HMatrix_wrapper< T, Hmat >::operator=(), quda::HMatrix< T, N >::operator=(), quda::ColorSpinor< Float, Nc, 2 >::operator=(), operator>(), orthogonalize(), quda::outerProd(), quda::outerProdSpinTrace(), quda::cudaColorSpinorField::pack(), quda::cudaColorSpinorField::packGhost(), quda::PackGhostArg< Field >::PackGhostArg(), quda::colorspinor::PaddedSpaceSpinorColorOrder< Float, Ns, Nc >::PaddedSpaceSpinorColorOrder(), quda::pinned_malloc_(), quda::Precision_(), quda::print_alloc(), print_m(), print_su3_matrix(), quda::colorspinor::QDPJITDiracOrder< Float, Ns, Nc >::QDPJITDiracOrder(), quadSum(), quda::Random< double >(), quda::Random< float >(), quda::blas::reDotProduct(), quda::blas::reDotProductCpu(), mixed::reduceCuda(), reduceCuda(), rsqrt_dbldbl(), s2d(), quda::s2d(), s2f(), quda::s2f(), quda::safe_malloc_(), scalar_mult_add_su3_matrix(), scalar_mult_add_su3_vector(), scalar_mult_sub_su3_matrix(), scale_su3_matrix(), shortToFloat(), quda::ColorSpinor< Float, Nc, 4 >::sigma(), quda::Trig< isHalf, T >::Sin(), quda::Trig< false, float >::Sin(), quda::Trig< true, float >::Sin(), quda::Trig< isHalf, T >::SinCos(), quda::Trig< false, float >::SinCos(), site_link_sanity_check_internal_12(), quda::colorspinor::SpaceColorSpinorOrder< Float, Ns, Nc >::SpaceColorSpinorOrder(), quda::colorspinor::SpaceSpinorColorOrder< Float, Ns, Nc >::SpaceSpinorColorOrder(), sqrt_dbldbl(), su3_adjoint(), su3_projector(), sub(), sub_dbldbl(), quda::SubTraceUnit(), sum(), swap(), test(), quda::ColorSpinor< Float, Nc, 4 >::toNonRel(), quda::ColorSpinor< Float, Nc, 4 >::toRel(), quda::track_malloc(), quda::blas::tripleCGUpdate(), quda::TuneKey::TuneKey(), twistCloverGamma5(), quda::DiracTwistedMass::TwistedDslash(), quda::DiracTwistedMass::TwistedDslashXpay(), twistGamma5(), quda::u32toa(), quda::u64toa(), ucopy(), quda::vector< 
double, 2 >::vector(), quda::vector< float, 2 >::vector(), vfill_m(), quda::blas::xpay(), xpay(), quda::blas::xpaycDotzy(), quda::blas::xpaycDotzyCpu(), quda::blas::xpayCpu(), quda::blas::xpayz(), quda::zero(), and quda::blas::zero().
| #define b param.b |
Definition at line 83 of file dw_dslash4_core.h.
Referenced by __fast_pow(), quda::blas::_caxpby(), quda::blas::_cxpaypbz(), accumulateComplexDotProduct(), accumulateComplexProduct(), accumulateConjugateProduct(), add_dbldbl(), add_double_to_dbldbl(), add_double_to_doubledouble(), adjoint_su3_matrix(), quda::LatticeField::allocateGhostBuffer(), quda::Trig< isHalf, T >::Atan2(), quda::Trig< false, float >::Atan2(), quda::Trig< true, float >::Atan2(), quda::ax(), quda::blas::axpby(), axpby(), quda::blas::axpbyCpu(), quda::blas::axpyBzpcx(), quda::blas::axpyBzpcxCpu(), quda::blas::axpyZpbx(), quda::blas::axpyZpbxCpu(), quda::linalg::Cholesky< Mat, T, N, fast >::backward(), bdSVD(), benchmark(), mixed::blasCuda(), blasCuda(), quda::blas::cabxpyAx(), quda::blas::cabxpyAxCpu(), quda::blas::cabxpyAxNorm(), quda::blas::cabxpyAxNormCpu(), quda::blas::caxpby(), quda::blas::caxpbyCpu(), quda::blas::caxpbypcz(), quda::blas::caxpbypczpw(), quda::blas::caxpbypczpwCpu(), quda::blas::caxpbypz(), quda::blas::caxpbypzCpu(), quda::blas::caxpbypzYmbw(), quda::blas::caxpbypzYmbwcDotProductUYNormY(), quda::blas::caxpbypzYmbwcDotProductUYNormYCpu(), quda::blas::caxpbypzYmbwCpu(), quda::blas::caxpy_recurse(), quda::blas::caxpyBxpz(), quda::blas::caxpyBzpx(), quda::blas::caxpyz_recurse(), quda::blas::cdot_(), quda::blas::cdotNormA_(), quda::blas::cdotNormB_(), quda::blas::cDotProduct(), quda::blas::cDotProductCpu(), quda::blas::cDotProductNormACpu(), quda::blas::cDotProductNormBCpu(), quda::ColorSpinorField::checkField(), checkLength(), checkSpinor(), quda::Dirac::checkSpinorAlias(), quda::cpuColorSpinorField::Compare(), compare_floats(), compareFloats(), GaugeAlgTest::comparePlaquette(), complexAddTo(), complexConjugateProduct(), complexDotProduct(), complexProduct(), quda::computeBeta(), quda::BiCGstabL::computeTau(), quda::copy(), copyFloatN(), quda::LatticeField::createComms(), quda::cudaColorSpinorField::createComms(), quda::LatticeField::createIPCComms(), quda::blas::cxpaypbz(), quda::blas::cxpaypbzCpu(), 
quda::LatticeField::destroyComms(), quda::LatticeField::destroyIPCComms(), div_dbldbl(), quda::domainWallDslashCuda(), dot(), quda::blas::dot_(), quda::blas::dotNormA_(), dsadd(), quda::DiracTwistedMassPC::Dslash(), quda::DiracTwistedCloverPC::Dslash(), quda::DiracDomainWall4DPC::Dslash5invXpay(), quda::DiracTwistedMassPC::DslashXpay(), quda::DiracTwistedCloverPC::DslashXpay(), quda::IncEigCG::eigCGsolve(), quda::cudaGaugeField::exchangeExtendedGhost(), quda::extractGhost(), quda::extractGhostEx(), quda::extractGhostExKernel(), quda::extractGhostKernel(), quda::extractor(), quda::linalg::Cholesky< Mat, T, N, fast >::forward(), quda::LatticeField::freeGhostBuffer(), quda::MG::generateNullVectors(), quda::genericCompare(), getLambdaMax(), quda::getRealTraceUVdagger(), if(), quda::IncEigCG::initCGsolve(), quda::injector(), invertMultiShiftQuda(), invertMultiSrcQuda(), invertQuda(), l2(), link_sanity_check_internal_12(), llfat_add_su3_matrix(), llfat_mult_su3_an(), llfat_mult_su3_na(), llfat_mult_su3_nn(), llfat_scalar_mult_add_su3_matrix(), llfat_scalar_mult_su3_matrix(), quda::Location_(), quda::DiracTwistedMass::M(), quda::DiracTwistedMassPC::M(), quda::massRescale(), matrix_mult_aa(), matrix_mult_an(), matrix_mult_na(), matrix_mult_nn(), quda::max_fabs(), mod(), mul_dbldbl(), mul_double_to_dbldbl(), mul_double_to_doubledouble(), mult_adj_su3_mat_vec(), mult_su3_an(), mult_su3_mat_vec(), mult_su3_na(), mult_su3_nn(), mixed::multiblasCuda(), multiblasCuda(), quda::blas::multiReduce_recurse(), mixed::multiReduceCuda(), multiReduceCuda(), quda::DiracTwistedMass::NdegTwistedDslash(), quda::DiracTwistedMass::NdegTwistedDslashXpay(), ndegTwistGamma5(), quda::norm1(), quda::norm2(), quda::Summ< T >::operator()(), quda::Summ< double2 >::operator()(), quda::Summ< double3 >::operator()(), quda::Summ< double4 >::operator()(), quda::Deflation::operator()(), quda::reduce_vector< T >::operator()(), quda::MG::operator()(), quda::CG::operator()(), quda::CGNE::operator()(), 
quda::CGNR::operator()(), quda::MPCG::operator()(), quda::PreconCG::operator()(), quda::BiCGstab::operator()(), quda::SimpleBiCGstab::operator()(), quda::MPBiCGstab::operator()(), quda::BiCGstabL::operator()(), quda::GCR::operator()(), quda::MR::operator()(), quda::SD::operator()(), quda::XSD::operator()(), quda::PreconditionedSolver::operator()(), quda::MultiShiftCG::operator()(), quda::blas::multi_caxpyBxpz_< NXZ, Float2, FloatN >::operator()(), quda::MinResExt::operator()(), quda::GMResDR::operator()(), operator*(), quda::operator*(), quda::operator*=(), quda::operator+(), operator+(), doublesingle::operator+=(), operator+=(), doublesingle2::operator+=(), doublesingle3::operator+=(), quda::operator+=(), operator-(), quda::operator-(), quda::operator-=(), operator/(), doublesingle::operator=(), quda::Matrix< T, N >::operator=(), quda::HMatrix< T, N >::operator=(), operator>(), orthogonalize(), quda::outerProd(), quda::outerProdSpinTrace(), quda::cudaColorSpinorField::pack(), quda::cudaColorSpinorField::packGhost(), quda::Precision_(), quda::DiracWilson::prepare(), quda::DiracWilsonPC::prepare(), quda::DiracClover::prepare(), quda::DiracCloverPC::prepare(), quda::DiracDomainWall::prepare(), quda::DiracDomainWallPC::prepare(), quda::DiracDomainWall4DPC::prepare(), quda::DiracMobius::prepare(), quda::DiracMobiusPC::prepare(), quda::DiracTwistedMass::prepare(), quda::DiracTwistedMassPC::prepare(), quda::DiracTwistedClover::prepare(), quda::DiracTwistedCloverPC::prepare(), quda::DiracStaggered::prepare(), quda::DiracStaggeredPC::prepare(), quda::DiracImprovedStaggered::prepare(), quda::DiracImprovedStaggeredPC::prepare(), quda::DiracCoarse::prepare(), quda::DiracCoarsePC::prepare(), quda::GaugeLaplace::prepare(), quda::GaugeLaplacePC::prepare(), quadSum(), quda::Random< double >(), quda::Random< float >(), quda::DiracWilsonPC::reconstruct(), quda::DiracCloverPC::reconstruct(), quda::DiracDomainWallPC::reconstruct(), quda::DiracDomainWall4DPC::reconstruct(), 
quda::DiracMobiusPC::reconstruct(), quda::DiracTwistedMassPC::reconstruct(), quda::DiracTwistedCloverPC::reconstruct(), quda::DiracCoarsePC::reconstruct(), quda::GaugeLaplacePC::reconstruct(), quda::blas::reDotProduct(), quda::blas::reDotProductCpu(), mixed::reduceCuda(), reduceCuda(), scalar_mult_add_su3_matrix(), scalar_mult_add_su3_vector(), scalar_mult_sub_su3_matrix(), quda::ColorSpinor< Float, Nc, 4 >::sigma(), site_link_sanity_check_internal_12(), quda::solve(), quda::CG::solve(), su3_adjoint(), su3_projector(), sub(), sub_dbldbl(), sum(), swap(), test(), quda::blas::tripleCGUpdate(), quda::DiracTwistedMass::TwistedDslash(), quda::DiracTwistedMass::TwistedDslashXpay(), twistGamma5(), quda::u32toa(), quda::u64toa(), and ucopy().
| #define DSLASH_SHARED_FLOATS_PER_THREAD 0 |
Definition at line 3 of file dw_dslash4_core.h.
| #define g00_im G0.y |
Definition at line 109 of file dw_dslash4_core.h.
| #define g00_re G0.x |
Definition at line 108 of file dw_dslash4_core.h.
| #define g01_im G0.w |
Definition at line 111 of file dw_dslash4_core.h.
| #define g01_re G0.z |
Definition at line 110 of file dw_dslash4_core.h.
| #define g02_im G1.y |
Definition at line 113 of file dw_dslash4_core.h.
| #define g02_re G1.x |
Definition at line 112 of file dw_dslash4_core.h.
| #define g10_im G1.w |
Definition at line 115 of file dw_dslash4_core.h.
| #define g10_re G1.z |
Definition at line 114 of file dw_dslash4_core.h.
| #define g11_im G2.y |
Definition at line 117 of file dw_dslash4_core.h.
| #define g11_re G2.x |
Definition at line 116 of file dw_dslash4_core.h.
| #define g12_im G2.w |
Definition at line 119 of file dw_dslash4_core.h.
| #define g12_re G2.z |
Definition at line 118 of file dw_dslash4_core.h.
| #define g20_im G3.y |
Definition at line 121 of file dw_dslash4_core.h.
| #define g20_re G3.x |
Definition at line 120 of file dw_dslash4_core.h.
| #define g21_im G3.w |
Definition at line 123 of file dw_dslash4_core.h.
| #define g21_re G3.z |
Definition at line 122 of file dw_dslash4_core.h.
| #define g22_im G4.y |
Definition at line 125 of file dw_dslash4_core.h.
| #define g22_re G4.x |
Definition at line 124 of file dw_dslash4_core.h.
| #define gT00_im (-g00_im) |
Definition at line 131 of file dw_dslash4_core.h.
| #define gT00_re (+g00_re) |
Definition at line 130 of file dw_dslash4_core.h.
| #define gT01_im (-g10_im) |
Definition at line 133 of file dw_dslash4_core.h.
| #define gT01_re (+g10_re) |
Definition at line 132 of file dw_dslash4_core.h.
| #define gT02_im (-g20_im) |
Definition at line 135 of file dw_dslash4_core.h.
| #define gT02_re (+g20_re) |
Definition at line 134 of file dw_dslash4_core.h.
| #define gT10_im (-g01_im) |
Definition at line 137 of file dw_dslash4_core.h.
| #define gT10_re (+g01_re) |
Definition at line 136 of file dw_dslash4_core.h.
| #define gT11_im (-g11_im) |
Definition at line 139 of file dw_dslash4_core.h.
| #define gT11_re (+g11_re) |
Definition at line 138 of file dw_dslash4_core.h.
| #define gT12_im (-g21_im) |
Definition at line 141 of file dw_dslash4_core.h.
| #define gT12_re (+g21_re) |
Definition at line 140 of file dw_dslash4_core.h.
| #define gT20_im (-g02_im) |
Definition at line 143 of file dw_dslash4_core.h.
| #define gT20_re (+g02_re) |
Definition at line 142 of file dw_dslash4_core.h.
| #define gT21_im (-g12_im) |
Definition at line 145 of file dw_dslash4_core.h.
| #define gT21_re (+g12_re) |
Definition at line 144 of file dw_dslash4_core.h.
| #define gT22_im (-g22_im) |
Definition at line 147 of file dw_dslash4_core.h.
| #define gT22_re (+g22_re) |
Definition at line 146 of file dw_dslash4_core.h.
| #define i00_im I0.y |
Definition at line 55 of file dw_dslash4_core.h.
Referenced by if().
| #define i00_re I0.x |
Definition at line 54 of file dw_dslash4_core.h.
Referenced by if().
| #define i01_im I0.w |
Definition at line 57 of file dw_dslash4_core.h.
Referenced by if().
| #define i01_re I0.z |
Definition at line 56 of file dw_dslash4_core.h.
Referenced by if().
| #define i02_im I1.y |
Definition at line 59 of file dw_dslash4_core.h.
Referenced by if().
| #define i02_re I1.x |
Definition at line 58 of file dw_dslash4_core.h.
Referenced by if().
| #define i10_im I1.w |
Definition at line 61 of file dw_dslash4_core.h.
Referenced by if().
| #define i10_re I1.z |
Definition at line 60 of file dw_dslash4_core.h.
Referenced by if().
| #define i11_im I2.y |
Definition at line 63 of file dw_dslash4_core.h.
Referenced by if().
| #define i11_re I2.x |
Definition at line 62 of file dw_dslash4_core.h.
Referenced by if().
| #define i12_im I2.w |
Definition at line 65 of file dw_dslash4_core.h.
Referenced by if().
| #define i12_re I2.z |
Definition at line 64 of file dw_dslash4_core.h.
Referenced by if().
| #define i20_im I3.y |
Definition at line 67 of file dw_dslash4_core.h.
Referenced by if().
| #define i20_re I3.x |
Definition at line 66 of file dw_dslash4_core.h.
Referenced by if().
| #define i21_im I3.w |
Definition at line 69 of file dw_dslash4_core.h.
Referenced by if().
| #define i21_re I3.z |
Definition at line 68 of file dw_dslash4_core.h.
Referenced by if().
| #define i22_im I4.y |
Definition at line 71 of file dw_dslash4_core.h.
Referenced by if().
| #define i22_re I4.x |
Definition at line 70 of file dw_dslash4_core.h.
Referenced by if().
| #define i30_im I4.w |
Definition at line 73 of file dw_dslash4_core.h.
Referenced by if().
| #define i30_re I4.z |
Definition at line 72 of file dw_dslash4_core.h.
Referenced by if().
| #define i31_im I5.y |
Definition at line 75 of file dw_dslash4_core.h.
Referenced by if().
| #define i31_re I5.x |
Definition at line 74 of file dw_dslash4_core.h.
Referenced by if().
| #define i32_im I5.w |
Definition at line 77 of file dw_dslash4_core.h.
Referenced by if().
| #define i32_re I5.z |
Definition at line 76 of file dw_dslash4_core.h.
Referenced by if().
| #define m5 param.m5_f |
Definition at line 78 of file dw_dslash4_core.h.
Referenced by quda::MDWFDslashCuda(), and printQudaInvertParam().
| #define mdwf_b5 param.mdwf_b5_f |
Definition at line 79 of file dw_dslash4_core.h.
| #define mdwf_c5 param.mdwf_c5_f |
Definition at line 80 of file dw_dslash4_core.h.
| #define mferm param.mferm_f |
Definition at line 81 of file dw_dslash4_core.h.
Referenced by dslash_5_inv(), dslashReference_5th(), dslashReference_5th_inv(), dw_4d_mat(), dw_4d_matpc(), dw_dslash(), dw_dslash_5_4d(), dw_mat(), dw_matdagmat(), dw_matpc(), mdw_dslash_4_pre(), mdw_dslash_5(), mdw_mat(), and mdw_matpc().
| #define POW | ( | a, | |
| b | |||
| ) | __fast_pow(a, b) |
Definition at line 53 of file dw_dslash4_core.h.
| #define SHARED_STRIDE 16 |
Definition at line 185 of file dw_dslash4_core.h.
| #define spinorFloat float |
Definition at line 52 of file dw_dslash4_core.h.
Referenced by if().
| #define VOLATILE volatile |
Definition at line 9 of file dw_dslash4_core.h.
| coordsFromFaceIndex< 5, QUDA_4D_PC, kernel_type, 1 > | ( | X | , |
| sid | , | ||
| coord | , | ||
| face_idx | , | ||
| face_num | , | ||
| param | |||
| ) |
| if | ( | sid >= param.threads * param.dc.Ls | ) |
| if | ( | kernel_type == INTERIOR_KERNEL | ) |
| if | ( | ) |
Definition at line 1473 of file dw_dslash4_core.h.
References a0_im, A0_im, a0_re, A0_re, a1_im, A1_im, a1_re, A1_re, a2_im, A2_im, a2_re, A2_re, b0_im, B0_im, b0_re, B0_re, b1_im, B1_im, b1_re, B1_re, b2_im, B2_im, b2_re, B2_re, GHOSTSPINORTEX, i00_im, i00_re, i01_im, i01_re, i02_im, i02_re, i10_im, i10_re, i11_im, i11_re, i12_im, i12_re, i20_im, i20_re, i21_im, i21_re, i22_im, i22_re, i30_im, i30_re, i31_im, i31_re, i32_im, i32_re, INTERIOR_KERNEL, o20_im, o20_re, o21_im, o21_re, o22_im, o22_re, o30_im, o30_re, o31_im, o31_re, o32_im, o32_re, param, READ_SPINOR_DOWN(), READ_SPINOR_GHOST, sp_idx, spinorFloat, SPINORTEX, and TPROJSCALE.

| RECONSTRUCT_GAUGE_MATRIX | ( | 0 | ) |
| RECONSTRUCT_GAUGE_MATRIX | ( | 1 | ) |
| RECONSTRUCT_GAUGE_MATRIX | ( | 2 | ) |
| RECONSTRUCT_GAUGE_MATRIX | ( | 3 | ) |
| RECONSTRUCT_GAUGE_MATRIX | ( | 4 | ) |
| RECONSTRUCT_GAUGE_MATRIX | ( | 5 | ) |
| RECONSTRUCT_GAUGE_MATRIX | ( | 6 | ) |
| RECONSTRUCT_GAUGE_MATRIX | ( | 7 | ) |
Definition at line 306 of file dw_dslash4_core.h.
Referenced by if().
| A0_im = 0 |
Definition at line 363 of file dw_dslash4_core.h.
Referenced by if().
Definition at line 306 of file dw_dslash4_core.h.
Referenced by if().
| A0_re = 0 |
Definition at line 356 of file dw_dslash4_core.h.
Referenced by if().
Definition at line 307 of file dw_dslash4_core.h.
Referenced by if().
| A1_im = 0 |
Definition at line 393 of file dw_dslash4_core.h.
Referenced by if().
Definition at line 307 of file dw_dslash4_core.h.
Referenced by if().
| A1_re = 0 |
Definition at line 386 of file dw_dslash4_core.h.
Referenced by if().
Definition at line 308 of file dw_dslash4_core.h.
Referenced by if().
| A2_im = 0 |
Definition at line 423 of file dw_dslash4_core.h.
Referenced by if().
Definition at line 308 of file dw_dslash4_core.h.
Referenced by if().
| A2_re = 0 |
Definition at line 416 of file dw_dslash4_core.h.
Referenced by if().
Definition at line 309 of file dw_dslash4_core.h.
Referenced by if().
| B0_im = 0 |
Definition at line 377 of file dw_dslash4_core.h.
Referenced by if().
Definition at line 309 of file dw_dslash4_core.h.
Referenced by if().
| B0_re = 0 |
Definition at line 370 of file dw_dslash4_core.h.
Referenced by if().
Definition at line 310 of file dw_dslash4_core.h.
Referenced by if().
| B1_im = 0 |
Definition at line 407 of file dw_dslash4_core.h.
Referenced by if().
Definition at line 310 of file dw_dslash4_core.h.
Referenced by if().
| B1_re = 0 |
Definition at line 400 of file dw_dslash4_core.h.
Referenced by if().
Definition at line 311 of file dw_dslash4_core.h.
Referenced by if().
| B2_im = 0 |
Definition at line 437 of file dw_dslash4_core.h.
Referenced by if().
Definition at line 311 of file dw_dslash4_core.h.
Referenced by if().
| B2_re = 0 |
Definition at line 430 of file dw_dslash4_core.h.
Referenced by if().
| int coord[5] |
Definition at line 196 of file dw_dslash4_core.h.
Referenced by if().
| else |
Definition at line 220 of file dw_dslash4_core.h.
| face_idx = sid - face_num*face_volume |
Definition at line 198 of file dw_dslash4_core.h.
Referenced by coordsFromFaceIndex(), dimFromFaceIndex(), faceIndexFromCoords(), if(), indexFromFaceIndex(), indexFromFaceIndexExtended(), indexFromFaceIndexExtendedStaggered(), and indexFromFaceIndexStaggered().
| const int face_num = (sid >= face_volume) |
Definition at line 223 of file dw_dslash4_core.h.
Referenced by coordsFromFaceIndex(), indexFromFaceIndex(), indexFromFaceIndexExtended(), indexFromFaceIndexExtendedStaggered(), indexFromFaceIndexStaggered(), quda::cudaColorSpinorField::packGhost(), quda::cudaColorSpinorField::packGhostExtended(), and quda::cudaColorSpinorField::unpackGhostExtended().
| float4 G0 |
Definition at line 271 of file dw_dslash4_core.h.
| float4 G1 |
Definition at line 272 of file dw_dslash4_core.h.
| float4 G2 |
Definition at line 273 of file dw_dslash4_core.h.
| float4 G3 |
Definition at line 274 of file dw_dslash4_core.h.
| float4 G4 |
Definition at line 275 of file dw_dslash4_core.h.
Definition at line 301 of file dw_dslash4_core.h.
| o00_im = i00_im |
Definition at line 151 of file dw_dslash4_core.h.
Referenced by if().
| o00_re = i00_re |
Definition at line 150 of file dw_dslash4_core.h.
Referenced by if().
| o01_im = i01_im |
Definition at line 153 of file dw_dslash4_core.h.
Referenced by if().
| o01_re = i01_re |
Definition at line 152 of file dw_dslash4_core.h.
Referenced by if().
| o02_im = i02_im |
Definition at line 155 of file dw_dslash4_core.h.
Referenced by if().
| o02_re = i02_re |
Definition at line 154 of file dw_dslash4_core.h.
Referenced by if().
| o10_im = i10_im |
Definition at line 157 of file dw_dslash4_core.h.
Referenced by if().
| o10_re = i10_re |
Definition at line 156 of file dw_dslash4_core.h.
Referenced by if().
| o11_im = i11_im |
Definition at line 159 of file dw_dslash4_core.h.
Referenced by if().
| o11_re = i11_re |
Definition at line 158 of file dw_dslash4_core.h.
Referenced by if().
| o12_im = i12_im |
Definition at line 161 of file dw_dslash4_core.h.
Referenced by if().
| o12_re = i12_re |
Definition at line 160 of file dw_dslash4_core.h.
Referenced by if().
| o20_im = i20_im |
Definition at line 163 of file dw_dslash4_core.h.
Referenced by if().
| o20_re = i20_re |
Definition at line 162 of file dw_dslash4_core.h.
Referenced by if().
| o21_im = i21_im |
Definition at line 165 of file dw_dslash4_core.h.
Referenced by if().
| o21_re = i21_re |
Definition at line 164 of file dw_dslash4_core.h.
Referenced by if().
| o22_im = i22_im |
Definition at line 167 of file dw_dslash4_core.h.
Referenced by if().
| o22_re = i22_re |
Definition at line 166 of file dw_dslash4_core.h.
Referenced by if().
| o30_im = i30_im |
Definition at line 169 of file dw_dslash4_core.h.
Referenced by if().
| o30_re = i30_re |
Definition at line 168 of file dw_dslash4_core.h.
Referenced by if().
| o31_im = i31_im |
Definition at line 171 of file dw_dslash4_core.h.
Referenced by if().
| o31_re = i31_re |
Definition at line 170 of file dw_dslash4_core.h.
Referenced by if().
| o32_im = i32_im |
Definition at line 173 of file dw_dslash4_core.h.
Referenced by if().
| o32_re = i32_re |
Definition at line 172 of file dw_dslash4_core.h.
Referenced by if().
| int sid = ((blockIdx.y*blockDim.y + threadIdx.y)*gridDim.x + blockIdx.x)*blockDim.x + threadIdx.x |
Definition at line 192 of file dw_dslash4_core.h.
Referenced by if().
| int X |
Definition at line 196 of file dw_dslash4_core.h.
Referenced by if().
1.8.14