17 for (
int d=1; d<4; d++) {
33 errorQuda(
"Half precision not supported at present");
44 ret.
even = allocateParityHw(X, precision);
45 ret.
odd = allocateParityHw(X, precision);
50 static void freeParityHwQuda(
ParityHw parity_hw)
56 parity_hw.
data = NULL;
63 freeParityHwQuda(hw.
even);
64 freeParityHwQuda(hw.
odd);
68 template <
typename Float>
69 static inline void packHwVector(float4* a,
Float *b,
int Vh)
88 template <
typename Float>
89 static inline void packHwVector(float2* a,
Float *b,
int Vh)
110 template <
typename Float>
111 static inline void packHwVector(double2* a,
Float *b,
int Vh)
133 template <
typename Float>
134 static inline void unpackHwVector(
Float *a, float4 *b,
int Vh)
153 template <
typename Float>
154 static inline void unpackHwVector(
Float *a, float2 *b,
int Vh)
176 template <
typename Float>
177 static inline void unpackHwVector(
Float *a, double2 *b,
int Vh)
199 template <
typename Float,
typename FloatN>
202 for (
int i = 0; i <
Vh; i++) {
207 template <
typename Float,
typename FloatN>
208 static void unpackParityHw(
Float *res, FloatN *hwPacked,
int Vh) {
210 for (
int i = 0; i <
Vh; i++) {
211 unpackHwVector(res+i*
hwSiteSize, hwPacked+i, Vh);
220 errorQuda(
"CUDA double precision requires CPU double precision");
236 cudaMemcpy(ret.
data, packedHw1, ret.
bytes, cudaMemcpyHostToDevice);
260 loadParityHw(ret.
even, hw, cpu_prec);
261 loadParityHw(ret.
odd, hw_odd, cpu_prec);
271 cudaMemcpy(packedHw1, hw.
data, hw.
bytes, cudaMemcpyDeviceToHost);
274 unpackParityHw((
double*)res, (double2*)packedHw1, hw.
volume);
277 unpackParityHw((
double*)res, (float2*)packedHw1, hw.
volume);
279 unpackParityHw((
float*)res, (float2*)packedHw1, hw.
volume);
303 retrieveParityHw(res, hw.
even, cpu_prec);
304 retrieveParityHw(res_odd, hw.
odd, cpu_prec);
void freeHwQuda(FullHw hw)
#define pinned_malloc(size)
enum QudaPrecision_s QudaPrecision
void retrieveHwField(void *res, FullHw hw, QudaPrecision cpu_prec)
void loadHwToGPU(FullHw ret, void *hw, QudaPrecision cpu_prec)
FloatingPoint< float > Float
void packParityHw(FloatN *res, Float *hw, int Vh)
Main header file for the QUDA library.
#define device_malloc(size)
FullHw createHwQuda(int *X, QudaPrecision precision)