19 using namespace colorspinor;
22 template<
typename InOrder,
typename FloatIn>
24 FloatIn phi = 2.0*M_PI*Random<FloatIn>(localState);
25 FloatIn radius = Random<FloatIn>(localState);
26 radius =
sqrt(-1.0 *
log(radius));
27 inOrder(0,
x,
s,
c) = complex<FloatIn>(radius*
cos(phi),radius*
sin(phi));
31 template <
typename FloatIn,
int Ns,
int Nc,
typename InOrder>
33 for (
int x=0;
x<volume;
x++) {
34 for (
int s=0;
s<Ns;
s++) {
35 for (
int c=0;
c<Nc;
c++) {
38 genGauss<InOrder, FloatIn>(inOrder, localState,
x,
s,
c);
39 rngstate.
State()[
x] = localState;
46 template <
typename FloatIn,
int Ns,
int Nc,
typename InOrder>
48 int x = blockIdx.x *
blockDim.x + threadIdx.x;
49 if (
x >= volume)
return;
52 for (
int s=0;
s<Ns;
s++) {
53 for (
int c=0;
c<Nc;
c++) {
54 genGauss<InOrder, FloatIn>(inOrder, localState,
x,
s,
c);
57 rngstate.
State()[
x] = localState;
60 template <
typename FloatIn,
int Ns,
int Nc,
typename InOrder>
76 :
in(
in), meta(meta), rngstate(rngstate){ }
80 gaussSpinor<FloatIn, Ns, Nc>(
in, meta.
VolumeCB(), rngstate);
83 gaussSpinorKernel<FloatIn, Ns, Nc, InOrder>
91 long long flops()
const {
return 0; }
92 long long bytes()
const {
return in.Bytes(); }
102 template <
typename FloatIn,
int Ns,
int Nc,
typename InOrder>
109 template <
typename FloatIn,
int Ns,
int Nc>
115 gaussSpinor<FloatIn,Ns,Nc>(inOrder,
in, rngstate);
119 gaussSpinor<FloatIn,Ns,Nc>(inOrder,
in, rngstate);
121 errorQuda(
"Order %d not defined (Ns=%d, Nc=%d)",
in.FieldOrder(), Ns, Nc);
128 if (
src.Ncolor() != 3 ){
129 errorQuda(
" is not implemented for Ncolor!=3");
131 if (
src.Nspin() == 4 ){
133 gaussSpinor<float, 4, 3>(
src, randstates);
135 gaussSpinor<double, 4, 3>(
src, randstates);
137 }
else if (
src.Nspin() == 1 ){
139 gaussSpinor<float, 1, 3>(
src, randstates);
141 gaussSpinor<double, 1, 3>(
src, randstates);
144 errorQuda(
"spinorGauss not implemented for Nspin != 1 or Nspin !=4");
151 RNG* randstates =
new RNG(
src.VolumeCB(), seed,
src.X());
void Init()
Initialize CURAND RNG states.
GaussSpinor(InOrder &in, const ColorSpinorField &meta, RNG &rngstate)
struct curandStateMRG32k3a cuRNGState
unsigned int sharedBytesPerBlock(const TuneParam ¶m) const
const char * AuxString() const
QudaVerbosity getVerbosity()
void spinorGauss(ColorSpinorField &src, int seed)
__host__ __device__ ValueType sqrt(ValueType x)
void apply(const cudaStream_t &stream)
void backup()
Backup CURAND array states initialization.
const char * VolString() const
const ColorSpinorField & meta
__host__ __device__ ValueType sin(ValueType x)
void Release()
Release Device memory for CURAND RNG states.
__device__ __host__ void genGauss(InOrder &inOrder, cuRNGState &localState, int x, int s, int c)
void restore()
Restore CURAND array states initialization.
Class declaration to initialize and hold CURAND RNG states.
TuneParam & tuneLaunch(Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity)
void gaussSpinor(InOrder &inOrder, int volume, RNG rngstate)
unsigned int minThreads() const
QudaFieldLocation Location() const
__host__ __device__ ValueType log(ValueType x)
__host__ __device__ __inline__ cuRNGState * State()
__host__ __device__ ValueType cos(ValueType x)
QudaTune getTuning()
Query whether autotuning is enabled or not. Default is enabled but can be overridden by setting QUDA_...
unsigned int sharedBytesPerThread() const
__global__ void gaussSpinorKernel(InOrder inOrder, int volume, RNG rngstate)