QUDA  v1.1.0
A library for QCD on GPUs
new_half.cu
Go to the documentation of this file.
1 /*
2 
3  This is just an experiment into using polar coordinates instead of
4  Cartesian coordinates for storing complex numbers. The supposition
5  is that a fixed-point form for polar coordinates will lead to a much
6  more efficient use of bits leading to higher precision with 8-bit
7  and 16-bit precision than otherwise possible.
8 
9 */
10 
11 #include <quda_internal.h>
12 #include <register_traits.h>
13 
14 using namespace quda;
15 
16 #define MAX_USHORT 65535.0f
17 inline void ucopy(float &a, const ushort &b) { a = (float)b/MAX_USHORT; }
18 inline void ucopy(ushort &a, const float &b) { a = (short)(b*MAX_USHORT); }
19 
20 
21 void old_load_half(float spinor[24], short *in, float *norm, int idx) {
22 
23  for (int i=0; i<24; i++) {
24  copy(spinor[i], in[idx*24+i]);
25  spinor[i] *= norm[idx];
26  }
27 
28 }
29 
30 void old_save_half(float spinor[24], short *out, float *norm, int idx) {
31 
32  float max = 0.0;
33  for (int i=0; i<24; i++) {
34  float tmp = fabs(spinor[i]);
35  if (tmp > max) max = tmp;
36  }
37 
38  norm[idx] = max;
39  for (int i=0; i<24; i++) copy(out[idx*24+i], spinor[i]/max);
40 
41 }
42 
43 void new_load_half(float spinor[24], short *in, float *norm, int idx) {
44 
45  for (int i=0; i<12; i++) {
46  float mag, phase;
47  ucopy(mag,((ushort*)in)[idx*24+i*2+0]);
48  mag *= norm[idx];
49  copy(phase,in[idx*24+i*2+1]);
50  phase *= M_PI;
51  spinor[2*i+0] = mag*cos(phase);
52  spinor[2*i+1] = mag*sin(phase);
53  }
54 
55 }
56 
57 void new_save_half(float spinor[24], short *out, float *norm, int idx) {
58 
59  float max = 0.0;
60  for (int i=0; i<12; i++) {
61  float tmp = sqrt(spinor[2*i+0]*spinor[2*i+0] + spinor[2*i+1]*spinor[2*i+1]);
62  if (tmp > max) max = tmp;
63  }
64 
65  norm[idx] = max;
66  for (int i=0; i<12; i++) {
67  float phase = atan2(spinor[2*i+1], spinor[2*i+0]) / M_PI;
68  float mag = sqrt(spinor[2*i+0]*spinor[2*i+0] + spinor[2*i+1]*spinor[2*i+1]) / max;
69 
70  ucopy(((ushort*)out)[idx*24+i*2+0], mag);
71  copy(out[idx*24+i*2+1], phase);
72  }
73 
74 }
75 
76 void oldCopyToHalf(short *out, float *norm, float *in, int N) {
77  for (int j=0; j<N; j++) {
78  float spinor[24];
79  for (int i=0; i<24; i++) spinor[i] = in[j*24+i];
80  old_save_half(spinor, out, norm, j);
81  }
82 
83 }
84 
85 void oldCopyToFloat(float *out, short *in, float *norm, int N) {
86  for (int j=0; j<N; j++) {
87  float spinor[24];
88  old_load_half(spinor, in, norm, j);
89  for (int i=0; i<24; i++) out[j*24+i] = spinor[i];
90  }
91 
92 }
93 
94 void newCopyToHalf(short *out, float *norm, float *in, int N) {
95  for (int j=0; j<N; j++) {
96  float spinor[24];
97  for (int i=0; i<24; i++) spinor[i] = in[j*24+i];
98  new_save_half(spinor, out, norm, j);
99  }
100 
101 }
102 
103 void newCopyToFloat(float *out, short *in, float *norm, int N) {
104  for (int j=0; j<N; j++) {
105  float spinor[24];
106  new_load_half(spinor, in, norm, j);
107  for (int i=0; i<24; i++) out[j*24+i] = spinor[i];
108  }
109 
110 }
111 
112 void insertNoise(float *field, int N, float power) {
113  for (int j=0; j<N; j++) {
114  for (int i=0; i<24; i++) {
115  field[j*24+i] = 1000*pow(comm_drand(), power);
116  }
117  }
118 }
119 
120 double l2(float *a, float *b, int N) {
121 
122  double rtn = 0.0;
123  for (int j=0; j<N; j++) {
124  double dif = 0;
125  double nrm = 0.0;
126  for (int i=0; i<24; i++) {
127  dif += a[j*24+i]*a[j*24+i] - b[j*24+i]*b[j*24+i];
128  nrm += a[j*24+i]*a[j*24+i];
129  }
130  rtn += sqrt(fabs(dif)/nrm);
131  }
132  return rtn/N;
133 }
134 
135 int main() {
136  const int N = 1000;
137 
138  float *ref = (float*)safe_malloc(24*N*sizeof(float));
139  short *old_half = (short*)safe_malloc(24*N*sizeof(short));
140  float *old_norm = (float*)safe_malloc(N*sizeof(float));
141  float *old_recon = (float*)safe_malloc(24*N*sizeof(float));
142  short *new_half = (short*)safe_malloc(24*N*sizeof(short));
143  float *new_norm = (float*)safe_malloc(N*sizeof(float));
144  float *new_recon = (float*)safe_malloc(24*N*sizeof(float));
145 
146  for (float power=0.0; power<2.0; power+=0.1) {
147  insertNoise(ref, N, power);
148 
149  newCopyToHalf(new_half,new_norm,ref,N);
150  newCopyToFloat(new_recon,new_half,new_norm,N);
151 
152  oldCopyToHalf(old_half,old_norm,ref,N);
153  oldCopyToFloat(old_recon,old_half,old_norm,N);
154 
155  printf("pow=%e, L2 spinor deviation: old = %e, new = %e, ratio = %e\n",
156  power, l2(ref,old_recon,N), l2(ref,new_recon,N),
157  l2(ref,old_recon,N) / l2(ref,new_recon,N));
158 
159  if (N==1) {
160  for (int j=0; j<N; j++) {
161  for (int i=0; i<12; i++) {
162  printf("power=%4.2f i=%d ref=(%e,%e) old=(%e,%e), new=(%e,%e)\n",
163  power, i, ref[j*24+i*2+0], ref[j*24+i*2+1],
164  old_recon[j*24+i*2+0], old_recon[j*24+i*2+1],
165  new_recon[j*24+i*2+0], new_recon[j*24+i*2+1]);
166  }
167  }
168  }
169  }
170 
171  host_free(old_norm);
172  host_free(old_half);
173  host_free(old_recon);
174  host_free(new_norm);
175  host_free(new_half);
176  host_free(new_recon);
177  host_free(ref);
178 }