3 This is just an experiment into using polar coordinates instead of
4 Cartesian coordinates for storing complex numbers. The supposition
5 is that a fixed-point form for polar coordinates will lead to a much
6 more efficient use of bits leading to higher precision with 8-bit
7 and 16-bit precision than otherwise possible.
11 #include <quda_internal.h>
12 #include <register_traits.h>
16 #define MAX_USHORT 65535.0f
17 inline void ucopy(float &a, const ushort &b) { a = (float)b/MAX_USHORT; }
18 inline void ucopy(ushort &a, const float &b) { a = (short)(b*MAX_USHORT); }
21 void old_load_half(float spinor[24], short *in, float *norm, int idx) {
23 for (int i=0; i<24; i++) {
24 copy(spinor[i], in[idx*24+i]);
25 spinor[i] *= norm[idx];
30 void old_save_half(float spinor[24], short *out, float *norm, int idx) {
33 for (int i=0; i<24; i++) {
34 float tmp = fabs(spinor[i]);
35 if (tmp > max) max = tmp;
39 for (int i=0; i<24; i++) copy(out[idx*24+i], spinor[i]/max);
43 void new_load_half(float spinor[24], short *in, float *norm, int idx) {
45 for (int i=0; i<12; i++) {
47 ucopy(mag,((ushort*)in)[idx*24+i*2+0]);
49 copy(phase,in[idx*24+i*2+1]);
51 spinor[2*i+0] = mag*cos(phase);
52 spinor[2*i+1] = mag*sin(phase);
57 void new_save_half(float spinor[24], short *out, float *norm, int idx) {
60 for (int i=0; i<12; i++) {
61 float tmp = sqrt(spinor[2*i+0]*spinor[2*i+0] + spinor[2*i+1]*spinor[2*i+1]);
62 if (tmp > max) max = tmp;
66 for (int i=0; i<12; i++) {
67 float phase = atan2(spinor[2*i+1], spinor[2*i+0]) / M_PI;
68 float mag = sqrt(spinor[2*i+0]*spinor[2*i+0] + spinor[2*i+1]*spinor[2*i+1]) / max;
70 ucopy(((ushort*)out)[idx*24+i*2+0], mag);
71 copy(out[idx*24+i*2+1], phase);
76 void oldCopyToHalf(short *out, float *norm, float *in, int N) {
77 for (int j=0; j<N; j++) {
79 for (int i=0; i<24; i++) spinor[i] = in[j*24+i];
80 old_save_half(spinor, out, norm, j);
85 void oldCopyToFloat(float *out, short *in, float *norm, int N) {
86 for (int j=0; j<N; j++) {
88 old_load_half(spinor, in, norm, j);
89 for (int i=0; i<24; i++) out[j*24+i] = spinor[i];
94 void newCopyToHalf(short *out, float *norm, float *in, int N) {
95 for (int j=0; j<N; j++) {
97 for (int i=0; i<24; i++) spinor[i] = in[j*24+i];
98 new_save_half(spinor, out, norm, j);
103 void newCopyToFloat(float *out, short *in, float *norm, int N) {
104 for (int j=0; j<N; j++) {
106 new_load_half(spinor, in, norm, j);
107 for (int i=0; i<24; i++) out[j*24+i] = spinor[i];
112 void insertNoise(float *field, int N, float power) {
113 for (int j=0; j<N; j++) {
114 for (int i=0; i<24; i++) {
115 field[j*24+i] = 1000*pow(comm_drand(), power);
120 double l2(float *a, float *b, int N) {
123 for (int j=0; j<N; j++) {
126 for (int i=0; i<24; i++) {
127 dif += a[j*24+i]*a[j*24+i] - b[j*24+i]*b[j*24+i];
128 nrm += a[j*24+i]*a[j*24+i];
130 rtn += sqrt(fabs(dif)/nrm);
138 float *ref = (float*)safe_malloc(24*N*sizeof(float));
139 short *old_half = (short*)safe_malloc(24*N*sizeof(short));
140 float *old_norm = (float*)safe_malloc(N*sizeof(float));
141 float *old_recon = (float*)safe_malloc(24*N*sizeof(float));
142 short *new_half = (short*)safe_malloc(24*N*sizeof(short));
143 float *new_norm = (float*)safe_malloc(N*sizeof(float));
144 float *new_recon = (float*)safe_malloc(24*N*sizeof(float));
146 for (float power=0.0; power<2.0; power+=0.1) {
147 insertNoise(ref, N, power);
149 newCopyToHalf(new_half,new_norm,ref,N);
150 newCopyToFloat(new_recon,new_half,new_norm,N);
152 oldCopyToHalf(old_half,old_norm,ref,N);
153 oldCopyToFloat(old_recon,old_half,old_norm,N);
155 printf("pow=%e, L2 spinor deviation: old = %e, new = %e, ratio = %e\n",
156 power, l2(ref,old_recon,N), l2(ref,new_recon,N),
157 l2(ref,old_recon,N) / l2(ref,new_recon,N));
160 for (int j=0; j<N; j++) {
161 for (int i=0; i<12; i++) {
162 printf("power=%4.2f i=%d ref=(%e,%e) old=(%e,%e), new=(%e,%e)\n",
163 power, i, ref[j*24+i*2+0], ref[j*24+i*2+1],
164 old_recon[j*24+i*2+0], old_recon[j*24+i*2+1],
165 new_recon[j*24+i*2+0], new_recon[j*24+i*2+1]);
173 host_free(old_recon);
176 host_free(new_recon);