QUDA  v0.7.0
A library for QCD on GPUs
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
blas_magma.cpp
Go to the documentation of this file.
1 #include <blas_magma.h>
2 #include <string.h>
3 
4 #ifndef MAX
5 #define MAX(a, b) (a > b) ? a : b;
6 #endif
7 
8 #define MAGMA_15 //version of the MAGMA library
9 
10 #ifdef MAGMA_LIB
11 #include <magma.h>
12 
13 #if (defined MAGMA_14)
14 
15 #define _cV 'V'
16 #define _cU 'U'
17 
18 #define _cR 'R'
19 #define _cL 'L'
20 
21 #define _cC 'C'
22 #define _cN 'N'
23 
24 #elif (defined MAGMA_15)
25 
26 #define _cV MagmaVec
27 #define _cU MagmaUpper
28 
29 #define _cR MagmaRight
30 #define _cL MagmaLeft
31 
32 #define _cC MagmaConjTrans
33 #define _cN MagmaNoTrans
34 
35 #endif
36 
37 #endif
38 
40 
41 #ifdef MAGMA_LIB
42  magma_int_t err = magma_init();
43 
44  if(err != MAGMA_SUCCESS) printf("\nError: cannot initialize MAGMA library\n");
45 
46  int major, minor, micro;
47 
48  magma_version( &major, &minor, &micro);
49  printf("\nMAGMA library version: %d.%d\n\n", major, minor);
50 #else
51  printf("\nError: MAGMA library was not compiled, check your compilation options...\n");
52  exit(-1);
53 #endif
54 
55  return;
56 }
57 
59 
60 #ifdef MAGMA_LIB
61  magma_int_t err = magma_finalize();
62  if(magma_finalize() != MAGMA_SUCCESS) printf("\nError: cannot close MAGMA library\n");
63 #else
64  printf("\nError: MAGMA library was not compiled, check your compilation options...\n");
65  exit(-1);
66 #endif
67 
68  return;
69 }
70 
71  BlasMagmaArgs::BlasMagmaArgs(const int prec) : m(0), nev(0), prec(prec), ldm(0), info(-1), llwork(0),
72  lrwork(0), liwork(0), sideLR(0), htsize(0), dtsize(0), lwork_max(0), W(0), W2(0),
73  hTau(0), dTau(0), lwork(0), rwork(0), iwork(0)
74 {
75 
76 #ifdef MAGMA_LIB
77  magma_int_t dev_info = magma_getdevice_arch();//mostly to check whether magma is intialized...
78  if(dev_info == 0) exit(-1);
79 
80  printf("\nMAGMA will use device architecture %d.\n", dev_info);
81 
82  alloc = false;
83  init = true;
84 #else
85  printf("\nError: MAGMA library was not compiled, check your compilation options...\n");
86  exit(-1);
87 #endif
88 
89  return;
90 }
91 
92 
93 BlasMagmaArgs::BlasMagmaArgs(const int m, const int ldm, const int prec)
94  : m(m), nev(0), prec(prec), ldm(ldm), info(-1), sideLR(0), htsize(0), dtsize(0),
95  W(0), hTau(0), dTau(0)
96 {
97 
98 #ifdef MAGMA_LIB
99 
100  magma_int_t dev_info = magma_getdevice_arch();//mostly to check whether magma is intialized...
101 
102  if(dev_info == 0) exit(-1);
103 
104  printf("\nMAGMA will use device architecture %d.\n", dev_info);
105 
106  const int complex_prec = 2*prec;
107 
108  magma_int_t nbtrd = prec == 4 ? magma_get_chetrd_nb(m) : magma_get_zhetrd_nb(m);//ldm
109 
110  llwork = MAX(m + m*nbtrd, 2*m + m*m);//ldm
111  lrwork = 1 + 5*m + 2*m*m;//ldm
112  liwork = 3 + 5*m;//ldm
113 
114  magma_malloc_pinned((void**)&W2, ldm*m*complex_prec);
115  magma_malloc_pinned((void**)&lwork, llwork*complex_prec);
116  magma_malloc_cpu((void**)&rwork, lrwork*prec);
117  magma_malloc_cpu((void**)&iwork, liwork*sizeof(magma_int_t));
118 
119  init = true;
120  alloc = true;
121 
122 #else
123  printf("\nError: MAGMA library was not compiled, check your compilation options...\n");
124  exit(-1);
125 #endif
126 
127  return;
128 }
129 
130 
131 
132 BlasMagmaArgs::BlasMagmaArgs(const int m, const int nev, const int ldm, const int prec)
133  : m(m), nev(nev), prec(prec), ldm(ldm), info(-1)
134 {
135 
136 #ifdef MAGMA_LIB
137 
138  magma_int_t dev_info = magma_getdevice_arch();//mostly to check whether magma is intialized...
139 
140  if(dev_info == 0) exit(-1);
141 
142  printf("\nMAGMA will use device architecture %d.\n", dev_info);
143 
144  const int complex_prec = 2*prec;
145 
146  magma_int_t nbtrd = prec == 4 ? magma_get_chetrd_nb(m) : magma_get_zhetrd_nb(m);//ldm
147  magma_int_t nbqrf = prec == 4 ? magma_get_cgeqrf_nb(m) : magma_get_zgeqrf_nb(m);//ldm
148 
149  llwork = MAX(m + m*nbtrd, 2*m + m*m);//ldm
150  lrwork = 1 + 5*m + 2*m*m;//ldm
151  liwork = 3 + 5*m;//ldm
152 
153  htsize = 2*nev;//MIN(l,k)-number of Householder vectors, but we always have k <= MIN(m,n)
154  dtsize = ( 2*htsize + ((htsize + 31)/32)*32 )*nbqrf;//in general: MIN(m,k) for side = 'L' and MIN(n,k) for side = 'R'
155 
156  sideLR = (m - 2*nev + nbqrf)*(m + nbqrf) + m*nbqrf;//ldm
157 
158  magma_malloc_pinned((void**)&W, sideLR*complex_prec);
159  magma_malloc_pinned((void**)&W2, ldm*m*complex_prec);
160  magma_malloc_pinned((void**)&hTau, htsize*complex_prec);
161  magma_malloc((void**)&dTau, dtsize*complex_prec);
162 
163  magma_malloc_pinned((void**)&lwork, llwork*complex_prec);
164  magma_malloc_cpu((void**)&rwork, lrwork*prec);
165  magma_malloc_cpu((void**)&iwork, liwork*sizeof(magma_int_t));
166 
167  init = true;
168  alloc = true;
169 
170 #else
171  printf("\nError: MAGMA library was not compiled, check your compilation options...\n");
172  exit(-1);
173 #endif
174 
175  return;
176 }
177 
179 {
180 #ifdef MAGMA_LIB
181 
182  if(alloc == true)
183  {
184  if(dTau) magma_free(dTau);
185  if(hTau) magma_free_pinned(hTau);
186 
187  if(W) magma_free_pinned(W);
188  magma_free_pinned(W2);
189  magma_free_pinned(lwork);
190 
191  magma_free_cpu(rwork);
192  magma_free_cpu(iwork);
193  alloc = false;
194  }
195 
196  init = false;
197 
198 #endif
199 
200  return;
201 }
202 
203 void BlasMagmaArgs::MagmaHEEVD(void *dTvecm, void *hTvalm, const int prob_size, bool host)
204 {
205  if(prob_size > m) printf("\nError in MagmaHEEVD (problem size cannot exceed given search space %d), exit ...\n", m), exit(-1);
206 
207 #ifdef MAGMA_LIB
208  cudaPointerAttributes ptr_attr;
209 
210  if(!host)
211  {
212  //check if dTvecm is a device pointer..
213  cudaPointerGetAttributes(&ptr_attr, dTvecm);
214 
215  if(ptr_attr.memoryType != cudaMemoryTypeDevice || ptr_attr.devicePointer == NULL ) printf("Error in MagmaHEEVD, no device pointer found."), exit(-1);
216 
217  if(prec == 4)
218  {
219  magma_cheevd_gpu(_cV, _cU, prob_size, (magmaFloatComplex*)dTvecm, ldm, (float*)hTvalm, (magmaFloatComplex*)W2, ldm, (magmaFloatComplex*)lwork, llwork, (float*)rwork, lrwork, iwork, liwork, &info);
220  if(info != 0) printf("\nError in MagmaHEEVD (magma_cheevd_gpu), exit ...\n"), exit(-1);
221  }
222  else
223  {
224  magma_zheevd_gpu(_cV, _cU, prob_size, (magmaDoubleComplex*)dTvecm, ldm, (double*)hTvalm, (magmaDoubleComplex*)W2, ldm, (magmaDoubleComplex*)lwork, llwork, (double*)rwork, lrwork, iwork, liwork, &info);
225  if(info != 0) printf("\nError in MagmaHEEVD (magma_zheevd_gpu), exit ...\n"), exit(-1);
226  }
227  }
228  else
229  {
230  //check if dTvecm is a device pointer..
231  cudaPointerGetAttributes(&ptr_attr, dTvecm);
232 
233  if(ptr_attr.memoryType != cudaMemoryTypeHost || ptr_attr.hostPointer == NULL ) printf("Error in MagmaHEEVD, no host pointer found."), exit(-1);
234 
235  if(prec == 4)
236  {
237  magma_cheevd(_cV, _cU, prob_size, (magmaFloatComplex*)dTvecm, ldm, (float*)hTvalm, (magmaFloatComplex*)lwork, llwork, (float*)rwork, lrwork, iwork, liwork, &info);
238  if(info != 0) printf("\nError in MagmaHEEVD (magma_cheevd_gpu), exit ...\n"), exit(-1);
239  }
240  else
241  {
242  magma_zheevd(_cV, _cU, prob_size, (magmaDoubleComplex*)dTvecm, ldm, (double*)hTvalm, (magmaDoubleComplex*)lwork, llwork, (double*)rwork, lrwork, iwork, liwork, &info);
243  if(info != 0) printf("\nError in MagmaHEEVD (magma_zheevd_gpu), exit ...\n"), exit(-1);
244  }
245  }
246 #endif
247  return;
248 }
249 
250 int BlasMagmaArgs::MagmaORTH_2nev(void *dTvecm, void *dTm)
251 {
252  const int l = 2*nev;
253 
254 #ifdef MAGMA_LIB
255  if(prec == 4)
256  {
257  magma_int_t nb = magma_get_cgeqrf_nb(m);//ldm
258 
259  magma_cgeqrf_gpu(m, l, (magmaFloatComplex *)dTvecm, ldm, (magmaFloatComplex *)hTau, (magmaFloatComplex *)dTau, &info);
260  if(info != 0) printf("\nError in MagmaORTH_2nev (magma_cgeqrf_gpu), exit ...\n"), exit(-1);
261 
262  //compute dTevecm0=QHTmQ
263  //get TQ product:
264  magma_cunmqr_gpu(_cR, _cN, m, m, l, (magmaFloatComplex *)dTvecm, ldm, (magmaFloatComplex *)hTau, (magmaFloatComplex *)dTm, ldm, (magmaFloatComplex *)W, sideLR, (magmaFloatComplex *)dTau, nb, &info);
265  if(info != 0) printf("\nError in MagmaORTH_2nev (magma_cunmqr_gpu), exit ...\n"), exit(-1);
266 
267  //get QHT product:
268  magma_cunmqr_gpu(_cL, _cC, m, l, l, (magmaFloatComplex *)dTvecm, ldm, (magmaFloatComplex *)hTau, (magmaFloatComplex *)dTm, ldm, (magmaFloatComplex *)W, sideLR, (magmaFloatComplex *)dTau, nb, &info);
269  if(info != 0) printf("\nError in MagmaORTH_2nev (magma_cunmqr_gpu), exit ...\n"), exit(-1);
270  }
271  else
272  {
273  magma_int_t nb = magma_get_zgeqrf_nb(m);//ldm
274 
275  magma_zgeqrf_gpu(m, l, (magmaDoubleComplex *)dTvecm, ldm, (magmaDoubleComplex *)hTau, (magmaDoubleComplex *)dTau, &info);
276  if(info != 0) printf("\nError in MagmaORTH_2nev (magma_zgeqrf_gpu), exit ...\n"), exit(-1);
277 
278  //compute dTevecm0=QHTmQ
279  //get TQ product:
280  magma_zunmqr_gpu(_cR, _cN, m, m, l, (magmaDoubleComplex *)dTvecm, ldm, (magmaDoubleComplex *)hTau, (magmaDoubleComplex *)dTm, ldm, (magmaDoubleComplex *)W, sideLR, (magmaDoubleComplex *)dTau, nb, &info);
281  if(info != 0) printf("\nError in MagmaORTH_2nev (magma_zunmqr_gpu), exit ...\n"), exit(-1);
282 
283  //get QHT product:
284  magma_zunmqr_gpu(_cL, _cC, m, l, l, (magmaDoubleComplex *)dTvecm, ldm, (magmaDoubleComplex *)hTau, (magmaDoubleComplex *)dTm, ldm, (magmaDoubleComplex *)W, sideLR, (magmaDoubleComplex *)dTau, nb, &info);
285  if(info != 0) printf("\nError in MagmaORTH_2nev (magma_zunmqr_gpu), exit ...\n"), exit(-1);
286 
287  }
288 #endif
289 
290  return l;
291 }
292 
293 void BlasMagmaArgs::RestartV(void *dV, const int vld, const int vlen, const int vprec, void *dTevecm, void *dTm)
294 {
295 #ifdef MAGMA_LIB
296  const int cprec = 2*prec;
297  int l = 2*nev;
298 //
299  //void *Tmp = 0;
300  //magma_malloc((void**)&Tmp, vld*l*cprec);
301 
302  //cudaMemset(Tmp, 0, vld*l*cprec);
303 //
304  const int bufferSize = 2*vld+l*l;
305 
306  int bufferBlock = bufferSize / l;
307 
308  void *buffer = 0;
309  magma_malloc(&buffer, bufferSize*cprec);
310  cudaMemset(buffer, 0, bufferSize*cprec);
311 
312 
313  if(prec == 4)
314  {
315  magma_int_t nb = magma_get_cgeqrf_nb(m);//ldm
316  magma_cunmqr_gpu(_cL, _cN, m, l, l, (magmaFloatComplex*)dTevecm, ldm, (magmaFloatComplex*)hTau, (magmaFloatComplex*)dTm, ldm, (magmaFloatComplex*)W, sideLR, (magmaFloatComplex*)dTau, nb, &info);
317 
318  if(info != 0) printf("\nError in RestartV (magma_cunmqr_gpu), exit ...\n"), exit(-1);
319  }
320  else
321  {
322  magma_int_t nb = magma_get_zgeqrf_nb(m);//ldm
323  magma_zunmqr_gpu(_cL, _cN, m, l, l, (magmaDoubleComplex*)dTevecm, ldm, (magmaDoubleComplex*)hTau, (magmaDoubleComplex*)dTm, ldm, (magmaDoubleComplex*)W, sideLR, (magmaDoubleComplex*)dTau, nb, &info);
324 
325  if(info != 0) printf("\nError in RestartV (magma_zunmqr_gpu), exit ...\n"), exit(-1);
326  }
327 
328  if(vprec == 4)
329  {
330  magmaFloatComplex *dtm;
331 
332  if(prec == 8)
333  {
334  magma_malloc((void**)&dtm, ldm*l*prec);
335 
336  double *hbuff1;
337  float *hbuff2;
338 
339  magma_malloc_pinned((void**)&hbuff1, ldm*l*cprec);
340  magma_malloc_pinned((void**)&hbuff2, ldm*l*prec);
341 
342  cudaMemcpy(hbuff1, dTm, ldm*l*cprec, cudaMemcpyDefault);
343  for(int i = 0; i < ldm*l; i++) hbuff2[i] = (float)hbuff1[i];
344 
345  cudaMemcpy(dtm, hbuff2, ldm*l*prec, cudaMemcpyDefault);
346 
347  magma_free_pinned(hbuff1);
348  magma_free_pinned(hbuff2);
349  }
350  else
351  {
352  dtm = (magmaFloatComplex *)dTm;
353  }
354 
355  //magmablas_cgemm('N', 'N', vlen, l, m, MAGMA_C_ONE, (magmaFloatComplex*)dV, vld, dtm, ldm, MAGMA_C_ZERO, (magmaFloatComplex*)Tmp, vld);
356 
357  for (int blockOffset = 0; blockOffset < vlen; blockOffset += bufferBlock)
358  {
359  if (bufferBlock > (vlen-blockOffset)) bufferBlock = (vlen-blockOffset);
360 
361  magmaFloatComplex *ptrV = &(((magmaFloatComplex*)dV)[blockOffset]);
362 
363  magmablas_cgemm(_cN, _cN, bufferBlock, l, m, MAGMA_C_ONE, ptrV, vld, dtm, ldm, MAGMA_C_ZERO, (magmaFloatComplex*)buffer, bufferBlock);
364 
365  cudaMemcpy2D(ptrV, vld*cprec, buffer, bufferBlock*cprec, bufferBlock*cprec, l, cudaMemcpyDefault);
366 
367  }
368 
369  if(prec == 8) magma_free(dtm);
370 
371  }
372  else
373  {
374  //magmablas_zgemm('N', 'N', vlen, l, m, MAGMA_Z_ONE, (magmaDoubleComplex*)dV, vld, (magmaDoubleComplex*)dTm, ldm, MAGMA_Z_ZERO, (magmaDoubleComplex*)Tmp, vld);
375 
376  for (int blockOffset = 0; blockOffset < vlen; blockOffset += bufferBlock)
377  {
378  if (bufferBlock > (vlen-blockOffset)) bufferBlock = (vlen-blockOffset);
379 
380  magmaDoubleComplex *ptrV = &(((magmaDoubleComplex*)dV)[blockOffset]);
381 
382  magmablas_zgemm(_cN, _cN, bufferBlock, l, m, MAGMA_Z_ONE, ptrV, vld, (magmaDoubleComplex*)dTm, ldm, MAGMA_Z_ZERO, (magmaDoubleComplex*)buffer, bufferBlock);
383 
384  cudaMemcpy2D(ptrV, vld*cprec, buffer, bufferBlock*cprec, bufferBlock*cprec, l, cudaMemcpyDefault);
385  }
386  }
387 
388  //cudaMemcpy(dV, Tmp, vld*l*cprec, cudaMemcpyDefault);
389 
390  //magma_free(Tmp);
391  magma_free(buffer);
392 #endif
393 
394  return;
395 }
396 
397 
398 void BlasMagmaArgs::SolveProjMatrix(void* rhs, const int ldn, const int n, void* H, const int ldH)
399 {
400 #ifdef MAGMA_LIB
401  const int complex_prec = 2*prec;
402  void *tmp;
403  magma_int_t *ipiv;
404  magma_int_t err;
405 
406  magma_malloc_pinned((void**)&tmp, ldH*n*complex_prec);
407  magma_malloc_pinned((void**)&ipiv, n*sizeof(magma_int_t));
408 
409  memcpy(tmp, H, ldH*n*complex_prec);
410 
411  if (prec == 4)
412  {
413  err = magma_cgesv(n, 1, (magmaFloatComplex*)tmp, ldH, ipiv, (magmaFloatComplex*)rhs, ldn, &info);
414  if(err != 0) printf("\nError in SolveProjMatrix (magma_cgesv), exit ...\n"), exit(-1);
415  }
416  else
417  {
418  err = magma_zgesv(n, 1, (magmaDoubleComplex*)tmp, ldH, ipiv, (magmaDoubleComplex*)rhs, ldn, &info);
419  if(err != 0) printf("\nError in SolveProjMatrix (magma_zgesv), exit ...\n"), exit(-1);
420  }
421 
422  magma_free_pinned(tmp);
423  magma_free_pinned(ipiv);
424 #endif
425  return;
426 }
427 
428 void BlasMagmaArgs::SolveGPUProjMatrix(void* rhs, const int ldn, const int n, void* H, const int ldH)
429 {
430 #ifdef MAGMA_LIB
431  const int complex_prec = 2*prec;
432  void *tmp;
433  magma_int_t *ipiv;
434  magma_int_t err;
435 
436  magma_malloc((void**)&tmp, ldH*n*complex_prec);
437  magma_malloc_pinned((void**)&ipiv, n*sizeof(magma_int_t));
438 
439  cudaMemcpy(tmp, H, ldH*n*complex_prec, cudaMemcpyDefault);
440 
441  if (prec == 4)
442  {
443  err = magma_cgesv_gpu(n, 1, (magmaFloatComplex*)tmp, ldH, ipiv, (magmaFloatComplex*)rhs, ldn, &info);
444  if(err != 0) printf("\nError in SolveGPUProjMatrix (magma_cgesv), exit ...\n"), exit(-1);
445  }
446  else
447  {
448  err = magma_zgesv_gpu(n, 1, (magmaDoubleComplex*)tmp, ldH, ipiv, (magmaDoubleComplex*)rhs, ldn, &info);
449  if(err != 0) printf("\nError in SolveGPUProjMatrix (magma_zgesv), exit ...\n"), exit(-1);
450  }
451 
452  magma_free(tmp);
453  magma_free_pinned(ipiv);
454 #endif
455  return;
456 }
457 
459 (void *spinorOut, const void *spinorSetIn, const int sld, const int slen, const void *vec, const int vlen)
460 {
461 #ifdef MAGMA_LIB
462  if (prec == 4)
463  {
464  magmaFloatComplex *spmat = (magmaFloatComplex*)spinorSetIn;
465  magmaFloatComplex *spout = (magmaFloatComplex*)spinorOut;
466 
467  magmablas_cgemv(_cN, slen, vlen, MAGMA_C_ONE, spmat, sld, (magmaFloatComplex*)vec, 1, MAGMA_C_ZERO, spout, 1);//in colour-major format
468  }
469  else
470  {
471  magmaDoubleComplex *spmat = (magmaDoubleComplex*)spinorSetIn;
472  magmaDoubleComplex *spout = (magmaDoubleComplex*)spinorOut;
473 
474  magmablas_zgemv(_cN, slen, vlen, MAGMA_Z_ONE, spmat, sld, (magmaDoubleComplex*)vec, 1, MAGMA_Z_ZERO, spout, 1);//in colour-major format
475  }
476 #endif
477  return;
478 }
479 
480 
481 #ifdef MAGMA_LIB
482 
483 #undef _cV
484 #undef _cU
485 #undef _cR
486 #undef _cL
487 #undef _cC
488 #undef _cN
489 
490 #endif
491 
492 
void RestartV(void *dV, const int vld, const int vlen, const int vprec, void *dTevecm, void *dTm)
Definition: blas_magma.cpp:293
void SolveProjMatrix(void *rhs, const int ldn, const int n, void *H, const int ldH)
Definition: blas_magma.cpp:398
int MagmaORTH_2nev(void *dTvecm, void *dTm)
Definition: blas_magma.cpp:250
cudaColorSpinorField * tmp
void SolveGPUProjMatrix(void *rhs, const int ldn, const int n, void *H, const int ldH)
Definition: blas_magma.cpp:428
void SpinorMatVec(void *spinorOut, const void *spinorSetIn, const int sld, const int slen, const void *vec, const int vlen)
Definition: blas_magma.cpp:459
static void CloseMagma()
Definition: blas_magma.cpp:58
cpuColorSpinorField * spinorOut
Definition: dslash_test.cpp:40
void MagmaHEEVD(void *dTvecm, void *hTvalm, const int problem_size, bool host=false)
Definition: blas_magma.cpp:203
QudaPrecision prec
Definition: test_util.cpp:1551
#define MAX(a, b)
Definition: blas_magma.cpp:5
static void OpenMagma()
Definition: blas_magma.cpp:39