QUDA  0.9.0
fused_exterior_dslash_gen.py
Go to the documentation of this file.
1 import sys
2 
3 
4 
5 def complexify(a):
6  return [complex(x) for x in a]
7 
8 def complexToStr(c):
9  def fltToString(a):
10  if a == int(a): return `int(a)`
11  else: return `a`
12 
13  def imToString(a):
14  if a == 0: return "0i"
15  elif a == -1: return "-i"
16  elif a == 1: return "i"
17  else: return fltToString(a)+"i"
18 
19  re = c.real
20  im = c.imag
21  if re == 0 and im == 0: return "0"
22  elif re == 0: return imToString(im)
23  elif im == 0: return fltToString(re)
24  else:
25  im_str = "-"+imToString(-im) if im < 0 else "+"+imToString(im)
26  return fltToString(re)+im_str
27 
28 
29 
30 
31 id = complexify([
32  1, 0, 0, 0,
33  0, 1, 0, 0,
34  0, 0, 1, 0,
35  0, 0, 0, 1
36 ])
37 
38 gamma1 = complexify([
39  0, 0, 0, 1j,
40  0, 0, 1j, 0,
41  0, -1j, 0, 0,
42  -1j, 0, 0, 0
43 ])
44 
45 gamma2 = complexify([
46  0, 0, 0, 1,
47  0, 0, -1, 0,
48  0, -1, 0, 0,
49  1, 0, 0, 0
50 ])
51 
52 gamma3 = complexify([
53  0, 0, 1j, 0,
54  0, 0, 0, -1j,
55  -1j, 0, 0, 0,
56  0, 1j, 0, 0
57 ])
58 
59 gamma4 = complexify([
60  1, 0, 0, 0,
61  0, 1, 0, 0,
62  0, 0, -1, 0,
63  0, 0, 0, -1
64 ])
65 
66 igamma5 = complexify([
67  0, 0, 1j, 0,
68  0, 0, 0, 1j,
69  1j, 0, 0, 0,
70  0, 1j, 0, 0
71 ])
72 
73 
74 def gplus(g1, g2):
75  return [x+y for (x,y) in zip(g1,g2)]
76 
77 def gminus(g1, g2):
78  return [x-y for (x,y) in zip(g1,g2)]
79 
81  out = ""
82  for i in range(0, 4):
83  for j in range(0,4):
84  out += complexToStr(p[4*i+j]) + " "
85  out += "\n"
86  return out
87 
88 projectors = [
89  gminus(id,gamma1), gplus(id,gamma1),
90  gminus(id,gamma2), gplus(id,gamma2),
91  gminus(id,gamma3), gplus(id,gamma3),
92  gminus(id,gamma4), gplus(id,gamma4),
93 ]
94 
95 
96 
97 def indent(code):
98  def indentline(line): return (" "+line if (line.count("#", 0, 1) == 0) else line)
99  return ''.join([indentline(line)+"\n" for line in code.splitlines()])
100 
101 def block(code):
102  return "{\n"+indent(code)+"}"
103 
104 def sign(x):
105  if x==1: return "+"
106  elif x==-1: return "-"
107  elif x==+2: return "+2*"
108  elif x==-2: return "-2*"
109 
110 def nthFloat4(n):
111  return `(n/4)` + "." + ["x", "y", "z", "w"][n%4]
112 
113 def nthFloat2(n):
114  return `(n/2)` + "." + ["x", "y"][n%2]
115 
116 
117 def in_re(s, c): return "i"+`s`+`c`+"_re"
118 def in_im(s, c): return "i"+`s`+`c`+"_im"
119 def g_re(d, m, n): return ("g" if (d%2==0) else "gT")+`m`+`n`+"_re"
120 def g_im(d, m, n): return ("g" if (d%2==0) else "gT")+`m`+`n`+"_im"
121 def out_re(s, c): return "o"+`s`+`c`+"_re"
122 def out_im(s, c): return "o"+`s`+`c`+"_im"
123 def h1_re(h, c): return ["a","b"][h]+`c`+"_re"
124 def h1_im(h, c): return ["a","b"][h]+`c`+"_im"
125 def h2_re(h, c): return ["A","B"][h]+`c`+"_re"
126 def h2_im(h, c): return ["A","B"][h]+`c`+"_im"
127 def c_re(b, sm, cm, sn, cn): return "c"+`(sm+2*b)`+`cm`+"_"+`(sn+2*b)`+`cn`+"_re"
128 def c_im(b, sm, cm, sn, cn): return "c"+`(sm+2*b)`+`cm`+"_"+`(sn+2*b)`+`cn`+"_im"
129 def a_re(b, s, c): return "a"+`(s+2*b)`+`c`+"_re"
130 def a_im(b, s, c): return "a"+`(s+2*b)`+`c`+"_im"
131 
132 def acc_re(s, c): return "acc"+`s`+`c`+"_re"
133 def acc_im(s, c): return "acc"+`s`+`c`+"_im"
134 
135 def tmp_re(s, c): return "tmp"+`s`+`c`+"_re"
136 def tmp_im(s, c): return "tmp"+`s`+`c`+"_im"
137 
138 def spinor(name, s, c, z):
139  if z==0: return name+`s`+`c`+"_re"
140  else: return name+`s`+`c`+"_im"
141 
143  str = ""
144  str += "// input spinor\n"
145  str += "#ifdef SPINOR_DOUBLE\n"
146  str += "#define spinorFloat double\n"
147  if sharedDslash:
148  str += "#define WRITE_SPINOR_SHARED WRITE_SPINOR_SHARED_DOUBLE2\n"
149  str += "#define READ_SPINOR_SHARED READ_SPINOR_SHARED_DOUBLE2\n"
150 
151  for s in range(0,4):
152  for c in range(0,3):
153  i = 3*s+c
154  str += "#define "+in_re(s,c)+" I"+nthFloat2(2*i+0)+"\n"
155  str += "#define "+in_im(s,c)+" I"+nthFloat2(2*i+1)+"\n"
156  if dslash and not pack:
157  for s in range(0,4):
158  for c in range(0,3):
159  i = 3*s+c
160  str += "#define "+acc_re(s,c)+" accum"+nthFloat2(2*i+0)+"\n"
161  str += "#define "+acc_im(s,c)+" accum"+nthFloat2(2*i+1)+"\n"
162  str += "#else\n"
163  str += "#define spinorFloat float\n"
164  if sharedDslash:
165  str += "#define WRITE_SPINOR_SHARED WRITE_SPINOR_SHARED_FLOAT4\n"
166  str += "#define READ_SPINOR_SHARED READ_SPINOR_SHARED_FLOAT4\n"
167  for s in range(0,4):
168  for c in range(0,3):
169  i = 3*s+c
170  str += "#define "+in_re(s,c)+" I"+nthFloat4(2*i+0)+"\n"
171  str += "#define "+in_im(s,c)+" I"+nthFloat4(2*i+1)+"\n"
172  if dslash and not pack:
173  for s in range(0,4):
174  for c in range(0,3):
175  i = 3*s+c
176  str += "#define "+acc_re(s,c)+" accum"+nthFloat4(2*i+0)+"\n"
177  str += "#define "+acc_im(s,c)+" accum"+nthFloat4(2*i+1)+"\n"
178  str += "#endif // SPINOR_DOUBLE\n\n"
179  return str
180 # end def def_input_spinor
181 
182 
183 def def_gauge():
184  str = "// gauge link\n"
185  str += "#ifdef GAUGE_FLOAT2\n"
186  for m in range(0,3):
187  for n in range(0,3):
188  i = 3*m+n
189  str += "#define "+g_re(0,m,n)+" G"+nthFloat2(2*i+0)+"\n"
190  str += "#define "+g_im(0,m,n)+" G"+nthFloat2(2*i+1)+"\n"
191 
192  str += "\n"
193  str += "#else\n"
194  for m in range(0,3):
195  for n in range(0,3):
196  i = 3*m+n
197  str += "#define "+g_re(0,m,n)+" G"+nthFloat4(2*i+0)+"\n"
198  str += "#define "+g_im(0,m,n)+" G"+nthFloat4(2*i+1)+"\n"
199 
200  str += "\n"
201  str += "#endif // GAUGE_DOUBLE\n\n"
202 
203  str += "// conjugated gauge link\n"
204  for m in range(0,3):
205  for n in range(0,3):
206  i = 3*m+n
207  str += "#define "+g_re(1,m,n)+" (+"+g_re(0,n,m)+")\n"
208  str += "#define "+g_im(1,m,n)+" (-"+g_im(0,n,m)+")\n"
209  str += "\n"
210 
211  return str
212 # end def def_gauge
213 
214 
216  str = "// first chiral block of inverted clover term\n"
217  str += "#ifdef CLOVER_DOUBLE\n"
218  i = 0
219  for m in range(0,6):
220  s = m/3
221  c = m%3
222  str += "#define "+c_re(0,s,c,s,c)+" C"+nthFloat2(i)+"\n"
223  i += 1
224  for n in range(0,6):
225  sn = n/3
226  cn = n%3
227  for m in range(n+1,6):
228  sm = m/3
229  cm = m%3
230  str += "#define "+c_re(0,sm,cm,sn,cn)+" C"+nthFloat2(i)+"\n"
231  str += "#define "+c_im(0,sm,cm,sn,cn)+" C"+nthFloat2(i+1)+"\n"
232  i += 2
233  str += "#else\n"
234  i = 0
235  for m in range(0,6):
236  s = m/3
237  c = m%3
238  str += "#define "+c_re(0,s,c,s,c)+" C"+nthFloat4(i)+"\n"
239  i += 1
240  for n in range(0,6):
241  sn = n/3
242  cn = n%3
243  for m in range(n+1,6):
244  sm = m/3
245  cm = m%3
246  str += "#define "+c_re(0,sm,cm,sn,cn)+" C"+nthFloat4(i)+"\n"
247  str += "#define "+c_im(0,sm,cm,sn,cn)+" C"+nthFloat4(i+1)+"\n"
248  i += 2
249  str += "#endif // CLOVER_DOUBLE\n\n"
250 
251  for n in range(0,6):
252  sn = n/3
253  cn = n%3
254  for m in range(0,n):
255  sm = m/3
256  cm = m%3
257  str += "#define "+c_re(0,sm,cm,sn,cn)+" (+"+c_re(0,sn,cn,sm,cm)+")\n"
258  str += "#define "+c_im(0,sm,cm,sn,cn)+" (-"+c_im(0,sn,cn,sm,cm)+")\n"
259  str += "\n"
260 
261  str += "// second chiral block of inverted clover term (reuses C0,...,C9)\n"
262  for n in range(0,6):
263  sn = n/3
264  cn = n%3
265  for m in range(0,6):
266  sm = m/3
267  cm = m%3
268  str += "#define "+c_re(1,sm,cm,sn,cn)+" "+c_re(0,sm,cm,sn,cn)+"\n"
269  if m != n: str += "#define "+c_im(1,sm,cm,sn,cn)+" "+c_im(0,sm,cm,sn,cn)+"\n"
270  str += "\n"
271 
272  return str
273 # end def def_clover
274 
275 
277 # sharedDslash = True: input spinors stored in shared memory
278 # sharedDslash = False: output spinors stored in shared memory
279  str = "// output spinor\n"
280  for s in range(0,4):
281  for c in range(0,3):
282  i = 3*s+c
283  if 2*i < sharedFloats and not sharedDslash:
284  str += "#define "+out_re(s,c)+" s["+`(2*i+0)`+"*SHARED_STRIDE]\n"
285  else:
286  str += "VOLATILE spinorFloat "+out_re(s,c)+";\n"
287  if 2*i+1 < sharedFloats and not sharedDslash:
288  str += "#define "+out_im(s,c)+" s["+`(2*i+1)`+"*SHARED_STRIDE]\n"
289  else:
290  str += "VOLATILE spinorFloat "+out_im(s,c)+";\n"
291  return str
292 # end def def_output_spinor
293 
294 
295 def prolog():
296  global arch
297 
298  if dslash:
299  prolog_str= ("// *** CUDA DSLASH ***\n\n" if not dagger else "// *** CUDA DSLASH DAGGER ***\n\n")
300  prolog_str+= "#define DSLASH_SHARED_FLOATS_PER_THREAD "+str(sharedFloats)+"\n\n"
301  elif clover:
302  prolog_str= ("// *** CUDA CLOVER ***\n\n")
303  prolog_str+= "#define CLOVER_SHARED_FLOATS_PER_THREAD "+str(sharedFloats)+"\n\n"
304  else:
305  print "Undefined prolog"
306  exit
307  prolog_str+= (
308 """
309 #ifdef MULTI_GPU
310 
311 #if ((CUDA_VERSION >= 4010) && (__COMPUTE_CAPABILITY__ >= 200)) // NVVM compiler
312 #define VOLATILE
313 #else // Open64 compiler
314 #define VOLATILE volatile
315 #endif
316 """)
317 
318  prolog_str+= def_input_spinor()
319  if dslash == True: prolog_str+= def_gauge()
320  if clover == True: prolog_str+= def_clover()
321  prolog_str+= def_output_spinor()
322 
323  if (sharedFloats > 0):
324  if (arch >= 200):
325  prolog_str+= (
326 """
327 #ifdef SPINOR_DOUBLE
328 #define SHARED_STRIDE 16 // to avoid bank conflicts on Fermi
329 #else
330 #define SHARED_STRIDE 32 // to avoid bank conflicts on Fermi
331 #endif
332 """)
333  else:
334  prolog_str+= (
335 """
336 #ifdef SPINOR_DOUBLE
337 #define SHARED_STRIDE 8 // to avoid bank conflicts on G80 and GT200
338 #else
339 #define SHARED_STRIDE 16 // to avoid bank conflicts on G80 and GT200
340 #endif
341 """)
342 
343 
344  # set the pointer if using shared memory for pseudo registers
345  if sharedFloats > 0 and not sharedDslash:
346  prolog_str += (
347 """
348 extern __shared__ char s_data[];
349 """)
350 
351  if dslash:
352  prolog_str += (
353 """
354 VOLATILE spinorFloat *s = (spinorFloat*)s_data + DSLASH_SHARED_FLOATS_PER_THREAD*SHARED_STRIDE*(threadIdx.x/SHARED_STRIDE)
355  + (threadIdx.x % SHARED_STRIDE);
356 """)
357  else:
358  prolog_str += (
359 """
360 VOLATILE spinorFloat *s = (spinorFloat*)s_data + CLOVER_SHARED_FLOATS_PER_THREAD*SHARED_STRIDE*(threadIdx.x/SHARED_STRIDE)
361  + (threadIdx.x % SHARED_STRIDE);
362 """)
363 
364 
365  if dslash:
366  prolog_str += (
367 """
368 #include "read_gauge.h"
369 #include "read_clover.h"
370 #include "io_spinor.h"
371 
372 int coord[5];
373 int X;
374 
375 #if (DD_PREC==2) // half precision
376 int sp_norm_idx;
377 #endif // half precision
378 
379 int sid;
380 """)
381 
382  prolog_str += (
383 """
384 
385  sid = blockIdx.x*blockDim.x + threadIdx.x;
386  if (sid >= param.threads) return;
387 
388  int dim = dimFromFaceIndex(sid, param); // sid is also modified
389 
390 
391  const int face_volume = ((param.threadDimMapUpper[dim] - param.threadDimMapLower[dim]) >> 1); // volume of one face
392  int face_num = (sid >= face_volume); // is this thread updating face 0 or 1
393  int face_idx = sid - face_num*face_volume; // index into the respective face
394 
395  // ghostOffset is scaled to include body (includes stride) and number of FloatN arrays (SPINOR_HOP)
396  // face_idx not sid since faces are spin projected and share the same volume index (modulo UP/DOWN reading)
397  //sp_idx = face_idx + param.ghostOffset[dim];
398 
399  switch(dim) {
400  case 0:
401  coordsFromFaceIndex<4,QUDA_4D_PC,0,1>(X, sid, coord, face_idx, face_num, param);
402  break;
403  case 1:
404  coordsFromFaceIndex<4,QUDA_4D_PC,1,1>(X, sid, coord, face_idx, face_num, param);
405  break;
406  case 2:
407  coordsFromFaceIndex<4,QUDA_4D_PC,2,1>(X, sid, coord, face_idx, face_num, param);
408  break;
409  case 3:
410  coordsFromFaceIndex<4,QUDA_4D_PC,3,1>(X, sid, coord, face_idx, face_num, param);
411  break;
412  }
413 
414 
415  bool active = false;
416  for(int dir=0; dir<4; ++dir){
417  active = active || isActive(dim,dir,+1,coord,param.commDim,param.dc.X);
418  }
419  if(!active) return;
420 
421  READ_INTERMEDIATE_SPINOR(INTERTEX, param.sp_stride, sid, sid);
422 
423 """)
424 
425  out = ""
426  for s in range(0,4):
427  for c in range(0,3):
428  out += out_re(s,c)+" = "+in_re(s,c)+"; "+out_im(s,c)+" = "+in_im(s,c)+";\n"
429  prolog_str+= indent(out)
430  prolog_str+=("\n\n")
431 
432  else:
433  prolog_str+=(
434 """
435 #include "read_clover.h"
436 #include "io_spinor.h"
437 
438 int sid = blockIdx.x*blockDim.x + threadIdx.x;
439 if (sid >= param.threads) return;
440 
441 // read spinor from device memory
442 READ_SPINOR(SPINORTEX, param.sp_stride, sid, sid);
443 """)
444  return prolog_str
445 # end def prolog
446 
447 
448 def gen(dir, pack_only=False):
449  projIdx = dir if not dagger else dir + (1 - 2*(dir%2))
450  projStr = projectorToStr(projectors[projIdx])
451  def proj(i,j):
452  return projectors[projIdx][4*i+j]
453 
454  # if row(i) = (j, c), then the i'th row of the projector can be represented
455  # as a multiple of the j'th row: row(i) = c row(j)
456  def row(i):
457  assert i==2 or i==3
458  if proj(i,0) == 0j:
459  return (1, proj(i,1))
460  if proj(i,1) == 0j:
461  return (0, proj(i,0))
462 
463  boundary = ["coord[0]==(param.dc.X[0]-1)", "coord[0]==0", "coord[1]==(param.dc.X[1]-1)", "coord[1]==0", "coord[2]==(param.dc.X[2]-1)", "coord[2]==0", "coord[3]==(param.dc.X[3]-1)", "coord[3]==0"]
464  interior = ["coord[0]<(param.dc.X[0]-1)", "coord[0]>0", "coord[1]<(param.dc.X[1]-1)", "coord[1]>0", "coord[2]<(param.dc.X[2]-1)", "coord[2]>0", "coord[3]<(param.dc.X[3]-1)", "coord[3]>0"]
465  offset = ["+1", "-1", "+1", "-1", "+1", "-1", "+1", "-1"]
466  dim = ["X", "Y", "Z", "T"]
467 
468  # index of neighboring site when not on boundary
469  sp_idx = ["X+1", "X-1", "X+param.dc.X[0]", "X-param.dc.X[0]", "X+param.dc.X2X1", "X-param.dc.X2X1", "X+param.dc.X3X2X1", "X-param.dc.X3X2X1"]
470 
471  # index of neighboring site (across boundary)
472  sp_idx_wrap = ["X-(param.dc.X[0]-1)", "X+(param.dc.X[0]-1)", "X-param.dc.X2X1mX1", "X+param.dc.X2X1mX1", "X-param.dc.X3X2X1mX2X1", "X+param.dc.X3X2X1mX2X1",
473  "X-param.dc.X4X3X2X1mX3X2X1", "X+param.dc.X4X3X2X1mX3X2X1"]
474 
475  cond = ""
476  cond += "if ( isActive(dim," + `dir/2` + "," + offset[dir] + ",coord,param.commDim,param.dc.X) && " +boundary[dir]+" )\n"
477 
478  str = ""
479 
480  projName = "P"+`dir/2`+["-","+"][projIdx%2]
481  str += "// Projector "+projName+"\n"
482  for l in projStr.splitlines():
483  str += "// "+l+"\n"
484  str += "\n"
485 
486  str += "faceIndexFromCoords<4,1>(face_idx,coord," + `dir/2` + ",param);\n"
487  str += "const int sp_idx = face_idx + param.ghostOffset[" + `dir/2` + "][" + `1-dir%2` + "];\n"
488 
489  str += "#if (DD_PREC==2)\n"
490  str += " sp_norm_idx = face_idx + "
491  str += "param.ghostNormOffset[" + `dir/2` + "][" + `1-dir%2` + "];\n"
492  str += "#endif\n"
493 
494 
495  str += "\n"
496  if dir % 2 == 0:
497  str += "const int ga_idx = sid;\n"
498  else:
499  str += "const int ga_idx = param.dc.Vh+face_idx;\n"
500  str += "\n"
501 
502  # scan the projector to determine which loads are required
503  row_cnt = ([0,0,0,0])
504  for h in range(0,4):
505  for s in range(0,4):
506  re = proj(h,s).real
507  im = proj(h,s).imag
508  if re != 0 or im != 0:
509  row_cnt[h] += 1
510  row_cnt[0] += row_cnt[1]
511  row_cnt[2] += row_cnt[3]
512 
513  decl_half = ""
514  for h in range(0, 2):
515  for c in range(0, 3):
516  decl_half += "spinorFloat "+h1_re(h,c)+", "+h1_im(h,c)+";\n";
517  decl_half += "\n"
518 
519  load_spinor = "// read spinor from device memory\n"
520  if row_cnt[0] == 0:
521  load_spinor += "READ_SPINOR_DOWN(SPINORTEX, param.sp_stride, sp_idx, sp_idx);\n"
522  elif row_cnt[2] == 0:
523  load_spinor += "READ_SPINOR_UP(SPINORTEX, param.sp_stride, sp_idx, sp_idx);\n"
524  else:
525  load_spinor += "READ_SPINOR(SPINORTEX, param.sp_stride, sp_idx, sp_idx);\n"
526  load_spinor += "\n"
527 
528  load_half = ""
529  load_half += "const int sp_stride_pad = param.dc.ghostFace[" + `dir/2` + "];\n"
530 
531  if dir >= 6: load_half += "const int t_proj_scale = TPROJSCALE;\n"
532  load_half += "\n"
533  load_half += "// read half spinor from device memory\n"
534 
535  # we have to use the same volume index for backwards and forwards gathers
536  load_half += "READ_SPINOR_GHOST(GHOSTSPINORTEX, sp_stride_pad, sp_idx, sp_norm_idx, "+`dir`+");\n\n"
537  load_gauge = "// read gauge matrix from device memory\n"
538  load_gauge += "READ_GAUGE_MATRIX(G, GAUGE"+`dir%2`+"TEX, "+`dir`+", ga_idx, param.gauge_stride);\n\n"
539 
540  reconstruct_gauge = "// reconstruct gauge matrix\n"
541  reconstruct_gauge += "RECONSTRUCT_GAUGE_MATRIX("+`dir`+");\n\n"
542 
543  project = "// project spinor into half spinors\n"
544  for h in range(0, 2):
545  for c in range(0, 3):
546  strRe = ""
547  strIm = ""
548  for s in range(0, 4):
549  re = proj(h,s).real
550  im = proj(h,s).imag
551  if re==0 and im==0: ()
552  elif im==0:
553  strRe += sign(re)+in_re(s,c)
554  strIm += sign(re)+in_im(s,c)
555  elif re==0:
556  strRe += sign(-im)+in_im(s,c)
557  strIm += sign(im)+in_re(s,c)
558  if row_cnt[0] == 0: # projector defined on lower half only
559  for s in range(0, 4):
560  re = proj(h+2,s).real
561  im = proj(h+2,s).imag
562  if re==0 and im==0: ()
563  elif im==0:
564  strRe += sign(re)+in_re(s,c)
565  strIm += sign(re)+in_im(s,c)
566  elif re==0:
567  strRe += sign(-im)+in_im(s,c)
568  strIm += sign(im)+in_re(s,c)
569 
570  project += h1_re(h,c)+" = "+strRe+";\n"
571  project += h1_im(h,c)+" = "+strIm+";\n"
572 
573  write_shared = (
574 """// store spinor into shared memory
575 WRITE_SPINOR_SHARED(threadIdx.x, threadIdx.y, threadIdx.z, i);\n
576 """)
577 
578  load_shared_1 = (
579 """// load spinor from shared memory
580 int tx = (threadIdx.x > 0) ? threadIdx.x-1 : blockDim.x-1;
581 __syncthreads();
582 READ_SPINOR_SHARED(tx, threadIdx.y, threadIdx.z);\n
583 """)
584 
585  load_shared_2 = (
586 """// load spinor from shared memory
587 int tx = (threadIdx.x + blockDim.x - ((coord[0]+1)&1) ) % blockDim.x;
588 int ty = (threadIdx.y < blockDim.y - 1) ? threadIdx.y + 1 : 0;
589 READ_SPINOR_SHARED(tx, ty, threadIdx.z);\n
590 """)
591 
592  load_shared_3 = (
593 """// load spinor from shared memory
594 int tx = (threadIdx.x + blockDim.x - ((coord[1]+1)&1)) % blockDim.x;
595 int ty = (threadIdx.y > 0) ? threadIdx.y - 1 : blockDim.y - 1;
596 READ_SPINOR_SHARED(tx, ty, threadIdx.z);\n
597 """)
598 
599  load_shared_4 = (
600 """// load spinor from shared memory
601 int tx = (threadIdx.x + blockDim.x - ((coord[0]+1)&1) ) % blockDim.x;
602 int tz = (threadIdx.z < blockDim.z - 1) ? threadIdx.z + 1 : 0;
603 READ_SPINOR_SHARED(tx, threadIdx.y, tz);\n
604 """)
605 
606  load_shared_5 = (
607 """// load spinor from shared memory
608 int tx = (threadIdx.x + blockDim.x - ((coord[0]+1)&1)) % blockDim.x;
609 int tz = (threadIdx.z > 0) ? threadIdx.z - 1 : blockDim.z - 1;
610 READ_SPINOR_SHARED(tx, threadIdx.y, tz);\n
611 """)
612 
613 
614  copy_half = ""
615  for h in range(0, 2):
616  for c in range(0, 3):
617  copy_half += h1_re(h,c)+" = "+("t_proj_scale*" if (dir >= 6) else "")+in_re(h,c)+"; "
618  copy_half += h1_im(h,c)+" = "+("t_proj_scale*" if (dir >= 6) else "")+in_im(h,c)+";\n"
619  copy_half += "\n"
620 
621  prep_half = ""
622  prep_half += "\n"
623  prep_half += load_half
624  prep_half += copy_half
625  prep_half += "\n"
626 
627  ident = "// identity gauge matrix\n"
628  for m in range(0,3):
629  for h in range(0,2):
630  ident += "spinorFloat "+h2_re(h,m)+" = " + h1_re(h,m) + "; "
631  ident += "spinorFloat "+h2_im(h,m)+" = " + h1_im(h,m) + ";\n"
632  ident += "\n"
633 
634  mult = ""
635  for m in range(0,3):
636  mult += "// multiply row "+`m`+"\n"
637  for h in range(0,2):
638  re = "spinorFloat "+h2_re(h,m)+" = 0;\n"
639  im = "spinorFloat "+h2_im(h,m)+" = 0;\n"
640  for c in range(0,3):
641  re += h2_re(h,m) + " += " + g_re(dir,m,c) + " * "+h1_re(h,c)+";\n"
642  re += h2_re(h,m) + " -= " + g_im(dir,m,c) + " * "+h1_im(h,c)+";\n"
643  im += h2_im(h,m) + " += " + g_re(dir,m,c) + " * "+h1_im(h,c)+";\n"
644  im += h2_im(h,m) + " += " + g_im(dir,m,c) + " * "+h1_re(h,c)+";\n"
645  mult += re + im
646  mult += "\n"
647 
648  reconstruct = ""
649  if asymClover:
650  reconstruct += "#ifdef SPINOR_DOUBLE\n"
651  reconstruct += "spinorFloat a = param.a;\n"
652  reconstruct += "#else\n"
653  reconstruct += "spinorFloat a = param.a_f;\n"
654  reconstruct += "#endif\n"
655 
656  for m in range(0,3):
657 
658  for h in range(0,2):
659  h_out = h
660  if row_cnt[0] == 0: # projector defined on lower half only
661  h_out = h+2
662  if not asymClover:
663  reconstruct += out_re(h_out, m) + " += " + h2_re(h,m) + ";\n"
664  reconstruct += out_im(h_out, m) + " += " + h2_im(h,m) + ";\n"
665  else:
666  reconstruct += out_re(h_out, m) + " += a*" + h2_re(h,m) + ";\n"
667  reconstruct += out_im(h_out, m) + " += a*" + h2_im(h,m) + ";\n"
668 
669  for s in range(2,4):
670  (h,c) = row(s)
671  re = c.real
672  im = c.imag
673  if not asymClover:
674  if im == 0 and re == 0: ()
675  elif im == 0:
676  reconstruct += out_re(s, m) + " " + sign(re) + "= " + h2_re(h,m) + ";\n"
677  reconstruct += out_im(s, m) + " " + sign(re) + "= " + h2_im(h,m) + ";\n"
678  elif re == 0:
679  reconstruct += out_re(s, m) + " " + sign(-im) + "= " + h2_im(h,m) + ";\n"
680  reconstruct += out_im(s, m) + " " + sign(+im) + "= " + h2_re(h,m) + ";\n"
681  else:
682  if im == 0 and re == 0: ()
683  elif im == 0:
684  reconstruct += out_re(s, m) + " " + sign(re) + "= a*" + h2_re(h,m) + ";\n"
685  reconstruct += out_im(s, m) + " " + sign(re) + "= a*" + h2_im(h,m) + ";\n"
686  elif re == 0:
687  reconstruct += out_re(s, m) + " " + sign(-im) + "= a*" + h2_im(h,m) + ";\n"
688  reconstruct += out_im(s, m) + " " + sign(+im) + "= a*" + h2_re(h,m) + ";\n"
689 
690  reconstruct += "\n"
691 
692  if dir >= 6:
693  str += "if (param.gauge_fixed && ga_idx < param.dc.X4X3X2X1hmX3X2X1h)\n"
694  str += block(decl_half + prep_half + ident + reconstruct)
695  str += " else "
696  str += block(decl_half + prep_half + load_gauge + reconstruct_gauge + mult + reconstruct)
697  else:
698  str += decl_half + prep_half + load_gauge + reconstruct_gauge + mult + reconstruct
699 
700  if pack_only:
701  out = load_spinor + decl_half + project
702  out = out.replace("sp_idx", "idx")
703  return out
704  else:
705  return cond + block(str)+"\n\n"
706 # end def gen
707 
708 
709 def input_spinor(s,c,z):
710  if dslash:
711  if z==0: return out_re(s,c)
712  else: return out_im(s,c)
713  else:
714  if z==0: return in_re(s,c)
715  else: return in_im(s,c)
716 
717 def to_chiral_basis(v_out,v_in,c):
718  str = ""
719  str += "spinorFloat "+a_re(0,0,c)+" = -"+spinor(v_in,1,c,0)+" - "+spinor(v_in,3,c,0)+";\n"
720  str += "spinorFloat "+a_im(0,0,c)+" = -"+spinor(v_in,1,c,1)+" - "+spinor(v_in,3,c,1)+";\n"
721  str += "spinorFloat "+a_re(0,1,c)+" = "+spinor(v_in,0,c,0)+" + "+spinor(v_in,2,c,0)+";\n"
722  str += "spinorFloat "+a_im(0,1,c)+" = "+spinor(v_in,0,c,1)+" + "+spinor(v_in,2,c,1)+";\n"
723  str += "spinorFloat "+a_re(0,2,c)+" = -"+spinor(v_in,1,c,0)+" + "+spinor(v_in,3,c,0)+";\n"
724  str += "spinorFloat "+a_im(0,2,c)+" = -"+spinor(v_in,1,c,1)+" + "+spinor(v_in,3,c,1)+";\n"
725  str += "spinorFloat "+a_re(0,3,c)+" = "+spinor(v_in,0,c,0)+" - "+spinor(v_in,2,c,0)+";\n"
726  str += "spinorFloat "+a_im(0,3,c)+" = "+spinor(v_in,0,c,1)+" - "+spinor(v_in,2,c,1)+";\n"
727  str += "\n"
728 
729  for s in range (0,4):
730  str += spinor(v_out,s,c,0)+" = "+a_re(0,s,c)+"; "
731  str += spinor(v_out,s,c,1)+" = "+a_im(0,s,c)+";\n"
732 
733  return block(str)+"\n\n"
734 # end def to_chiral_basis
735 
736 def from_chiral_basis(v_out,v_in,c): # note: factor of 1/2 is included in clover term normalization
737  str = ""
738  str += "spinorFloat "+a_re(0,0,c)+" = "+spinor(v_in,1,c,0)+" + "+spinor(v_in,3,c,0)+";\n"
739  str += "spinorFloat "+a_im(0,0,c)+" = "+spinor(v_in,1,c,1)+" + "+spinor(v_in,3,c,1)+";\n"
740  str += "spinorFloat "+a_re(0,1,c)+" = -"+spinor(v_in,0,c,0)+" - "+spinor(v_in,2,c,0)+";\n"
741  str += "spinorFloat "+a_im(0,1,c)+" = -"+spinor(v_in,0,c,1)+" - "+spinor(v_in,2,c,1)+";\n"
742  str += "spinorFloat "+a_re(0,2,c)+" = "+spinor(v_in,1,c,0)+" - "+spinor(v_in,3,c,0)+";\n"
743  str += "spinorFloat "+a_im(0,2,c)+" = "+spinor(v_in,1,c,1)+" - "+spinor(v_in,3,c,1)+";\n"
744  str += "spinorFloat "+a_re(0,3,c)+" = -"+spinor(v_in,0,c,0)+" + "+spinor(v_in,2,c,0)+";\n"
745  str += "spinorFloat "+a_im(0,3,c)+" = -"+spinor(v_in,0,c,1)+" + "+spinor(v_in,2,c,1)+";\n"
746  str += "\n"
747 
748  for s in range (0,4):
749  str += spinor(v_out,s,c,0)+" = "+a_re(0,s,c)+"; "
750  str += spinor(v_out,s,c,1)+" = "+a_im(0,s,c)+";\n"
751 
752  return block(str)+"\n\n"
753 # end def from_chiral_basis
754 
755 
756 def clover_mult(v_out, v_in, chi):
757  str = "READ_CLOVER(CLOVERTEX, "+`chi`+")\n\n"
758 
759  for s in range (0,2):
760  for c in range (0,3):
761  str += "spinorFloat "+a_re(chi,s,c)+" = 0; spinorFloat "+a_im(chi,s,c)+" = 0;\n"
762  str += "\n"
763 
764  for sm in range (0,2):
765  for cm in range (0,3):
766  for sn in range (0,2):
767  for cn in range (0,3):
768  str += a_re(chi,sm,cm)+" += "+c_re(chi,sm,cm,sn,cn)+" * "+spinor(v_in,2*chi+sn,cn,0)+";\n"
769  if (sn != sm) or (cn != cm):
770  str += a_re(chi,sm,cm)+" -= "+c_im(chi,sm,cm,sn,cn)+" * "+spinor(v_in,2*chi+sn,cn,1)+";\n"
771  #else: str += ";\n"
772  str += a_im(chi,sm,cm)+" += "+c_re(chi,sm,cm,sn,cn)+" * "+spinor(v_in,2*chi+sn,cn,1)+";\n"
773  if (sn != sm) or (cn != cm):
774  str += a_im(chi,sm,cm)+" += "+c_im(chi,sm,cm,sn,cn)+" * "+spinor(v_in,2*chi+sn,cn,0)+";\n"
775  #else: str += ";\n"
776  str += "\n"
777 
778  for s in range (0,2):
779  for c in range (0,3):
780  str += spinor(v_out,2*chi+s,c,0)+" = "+a_re(chi,s,c)+"; "
781  str += spinor(v_out,2*chi+s,c,1)+" = "+a_im(chi,s,c)+";\n"
782  str += "\n"
783 
784  return block(str)+"\n\n"
785 # end def clover_mult
786 
787 
788 def apply_clover(v_out,v_in):
789  str = ""
790  if dslash: str += "#ifdef DSLASH_CLOVER\n\n"
791  str += "// change to chiral basis\n"
792  str += to_chiral_basis(v_out,v_in,0) + to_chiral_basis(v_out,v_in,1) + to_chiral_basis(v_out,v_in,2)
793  str += "// apply first chiral block\n"
794  str += clover_mult(v_out,v_out,0)
795  str += "// apply second chiral block\n"
796  str += clover_mult(v_out,v_out,1)
797  str += "// change back from chiral basis\n"
798  str += "// (note: required factor of 1/2 is included in clover term normalization)\n"
799  str += from_chiral_basis(v_out,v_out,0) + from_chiral_basis(v_out,v_out,1) + from_chiral_basis(v_out,v_out,2)
800  if dslash: str += "#endif // DSLASH_CLOVER\n\n"
801 
802  return str
803 # end def clover
804 
805 
807  str = "// apply twisted mass rotation\n"
808 
809  for h in range(0, 4):
810  for c in range(0, 3):
811  strRe = ""
812  strIm = ""
813  for s in range(0, 4):
814  # identity
815  re = id[4*h+s].real
816  im = id[4*h+s].imag
817  if re==0 and im==0: ()
818  elif im==0:
819  strRe += sign(re)+out_re(s,c)
820  strIm += sign(re)+out_im(s,c)
821  elif re==0:
822  strRe += sign(-im)+out_im(s,c)
823  strIm += sign(im)+out_re(s,c)
824 
825  # sign(x)*i*mu*gamma_5
826  re = igamma5[4*h+s].real
827  im = igamma5[4*h+s].imag
828  if re==0 and im==0: ()
829  elif im==0:
830  strRe += sign(re*x)+out_re(s,c) + "*a"
831  strIm += sign(re*x)+out_im(s,c) + "*a"
832  elif re==0:
833  strRe += sign(-im*x)+out_im(s,c) + "*a"
834  strIm += sign(im*x)+out_re(s,c) + "*a"
835 
836  str += "VOLATILE spinorFloat "+tmp_re(h,c)+" = " + strRe + ";\n"
837  str += "VOLATILE spinorFloat "+tmp_im(h,c)+" = " + strIm + ";\n"
838  str += "\n"
839 
840  return str+"\n"
841 
842 
843 def twisted():
844  str = ""
845  str += twisted_rotate(+1)
846 
847  str += "#ifndef DSLASH_XPAY\n"
848  str += "//scale by b = 1/(1 + a*a) \n"
849  for s in range(0,4):
850  for c in range(0,3):
851  str += out_re(s,c) + " = b*" + tmp_re(s,c) + ";\n"
852  str += out_im(s,c) + " = b*" + tmp_im(s,c) + ";\n"
853  str += "#else\n"
854  for s in range(0,4):
855  for c in range(0,3):
856  str += out_re(s,c) + " = " + tmp_re(s,c) + ";\n"
857  str += out_im(s,c) + " = " + tmp_im(s,c) + ";\n"
858  str += "#endif // DSLASH_XPAY\n"
859  str += "\n"
860 
861  return block(str)+"\n"
862 # end def twisted
863 
864 
866  str = ""
867  str += "#ifdef DSLASH_CLOVER_XPAY\n\n"
868  str += "READ_ACCUM(ACCUMTEX, param.sp_stride)\n\n"
869 
870  str += apply_clover("acc","acc")
871 
872  for s in range(0,4):
873  for c in range(0,3):
874  i = 3*s+c
875  str += out_re(s,c) +" = "+acc_re(s,c)+";\n"
876  str += out_im(s,c) +" = "+acc_im(s,c)+";\n"
877 
878  str += "#endif // DSLASH_CLOVER_XPAY\n"
879 
880  return str
881 #end def clover_xpay
882 
883 def xpay():
884  str = ""
885  str += "#ifdef DSLASH_XPAY\n\n"
886  str += "READ_ACCUM(ACCUMTEX, param.sp_stride)\n\n"
887 
888  str += "#ifdef SPINOR_DOUBLE\n"
889  str += "spinorFloat a = param.a;\n"
890  str += "#else\n"
891  str += "spinorFloat a = param.a_f;\n"
892  str += "#endif\n"
893 
894  for s in range(0,4):
895  for c in range(0,3):
896  i = 3*s+c
897  if twist == False:
898  str += out_re(s,c) +" = a*"+out_re(s,c)+"+"+acc_re(s,c)+";\n"
899  str += out_im(s,c) +" = a*"+out_im(s,c)+"+"+acc_im(s,c)+";\n"
900  else:
901  str += out_re(s,c) +" = b*"+out_re(s,c)+"+"+acc_re(s,c)+";\n"
902  str += out_im(s,c) +" = b*"+out_im(s,c)+"+"+acc_im(s,c)+";\n"
903 
904  str += "#endif // DSLASH_XPAY\n"
905 
906  return str
907 # end def xpay
908 
909 
910 def epilog():
911  str = ""
912 # if dslash and not asymClover:
913 # if twist:
914 # str += "#ifdef MULTI_GPU\n"
915 # else:
916 # str += "#if defined MULTI_GPU && (defined DSLASH_XPAY || defined DSLASH_CLOVER)\n"
917 # str += "#endif // MULTI_GPU\n"
918 
919  if not asymClover:
920  block_str = ""
921  if twist: block_str += twisted()
922  #elif asymClover: block_str += clover_xpay()
923  elif dslash: block_str += apply_clover("o","o")
924  else: block_str += apply_clover("o","i")
925  if not asymClover: block_str += xpay()
926 
927  str += block( block_str )
928 
929  str += "\n\n"
930  str += "// write spinor field back to device memory\n"
931  str += "WRITE_SPINOR(param.sp_stride);\n\n"
932 
933  str += "// undefine to prevent warning when precision is changed\n"
934  str += "#undef spinorFloat\n"
935  if sharedDslash:
936  str += "#undef WRITE_SPINOR_SHARED\n"
937  str += "#undef READ_SPINOR_SHARED\n"
938  if sharedFloats > 0: str += "#undef SHARED_STRIDE\n\n"
939 
940  if dslash:
941  for m in range(0,3):
942  for n in range(0,3):
943  i = 3*m+n
944  str += "#undef "+g_re(0,m,n)+"\n"
945  str += "#undef "+g_im(0,m,n)+"\n"
946  str += "\n"
947 
948  for s in range(0,4):
949  for c in range(0,3):
950  i = 3*s+c
951  str += "#undef "+in_re(s,c)+"\n"
952  str += "#undef "+in_im(s,c)+"\n"
953  str += "\n"
954 
955  if dslash:
956  for s in range(0,4):
957  for c in range(0,3):
958  i = 3*s+c
959  str += "#undef "+acc_re(s,c)+"\n"
960  str += "#undef "+acc_im(s,c)+"\n"
961  str += "\n"
962 
963  if clover == True:
964  for m in range(0,6):
965  s = m/3
966  c = m%3
967  str += "#undef "+c_re(0,s,c,s,c)+"\n"
968  for n in range(0,6):
969  sn = n/3
970  cn = n%3
971  for m in range(n+1,6):
972  sm = m/3
973  cm = m%3
974  str += "#undef "+c_re(0,sm,cm,sn,cn)+"\n"
975  str += "#undef "+c_im(0,sm,cm,sn,cn)+"\n"
976  str += "\n"
977 
978  for s in range(0,4):
979  for c in range(0,3):
980  i = 3*s+c
981  if 2*i < sharedFloats:
982  str += "#undef "+out_re(s,c)+"\n"
983  if 2*i+1 < sharedFloats:
984  str += "#undef "+out_im(s,c)+"\n"
985  str += "\n"
986 
987  str += "#undef VOLATILE\n"
988 
989  str += "\n"
990  str += "#endif // MULTI_GPU"
991  return str
992 # end def epilog
993 
994 
995 def pack_face(facenum):
996  str = "\n"
997  str += "switch(dim) {\n"
998  for dim in range(0,4):
999  str += "case "+`dim`+":\n"
1000  proj = gen(2*dim+facenum, pack_only=True)
1001  proj += "\n"
1002  proj += "// write half spinor back to device memory\n"
1003  proj += "WRITE_HALF_SPINOR(face_volume, face_idx);\n"
1004  str += indent(block(proj)+"\n"+"break;\n")
1005  str += "}\n\n"
1006  return str
1007 # end def pack_face
1008 
1010  assert (sharedFloats == 0)
1011  str = ""
1012  str += def_input_spinor()
1013  str += "#include \"io_spinor.h\"\n\n"
1014 
1015  str += "if (face_num) "
1016  str += block(pack_face(1))
1017  str += " else "
1018  str += block(pack_face(0))
1019 
1020  str += "\n\n"
1021  str += "// undefine to prevent warning when precision is changed\n"
1022  str += "#undef spinorFloat\n"
1023  str += "#undef SHARED_STRIDE\n\n"
1024 
1025  for s in range(0,4):
1026  for c in range(0,3):
1027  i = 3*s+c
1028  str += "#undef "+in_re(s,c)+"\n"
1029  str += "#undef "+in_im(s,c)+"\n"
1030  str += "\n"
1031 
1032  return str
1033 # end def generate_pack
1034 
1035 
1037  return prolog() + gen(0) + gen(1) + gen(2) + gen(3) + gen(4) + gen(5) + gen(6) + gen(7) + epilog()
1038 
1040  return prolog() + epilog()
1041 
1042 # generate Wilson-like Dslash kernels
1044  print "Generating dslash kernel for sm" + str(arch/10)
1045 
1046  global sharedFloats
1047  global sharedDslash
1048  global dslash
1049  global dagger
1050  global clover
1051  global twist
1052  global asymClover
1053 
1054  sharedFloats = 0
1055  if arch >= 200:
1056  sharedFloats = 24
1057  sharedDslash = True
1058  name = "fermi"
1059  elif arch >= 120:
1060  sharedFloats = 0
1061  sharedDslash = False
1062  name = "gt200"
1063  else:
1064  sharedFloats = 19
1065  sharedDslash = False
1066  name = "g80"
1067 
1068  print "Shared floats set to " + str(sharedFloats)
1069 
1070  dslash = True
1071  twist = False
1072  clover = True
1073  dagger = False
1074 
1075  filename = 'dslash_core/wilson_fused_exterior_dslash_' + name + '_core.h'
1076  print sys.argv[0] + ": generating " + filename;
1077  f = open(filename, 'w')
1078  f.write(generate_dslash())
1079  f.close()
1080 
1081  dagger = True
1082  filename = 'dslash_core/wilson_fused_exterior_dslash_dagger_' + name + '_core.h'
1083  print sys.argv[0] + ": generating " + filename;
1084  f = open(filename, 'w')
1085  f.write(generate_dslash())
1086  f.close()
1087 
1088  asymClover = True
1089 
1090  dagger = False
1091  filename = 'dslash_core/asym_wilson_clover_fused_exterior_dslash_' + name + '_core.h'
1092  print sys.argv[0] + ": generating " + filename;
1093  f = open(filename, 'w')
1094  f.write(generate_dslash())
1095  f.close()
1096 
1097  dagger = True
1098  filename = 'dslash_core/asym_wilson_clover_fused_exterior_dslash_dagger_' + name + '_core.h'
1099  print sys.argv[0] + ": generating " + filename;
1100  f = open(filename, 'w')
1101  f.write(generate_dslash())
1102  f.close()
1103 
1104  asymClover = False
1105 
1106  twist = True
1107  clover = False
1108  dagger = False
1109  filename = 'dslash_core/tm_fused_exterior_dslash_' + name + '_core.h'
1110  print sys.argv[0] + ": generating " + filename;
1111  f = open(filename, 'w')
1112  f.write(generate_dslash())
1113  f.close()
1114 
1115  dagger = True
1116  filename = 'dslash_core/tm_fused_exterior_dslash_dagger_' + name + '_core.h'
1117  print sys.argv[0] + ": generating " + filename + "\n";
1118  f = open(filename, 'w')
1119  f.write(generate_dslash())
1120  f.close()
1121 
1122  twist = False
1123  dslash = False
1124 
1125 
1126 
1127 dslash = False
1128 dagger = False
1129 twist = False
1130 clover = False
1131 asymClover = False
1132 sharedFloats = 0
1133 sharedDslash = False
1134 pack = False
1135 
1136 # generate dslash kernels
1137 arch = 200
1139 
1140 arch = 130
1142 
1143 # generate packing kernels
1144 #dslash = True
1145 #sharedFloats = 0
1146 #twist = False
1147 #clover = False
1148 #dagger = False
1149 #pack = True
1150 #print sys.argv[0] + ": generating wilson_pack_face_core.h";
1151 #f = open('dslash_core/wilson_pack_face_core.h', 'w')
1152 #f.write(generate_pack())
1153 #f.close()
1154 
1155 #dagger = True
1156 #print sys.argv[0] + ": generating wilson_pack_face_dagger_core.h";
1157 #f = open('dslash_core/wilson_pack_face_dagger_core.h', 'w')
1158 #f.write(generate_pack())
1159 #f.close()
1160 #dslash = False
1161 #pack = False
1162 
1163 # generate clover solo term
1164 #clover = True
1165 #cloverSharedFloats = 0
1166 #sharedFloats = cloverSharedFloats
1167 # for the clover term, only makes sense to use shared memory as pseudo registers
1168 #sharedDslash = False
1169 #print sys.argv[0] + ": generating clover_core.h";
1170 #f = open('dslash_core/clover_core.h', 'w')
1171 #f.write(generate_clover())
1172 #f.close()
def indent(code)
code generation ######################################################################## ...
def complexify(a)
complex numbers ######################################################################## ...
def gen(dir, pack_only=False)
Definition: gen.py:1