QUDA  1.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
Macros
launch_kernel.cuh File Reference
This graph shows which files directly or indirectly include this file (the graph image is not reproduced in this text rendering):

Go to the source code of this file.

Macros

#define LAUNCH_KERNEL(kernel, tp, stream, arg, ...)
 
#define LAUNCH_KERNEL_LOCAL_PARITY(kernel, tp, stream, arg, ...)
 
#define LAUNCH_KERNEL_MG_BLOCK_SIZE(kernel, tp, stream, arg, ...)
 

Macro Definition Documentation

◆ LAUNCH_KERNEL

#define LAUNCH_KERNEL (   kernel,
  tp,
  stream,
  arg,
  ... 
)

Definition at line 1 of file launch_kernel.cuh.

Referenced by quda::blas::reduceLaunch().

◆ LAUNCH_KERNEL_LOCAL_PARITY

#define LAUNCH_KERNEL_LOCAL_PARITY (   kernel,
  tp,
  stream,
  arg,
  ... 
)

◆ LAUNCH_KERNEL_MG_BLOCK_SIZE

#define LAUNCH_KERNEL_MG_BLOCK_SIZE (   kernel,
  tp,
  stream,
  arg,
  ... 
)
Value:
switch (tp.block.x) { \
case 4: kernel<4, __VA_ARGS__><<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg); break; \
case 8: kernel<8, __VA_ARGS__><<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg); break; \
case 12: kernel<12, __VA_ARGS__><<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg); break; \
case 16: kernel<16, __VA_ARGS__><<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg); break; \
case 27: kernel<27, __VA_ARGS__><<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg); break; \
case 32: kernel<32, __VA_ARGS__><<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg); break; \
case 36: kernel<36, __VA_ARGS__><<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg); break; \
case 54: kernel<54, __VA_ARGS__><<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg); break; \
case 64: kernel<64, __VA_ARGS__><<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg); break; \
case 72: kernel<72, __VA_ARGS__><<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg); break; \
case 81: kernel<81, __VA_ARGS__><<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg); break; \
case 96: kernel<96, __VA_ARGS__><<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg); break; \
case 100: kernel<100, __VA_ARGS__><<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg); break; \
case 108: kernel<108, __VA_ARGS__><<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg); break; \
case 128: kernel<128, __VA_ARGS__><<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg); break; \
case 144: kernel<144, __VA_ARGS__><<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg); break; \
case 192: kernel<192, __VA_ARGS__><<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg); break; \
case 200: kernel<200, __VA_ARGS__><<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg); break; \
case 256: kernel<256, __VA_ARGS__><<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg); break; \
case 288: kernel<288, __VA_ARGS__><<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg); break; \
case 432: kernel<432, __VA_ARGS__><<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg); break; \
case 500: kernel<500, __VA_ARGS__><<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg); break; \
case 512: kernel<512, __VA_ARGS__><<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg); break; \
default: errorQuda("%s block size %d not instantiated", #kernel, tp.block.x); \
}
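Cross-references generated for identifiers in the macro body above (within the macro, stream and arg are macro parameters, so the last two entries are matches by name only):
#define errorQuda(...)
Definition: util_quda.h:121
cudaStream_t * stream
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.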

Definition at line 205 of file launch_kernel.cuh.