/* Dispatch on the runtime x-dimension of the thread block (tp.block.x) so   */ \
/* that `kernel` is instantiated with that block size as its first template  */ \
/* argument, followed by the caller-supplied __VA_ARGS__. Every launch uses  */ \
/* tp.grid, tp.block, tp.shared_bytes and `stream`, and passes `arg`.        */ \
/* This is a macro body: every line except the closing brace must end in a   */ \
/* line continuation, and only block comments may be used inside it.         */ \
switch (tp.block.x) { \
case 4: \
  kernel<4, __VA_ARGS__><<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg); \
  break; \
case 8: \
  kernel<8, __VA_ARGS__><<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg); \
  break; \
case 12: \
  kernel<12, __VA_ARGS__><<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg); \
  break; \
case 16: \
  kernel<16, __VA_ARGS__><<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg); \
  break; \
case 27: \
  kernel<27, __VA_ARGS__><<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg); \
  break; \
case 32: \
  kernel<32, __VA_ARGS__><<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg); \
  break; \
case 36: \
  kernel<36, __VA_ARGS__><<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg); \
  break; \
case 54: \
  kernel<54, __VA_ARGS__><<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg); \
  break; \
case 64: \
  kernel<64, __VA_ARGS__><<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg); \
  break; \
case 72: \
  kernel<72, __VA_ARGS__><<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg); \
  break; \
case 81: \
  kernel<81, __VA_ARGS__><<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg); \
  break; \
case 96: \
  kernel<96, __VA_ARGS__><<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg); \
  break; \
case 100: \
  kernel<100, __VA_ARGS__><<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg); \
  break; \
case 108: \
  kernel<108, __VA_ARGS__><<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg); \
  break; \
case 128: \
  kernel<128, __VA_ARGS__><<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg); \
  break; \
case 144: \
  kernel<144, __VA_ARGS__><<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg); \
  break; \
case 192: \
  kernel<192, __VA_ARGS__><<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg); \
  break; \
case 200: \
  kernel<200, __VA_ARGS__><<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg); \
  break; \
case 256: \
  kernel<256, __VA_ARGS__><<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg); \
  break; \
case 288: \
  kernel<288, __VA_ARGS__><<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg); \
  break; \
case 432: \
  kernel<432, __VA_ARGS__><<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg); \
  break; \
case 500: \
  kernel<500, __VA_ARGS__><<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg); \
  break; \
case 512: \
  kernel<512, __VA_ARGS__><<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg); \
  break; \
/* Any block size without a case label above has no instantiation to launch; */ \
/* report it together with the stringized kernel name.                       */ \
default: \
  errorQuda("%s block size %d not instantiated", #kernel, tp.block.x); \
}
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle (also known as the argument) of z, in radians.