QUDA
v0.7.0
A library for QCD on GPUs
|
Go to the source code of this file.
Classes | |
struct | BlasArg< SpinorX, SpinorY, SpinorZ, SpinorW, Functor > |
class | BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Functor > |
Functions | |
template<typename FloatN , int M, typename SpinorX , typename SpinorY , typename SpinorZ , typename SpinorW , typename Functor > | |
__global__ void | blasKernel (BlasArg< SpinorX, SpinorY, SpinorZ, SpinorW, Functor > arg) |
template<template< typename Float, typename FloatN > class Functor, int writeX, int writeY, int writeZ, int writeW> | |
void | blasCuda (const double2 &a, const double2 &b, const double2 &c, cudaColorSpinorField &x, cudaColorSpinorField &y, cudaColorSpinorField &z, cudaColorSpinorField &w) |
|
inline |
Driver for generic blas routine with four loads and up to four stores.
Definition at line 114 of file blas_core.h.
__global__ void blasKernel | ( | BlasArg< SpinorX, SpinorY, SpinorZ, SpinorW, Functor > | arg | ) |
Generic blas kernel with four loads and up to four stores.
Definition at line 22 of file blas_core.h.