walberla::gpu::Kernel< FuncPtr > Class Template Reference

Detailed Description

template<typename FuncPtr>
class walberla::gpu::Kernel< FuncPtr >

Wrapper class around a GPU kernel that allows kernels to be called from code that is not compiled with the device compiler.

Example:

// Declaration of kernel, implementation has to be in a file compiled with nvcc
void kernel_func ( double * inputData, int size );
auto kernel = make_kernel( kernel_func );
kernel.addParam<double*> ( argument1 );
kernel.addParam<int> ( 20 );
kernel.configure( dim3( 3,3,3), dim3( 4,4,4) );
kernel();
// this code is equivalent to:
kernel_func<<< dim3( 3,3,3), dim3( 4,4,4) >>> ( argument1, 20 );

Why use this strange wrapper class instead of the nice kernel call syntax "<<< gridDim, blockDim >>>"?

  • The kernel call syntax is nice, but code using it has to be compiled with the device compiler.
  • The wrapper allows the kernel call to be compiled with the host compiler.

Drawbacks of this class compared to the kernel call syntax: type checking of parameters can only be done at runtime (and is done only in Debug mode!). Consider the following example:

// Declaration of kernel, implementation has to be in a file compiled with nvcc
void kernel_func ( double * inputData, int size );
auto kernel = make_kernel( kernel_func );
kernel.addParam<float*> ( argument1 );
kernel.addParam<unsigned int> ( 40 );
kernel.configure( dim3( 3,3,3), dim3( 4,4,4) );
kernel();
// this code is equivalent to:
kernel_func<<< dim3( 3,3,3), dim3( 4,4,4) >>> ( argument1, 40 );

The parameter types of the kernel and the types of the parameters added to the gpu::Kernel object do not match. This is only detected when the code is run, and only if it was compiled in Debug mode!

Advantages of this class compared to the kernel call syntax: it integrates nicely with waLBerla's field indexing and accessor concepts:

void kernel_func( gpu::SimpleFieldAccessor<double> f );
auto myKernel = gpu::make_kernel( &kernel_func );
myKernel.addFieldIndexingParam( gpu::SimpleFieldIndexing<double>::xyz( gpuField ) );
myKernel();

When at least one FieldIndexingParameter is used, configure() does not have to be called, since the thread and grid setup is done by the indexing scheme. If two FieldIndexingParameters are passed, the two indexing schemes have to be consistent.

#include <Kernel.h>

Public Member Functions

 Kernel (FuncPtr funcPtr)
 
template<typename T >
void addParam (const T &param)
 
template<typename T >
void addFieldIndexingParam (const T &indexing)
 
void configure (dim3 gridDim, dim3 blockDim, std::size_t sharedMemSize=0)
 
void operator() (gpuStream_t stream=nullptr) const
 
template<typename Indexing >
void addFieldIndexingParam (const Indexing &indexing)
 

Protected Attributes

Members


FuncPtr funcPtr_
 
bool configured_ { false }
 
dim3 gridDim_
 
dim3 blockDim_
 
std::size_t sharedMemSize_ { 0 }
 
std::vector< std::vector< char > > params_
 

Type checking of parameters


typedef std::remove_pointer< FuncPtr >::type FuncType
 
template<typename T >
bool checkParameter0 (typename std::enable_if<(FunctionTraits< FuncType >::arity > 0), T >::type *=0)
 
template<typename T >
bool checkParameter0 (typename std::enable_if<(FunctionTraits< FuncType >::arity<=0), T >::type *=0)
 
template<typename T >
bool checkParameter1 (typename std::enable_if<(FunctionTraits< FuncType >::arity > 1), T >::type *=0)
 
template<typename T >
bool checkParameter1 (typename std::enable_if<(FunctionTraits< FuncType >::arity<=1), T >::type *=0)
 
template<typename T >
bool checkParameter2 (typename std::enable_if<(FunctionTraits< FuncType >::arity > 2), T >::type *=0)
 
template<typename T >
bool checkParameter2 (typename std::enable_if<(FunctionTraits< FuncType >::arity<=2), T >::type *=0)
 
template<typename T >
bool checkParameter3 (typename std::enable_if<(FunctionTraits< FuncType >::arity > 3), T >::type *=0)
 
template<typename T >
bool checkParameter3 (typename std::enable_if<(FunctionTraits< FuncType >::arity<=3), T >::type *=0)
 
template<typename T >
bool checkParameter4 (typename std::enable_if<(FunctionTraits< FuncType >::arity > 4), T >::type *=0)
 
template<typename T >
bool checkParameter4 (typename std::enable_if<(FunctionTraits< FuncType >::arity<=4), T >::type *=0)
 
template<typename T >
bool checkParameter5 (typename std::enable_if<(FunctionTraits< FuncType >::arity > 5), T >::type *=0)
 
template<typename T >
bool checkParameter5 (typename std::enable_if<(FunctionTraits< FuncType >::arity<=5), T >::type *=0)
 
template<typename T >
bool checkParameter6 (typename std::enable_if<(FunctionTraits< FuncType >::arity > 6), T >::type *=0)
 
template<typename T >
bool checkParameter6 (typename std::enable_if<(FunctionTraits< FuncType >::arity<=6), T >::type *=0)
 
template<typename T >
bool checkParameter7 (typename std::enable_if<(FunctionTraits< FuncType >::arity > 7), T >::type *=0)
 
template<typename T >
bool checkParameter7 (typename std::enable_if<(FunctionTraits< FuncType >::arity<=7), T >::type *=0)
 
template<typename T >
bool checkParameter (uint_t n)
 

Member Typedef Documentation

◆ FuncType

template<typename FuncPtr >
typedef std::remove_pointer<FuncPtr>::type walberla::gpu::Kernel< FuncPtr >::FuncType
protected

Constructor & Destructor Documentation

◆ Kernel()

template<typename FuncPtr >
walberla::gpu::Kernel< FuncPtr >::Kernel ( FuncPtr  funcPtr)

Member Function Documentation

◆ addFieldIndexingParam() [1/2]

template<typename FuncPtr >
template<typename Indexing >
void walberla::gpu::Kernel< FuncPtr >::addFieldIndexingParam ( const Indexing &  indexing)

◆ addFieldIndexingParam() [2/2]

template<typename FuncPtr >
template<typename T >
void walberla::gpu::Kernel< FuncPtr >::addFieldIndexingParam ( const T &  indexing)

◆ addParam()

template<typename FP >
template<typename T >
void walberla::gpu::Kernel< FP >::addParam ( const T &  param)

◆ checkParameter()

template<typename FP >
template<typename T >
bool walberla::gpu::Kernel< FP >::checkParameter ( uint_t  n)
protected

◆ checkParameter0() [1/2]

template<typename FuncPtr >
template<typename T >
bool walberla::gpu::Kernel< FuncPtr >::checkParameter0 ( typename std::enable_if<(FunctionTraits< FuncType >::arity > 0), T >::type *  = 0)
inlineprotected

◆ checkParameter0() [2/2]

template<typename FuncPtr >
template<typename T >
bool walberla::gpu::Kernel< FuncPtr >::checkParameter0 ( typename std::enable_if<(FunctionTraits< FuncType >::arity<=0), T >::type *  = 0)
inlineprotected

◆ checkParameter1() [1/2]

template<typename FuncPtr >
template<typename T >
bool walberla::gpu::Kernel< FuncPtr >::checkParameter1 ( typename std::enable_if<(FunctionTraits< FuncType >::arity > 1), T >::type *  = 0)
inlineprotected

◆ checkParameter1() [2/2]

template<typename FuncPtr >
template<typename T >
bool walberla::gpu::Kernel< FuncPtr >::checkParameter1 ( typename std::enable_if<(FunctionTraits< FuncType >::arity<=1), T >::type *  = 0)
inlineprotected

◆ checkParameter2() [1/2]

template<typename FuncPtr >
template<typename T >
bool walberla::gpu::Kernel< FuncPtr >::checkParameter2 ( typename std::enable_if<(FunctionTraits< FuncType >::arity > 2), T >::type *  = 0)
inlineprotected

◆ checkParameter2() [2/2]

template<typename FuncPtr >
template<typename T >
bool walberla::gpu::Kernel< FuncPtr >::checkParameter2 ( typename std::enable_if<(FunctionTraits< FuncType >::arity<=2), T >::type *  = 0)
inlineprotected

◆ checkParameter3() [1/2]

template<typename FuncPtr >
template<typename T >
bool walberla::gpu::Kernel< FuncPtr >::checkParameter3 ( typename std::enable_if<(FunctionTraits< FuncType >::arity > 3), T >::type *  = 0)
inlineprotected

◆ checkParameter3() [2/2]

template<typename FuncPtr >
template<typename T >
bool walberla::gpu::Kernel< FuncPtr >::checkParameter3 ( typename std::enable_if<(FunctionTraits< FuncType >::arity<=3), T >::type *  = 0)
inlineprotected

◆ checkParameter4() [1/2]

template<typename FuncPtr >
template<typename T >
bool walberla::gpu::Kernel< FuncPtr >::checkParameter4 ( typename std::enable_if<(FunctionTraits< FuncType >::arity > 4), T >::type *  = 0)
inlineprotected

◆ checkParameter4() [2/2]

template<typename FuncPtr >
template<typename T >
bool walberla::gpu::Kernel< FuncPtr >::checkParameter4 ( typename std::enable_if<(FunctionTraits< FuncType >::arity<=4), T >::type *  = 0)
inlineprotected

◆ checkParameter5() [1/2]

template<typename FuncPtr >
template<typename T >
bool walberla::gpu::Kernel< FuncPtr >::checkParameter5 ( typename std::enable_if<(FunctionTraits< FuncType >::arity > 5), T >::type *  = 0)
inlineprotected

◆ checkParameter5() [2/2]

template<typename FuncPtr >
template<typename T >
bool walberla::gpu::Kernel< FuncPtr >::checkParameter5 ( typename std::enable_if<(FunctionTraits< FuncType >::arity<=5), T >::type *  = 0)
inlineprotected

◆ checkParameter6() [1/2]

template<typename FuncPtr >
template<typename T >
bool walberla::gpu::Kernel< FuncPtr >::checkParameter6 ( typename std::enable_if<(FunctionTraits< FuncType >::arity > 6), T >::type *  = 0)
inlineprotected

◆ checkParameter6() [2/2]

template<typename FuncPtr >
template<typename T >
bool walberla::gpu::Kernel< FuncPtr >::checkParameter6 ( typename std::enable_if<(FunctionTraits< FuncType >::arity<=6), T >::type *  = 0)
inlineprotected

◆ checkParameter7() [1/2]

template<typename FuncPtr >
template<typename T >
bool walberla::gpu::Kernel< FuncPtr >::checkParameter7 ( typename std::enable_if<(FunctionTraits< FuncType >::arity > 7), T >::type *  = 0)
inlineprotected

◆ checkParameter7() [2/2]

template<typename FuncPtr >
template<typename T >
bool walberla::gpu::Kernel< FuncPtr >::checkParameter7 ( typename std::enable_if<(FunctionTraits< FuncType >::arity<=7), T >::type *  = 0)
inlineprotected

◆ configure()

template<typename FP >
void walberla::gpu::Kernel< FP >::configure ( dim3  gridDim,
dim3  blockDim,
std::size_t  sharedMemSize = 0 
)

◆ operator()()

template<typename FP >
void walberla::gpu::Kernel< FP >::operator() ( gpuStream_t  stream = nullptr) const

Member Data Documentation

◆ blockDim_

template<typename FuncPtr >
dim3 walberla::gpu::Kernel< FuncPtr >::blockDim_
protected

◆ configured_

template<typename FuncPtr >
bool walberla::gpu::Kernel< FuncPtr >::configured_ { false }
protected

◆ funcPtr_

template<typename FuncPtr >
FuncPtr walberla::gpu::Kernel< FuncPtr >::funcPtr_
protected

◆ gridDim_

template<typename FuncPtr >
dim3 walberla::gpu::Kernel< FuncPtr >::gridDim_
protected

◆ params_

template<typename FuncPtr >
std::vector< std::vector<char> > walberla::gpu::Kernel< FuncPtr >::params_
protected

◆ sharedMemSize_

template<typename FuncPtr >
std::size_t walberla::gpu::Kernel< FuncPtr >::sharedMemSize_ { 0 }
protected

The documentation for this class was generated from the following file:
Kernel< FuncPtr > make_kernel(FuncPtr funcPtr)
Definition: Kernel.h:158