Detailed Description

template<typename FuncPtr>
class walberla::gpu::Kernel< FuncPtr >

Wrapper class around a GPU kernel, to call kernels also from code not compiled with the device compiler.

Example:

// Declaration of kernel, implementation has to be in a file compiled with nvcc
void kernel_func ( double * inputData, int size );
 
auto kernel = make_kernel( kernel_func );
kernel.addParam<double*> ( argument1 );
kernel.addParam<int>     ( 20 );
kernel.configure( dim3( 3,3,3), dim3( 4,4,4) );
kernel();
// this code is equivalent to:
kernel_func<<< dim3( 3,3,3), dim3( 4,4,4) >>> ( argument1, 20 );

Why use this strange wrapper class instead of the nice kernel call syntax "<<<griddim, blockdim >>>"?

This syntax is nice, but it has to be compiled with the device compiler.
The wrapper allows the kernel call to be compiled with the host compiler.

Drawbacks of this class compared to kernel call syntax: Type checking of parameters can only be done at runtime (and is done only in Debug mode!). Consider the following example:

// Declaration of kernel, implementation has to be in a file compiled with nvcc
void kernel_func ( double * inputData, int size );
 
auto kernel = make_kernel( kernel_func );
kernel.addParam<float*>       ( argument1 );
kernel.addParam<unsigned int> ( 40 );
kernel.configure( dim3( 3,3,3), dim3( 4,4,4) );
kernel();
// this code is equivalent to:
kernel_func<<< dim3( 3,3,3), dim3( 4,4,4) >>> ( argument1, 40 );

The parameter types of the kernel and the types of the parameters added to the gpu::Kernel object do not match. This is only detected at runtime, and only if the code was compiled in DEBUG mode!

Advantages of this class compared to kernel call syntax: Integrates nicely with waLBerla's field indexing and accessor concepts:

void kernel_func( gpu::SimpleFieldAccessor<double> f );
 
auto myKernel = gpu::make_kernel( &kernel_func );
myKernel.addFieldIndexingParam( gpu::SimpleFieldIndexing<double>::xyz( gpuField ) );
myKernel();

When using at least one FieldIndexingParameter, configure() does not have to be called, since the thread and grid setup is done by the indexing scheme. If two FieldIndexingParameters are passed, the two indexing schemes have to be consistent.

#include <Kernel.h>

Public Member Functions
	Kernel (FuncPtr funcPtr)

template<typename T >
void	addParam (const T &param)

template<typename T >
void	addFieldIndexingParam (const T &indexing)

void	configure (dim3 gridDim, dim3 blockDim, std::size_t sharedMemSize=0)

void	operator() (gpuStream_t stream=nullptr) const

template<typename Indexing >
void	addFieldIndexingParam (const Indexing &indexing)

Protected Attributes
Members

FuncPtr	funcPtr_

bool	configured_ { false }

dim3	gridDim_

dim3	blockDim_

std::size_t	sharedMemSize_ { 0 }

std::vector< std::vector< char > >	params_

Type checking of parameters

typedef std::remove_pointer< FuncPtr >::type	FuncType

template<typename T >
bool	checkParameter0 (typename std::enable_if<(FunctionTraits< FuncType >::arity > 0), T >::type *=0)

template<typename T >
bool	checkParameter0 (typename std::enable_if<(FunctionTraits< FuncType >::arity<=0), T >::type *=0)

template<typename T >
bool	checkParameter1 (typename std::enable_if<(FunctionTraits< FuncType >::arity > 1), T >::type *=0)

template<typename T >
bool	checkParameter1 (typename std::enable_if<(FunctionTraits< FuncType >::arity<=1), T >::type *=0)

template<typename T >
bool	checkParameter2 (typename std::enable_if<(FunctionTraits< FuncType >::arity > 2), T >::type *=0)

template<typename T >
bool	checkParameter2 (typename std::enable_if<(FunctionTraits< FuncType >::arity<=2), T >::type *=0)

template<typename T >
bool	checkParameter3 (typename std::enable_if<(FunctionTraits< FuncType >::arity > 3), T >::type *=0)

template<typename T >
bool	checkParameter3 (typename std::enable_if<(FunctionTraits< FuncType >::arity<=3), T >::type *=0)

template<typename T >
bool	checkParameter4 (typename std::enable_if<(FunctionTraits< FuncType >::arity > 4), T >::type *=0)

template<typename T >
bool	checkParameter4 (typename std::enable_if<(FunctionTraits< FuncType >::arity<=4), T >::type *=0)

template<typename T >
bool	checkParameter5 (typename std::enable_if<(FunctionTraits< FuncType >::arity > 5), T >::type *=0)

template<typename T >
bool	checkParameter5 (typename std::enable_if<(FunctionTraits< FuncType >::arity<=5), T >::type *=0)

template<typename T >
bool	checkParameter6 (typename std::enable_if<(FunctionTraits< FuncType >::arity > 6), T >::type *=0)

template<typename T >
bool	checkParameter6 (typename std::enable_if<(FunctionTraits< FuncType >::arity<=6), T >::type *=0)

template<typename T >
bool	checkParameter7 (typename std::enable_if<(FunctionTraits< FuncType >::arity > 7), T >::type *=0)

template<typename T >
bool	checkParameter7 (typename std::enable_if<(FunctionTraits< FuncType >::arity<=7), T >::type *=0)

template<typename T >
bool	checkParameter (uint_t n)

Member Typedef Documentation

◆ FuncType

template<typename FuncPtr >

typedef std::remove_pointer<FuncPtr>::type walberla::gpu::Kernel< FuncPtr >::FuncType

protected

Constructor & Destructor Documentation

◆ Kernel()

template<typename FuncPtr >

walberla::gpu::Kernel< FP >::Kernel ( FuncPtr funcPtr )

Member Function Documentation

◆ addFieldIndexingParam() [1/2]

template<typename FuncPtr >

template<typename Indexing >

void walberla::gpu::Kernel< FuncPtr >::addFieldIndexingParam ( const Indexing & indexing )

◆ addFieldIndexingParam() [2/2]

template<typename FuncPtr >

template<typename T >

void walberla::gpu::Kernel< FuncPtr >::addFieldIndexingParam ( const T & indexing )

◆ addParam()

template<typename FP >

template<typename T >

void walberla::gpu::Kernel< FP >::addParam ( const T & param )

◆ checkParameter()

template<typename FP >

template<typename T >

bool walberla::gpu::Kernel< FP >::checkParameter ( uint_t n )

protected

◆ checkParameter0() [1/2]

template<typename FuncPtr >

template<typename T >

bool walberla::gpu::Kernel< FuncPtr >::checkParameter0 ( typename std::enable_if<(FunctionTraits< FuncType >::arity > 0), T >::type * = 0 )

inlineprotected

◆ checkParameter0() [2/2]

template<typename FuncPtr >

template<typename T >

bool walberla::gpu::Kernel< FuncPtr >::checkParameter0 ( typename std::enable_if<(FunctionTraits< FuncType >::arity<=0), T >::type * = 0 )

inlineprotected

◆ checkParameter1() [1/2]

template<typename FuncPtr >

template<typename T >

bool walberla::gpu::Kernel< FuncPtr >::checkParameter1 ( typename std::enable_if<(FunctionTraits< FuncType >::arity > 1), T >::type * = 0 )

inlineprotected

◆ checkParameter1() [2/2]

template<typename FuncPtr >

template<typename T >

bool walberla::gpu::Kernel< FuncPtr >::checkParameter1 ( typename std::enable_if<(FunctionTraits< FuncType >::arity<=1), T >::type * = 0 )

inlineprotected

◆ checkParameter2() [1/2]

template<typename FuncPtr >

template<typename T >

bool walberla::gpu::Kernel< FuncPtr >::checkParameter2 ( typename std::enable_if<(FunctionTraits< FuncType >::arity > 2), T >::type * = 0 )

inlineprotected

◆ checkParameter2() [2/2]

template<typename FuncPtr >

template<typename T >

bool walberla::gpu::Kernel< FuncPtr >::checkParameter2 ( typename std::enable_if<(FunctionTraits< FuncType >::arity<=2), T >::type * = 0 )

inlineprotected

◆ checkParameter3() [1/2]

template<typename FuncPtr >

template<typename T >

bool walberla::gpu::Kernel< FuncPtr >::checkParameter3 ( typename std::enable_if<(FunctionTraits< FuncType >::arity > 3), T >::type * = 0 )

inlineprotected

◆ checkParameter3() [2/2]

template<typename FuncPtr >

template<typename T >

bool walberla::gpu::Kernel< FuncPtr >::checkParameter3 ( typename std::enable_if<(FunctionTraits< FuncType >::arity<=3), T >::type * = 0 )

inlineprotected

◆ checkParameter4() [1/2]

template<typename FuncPtr >

template<typename T >

bool walberla::gpu::Kernel< FuncPtr >::checkParameter4 ( typename std::enable_if<(FunctionTraits< FuncType >::arity > 4), T >::type * = 0 )

inlineprotected

◆ checkParameter4() [2/2]

template<typename FuncPtr >

template<typename T >

bool walberla::gpu::Kernel< FuncPtr >::checkParameter4 ( typename std::enable_if<(FunctionTraits< FuncType >::arity<=4), T >::type * = 0 )

inlineprotected

◆ checkParameter5() [1/2]

template<typename FuncPtr >

template<typename T >

bool walberla::gpu::Kernel< FuncPtr >::checkParameter5 ( typename std::enable_if<(FunctionTraits< FuncType >::arity > 5), T >::type * = 0 )

inlineprotected

◆ checkParameter5() [2/2]

template<typename FuncPtr >

template<typename T >

bool walberla::gpu::Kernel< FuncPtr >::checkParameter5 ( typename std::enable_if<(FunctionTraits< FuncType >::arity<=5), T >::type * = 0 )

inlineprotected

◆ checkParameter6() [1/2]

template<typename FuncPtr >

template<typename T >

bool walberla::gpu::Kernel< FuncPtr >::checkParameter6 ( typename std::enable_if<(FunctionTraits< FuncType >::arity > 6), T >::type * = 0 )

inlineprotected

◆ checkParameter6() [2/2]

template<typename FuncPtr >

template<typename T >

bool walberla::gpu::Kernel< FuncPtr >::checkParameter6 ( typename std::enable_if<(FunctionTraits< FuncType >::arity<=6), T >::type * = 0 )

inlineprotected

◆ checkParameter7() [1/2]

template<typename FuncPtr >

template<typename T >

bool walberla::gpu::Kernel< FuncPtr >::checkParameter7 ( typename std::enable_if<(FunctionTraits< FuncType >::arity > 7), T >::type * = 0 )

inlineprotected

◆ checkParameter7() [2/2]

template<typename FuncPtr >

template<typename T >

bool walberla::gpu::Kernel< FuncPtr >::checkParameter7 ( typename std::enable_if<(FunctionTraits< FuncType >::arity<=7), T >::type * = 0 )

inlineprotected

◆ configure()

template<typename FP >

void walberla::gpu::Kernel< FP >::configure	(	dim3	gridDim,
		dim3	blockDim,
		std::size_t	sharedMemSize = `0`
	)

◆ operator()()

template<typename FP >

void walberla::gpu::Kernel< FP >::operator() ( gpuStream_t stream = nullptr ) const

Member Data Documentation

◆ blockDim_

template<typename FuncPtr >

dim3 walberla::gpu::Kernel< FuncPtr >::blockDim_

protected

◆ configured_

template<typename FuncPtr >

bool walberla::gpu::Kernel< FuncPtr >::configured_ { false }

protected

◆ funcPtr_

template<typename FuncPtr >

FuncPtr walberla::gpu::Kernel< FuncPtr >::funcPtr_

protected

◆ gridDim_

template<typename FuncPtr >

dim3 walberla::gpu::Kernel< FuncPtr >::gridDim_

protected

◆ params_

template<typename FuncPtr >

std::vector< std::vector<char> > walberla::gpu::Kernel< FuncPtr >::params_

protected

◆ sharedMemSize_

template<typename FuncPtr >

std::size_t walberla::gpu::Kernel< FuncPtr >::sharedMemSize_ { 0 }

protected

The documentation for this class was generated from the following file:

/builds/administration/walberla-website/walberla/src/gpu/Kernel.h

Detailed Description

template<typename FuncPtr> class walberla::gpu::Kernel< FuncPtr >

Public Member Functions

Protected Attributes

Type checking of parameters

Member Typedef Documentation

◆ FuncType

Constructor & Destructor Documentation

◆ Kernel()

Member Function Documentation

◆ addFieldIndexingParam() [1/2]

◆ addFieldIndexingParam() [2/2]

◆ addParam()

◆ checkParameter()

◆ checkParameter0() [1/2]

◆ checkParameter0() [2/2]

◆ checkParameter1() [1/2]

◆ checkParameter1() [2/2]

◆ checkParameter2() [1/2]

◆ checkParameter2() [2/2]

◆ checkParameter3() [1/2]

◆ checkParameter3() [2/2]

◆ checkParameter4() [1/2]

◆ checkParameter4() [2/2]

◆ checkParameter5() [1/2]

◆ checkParameter5() [2/2]

◆ checkParameter6() [1/2]

◆ checkParameter6() [2/2]

◆ checkParameter7() [1/2]

◆ checkParameter7() [2/2]

◆ configure()

◆ operator()()

Member Data Documentation

◆ blockDim_

◆ configured_

◆ funcPtr_

◆ gridDim_

◆ params_

◆ sharedMemSize_

template<typename FuncPtr>
class walberla::gpu::Kernel< FuncPtr >