template<typename FuncPtr>
class walberla::gpu::Kernel< FuncPtr >
Wrapper class around a GPU kernel that allows kernels to be called from code that is not compiled with the device compiler.
Example:
void kernel_func ( double * inputData, int size );
kernel.addParam<double*> ( argument1 );
kernel.addParam<int> ( 20 );
kernel.configure( dim3( 3,3,3), dim3( 4,4,4) );
kernel();
// the wrapper calls above are equivalent to the direct kernel launch:
kernel_func<<< dim3( 3,3,3), dim3( 4,4,4) >>> ( argument1, 20 );
Why use this strange wrapper class instead of the nice kernel call syntax "<<< griddim, blockdim >>>"?
- This syntax is nice but has to be compiled with the device compiler
- The wrapper allows the kernel call to be compiled with the host compiler
Drawbacks of this class compared to kernel call syntax: Type checking of parameters can only be done at runtime (and is only done in Debug mode!). Consider the following example:
void kernel_func ( double * inputData, int size );
kernel.addParam<float*> ( argument1 );
kernel.addParam<unsigned int> ( 40 );
kernel.configure( dim3( 3,3,3), dim3( 4,4,4) );
kernel();
// the wrapper calls above are equivalent to the direct kernel launch:
kernel_func<<< dim3( 3,3,3), dim3( 4,4,4) >>> ( argument1, 40 );
The parameter types of the kernel and the parameters added to the gpu::Kernel object do not match. This is only detected at runtime, and only if the code was compiled in DEBUG mode!
Advantages of this class compared to kernel call syntax: Integrates nicely with waLBerla's field indexing and accessor concepts:
void kernel_func( gpu::SimpleFieldAccessor<double> f );
myKernel.addFieldIndexingParam( gpu::SimpleFieldIndexing<double>::xyz( gpuField ) );
myKernel();
When using at least one FieldIndexingParameter, configure() does not have to be called, since the thread and grid setup is done by the indexing scheme. If two FieldIndexingParameters are passed, the two indexing schemes have to be consistent.
|
|
typedef std::remove_pointer< FuncPtr >::type | FuncType |
|
template<typename T > |
bool | checkParameter0 (typename std::enable_if<(FunctionTraits< FuncType >::arity > 0), T >::type *=0) |
|
template<typename T > |
bool | checkParameter0 (typename std::enable_if<(FunctionTraits< FuncType >::arity<=0), T >::type *=0) |
|
template<typename T > |
bool | checkParameter1 (typename std::enable_if<(FunctionTraits< FuncType >::arity > 1), T >::type *=0) |
|
template<typename T > |
bool | checkParameter1 (typename std::enable_if<(FunctionTraits< FuncType >::arity<=1), T >::type *=0) |
|
template<typename T > |
bool | checkParameter2 (typename std::enable_if<(FunctionTraits< FuncType >::arity > 2), T >::type *=0) |
|
template<typename T > |
bool | checkParameter2 (typename std::enable_if<(FunctionTraits< FuncType >::arity<=2), T >::type *=0) |
|
template<typename T > |
bool | checkParameter3 (typename std::enable_if<(FunctionTraits< FuncType >::arity > 3), T >::type *=0) |
|
template<typename T > |
bool | checkParameter3 (typename std::enable_if<(FunctionTraits< FuncType >::arity<=3), T >::type *=0) |
|
template<typename T > |
bool | checkParameter4 (typename std::enable_if<(FunctionTraits< FuncType >::arity > 4), T >::type *=0) |
|
template<typename T > |
bool | checkParameter4 (typename std::enable_if<(FunctionTraits< FuncType >::arity<=4), T >::type *=0) |
|
template<typename T > |
bool | checkParameter5 (typename std::enable_if<(FunctionTraits< FuncType >::arity > 5), T >::type *=0) |
|
template<typename T > |
bool | checkParameter5 (typename std::enable_if<(FunctionTraits< FuncType >::arity<=5), T >::type *=0) |
|
template<typename T > |
bool | checkParameter6 (typename std::enable_if<(FunctionTraits< FuncType >::arity > 6), T >::type *=0) |
|
template<typename T > |
bool | checkParameter6 (typename std::enable_if<(FunctionTraits< FuncType >::arity<=6), T >::type *=0) |
|
template<typename T > |
bool | checkParameter7 (typename std::enable_if<(FunctionTraits< FuncType >::arity > 7), T >::type *=0) |
|
template<typename T > |
bool | checkParameter7 (typename std::enable_if<(FunctionTraits< FuncType >::arity<=7), T >::type *=0) |
|
template<typename T > |
bool | checkParameter (uint_t n) |
|