Namespaces | |
communication | |
internal | |
Classes | |
class | EventRAII |
class | FieldAccessor |
class | FieldAccessor3D |
class | FieldAccessorXYZ |
class | FieldIndexing |
class | FieldIndexing3D |
class | FieldIndexing3DBase |
class | FieldIndexingXYZ |
class | GeneratedGPUPackInfo |
class | GeneratedNonUniformGPUPackInfo |
class | GPUField |
GhostLayerField stored on a CUDA/HIP GPU. More... | |
class | GPUSweepBase |
class | HostFieldAllocator |
Allocator that allocates a CPU (host) field using gpuHostAlloc without padding. More... | |
class | Kernel |
Wrapper class around a GPU kernel, to call kernels also from code not compiled with the device compiler. More... | |
class | NvtxRange |
class | ParallelSection |
class | ParallelStreams |
Helper class to run CUDA/HIP operations on parallel streams. More... | |
class | StreamRAII |
Functions | |
template<typename GPUField_T > | |
BlockDataID | addGPUFieldToStorage (const shared_ptr< StructuredBlockStorage > &bs, const std::string &identifier, uint_t fSize, const Layout layout=fzyx, uint_t nrOfGhostLayers=1, bool usePitchedMem=true) |
Adds a gpu::GPUField to a StructuredBlockStorage. More... | |
template<typename Field_T > | |
BlockDataID | addGPUFieldToStorage (const shared_ptr< StructuredBlockStorage > &bs, ConstBlockDataID cpuFieldID, const std::string &identifier, bool usePitchedMem=true) |
Adds a gpu::GPUField to a StructuredBlockStorage using data from a CPU field. More... | |
void * | allocate_aligned_with_offset (uint_t size, uint_t alignment, uint_t offset) |
void | free_aligned_with_offset (void *ptr) |
void * | allocate_pitched_with_offset (size_t &pitchOut, size_t width, size_t height, size_t alignment, size_t alignmentOffset) |
void | selectDeviceBasedOnMpiRank () |
Selects active GPU device based on MPI rank. More... | |
void | checkForError (gpuError_t code, const std::string &callerPath, const int line) |
void | checkForLastError (const std::string &callerPath, const int line) |
template<typename DstType , typename SrcType > | |
void | fieldCpy (const shared_ptr< StructuredBlockStorage > &blocks, BlockDataID dstID, ConstBlockDataID srcID) |
template<typename DstType , typename SrcType > | |
std::function< void()> | fieldCpyFunctor (const shared_ptr< StructuredBlockStorage > &blocks, BlockDataID dstID, ConstBlockDataID srcID) |
template<typename DstType , typename SrcType > | |
void | fieldCpySweepFunction (BlockDataID dstID, ConstBlockDataID srcID, IBlock *block) |
template<typename DstType , typename SrcType > | |
std::function< void(IBlock *)> | fieldCpyFunctor (BlockDataID dstID, ConstBlockDataID srcID) |
template<typename T , uint_t fs> | |
void | fieldCpy (gpu::GPUField< T > &dst, const field::Field< T, fs > &src) |
template<typename T , uint_t fs> | |
void | fieldCpy (field::Field< T, fs > &dst, const gpu::GPUField< T > &src) |
template<typename T > | |
void | shiftCoordinatesWhileFastestCoordHasSizeOne (typename FieldAccessor< T >::IndexingScheme &indexing, dim3 &gridDim, dim3 &blockDim) |
unsigned int | iDivUp (unsigned int a, unsigned int b) |
void | copyDevToDevFZYX (const gpuPitchedPtr &dst, const gpuPitchedPtr &src, std::tuple< uint_t, uint_t, uint_t, uint_t > &dstOffset, std::tuple< uint_t, uint_t, uint_t, uint_t > &srcOffset, uint_t dstAllocSizeZ, uint_t srcAllocSizeZ, uint_t typeSize, std::tuple< uint_t, uint_t, uint_t, uint_t > &intervalSize, gpuStream_t copyStream) |
Copy a 4D interval of a device buffer to another device buffer with fzyx memory layout. More... | |
void | copyDevToDevZYXF (const gpuPitchedPtr &dst, const gpuPitchedPtr &src, std::tuple< uint_t, uint_t, uint_t, uint_t > &dstOffset, std::tuple< uint_t, uint_t, uint_t, uint_t > &srcOffset, uint_t dstAllocSizeY, uint_t srcAllocSizeY, uint_t typeSize, std::tuple< uint_t, uint_t, uint_t, uint_t > &intervalSize, gpuStream_t copyStream) |
Copy a 4D interval of a device buffer to another device buffer with zyxf memory layout. More... | |
void | copyHostToDevFZYX (const gpuPitchedPtr &dst, unsigned char *src, std::tuple< uint_t, uint_t, uint_t, uint_t > &dstOffset, std::tuple< uint_t, uint_t, uint_t, uint_t > &srcOffset, uint_t dstAllocSizeZ, uint_t srcAllocSizeZ, uint_t typeSize, std::tuple< uint_t, uint_t, uint_t, uint_t > &intervalSize, gpuStream_t copyStream) |
Copy a 4D interval of a host buffer to a device buffer with fzyx memory layout. More... | |
void | copyHostToDevZYXF (const gpuPitchedPtr &dst, unsigned char *src, std::tuple< uint_t, uint_t, uint_t, uint_t > &dstOffset, std::tuple< uint_t, uint_t, uint_t, uint_t > &srcOffset, uint_t dstAllocSizeY, uint_t srcAllocSizeY, uint_t typeSize, std::tuple< uint_t, uint_t, uint_t, uint_t > &intervalSize, gpuStream_t copyStream) |
Copy a 4D interval of a host buffer to a device buffer with zyxf memory layout. More... | |
void | copyDevToHostFZYX (unsigned char *dst, const gpuPitchedPtr &src, std::tuple< uint_t, uint_t, uint_t, uint_t > &dstOffset, std::tuple< uint_t, uint_t, uint_t, uint_t > &srcOffset, uint_t dstAllocSizeZ, uint_t srcAllocSizeZ, uint_t typeSize, std::tuple< uint_t, uint_t, uint_t, uint_t > &intervalSize, gpuStream_t copyStream) |
Copy a 4D interval of a device buffer to a host buffer with fzyx memory layout. More... | |
void | copyDevToHostZYXF (unsigned char *dst, const gpuPitchedPtr &src, std::tuple< uint_t, uint_t, uint_t, uint_t > &dstOffset, std::tuple< uint_t, uint_t, uint_t, uint_t > &srcOffset, uint_t dstAllocSizeY, uint_t srcAllocSizeY, uint_t typeSize, std::tuple< uint_t, uint_t, uint_t, uint_t > &intervalSize, gpuStream_t copyStream) |
Copy a 4D interval of a device buffer to a host buffer with zyxf memory layout. More... | |
template<typename FuncPtr > | |
Kernel< FuncPtr > | make_kernel (FuncPtr funcPtr) |
void | nvtxMarker (const std::string &name, const uint32_t color=0xaaaaaa) |
void | nameStream (const cudaStream_t &stream, const std::string &name) |
template<typename... GpuFields> | |
void | exportModuleToPython (py::module_ &m) |
template<typename... CpuFields> | |
void | exportCopyFunctionsToPython (py::module_ &m) |
Variables | |
static std::map< void *, void * > | freePointers_ |
BlockDataID walberla::gpu::addGPUFieldToStorage | ( | const shared_ptr< StructuredBlockStorage > & | bs, |
const std::string & | identifier, | ||
uint_t | fSize, | ||
const Layout | layout = fzyx , |
||
uint_t | nrOfGhostLayers = 1 , |
||
bool | usePitchedMem = true |
||
) |
Adds a gpu::GPUField to a StructuredBlockStorage.
BlockDataID walberla::gpu::addGPUFieldToStorage | ( | const shared_ptr< StructuredBlockStorage > & | bs, |
ConstBlockDataID | cpuFieldID, | ||
const std::string & | identifier, | ||
bool | usePitchedMem = true |
||
) |
Adds a gpu::GPUField to a StructuredBlockStorage using data from a CPU field.
Field_T | type of the CPU field; the created GPUField will be of type gpu::GPUField<Field_T::value_type> |
void * walberla::gpu::allocate_pitched_with_offset | ( | size_t & | pitchOut, |
size_t | width, | ||
size_t | height, | ||
size_t | alignment, | ||
size_t | alignmentOffset | ||
) |
|
inline |
|
inline |
void walberla::gpu::copyDevToDevFZYX | ( | const gpuPitchedPtr & | dst, |
const gpuPitchedPtr & | src, | ||
std::tuple< uint_t, uint_t, uint_t, uint_t > & | dstOffset, | ||
std::tuple< uint_t, uint_t, uint_t, uint_t > & | srcOffset, | ||
uint_t | dstAllocSizeZ, | ||
uint_t | srcAllocSizeZ, | ||
uint_t | typeSize, | ||
std::tuple< uint_t, uint_t, uint_t, uint_t > & | intervalSize, | ||
gpuStream_t | copyStream | ||
) |
Copy a 4D interval of a device buffer to another device buffer with fzyx memory layout.
dst | destination buffer |
src | source buffer |
dstOffset | (x, y, z, f)-tuple containing the coordinate of the interval start point in the destination buffer |
srcOffset | (x, y, z, f)-tuple containing the coordinate of the interval start point in the source buffer |
dstAllocSizeZ | allocation size in z direction of the destination buffer |
srcAllocSizeZ | allocation size in z direction of the source buffer |
typeSize | size of an f element |
intervalSize | interval size |
copyStream | CUDA/HIP stream; if not NULL, copy operations will be performed asynchronously |
void walberla::gpu::copyDevToDevZYXF | ( | const gpuPitchedPtr & | dst, |
const gpuPitchedPtr & | src, | ||
std::tuple< uint_t, uint_t, uint_t, uint_t > & | dstOffset, | ||
std::tuple< uint_t, uint_t, uint_t, uint_t > & | srcOffset, | ||
uint_t | dstAllocSizeY, | ||
uint_t | srcAllocSizeY, | ||
uint_t | typeSize, | ||
std::tuple< uint_t, uint_t, uint_t, uint_t > & | intervalSize, | ||
gpuStream_t | copyStream | ||
) |
Copy a 4D interval of a device buffer to another device buffer with zyxf memory layout.
dst | destination buffer |
src | source buffer |
dstOffset | (x, y, z, f)-tuple containing the coordinate of the interval start point in the destination buffer |
srcOffset | (x, y, z, f)-tuple containing the coordinate of the interval start point in the source buffer |
dstAllocSizeY | allocation size in y direction of the destination buffer |
srcAllocSizeY | allocation size in y direction of the source buffer |
typeSize | size of an f element |
intervalSize | interval size |
copyStream | CUDA/HIP stream; if not NULL, copy operations will be performed asynchronously |
void walberla::gpu::copyDevToHostFZYX | ( | unsigned char * | dst, |
const gpuPitchedPtr & | src, | ||
std::tuple< uint_t, uint_t, uint_t, uint_t > & | dstOffset, | ||
std::tuple< uint_t, uint_t, uint_t, uint_t > & | srcOffset, | ||
uint_t | dstAllocSizeZ, | ||
uint_t | srcAllocSizeZ, | ||
uint_t | typeSize, | ||
std::tuple< uint_t, uint_t, uint_t, uint_t > & | intervalSize, | ||
gpuStream_t | copyStream | ||
) |
Copy a 4D interval of a device buffer to a host buffer with fzyx memory layout.
See copyDevToDevFZYX() for parameter information.
void walberla::gpu::copyDevToHostZYXF | ( | unsigned char * | dst, |
const gpuPitchedPtr & | src, | ||
std::tuple< uint_t, uint_t, uint_t, uint_t > & | dstOffset, | ||
std::tuple< uint_t, uint_t, uint_t, uint_t > & | srcOffset, | ||
uint_t | dstAllocSizeY, | ||
uint_t | srcAllocSizeY, | ||
uint_t | typeSize, | ||
std::tuple< uint_t, uint_t, uint_t, uint_t > & | intervalSize, | ||
gpuStream_t | copyStream | ||
) |
Copy a 4D interval of a device buffer to a host buffer with zyxf memory layout.
See copyDevToDevZYXF() for parameter information.
void walberla::gpu::copyHostToDevFZYX | ( | const gpuPitchedPtr & | dst, |
unsigned char * | src, | ||
std::tuple< uint_t, uint_t, uint_t, uint_t > & | dstOffset, | ||
std::tuple< uint_t, uint_t, uint_t, uint_t > & | srcOffset, | ||
uint_t | dstAllocSizeZ, | ||
uint_t | srcAllocSizeZ, | ||
uint_t | typeSize, | ||
std::tuple< uint_t, uint_t, uint_t, uint_t > & | intervalSize, | ||
gpuStream_t | copyStream | ||
) |
Copy a 4D interval of a host buffer to a device buffer with fzyx memory layout.
See copyDevToDevFZYX() for parameter information.
void walberla::gpu::copyHostToDevZYXF | ( | const gpuPitchedPtr & | dst, |
unsigned char * | src, | ||
std::tuple< uint_t, uint_t, uint_t, uint_t > & | dstOffset, | ||
std::tuple< uint_t, uint_t, uint_t, uint_t > & | srcOffset, | ||
uint_t | dstAllocSizeY, | ||
uint_t | srcAllocSizeY, | ||
uint_t | typeSize, | ||
std::tuple< uint_t, uint_t, uint_t, uint_t > & | intervalSize, | ||
gpuStream_t | copyStream | ||
) |
Copy a 4D interval of a host buffer to a device buffer with zyxf memory layout.
See copyDevToDevZYXF() for parameter information.
void walberla::gpu::exportCopyFunctionsToPython | ( | py::module_ & | m | ) |
void walberla::gpu::exportModuleToPython | ( | py::module_ & | m | ) |
void walberla::gpu::fieldCpy | ( | const shared_ptr< StructuredBlockStorage > & | blocks, |
BlockDataID | dstID, | ||
ConstBlockDataID | srcID | ||
) |
void walberla::gpu::fieldCpy | ( | field::Field< T, fs > & | dst, |
const gpu::GPUField< T > & | src | ||
) |
void walberla::gpu::fieldCpy | ( | gpu::GPUField< T > & | dst, |
const field::Field< T, fs > & | src | ||
) |
std::function<void(IBlock*)> walberla::gpu::fieldCpyFunctor | ( | BlockDataID | dstID, |
ConstBlockDataID | srcID | ||
) |
std::function<void()> walberla::gpu::fieldCpyFunctor | ( | const shared_ptr< StructuredBlockStorage > & | blocks, |
BlockDataID | dstID, | ||
ConstBlockDataID | srcID | ||
) |
void walberla::gpu::fieldCpySweepFunction | ( | BlockDataID | dstID, |
ConstBlockDataID | srcID, | ||
IBlock * | block | ||
) |
void walberla::gpu::free_aligned_with_offset | ( | void * | ptr | ) |
|
inline |
Kernel<FuncPtr> walberla::gpu::make_kernel | ( | FuncPtr | funcPtr | ) |
|
inline |
|
inline |
void walberla::gpu::selectDeviceBasedOnMpiRank | ( | ) |
Selects active GPU device based on MPI rank.
Assumes that as many MPI processes are started on each node as there are GPUs.
void walberla::gpu::shiftCoordinatesWhileFastestCoordHasSizeOne | ( | typename FieldAccessor< T >::IndexingScheme & | indexing, |
dim3 & | gridDim, | ||
dim3 & | blockDim | ||
) |
|
static |