Namespaces
	communication

	internal

Classes
class	EventRAII

class	FieldAccessor

class	FieldAccessor3D

class	FieldAccessorXYZ

class	FieldIndexing

class	FieldIndexing3D

class	FieldIndexing3DBase

class	FieldIndexingXYZ

class	GeneratedGPUPackInfo

class	GeneratedNonUniformGPUPackInfo

class	GPUField
	GhostLayerField stored on a CUDA/HIP GPU. More...

class	GPUSweepBase

class	HostFieldAllocator
	Allocator that allocates a CPU (host-side) field using gpuHostAlloc without padding. More...

class	Kernel
	Wrapper class around a GPU kernel, to call kernels also from code not compiled with the device compiler. More...

class	NvtxRange

class	ParallelSection

class	ParallelStreams
	Helper class to run CUDA/HIP operations on parallel streams. More...

class	StreamRAII

Functions
template<typename GPUField_T >
BlockDataID	addGPUFieldToStorage (const shared_ptr< StructuredBlockStorage > &bs, const std::string &identifier, uint_t fSize, const Layout layout=fzyx, uint_t nrOfGhostLayers=1, bool usePitchedMem=true)
	Adds a gpu::GPUField to a StructuredBlockStorage. More...

template<typename Field_T >
BlockDataID	addGPUFieldToStorage (const shared_ptr< StructuredBlockStorage > &bs, ConstBlockDataID cpuFieldID, const std::string &identifier, bool usePitchedMem=true)
	Adds a gpu::GPUField to a StructuredBlockStorage using data from a CPU field. More...

void *	allocate_aligned_with_offset (uint_t size, uint_t alignment, uint_t offset)

void	free_aligned_with_offset (void *ptr)

void *	allocate_pitched_with_offset (size_t &pitchOut, size_t width, size_t height, size_t alignment, size_t alignmentOffset)

void	selectDeviceBasedOnMpiRank ()
	Selects active GPU device based on MPI rank. More...

void	checkForError (gpuError_t code, const std::string &callerPath, const int line)

void	checkForLastError (const std::string &callerPath, const int line)

template<typename DstType , typename SrcType >
void	fieldCpy (const shared_ptr< StructuredBlockStorage > &blocks, BlockDataID dstID, ConstBlockDataID srcID)

template<typename DstType , typename SrcType >
std::function< void()>	fieldCpyFunctor (const shared_ptr< StructuredBlockStorage > &blocks, BlockDataID dstID, ConstBlockDataID srcID)

template<typename DstType , typename SrcType >
void	fieldCpySweepFunction (BlockDataID dstID, ConstBlockDataID srcID, IBlock *block)

template<typename DstType , typename SrcType >
std::function< void(IBlock *)>	fieldCpyFunctor (BlockDataID dstID, ConstBlockDataID srcID)

template<typename T , uint_t fs>
void	fieldCpy (gpu::GPUField< T > &dst, const field::Field< T, fs > &src)

template<typename T , uint_t fs>
void	fieldCpy (field::Field< T, fs > &dst, const gpu::GPUField< T > &src)

template<typename T >
void	shiftCoordinatesWhileFastestCoordHasSizeOne (typename FieldAccessor< T >::IndexingScheme &indexing, dim3 &gridDim, dim3 &blockDim)

unsigned int	iDivUp (unsigned int a, unsigned int b)

void	copyDevToDevFZYX (const gpuPitchedPtr &dst, const gpuPitchedPtr &src, std::tuple< uint_t, uint_t, uint_t, uint_t > &dstOffset, std::tuple< uint_t, uint_t, uint_t, uint_t > &srcOffset, uint_t dstAllocSizeZ, uint_t srcAllocSizeZ, uint_t typeSize, std::tuple< uint_t, uint_t, uint_t, uint_t > &intervalSize, gpuStream_t copyStream)
	Copy a 4D interval of a device buffer to another device buffer with fzyx memory layout. More...

void	copyDevToDevZYXF (const gpuPitchedPtr &dst, const gpuPitchedPtr &src, std::tuple< uint_t, uint_t, uint_t, uint_t > &dstOffset, std::tuple< uint_t, uint_t, uint_t, uint_t > &srcOffset, uint_t dstAllocSizeY, uint_t srcAllocSizeY, uint_t typeSize, std::tuple< uint_t, uint_t, uint_t, uint_t > &intervalSize, gpuStream_t copyStream)
	Copy a 4D interval of a device buffer to another device buffer with zyxf memory layout. More...

void	copyHostToDevFZYX (const gpuPitchedPtr &dst, unsigned char *src, std::tuple< uint_t, uint_t, uint_t, uint_t > &dstOffset, std::tuple< uint_t, uint_t, uint_t, uint_t > &srcOffset, uint_t dstAllocSizeZ, uint_t srcAllocSizeZ, uint_t typeSize, std::tuple< uint_t, uint_t, uint_t, uint_t > &intervalSize, gpuStream_t copyStream)
	Copy a 4D interval of a host buffer to a device buffer with fzyx memory layout. More...

void	copyHostToDevZYXF (const gpuPitchedPtr &dst, unsigned char *src, std::tuple< uint_t, uint_t, uint_t, uint_t > &dstOffset, std::tuple< uint_t, uint_t, uint_t, uint_t > &srcOffset, uint_t dstAllocSizeY, uint_t srcAllocSizeY, uint_t typeSize, std::tuple< uint_t, uint_t, uint_t, uint_t > &intervalSize, gpuStream_t copyStream)
	Copy a 4D interval of a host buffer to a device buffer with zyxf memory layout. More...

void	copyDevToHostFZYX (unsigned char *dst, const gpuPitchedPtr &src, std::tuple< uint_t, uint_t, uint_t, uint_t > &dstOffset, std::tuple< uint_t, uint_t, uint_t, uint_t > &srcOffset, uint_t dstAllocSizeZ, uint_t srcAllocSizeZ, uint_t typeSize, std::tuple< uint_t, uint_t, uint_t, uint_t > &intervalSize, gpuStream_t copyStream)
	Copy a 4D interval of a device buffer to a host buffer with fzyx memory layout. More...

void	copyDevToHostZYXF (unsigned char *dst, const gpuPitchedPtr &src, std::tuple< uint_t, uint_t, uint_t, uint_t > &dstOffset, std::tuple< uint_t, uint_t, uint_t, uint_t > &srcOffset, uint_t dstAllocSizeY, uint_t srcAllocSizeY, uint_t typeSize, std::tuple< uint_t, uint_t, uint_t, uint_t > &intervalSize, gpuStream_t copyStream)
	Copy a 4D interval of a device buffer to a host buffer with zyxf memory layout. More...

template<typename FuncPtr >
Kernel< FuncPtr >	make_kernel (FuncPtr funcPtr)

void	nvtxMarker (const std::string &name, const uint32_t color=0xaaaaaa)

void	nameStream (const cudaStream_t &stream, const std::string &name)

template<typename... GpuFields>
void	exportModuleToPython (py::module_ &m)

template<typename... CpuFields>
void	exportCopyFunctionsToPython (py::module_ &m)

Variables
static std::map< void *, void * >	freePointers_

Function Documentation

◆ addGPUFieldToStorage() [1/2]

template<typename GPUField_T >

BlockDataID walberla::gpu::addGPUFieldToStorage	(	const shared_ptr< StructuredBlockStorage > &	bs,
		const std::string &	identifier,
		uint_t	fSize,
		const Layout	layout = `fzyx`,
		uint_t	nrOfGhostLayers = `1`,
		bool	usePitchedMem = `true`
	)

Adds a gpu::GPUField to a StructuredBlockStorage.

Similar to the walberla::field::addToStorage() functions.
The created field is uninitialized.

◆ addGPUFieldToStorage() [2/2]

template<typename Field_T >

BlockDataID walberla::gpu::addGPUFieldToStorage	(	const shared_ptr< StructuredBlockStorage > &	bs,
		ConstBlockDataID	cpuFieldID,
		const std::string &	identifier,
		bool	usePitchedMem = `true`
	)

Adds a gpu::GPUField to a StructuredBlockStorage using data from a CPU field.

Adds a GPU field to a StructuredBlockStorage using a CPU field.
Sizes, number of ghost layers, and layout are the same as those of the CPU field.
The GPU field is initialized with the data currently stored in the CPU field.
Template Parameters

Field_T	type of the CPU field; the created GPUField will be of type gpu::GPUField<Field_T::value_type>

◆ allocate_aligned_with_offset()

void * walberla::gpu::allocate_aligned_with_offset	(	uint_t	size,
		uint_t	alignment,
		uint_t	offset
	)

◆ allocate_pitched_with_offset()

void * walberla::gpu::allocate_pitched_with_offset	(	size_t &	pitchOut,
		size_t	width,
		size_t	height,
		size_t	alignment,
		size_t	alignmentOffset
	)

◆ checkForError()

void walberla::gpu::checkForError	(	gpuError_t	code,
		const std::string &	callerPath,
		const int	line
	)

inline

◆ checkForLastError()

void walberla::gpu::checkForLastError	(	const std::string &	callerPath,
		const int	line
	)

inline

◆ copyDevToDevFZYX()

void walberla::gpu::copyDevToDevFZYX	(	const gpuPitchedPtr &	dst,
		const gpuPitchedPtr &	src,
		std::tuple< uint_t, uint_t, uint_t, uint_t > &	dstOffset,
		std::tuple< uint_t, uint_t, uint_t, uint_t > &	srcOffset,
		uint_t	dstAllocSizeZ,
		uint_t	srcAllocSizeZ,
		uint_t	typeSize,
		std::tuple< uint_t, uint_t, uint_t, uint_t > &	intervalSize,
		gpuStream_t	copyStream
	)

Copy a 4D interval of a device buffer to another device buffer with fzyx memory layout.

Parameters

dst	destination buffer
src	source buffer
dstOffset	(x, y, z, f)-tuple containing the coordinate of the interval start point in the destination buffer
srcOffset	(x, y, z, f)-tuple containing the coordinate of the interval start point in the source buffer
dstAllocSizeZ	allocation size in z direction of the destination buffer
srcAllocSizeZ	allocation size in z direction of the source buffer
typeSize	size of an f element
intervalSize	interval size
copyStream	CUDA/HIP stream; if not NULL, copy operations will be performed asynchronously

◆ copyDevToDevZYXF()

void walberla::gpu::copyDevToDevZYXF	(	const gpuPitchedPtr &	dst,
		const gpuPitchedPtr &	src,
		std::tuple< uint_t, uint_t, uint_t, uint_t > &	dstOffset,
		std::tuple< uint_t, uint_t, uint_t, uint_t > &	srcOffset,
		uint_t	dstAllocSizeY,
		uint_t	srcAllocSizeY,
		uint_t	typeSize,
		std::tuple< uint_t, uint_t, uint_t, uint_t > &	intervalSize,
		gpuStream_t	copyStream
	)

Copy a 4D interval of a device buffer to another device buffer with zyxf memory layout.

Parameters

dst	destination buffer
src	source buffer
dstOffset	(x, y, z, f)-tuple containing the coordinate of the interval start point in the destination buffer
srcOffset	(x, y, z, f)-tuple containing the coordinate of the interval start point in the source buffer
dstAllocSizeY	allocation size in y direction of the destination buffer
srcAllocSizeY	allocation size in y direction of the source buffer
typeSize	size of an f element
intervalSize	interval size
copyStream	CUDA/HIP stream; if not NULL, copy operations will be performed asynchronously

◆ copyDevToHostFZYX()

void walberla::gpu::copyDevToHostFZYX	(	unsigned char *	dst,
		const gpuPitchedPtr &	src,
		std::tuple< uint_t, uint_t, uint_t, uint_t > &	dstOffset,
		std::tuple< uint_t, uint_t, uint_t, uint_t > &	srcOffset,
		uint_t	dstAllocSizeZ,
		uint_t	srcAllocSizeZ,
		uint_t	typeSize,
		std::tuple< uint_t, uint_t, uint_t, uint_t > &	intervalSize,
		gpuStream_t	copyStream
	)

Copy a 4D interval of a device buffer to a host buffer with fzyx memory layout.

See copyDevToDevFZYX() for parameter information.

◆ copyDevToHostZYXF()

void walberla::gpu::copyDevToHostZYXF	(	unsigned char *	dst,
		const gpuPitchedPtr &	src,
		std::tuple< uint_t, uint_t, uint_t, uint_t > &	dstOffset,
		std::tuple< uint_t, uint_t, uint_t, uint_t > &	srcOffset,
		uint_t	dstAllocSizeY,
		uint_t	srcAllocSizeY,
		uint_t	typeSize,
		std::tuple< uint_t, uint_t, uint_t, uint_t > &	intervalSize,
		gpuStream_t	copyStream
	)

Copy a 4D interval of a device buffer to a host buffer with zyxf memory layout.

See copyDevToDevZYXF() for parameter information.

◆ copyHostToDevFZYX()

void walberla::gpu::copyHostToDevFZYX	(	const gpuPitchedPtr &	dst,
		unsigned char *	src,
		std::tuple< uint_t, uint_t, uint_t, uint_t > &	dstOffset,
		std::tuple< uint_t, uint_t, uint_t, uint_t > &	srcOffset,
		uint_t	dstAllocSizeZ,
		uint_t	srcAllocSizeZ,
		uint_t	typeSize,
		std::tuple< uint_t, uint_t, uint_t, uint_t > &	intervalSize,
		gpuStream_t	copyStream
	)

Copy a 4D interval of a host buffer to a device buffer with fzyx memory layout.

See copyDevToDevFZYX() for parameter information.

◆ copyHostToDevZYXF()

void walberla::gpu::copyHostToDevZYXF	(	const gpuPitchedPtr &	dst,
		unsigned char *	src,
		std::tuple< uint_t, uint_t, uint_t, uint_t > &	dstOffset,
		std::tuple< uint_t, uint_t, uint_t, uint_t > &	srcOffset,
		uint_t	dstAllocSizeY,
		uint_t	srcAllocSizeY,
		uint_t	typeSize,
		std::tuple< uint_t, uint_t, uint_t, uint_t > &	intervalSize,
		gpuStream_t	copyStream
	)

Copy a 4D interval of a host buffer to a device buffer with zyxf memory layout.

See copyDevToDevZYXF() for parameter information.

◆ exportCopyFunctionsToPython()

template<typename... CpuFields>

void walberla::gpu::exportCopyFunctionsToPython ( py::module_ & m )

◆ exportModuleToPython()

template<typename... GpuFields>

void walberla::gpu::exportModuleToPython ( py::module_ & m )

◆ fieldCpy() [1/3]

template<typename DstType , typename SrcType >

void walberla::gpu::fieldCpy	(	const shared_ptr< StructuredBlockStorage > &	blocks,
		BlockDataID	dstID,
		ConstBlockDataID	srcID
	)

◆ fieldCpy() [2/3]

template<typename T , uint_t fs>

void walberla::gpu::fieldCpy	(	field::Field< T, fs > &	dst,
		const gpu::GPUField< T > &	src
	)

◆ fieldCpy() [3/3]

template<typename T , uint_t fs>

void walberla::gpu::fieldCpy	(	gpu::GPUField< T > &	dst,
		const field::Field< T, fs > &	src
	)

◆ fieldCpyFunctor() [1/2]

template<typename DstType , typename SrcType >

std::function<void(IBlock*)> walberla::gpu::fieldCpyFunctor	(	BlockDataID	dstID,
		ConstBlockDataID	srcID
	)

◆ fieldCpyFunctor() [2/2]

template<typename DstType , typename SrcType >

std::function<void()> walberla::gpu::fieldCpyFunctor	(	const shared_ptr< StructuredBlockStorage > &	blocks,
		BlockDataID	dstID,
		ConstBlockDataID	srcID
	)

◆ fieldCpySweepFunction()

template<typename DstType , typename SrcType >

void walberla::gpu::fieldCpySweepFunction	(	BlockDataID	dstID,
		ConstBlockDataID	srcID,
		IBlock *	block
	)

◆ free_aligned_with_offset()

void walberla::gpu::free_aligned_with_offset ( void * ptr )

◆ iDivUp()

unsigned int walberla::gpu::iDivUp	(	unsigned int	a,
		unsigned int	b
	)

inline

◆ make_kernel()

template<typename FuncPtr >

Kernel<FuncPtr> walberla::gpu::make_kernel ( FuncPtr funcPtr )

◆ nameStream()

void walberla::gpu::nameStream	(	const cudaStream_t &	stream,
		const std::string &	name
	)

inline

◆ nvtxMarker()

void walberla::gpu::nvtxMarker	(	const std::string &	name,
		const uint32_t	color = `0xaaaaaa`
	)

inline

◆ selectDeviceBasedOnMpiRank()

void walberla::gpu::selectDeviceBasedOnMpiRank ( )

Selects active GPU device based on MPI rank.

Assumes that on each node as many MPI processes are started as there are GPUs.

If there are more GPUs than processes on a node, a warning is printed and not all GPUs are utilized.
If there are more processes than GPUs, a warning is also printed and multiple processes may access the same GPU. Processes are assigned to GPUs in a round-robin fashion.

◆ shiftCoordinatesWhileFastestCoordHasSizeOne()

template<typename T >

void walberla::gpu::shiftCoordinatesWhileFastestCoordHasSizeOne	(	typename FieldAccessor< T >::IndexingScheme &	indexing,
		dim3 &	gridDim,
		dim3 &	blockDim
	)

Variable Documentation

◆ freePointers_

std::map<void *, void*> walberla::gpu::freePointers_

static

Namespaces

Classes

Functions

Variables

Function Documentation

◆ addGPUFieldToStorage() [1/2]

◆ addGPUFieldToStorage() [2/2]

◆ allocate_aligned_with_offset()

◆ allocate_pitched_with_offset()

◆ checkForError()

◆ checkForLastError()

◆ copyDevToDevFZYX()

◆ copyDevToDevZYXF()

◆ copyDevToHostFZYX()

◆ copyDevToHostZYXF()

◆ copyHostToDevFZYX()

◆ copyHostToDevZYXF()

◆ exportCopyFunctionsToPython()

◆ exportModuleToPython()

◆ fieldCpy() [1/3]

◆ fieldCpy() [2/3]

◆ fieldCpy() [3/3]

◆ fieldCpyFunctor() [1/2]

◆ fieldCpyFunctor() [2/2]

◆ fieldCpySweepFunction()

◆ free_aligned_with_offset()

◆ iDivUp()

◆ make_kernel()

◆ nameStream()

◆ nvtxMarker()

◆ selectDeviceBasedOnMpiRank()

◆ shiftCoordinatesWhileFastestCoordHasSizeOne()

Variable Documentation

◆ freePointers_