MaCh3 2.2.1
Reference Guide
Loading...
Searching...
No Matches
Macros | Functions
gpuUtils.cuh File Reference

Common CUDA utilities and definitions for shared GPU functionality. More...

#include <stdio.h>
#include <vector>
#include <cuda_runtime.h>
Include dependency graph for gpuUtils.cuh:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Macros

#define CudaSafeCall(err)   __cudaSafeCall(err, __FILE__, __LINE__)
 
#define CudaCheckError()   __cudaCheckError(__FILE__, __LINE__)
 
#define _BlockSize_   1024
 KS: Needed for shared memory; there is a way to use dynamic shared memory, but I am lazy right now.
 

Functions

void __cudaSafeCall (cudaError err, const char *file, const int line)
 Check for a safe call on GPU.
 
void __cudaCheckError (const char *file, const int line)
 Check if there's been an error.
 
void checkGpuMem ()
 KS: Get some fancy info about VRAM usage.
 
void PrintNdevices ()
 KS: Get some fancy info about GPU.
 
void ResetDevice ()
 KS: Completely clean GPU, this is time consuming and may lead to unexpected behaviour.
 
void SetDevice (const int deviceId)
 KS: Only useful if using multiple GPUs.
 
int GetNumGPUThreads (const int Device=0)
 KS: Get number of GPU threads for currently used GPU.
 
size_t GetL2CacheSize (const int device=0)
 KS: Get L2 cache size (in bytes) for the specified GPU device.
 
size_t GetMaxTexture1DSize (const int device=0)
 KS: Get the maximum size for 1D textures on the specified GPU device.
 
size_t GetSharedMemoryPerBlock (const int device=0)
 KS: Returns the maximum shared memory per block for a given GPU device.
 

Detailed Description

Common CUDA utilities and definitions for shared GPU functionality.

Author
Richard Calland
Kamil Skwarczynski

Definition in file gpuUtils.cuh.

Macro Definition Documentation

◆ _BlockSize_

#define _BlockSize_   1024

KS: Needed for shared memory; there is a way to use dynamic shared memory, but I am lazy right now.

Definition at line 25 of file gpuUtils.cuh.

◆ CudaCheckError

#define CudaCheckError ( )    __cudaCheckError(__FILE__, __LINE__)

Definition at line 22 of file gpuUtils.cuh.

◆ CudaSafeCall

#define CudaSafeCall (   err)    __cudaSafeCall(err, __FILE__, __LINE__)

Definition at line 21 of file gpuUtils.cuh.

Function Documentation

◆ __cudaCheckError()

void __cudaCheckError ( const char *  file,
const int  line 
)

Check if there's been an error.

This function checks if there has been any CUDA runtime API error and reports it.

Parameters
file: The file name where the error occurred.
line: The line number where the error occurred.

Definition at line 18 of file gpuUtils.cu.

18 {
19// **************************************************
20#ifdef CUDA_ERROR_CHECK
21 cudaError err = cudaGetLastError();
22 if (cudaSuccess != err) {
23 fprintf(stderr, "%s failed at %s:%i : %s\n", __func__, file, line, cudaGetErrorString(err));
24 throw;
25 }
26
27 // More careful checking. However, this will affect performance.
28 // Comment away if needed.
29 err = cudaDeviceSynchronize();
30 if (cudaSuccess != err) {
31 fprintf(stderr, "%s with sync failed at %s:%i : %s\n", __func__, file, line, cudaGetErrorString(err));
32 throw;
33 }
34#endif
35}

◆ __cudaSafeCall()

void __cudaSafeCall ( cudaError  err,
const char *  file,
const int  line 
)

Check for a safe call on GPU.

Todo:

KS: There is plenty of useful stuff here https://github.com/NVIDIA/cuda-samples/blob/master/Samples/1_Utilities/deviceQuery/deviceQuery.cpp

KS: We might want to port some of these utilities, for example having bool if there is unified memory etc.

This function checks the error status returned by CUDA runtime API functions and reports any errors.

Parameters
err: The CUDA error code.
file: The file name where the error occurred.
line: The line number where the error occurred.

Definition at line 6 of file gpuUtils.cu.

6 {
7// **************************************************
8#ifdef CUDA_ERROR_CHECK
9 if (cudaSuccess != err) {
10 fprintf(stderr, "%s failed at %s:%i : %s\n", __func__, file, line, cudaGetErrorString(err));
11 throw;
12 }
13#endif
14}

◆ checkGpuMem()

void checkGpuMem ( )

KS: Get some fancy info about VRAM usage.

Definition at line 43 of file gpuUtils.cu.

43 {
44// *******************************************
45 float free_m, total_m,used_m;
46 size_t free_t, total_t;
47
48 cudaMemGetInfo(&free_t, &total_t);
50
51 free_m = static_cast<uint>(free_t)/1048576.0;
52 total_m = static_cast<uint>(total_t)/1048576.0;
53 used_m = total_m - free_m;
54
55 printf(" Memory free %f MB, total memory %f MB, memory used %f MB\n", free_m, total_m, used_m);
56}
#define CudaCheckError()
Definition: gpuUtils.cuh:22

◆ GetL2CacheSize()

size_t GetL2CacheSize ( const int  device = 0)

KS: Get L2 cache size (in bytes) for the specified GPU device.

Parameters
device: The ID of the device. Defaults to 0.
Returns
The size of the L2 cache in bytes

Definition at line 122 of file gpuUtils.cu.

122 {
123// *******************************************
124 cudaDeviceProp prop;
125 cudaError_t err = cudaGetDeviceProperties(&prop, device);
126 if (err != cudaSuccess) {
127 printf("No CUDA devices found");
128 throw;
129 }
130 return prop.l2CacheSize; // size in bytes
131}

◆ GetMaxTexture1DSize()

size_t GetMaxTexture1DSize ( const int  device = 0)

KS: Get the maximum size for 1D textures on the specified GPU device.

Parameters
device: The ID of the device. Defaults to 0.
Returns
The maximum 1D texture size supported by the device

Definition at line 134 of file gpuUtils.cu.

134 {
135// *******************************************
136 cudaDeviceProp prop;
137 cudaError_t err = cudaGetDeviceProperties(&prop, device);
138 if (err != cudaSuccess) {
139 printf("No CUDA devices found");
140 throw;
141 }
142 return prop.maxTexture1D;
143}

◆ GetNumGPUThreads()

int GetNumGPUThreads ( const int  Device = 0)

KS: Get number of GPU threads for currently used GPU.

Parameters
Device: The ID of the device. Defaults to 0.
Returns
The number of GPU threads.

Definition at line 102 of file gpuUtils.cu.

102 {
103// *******************************************
104 int deviceCount;
105 cudaGetDeviceCount(&deviceCount);
106
107 if (deviceCount == 0) {
108 printf("No CUDA devices found");
109 throw;
110 }
111
112 cudaDeviceProp deviceProp;
113 cudaGetDeviceProperties(&deviceProp, Device);
114
115 // Define the number of threads per block
116 int nThreadsBlocks = (deviceProp.multiProcessorCount * deviceProp.maxThreadsPerMultiProcessor);
117
118 return nThreadsBlocks;
119}

◆ GetSharedMemoryPerBlock()

size_t GetSharedMemoryPerBlock ( const int  device = 0)

KS: Returns the maximum shared memory per block for a given GPU device.

Parameters
device: CUDA device ID (default = 0)
Returns
Maximum shared memory per block in bytes

Definition at line 146 of file gpuUtils.cu.

146 {
147// *******************************************
148 cudaDeviceProp prop;
149 cudaError_t err = cudaGetDeviceProperties(&prop, device);
150
151 if (err != cudaSuccess) {
152 printf("No CUDA devices found");
153 throw;
154 }
155 return static_cast<size_t>(prop.sharedMemPerBlock);
156}

◆ PrintNdevices()

void PrintNdevices ( )

KS: Get some fancy info about GPU.

Definition at line 60 of file gpuUtils.cu.

60 {
61// *******************************************
62 int nDevices;
63 cudaGetDeviceCount(&nDevices);
65
66 if (nDevices == 0) {
67 printf("No CUDA devices found");
68 throw;
69 }
70
71 printf(" Found %i GPUs, currently I only support one GPU\n", nDevices);
72}

◆ ResetDevice()

void ResetDevice ( )

KS: Completely clean GPU, this is time consuming and may lead to unexpected behaviour.

Definition at line 77 of file gpuUtils.cu.

77 {
78// *******************************************
79 cudaDeviceReset();
81}

◆ SetDevice()

void SetDevice ( const int  deviceId)

KS: Only useful if using multiple GPUs.

Parameters
deviceId: The ID of the device to be set as active.

KS: Only useful if using multiple GPUs.

Definition at line 85 of file gpuUtils.cu.

85 {
86// *******************************************
87 // Check if the device ID is valid
88 int deviceCount;
89 cudaGetDeviceCount(&deviceCount);
90 if (deviceId < 0 || deviceId >= deviceCount) {
91 printf("Invalid device ID: %i \n", deviceId);
92 throw;
93 }
94
95 cudaSetDevice(deviceId);
97 printf("GPU device set to ID: %i \n", deviceId);
98}