MaCh3  2.2.3
Reference Guide
Macros | Functions
gpuUtils.cuh File Reference

Common CUDA utilities and definitions for shared GPU functionality. More...

#include <stdio.h>
#include <vector>
#include <cuda_runtime.h>
Include dependency graph for gpuUtils.cuh:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Macros

#define CudaSafeCall(err)   __cudaSafeCall(err, __FILE__, __LINE__)
 
#define CudaCheckError()   __cudaCheckError(__FILE__, __LINE__)
 
#define _BlockSize_   1024
 KS: Need it for shared memory; there is a way to use dynamic shared memory but I am lazy right now. More...
 

Functions

void __cudaSafeCall (cudaError err, const char *file, const int line)
 Check for a safe call on GPU. More...
 
void __cudaCheckError (const char *file, const int line)
 Check if there's been an error. More...
 
void checkGpuMem ()
 KS: Get some fancy info about VRAM usage. More...
 
void PrintNdevices ()
 KS: Get some fancy info about GPU. More...
 
void ResetDevice ()
 KS: Completely clean GPU, this is time consuming and may lead to unexpected behaviour. More...
 
void SetDevice (const int deviceId)
 KS: Only useful if using multiple GPUs. More...
 
int GetNumGPUThreads (const int Device=0)
 KS: Get number of GPU threads for currently used GPU. More...
 
size_t GetL2CacheSize (const int device=0)
 KS: Get L2 cache size (in bytes) for the specified GPU device. More...
 
size_t GetMaxTexture1DSize (const int device=0)
 KS: Get the maximum size for 1D textures on the specified GPU device. More...
 
size_t GetSharedMemoryPerBlock (const int device=0)
 KS: Returns the maximum shared memory per block for a given GPU device. More...
 

Detailed Description

Common CUDA utilities and definitions for shared GPU functionality.

Author
Richard Calland
Kamil Skwarczynski

Definition in file gpuUtils.cuh.

Macro Definition Documentation

◆ _BlockSize_

#define _BlockSize_   1024

KS: Need it for shared memory; there is a way to use dynamic shared memory but I am lazy right now.

Definition at line 25 of file gpuUtils.cuh.

◆ CudaCheckError

#define CudaCheckError ( )    __cudaCheckError(__FILE__, __LINE__)

Definition at line 22 of file gpuUtils.cuh.

◆ CudaSafeCall

#define CudaSafeCall (   err)    __cudaSafeCall(err, __FILE__, __LINE__)

Definition at line 21 of file gpuUtils.cuh.

Function Documentation

◆ __cudaCheckError()

void __cudaCheckError ( const char *  file,
const int  line 
)

Check if there's been an error.

This function checks if there has been any CUDA runtime API error and reports it.

Parameters
file — The file name where the error occurred.
line — The line number where the error occurred.

Definition at line 18 of file gpuUtils.cu.

18  {
19 // **************************************************
20 #ifdef CUDA_ERROR_CHECK
21  cudaError err = cudaGetLastError();
22  if (cudaSuccess != err) {
23  fprintf(stderr, "%s failed at %s:%i : %s\n", __func__, file, line, cudaGetErrorString(err));
24  throw;
25  }
26 
27  // More careful checking. However, this will affect performance.
28  // Comment away if needed.
29  err = cudaDeviceSynchronize();
30  if (cudaSuccess != err) {
31  fprintf(stderr, "%s with sync failed at %s:%i : %s\n", __func__, file, line, cudaGetErrorString(err));
32  throw;
33  }
34 #endif
35 }

◆ __cudaSafeCall()

void __cudaSafeCall ( cudaError  err,
const char *  file,
const int  line 
)

Check for a safe call on GPU.

Todo:

KS: There is plenty of useful stuff here https://github.com/NVIDIA/cuda-samples/blob/master/Samples/1_Utilities/deviceQuery/deviceQuery.cpp

KS: We might want to port some of these utilities, for example having bool if there is unified memory etc.

This function checks the error status returned by CUDA runtime API functions and reports any errors.

Parameters
err — The CUDA error code.
file — The file name where the error occurred.
line — The line number where the error occurred.

Definition at line 6 of file gpuUtils.cu.

6  {
7 // **************************************************
8 #ifdef CUDA_ERROR_CHECK
9  if (cudaSuccess != err) {
10  fprintf(stderr, "%s failed at %s:%i : %s\n", __func__, file, line, cudaGetErrorString(err));
11  throw;
12  }
13 #endif
14 }

◆ checkGpuMem()

void checkGpuMem ( )

KS: Get some fancy info about VRAM usage.

Definition at line 43 of file gpuUtils.cu.

43  {
44 // *******************************************
45  float free_m, total_m,used_m;
46  size_t free_t, total_t;
47 
48  cudaMemGetInfo(&free_t, &total_t);
50 
51  free_m = static_cast<uint>(free_t)/1048576.0;
52  total_m = static_cast<uint>(total_t)/1048576.0;
53  used_m = total_m - free_m;
54 
55  printf(" Memory free %f MB, total memory %f MB, memory used %f MB\n", free_m, total_m, used_m);
56 }
#define CudaCheckError()
Definition: gpuUtils.cuh:22

◆ GetL2CacheSize()

size_t GetL2CacheSize ( const int  device = 0)

KS: Get L2 cache size (in bytes) for the specified GPU device.

Parameters
device — The ID of the device. Defaults to 0.
Returns
The size of the L2 cache in bytes

Definition at line 122 of file gpuUtils.cu.

122  {
123 // *******************************************
124  cudaDeviceProp prop;
125  cudaError_t err = cudaGetDeviceProperties(&prop, device);
126  if (err != cudaSuccess) {
127  printf("No CUDA devices found");
128  throw;
129  }
130  return prop.l2CacheSize; // size in bytes
131 }

◆ GetMaxTexture1DSize()

size_t GetMaxTexture1DSize ( const int  device = 0)

KS: Get the maximum size for 1D textures on the specified GPU device.

Parameters
device — The ID of the device. Defaults to 0.
Returns
The maximum 1D texture size supported by the device

Definition at line 134 of file gpuUtils.cu.

134  {
135 // *******************************************
136  cudaDeviceProp prop;
137  cudaError_t err = cudaGetDeviceProperties(&prop, device);
138  if (err != cudaSuccess) {
139  printf("No CUDA devices found");
140  throw;
141  }
142  return prop.maxTexture1D;
143 }

◆ GetNumGPUThreads()

int GetNumGPUThreads ( const int  Device = 0)

KS: Get number of GPU threads for currently used GPU.

Parameters
Device — The ID of the device. Defaults to 0.
Returns
The number of GPU threads.

Definition at line 102 of file gpuUtils.cu.

102  {
103 // *******************************************
104  int deviceCount;
105  cudaGetDeviceCount(&deviceCount);
106 
107  if (deviceCount == 0) {
108  printf("No CUDA devices found");
109  throw;
110  }
111 
112  cudaDeviceProp deviceProp;
113  cudaGetDeviceProperties(&deviceProp, Device);
114 
115  // Define the number of threads per block
116  int nThreadsBlocks = (deviceProp.multiProcessorCount * deviceProp.maxThreadsPerMultiProcessor);
117 
118  return nThreadsBlocks;
119 }

◆ GetSharedMemoryPerBlock()

size_t GetSharedMemoryPerBlock ( const int  device = 0)

KS: Returns the maximum shared memory per block for a given GPU device.

Parameters
device — CUDA device ID (default = 0)
Returns
Maximum shared memory per block in bytes

Definition at line 146 of file gpuUtils.cu.

146  {
147 // *******************************************
148  cudaDeviceProp prop;
149  cudaError_t err = cudaGetDeviceProperties(&prop, device);
150 
151  if (err != cudaSuccess) {
152  printf("No CUDA devices found");
153  throw;
154  }
155  return static_cast<size_t>(prop.sharedMemPerBlock);
156 }

◆ PrintNdevices()

void PrintNdevices ( )

KS: Get some fancy info about GPU.

Definition at line 60 of file gpuUtils.cu.

60  {
61 // *******************************************
62  int nDevices;
63  cudaGetDeviceCount(&nDevices);
65 
66  if (nDevices == 0) {
67  printf("No CUDA devices found");
68  throw;
69  }
70 
71  printf(" Found %i GPUs, currently I only support one GPU\n", nDevices);
72 }

◆ ResetDevice()

void ResetDevice ( )

KS: Completely clean GPU, this is time consuming and may lead to unexpected behaviour.

Definition at line 77 of file gpuUtils.cu.

77  {
78 // *******************************************
79  cudaDeviceReset();
81 }

◆ SetDevice()

void SetDevice ( const int  deviceId)

KS: Only useful if using multiple GPUs.

Parameters
deviceId — The ID of the device to be set as active.

KS: Only useful if using multiple GPUs.

Definition at line 85 of file gpuUtils.cu.

85  {
86 // *******************************************
87  // Check if the device ID is valid
88  int deviceCount;
89  cudaGetDeviceCount(&deviceCount);
90  if (deviceId < 0 || deviceId >= deviceCount) {
91  printf("Invalid device ID: %i \n", deviceId);
92  throw;
93  }
94 
95  cudaSetDevice(deviceId);
97  printf("GPU device set to ID: %i \n", deviceId);
98 }