MaCh3 2.2.1
Reference Guide
Loading...
Searching...
No Matches
Macros | Functions
gpuUtils.cuh File Reference

Common CUDA utilities and definitions for shared GPU functionality. More...

#include <stdio.h>
#include <vector>
#include <cuda_runtime.h>
Include dependency graph for gpuUtils.cuh:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Macros

#define CudaSafeCall(err)   __cudaSafeCall(err, __FILE__, __LINE__)
 
#define CudaCheckError()   __cudaCheckError(__FILE__, __LINE__)
 
#define _BlockSize_   1024
 KS: Needed for shared memory; there is a way to use dynamic shared memory, but I am lazy right now.
 

Functions

void __cudaSafeCall (cudaError err, const char *file, const int line)
 Check for a safe call on GPU.
 
void __cudaCheckError (const char *file, const int line)
 Check if there's been an error.
 
void checkGpuMem ()
 KS: Get some fancy info about VRAM usage.
 
void PrintNdevices ()
 KS: Get some fancy info about GPU.
 
void ResetDevice ()
 KS: Completely clean GPU, this is time consuming and may lead to unexpected behaviour.
 
void SetDevice (const int deviceId)
 KS: Only useful if using multiple GPUs.
 
int GetNumGPUThreads (const int Device=0)
 KS: Get number of GPU threads for currently used GPU.
 
size_t GetL2CacheSize (const int device=0)
 KS: Get L2 cache size (in bytes) for the specified GPU device.
 
size_t GetMaxTexture1DSize (const int device=0)
 KS: Get the maximum size for 1D textures on the specified GPU device.
 
size_t GetSharedMemoryPerBlock (const int device=0)
 KS: Returns the maximum shared memory per block for a given GPU device.
 

Detailed Description

Common CUDA utilities and definitions for shared GPU functionality.

Author
Richard Calland
Kamil Skwarczynski

Definition in file gpuUtils.cuh.

Macro Definition Documentation

◆ _BlockSize_

#define _BlockSize_   1024

KS: Needed for shared memory; there is a way to use dynamic shared memory, but I am lazy right now.

Definition at line 25 of file gpuUtils.cuh.

◆ CudaCheckError

#define CudaCheckError ( )    __cudaCheckError(__FILE__, __LINE__)

Definition at line 22 of file gpuUtils.cuh.

◆ CudaSafeCall

#define CudaSafeCall (   err)    __cudaSafeCall(err, __FILE__, __LINE__)

Definition at line 21 of file gpuUtils.cuh.

Function Documentation

◆ __cudaCheckError()

void __cudaCheckError ( const char *  file,
const int  line 
)

Check if there's been an error.

This function checks if there has been any CUDA runtime API error and reports it.

Parameters
file: The file name where the error occurred.
line: The line number where the error occurred.

Definition at line 18 of file gpuUtils.cu.

18 {
19// **************************************************
20#ifdef CUDA_ERROR_CHECK
21 cudaError err = cudaGetLastError();
22 if (cudaSuccess != err) {
23 fprintf(stderr, "%s failed at %s:%i : %s\n", __func__, file, line, cudaGetErrorString(err));
24 throw;
25 }
26
27 // More careful checking. However, this will affect performance.
28 // Comment away if needed.
29 err = cudaDeviceSynchronize();
30 if (cudaSuccess != err) {
31 fprintf(stderr, "%s with sync failed at %s:%i : %s\n", __func__, file, line, cudaGetErrorString(err));
32 throw;
33 }
34#endif
35}

◆ __cudaSafeCall()

void __cudaSafeCall ( cudaError  err,
const char *  file,
const int  line 
)

Check for a safe call on GPU.

Todo:

KS: There is plenty of useful stuff here https://github.com/NVIDIA/cuda-samples/blob/master/Samples/1_Utilities/deviceQuery/deviceQuery.cpp

KS: We might want to port some of these utilities, for example having bool if there is unified memory etc.

This function checks the error status returned by CUDA runtime API functions and reports any errors.

Parameters
err: The CUDA error code.
file: The file name where the error occurred.
line: The line number where the error occurred.

Definition at line 6 of file gpuUtils.cu.

6 {
7// **************************************************
8#ifdef CUDA_ERROR_CHECK
9 if (cudaSuccess != err) {
10 fprintf(stderr, "%s failed at %s:%i : %s\n", __func__, file, line, cudaGetErrorString(err));
11 throw;
12 }
13#endif
14}

◆ checkGpuMem()

void checkGpuMem ( )

KS: Get some fancy info about VRAM usage.

Definition at line 43 of file gpuUtils.cu.

43 {
44// *******************************************
45 float free_m, total_m,used_m;
46 size_t free_t, total_t;
47
48 cudaMemGetInfo(&free_t, &total_t);
50
51 free_m = static_cast<uint>(free_t)/1048576.0;
52 total_m = static_cast<uint>(total_t)/1048576.0;
53 used_m = total_m - free_m;
54
55 printf(" Memory free %f MB, total memory %f MB, memory used %f MB\n", free_m, total_m, used_m);
56}
#define CudaCheckError()
Definition: gpuUtils.cuh:22

◆ GetL2CacheSize()

size_t GetL2CacheSize ( const int  device = 0)

KS: Get L2 cache size (in bytes) for the specified GPU device.

Parameters
device: The ID of the device. Defaults to 0.
Returns
The size of the L2 cache in bytes

Definition at line 122 of file gpuUtils.cu.

122 {
123// *******************************************
124 cudaDeviceProp prop;
125 cudaError_t err = cudaGetDeviceProperties(&prop, device);
126 if (err != cudaSuccess) {
127 printf("No CUDA devices found");
128 throw;
129 }
130 return prop.l2CacheSize; // size in bytes
131}

◆ GetMaxTexture1DSize()

size_t GetMaxTexture1DSize ( const int  device = 0)

KS: Get the maximum size for 1D textures on the specified GPU device.

Parameters
device: The ID of the device. Defaults to 0.
Returns
The maximum 1D texture size supported by the device

Definition at line 134 of file gpuUtils.cu.

134 {
135// *******************************************
136 cudaDeviceProp prop;
137 cudaError_t err = cudaGetDeviceProperties(&prop, device);
138 if (err != cudaSuccess) {
139 printf("No CUDA devices found");
140 throw;
141 }
142 return prop.maxTexture1D;
143}

◆ GetNumGPUThreads()

int GetNumGPUThreads ( const int  Device = 0)

KS: Get number of GPU threads for currently used GPU.

Parameters
Device: The ID of the device. Defaults to 0.
Returns
The number of GPU threads.

Definition at line 102 of file gpuUtils.cu.

102 {
103// *******************************************
104 int deviceCount;
105 cudaGetDeviceCount(&deviceCount);
106
107 if (deviceCount == 0) {
108 printf("No CUDA devices found");
109 throw;
110 }
111
112 cudaDeviceProp deviceProp;
113 cudaGetDeviceProperties(&deviceProp, Device);
114
115 // Define the number of threads per block
116 int nThreadsBlocks = (deviceProp.multiProcessorCount * deviceProp.maxThreadsPerMultiProcessor);
117
118 return nThreadsBlocks;
119}

◆ GetSharedMemoryPerBlock()

size_t GetSharedMemoryPerBlock ( const int  device = 0)

KS: Returns the maximum shared memory per block for a given GPU device.

Parameters
device: CUDA device ID (default = 0)
Returns
Maximum shared memory per block in bytes

Definition at line 146 of file gpuUtils.cu.

146 {
147// *******************************************
148 cudaDeviceProp prop;
149 cudaError_t err = cudaGetDeviceProperties(&prop, device);
150
151 if (err != cudaSuccess) {
152 printf("No CUDA devices found");
153 throw;
154 }
155 return static_cast<size_t>(prop.sharedMemPerBlock);
156}

◆ PrintNdevices()

void PrintNdevices ( )

KS: Get some fancy info about GPU.

Definition at line 60 of file gpuUtils.cu.

60 {
61// *******************************************
62 int nDevices;
63 cudaGetDeviceCount(&nDevices);
65
66 if (nDevices == 0) {
67 printf("No CUDA devices found");
68 throw;
69 }
70
71 printf(" Found %i GPUs, currently I only support one GPU\n", nDevices);
72}

◆ ResetDevice()

void ResetDevice ( )

KS: Completely clean GPU, this is time consuming and may lead to unexpected behaviour.

Definition at line 77 of file gpuUtils.cu.

77 {
78// *******************************************
79 cudaDeviceReset();
81}

◆ SetDevice()

void SetDevice ( const int  deviceId)

KS: Only useful if using multiple GPUs.

Parameters
deviceId: The ID of the device to be set as active.

KS: Only useful if using multiple GPUs.

Definition at line 85 of file gpuUtils.cu.

85 {
86// *******************************************
87 // Check if the device ID is valid
88 int deviceCount;
89 cudaGetDeviceCount(&deviceCount);
90 if (deviceId < 0 || deviceId >= deviceCount) {
91 printf("Invalid device ID: %i \n", deviceId);
92 throw;
93 }
94
95 cudaSetDevice(deviceId);
97 printf("GPU device set to ID: %i \n", deviceId);
98}