MaCh3  2.4.2
Reference Guide
Public Member Functions | Private Attributes | List of all members
MCMCProcessorGPU Class Reference

Class responsible for performing MCMC Processing with GPU. More...

Public Member Functions

 MCMCProcessorGPU ()
 constructor More...
 
virtual ~MCMCProcessorGPU ()
 destructor More...
 
__host__ void InitGPU_AutoCorr (int n_Entries, int n_Pars, const int n_Lags)
 KS: Initialiser, here we allocate memory for variables and copy constants. More...
 
__host__ void CopyToGPU_AutoCorr (float *ParStep_cpu, float *NumeratorSum_cpu, float *ParamSums_cpu, float *DenomSum_cpu)
 KS: Copy necessary variables from CPU to GPU. More...
 
__host__ void RunGPU_AutoCorr (float *NumeratorSum_cpu, float *DenomSum_cpu)
 KS: This calls the main kernel responsible for calculating LagL and then copies the results back to the CPU. More...
 
__host__ void CleanupGPU_AutoCorr ()
 KS: free memory on gpu. More...
 

Private Attributes

float * ParStep_gpu
 Value of each param at GPU. More...
 
float * NumeratorSum_gpu
 
float * ParamSums_gpu
 
float * DenomSum_gpu
 
int h_nLag
 
int h_nDraws
 
int h_nEntries
 

Detailed Description

Class responsible for performing MCMC Processing with GPU.

Author
Kamil Skwarczynski

Definition at line 29 of file gpuMCMCProcessorUtils.cuh.

Constructor & Destructor Documentation

◆ MCMCProcessorGPU()

MCMCProcessorGPU::MCMCProcessorGPU ( )

constructor

Definition at line 16 of file gpuMCMCProcessorUtils.cu.

16  {
17  ParStep_gpu = nullptr;
18  NumeratorSum_gpu = nullptr;
19  ParamSums_gpu = nullptr;
20  DenomSum_gpu = nullptr;
21 
22  h_nLag = -1;
23  h_nDraws = -1;
24  h_nEntries = -1;
25 }
float * ParStep_gpu
Value of each param at GPU.

◆ ~MCMCProcessorGPU()

MCMCProcessorGPU::~MCMCProcessorGPU ( )
virtual

destructor

Definition at line 27 of file gpuMCMCProcessorUtils.cu.

27  {
28 }

Member Function Documentation

◆ CleanupGPU_AutoCorr()

__host__ void MCMCProcessorGPU::CleanupGPU_AutoCorr ( )

KS: free memory on gpu.

Definition at line 191 of file gpuMCMCProcessorUtils.cu.

191  {
192 // *********************************
193  if(ParStep_gpu) cudaFree(ParStep_gpu);
194  CudaCheckError();
195  if(NumeratorSum_gpu) cudaFree(NumeratorSum_gpu);
196  CudaCheckError();
197  if(ParamSums_gpu) cudaFree(ParamSums_gpu);
198  CudaCheckError();
199  if(DenomSum_gpu) cudaFree(DenomSum_gpu);
200  CudaCheckError();
201 
202  printf(" Cleared memory at GPU, I am free \n");
203 }
#define CudaCheckError()
Definition: gpuUtils.cuh:22

◆ CopyToGPU_AutoCorr()

__host__ void MCMCProcessorGPU::CopyToGPU_AutoCorr ( float *  ParStep_cpu,
float *  NumeratorSum_cpu,
float *  ParamSums_cpu,
float *  DenomSum_cpu 
)

KS: Copy necessary variables from CPU to GPU.

Parameters
ParStep_cpu: Parameter value at each step
NumeratorSum_cpu: Sum used for the numerator of the autocorrelation calculations
ParamSums_cpu: Overall sum for each parameter over all steps
DenomSum_cpu: Sum used for the denominator of the autocorrelation calculations

Definition at line 79 of file gpuMCMCProcessorUtils.cu.

83  {
84 // ******************************************************
85  //store value of parameter for each step
86  cudaMemcpy(ParStep_gpu, ParStep_cpu, h_nDraws*h_nEntries*sizeof(float), cudaMemcpyHostToDevice);
88 
89  //Mean value for a given parameter
90  cudaMemcpy(ParamSums_gpu, ParamSums_cpu, h_nDraws*sizeof(float), cudaMemcpyHostToDevice);
92 
93  //Numerator which is directly used for calculating LagL
94  cudaMemcpy(NumeratorSum_gpu, NumeratorSum_cpu, h_nLag*h_nDraws*sizeof(float), cudaMemcpyHostToDevice);
96 
97  //Denominator which is directly used for calculating LagL
98  cudaMemcpy(DenomSum_gpu, DenomSum_cpu, h_nLag*h_nDraws*sizeof(float), cudaMemcpyHostToDevice);
100 }

◆ InitGPU_AutoCorr()

__host__ void MCMCProcessorGPU::InitGPU_AutoCorr ( int  n_Entries,
int  n_Pars,
const int  n_Lags 
)

KS: Initialiser, here we allocate memory for variables and copy constants.

Parameters
n_Entries: Total number of entries in the MCMC chain
n_Pars: Number of relevant parameters
n_Lags: Value of Lag used in the autocorrelation calculation

Definition at line 32 of file gpuMCMCProcessorUtils.cu.

35  {
36 // *******************************************
37  // Write to the global statics (h_* denotes host stored variable)
38  h_nDraws = n_Pars;
39  h_nLag = n_Lags;
40  h_nEntries = n_Entries;
41 
42  // Copy the constants
43  cudaMemcpyToSymbol(d_nLag, &h_nLag, sizeof(h_nLag));
45 
46  cudaMemcpyToSymbol(d_nDraws, &h_nDraws, sizeof(h_nDraws));
48 
49  cudaMemcpyToSymbol(d_nEntries, &h_nEntries, sizeof(h_nEntries));
51 
52  // Allocate chunks of memory to GPU
53  //Numerator which is directly used for calculating LagL
54  cudaMalloc((void **) &NumeratorSum_gpu, h_nLag*h_nDraws*sizeof(float));
56 
57  //Denominator which is directly used for calculating LagL
58  cudaMalloc((void **) &DenomSum_gpu, h_nLag*h_nDraws*sizeof(float));
60 
61  //Mean value for a given parameter
62  cudaMalloc((void **) &ParamSums_gpu, h_nDraws*sizeof(float));
64 
65  //store value of parameter for each step
66  cudaMalloc((void **) &ParStep_gpu, h_nDraws*h_nEntries*sizeof(float*));
68 
69  printf(" Allocated in total %f MB for autocorrelations calculations on GPU\n",
70  static_cast<double>(sizeof(float) * (h_nLag * h_nDraws + h_nLag * h_nDraws + h_nDraws + h_nDraws * h_nEntries)) / 1.0e6);
71 }
__device__ __constant__ int d_nEntries
__device__ __constant__ int d_nDraws
__device__ __constant__ int d_nLag

◆ RunGPU_AutoCorr()

__host__ void MCMCProcessorGPU::RunGPU_AutoCorr ( float *  NumeratorSum_cpu,
float *  DenomSum_cpu 
)

KS: This calls the main kernel responsible for calculating LagL and then copies the results back to the CPU.

Parameters
NumeratorSum_cpu: Sum used for the numerator of the autocorrelation calculations
DenomSum_cpu: Sum used for the denominator of the autocorrelation calculations

Definition at line 159 of file gpuMCMCProcessorUtils.cu.

160  {
161 // *****************************************
162  dim3 block_size;
163  dim3 grid_size;
164 
165  block_size.x = _BlockSize_;
166  grid_size.x = (h_nLag*h_nDraws / block_size.x) + 1;
167 
168  EvalOnGPU_AutoCorr<<<grid_size, block_size>>>(
169  ParStep_gpu,
172  DenomSum_gpu);
173  CudaCheckError();
174 
175  printf(" Finished calculating now copying results back to CPU \n");
176 
177  //KS: Finally copy paste memory from GPU to CPU
178  cudaMemcpy(NumeratorSum_cpu, NumeratorSum_gpu, h_nLag*h_nDraws*sizeof(float), cudaMemcpyDeviceToHost);
179  CudaCheckError();
180 
181  cudaMemcpy(DenomSum_cpu, DenomSum_gpu, h_nLag*h_nDraws*sizeof(float), cudaMemcpyDeviceToHost);
182  CudaCheckError();
183 }
#define _BlockSize_
KS: Needed for shared memory; there is a way to use dynamic shared memory, but I am lazy right now.
Definition: gpuUtils.cuh:25

Member Data Documentation

◆ DenomSum_gpu

float* MCMCProcessorGPU::DenomSum_gpu
private

Definition at line 72 of file gpuMCMCProcessorUtils.cuh.

◆ h_nDraws

int MCMCProcessorGPU::h_nDraws
private

Definition at line 76 of file gpuMCMCProcessorUtils.cuh.

◆ h_nEntries

int MCMCProcessorGPU::h_nEntries
private

Definition at line 77 of file gpuMCMCProcessorUtils.cuh.

◆ h_nLag

int MCMCProcessorGPU::h_nLag
private

Definition at line 75 of file gpuMCMCProcessorUtils.cuh.

◆ NumeratorSum_gpu

float* MCMCProcessorGPU::NumeratorSum_gpu
private

Definition at line 70 of file gpuMCMCProcessorUtils.cuh.

◆ ParamSums_gpu

float* MCMCProcessorGPU::ParamSums_gpu
private

Definition at line 71 of file gpuMCMCProcessorUtils.cuh.

◆ ParStep_gpu

float* MCMCProcessorGPU::ParStep_gpu
private

Value of each param at GPU.

Definition at line 69 of file gpuMCMCProcessorUtils.cuh.


The documentation for this class was generated from the following files: