Go to the source code of this file.
|
__host__ void | InitGPU_AutoCorr (float **ParStep_gpu, float **NumeratorSum_gpu, float **ParamSums_gpu, float **DenomSum_gpu, int n_Entries, int n_Pars, const int n_Lags) |
| KS: Initialiser, here we allocate memory for variables and copy constants. More...
|
|
__host__ void | CopyToGPU_AutoCorr (float *ParStep_cpu, float *NumeratorSum_cpu, float *ParamSums_cpu, float *DenomSum_cpu, float *ParStep_gpu, float *NumeratorSum_gpu, float *ParamSums_gpu, float *DenomSum_gpu) |
| KS: Copy necessary variables from CPU to GPU. More...
|
|
__global__ void | EvalOnGPU_AutoCorr (const float *__restrict__ ParStep_gpu, const float *__restrict__ ParamSums_gpu, float *NumeratorSum_gpu, float *DenomSum_gpu) |
| Eval autocorrelations based on Box and Jenkins. More...
|
|
__host__ void | RunGPU_AutoCorr (float *ParStep_gpu, float *ParamSums_gpu, float *NumeratorSum_gpu, float *DenomSum_gpu, float *NumeratorSum_cpu, float *DenomSum_cpu) |
| KS: This calls the main kernel responsible for calculating LagL and later copies results back to the CPU. More...
|
|
__host__ void | CleanupGPU_AutoCorr (float *ParStep_gpu, float *NumeratorSum_gpu, float *ParamSums_gpu, float *DenomSum_gpu) |
| KS: free memory on gpu. More...
|
|
◆ CleanupGPU_AutoCorr()
__host__ void CleanupGPU_AutoCorr |
( |
float * |
ParStep_gpu, |
|
|
float * |
NumeratorSum_gpu, |
|
|
float * |
ParamSums_gpu, |
|
|
float * |
DenomSum_gpu |
|
) |
| |
KS: free memory on gpu.
- Parameters
-
ParStep_gpu | Parameter value at each step |
NumeratorSum_gpu | Sum used for numerator of autocorrelation calculations |
ParamSums_gpu | Overall sum for each parameter over all steps |
DenomSum_gpu | Sum used for denominator of autocorrelation calculations |
Definition at line 197 of file gpuMCMCProcessorUtils.cu.
203 cudaFree(ParStep_gpu);
204 cudaFree(NumeratorSum_gpu);
205 cudaFree(ParamSums_gpu);
206 cudaFree(DenomSum_gpu);
208 printf(
" Cleared memory at GPU, I am free \n");
◆ CopyToGPU_AutoCorr()
__host__ void CopyToGPU_AutoCorr |
( |
float * |
ParStep_cpu, |
|
|
float * |
NumeratorSum_cpu, |
|
|
float * |
ParamSums_cpu, |
|
|
float * |
DenomSum_cpu, |
|
|
float * |
ParStep_gpu, |
|
|
float * |
NumeratorSum_gpu, |
|
|
float * |
ParamSums_gpu, |
|
|
float * |
DenomSum_gpu |
|
) |
| |
KS: Copy necessary variables from CPU to GPU.
- Parameters
-
ParStep_cpu | Parameter value at each step |
NumeratorSum_cpu | Sum used for numerator of autocorrelation calculations |
ParamSums_cpu | Overall sum for each parameter over all steps |
DenomSum_cpu | Sum used for denominator of autocorrelation calculations |
ParStep_gpu | Parameter value at each step |
NumeratorSum_gpu | Sum used for numerator of autocorrelation calculations |
ParamSums_gpu | Overall sum for each parameter over all steps |
DenomSum_gpu | Sum used for denominator of autocorrelation calculations |
Definition at line 75 of file gpuMCMCProcessorUtils.cu.
87 cudaMemcpy(ParStep_gpu, ParStep_cpu,
h_nDraws*
h_nEntries*
sizeof(
float), cudaMemcpyHostToDevice);
91 cudaMemcpy(ParamSums_gpu, ParamSums_cpu,
h_nDraws*
sizeof(
float), cudaMemcpyHostToDevice);
95 cudaMemcpy(NumeratorSum_gpu, NumeratorSum_cpu,
h_nLag*
h_nDraws*
sizeof(
float), cudaMemcpyHostToDevice);
99 cudaMemcpy(DenomSum_gpu, DenomSum_cpu,
h_nLag*
h_nDraws*
sizeof(
float), cudaMemcpyHostToDevice);
◆ EvalOnGPU_AutoCorr()
__global__ void EvalOnGPU_AutoCorr |
( |
const float *__restrict__ |
ParStep_gpu, |
|
|
const float *__restrict__ |
ParamSums_gpu, |
|
|
float * |
NumeratorSum_gpu, |
|
|
float * |
DenomSum_gpu |
|
) |
| |
Eval autocorrelations based on Box and Jenkins.
- Parameters
-
ParStep_gpu | Parameter value at each step |
NumeratorSum_gpu | Sum used for numerator of autocorrelation calculations |
ParamSums_gpu | Overall sum for each parameter over all steps |
DenomSum_gpu | Sum used for denominator of autocorrelation calculations |
Definition at line 110 of file gpuMCMCProcessorUtils.cu.
116 const unsigned int CurrentLagNum = (blockIdx.x * blockDim.x + threadIdx.x);
125 shared_NumeratorSum[threadIdx.x] = 0;
126 shared_DenomSum[threadIdx.x] = 0;
129 const int Param = int(CurrentLagNum/
d_nLag);
130 const int nLag = CurrentLagNum - Param*
d_nLag;
136 const float Diff = ParStep_gpu[CurrParStep]-ParamSums_gpu[Param];
141 CurrParStep = fmaf(Param,
d_nEntries, i + nLag);
142 const float LagTerm = ParStep_gpu[CurrParStep]-ParamSums_gpu[Param];
143 const float Product = Diff*LagTerm;
144 shared_NumeratorSum[threadIdx.x] += Product;
147 const float Denom = Diff*Diff;
148 shared_DenomSum[threadIdx.x] += Denom;
153 NumeratorSum_gpu[CurrentLagNum] = shared_NumeratorSum[threadIdx.x];
154 DenomSum_gpu[CurrentLagNum] = shared_DenomSum[threadIdx.x];
__device__ __constant__ int d_nEntries
__device__ __constant__ int d_nDraws
__device__ __constant__ int d_nLag
#define _BlockSize_
KS: Needed for shared memory; there is a way to use dynamic shared memory but I am lazy right now.
◆ InitGPU_AutoCorr()
__host__ void InitGPU_AutoCorr |
( |
float ** |
ParStep_gpu, |
|
|
float ** |
NumeratorSum_gpu, |
|
|
float ** |
ParamSums_gpu, |
|
|
float ** |
DenomSum_gpu, |
|
|
int |
n_Entries, |
|
|
int |
n_Pars, |
|
|
const int |
n_Lags |
|
) |
| |
KS: Initialiser, here we allocate memory for variables and copy constants.
- Parameters
-
ParStep_gpu | Parameter value at each step |
NumeratorSum_gpu | Sum used for numerator of autocorrelation calculations |
ParamSums_gpu | Overall sum for each parameter over all steps |
DenomSum_gpu | Sum used for denominator of autocorrelation calculations |
n_Entries | Total number of entries in mcmc chain |
n_Pars | Number of relevant parameters |
n_Lags | Value of Lag in autocorrelation calculation |
Definition at line 23 of file gpuMCMCProcessorUtils.cu.
50 cudaMalloc((
void **) NumeratorSum_gpu,
h_nLag*
h_nDraws*
sizeof(
float));
58 cudaMalloc((
void **) ParamSums_gpu,
h_nDraws*
sizeof(
float));
65 printf(
" Allocated in total %f MB for autocorrelations calculations on GPU\n",
◆ RunGPU_AutoCorr()
__host__ void RunGPU_AutoCorr |
( |
float * |
ParStep_gpu, |
|
|
float * |
ParamSums_gpu, |
|
|
float * |
NumeratorSum_gpu, |
|
|
float * |
DenomSum_gpu, |
|
|
float * |
NumeratorSum_cpu, |
|
|
float * |
DenomSum_cpu |
|
) |
| |
KS: This calls the main kernel responsible for calculating LagL and later copies results back to the CPU.
- Parameters
-
ParStep_gpu | Parameter value at each step |
NumeratorSum_gpu | Sum used for numerator of autocorrelation calculations |
ParamSums_gpu | Overall sum for each parameter over all steps |
DenomSum_gpu | Sum used for denominator of autocorrelation calculations |
NumeratorSum_cpu | Sum used for numerator of autocorrelation calculations |
DenomSum_cpu | Sum used for denominator of autocorrelation calculations |
Definition at line 160 of file gpuMCMCProcessorUtils.cu.
174 EvalOnGPU_AutoCorr<<<grid_size, block_size>>>(
181 printf(
" Finished calculating now copying results back to CPU \n");
184 cudaMemcpy(NumeratorSum_cpu, NumeratorSum_gpu,
h_nLag*
h_nDraws*
sizeof(
float), cudaMemcpyDeviceToHost);
187 cudaMemcpy(DenomSum_cpu, DenomSum_gpu,
h_nLag*
h_nDraws*
sizeof(
float), cudaMemcpyDeviceToHost);
◆ d_nDraws
__device__ __constant__ int d_nDraws |
◆ d_nEntries
__device__ __constant__ int d_nEntries |
◆ d_nLag
__device__ __constant__ int d_nLag |
◆ h_nDraws
◆ h_nEntries
◆ h_nLag