![]() |
MaCh3 2.2.1
Reference Guide
|
MaCh3 event-by-event cross-section spline code. More...
Go to the source code of this file.
Classes | |
class | SMonolithGPU |
Class responsible for calculating spline weight on GPU. More... | |
Functions | |
__host__ void | SynchroniseSplines () |
Make sure all CUDA threads have finished execution. | |
__global__ void | EvalOnGPU_Splines (const short int *__restrict__ gpu_paramNo_arr, const unsigned int *__restrict__ gpu_nKnots_arr, const float *__restrict__ gpu_coeff_many, float *__restrict__ gpu_weights, const cudaTextureObject_t __restrict__ text_coeff_x) |
Evaluate the spline on the GPU using one {y,b,c,d} array and one {x} array. This should be the most efficient layout for cache hits and memory coalescing. It uses spline segments rather than the parameter value, which avoids doing a binary search on the GPU. | |
__global__ void | EvalOnGPU_TF1 (const float *__restrict__ gpu_coeffs_tf1, const short int *__restrict__ gpu_paramNo_arr_tf1, float *__restrict__ gpu_weights_tf1) |
Evaluate the TF1 on the GPU using a 5th order polynomial. | |
__global__ void | EvalOnGPU_TotWeight (const float *__restrict__ gpu_weights, const float *__restrict__ gpu_weights_tf1, float *__restrict__ gpu_total_weights, const cudaTextureObject_t __restrict__ text_nParamPerEvent, const cudaTextureObject_t __restrict__ text_nParamPerEvent_TF1) |
KS: Evaluate the total spline event weight on the GPU, as in most cases the GPU is faster; moreover, this significantly reduces memory transfer from GPU to CPU. | |
MaCh3 event-by-event cross-section spline code.
Contains code to run on CUDA GPUs. Essentially we load up stripped TSpline3 objects to the GPU and do the equivalent of TSpline3->Eval(double) for all events. Now also supports TF1 evals. Called from Samples/samplePDFND.cpp -> Splines/SplineMonolith.cpp -> Splines/gpuSplineUtils.cu
Definition in file gpuSplineUtils.cuh.
__global__ void EvalOnGPU_Splines | ( | const short int *__restrict__ | gpu_paramNo_arr, |
const unsigned int *__restrict__ | gpu_nKnots_arr, | ||
const float *__restrict__ | gpu_coeff_many, | ||
float *__restrict__ | gpu_weights, | ||
const cudaTextureObject_t __restrict__ | text_coeff_x | ||
) |
Evaluate the spline on the GPU using one {y,b,c,d} array and one {x} array. This should be the most efficient layout for cache hits and memory coalescing. It uses spline segments rather than the parameter value, which avoids doing a binary search on the GPU.
gpu_paramNo_arr | has length = spln_counter (keeps track of which parameter we're using on this thread) |
gpu_nKnots_arr | has length = spln_counter (keeps track of where the current spline starts) |
gpu_coeff_many | has length = nKnots * 4, stores all coefficients for all splines and knots |
gpu_weights | has length = spln_counter * spline_size |
text_coeff_x | array storing info about X coeff, uses texture memory. Has length = n_params * spline_size. |
Definition at line 348 of file gpuSplineUtils.cu.
__global__ void EvalOnGPU_TF1 | ( | const float *__restrict__ | gpu_coeffs_tf1, |
const short int *__restrict__ | gpu_paramNo_arr_tf1, | ||
float *__restrict__ | gpu_weights_tf1 | ||
) |
Evaluate the TF1 on the GPU using a 5th order polynomial.
gpu_coeffs_tf1 | coefficients of TF1, has length = tf1 coeff counter |
gpu_paramNo_arr_tf1 | has length = spln_counter (keeps track of which parameter we're using on this thread) |
gpu_weights_tf1 | has length = spln_counter * spline_size |
Definition at line 397 of file gpuSplineUtils.cu.
__global__ void EvalOnGPU_TotWeight | ( | const float *__restrict__ | gpu_weights, |
const float *__restrict__ | gpu_weights_tf1, | ||
float *__restrict__ | gpu_total_weights, | ||
const cudaTextureObject_t __restrict__ | text_nParamPerEvent, | ||
const cudaTextureObject_t __restrict__ | text_nParamPerEvent_TF1 | ||
) |
KS: Evaluate the total spline event weight on the GPU, as in most cases the GPU is faster; moreover, this significantly reduces memory transfer from GPU to CPU.
gpu_weights | Weight for each spline object |
gpu_weights_tf1 | Weight for each TF1 object |
gpu_total_weights | Total weight for each event |
text_nParamPerEvent | map keeping track of how many parameters apply to each event; we keep two numbers here {number of splines per event, index where splines start for a given event} |
text_nParamPerEvent_TF1 | map keeping track of how many parameters apply to each event; we keep two numbers here {number of splines per event, index where splines start for a given event} |
Definition at line 424 of file gpuSplineUtils.cu.
__host__ void SynchroniseSplines | ( | ) |
Make sure all CUDA threads have finished execution.
Definition at line 73 of file gpuSplineUtils.cu.