![]() |
MaCh3
2.5.0
Reference Guide
|
#include "Splines/gpuSplineUtils.cuh"
Go to the source code of this file.
Functions | |
| __host__ void | SynchroniseSplines () |
| Make sure all CUDA threads have finished execution. More... | |
| __global__ void | EvalOnGPU_Splines (const unsigned int gpu_n_splines, const short int gpu_spline_size, const short int *__restrict__ gpu_paramNo_arr, const unsigned int *__restrict__ gpu_nKnots_arr, const float *__restrict__ gpu_coeff_many, const float *__restrict__ gpu_par_val, const short int *__restrict__ gpu_spline_segment, float *__restrict__ gpu_weights, const cudaTextureObject_t __restrict__ text_coeff_x) |
| Evaluate the spline on the GPU, using one {y,b,c,d} array and one {x} array. This should be the most efficient layout for cache hits and memory coalescing. Uses spline segments rather than the parameter value, which avoids doing a binary search on the GPU. More... | |
| __global__ void | EvalOnGPU_TF1 (const unsigned int gpu_n_TF1, const float *__restrict__ gpu_coeffs_tf1, const short int *__restrict__ gpu_paramNo_arr_tf1, const float *__restrict__ gpu_par_val, float *__restrict__ gpu_weights_tf1) |
| Evaluate the TF1 on the GPU using a first-order polynomial. Polynomial form: w(x) = a0 + a1·x. More... | |
| __global__ void | EvalOnGPU_TotWeight (const int gpu_n_events, const float *__restrict__ gpu_weights, const float *__restrict__ gpu_weights_tf1, M3::float_t *__restrict__ gpu_total_weights, const cudaTextureObject_t __restrict__ text_nParamPerEvent, const cudaTextureObject_t __restrict__ text_nParamPerEvent_TF1) |
| KS: Evaluate the total spline event weight on the GPU, as in most cases the GPU is faster; moreover, this significantly reduces memory transfer from GPU to CPU. More... | |
| __global__ void EvalOnGPU_Splines | ( | const unsigned int | gpu_n_splines, |
| const short int | gpu_spline_size, | ||
| const short int *__restrict__ | gpu_paramNo_arr, | ||
| const unsigned int *__restrict__ | gpu_nKnots_arr, | ||
| const float *__restrict__ | gpu_coeff_many, | ||
| const float *__restrict__ | gpu_par_val, | ||
| const short int *__restrict__ | gpu_spline_segment, | ||
| float *__restrict__ | gpu_weights, | ||
| const cudaTextureObject_t __restrict__ | text_coeff_x | ||
| ) |
Evaluate the spline on the GPU, using one {y,b,c,d} array and one {x} array. This should be the most efficient layout for cache hits and memory coalescing. Uses spline segments rather than the parameter value, which avoids doing a binary search on the GPU.
| gpu_n_splines | Total number of splines to evaluate (one thread per spline) |
| gpu_spline_size | Max number of knots per spline (shared across all splines) |
| gpu_paramNo_arr | has length = spln_counter (keeps track of which parameter we're using on this thread) |
| gpu_nKnots_arr | has length = spln_counter (keeps track of where the current spline starts) |
| gpu_coeff_many | has length = nKnots * 4, stores all coefficients for all splines and knots |
| gpu_weights | has length = spln_counter * spline_size |
| text_coeff_x | array storing info about the X coefficients, uses texture memory. Has length = n_params * spline_size. |
Definition at line 330 of file gpuSplineUtils.cu.
| __global__ void EvalOnGPU_TF1 | ( | const unsigned int | gpu_n_TF1, |
| const float *__restrict__ | gpu_coeffs_tf1, | ||
| const short int *__restrict__ | gpu_paramNo_arr_tf1, | ||
| const float *__restrict__ | gpu_par_val, | ||
| float *__restrict__ | gpu_weights_tf1 | ||
| ) |
Evaluate the TF1 on the GPU using a first-order polynomial. Polynomial form: w(x) = a0 + a1·x.
| gpu_n_TF1 | Total number of TF1 functions to evaluate (one thread per TF1) |
| gpu_coeffs_tf1 | coefficients of TF1, has length = tf1 coeff counter |
| gpu_paramNo_arr_tf1 | has length = spln_counter (keeps track of which parameter we're using on this thread) |
| gpu_weights_tf1 | has length = spln_counter * spline_size |
Definition at line 383 of file gpuSplineUtils.cu.
| __global__ void EvalOnGPU_TotWeight | ( | const int | gpu_n_events, |
| const float *__restrict__ | gpu_weights, | ||
| const float *__restrict__ | gpu_weights_tf1, | ||
| M3::float_t *__restrict__ | gpu_total_weights, | ||
| const cudaTextureObject_t __restrict__ | text_nParamPerEvent, | ||
| const cudaTextureObject_t __restrict__ | text_nParamPerEvent_TF1 | ||
| ) |
KS: Evaluate the total spline event weight on the GPU, as in most cases the GPU is faster; moreover, this significantly reduces memory transfer from GPU to CPU.
| gpu_n_events | Total number of events to process (one thread per event) |
| gpu_weights | Weight for each spline object |
| gpu_weights_tf1 | Weight for each TF1 object |
| gpu_total_weights | Total weight for each event |
| text_nParamPerEvent | map keeping track of how many parameters apply to each event; we keep two numbers here: {number of splines per event, index where splines start for a given event} |
| text_nParamPerEvent_TF1 | map keeping track of how many parameters apply to each event; we keep two numbers here: {number of splines per event, index where splines start for a given event} |
Definition at line 410 of file gpuSplineUtils.cu.
| __host__ void SynchroniseSplines | ( | ) |
Make sure all CUDA threads have finished execution.
Definition at line 50 of file gpuSplineUtils.cu.