![]() |
MaCh3
2.4.2
Reference Guide
|
#include "Splines/gpuSplineUtils.cuh"Go to the source code of this file.
Macros | |
| #define | _N_SPLINES_ NSplines_GPU |
Functions | |
| __host__ void | SynchroniseSplines () |
| Make sure all Cuda threads finished execution. More... | |
| __global__ void | EvalOnGPU_Splines (const short int *__restrict__ gpu_paramNo_arr, const unsigned int *__restrict__ gpu_nKnots_arr, const float *__restrict__ gpu_coeff_many, float *__restrict__ gpu_weights, const cudaTextureObject_t __restrict__ text_coeff_x) |
| Evaluate the spline on the GPU Using one {y,b,c,d} array and one {x} array Should be most efficient at cache hitting and memory coalescence But using spline segments rather than the parameter value: avoids doing binary search on GPU. More... | |
| __global__ void | EvalOnGPU_TF1 (const float *__restrict__ gpu_coeffs_tf1, const short int *__restrict__ gpu_paramNo_arr_tf1, float *__restrict__ gpu_weights_tf1) |
| Evaluate the TF1 on the GPU Using 5th order polynomial. More... | |
| __global__ void | EvalOnGPU_TotWeight (const float *__restrict__ gpu_weights, const float *__restrict__ gpu_weights_tf1, float *__restrict__ gpu_total_weights, const cudaTextureObject_t __restrict__ text_nParamPerEvent, const cudaTextureObject_t __restrict__ text_nParamPerEvent_TF1) |
| KS: Evaluate the total spline event weight on the GPU, as in most cases GPU is faster, even more this significant reduce memory transfer from GPU to CPU. More... | |
Variables | |
| __device__ __constant__ unsigned int | d_n_splines |
| Number of splines living on GPU. More... | |
| __device__ __constant__ unsigned int | d_n_TF1 |
| Number of tf1 living on GPU. More... | |
| __device__ __constant__ short int | d_spline_size |
| Size of splines living on GPU. More... | |
| __device__ __constant__ int | d_n_events |
| Number of events living on GPU. More... | |
| __device__ __constant__ float | val_gpu [NSplines_GPU] |
| CW: Constant memory needs to be hard-coded on compile time. Could make this texture memory instead, but don't care enough right now... More... | |
| __device__ __constant__ short int | segment_gpu [NSplines_GPU] |
| #define _N_SPLINES_ NSplines_GPU |
Hard code the number of splines Not entirely necessary: only used for val_gpu and segment_gpu being device constants. Could move them to not being device constants
Definition at line 6 of file gpuSplineUtils.cu.
| __global__ void EvalOnGPU_Splines | ( | const short int *__restrict__ | gpu_paramNo_arr, |
| const unsigned int *__restrict__ | gpu_nKnots_arr, | ||
| const float *__restrict__ | gpu_coeff_many, | ||
| float *__restrict__ | gpu_weights, | ||
| const cudaTextureObject_t __restrict__ | text_coeff_x | ||
| ) |
Evaluate the spline on the GPU Using one {y,b,c,d} array and one {x} array Should be most efficient at cache hitting and memory coalescence But using spline segments rather than the parameter value: avoids doing binary search on GPU.
| gpu_paramNo_arr | has length = spln_counter (keeps track of which parameter we're using on this thread) |
| gpu_nKnots_arr | has length = spln_counter (keeps track where current spline starts) |
| gpu_coeff_many | has length = nKnots * 4, stores all coefficients for all splines and knots |
| gpu_weights | has length = spln_counter * spline_size |
| text_coeff_x | array storing info about X coeff, uses texture memory. Has length = n_params * spline_size, |
Definition at line 347 of file gpuSplineUtils.cu.
| __global__ void EvalOnGPU_TF1 | ( | const float *__restrict__ | gpu_coeffs_tf1, |
| const short int *__restrict__ | gpu_paramNo_arr_tf1, | ||
| float *__restrict__ | gpu_weights_tf1 | ||
| ) |
Evaluate the TF1 on the GPU Using 5th order polynomial.
| gpu_coeffs_tf1 | coefficients of TF1, has length = tf1 coeef counter |
| gpu_paramNo_arr_tf1 | has length = spln_counter (keeps track of which parameter we're using on this thread) |
| gpu_weights_tf1 | has length = spln_counter * spline_size |
Definition at line 396 of file gpuSplineUtils.cu.
| __global__ void EvalOnGPU_TotWeight | ( | const float *__restrict__ | gpu_weights, |
| const float *__restrict__ | gpu_weights_tf1, | ||
| float *__restrict__ | gpu_total_weights, | ||
| const cudaTextureObject_t __restrict__ | text_nParamPerEvent, | ||
| const cudaTextureObject_t __restrict__ | text_nParamPerEvent_TF1 | ||
| ) |
KS: Evaluate the total spline event weight on the GPU, as in most cases GPU is faster, even more this significant reduce memory transfer from GPU to CPU.
| gpu_weights | Weight for each spline object |
| gpu_weights_tf1 | Weight for each TF1 object |
| gpu_total_weights | Total weight for each event |
| text_nParamPerEvent | map keeping track how many parameters applies to each event, we keep two numbers here {number of splines per event, index where splines start for a given event} |
| text_nParamPerEvent_TF1 | map keeping track how many parameters applies to each event, we keep two numbers here {number of splines per event, index where splines start for a given event} |
Definition at line 423 of file gpuSplineUtils.cu.
| __host__ void SynchroniseSplines | ( | ) |
Make sure all Cuda threads finished execution.
Definition at line 73 of file gpuSplineUtils.cu.
| __device__ __constant__ int d_n_events |
Number of events living on GPU.
Definition at line 64 of file gpuSplineUtils.cu.
| __device__ __constant__ unsigned int d_n_splines |
Number of splines living on GPU.
Definition at line 57 of file gpuSplineUtils.cu.
| __device__ __constant__ unsigned int d_n_TF1 |
Number of tf1 living on GPU.
Definition at line 59 of file gpuSplineUtils.cu.
| __device__ __constant__ short int d_spline_size |
Size of splines living on GPU.
Definition at line 61 of file gpuSplineUtils.cu.
| __device__ __constant__ short int segment_gpu[NSplines_GPU] |
Definition at line 68 of file gpuSplineUtils.cu.
| __device__ __constant__ float val_gpu[NSplines_GPU] |
CW: Constant memory needs to be hard-coded on compile time. Could make this texture memory instead, but don't care enough right now...
Definition at line 67 of file gpuSplineUtils.cu.