MaCh3  2.5.0
Reference Guide
Public Member Functions | Private Attributes | List of all members
SplineMonolithGPU Class Reference

Class responsible for calculating spline weights on the GPU. More...

Public Member Functions

 SplineMonolithGPU ()
 constructor More...
 
virtual ~SplineMonolithGPU ()
 This function deallocates the GPU resources allocated for the separate {x} and {y,b,c,d} arrays and for the TF1 objects. More...
 
__host__ void InitGPU_SplineMonolith (M3::float_t **cpu_total_weights, int n_events, unsigned int total_nknots, unsigned int n_splines, unsigned int n_tf1, int Eve_size)
 Allocate memory on gpu for spline monolith. More...
 
__host__ void CopyToGPU_SplineMonolith (const SplineMonoStruct *cpu_spline_handler, const std::vector< float > &cpu_many_array_TF1, const std::vector< short int > &cpu_paramNo_arr_TF1, const int n_events, const std::vector< unsigned int > &cpu_nParamPerEvent, const std::vector< unsigned int > &cpu_nParamPerEvent_TF1, const int n_params, const unsigned int n_splines, const short int spline_size, const unsigned int total_nknots, const unsigned int n_tf1)
 Copies data from CPU to GPU for the spline monolith. More...
 
__host__ void InitGPU_Segments (short int **segment)
 Allocate memory for spline segments. More...
 
__host__ void InitGPU_Vals (float **vals)
 Allocate memory for parameter values. More...
 
__host__ void RunGPU_SplineMonolith (M3::float_t *cpu_total_weights, float *vals, short int *segment)
 Run the GPU code for the separate many arrays, i.e. separate {x} and {y,b,c,d} arrays. Pass the segment and the parameter values (binary search already performed in SplineBase::FindSplineSegment()). More...
 
__host__ void CleanupPinnedMemory (M3::float_t *cpu_total_weights, short int *segment, float *vals)
 Clean up pinned variables at CPU. More...
 

Private Attributes

unsigned int * gpu_nParamPerEvent
 KS: GPU map keeping track of how many parameters apply to each event; we keep two numbers here: {number of splines per event, index where splines start for a given event}. More...
 
unsigned int * gpu_nParamPerEvent_TF1
 KS: GPU map keeping track of how many parameters apply to each event; we keep two numbers here: {number of TF1 per event, index where TF1 start for a given event}. More...
 
float * gpu_coeff_x
 KS: GPU arrays to hold X coefficient. More...
 
float * gpu_coeff_many
 GPU arrays to hold other coefficients. More...
 
unsigned int * gpu_nKnots_arr
 KS: GPU Number of knots per spline. More...
 
short int * gpu_paramNo_arr
 CW: GPU array with the parameter number for each spline (one entry per spline, not per spline point!) More...
 
float * gpu_coeff_TF1_many
 GPU arrays to hold TF1 coefficients. More...
 
short int * gpu_nPoints_arr
 GPU arrays to hold number of points. More...
 
short int * gpu_paramNo_TF1_arr
 CW: GPU array with the parameter number for each TF1 object. More...
 
float * gpu_par_val
 CW: parameter value on GPU. More...
 
short int * gpu_spline_segment
 CW: Spline segment on GPU. More...
 
M3::float_t * gpu_total_weights
 GPU arrays to hold weight for event. More...
 
float * gpu_weights
 GPU arrays to hold weight for each spline. More...
 
float * gpu_weights_tf1
 GPU arrays to hold weight for each TF1. More...
 
short int cpu_spline_size
 Size of splines living on CPU. More...
 
unsigned int cpu_n_splines
 Number of splines living on CPU. More...
 
unsigned int cpu_n_TF1
 Number of TF1 objects living on CPU. More...
 
int cpu_n_params
 Number of params living on CPU. More...
 
int cpu_n_events
 Number of events living on CPU. More...
 
cudaTextureObject_t text_coeff_x = 0
 KS: Textures are read through a cache that is well optimised for fetching. Make a texture only for variables you often access but rarely overwrite. There are limits on texture memory, so don't use huge arrays. More...
 
cudaTextureObject_t text_nParamPerEvent = 0
 KS: Map keeping track of how many parameters apply to each event; we keep two numbers here: {number of splines per event, index where splines start for a given event}. More...
 
cudaTextureObject_t text_nParamPerEvent_TF1 = 0
 KS: Map keeping track of how many parameters apply to each event; we keep two numbers here: {number of TF1 per event, index where TF1 start for a given event}. More...
 

Detailed Description

Class responsible for calculating spline weights on the GPU.

Author
Richard Calland
Asher Kaboth
Clarence Wret
Kamil Skwarczynski

Definition at line 74 of file gpuSplineUtils.cuh.

Constructor & Destructor Documentation

◆ SplineMonolithGPU()

SplineMonolithGPU::SplineMonolithGPU ( )

constructor


Definition at line 58 of file gpuSplineUtils.cu.

58  {
60  cpu_n_params = -1;
61  cpu_n_events = -1;
62  cpu_n_TF1 = 0;
63  cpu_n_splines = 0;
64  cpu_spline_size = 0;
65 
66  gpu_weights = nullptr;
67  gpu_total_weights = nullptr;
68  gpu_nParamPerEvent = nullptr;
69  gpu_nPoints_arr = nullptr;
70  gpu_paramNo_arr = nullptr;
71  gpu_nKnots_arr = nullptr;
72  gpu_coeff_x = nullptr;
73  gpu_coeff_many = nullptr;
74  gpu_coeff_TF1_many = nullptr;
75  gpu_paramNo_TF1_arr = nullptr;
76  gpu_nParamPerEvent_TF1 = nullptr;
77  gpu_weights_tf1 = nullptr;
78 }

◆ ~SplineMonolithGPU()

SplineMonolithGPU::~SplineMonolithGPU ( )
virtual

This function deallocates the GPU resources allocated for the separate {x} and {y,b,c,d} arrays and for the TF1 objects.

Definition at line 81 of file gpuSplineUtils.cu.

81  {
82 // *******************************************
83  cudaFree(gpu_paramNo_arr);
84  cudaFree(gpu_nKnots_arr);
85 
86  // free the coefficient arrays
87  cudaDestroyTextureObject(text_coeff_x);
88  cudaFree(gpu_coeff_x);
89  cudaFree(gpu_coeff_many);
90 
91  cudaFree(gpu_par_val);
92  cudaFree(gpu_spline_segment);
93 
94  cudaFree(gpu_coeff_TF1_many);
95  cudaFree(gpu_paramNo_TF1_arr);
96  // free weights on the gpu
97  cudaFree(gpu_weights);
98  cudaFree(gpu_weights_tf1);
99  cudaFree(gpu_total_weights);
100  //KS: Before removing variable let's destroy texture
101  cudaDestroyTextureObject(text_nParamPerEvent);
102  cudaDestroyTextureObject(text_nParamPerEvent_TF1);
103 
104  cudaFree(gpu_nParamPerEvent);
105  cudaFree(gpu_nParamPerEvent_TF1);
106 }

Member Function Documentation

◆ CleanupPinnedMemory()

__host__ void SplineMonolithGPU::CleanupPinnedMemory ( M3::float_t *  cpu_total_weights,
short int *  segment,
float *  vals 
)

Clean up pinned variables at CPU.

Parameters
cpu_total_weights  Pointer to the total weights array on the CPU (used if Weight_On_SplineBySpline_Basis is not defined).
segment  Found spline segment for each parameter
vals  Value to which we want to reweight for each parameter

Definition at line 518 of file gpuSplineUtils.cu.

519  {
520 // *******************************************
521  cudaFreeHost(cpu_total_weights);
522  cudaFreeHost(segment);
523  cudaFreeHost(vals);
524 
525  cpu_total_weights = nullptr;
526  segment = nullptr;
527  vals = nullptr;
528 }

◆ CopyToGPU_SplineMonolith()

__host__ void SplineMonolithGPU::CopyToGPU_SplineMonolith ( const SplineMonoStruct *  cpu_spline_handler,
const std::vector< float > &  cpu_many_array_TF1,
const std::vector< short int > &  cpu_paramNo_arr_TF1,
const int  n_events,
const std::vector< unsigned int > &  cpu_nParamPerEvent,
const std::vector< unsigned int > &  cpu_nParamPerEvent_TF1,
const int  n_params,
const unsigned int  n_splines,
const short int  spline_size,
const unsigned int  total_nknots,
const unsigned int  n_tf1 
)

Copies data from CPU to GPU for the spline monolith.

This function transfers the necessary spline data and parameters from the CPU to the GPU, including TF1-related arrays and parameters. This setup is crucial for spline evaluations on the GPU.

Parameters
cpu_spline_handler  Pointer to the structure managing spline data on the CPU.
cpu_many_array_TF1  Array of TF1 parameters on the CPU.
cpu_paramNo_arr_TF1  Array containing parameter numbers for TF1 objects.
n_events  Number of events, necessary for correct data handling.
cpu_nParamPerEvent  Array indicating the number of parameters per event.
cpu_nParamPerEvent_TF1  Array indicating the number of parameters per TF1 object.
n_params  Total number of parameters across all splines.
n_splines  Total number of spline objects.
spline_size  Size of each spline object.
total_nknots  Total number of knots across all splines.
n_tf1  Total number of TF1 objects.


Definition at line 200 of file gpuSplineUtils.cu.

215  {
216 // ******************************************************
217  // Write to the global statics (h_* denotes host stored variable)
218  cpu_n_params = n_params;
219  // Number of events
220  cpu_n_events = n_events;
221  // Total number of valid TF1 for all loaded events
222  cpu_n_TF1 = n_tf1;
223  // Total number of valid splines for all loaded events
224  cpu_n_splines = n_splines;
226  cpu_spline_size = spline_size;
227 
228  //CW: Allocate memory for the frequently copied objects
229  cudaMalloc(&gpu_par_val, n_params * sizeof(float));
230  cudaMalloc(&gpu_spline_segment, n_params * sizeof(short int));
231 
232  // Copy the coefficient arrays to the GPU; this only happens once per entire Markov Chain so is OK to do multiple extensive memory copies
233  cudaMemcpy(gpu_coeff_many, cpu_spline_handler->coeff_many.data(), sizeof(float)*total_nknots*_nCoeff_, cudaMemcpyHostToDevice);
234  CudaCheckError();
235 
236  cudaMemcpy(gpu_coeff_x, cpu_spline_handler->coeff_x.data(), sizeof(float)*spline_size*n_params, cudaMemcpyHostToDevice);
237  CudaCheckError();
238 
239  //KS: Bind our texture with the GPU variable
240  //KS: Tried also moving gpu_many_array to texture memory; it only worked with a restricted number of MC runs, most likely hit the texture memory limit :(
241  struct cudaResourceDesc resDesc_coeff_x;
242  memset(&resDesc_coeff_x, 0, sizeof(resDesc_coeff_x));
243  resDesc_coeff_x.resType = cudaResourceTypeLinear;
244  resDesc_coeff_x.res.linear.devPtr = gpu_coeff_x;
245  resDesc_coeff_x.res.linear.desc = cudaCreateChannelDesc<float>();
246  resDesc_coeff_x.res.linear.sizeInBytes = sizeof(float)*spline_size*n_params;
247 
248  // Specify texture object parameters
249  struct cudaTextureDesc texDesc_coeff_x;
250  memset(&texDesc_coeff_x, 0, sizeof(texDesc_coeff_x));
251  texDesc_coeff_x.readMode = cudaReadModeElementType;
252 
253  // Create texture object
254  cudaCreateTextureObject(&text_coeff_x, &resDesc_coeff_x, &texDesc_coeff_x, nullptr);
255  CudaCheckError();
256 
257  // Also copy the parameter number for each spline onto the GPU; i.e. what spline parameter are we calculating right now
258  cudaMemcpy(gpu_paramNo_arr, cpu_spline_handler->paramNo_arr.data(), n_splines*sizeof(short int), cudaMemcpyHostToDevice);
259  CudaCheckError();
260 
261  // Also copy the knot map for each spline onto the GPU;
262  cudaMemcpy(gpu_nKnots_arr, cpu_spline_handler->nKnots_arr.data(), n_splines*sizeof(unsigned int), cudaMemcpyHostToDevice);
263  CudaCheckError();
264 
265  //Now TF1
266  // Copy the coefficient arrays to the GPU; this only happens once per entire Markov Chain so is OK to do multiple extensive memory copies
267  cudaMemcpy(gpu_coeff_TF1_many, cpu_many_array_TF1.data(), sizeof(float)*n_tf1*_nTF1Coeff_, cudaMemcpyHostToDevice);
268  CudaCheckError();
269 
270  // Also copy the parameter number for each TF1 onto the GPU; i.e. what TF1 parameter are we calculating right now
271  cudaMemcpy(gpu_paramNo_TF1_arr, cpu_paramNo_arr_TF1.data(), n_tf1*sizeof(short int), cudaMemcpyHostToDevice);
272  CudaCheckError();
273 
274  //KS: Keep track of how many splines each event has
275  cudaMemcpy(gpu_nParamPerEvent, cpu_nParamPerEvent.data(), 2*n_events*sizeof(unsigned int), cudaMemcpyHostToDevice);
276  CudaCheckError();
277 
278  //KS: Bind our texture with the GPU variable
279  // create a resource descriptor based on device pointers
280  struct cudaResourceDesc resDesc_nParamPerEvent;
281  memset(&resDesc_nParamPerEvent, 0, sizeof(resDesc_nParamPerEvent));
282  resDesc_nParamPerEvent.resType = cudaResourceTypeLinear;
283  resDesc_nParamPerEvent.res.linear.devPtr = gpu_nParamPerEvent;
284  resDesc_nParamPerEvent.res.linear.desc = cudaCreateChannelDesc<unsigned int>();
285  resDesc_nParamPerEvent.res.linear.sizeInBytes = 2*n_events*sizeof(unsigned int);
286 
287  // Specify texture object parameters
288  struct cudaTextureDesc texDesc_nParamPerEvent;
289  memset(&texDesc_nParamPerEvent, 0, sizeof(texDesc_nParamPerEvent));
290  texDesc_nParamPerEvent.readMode = cudaReadModeElementType;
291 
292  //Finally create texture object
293  cudaCreateTextureObject(&text_nParamPerEvent, &resDesc_nParamPerEvent, &texDesc_nParamPerEvent, nullptr);
294  CudaCheckError();
295 
296  // Now TF1
297  cudaMemcpy(gpu_nParamPerEvent_TF1, cpu_nParamPerEvent_TF1.data(), 2*n_events*sizeof(unsigned int), cudaMemcpyHostToDevice);
298  CudaCheckError();
299 
300  //KS: Bind our texture with the GPU variable
301  // create a resource descriptor based on device pointers
302  struct cudaResourceDesc resDesc_nParamPerEvent_tf1;
303  memset(&resDesc_nParamPerEvent_tf1, 0, sizeof(resDesc_nParamPerEvent_tf1));
304  resDesc_nParamPerEvent_tf1.resType = cudaResourceTypeLinear;
305  resDesc_nParamPerEvent_tf1.res.linear.devPtr = gpu_nParamPerEvent_TF1;
306  resDesc_nParamPerEvent_tf1.res.linear.desc = cudaCreateChannelDesc<unsigned int>();
307  resDesc_nParamPerEvent_tf1.res.linear.sizeInBytes = 2*n_events*sizeof(unsigned int);
308 
309  // Specify texture object parameters
310  struct cudaTextureDesc texDesc_nParamPerEvent_tf1;
311  memset(&texDesc_nParamPerEvent_tf1, 0, sizeof(texDesc_nParamPerEvent_tf1));
312  texDesc_nParamPerEvent_tf1.readMode = cudaReadModeElementType;
313 
314  //Finally create texture object
315  cudaCreateTextureObject(&text_nParamPerEvent_TF1, &resDesc_nParamPerEvent_tf1, &texDesc_nParamPerEvent_tf1, nullptr);
316  CudaCheckError();
317 }
constexpr int _nCoeff_
KS: We store coefficients {y,b,c,d} in one array one by one, this is only to define it once rather th...
Definition: SplineCommon.h:18
constexpr int _nTF1Coeff_
KS: For TF1 we store at most 5 coefficients, we could make it more flexible but for now define it her...
Definition: SplineCommon.h:20
#define CudaCheckError()
Definition: gpuUtils.cuh:21
std::vector< unsigned int > nKnots_arr
KS: CPU Number of knots per spline.
Definition: SplineCommon.h:47
std::vector< float > coeff_x
KS: CPU arrays to hold X coefficient.
Definition: SplineCommon.h:41
std::vector< float > coeff_many
CPU arrays to hold other coefficients.
Definition: SplineCommon.h:44
std::vector< short int > paramNo_arr
CW: CPU array with the number of points per spline (not per spline point!)
Definition: SplineCommon.h:50

◆ InitGPU_Segments()

__host__ void SplineMonolithGPU::InitGPU_Segments ( short int **  segment)

Allocate memory for spline segments.

Parameters
segment  Found spline segment for each parameter

Definition at line 178 of file gpuSplineUtils.cu.

178  {
179 // *******************************************
180  //KS: Rather than allocating memory the standard way, this CUDA call pins host memory, which makes memory transfers faster
181  cudaMallocHost((void **) segment, cpu_n_params*sizeof(short int));
182  CudaCheckError();
183 }

◆ InitGPU_SplineMonolith()

__host__ void SplineMonolithGPU::InitGPU_SplineMonolith ( M3::float_t **  cpu_total_weights,
int  n_events,
unsigned int  total_nknots,
unsigned int  n_splines,
unsigned int  n_tf1,
int  Eve_size 
)

Allocate memory on gpu for spline monolith.

Parameters
cpu_total_weights  Pointer to receive pinned host memory for the per-event total weights (cudaMallocHost pins the memory so later transfers are faster)
n_events  Number of events, this is necessary to allocate memory correctly
total_nknots  Total number of knots in all splines summed
n_splines  Total number of spline objects, not knots
n_tf1  Total number of TF1 objects, not coefficients
Eve_size  Number of entries in the x-coefficient array

Definition at line 110 of file gpuSplineUtils.cu.

116  {
117 // *******************************************
118  // Allocate chunks of memory to GPU
119  cudaMalloc((void **) &gpu_paramNo_arr, n_splines*sizeof(short int));
120  CudaCheckError();
121 
122  cudaMalloc((void **) &gpu_nKnots_arr, n_splines*sizeof(unsigned int));
123  CudaCheckError();
124 
125  cudaMalloc((void **) &gpu_coeff_x, Eve_size*sizeof(float));
126  CudaCheckError();
127 
128  cudaMalloc((void **) &gpu_coeff_many, _nCoeff_*total_nknots*sizeof(float));
129  CudaCheckError();
130 
131  // Allocate memory for the array of weights to be returned to CPU
132  cudaMalloc((void **) &gpu_weights, n_splines*sizeof(float));
133  CudaCheckError();
134 
135  // Now TF1 specific
136  cudaMalloc((void **) &gpu_coeff_TF1_many, _nTF1Coeff_*n_tf1*sizeof(float));
137  CudaCheckError();
138 
139  cudaMalloc((void **) &gpu_weights_tf1, n_tf1*sizeof(float));
140  CudaCheckError();
141 
142  cudaMalloc((void **) &gpu_paramNo_TF1_arr, n_tf1*sizeof(short int));
143  CudaCheckError();
144 
145  //KS: Rather than allocating memory the standard way, this CUDA call pins host memory, which makes memory transfers faster
146  cudaMallocHost((void **) cpu_total_weights, n_events*sizeof(M3::float_t));
147  CudaCheckError();
148 
149  //KS: Allocate memory for the array of total weights to be returned to CPU
150  cudaMalloc((void **) &gpu_total_weights, n_events*sizeof(M3::float_t));
151  CudaCheckError();
152 
153  //KS: Allocate memory for the map keeping track how many splines each parameter has
154  cudaMalloc((void **) &gpu_nParamPerEvent, 2*n_events*sizeof(unsigned int));
155  CudaCheckError();
156 
157  //KS: Allocate memory for the map keeping track how many TF1 each parameter has
158  cudaMalloc((void **) &gpu_nParamPerEvent_TF1, 2*n_events*sizeof(unsigned int));
159  CudaCheckError();
160 
161  // Print allocation info to user
162  printf("Allocated %i entries for paramNo and nKnots arrays, size = %f MB\n",
163  n_splines, static_cast<double>(sizeof(short int) * n_splines + sizeof(unsigned int) * n_splines) / 1.0e6);
164  printf("Allocated %i entries for x coeff arrays, size = %f MB\n",
165  Eve_size, static_cast<double>(sizeof(float) * Eve_size) / 1.0e6);
166  printf("Allocated %i entries for {ybcd} coeff arrays, size = %f MB\n",
167  _nCoeff_ * total_nknots, static_cast<double>(sizeof(float) * _nCoeff_ * total_nknots) / 1.0e6);
168  printf("Allocated %i entries for TF1 coefficient arrays, size = %f MB\n",
169  _nTF1Coeff_ * n_tf1, static_cast<double>(sizeof(float) * _nTF1Coeff_ * n_tf1) / 1.0e6);
170 
171  //KS: Ask CUDA about memory usage
172  checkGpuMem();
173  PrintNdevices();
174 }
void checkGpuMem()
KS: Get some fancy info about VRAM usage.
Definition: gpuUtils.cu:43
void PrintNdevices()
KS: Get some fancy info about GPU.
Definition: gpuUtils.cu:58
double float_t
Definition: Core.h:37

◆ InitGPU_Vals()

__host__ void SplineMonolithGPU::InitGPU_Vals ( float **  vals)

Allocate memory for parameter values.

Parameters
vals  Value to which we want to reweight for each parameter

Definition at line 187 of file gpuSplineUtils.cu.

187  {
188 // *******************************************
190  //KS: Rather than allocating memory the standard way, this CUDA call pins host memory, which makes memory transfers faster
190  cudaMallocHost((void **) vals, cpu_n_params*sizeof(float));
191  CudaCheckError();
192 }

◆ RunGPU_SplineMonolith()

__host__ void SplineMonolithGPU::RunGPU_SplineMonolith ( M3::float_t cpu_total_weights,
float *  vals,
short int *  segment 
)

Run the GPU code for the separate many arrays, i.e. separate {x} and {y,b,c,d} arrays. Pass the segment and the parameter values (binary search already performed in SplineBase::FindSplineSegment()).

Executes the GPU code for calculating spline weights.

This function runs the GPU computation for the spline monolith. It assumes that the appropriate segment has already been identified through binary search in the SplineBase::FindSplineSegment() function.

Parameters
cpu_total_weights  Pointer to the total weights array (used if Weight_On_SplineBySpline_Basis is not defined).
vals  Pointer to an array holding the parameter values to be processed.
segment  Pointer to an array containing segment indices for parameters.

Definition at line 443 of file gpuSplineUtils.cu.

448  {
449 // *****************************************
450  dim3 block_size;
451  dim3 grid_size;
452 
453  block_size.x = _BlockSize_;
454  grid_size.x = (cpu_n_splines / block_size.x) + 1;
455 
456  // Copy the segment values to the GPU (segment_gpu), which is cpu_n_params long
457  cudaMemcpy(gpu_spline_segment, segment, cpu_n_params * sizeof(short int), cudaMemcpyHostToDevice);
458  CudaCheckError();
459 
460  // Copy the parameter values to the GPU (vals_gpu), which is cpu_n_params long
461  cudaMemcpy(gpu_par_val, vals, cpu_n_params * sizeof(float), cudaMemcpyHostToDevice);
462  CudaCheckError();
463 
464  // KS: Consider an asynchronous kernel call; this might help since EvalOnGPU_Splines and EvalOnGPU_TF1 are independent
465  // Set the cache config to prefer L1 for the kernel
466  //cudaFuncSetCacheConfig(EvalOnGPU_Splines, cudaFuncCachePreferL1);
467  EvalOnGPU_Splines<<<grid_size, block_size>>>(
473  gpu_par_val,
476  gpu_weights,
478  /* further arguments elided in this extraction */ );
479  CudaCheckError();
480 
481  grid_size.x = (cpu_n_TF1 / block_size.x) + 1;
482  EvalOnGPU_TF1<<<grid_size, block_size>>>(
483  cpu_n_TF1,
486  gpu_par_val,
488  /* further arguments elided in this extraction */ );
489  CudaCheckError();
490 
491  grid_size.x = (cpu_n_events / block_size.x) + 1;
492 
493  EvalOnGPU_TotWeight<<<grid_size, block_size>>>(
494  cpu_n_events,
495  gpu_weights,
502  /* further arguments elided in this extraction */ );
503  CudaCheckError();
504 
505  //KS: Here we have to make a somewhat large GPU->CPU transfer because it is proportional to the number of events
506  //KS: Normally the code waits for a memory transfer to finish before moving on; cudaMemcpyAsync lets us keep executing code while the copy proceeds in the background.
507  cudaMemcpyAsync(cpu_total_weights, gpu_total_weights, cpu_n_events * sizeof(M3::float_t), cudaMemcpyDeviceToHost, 0);
508  CudaCheckError();
509 
510  #ifdef MACH3_DEBUG
511  printf("Copied GPU total weights to CPU with SUCCESS (drink more tea)\n");
512  printf("Released calculated response from GPU with SUCCESS (drink most tea)\n");
513  #endif
514 }
#define _BlockSize_
KS: Need it for shared memory; there is a way to use dynamic shared memory but I am lazy right now.
Definition: gpuUtils.cuh:24

Member Data Documentation

◆ cpu_n_events

int SplineMonolithGPU::cpu_n_events
private

Number of events living on CPU.

Definition at line 219 of file gpuSplineUtils.cuh.

◆ cpu_n_params

int SplineMonolithGPU::cpu_n_params
private

Number of params living on CPU.

Definition at line 217 of file gpuSplineUtils.cuh.

◆ cpu_n_splines

unsigned int SplineMonolithGPU::cpu_n_splines
private

Number of splines living on CPU.

Definition at line 213 of file gpuSplineUtils.cuh.

◆ cpu_n_TF1

unsigned int SplineMonolithGPU::cpu_n_TF1
private

Number of TF1 objects living on CPU.

Definition at line 215 of file gpuSplineUtils.cuh.

◆ cpu_spline_size

short int SplineMonolithGPU::cpu_spline_size
private

Size of splines living on CPU.

Definition at line 211 of file gpuSplineUtils.cuh.

◆ gpu_coeff_many

float* SplineMonolithGPU::gpu_coeff_many
private

GPU arrays to hold other coefficients.

Definition at line 183 of file gpuSplineUtils.cuh.

◆ gpu_coeff_TF1_many

float* SplineMonolithGPU::gpu_coeff_TF1_many
private

GPU arrays to hold TF1 coefficients.

Definition at line 192 of file gpuSplineUtils.cuh.

◆ gpu_coeff_x

float* SplineMonolithGPU::gpu_coeff_x
private

KS: GPU arrays to hold X coefficient.

Definition at line 180 of file gpuSplineUtils.cuh.

◆ gpu_nKnots_arr

unsigned int* SplineMonolithGPU::gpu_nKnots_arr
private

KS: GPU Number of knots per spline.

Definition at line 186 of file gpuSplineUtils.cuh.

◆ gpu_nParamPerEvent

unsigned int* SplineMonolithGPU::gpu_nParamPerEvent
private

KS: GPU map keeping track of how many parameters apply to each event; we keep two numbers here: {number of splines per event, index where splines start for a given event}.

Definition at line 175 of file gpuSplineUtils.cuh.

◆ gpu_nParamPerEvent_TF1

unsigned int* SplineMonolithGPU::gpu_nParamPerEvent_TF1
private

KS: GPU map keeping track of how many parameters apply to each event; we keep two numbers here: {number of TF1 per event, index where TF1 start for a given event}.

Definition at line 177 of file gpuSplineUtils.cuh.

◆ gpu_nPoints_arr

short int* SplineMonolithGPU::gpu_nPoints_arr
private

GPU arrays to hold number of points.

Definition at line 194 of file gpuSplineUtils.cuh.

◆ gpu_par_val

float* SplineMonolithGPU::gpu_par_val
private

CW: parameter value on GPU.

Definition at line 199 of file gpuSplineUtils.cuh.

◆ gpu_paramNo_arr

short int* SplineMonolithGPU::gpu_paramNo_arr
private

CW: GPU array with the parameter number for each spline (one entry per spline, not per spline point!)

Definition at line 189 of file gpuSplineUtils.cuh.

◆ gpu_paramNo_TF1_arr

short int* SplineMonolithGPU::gpu_paramNo_TF1_arr
private

CW: GPU array with the parameter number for each TF1 object.

Definition at line 196 of file gpuSplineUtils.cuh.

◆ gpu_spline_segment

short int* SplineMonolithGPU::gpu_spline_segment
private

CW: Spline segment on GPU.

Definition at line 201 of file gpuSplineUtils.cuh.

◆ gpu_total_weights

M3::float_t* SplineMonolithGPU::gpu_total_weights
private

GPU arrays to hold weight for event.

Definition at line 204 of file gpuSplineUtils.cuh.

◆ gpu_weights

float* SplineMonolithGPU::gpu_weights
private

GPU arrays to hold weight for each spline.

Definition at line 206 of file gpuSplineUtils.cuh.

◆ gpu_weights_tf1

float* SplineMonolithGPU::gpu_weights_tf1
private

GPU arrays to hold weight for each TF1.

Definition at line 208 of file gpuSplineUtils.cuh.

◆ text_coeff_x

cudaTextureObject_t SplineMonolithGPU::text_coeff_x = 0
private

KS: Textures are read through a cache that is well optimised for fetching. Make a texture only for variables you often access but rarely overwrite. There are limits on texture memory, so don't use huge arrays.

Definition at line 225 of file gpuSplineUtils.cuh.

◆ text_nParamPerEvent

cudaTextureObject_t SplineMonolithGPU::text_nParamPerEvent = 0
private

KS: Map keeping track of how many parameters apply to each event; we keep two numbers here: {number of splines per event, index where splines start for a given event}.

Definition at line 227 of file gpuSplineUtils.cuh.

◆ text_nParamPerEvent_TF1

cudaTextureObject_t SplineMonolithGPU::text_nParamPerEvent_TF1 = 0
private

KS: Map keeping track of how many parameters apply to each event; we keep two numbers here: {number of TF1 per event, index where TF1 start for a given event}.

Definition at line 229 of file gpuSplineUtils.cuh.


The documentation for this class was generated from the following files: