MaCh3 2.2.3
Reference Guide
SMonolithGPU Class Reference

Class responsible for calculating spline weights on the GPU. More...

Public Member Functions

 SMonolithGPU ()
 constructor More...
 
virtual ~SMonolithGPU ()
 destructor More...
 
__host__ void InitGPU_SplineMonolith (float **cpu_total_weights, int n_events, unsigned int total_nknots, unsigned int n_splines, unsigned int n_tf1, int Eve_size)
 Allocate memory on gpu for spline monolith. More...
 
__host__ void CopyToGPU_SplineMonolith (SplineMonoStruct *cpu_spline_handler, std::vector< float > cpu_many_array_TF1, std::vector< short int > cpu_paramNo_arr_TF1, int n_events, std::vector< unsigned int > cpu_nParamPerEvent, std::vector< unsigned int > cpu_nParamPerEvent_TF1, int n_params, unsigned int n_splines, short int spline_size, unsigned int total_nknots, unsigned int n_tf1)
 Copies data from CPU to GPU for the spline monolith. More...
 
__host__ void InitGPU_Segments (short int **segment)
 Allocate memory for spline segments. More...
 
__host__ void InitGPU_Vals (float **vals)
 Allocate memory for parameter values. More...
 
__host__ void RunGPU_SplineMonolith (float *cpu_total_weights, float *vals, short int *segment, const unsigned int h_n_splines, const unsigned int h_n_tf1)
 Run the GPU code for the separate coefficient arrays, i.e. the separate {x} and {y,b,c,d} arrays. Pass the segment and the parameter values (the binary search has already been performed in SplineMonolith::FindSplineSegment()). More...
 
__host__ void CleanupGPU_SplineMonolith (float *cpu_total_weights)
 This function deallocates the GPU resources allocated for the separate {x} and {y,b,c,d} arrays and for the TF1 objects. More...
 
__host__ void CleanupGPU_Segments (short int *segment, float *vals)
 Clean up pinned variables on the CPU. More...
 

Private Attributes

unsigned int * gpu_nParamPerEvent
 KS: GPU map keeping track of how many parameters apply to each event; two numbers are stored per event: {number of splines for the event, index where its splines start}. More...
 
unsigned int * gpu_nParamPerEvent_TF1
 KS: GPU map keeping track of how many parameters apply to each event; two numbers are stored per event: {number of TF1 for the event, index where its TF1 start}. More...
 
float * gpu_coeff_x
 KS: GPU arrays to hold X coefficient. More...
 
float * gpu_coeff_many
 GPU arrays to hold other coefficients. More...
 
unsigned int * gpu_nKnots_arr
 KS: GPU Number of knots per spline. More...
 
short int * gpu_paramNo_arr
 CW: GPU array with the parameter number for each spline (one entry per spline, not per spline point!) More...
 
float * gpu_coeff_TF1_many
 GPU arrays to hold TF1 coefficients. More...
 
short int * gpu_nPoints_arr
 GPU arrays to hold number of points. More...
 
short int * gpu_paramNo_TF1_arr
 CW: GPU array with the parameter number for each TF1 object. More...
 
float * gpu_total_weights
 GPU arrays to hold weight for event. More...
 
float * gpu_weights
 GPU arrays to hold weight for each spline. More...
 
float * gpu_weights_tf1
 GPU arrays to hold weight for each TF1. More...
 
int h_n_params
 Number of params living on CPU. More...
 
int h_n_events
 Number of events living on CPU. More...
 
cudaTextureObject_t text_coeff_x = 0
 KS: Textures are read-only cached memory, well optimised for fetching. Make a texture only for variables you access often but rarely overwrite; texture memory is limited, so don't use it for huge arrays. More...
 
cudaTextureObject_t text_nParamPerEvent = 0
 KS: Map keeping track of how many parameters apply to each event; two numbers are stored per event: {number of splines for the event, index where its splines start}. More...
 
cudaTextureObject_t text_nParamPerEvent_TF1 = 0
 KS: Map keeping track of how many parameters apply to each event; two numbers are stored per event: {number of TF1 for the event, index where its TF1 start}. More...
 

Detailed Description

Class responsible for calculating spline weights on the GPU.

Definition at line 61 of file gpuSplineUtils.cuh.
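
For orientation, here is a minimal sketch of the intended call sequence, assuming the sizes and the filled SplineMonoStruct are provided by the CPU-side SplineMonolith. The include path and all variable names in this sketch are illustrative, not part of the class.

#include <vector>
#include "gpuSplineUtils.cuh"   // assumed include path

// Hypothetical driver: only the order of the calls is the point here.
void ReweightOnGPU(SplineMonoStruct* splineHandler,
                   std::vector<float> tf1Coeffs,
                   std::vector<short int> tf1ParamNo,
                   std::vector<unsigned int> nParamPerEvent,
                   std::vector<unsigned int> nParamPerEvent_TF1,
                   int nEvents, int nParams, unsigned int nSplines,
                   short int splineSize, unsigned int totalKnots,
                   unsigned int nTF1, int eveSize)
{
  SMonolithGPU gpu;

  // One-off setup: device allocations, then the large host-to-device copy.
  float* cpu_total_weights = nullptr;
  gpu.InitGPU_SplineMonolith(&cpu_total_weights, nEvents, totalKnots, nSplines, nTF1, eveSize);
  gpu.CopyToGPU_SplineMonolith(splineHandler, tf1Coeffs, tf1ParamNo, nEvents,
                               nParamPerEvent, nParamPerEvent_TF1, nParams,
                               nSplines, splineSize, totalKnots, nTF1);

  // Pinned host buffers for the per-step inputs (segment index and parameter value).
  short int* segment = nullptr;
  float* vals = nullptr;
  gpu.InitGPU_Segments(&segment);
  gpu.InitGPU_Vals(&vals);

  // Per step: fill vals/segment on the CPU (SplineMonolith::FindSplineSegment), then evaluate.
  gpu.RunGPU_SplineMonolith(cpu_total_weights, vals, segment, nSplines, nTF1);

  // Teardown.
  gpu.CleanupGPU_SplineMonolith(cpu_total_weights);
  gpu.CleanupGPU_Segments(segment, vals);
}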

Constructor & Destructor Documentation

◆ SMonolithGPU()

SMonolithGPU::SMonolithGPU ( )

constructor


Definition at line 81 of file gpuSplineUtils.cu.

81  {
82  h_n_params = -1;
84  h_n_events = -1;
85 
86  gpu_weights = nullptr;
87  gpu_total_weights = nullptr;
88  gpu_nParamPerEvent = nullptr;
89  gpu_nPoints_arr = nullptr;
90  gpu_paramNo_arr = nullptr;
91  gpu_nKnots_arr = nullptr;
92  gpu_coeff_x = nullptr;
93  gpu_coeff_many = nullptr;
94  gpu_coeff_TF1_many = nullptr;
95  gpu_paramNo_TF1_arr = nullptr;
96  gpu_nParamPerEvent_TF1 = nullptr;
97  gpu_weights_tf1 = nullptr;
98 }

◆ ~SMonolithGPU()

SMonolithGPU::~SMonolithGPU ( )
virtual

destructor

Definition at line 100 of file gpuSplineUtils.cu.

100  {
101 
102 }

Member Function Documentation

◆ CleanupGPU_Segments()

__host__ void SMonolithGPU::CleanupGPU_Segments (short int * segment, float * vals)

Clean up pinned variables on the CPU.

Parameters
segment: Found spline segment for each parameter
vals: Value to which we want to reweight, for each parameter

Definition at line 587 of file gpuSplineUtils.cu.

587  {
588 // *******************************************
589  cudaFreeHost(segment);
590  cudaFreeHost(vals);
591 
592  segment = nullptr;
593  vals = nullptr;
594 }

◆ CleanupGPU_SplineMonolith()

__host__ void SMonolithGPU::CleanupGPU_SplineMonolith ( float *  cpu_total_weights)

This function deallocates the GPU resources allocated for the separate {x} and {y,b,c,d} arrays and for the TF1 objects.

Parameters
cpu_total_weights: Pointer to the total weights array on the CPU (used if Weight_On_SplineBySpline_Basis is not defined).

Definition at line 552 of file gpuSplineUtils.cu.

556  {
557 // *********************************
558  cudaFree(gpu_paramNo_arr);
559  cudaFree(gpu_nKnots_arr);
560 
561  // free the coefficient arrays
562  cudaDestroyTextureObject(text_coeff_x);
563 
564  cudaFree(gpu_coeff_x);
565  cudaFree(gpu_coeff_many);
566 
567  cudaFree(gpu_coeff_TF1_many);
568  cudaFree(gpu_paramNo_TF1_arr);
569  // free weights on the gpu
570  cudaFree(gpu_weights);
571  cudaFree(gpu_weights_tf1);
572 #ifndef Weight_On_SplineBySpline_Basis
573  cudaFree(gpu_total_weights);
574  //KS: Before removing variable let's destroy texture
575  cudaDestroyTextureObject(text_nParamPerEvent);
576  cudaDestroyTextureObject(text_nParamPerEvent_TF1);
577 
578  cudaFree(gpu_nParamPerEvent);
579  cudaFree(gpu_nParamPerEvent_TF1);
580  cudaFreeHost(cpu_total_weights);
581  cpu_total_weights = nullptr;
582 #endif
583 }
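
Note that cpu_total_weights is passed by value, so the nullptr assignment at the end of the listing only clears the function's local copy; the caller's pointer still holds the address of the now-freed pinned buffer. A small, purely illustrative caller-side helper (the include path and all names are placeholders):

#include "gpuSplineUtils.cuh"   // assumed include path

// The pinned host buffer is released inside CleanupGPU_SplineMonolith (cudaFreeHost),
// so the caller should drop its own copies of the pointers afterwards.
void TeardownSplineGPU(SMonolithGPU& gpu, float*& cpu_total_weights,
                       short int*& segment, float*& vals)
{
  gpu.CleanupGPU_SplineMonolith(cpu_total_weights);
  cpu_total_weights = nullptr;   // our copy now points at freed memory; clear it

  gpu.CleanupGPU_Segments(segment, vals);
  segment = nullptr;
  vals = nullptr;
}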

◆ CopyToGPU_SplineMonolith()

__host__ void SMonolithGPU::CopyToGPU_SplineMonolith (SplineMonoStruct * cpu_spline_handler,
    std::vector< float > cpu_many_array_TF1,
    std::vector< short int > cpu_paramNo_arr_TF1,
    int n_events,
    std::vector< unsigned int > cpu_nParamPerEvent,
    std::vector< unsigned int > cpu_nParamPerEvent_TF1,
    int n_params,
    unsigned int n_splines,
    short int spline_size,
    unsigned int total_nknots,
    unsigned int n_tf1)

Copies data from CPU to GPU for the spline monolith.

This function transfers the necessary spline data and parameters from the CPU to the GPU, including TF1-related arrays and parameters. This setup is crucial for spline evaluations on the GPU.

Parameters
cpu_spline_handler: Pointer to the structure managing spline data on the CPU.
cpu_many_array_TF1: Array of TF1 parameters on the CPU.
cpu_paramNo_arr_TF1: Array containing parameter numbers for TF1 objects.
n_events: Number of events, necessary for correct data handling.
cpu_nParamPerEvent: Array indicating the number of parameters per event.
cpu_nParamPerEvent_TF1: Array indicating the number of parameters per TF1 object.
n_params: Total number of parameters across all splines.
n_splines: Total number of spline objects.
spline_size: Size of each spline object.
total_nknots: Total number of knots across all splines.
n_tf1: Total number of TF1 objects.

Definition at line 201 of file gpuSplineUtils.cu.

217  {
218 // ******************************************************
219  if (n_params != _N_SPLINES_) {
220  printf("Number of splines not equal to %i, GPU code for event-by-event splines will fail\n", _N_SPLINES_);
221  printf("n_params = %i\n", n_params);
222  printf("%s : %i\n", __FILE__, __LINE__);
223  throw;
224  }
225 
226  // Write to the global statics (h_* denotes host stored variable)
227  h_n_params = n_params;
228 #ifndef Weight_On_SplineBySpline_Basis
229  h_n_events = n_events;
230 #endif
231  // Copy the constants
232  // Total number of valid splines for all loaded events
233  cudaMemcpyToSymbol(d_n_splines, &n_splines, sizeof(n_splines));
234  CudaCheckError();
235 
236  // Total number of valid TF1 for all loaded events
237  cudaMemcpyToSymbol(d_n_TF1, &n_tf1, sizeof(n_tf1));
238  CudaCheckError();
239 
240  // Total spline size per spline; i.e. just the number of points or knots in the spline
241  cudaMemcpyToSymbol(d_spline_size, &spline_size, sizeof(spline_size));
242  CudaCheckError();
243 #ifndef Weight_On_SplineBySpline_Basis
244  // Number of events
245  cudaMemcpyToSymbol(d_n_events, &h_n_events, sizeof(h_n_events));
246  CudaCheckError();
247 #endif
248  // Copy the coefficient arrays to the GPU; this only happens once per entire Markov Chain so is OK to do multiple extensive memory copies
249  cudaMemcpy(gpu_coeff_many, cpu_spline_handler->coeff_many.data(), sizeof(float)*total_nknots*_nCoeff_, cudaMemcpyHostToDevice);
250  CudaCheckError();
251 
252  cudaMemcpy(gpu_coeff_x, cpu_spline_handler->coeff_x.data(), sizeof(float)*spline_size*n_params, cudaMemcpyHostToDevice);
253  CudaCheckError();
254 
255  //KS: Bind our texture with the GPU variable
256  //KS: Tried also moving gpu_many_array to texture memory it only worked with restricted number of MC runs, most likely hit texture memory limit :(
257  struct cudaResourceDesc resDesc_coeff_x;
258  memset(&resDesc_coeff_x, 0, sizeof(resDesc_coeff_x));
259  resDesc_coeff_x.resType = cudaResourceTypeLinear;
260  resDesc_coeff_x.res.linear.devPtr = gpu_coeff_x;
261  resDesc_coeff_x.res.linear.desc = cudaCreateChannelDesc<float>();
262  resDesc_coeff_x.res.linear.sizeInBytes = sizeof(float)*spline_size*n_params;
263 
264  // Specify texture object parameters
265  struct cudaTextureDesc texDesc_coeff_x;
266  memset(&texDesc_coeff_x, 0, sizeof(texDesc_coeff_x));
267  texDesc_coeff_x.readMode = cudaReadModeElementType;
268 
269  // Create texture object
270  cudaCreateTextureObject(&text_coeff_x, &resDesc_coeff_x, &texDesc_coeff_x, nullptr);
271  CudaCheckError();
272 
273  // Also copy the parameter number for each spline onto the GPU; i.e. what spline parameter are we calculating right now
274  cudaMemcpy(gpu_paramNo_arr, cpu_spline_handler->paramNo_arr.data(), n_splines*sizeof(short int), cudaMemcpyHostToDevice);
275  CudaCheckError();
276 
277  // Also copy the knot map for each spline onto the GPU;
278  cudaMemcpy(gpu_nKnots_arr, cpu_spline_handler->nKnots_arr.data(), n_splines*sizeof(unsigned int), cudaMemcpyHostToDevice);
279  CudaCheckError();
280 
281  //Now TF1
282  // Copy the coefficient arrays to the GPU; this only happens once per entire Markov Chain so is OK to do multiple extensive memory copies
283  cudaMemcpy(gpu_coeff_TF1_many, cpu_many_array_TF1.data(), sizeof(float)*n_tf1*_nTF1Coeff_, cudaMemcpyHostToDevice);
284  CudaCheckError();
285 
286  // Also copy the parameter number for each TF1 onto the GPU; i.e. what TF1 parameter are we calculating right now
287  cudaMemcpy(gpu_paramNo_TF1_arr, cpu_paramNo_arr_TF1.data(), n_tf1*sizeof(short int), cudaMemcpyHostToDevice);
288  CudaCheckError();
289 
290  #ifndef Weight_On_SplineBySpline_Basis
291  //KS: Keep track how much splines each event has
292  cudaMemcpy(gpu_nParamPerEvent, cpu_nParamPerEvent.data(), 2*n_events*sizeof(unsigned int), cudaMemcpyHostToDevice);
293  CudaCheckError();
294 
295  //KS: Bind our texture with the GPU variable
296  // create a resource descriptor based on device pointers
297  struct cudaResourceDesc resDesc_nParamPerEvent;
298  memset(&resDesc_nParamPerEvent, 0, sizeof(resDesc_nParamPerEvent));
299  resDesc_nParamPerEvent.resType = cudaResourceTypeLinear;
300  resDesc_nParamPerEvent.res.linear.devPtr = gpu_nParamPerEvent;
301  resDesc_nParamPerEvent.res.linear.desc = cudaCreateChannelDesc<unsigned int>();
302  resDesc_nParamPerEvent.res.linear.sizeInBytes = 2*n_events*sizeof(unsigned int);
303 
304  // Specify texture object parameters
305  struct cudaTextureDesc texDesc_nParamPerEvent;
306  memset(&texDesc_nParamPerEvent, 0, sizeof(texDesc_nParamPerEvent));
307  texDesc_nParamPerEvent.readMode = cudaReadModeElementType;
308 
309  //Finally create texture object
310  cudaCreateTextureObject(&text_nParamPerEvent, &resDesc_nParamPerEvent, &texDesc_nParamPerEvent, nullptr);
311  CudaCheckError();
312 
313  // Now TF1
314  cudaMemcpy(gpu_nParamPerEvent_TF1, cpu_nParamPerEvent_TF1.data(), 2*n_events*sizeof(unsigned int), cudaMemcpyHostToDevice);
315  CudaCheckError();
316 
317  //KS: Bind our texture with the GPU variable
318  // create a resource descriptor based on device pointers
319  struct cudaResourceDesc resDesc_nParamPerEvent_tf1;
320  memset(&resDesc_nParamPerEvent_tf1, 0, sizeof(resDesc_nParamPerEvent_tf1));
321  resDesc_nParamPerEvent_tf1.resType = cudaResourceTypeLinear;
322  resDesc_nParamPerEvent_tf1.res.linear.devPtr = gpu_nParamPerEvent_TF1;
323  resDesc_nParamPerEvent_tf1.res.linear.desc = cudaCreateChannelDesc<unsigned int>();
324  resDesc_nParamPerEvent_tf1.res.linear.sizeInBytes = 2*n_events*sizeof(unsigned int);
325 
326  // Specify texture object parameters
327  struct cudaTextureDesc texDesc_nParamPerEvent_tf1;
328  memset(&texDesc_nParamPerEvent_tf1, 0, sizeof(texDesc_nParamPerEvent_tf1));
329  texDesc_nParamPerEvent_tf1.readMode = cudaReadModeElementType;
330 
331  //Finally create texture object
332  cudaCreateTextureObject(&text_nParamPerEvent_TF1, &resDesc_nParamPerEvent_tf1, &texDesc_nParamPerEvent_tf1, nullptr);
333  CudaCheckError();
334  #endif
335 }
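
The texture setup in the listing above follows the standard CUDA pattern for wrapping an existing linear device buffer in a cudaTextureObject_t. A stripped-down version of that pattern, independent of any MaCh3 type (the function and argument names are illustrative):

#include <cuda_runtime.h>
#include <cstddef>
#include <cstring>

// Wrap an already-allocated device buffer of floats in a texture object so kernels
// can read it through the texture cache. Returns 0 on failure.
cudaTextureObject_t MakeLinearFloatTexture(float* d_buffer, std::size_t n_elements)
{
  cudaResourceDesc resDesc;
  std::memset(&resDesc, 0, sizeof(resDesc));
  resDesc.resType = cudaResourceTypeLinear;           // plain 1D linear device memory
  resDesc.res.linear.devPtr = d_buffer;
  resDesc.res.linear.desc = cudaCreateChannelDesc<float>();
  resDesc.res.linear.sizeInBytes = n_elements * sizeof(float);

  cudaTextureDesc texDesc;
  std::memset(&texDesc, 0, sizeof(texDesc));
  texDesc.readMode = cudaReadModeElementType;         // return the raw element values

  cudaTextureObject_t tex = 0;
  if (cudaCreateTextureObject(&tex, &resDesc, &texDesc, nullptr) != cudaSuccess) return 0;
  return tex;   // pair with cudaDestroyTextureObject(tex) when done
}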

◆ InitGPU_Segments()

__host__ void SMonolithGPU::InitGPU_Segments ( short int **  segment)

Allocate memory for spline segments.

Parameters
segment: Found spline segment for each parameter

Definition at line 178 of file gpuSplineUtils.cu.

178  {
179 // *******************************************
180  //KS: Rather than allocate memory in standard way this fancy cuda tool allows to pin host memory which make memory transfer faster
181  cudaMallocHost((void **) segment, _N_SPLINES_*sizeof(short int));
182  CudaCheckError();
183 }
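
InitGPU_Segments (and InitGPU_Vals below) use cudaMallocHost rather than new/malloc so that the per-step buffers live in page-locked (pinned) host memory, which the driver can copy to the device faster and asynchronously. A self-contained sketch of the allocate/use/free pairing (the array size here is a placeholder, not the real _N_SPLINES_):

#include <cuda_runtime.h>
#include <cstdio>

int main()
{
  const int nParams = 48;          // placeholder for _N_SPLINES_
  short int* segment = nullptr;

  // Pinned (page-locked) host allocation: faster host-to-device copies than pageable memory.
  if (cudaMallocHost(reinterpret_cast<void**>(&segment), nParams * sizeof(short int)) != cudaSuccess) {
    std::fprintf(stderr, "cudaMallocHost failed\n");
    return 1;
  }

  for (int i = 0; i < nParams; ++i) segment[i] = 0;   // filled once per step in MaCh3

  cudaFreeHost(segment);           // pinned memory must be released with cudaFreeHost
  return 0;
}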

◆ InitGPU_SplineMonolith()

__host__ void SMonolithGPU::InitGPU_SplineMonolith (float ** cpu_total_weights,
    int n_events,
    unsigned int total_nknots,
    unsigned int n_splines,
    unsigned int n_tf1,
    int Eve_size)

Allocate memory on gpu for spline monolith.

Parameters
cpu_total_weights: KS: Pointer that will receive pinned host memory for the total weights (pinning makes memory transfers faster than a standard allocation)
n_events: Number of events; needed to allocate the memory correctly
total_nknots: Total number of knots summed over all splines
n_splines: Total number of spline objects, not knots
n_tf1: Total number of TF1 objects, not coefficients

Definition at line 106 of file gpuSplineUtils.cu.

114  {
115 // *******************************************
116  // Allocate chunks of memory to GPU
117  cudaMalloc((void **) &gpu_paramNo_arr, n_splines*sizeof(short int));
118  CudaCheckError();
119 
120  cudaMalloc((void **) &gpu_nKnots_arr, n_splines*sizeof(unsigned int));
121  CudaCheckError();
122 
123  cudaMalloc((void **) &gpu_coeff_x, Eve_size*sizeof(float));
124  CudaCheckError();
125 
126  cudaMalloc((void **) &gpu_coeff_many, _nCoeff_*total_nknots*sizeof(float));
127  CudaCheckError();
128 
129  // Allocate memory for the array of weights to be returned to CPU
130  cudaMalloc((void **) &gpu_weights, n_splines*sizeof(float));
131  CudaCheckError();
132 
133  // Now TF1 specific
134  cudaMalloc((void **) &gpu_coeff_TF1_many, _nTF1Coeff_*n_tf1*sizeof(float));
135  CudaCheckError();
136 
137  cudaMalloc((void **) &gpu_weights_tf1, n_tf1*sizeof(float));
138  CudaCheckError();
139 
140  cudaMalloc((void **) &gpu_paramNo_TF1_arr, n_tf1*sizeof(short int));
141  CudaCheckError();
142 
143 #ifndef Weight_On_SplineBySpline_Basis
144  //KS: Rather than allocate memory in standard way this fancy cuda tool allows to pin host memory which make memory transfer faster
145  cudaMallocHost((void **) cpu_total_weights, n_events*sizeof(float));
146  CudaCheckError();
147 
148  //KS: Allocate memory for the array of total weights to be returned to CPU
149  cudaMalloc((void **) &gpu_total_weights, n_events*sizeof(float));
150  CudaCheckError();
151 
152  //KS: Allocate memory for the map keeping track how many splines each parameter has
153  cudaMalloc((void **) &gpu_nParamPerEvent, 2*n_events*sizeof(unsigned int));
154  CudaCheckError();
155 
156  //KS: Allocate memory for the map keeping track how many TF1 each parameter has
157  cudaMalloc((void **) &gpu_nParamPerEvent_TF1, 2*n_events*sizeof(unsigned int));
158  CudaCheckError();
159 #endif
160 
161  // Print allocation info to user
162  printf("Allocated %i entries for paramNo and nKnots arrays, size = %f MB\n",
163  n_splines, static_cast<double>(sizeof(short int) * n_splines + sizeof(unsigned int) * n_splines) / 1.0e6);
164  printf("Allocated %i entries for x coeff arrays, size = %f MB\n",
165  Eve_size, static_cast<double>(sizeof(float) * Eve_size) / 1.0e6);
166  printf("Allocated %i entries for {ybcd} coeff arrays, size = %f MB\n",
167  _nCoeff_ * total_nknots, static_cast<double>(sizeof(float) * _nCoeff_ * total_nknots) / 1.0e6);
168  printf("Allocated %i entries for TF1 coefficient arrays, size = %f MB\n",
169  _nTF1Coeff_ * n_tf1, static_cast<double>(sizeof(float) * _nTF1Coeff_ * n_tf1) / 1.0e6);
170 
171  //KS: Ask CUDA about memory usage
172  checkGpuMem();
173  PrintNdevices();
174 }
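
The printf calls at the end of the listing report the size of each allocation; the same arithmetic can be gathered in one helper. A purely illustrative estimate of the device memory requested above, assuming _nCoeff_ = 4 (the {y,b,c,d} layout) and _nTF1Coeff_ = 5 as defined in SplineCommon.h:

#include <cstddef>

// Rough device-memory footprint of the spline-monolith allocations, in bytes.
std::size_t SplineMonolithBytes(unsigned int n_splines, unsigned int total_nknots,
                                unsigned int n_tf1, int n_events, int eve_size)
{
  std::size_t bytes = 0;
  bytes += n_splines * sizeof(short int);                          // gpu_paramNo_arr
  bytes += n_splines * sizeof(unsigned int);                       // gpu_nKnots_arr
  bytes += static_cast<std::size_t>(eve_size) * sizeof(float);     // gpu_coeff_x
  bytes += 4u * total_nknots * sizeof(float);                      // gpu_coeff_many ({y,b,c,d} per knot)
  bytes += n_splines * sizeof(float);                              // gpu_weights
  bytes += 5u * n_tf1 * sizeof(float);                             // gpu_coeff_TF1_many
  bytes += n_tf1 * sizeof(float);                                  // gpu_weights_tf1
  bytes += n_tf1 * sizeof(short int);                              // gpu_paramNo_TF1_arr
  // Without Weight_On_SplineBySpline_Basis, also the per-event bookkeeping:
  bytes += static_cast<std::size_t>(n_events) * sizeof(float);               // gpu_total_weights
  bytes += 2u * static_cast<std::size_t>(n_events) * sizeof(unsigned int);   // gpu_nParamPerEvent
  bytes += 2u * static_cast<std::size_t>(n_events) * sizeof(unsigned int);   // gpu_nParamPerEvent_TF1
  return bytes;
}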

◆ InitGPU_Vals()

__host__ void SMonolithGPU::InitGPU_Vals ( float **  vals)

Allocate memory for parameter values.

Parameters
vals: Value to which we want to reweight, for each parameter

Definition at line 187 of file gpuSplineUtils.cu.

187  {
188 // *******************************************
189  //KS: Rather than allocate memory in standard way this fancy cuda tool allows to pin host memory which make memory transfer faster
190  cudaMallocHost((void **) vals, _N_SPLINES_*sizeof(float));
191  CudaCheckError();
192 }

◆ RunGPU_SplineMonolith()

__host__ void SMonolithGPU::RunGPU_SplineMonolith (float * cpu_total_weights,
    float * vals,
    short int * segment,
    const unsigned int h_n_splines,
    const unsigned int h_n_tf1)

Run the GPU code for the separate coefficient arrays, i.e. the separate {x} and {y,b,c,d} arrays. Pass the segment and the parameter values (the binary search has already been performed in SplineMonolith::FindSplineSegment()).

Executes the GPU code for calculating spline weights.

This function runs the GPU computation for the spline monolith. It assumes that the appropriate segment has already been identified through binary search in the SplineMonolith::FindSplineSegment() function.

Parameters
cpu_weights: Pointer to the array of weights on the CPU (used if Weight_On_SplineBySpline_Basis is defined).
cpu_weights_tf1: Pointer to the array of TF1 weights (used if Weight_On_SplineBySpline_Basis is defined).
cpu_total_weights: Pointer to the total weights array (used if Weight_On_SplineBySpline_Basis is not defined).
vals: Pointer to an array holding the parameter values to be processed.
segment: Pointer to an array containing segment indices for parameters.
h_n_splines: Total number of spline objects in the GPU context.
h_n_tf1: Total number of TF1 objects in the GPU context.

Definition at line 459 of file gpuSplineUtils.cu.

471  {
472 // *****************************************
473  dim3 block_size;
474  dim3 grid_size;
475 
476  block_size.x = _BlockSize_;
477  grid_size.x = (h_n_splines / block_size.x) + 1;
478 
479  // Copy the segment values to the GPU (segment_gpu), which is h_n_params long
480  cudaMemcpyToSymbol(segment_gpu, segment, h_n_params*sizeof(short int));
481  CudaCheckError();
482 
483  // Copy the parameter values values to the GPU (vals_gpu), which is h_n_params long
484  cudaMemcpyToSymbol(val_gpu, vals, h_n_params*sizeof(float));
485  CudaCheckError();
486 
487  // KS: Consider asynchronous kernel call, this might help EvalOnGPU_Splines and EvalOnGPU_TF1 are independent
488  // Set the cache config to prefer L1 for the kernel
489  //cudaFuncSetCacheConfig(EvalOnGPU_Splines, cudaFuncCachePreferL1);
490  EvalOnGPU_Splines<<<grid_size, block_size>>>(
491     gpu_paramNo_arr,
492     gpu_nKnots_arr,
493 
494     gpu_coeff_many,
495 
496     gpu_weights,
497     text_coeff_x
498     );
499  CudaCheckError();
500 
501  grid_size.x = (h_n_tf1 / block_size.x) + 1;
502  EvalOnGPU_TF1<<<grid_size, block_size>>>(
503     gpu_coeff_TF1_many,
504     gpu_paramNo_TF1_arr,
505 
506     gpu_weights_tf1
507     );
508  CudaCheckError();
509 
510 //KS: We can either copy gpu_weight and calculate total weight in reweighting loop, or not copy and calculate total weight stall at GPU, which means less memory transfer
511 #ifdef Weight_On_SplineBySpline_Basis
512  // Here we have to make a somewhat large GPU->CPU transfer because it's all the splines' response
513  cudaMemcpy(cpu_weights, gpu_weights, h_n_splines*sizeof(float), cudaMemcpyDeviceToHost);
514  CudaCheckError();
515 
516  cudaMemcpy(cpu_weights_tf1, gpu_weights_tf1, h_n_tf1*sizeof(float), cudaMemcpyDeviceToHost);
517  CudaCheckError();
518 
519 //KS: Else calculate Total Weight
520 #else
521  grid_size.x = (h_n_events / block_size.x) + 1;
522 
523  EvalOnGPU_TotWeight<<<grid_size, block_size>>>(
524     gpu_weights,
525     gpu_weights_tf1,
526 
527     gpu_total_weights,
528 
529     text_nParamPerEvent,
530     text_nParamPerEvent_TF1
531     );
532  CudaCheckError();
533 
534  //KS: Here we have to make a somewhat large GPU->CPU transfer because it is proportional to number of events
535  //KS: Normally code wait for memory transfer to finish before moving further cudaMemcpyAsync means we will continue to execute code and in a meantime keep copying stuff.
536  cudaMemcpyAsync(cpu_total_weights, gpu_total_weights, h_n_events * sizeof(float), cudaMemcpyDeviceToHost, 0);
537  CudaCheckError();
538 #endif
539 
540  #ifdef DEBUG
541  printf("Copied GPU total weights to CPU with SUCCESS (drink more tea)\n");
542  printf("Released calculated response from GPU with SUCCESS (drink most tea)\n");
543  #endif
544 }
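
One practical point about the listing: without Weight_On_SplineBySpline_Basis the final device-to-host copy of the total weights is issued with cudaMemcpyAsync on the default stream, so the host must synchronise before it reads cpu_total_weights (in MaCh3 this is handled by the CPU-side SplineMonolith code). An illustrative per-step wrapper, continuing the placeholder naming of the earlier sketch:

#include <cuda_runtime.h>
#include "gpuSplineUtils.cuh"   // assumed include path

// Run one evaluation and copy the per-event weights out; all names are placeholders.
void EvaluateStep(SMonolithGPU& gpu, float* cpu_total_weights, float* vals,
                  short int* segment, unsigned int nSplines, unsigned int nTF1,
                  int nEvents, float* eventWeights)
{
  gpu.RunGPU_SplineMonolith(cpu_total_weights, vals, segment, nSplines, nTF1);

  // The copy of gpu_total_weights into the pinned host buffer is asynchronous;
  // wait for it before touching cpu_total_weights on the host.
  cudaDeviceSynchronize();

  for (int i = 0; i < nEvents; ++i) eventWeights[i] = cpu_total_weights[i];
}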

Member Data Documentation

◆ gpu_coeff_many

float* SMonolithGPU::gpu_coeff_many
private

GPU arrays to hold other coefficients.

Definition at line 192 of file gpuSplineUtils.cuh.

◆ gpu_coeff_TF1_many

float* SMonolithGPU::gpu_coeff_TF1_many
private

GPU arrays to hold TF1 coefficients.

Definition at line 201 of file gpuSplineUtils.cuh.

◆ gpu_coeff_x

float* SMonolithGPU::gpu_coeff_x
private

KS: GPU arrays to hold X coefficient.

Definition at line 189 of file gpuSplineUtils.cuh.

◆ gpu_nKnots_arr

unsigned int* SMonolithGPU::gpu_nKnots_arr
private

KS: GPU Number of knots per spline.

Definition at line 195 of file gpuSplineUtils.cuh.

◆ gpu_nParamPerEvent

unsigned int* SMonolithGPU::gpu_nParamPerEvent
private

KS: GPU map keeping track of how many parameters apply to each event; two numbers are stored per event: {number of splines for the event, index where its splines start}.

Definition at line 184 of file gpuSplineUtils.cuh.
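
In other words the map is stored flat, with two entries per event. A small sketch of the indexing this description implies (the {count, first index} ordering is assumed from the brace order above; the helper itself is hypothetical):

#include <cuda_runtime.h>

// Hypothetical helper reading the flat per-event map for one event.
__host__ __device__ inline void SplineRangeForEvent(const unsigned int* nParamPerEvent,
                                                    unsigned int event,
                                                    unsigned int& nSplinesForEvent,
                                                    unsigned int& firstSplineIndex)
{
  nSplinesForEvent = nParamPerEvent[2 * event];       // how many splines touch this event
  firstSplineIndex = nParamPerEvent[2 * event + 1];   // where they start in the monolith
}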

◆ gpu_nParamPerEvent_TF1

unsigned int* SMonolithGPU::gpu_nParamPerEvent_TF1
private

KS: GPU map keeping track of how many parameters apply to each event; two numbers are stored per event: {number of TF1 for the event, index where its TF1 start}.

Definition at line 186 of file gpuSplineUtils.cuh.

◆ gpu_nPoints_arr

short int* SMonolithGPU::gpu_nPoints_arr
private

GPU arrays to hold number of points.

Definition at line 203 of file gpuSplineUtils.cuh.

◆ gpu_paramNo_arr

short int* SMonolithGPU::gpu_paramNo_arr
private

CW: GPU array with the parameter number for each spline (one entry per spline, not per spline point!)

Definition at line 198 of file gpuSplineUtils.cuh.

◆ gpu_paramNo_TF1_arr

short int* SMonolithGPU::gpu_paramNo_TF1_arr
private

CW: GPU array with the parameter number for each TF1 object.

Definition at line 205 of file gpuSplineUtils.cuh.

◆ gpu_total_weights

float* SMonolithGPU::gpu_total_weights
private

GPU arrays to hold weight for event.

Definition at line 208 of file gpuSplineUtils.cuh.

◆ gpu_weights

float* SMonolithGPU::gpu_weights
private

GPU arrays to hold weight for each spline.

Definition at line 210 of file gpuSplineUtils.cuh.

◆ gpu_weights_tf1

float* SMonolithGPU::gpu_weights_tf1
private

GPU arrays to hold weight for each TF1.

Definition at line 212 of file gpuSplineUtils.cuh.

◆ h_n_events

int SMonolithGPU::h_n_events
private

Number of events living on CPU.

Definition at line 218 of file gpuSplineUtils.cuh.

◆ h_n_params

int SMonolithGPU::h_n_params
private

Number of params living on CPU.

Definition at line 216 of file gpuSplineUtils.cuh.

◆ text_coeff_x

cudaTextureObject_t SMonolithGPU::text_coeff_x = 0
private

KS: Textures are read-only cached memory, well optimised for fetching. Make a texture only for variables you access often but rarely overwrite; texture memory is limited, so don't use it for huge arrays.

Definition at line 224 of file gpuSplineUtils.cuh.
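
Reading such a texture inside a kernel goes through tex1Dfetch with the element index; a minimal sketch (the kernel and its arguments are illustrative, not part of MaCh3):

#include <cuda_runtime.h>

// Illustrative kernel: fetch x-knot values through the texture object so the reads
// go through the read-only texture cache instead of plain global loads.
__global__ void ReadKnots(cudaTextureObject_t text_coeff_x, float* out, int n)
{
  const int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) out[i] = tex1Dfetch<float>(text_coeff_x, i);
}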

◆ text_nParamPerEvent

cudaTextureObject_t SMonolithGPU::text_nParamPerEvent = 0
private

KS: Map keeping track of how many parameters apply to each event; two numbers are stored per event: {number of splines for the event, index where its splines start}.

Definition at line 227 of file gpuSplineUtils.cuh.

◆ text_nParamPerEvent_TF1

cudaTextureObject_t SMonolithGPU::text_nParamPerEvent_TF1 = 0
private

KS: Map keeping track of how many parameters apply to each event; two numbers are stored per event: {number of TF1 for the event, index where its TF1 start}.

Definition at line 229 of file gpuSplineUtils.cuh.


The documentation for this class was generated from the following files:

gpuSplineUtils.cuh
gpuSplineUtils.cu