7 #pragma GCC diagnostic ignored "-Wuseless-cast"
8 #pragma GCC diagnostic ignored "-Wfloat-conversion"
37 const std::vector<RespFuncType> &SplineType,
38 const bool SaveFlatTree,
39 const std::string& _FastSplineName) :
SplineBase() {
45 MACH3LOG_INFO(
"-- GPUING WITH arrays and master spline containing TResponseFunction_red");
71 MACH3LOG_INFO(
"Found {} maximum number of splines in an event", maxnSplines);
115 unsigned int KnotCounter = 0;
116 unsigned int TF1PointsCounter = 0;
117 unsigned int NSplinesCounter = 0;
118 unsigned int TF1sCounter = 0;
119 int ParamCounter = 0;
120 int ParamCounterGlobal = 0;
121 int ParamCounter_TF1 = 0;
122 int ParamCounterGlobalTF1 = 0;
124 for(
unsigned int EventCounter = 0; EventCounter < MasterSpline.size(); ++EventCounter) {
127 for(
unsigned int ParamNumber = 0; ParamNumber < MasterSpline[EventCounter].size(); ++ParamNumber) {
129 if (MasterSpline[EventCounter][ParamNumber] == NULL)
continue;
143 if (nPoints_tmp == 1)
continue;
148 for (
int j = 0; j < nPoints_tmp; ++j) {
149 for (
int k = 0; k <
_nCoeff_; k++) {
157 KnotCounter += nPoints_tmp;
163 else if (SplineType[ParamNumber] ==
kTF1_red)
168 TF1_red* CurrSpline =
dynamic_cast<TF1_red*
>(MasterSpline[EventCounter][ParamNumber]);
178 TF1PointsCounter += nPoints_tmp;
187 delete MasterSpline[EventCounter][ParamNumber];
188 MasterSpline[EventCounter][ParamNumber] =
nullptr;
193 ParamCounterGlobal += ParamCounter;
197 ParamCounterGlobalTF1 += ParamCounter_TF1;
200 ParamCounter_TF1 = 0;
204 delete[] temp_coeffs;
207 for (
unsigned int j = 0; j < event_size_max; j++) {
212 MACH3LOG_WARN(
"Indicates some parameter doesn't have a single spline");
216 if(BadXCounter == 5)
MACH3LOG_WARN(
"There is more unutilised knots although I will stop spamming");
244 MACH3LOG_INFO(
"Total size = {:.2f} MB memory on CPU to move to GPU",
245 (
double(
sizeof(
float) *
nKnots *
_nCoeff_) +
double(
sizeof(
float) * event_size_max) / 1.E6 +
247 MACH3LOG_INFO(
"Total TF1 size = {:.2f} MB memory on CPU to move to GPU",
250 MACH3LOG_INFO(
"Since you are running Total event weight mode then GPU weight array (GPU->CPU every step) = {:.2f} MB",
251 double(
sizeof(
float) *
NEvents) / 1.E6);
252 MACH3LOG_INFO(
"Parameter value array (CPU->GPU every step) = {:.4f} MB",
double(
sizeof(
float) *
nParams) / 1.E6);
305 unsigned int &nEvents,
306 short int &MaxPoints,
307 short int &numParams,
309 unsigned int &NSplinesValid,
310 unsigned int &numKnots,
311 unsigned int &nTF1Valid,
312 unsigned int &nTF1_coeff,
313 const std::vector<RespFuncType> &SplineType) {
328 nEvents = int(MasterSpline.size());
331 int nMaxSplines_PerEvent = 0;
334 numParams = short(MasterSpline[0].size());
339 for(
unsigned int EventCounter = 0; EventCounter < MasterSpline.size(); ++EventCounter) {
342 int TempSize = int(MasterSpline[EventCounter].size());
343 if (TempSize != numParams) {
344 MACH3LOG_ERROR(
"Found {} parameters for event {}", TempSize, EventCounter);
345 MACH3LOG_ERROR(
"but was expecting {} since that's what I found for the previous event", numParams);
346 MACH3LOG_ERROR(
"Somehow this event has a different number of spline parameters... Please study further!");
350 numParams = short(MasterSpline[EventCounter].size());
352 int nSplines_SingleEvent = 0;
355 for(
unsigned int ParamNumber = 0; ParamNumber < MasterSpline[EventCounter].size(); ++ParamNumber) {
356 if (MasterSpline[EventCounter][ParamNumber]) {
362 nPoints = CurrSpline->
GetNp();
365 if (nPoints > MaxPoints) {
366 MaxPoints =
static_cast<short int>(nPoints);
369 nSplines_SingleEvent++;
383 CurrSpline->
GetKnot(k, xtemp, ytemp);
389 else if (SplineType[ParamNumber] ==
kTF1_red)
393 nPoints = CurrSpline->
GetSize();
394 nTF1_coeff += nPoints;
402 if (nSplines_SingleEvent > nMaxSplines_PerEvent) nMaxSplines_PerEvent = nSplines_SingleEvent;
404 nSplines = nMaxSplines_PerEvent;
408 for (
M3::int_t i = 0; i < numParams; ++i)
411 if (SplineType[i] ==
kTF1_red)
continue;
415 if (nPoints == -999 || xArray.size() == 0) {
424 MACH3LOG_WARN(
"In total SplineInfoArray for {} hasn't been initialised", Counter);
433 MACH3LOG_INFO(
"-- GPUING WITH {X} and {Y,B,C,D} arrays and master spline containing TSpline3_red");
443 auto SplineFile = std::make_unique<TFile>(FileName.c_str(),
"OPEN");
444 TTree *Settings = SplineFile->Get<TTree>(
"Settings");
445 TTree *Monolith_TF1 = SplineFile->Get<TTree>(
"Monolith_TF1");
446 TTree *
EventInfo = SplineFile->Get<TTree>(
"EventInfo");
447 TTree *SplineTree = SplineFile->Get<TTree>(
"SplineTree");
449 unsigned int NEvents_temp;
450 short int nParams_temp;
452 unsigned int nKnots_temp;
453 unsigned int NSplines_valid_temp;
454 unsigned int nTF1Valid_temp;
455 unsigned int nTF1coeff_temp;
457 Settings->SetBranchAddress(
"NEvents", &NEvents_temp);
458 Settings->SetBranchAddress(
"nParams", &nParams_temp);
459 Settings->SetBranchAddress(
"_max_knots", &_max_knots_temp);
460 Settings->SetBranchAddress(
"nKnots", &nKnots_temp);
461 Settings->SetBranchAddress(
"NSplines_valid", &NSplines_valid_temp);
462 Settings->SetBranchAddress(
"NTF1_valid", &nTF1Valid_temp);
463 Settings->SetBranchAddress(
"nTF1coeff", &nTF1coeff_temp);
465 Settings->GetEntry(0);
469 _max_knots =
static_cast<short int>(_max_knots_temp);
487 SplineTree->GetEntry(0);
489 float coeff_tf1 = 0.;
490 Monolith_TF1->SetBranchAddress(
"cpu_coeff_TF1_many", &coeff_tf1);
491 for(
unsigned int i = 0; i <
nTF1coeff; i++)
493 Monolith_TF1->GetEntry(i);
497 unsigned int nParamPerEvent = 0;
498 unsigned int nParamPerEvent_tf1 = 0;
500 EventInfo->SetBranchAddress(
"cpu_nParamPerEvent", &nParamPerEvent);
501 EventInfo->SetBranchAddress(
"cpu_nParamPerEvent_tf1", &nParamPerEvent_tf1);
502 for(
unsigned int i = 0; i < 2*
NEvents; i++)
545 auto SplineFile = std::make_unique<TFile>(FileName.c_str(),
"recreate");
546 TTree *Settings =
new TTree(
"Settings",
"Settings");
547 TTree *Monolith_TF1 =
new TTree(
"Monolith_TF1",
"Monolith_TF1");
548 TTree *XKnots =
new TTree(
"XKnots",
"XKnots");
549 TTree *
EventInfo =
new TTree(
"EventInfo",
"EventInfo");
551 unsigned int NEvents_temp =
NEvents;
552 short int nParams_temp =
nParams;
554 unsigned int nKnots_temp =
nKnots;
559 Settings->Branch(
"NEvents", &NEvents_temp,
"NEvents/i");
560 Settings->Branch(
"nParams", &nParams_temp,
"nParams/S");
561 Settings->Branch(
"_max_knots", &_max_knots_temp,
"_max_knots/I");
562 Settings->Branch(
"nKnots", &nKnots_temp,
"nKnots/i");
563 Settings->Branch(
"NSplines_valid", &NSplines_valid_temp,
"NSplines_valid/i");
564 Settings->Branch(
"NTF1_valid", &nTF1Valid_temp,
"NTF1_valid/i");
565 Settings->Branch(
"nTF1coeff", &nTF1coeff_temp,
"nTF1coeff/i");
572 TTree *SplineTree =
new TTree(
"SplineTree",
"SplineTree");
579 float coeff_tf1 = 0.;
580 Monolith_TF1->Branch(
"cpu_coeff_TF1_many", &coeff_tf1,
"cpu_coeff_TF1_many/F");
581 for(
unsigned int i = 0; i <
nTF1coeff; i++)
584 Monolith_TF1->Fill();
587 Monolith_TF1->Write();
589 unsigned int nParamPerEvent = 0;
590 unsigned int nParamPerEvent_tf1 = 0;
592 EventInfo->Branch(
"cpu_nParamPerEvent", &nParamPerEvent,
"cpu_nParamPerEvent/i");
593 EventInfo->Branch(
"cpu_nParamPerEvent_tf1", &nParamPerEvent_tf1,
"cpu_nParamPerEvent_tf1/i");
595 for(
unsigned int i = 0; i < 2*
NEvents; i++)
643 for (
int j = 0; j <
_nCoeff_; j++) {
648 int Np = spl->
GetNp();
655 MACH3LOG_ERROR(
"This _WILL_ cause problems with GPU splines and _SHOULD_ be fixed!");
664 for(
int i = 0; i < Np; i++) {
668 xArray[i] = float(x);
673 if((xArray[i] == -999) || (manyArray[i*
_nCoeff_] == -999) || (manyArray[i*
_nCoeff_ +1] == -999) || (manyArray[i*
_nCoeff_+2] == -999) || (manyArray[i*
_nCoeff_+3] == -999)){
674 MACH3LOG_ERROR(
"*********** Bad params in {} ************", __func__);
675 MACH3LOG_ERROR(
"pre cast to float (x, y, b, c, d) = {:.2f}, {:.2f}, {:.2f}, {:.2f}, {:.2f}", x, y, b, c, d);
676 MACH3LOG_ERROR(
"pre cast to float (x, y, b, c, d) = {:.2f}, {:.2f}, {:.2f}, {:.2f}, {:.2f}", xArray[i], manyArray[i*
_nCoeff_], manyArray[i*
_nCoeff_+1], manyArray[i*
_nCoeff_+2], manyArray[i*
_nCoeff_+3]);
677 MACH3LOG_ERROR(
"This will cause problems when preparing for GPU");
678 MACH3LOG_ERROR(
"***************************************************************");
729 #pragma omp for simd nowait
731 for (
unsigned int splineNum = 0; splineNum <
NSplines_valid; ++splineNum)
740 const short int segment_X = short(Param*
_max_knots+segment);
763 for (
unsigned int tf1Num = 0; tf1Num <
NTF1_valid; ++tf1Num)
769 const unsigned int TF1_Index = tf1Num *
_nTF1Coeff_;
788 #pragma omp parallel for
790 for (
unsigned int EventNum = 0; EventNum <
NEvents; ++EventNum)
792 float totalWeight = 1.0f;
794 const unsigned int Offset = 2 * EventNum;
802 #pragma omp simd reduction(*:totalWeight)
804 for (
unsigned int id = 0;
id < numParams; ++id) {
814 #pragma omp simd reduction(*:totalWeight)
816 for (
unsigned int id = 0;
id < numParams_tf1; ++id) {
834 MACH3LOG_INFO(
"Size of x array = {:.4f} MB",
double(
sizeof(
float)*event_size_max)/1.E6);
@ kTF1_red
Uses TF1_red for interpolation.
@ kTSpline3_red
Uses TSpline3_red for interpolation.
void CleanVector(T &)
Base case: do nothing for non-vector types.
constexpr int _nCoeff_
KS: We store coefficients {y,b,c,d} in one array one by one, this is only to define it once rather th...
constexpr int _nTF1Coeff_
KS: For TF1 we store at most 5 coefficients, we could make it more flexible but for now define it her...
Custom exception class used throughout MaCh3.
Base class for calculating weight from spline.
void GetTF1Coeff(TF1_red *&spl, int &nPoints, float *&coeffs) const
CW: Gets the polynomial coefficients for TF1.
short int nParams
Number of parameters that have splines.
void FindSplineSegment()
CW:Code used in step by step reweighting, Find Spline Segment for each param.
short int * SplineSegments
std::vector< FastSplineInfo > SplineInfoArray
float * ParamValues
Store parameter values they are not in FastSplineInfo as in case of GPU we need to copy paste it to G...
void LoadFastSplineInfoDir(std::unique_ptr< TFile > &SplineFile)
KS: Load preprocessed FastSplineInfo.
void PrepareFastSplineInfoDir(std::unique_ptr< TFile > &SplineFile) const
KS: Prepare Fast Spline Info within SplineFile.
Class responsible for calculating spline weight on GPU.
__host__ void RunGPU_SplineMonolith(M3::float_t *cpu_total_weights, float *vals, short int *segment)
Run the GPU code for the separate many arrays. As in separate {x}, {y,b,c,d} arrays Pass the segment ...
__host__ void InitGPU_Vals(float **vals)
Allocate memory for spline segments.
__host__ void CleanupPinnedMemory(M3::float_t *cpu_total_weights, short int *segment, float *vals)
Clean up pinned variables at CPU.
__host__ void InitGPU_SplineMonolith(M3::float_t **cpu_total_weights, int n_events, unsigned int total_nknots, unsigned int n_splines, unsigned int n_tf1, int Eve_size)
Allocate memory on gpu for spline monolith.
__host__ void CopyToGPU_SplineMonolith(const SplineMonoStruct *cpu_spline_handler, const std::vector< float > &cpu_many_array_TF1, const std::vector< short int > &cpu_paramNo_arr_TF1, const int n_events, const std::vector< unsigned int > &cpu_nParamPerEvent, const std::vector< unsigned int > &cpu_nParamPerEvent_TF1, const int n_params, const unsigned int n_splines, const short int spline_size, const unsigned int total_nknots, const unsigned int n_tf1)
Copies data from CPU to GPU for the spline monolith.
__host__ void InitGPU_Segments(short int **segment)
Allocate memory for spline segments.
CW: A reduced TF1 class only. Only saves parameters for each TF1 and how many parameters each paramet...
int GetSize()
Get the size.
KS: A reduced ResponseFunction Generic function used for evaluating weight.
CW: Reduced TSpline3 class.
void GetKnot(int i, M3::float_t &xtmp, M3::float_t &ytmp)
M3::int_t GetNp() override
CW: Get the number of points.
void GetCoeff(int segment, M3::float_t &x, M3::float_t &y, M3::float_t &b, M3::float_t &c, M3::float_t &d)
CW: Get the coefficient of a given segment.
float * cpu_weights_tf1_var
CPU arrays to hold weight for each TF1.
float * cpu_weights_spline_var
CPU arrays to hold weight for each spline.
unsigned int nTF1coeff
Sum of all coefficients over all TF1.
unsigned int NTF1_valid
Number of valid TF1.
void ScanMasterSpline(std::vector< std::vector< TResponseFunction_red * > > &MasterSpline, unsigned int &nEvents, short int &MaxPoints, short int &numParams, int &nSplines, unsigned int &NSplinesValid, unsigned int &numKnots, unsigned int &nTF1Valid, unsigned int &nTF1_coeff, const std::vector< RespFuncType > &SplineType)
CW: Function to scan through the MasterSpline of TSpline3.
void PrepareSplineFile(std::string FileName) final
KS: Prepare spline file that can be used for fast loading.
std::vector< unsigned int > cpu_nParamPerEvent
KS: CPU map keeping track how many parameters applies to each event, we keep two numbers here {number...
void Evaluate() final
CW: This Eval should be used when using two separate x,{y,a,b,c,d} arrays to store the weights; proba...
void SynchroniseMemTransfer() const final
KS: After calculations are done on GPU we copy memory to CPU. This operation is asynchronous meaning ...
std::vector< short int > cpu_paramNo_TF1_arr
CW: CPU array with the number of points per spline (not per spline point!)
void PrintInitialsiation() const
KS: Print info about how much knots etc has been initialised.
std::vector< unsigned int > cpu_nParamPerEvent_tf1
KS: CPU map keeping track how many parameters applies to each event, we keep two numbers here {number...
SplineMonolithGPU * gpu_spline_handler
KS: Store info about Spline monolith, this allow to obtain better step time. As all necessary informa...
unsigned int NSplines_valid
Number of valid splines.
void MoveToGPU()
CW: The shared initialiser from constructors of TResponseFunction_red.
unsigned int nKnots
Sum of all knots over all splines.
void CalcSplineWeights() final
CPU based code which eval weight for each spline.
short int _max_knots
Max knots for production.
std::vector< short int > cpu_nPoints_arr
CPU arrays to hold number of points.
void PrepareForGPU(std::vector< std::vector< TResponseFunction_red * > > &MasterSpline, const std::vector< RespFuncType > &SplineType)
CW: Prepare the TSpline3_red objects for the GPU.
void Initialise()
KS: Set everything to null etc.
SplineMonoStruct * cpu_spline_handler
KS: Store info about Spline monolith, this allow to obtain better step time. As all necessary informa...
void LoadSplineFile(std::string FileName) final
KS: Load preprocessed spline file.
virtual ~UnbinnedSplineHandler()
Destructor for UnbinnedSplineHandler class.
std::vector< float > cpu_coeff_TF1_many
CPU arrays to hold TF1 coefficients.
void GetSplineCoeff_SepMany(TSpline3_red *&spl, int &nPoints, float *&xArray, float *&manyArray) const
CW: This loads up coefficients into two arrays: one x array and one yabcd array.
M3::float_t * cpu_total_weights
KS: This holds the total CPU weights that gets read in SampleHandler.
UnbinnedSplineHandler(std::vector< std::vector< TResponseFunction_red * > > &MasterSpline, const std::vector< RespFuncType > &SplineType, const bool SaveFlatTree=false, const std::string &_FastSplineName="SplineFile.root")
Constructor.
bool SaveSplineFile
Flag telling whether we are saving spline monolith into handy root file.
unsigned int NEvents
Number of events.
std::string FastSplineName
Name of Fast Spline to which will be saved.
void CalcTotalEventWeight()
Calc total event weight.
__host__ void SynchroniseSplines()
Make sure all Cuda threads finished execution.
MaCh3 event-by-event cross-section spline code.
void AddPath(std::string &FilePath)
Prepends the MACH3 environment path to FilePath if it is not already present.
Stores info about each MC event used during reweighting routine.
KS: Struct storing information for spline monolith.
std::vector< unsigned int > nKnots_arr
KS: CPU Number of knots per spline.
std::vector< float > coeff_x
KS: CPU arrays to hold X coefficient.
std::vector< float > coeff_many
CPU arrays to hold other coefficients.
std::vector< short int > paramNo_arr
CW: CPU array with the number of points per spline (not per spline point!)