7#pragma GCC diagnostic ignored "-Wuseless-cast"
8#pragma GCC diagnostic ignored "-Wfloat-conversion"
38SMonolith::SMonolith(std::vector<std::vector<TResponseFunction_red*> > &MasterSpline,
const std::vector<RespFuncType> &SplineType,
const bool SaveFlatTree)
45 MACH3LOG_INFO(
"-- GPUING WITH arrays and master spline containing TResponseFunction_red");
53void SMonolith::PrepareForGPU(std::vector<std::vector<TResponseFunction_red*> > &MasterSpline,
const std::vector<RespFuncType> &SplineType) {
72 MACH3LOG_INFO(
"Found {} maximum number of splines in an event", maxnSplines);
109 for (
unsigned int j = 0; j < event_size_max; j++) {
122 #ifdef Weight_On_SplineBySpline_Basis
128 #pragma omp parallel for
141 #pragma omp parallel for
143 for (
unsigned int j = 0; j < 2*
NEvents; j++) {
163 unsigned int KnotCounter = 0;
164 unsigned int TF1PointsCounter = 0;
165 unsigned int NSplinesCounter = 0;
166 unsigned int TF1sCounter = 0;
167 int ParamCounter = 0;
168 int ParamCounterGlobal = 0;
169 int ParamCounter_TF1 = 0;
170 int ParamCounterGlobalTF1 = 0;
172 for(
unsigned int EventCounter = 0; EventCounter < MasterSpline.size(); ++EventCounter) {
175 for(
unsigned int ParamNumber = 0; ParamNumber < MasterSpline[EventCounter].size(); ++ParamNumber) {
178 if (MasterSpline[EventCounter][ParamNumber] == NULL)
continue;
192 if (nPoints_tmp == 1)
continue;
197 for (
int j = 0; j < nPoints_tmp; ++j) {
198 for (
int k = 0; k <
_nCoeff_; k++) {
206 KnotCounter += nPoints_tmp;
208 #ifdef Weight_On_SplineBySpline_Basis
217 else if (SplineType[ParamNumber] ==
kTF1_red)
222 TF1_red* CurrSpline =
dynamic_cast<TF1_red*
>(MasterSpline[EventCounter][ParamNumber]);
232 TF1PointsCounter += nPoints_tmp;
235 #ifdef Weight_On_SplineBySpline_Basis
246 delete MasterSpline[EventCounter][ParamNumber];
247 MasterSpline[EventCounter][ParamNumber] = NULL;
250 #ifndef Weight_On_SplineBySpline_Basis
253 ParamCounterGlobal += ParamCounter;
257 ParamCounterGlobalTF1 += ParamCounter_TF1;
260 ParamCounter_TF1 = 0;
265 delete[] temp_coeffs;
268 for (
unsigned int j = 0; j < event_size_max; j++) {
273 MACH3LOG_WARN(
"Indicates some parameter doesn't have a single spline");
277 if(BadXCounter == 5)
MACH3LOG_WARN(
"There is more unutilised knots although I will stop spamming");
281 #ifdef Weight_On_SplineBySpline_Basis
308 MACH3LOG_INFO(
"Total size = {:.2f} MB memory on CPU to move to GPU",
309 (
double(
sizeof(
float) *
nKnots *
_nCoeff_) +
double(
sizeof(
float) * event_size_max) / 1.E6 +
311 MACH3LOG_INFO(
"Total TF1 size = {:.2f} MB memory on CPU to move to GPU",
314 #ifndef Weight_On_SplineBySpline_Basis
315 MACH3LOG_INFO(
"Since you are running Total event weight mode then GPU weight array (GPU->CPU every step) = {:.2f} MB",
316 double(
sizeof(
float) *
NEvents) / 1.E6);
318 MACH3LOG_INFO(
"Parameter value array (CPU->GPU every step) = {:.4f} MB",
double(
sizeof(
float) *
nParams) / 1.E6);
329 #ifndef Weight_On_SplineBySpline_Basis
349 #ifndef Weight_On_SplineBySpline_Basis
363 #ifndef Weight_On_SplineBySpline_Basis
377 unsigned int &nEvents,
378 short int &MaxPoints,
379 short int &numParams,
381 unsigned int &NSplinesValid,
382 unsigned int &numKnots,
383 unsigned int &nTF1Valid,
384 unsigned int &nTF1_coeff,
385 const std::vector<RespFuncType> &SplineType) {
400 nEvents = int(MasterSpline.size());
403 int nMaxSplines_PerEvent = 0;
406 numParams = short(MasterSpline[0].
size());
411 for(
unsigned int EventCounter = 0; EventCounter < MasterSpline.size(); ++EventCounter) {
414 int TempSize = int(MasterSpline[EventCounter].
size());
415 if (TempSize != numParams) {
416 MACH3LOG_ERROR(
"Found {} parameters for event {}", TempSize, EventCounter);
417 MACH3LOG_ERROR(
"but was expecting {} since that's what I found for the previous event", numParams);
418 MACH3LOG_ERROR(
"Somehow this event has a different number of spline parameters... Please study further!");
422 numParams = short(MasterSpline[EventCounter].
size());
424 int nSplines_SingleEvent = 0;
427 for(
unsigned int ParamNumber = 0; ParamNumber < MasterSpline[EventCounter].size(); ++ParamNumber) {
429 if (MasterSpline[EventCounter][ParamNumber]) {
435 nPoints = CurrSpline->
GetNp();
438 if (nPoints > MaxPoints) {
439 MaxPoints =
static_cast<short int>(nPoints);
442 nSplines_SingleEvent++;
456 CurrSpline->
GetKnot(k, xtemp, ytemp);
462 else if (SplineType[ParamNumber] ==
kTF1_red)
466 nPoints = CurrSpline->
GetSize();
467 nTF1_coeff += nPoints;
475 if (nSplines_SingleEvent > nMaxSplines_PerEvent) nMaxSplines_PerEvent = nSplines_SingleEvent;
477 nSplines = nMaxSplines_PerEvent;
481 for (
M3::int_t i = 0; i < numParams; ++i)
484 if (SplineType[i] ==
kTF1_red)
continue;
488 if (nPoints == -999 || xArray.size() == 0) {
497 MACH3LOG_WARN(
"In total SplineInfoArray for {} hasn't been initialised", Counter);
506 MACH3LOG_INFO(
"-- GPUING WITH {X} and {Y,B,C,D} arrays and master spline containing TSpline3_red");
515 #ifdef Weight_On_SplineBySpline_Basis
516 MACH3LOG_ERROR(
"Trying to load Monolith from file using weight by weight base, this is not supported right now, sorry");
520 if (std::getenv(
"MACH3") !=
nullptr) {
521 FileName.insert(0, std::string(std::getenv(
"MACH3"))+
"/");
524 auto SplineFile = std::make_unique<TFile>(FileName.c_str(),
"OPEN");
525 TTree *Settings = SplineFile->Get<TTree>(
"Settings");
526 TTree *Monolith_TF1 = SplineFile->Get<TTree>(
"Monolith_TF1");
527 TTree *EventInfo = SplineFile->Get<TTree>(
"EventInfo");
528 TTree *FastSplineInfoTree = SplineFile->Get<TTree>(
"FastSplineInfoTree");
529 TTree *SplineTree = SplineFile->Get<TTree>(
"SplineTree");
531 unsigned int NEvents_temp;
532 short int nParams_temp;
534 unsigned int nKnots_temp;
535 unsigned int NSplines_valid_temp;
536 unsigned int nTF1Valid_temp;
537 unsigned int nTF1coeff_temp;
539 Settings->SetBranchAddress(
"NEvents", &NEvents_temp);
540 Settings->SetBranchAddress(
"nParams", &nParams_temp);
541 Settings->SetBranchAddress(
"_max_knots", &_max_knots_temp);
542 Settings->SetBranchAddress(
"nKnots", &nKnots_temp);
543 Settings->SetBranchAddress(
"NSplines_valid", &NSplines_valid_temp);
544 Settings->SetBranchAddress(
"NTF1_valid", &nTF1Valid_temp);
545 Settings->SetBranchAddress(
"nTF1coeff", &nTF1coeff_temp);
547 Settings->GetEntry(0);
551 _max_knots =
static_cast<short int>(_max_knots_temp);
578 SplineTree->GetEntry(0);
580 float coeff_tf1 = 0.;
581 Monolith_TF1->SetBranchAddress(
"cpu_coeff_TF1_many", &coeff_tf1);
582 for(
unsigned int i = 0; i <
nTF1coeff; i++)
584 Monolith_TF1->GetEntry(i);
588 unsigned int nParamPerEvent = 0;
589 unsigned int nParamPerEvent_tf1 = 0;
591 EventInfo->SetBranchAddress(
"cpu_nParamPerEvent", &nParamPerEvent);
592 EventInfo->SetBranchAddress(
"cpu_nParamPerEvent_tf1", &nParamPerEvent_tf1);
593 for(
unsigned int i = 0; i < 2*
NEvents; i++)
595 EventInfo->GetEntry(i);
602 FastSplineInfoTree->SetBranchAddress(
"nPts", &nPoints);
603 FastSplineInfoTree->SetBranchAddress(
"xPts", &xtemp);
607 FastSplineInfoTree->GetEntry(i);
611 if(nPoints == -999)
continue;
630 std::string FileName =
"SplineFile.root";
631 if (std::getenv(
"MACH3") !=
nullptr) {
632 FileName.insert(0, std::string(std::getenv(
"MACH3"))+
"/");
635 auto SplineFile = std::make_unique<TFile>(FileName.c_str(),
"recreate");
636 TTree *Settings =
new TTree(
"Settings",
"Settings");
637 TTree *Monolith_TF1 =
new TTree(
"Monolith_TF1",
"Monolith_TF1");
638 TTree *XKnots =
new TTree(
"XKnots",
"XKnots");
639 TTree *EventInfo =
new TTree(
"EventInfo",
"EventInfo");
640 TTree *FastSplineInfoTree =
new TTree(
"FastSplineInfoTree",
"FastSplineInfoTree");
642 unsigned int NEvents_temp =
NEvents;
643 short int nParams_temp =
nParams;
645 unsigned int nKnots_temp =
nKnots;
650 Settings->Branch(
"NEvents", &NEvents_temp,
"NEvents/i");
651 Settings->Branch(
"nParams", &nParams_temp,
"nParams/S");
652 Settings->Branch(
"_max_knots", &_max_knots_temp,
"_max_knots/I");
653 Settings->Branch(
"nKnots", &nKnots_temp,
"nKnots/i");
654 Settings->Branch(
"NSplines_valid", &NSplines_valid_temp,
"NSplines_valid/i");
655 Settings->Branch(
"NTF1_valid", &nTF1Valid_temp,
"NTF1_valid/i");
656 Settings->Branch(
"nTF1coeff", &nTF1coeff_temp,
"nTF1coeff/i");
663 TTree *SplineTree =
new TTree(
"SplineTree",
"SplineTree");
670 float coeff_tf1 = 0.;
671 Monolith_TF1->Branch(
"cpu_coeff_TF1_many", &coeff_tf1,
"cpu_coeff_TF1_many/F");
672 for(
unsigned int i = 0; i <
nTF1coeff; i++)
675 Monolith_TF1->Fill();
678 Monolith_TF1->Write();
680 unsigned int nParamPerEvent = 0;
681 unsigned int nParamPerEvent_tf1 = 0;
683 EventInfo->Branch(
"cpu_nParamPerEvent", &nParamPerEvent,
"cpu_nParamPerEvent/i");
684 EventInfo->Branch(
"cpu_nParamPerEvent_tf1", &nParamPerEvent_tf1,
"cpu_nParamPerEvent_tf1/i");
686 for(
unsigned int i = 0; i < 2*
NEvents; i++)
697 FastSplineInfoTree->Branch(
"nPts", &nPoints,
"nPts/I");
698 FastSplineInfoTree->Branch(
"xPts", xtemp,
"xPts[nPts]/F");
708 FastSplineInfoTree->Fill();
712 FastSplineInfoTree->Write();
718 delete FastSplineInfoTree;
729 #ifndef Weight_On_SplineBySpline_Basis
760 for (
int j = 0; j <
_nCoeff_; j++) {
765 int Np = spl->
GetNp();
772 MACH3LOG_ERROR(
"This _WILL_ cause problems with GPU splines and _SHOULD_ be fixed!");
781 for(
int i = 0; i < Np; i++) {
785 xArray[i] = float(x);
790 if((xArray[i] == -999) || (manyArray[i*
_nCoeff_] == -999) || (manyArray[i*4+1] == -999) || (manyArray[i*
_nCoeff_+2] == -999) || (manyArray[i*
_nCoeff_+3] == -999)){
791 MACH3LOG_ERROR(
"*********** Bad params in getSplineCoeff_SepMany() ************");
792 MACH3LOG_ERROR(
"pre cast to float (x, y, b, c, d) = {:.2f}, {:.2f}, {:.2f}, {:.2f}, {:.2f}", x, y, b, c, d);
793 MACH3LOG_ERROR(
"pre cast to float (x, y, b, c, d) = {:.2f}, {:.2f}, {:.2f}, {:.2f}, {:.2f}", xArray[i], manyArray[i*4], manyArray[i*4+1], manyArray[i*4+2], manyArray[i*
_nCoeff_+3]);
794 MACH3LOG_ERROR(
"This will cause problems when preparing for GPU");
795 MACH3LOG_ERROR(
"***************************************************************");
815 #ifdef Weight_On_SplineBySpline_Basis
856 #pragma omp for simd nowait
858 for (
unsigned int splineNum = 0; splineNum <
NSplines_valid; ++splineNum)
867 const short int segment_X = short(Param*
_max_knots+segment);
890 for (
unsigned int tf1Num = 0; tf1Num <
NTF1_valid; ++tf1Num)
896 const unsigned int TF1_Index = tf1Num *
_nTF1Coeff_;
914#ifndef Weight_On_SplineBySpline_Basis
916 #pragma omp parallel for
918 for (
unsigned int EventNum = 0; EventNum <
NEvents; ++EventNum)
920 float totalWeight = 1.0f;
922 const unsigned int Offset = 2 * EventNum;
932 for (
unsigned int id = 0;
id < numParams; ++id) {
944 for (
unsigned int id = 0;
id < numParams_tf1; ++id) {
961#ifdef Weight_On_SplineBySpline_Basis
964 #pragma omp parallel for
987 MACH3LOG_INFO(
"Size of x array = {:.4f} MB",
double(
sizeof(
float)*event_size_max)/1.E6);
@ kTF1_red
Uses TF1_red for interpolation.
@ kTSpline3_red
Uses TSpline3_red for interpolation.
void CleanVector(std::vector< T > &vec)
Generic cleanup function.
#define _nCoeff_
KS: We store coefficients {y,b,c,d} in one array one by one, this is only to define it once rather th...
#define _nTF1Coeff_
KS: For TF1 we store at most 5 coefficients, we could make it more flexible but for now define it her...
Custom exception class for MaCh3 errors.
Class responsible for calculating spline weight on GPU.
__host__ void CleanupGPU_SplineMonolith(float *cpu_total_weights)
This function deallocates the resources allocated for the separate {x} and {ybcd} arrays in the and T...
__host__ void InitGPU_Vals(float **vals)
Allocate memory for spline segments.
__host__ void InitGPU_SplineMonolith(float **cpu_total_weights, int n_events, unsigned int total_nknots, unsigned int n_splines, unsigned int n_tf1, int Eve_size)
Allocate memory on gpu for spline monolith.
__host__ void RunGPU_SplineMonolith(float *cpu_total_weights, float *vals, short int *segment, const unsigned int h_n_splines, const unsigned int h_n_tf1)
Run the GPU code for the separate many arrays. As in separate {x}, {y,b,c,d} arrays Pass the segment ...
__host__ void InitGPU_Segments(short int **segment)
Allocate memory for spline segments.
__host__ void CopyToGPU_SplineMonolith(SplineMonoStruct *cpu_spline_handler, std::vector< float > cpu_many_array_TF1, std::vector< short int > cpu_paramNo_arr_TF1, int n_events, std::vector< unsigned int > cpu_nParamPerEvent, std::vector< unsigned int > cpu_nParamPerEvent_TF1, int n_params, unsigned int n_splines, short int spline_size, unsigned int total_nknots, unsigned int n_tf1)
Copies data from CPU to GPU for the spline monolith.
__host__ void CleanupGPU_Segments(short int *segment, float *vals)
Clean up pinned variables at CPU.
SMonolithGPU * gpu_spline_handler
KS: Store info about Spline monolith, this allow to obtain better step time. As all necessary informa...
std::vector< unsigned int > cpu_nParamPerEvent
KS: CPU map keeping track how many parameters applies to each event, we keep two numbers here {number...
unsigned int NTF1_valid
Number of valid TF1.
std::vector< short int > cpu_nPoints_arr
CPU arrays to hold number of points.
void Evaluate() override
CW: This Eval should be used when using two separate x,{y,a,b,c,d} arrays to store the weights; proba...
unsigned int nTF1coeff
Sum of all coefficients over all TF1.
std::vector< float > cpu_coeff_TF1_many
CPU arrays to hold TF1 coefficients.
void Initialise()
KS: Set everything to null etc.
SplineMonoStruct * cpu_spline_handler
KS: Store info about Spline monolith, this allow to obtain better step time. As all necessary informa...
float * cpu_weights_tf1_var
CPU arrays to hold weight for each TF1.
SMonolith(std::vector< std::vector< TResponseFunction_red * > > &MasterSpline, const std::vector< RespFuncType > &SplineType, const bool SaveFlatTree=false)
Constructor.
void PrintInitialsiation()
KS: Print info about how much knots etc has been initialised.
std::vector< unsigned int > cpu_nParamPerEvent_tf1
KS: CPU map keeping track how many parameters applies to each event, we keep two numbers here {number...
void LoadSplineFile(std::string FileName)
KS: Load preprocessed spline file.
void SynchroniseMemTransfer()
KS: After calculations are done on GPU we copy memory to CPU. This operation is asynchronous meaning ...
std::vector< int > index_TF1_cpu
holds the index for good TF1; don't do unsigned since starts with negative value!
void ModifyWeights_GPU()
Conversion from valid splines to all.
void PrepareSplineFile()
KS: Prepare spline file that can be used for fast loading.
float * cpu_weights
The returned gpu weights, read by the GPU.
float * cpu_weights_spline_var
CPU arrays to hold weight for each spline.
std::vector< int > index_spline_cpu
holds the index for good splines; don't do unsigned since starts with negative value!
void PrepareForGPU(std::vector< std::vector< TResponseFunction_red * > > &MasterSpline, const std::vector< RespFuncType > &SplineType)
CW: Prepare the TSpline3_red objects for the GPU.
void CalcSplineWeights() override
CPU based code which eval weight for each spline.
void MoveToGPU()
CW: The shared initialiser from constructors of TResponseFunction_red.
bool SaveSplineFile
Flag telling whether we are saving spline monolith into handy root file.
float * cpu_total_weights
KS: This holds the total CPU weights that gets read in samplePDFND.
unsigned int NSplines_valid
Number of valid splines.
virtual ~SMonolith()
Destructor for SMonolith class.
void ModifyWeights() override
Calc total event weight.
unsigned int NEvents
Number of events.
void getSplineCoeff_SepMany(TSpline3_red *&spl, int &nPoints, float *&xArray, float *&manyArray)
CW: This loads up coefficients into two arrays: one x array and one yabcd array.
unsigned int NSplines_total_large
Number of total splines if each event had every parameter's spline.
void ScanMasterSpline(std::vector< std::vector< TResponseFunction_red * > > &MasterSpline, unsigned int &nEvents, short int &MaxPoints, short int &numParams, int &nSplines, unsigned int &NSplinesValid, unsigned int &numKnots, unsigned int &nTF1Valid, unsigned int &nTF1_coeff, const std::vector< RespFuncType > &SplineType)
CW: Function to scan through the MasterSpline of TSpline3.
short int _max_knots
Max knots for production.
std::vector< short int > cpu_paramNo_TF1_arr
CW: CPU array with the number of points per spline (not per spline point!)
unsigned int nKnots
Sum of all knots over all splines.
Base class for calculating weight from spline.
short int nParams
Number of parameters that have splines.
void FindSplineSegment()
CW:Code used in step by step reweighting, Find Spline Segment for each param.
short int * SplineSegments
std::vector< FastSplineInfo > SplineInfoArray
float * ParamValues
Store parameter values they are not in FastSplineInfo as in case of GPU we need to copy paste it to G...
void getTF1Coeff(TF1_red *&spl, int &nPoints, float *&coeffs)
CW: Gets the polynomial coefficients for TF1.
CW: A reduced TF1 class only. Only saves parameters for each TF1 and how many parameters each paramet...
int GetSize()
Get the size.
KS: A reduced ResponseFunction Generic function used for evaluating weight.
CW: Reduced TSpline3 class.
void GetKnot(int i, M3::float_t &xtmp, M3::float_t &ytmp)
M3::int_t GetNp() override
CW: Get the number of points.
void GetCoeff(int segment, M3::float_t &x, M3::float_t &y, M3::float_t &b, M3::float_t &c, M3::float_t &d)
CW: Get the coefficient of a given segment.
__host__ void SynchroniseSplines()
Make sure all Cuda threads finished execution.
MaCh3 event-by-event cross-section spline code.
KS: Struct storing information for spline monolith.
std::vector< unsigned int > nKnots_arr
KS: CPU Number of knots per spline.
std::vector< float > coeff_x
KS: CPU arrays to hold X coefficient.
std::vector< float > coeff_many
CPU arrays to hold other coefficients.
std::vector< short int > paramNo_arr
CW: CPU array with the number of points per spline (not per spline point!)