7 #pragma GCC diagnostic ignored "-Wuseless-cast"
8 #pragma GCC diagnostic ignored "-Wfloat-conversion"
39 const std::vector<RespFuncType> &SplineType,
40 const bool SaveFlatTree,
41 const std::string& _FastSplineName)
48 MACH3LOG_INFO(
"-- GPUING WITH arrays and master spline containing TResponseFunction_red");
56 void SMonolith::PrepareForGPU(std::vector<std::vector<TResponseFunction_red*> > &MasterSpline,
const std::vector<RespFuncType> &SplineType) {
75 MACH3LOG_INFO(
"Found {} maximum number of splines in an event", maxnSplines);
112 for (
unsigned int j = 0; j < event_size_max; j++) {
125 #ifdef Weight_On_SplineBySpline_Basis
131 #pragma omp parallel for
144 #pragma omp parallel for
146 for (
unsigned int j = 0; j < 2*
NEvents; j++) {
166 unsigned int KnotCounter = 0;
167 unsigned int TF1PointsCounter = 0;
168 unsigned int NSplinesCounter = 0;
169 unsigned int TF1sCounter = 0;
170 int ParamCounter = 0;
171 int ParamCounterGlobal = 0;
172 int ParamCounter_TF1 = 0;
173 int ParamCounterGlobalTF1 = 0;
175 for(
unsigned int EventCounter = 0; EventCounter < MasterSpline.size(); ++EventCounter) {
178 for(
unsigned int ParamNumber = 0; ParamNumber < MasterSpline[EventCounter].size(); ++ParamNumber) {
180 if (MasterSpline[EventCounter][ParamNumber] == NULL)
continue;
194 if (nPoints_tmp == 1)
continue;
199 for (
int j = 0; j < nPoints_tmp; ++j) {
200 for (
int k = 0; k <
_nCoeff_; k++) {
208 KnotCounter += nPoints_tmp;
210 #ifdef Weight_On_SplineBySpline_Basis
219 else if (SplineType[ParamNumber] ==
kTF1_red)
224 TF1_red* CurrSpline =
dynamic_cast<TF1_red*
>(MasterSpline[EventCounter][ParamNumber]);
234 TF1PointsCounter += nPoints_tmp;
237 #ifdef Weight_On_SplineBySpline_Basis
248 delete MasterSpline[EventCounter][ParamNumber];
249 MasterSpline[EventCounter][ParamNumber] = NULL;
252 #ifndef Weight_On_SplineBySpline_Basis
255 ParamCounterGlobal += ParamCounter;
259 ParamCounterGlobalTF1 += ParamCounter_TF1;
262 ParamCounter_TF1 = 0;
267 delete[] temp_coeffs;
270 for (
unsigned int j = 0; j < event_size_max; j++) {
275 MACH3LOG_WARN(
"Indicates some parameter doesn't have a single spline");
279 if(BadXCounter == 5)
MACH3LOG_WARN(
"There is more unutilised knots although I will stop spamming");
283 #ifdef Weight_On_SplineBySpline_Basis
309 MACH3LOG_INFO(
"Total size = {:.2f} MB memory on CPU to move to GPU",
310 (
double(
sizeof(
float) *
nKnots *
_nCoeff_) +
double(
sizeof(
float) * event_size_max) / 1.E6 +
312 MACH3LOG_INFO(
"Total TF1 size = {:.2f} MB memory on CPU to move to GPU",
315 #ifndef Weight_On_SplineBySpline_Basis
316 MACH3LOG_INFO(
"Since you are running Total event weight mode then GPU weight array (GPU->CPU every step) = {:.2f} MB",
317 double(
sizeof(
float) *
NEvents) / 1.E6);
319 MACH3LOG_INFO(
"Parameter value array (CPU->GPU every step) = {:.4f} MB",
double(
sizeof(
float) *
nParams) / 1.E6);
330 #ifndef Weight_On_SplineBySpline_Basis
350 #ifndef Weight_On_SplineBySpline_Basis
364 #ifndef Weight_On_SplineBySpline_Basis
378 unsigned int &nEvents,
379 short int &MaxPoints,
380 short int &numParams,
382 unsigned int &NSplinesValid,
383 unsigned int &numKnots,
384 unsigned int &nTF1Valid,
385 unsigned int &nTF1_coeff,
386 const std::vector<RespFuncType> &SplineType) {
401 nEvents = int(MasterSpline.size());
404 int nMaxSplines_PerEvent = 0;
407 numParams = short(MasterSpline[0].size());
412 for(
unsigned int EventCounter = 0; EventCounter < MasterSpline.size(); ++EventCounter) {
415 int TempSize = int(MasterSpline[EventCounter].size());
416 if (TempSize != numParams) {
417 MACH3LOG_ERROR(
"Found {} parameters for event {}", TempSize, EventCounter);
418 MACH3LOG_ERROR(
"but was expecting {} since that's what I found for the previous event", numParams);
419 MACH3LOG_ERROR(
"Somehow this event has a different number of spline parameters... Please study further!");
423 numParams = short(MasterSpline[EventCounter].size());
425 int nSplines_SingleEvent = 0;
428 for(
unsigned int ParamNumber = 0; ParamNumber < MasterSpline[EventCounter].size(); ++ParamNumber) {
429 if (MasterSpline[EventCounter][ParamNumber]) {
435 nPoints = CurrSpline->
GetNp();
438 if (nPoints > MaxPoints) {
439 MaxPoints =
static_cast<short int>(nPoints);
442 nSplines_SingleEvent++;
456 CurrSpline->
GetKnot(k, xtemp, ytemp);
462 else if (SplineType[ParamNumber] ==
kTF1_red)
466 nPoints = CurrSpline->
GetSize();
467 nTF1_coeff += nPoints;
475 if (nSplines_SingleEvent > nMaxSplines_PerEvent) nMaxSplines_PerEvent = nSplines_SingleEvent;
477 nSplines = nMaxSplines_PerEvent;
481 for (
M3::int_t i = 0; i < numParams; ++i)
484 if (SplineType[i] ==
kTF1_red)
continue;
488 if (nPoints == -999 || xArray.size() == 0) {
497 MACH3LOG_WARN(
"In total SplineInfoArray for {} hasn't been initialised", Counter);
506 MACH3LOG_INFO(
"-- GPUING WITH {X} and {Y,B,C,D} arrays and master spline containing TSpline3_red");
515 #ifdef Weight_On_SplineBySpline_Basis
516 MACH3LOG_ERROR(
"Trying to load Monolith from file using weight by weight base, this is not supported right now, sorry");
521 auto SplineFile = std::make_unique<TFile>(FileName.c_str(),
"OPEN");
522 TTree *Settings = SplineFile->Get<TTree>(
"Settings");
523 TTree *Monolith_TF1 = SplineFile->Get<TTree>(
"Monolith_TF1");
524 TTree *
EventInfo = SplineFile->Get<TTree>(
"EventInfo");
525 TTree *SplineTree = SplineFile->Get<TTree>(
"SplineTree");
527 unsigned int NEvents_temp;
528 short int nParams_temp;
530 unsigned int nKnots_temp;
531 unsigned int NSplines_valid_temp;
532 unsigned int nTF1Valid_temp;
533 unsigned int nTF1coeff_temp;
535 Settings->SetBranchAddress(
"NEvents", &NEvents_temp);
536 Settings->SetBranchAddress(
"nParams", &nParams_temp);
537 Settings->SetBranchAddress(
"_max_knots", &_max_knots_temp);
538 Settings->SetBranchAddress(
"nKnots", &nKnots_temp);
539 Settings->SetBranchAddress(
"NSplines_valid", &NSplines_valid_temp);
540 Settings->SetBranchAddress(
"NTF1_valid", &nTF1Valid_temp);
541 Settings->SetBranchAddress(
"nTF1coeff", &nTF1coeff_temp);
543 Settings->GetEntry(0);
547 _max_knots =
static_cast<short int>(_max_knots_temp);
574 SplineTree->GetEntry(0);
576 float coeff_tf1 = 0.;
577 Monolith_TF1->SetBranchAddress(
"cpu_coeff_TF1_many", &coeff_tf1);
578 for(
unsigned int i = 0; i <
nTF1coeff; i++)
580 Monolith_TF1->GetEntry(i);
584 unsigned int nParamPerEvent = 0;
585 unsigned int nParamPerEvent_tf1 = 0;
587 EventInfo->SetBranchAddress(
"cpu_nParamPerEvent", &nParamPerEvent);
588 EventInfo->SetBranchAddress(
"cpu_nParamPerEvent_tf1", &nParamPerEvent_tf1);
589 for(
unsigned int i = 0; i < 2*
NEvents; i++)
612 auto SplineFile = std::make_unique<TFile>(FileName.c_str(),
"recreate");
613 TTree *Settings =
new TTree(
"Settings",
"Settings");
614 TTree *Monolith_TF1 =
new TTree(
"Monolith_TF1",
"Monolith_TF1");
615 TTree *XKnots =
new TTree(
"XKnots",
"XKnots");
616 TTree *
EventInfo =
new TTree(
"EventInfo",
"EventInfo");
618 unsigned int NEvents_temp =
NEvents;
619 short int nParams_temp =
nParams;
621 unsigned int nKnots_temp =
nKnots;
626 Settings->Branch(
"NEvents", &NEvents_temp,
"NEvents/i");
627 Settings->Branch(
"nParams", &nParams_temp,
"nParams/S");
628 Settings->Branch(
"_max_knots", &_max_knots_temp,
"_max_knots/I");
629 Settings->Branch(
"nKnots", &nKnots_temp,
"nKnots/i");
630 Settings->Branch(
"NSplines_valid", &NSplines_valid_temp,
"NSplines_valid/i");
631 Settings->Branch(
"NTF1_valid", &nTF1Valid_temp,
"NTF1_valid/i");
632 Settings->Branch(
"nTF1coeff", &nTF1coeff_temp,
"nTF1coeff/i");
639 TTree *SplineTree =
new TTree(
"SplineTree",
"SplineTree");
646 float coeff_tf1 = 0.;
647 Monolith_TF1->Branch(
"cpu_coeff_TF1_many", &coeff_tf1,
"cpu_coeff_TF1_many/F");
648 for(
unsigned int i = 0; i <
nTF1coeff; i++)
651 Monolith_TF1->Fill();
654 Monolith_TF1->Write();
656 unsigned int nParamPerEvent = 0;
657 unsigned int nParamPerEvent_tf1 = 0;
659 EventInfo->Branch(
"cpu_nParamPerEvent", &nParamPerEvent,
"cpu_nParamPerEvent/i");
660 EventInfo->Branch(
"cpu_nParamPerEvent_tf1", &nParamPerEvent_tf1,
"cpu_nParamPerEvent_tf1/i");
662 for(
unsigned int i = 0; i < 2*
NEvents; i++)
687 #ifndef Weight_On_SplineBySpline_Basis
718 for (
int j = 0; j <
_nCoeff_; j++) {
723 int Np = spl->
GetNp();
730 MACH3LOG_ERROR(
"This _WILL_ cause problems with GPU splines and _SHOULD_ be fixed!");
739 for(
int i = 0; i < Np; i++) {
743 xArray[i] = float(x);
748 if((xArray[i] == -999) || (manyArray[i*
_nCoeff_] == -999) || (manyArray[i*
_nCoeff_ +1] == -999) || (manyArray[i*
_nCoeff_+2] == -999) || (manyArray[i*
_nCoeff_+3] == -999)){
749 MACH3LOG_ERROR(
"*********** Bad params in getSplineCoeff_SepMany() ************");
750 MACH3LOG_ERROR(
"pre cast to float (x, y, b, c, d) = {:.2f}, {:.2f}, {:.2f}, {:.2f}, {:.2f}", x, y, b, c, d);
751 MACH3LOG_ERROR(
"pre cast to float (x, y, b, c, d) = {:.2f}, {:.2f}, {:.2f}, {:.2f}, {:.2f}", xArray[i], manyArray[i*
_nCoeff_], manyArray[i*
_nCoeff_+1], manyArray[i*
_nCoeff_+2], manyArray[i*
_nCoeff_+3]);
752 MACH3LOG_ERROR(
"This will cause problems when preparing for GPU");
753 MACH3LOG_ERROR(
"***************************************************************");
773 #ifdef Weight_On_SplineBySpline_Basis
814 #pragma omp for simd nowait
816 for (
unsigned int splineNum = 0; splineNum <
NSplines_valid; ++splineNum)
825 const short int segment_X = short(Param*
_max_knots+segment);
848 for (
unsigned int tf1Num = 0; tf1Num <
NTF1_valid; ++tf1Num)
854 const unsigned int TF1_Index = tf1Num *
_nTF1Coeff_;
872 #ifndef Weight_On_SplineBySpline_Basis
874 #pragma omp parallel for
876 for (
unsigned int EventNum = 0; EventNum <
NEvents; ++EventNum)
878 float totalWeight = 1.0f;
880 const unsigned int Offset = 2 * EventNum;
888 #pragma omp simd reduction(*:totalWeight)
890 for (
unsigned int id = 0;
id < numParams; ++id) {
900 #pragma omp simd reduction(*:totalWeight)
902 for (
unsigned int id = 0;
id < numParams_tf1; ++id) {
919 #ifdef Weight_On_SplineBySpline_Basis
922 #pragma omp parallel for
945 MACH3LOG_INFO(
"Size of x array = {:.4f} MB",
double(
sizeof(
float)*event_size_max)/1.E6);
@ kTF1_red
Uses TF1_red for interpolation.
@ kTSpline3_red
Uses TSpline3_red for interpolation.
void CleanVector(T &)
Base case: do nothing for non-vector types.
constexpr int _nCoeff_
KS: We store coefficients {y,b,c,d} in one array one by one, this is only to define it once rather th...
constexpr int _nTF1Coeff_
KS: For TF1 we store at most 5 coefficients, we could make it more flexible but for now define it her...
Custom exception class used throughout MaCh3.
Class responsible for calculating spline weight on GPU.
__host__ void CleanupGPU_SplineMonolith(float *cpu_total_weights)
This function deallocates the resources allocated for the separate {x} and {ybcd} arrays in the and T...
__host__ void InitGPU_Vals(float **vals)
Allocate memory for spline segments.
__host__ void InitGPU_SplineMonolith(float **cpu_total_weights, int n_events, unsigned int total_nknots, unsigned int n_splines, unsigned int n_tf1, int Eve_size)
Allocate memory on gpu for spline monolith.
__host__ void RunGPU_SplineMonolith(float *cpu_total_weights, float *vals, short int *segment, const unsigned int h_n_splines, const unsigned int h_n_tf1)
Run the GPU code for the separate many arrays, as in separate {x}, {y,b,c,d} arrays. Pass the segment ...
__host__ void InitGPU_Segments(short int **segment)
Allocate memory for spline segments.
__host__ void CopyToGPU_SplineMonolith(SplineMonoStruct *cpu_spline_handler, std::vector< float > cpu_many_array_TF1, std::vector< short int > cpu_paramNo_arr_TF1, int n_events, std::vector< unsigned int > cpu_nParamPerEvent, std::vector< unsigned int > cpu_nParamPerEvent_TF1, int n_params, unsigned int n_splines, short int spline_size, unsigned int total_nknots, unsigned int n_tf1)
Copies data from CPU to GPU for the spline monolith.
__host__ void CleanupGPU_Segments(short int *segment, float *vals)
Clean up pinned variables at CPU.
SMonolithGPU * gpu_spline_handler
KS: Store info about Spline monolith; this allows us to obtain a better step time. As all necessary informa...
std::vector< unsigned int > cpu_nParamPerEvent
KS: CPU map keeping track of how many parameters apply to each event; we keep two numbers here {number...
unsigned int NTF1_valid
Number of valid TF1.
std::vector< short int > cpu_nPoints_arr
CPU arrays to hold number of points.
void SynchroniseMemTransfer() const override
KS: After calculations are done on GPU we copy memory to CPU. This operation is asynchronous meaning ...
void Evaluate() override
CW: This Eval should be used when using two separate x,{y,a,b,c,d} arrays to store the weights; proba...
unsigned int nTF1coeff
Sum of all coefficients over all TF1.
std::vector< float > cpu_coeff_TF1_many
CPU arrays to hold TF1 coefficients.
void Initialise()
KS: Set everything to null etc.
SplineMonoStruct * cpu_spline_handler
KS: Store info about Spline monolith; this allows us to obtain a better step time. As all necessary informa...
float * cpu_weights_tf1_var
CPU arrays to hold weight for each TF1.
void PrintInitialsiation()
KS: Print info about how many knots etc. have been initialised.
std::vector< unsigned int > cpu_nParamPerEvent_tf1
KS: CPU map keeping track of how many parameters apply to each event; we keep two numbers here {number...
std::vector< int > index_TF1_cpu
Holds the index for good TF1; not unsigned since it starts with a negative value!
void ModifyWeights_GPU()
Conversion from valid splines to all.
float * cpu_weights
The returned gpu weights, read by the GPU.
float * cpu_weights_spline_var
CPU arrays to hold weight for each spline.
std::vector< int > index_spline_cpu
Holds the index for good splines; not unsigned since it starts with a negative value!
void PrepareForGPU(std::vector< std::vector< TResponseFunction_red * > > &MasterSpline, const std::vector< RespFuncType > &SplineType)
CW: Prepare the TSpline3_red objects for the GPU.
void CalcSplineWeights() override
CPU based code which eval weight for each spline.
std::string FastSplineName
Name of the Fast Spline file to save to.
void MoveToGPU()
CW: The shared initialiser from constructors of TResponseFunction_red.
bool SaveSplineFile
Flag telling whether we are saving spline monolith into handy root file.
float * cpu_total_weights
KS: This holds the total CPU weights that get read in samplePDFND.
unsigned int NSplines_valid
Number of valid splines.
SMonolith(std::vector< std::vector< TResponseFunction_red * > > &MasterSpline, const std::vector< RespFuncType > &SplineType, const bool SaveFlatTree=false, const std::string &_FastSplineName="SplineFile.root")
Constructor.
virtual ~SMonolith()
Destructor for SMonolith class.
void ModifyWeights() override
Calc total event weight.
unsigned int NEvents
Number of events.
void getSplineCoeff_SepMany(TSpline3_red *&spl, int &nPoints, float *&xArray, float *&manyArray)
CW: This loads up coefficients into two arrays: one x array and one {y,b,c,d} array.
void LoadSplineFile(std::string FileName) override
KS: Load preprocessed spline file.
unsigned int NSplines_total_large
Number of total splines if each event had every parameter's spline.
void ScanMasterSpline(std::vector< std::vector< TResponseFunction_red * > > &MasterSpline, unsigned int &nEvents, short int &MaxPoints, short int &numParams, int &nSplines, unsigned int &NSplinesValid, unsigned int &numKnots, unsigned int &nTF1Valid, unsigned int &nTF1_coeff, const std::vector< RespFuncType > &SplineType)
CW: Function to scan through the MasterSpline of TSpline3.
void PrepareSplineFile(std::string FileName) override
KS: Prepare spline file that can be used for fast loading.
short int _max_knots
Max knots for production.
std::vector< short int > cpu_paramNo_TF1_arr
CW: CPU array with the number of points per spline (not per spline point!)
unsigned int nKnots
Sum of all knots over all splines.
Base class for calculating weight from spline.
short int nParams
Number of parameters that have splines.
void FindSplineSegment()
CW: Code used in step-by-step reweighting; finds the spline segment for each param.
short int * SplineSegments
std::vector< FastSplineInfo > SplineInfoArray
float * ParamValues
Store parameter values; they are not in FastSplineInfo because in the GPU case we need to copy them to G...
void getTF1Coeff(TF1_red *&spl, int &nPoints, float *&coeffs)
CW: Gets the polynomial coefficients for TF1.
void LoadFastSplineInfoDir(std::unique_ptr< TFile > &SplineFile)
KS: Load preprocessed FastSplineInfo.
void PrepareFastSplineInfoDir(std::unique_ptr< TFile > &SplineFile) const
KS: Prepare Fast Spline Info within SplineFile.
CW: A reduced TF1 class only. Only saves parameters for each TF1 and how many parameters each paramet...
int GetSize()
Get the size.
KS: A reduced ResponseFunction Generic function used for evaluating weight.
CW: Reduced TSpline3 class.
void GetKnot(int i, M3::float_t &xtmp, M3::float_t &ytmp)
M3::int_t GetNp() override
CW: Get the number of points.
void GetCoeff(int segment, M3::float_t &x, M3::float_t &y, M3::float_t &b, M3::float_t &c, M3::float_t &d)
CW: Get the coefficient of a given segment.
__host__ void SynchroniseSplines()
Make sure all Cuda threads finished execution.
MaCh3 event-by-event cross-section spline code.
void AddPath(std::string &FilePath)
Prepends the MACH3 environment path to FilePath if it is not already present.
Stores info about each MC event used during reweighting routine.
KS: Struct storing information for spline monolith.
std::vector< unsigned int > nKnots_arr
KS: CPU Number of knots per spline.
std::vector< float > coeff_x
KS: CPU arrays to hold X coefficient.
std::vector< float > coeff_many
CPU arrays to hold other coefficients.
std::vector< short int > paramNo_arr
CW: CPU array with the number of points per spline (not per spline point!)