7 #pragma GCC diagnostic ignored "-Wuseless-cast"
8 #pragma GCC diagnostic ignored "-Wfloat-conversion"
39 const std::vector<RespFuncType> &SplineType,
40 const bool SaveFlatTree,
41 const std::string& _FastSplineName)
48 MACH3LOG_INFO(
"-- GPUING WITH arrays and master spline containing TResponseFunction_red");
56 void SMonolith::PrepareForGPU(std::vector<std::vector<TResponseFunction_red*> > &MasterSpline,
const std::vector<RespFuncType> &SplineType) {
75 MACH3LOG_INFO(
"Found {} maximum number of splines in an event", maxnSplines);
112 for (
unsigned int j = 0; j < event_size_max; j++) {
125 #ifdef Weight_On_SplineBySpline_Basis
131 #pragma omp parallel for
144 #pragma omp parallel for
146 for (
unsigned int j = 0; j < 2*
NEvents; j++) {
166 unsigned int KnotCounter = 0;
167 unsigned int TF1PointsCounter = 0;
168 unsigned int NSplinesCounter = 0;
169 unsigned int TF1sCounter = 0;
170 int ParamCounter = 0;
171 int ParamCounterGlobal = 0;
172 int ParamCounter_TF1 = 0;
173 int ParamCounterGlobalTF1 = 0;
175 for(
unsigned int EventCounter = 0; EventCounter < MasterSpline.size(); ++EventCounter) {
178 for(
unsigned int ParamNumber = 0; ParamNumber < MasterSpline[EventCounter].size(); ++ParamNumber) {
181 if (MasterSpline[EventCounter][ParamNumber] == NULL)
continue;
195 if (nPoints_tmp == 1)
continue;
200 for (
int j = 0; j < nPoints_tmp; ++j) {
201 for (
int k = 0; k <
_nCoeff_; k++) {
209 KnotCounter += nPoints_tmp;
211 #ifdef Weight_On_SplineBySpline_Basis
220 else if (SplineType[ParamNumber] ==
kTF1_red)
225 TF1_red* CurrSpline =
dynamic_cast<TF1_red*
>(MasterSpline[EventCounter][ParamNumber]);
235 TF1PointsCounter += nPoints_tmp;
238 #ifdef Weight_On_SplineBySpline_Basis
249 delete MasterSpline[EventCounter][ParamNumber];
250 MasterSpline[EventCounter][ParamNumber] = NULL;
253 #ifndef Weight_On_SplineBySpline_Basis
256 ParamCounterGlobal += ParamCounter;
260 ParamCounterGlobalTF1 += ParamCounter_TF1;
263 ParamCounter_TF1 = 0;
268 delete[] temp_coeffs;
271 for (
unsigned int j = 0; j < event_size_max; j++) {
276 MACH3LOG_WARN(
"Indicates some parameter doesn't have a single spline");
280 if(BadXCounter == 5)
MACH3LOG_WARN(
"There is more unutilised knots although I will stop spamming");
284 #ifdef Weight_On_SplineBySpline_Basis
310 MACH3LOG_INFO(
"Total size = {:.2f} MB memory on CPU to move to GPU",
311 (
double(
sizeof(
float) *
nKnots *
_nCoeff_) +
double(
sizeof(
float) * event_size_max) / 1.E6 +
313 MACH3LOG_INFO(
"Total TF1 size = {:.2f} MB memory on CPU to move to GPU",
316 #ifndef Weight_On_SplineBySpline_Basis
317 MACH3LOG_INFO(
"Since you are running Total event weight mode then GPU weight array (GPU->CPU every step) = {:.2f} MB",
318 double(
sizeof(
float) *
NEvents) / 1.E6);
320 MACH3LOG_INFO(
"Parameter value array (CPU->GPU every step) = {:.4f} MB",
double(
sizeof(
float) *
nParams) / 1.E6);
331 #ifndef Weight_On_SplineBySpline_Basis
351 #ifndef Weight_On_SplineBySpline_Basis
365 #ifndef Weight_On_SplineBySpline_Basis
379 unsigned int &nEvents,
380 short int &MaxPoints,
381 short int &numParams,
383 unsigned int &NSplinesValid,
384 unsigned int &numKnots,
385 unsigned int &nTF1Valid,
386 unsigned int &nTF1_coeff,
387 const std::vector<RespFuncType> &SplineType) {
402 nEvents = int(MasterSpline.size());
405 int nMaxSplines_PerEvent = 0;
408 numParams = short(MasterSpline[0].
size());
413 for(
unsigned int EventCounter = 0; EventCounter < MasterSpline.size(); ++EventCounter) {
416 int TempSize = int(MasterSpline[EventCounter].
size());
417 if (TempSize != numParams) {
418 MACH3LOG_ERROR(
"Found {} parameters for event {}", TempSize, EventCounter);
419 MACH3LOG_ERROR(
"but was expecting {} since that's what I found for the previous event", numParams);
420 MACH3LOG_ERROR(
"Somehow this event has a different number of spline parameters... Please study further!");
424 numParams = short(MasterSpline[EventCounter].
size());
426 int nSplines_SingleEvent = 0;
429 for(
unsigned int ParamNumber = 0; ParamNumber < MasterSpline[EventCounter].size(); ++ParamNumber) {
431 if (MasterSpline[EventCounter][ParamNumber]) {
437 nPoints = CurrSpline->
GetNp();
440 if (nPoints > MaxPoints) {
441 MaxPoints =
static_cast<short int>(nPoints);
444 nSplines_SingleEvent++;
458 CurrSpline->
GetKnot(k, xtemp, ytemp);
464 else if (SplineType[ParamNumber] ==
kTF1_red)
468 nPoints = CurrSpline->
GetSize();
469 nTF1_coeff += nPoints;
477 if (nSplines_SingleEvent > nMaxSplines_PerEvent) nMaxSplines_PerEvent = nSplines_SingleEvent;
479 nSplines = nMaxSplines_PerEvent;
483 for (
M3::int_t i = 0; i < numParams; ++i)
486 if (SplineType[i] ==
kTF1_red)
continue;
490 if (nPoints == -999 || xArray.size() == 0) {
499 MACH3LOG_WARN(
"In total SplineInfoArray for {} hasn't been initialised", Counter);
508 MACH3LOG_INFO(
"-- GPUING WITH {X} and {Y,B,C,D} arrays and master spline containing TSpline3_red");
517 #ifdef Weight_On_SplineBySpline_Basis
518 MACH3LOG_ERROR(
"Trying to load Monolith from file using weight by weight base, this is not supported right now, sorry");
523 auto SplineFile = std::make_unique<TFile>(FileName.c_str(),
"OPEN");
524 TTree *Settings = SplineFile->Get<TTree>(
"Settings");
525 TTree *Monolith_TF1 = SplineFile->Get<TTree>(
"Monolith_TF1");
526 TTree *EventInfo = SplineFile->Get<TTree>(
"EventInfo");
527 TTree *SplineTree = SplineFile->Get<TTree>(
"SplineTree");
529 unsigned int NEvents_temp;
530 short int nParams_temp;
532 unsigned int nKnots_temp;
533 unsigned int NSplines_valid_temp;
534 unsigned int nTF1Valid_temp;
535 unsigned int nTF1coeff_temp;
537 Settings->SetBranchAddress(
"NEvents", &NEvents_temp);
538 Settings->SetBranchAddress(
"nParams", &nParams_temp);
539 Settings->SetBranchAddress(
"_max_knots", &_max_knots_temp);
540 Settings->SetBranchAddress(
"nKnots", &nKnots_temp);
541 Settings->SetBranchAddress(
"NSplines_valid", &NSplines_valid_temp);
542 Settings->SetBranchAddress(
"NTF1_valid", &nTF1Valid_temp);
543 Settings->SetBranchAddress(
"nTF1coeff", &nTF1coeff_temp);
545 Settings->GetEntry(0);
549 _max_knots =
static_cast<short int>(_max_knots_temp);
576 SplineTree->GetEntry(0);
578 float coeff_tf1 = 0.;
579 Monolith_TF1->SetBranchAddress(
"cpu_coeff_TF1_many", &coeff_tf1);
580 for(
unsigned int i = 0; i <
nTF1coeff; i++)
582 Monolith_TF1->GetEntry(i);
586 unsigned int nParamPerEvent = 0;
587 unsigned int nParamPerEvent_tf1 = 0;
589 EventInfo->SetBranchAddress(
"cpu_nParamPerEvent", &nParamPerEvent);
590 EventInfo->SetBranchAddress(
"cpu_nParamPerEvent_tf1", &nParamPerEvent_tf1);
591 for(
unsigned int i = 0; i < 2*
NEvents; i++)
593 EventInfo->GetEntry(i);
614 auto SplineFile = std::make_unique<TFile>(FileName.c_str(),
"recreate");
615 TTree *Settings =
new TTree(
"Settings",
"Settings");
616 TTree *Monolith_TF1 =
new TTree(
"Monolith_TF1",
"Monolith_TF1");
617 TTree *XKnots =
new TTree(
"XKnots",
"XKnots");
618 TTree *EventInfo =
new TTree(
"EventInfo",
"EventInfo");
620 unsigned int NEvents_temp =
NEvents;
621 short int nParams_temp =
nParams;
623 unsigned int nKnots_temp =
nKnots;
628 Settings->Branch(
"NEvents", &NEvents_temp,
"NEvents/i");
629 Settings->Branch(
"nParams", &nParams_temp,
"nParams/S");
630 Settings->Branch(
"_max_knots", &_max_knots_temp,
"_max_knots/I");
631 Settings->Branch(
"nKnots", &nKnots_temp,
"nKnots/i");
632 Settings->Branch(
"NSplines_valid", &NSplines_valid_temp,
"NSplines_valid/i");
633 Settings->Branch(
"NTF1_valid", &nTF1Valid_temp,
"NTF1_valid/i");
634 Settings->Branch(
"nTF1coeff", &nTF1coeff_temp,
"nTF1coeff/i");
641 TTree *SplineTree =
new TTree(
"SplineTree",
"SplineTree");
648 float coeff_tf1 = 0.;
649 Monolith_TF1->Branch(
"cpu_coeff_TF1_many", &coeff_tf1,
"cpu_coeff_TF1_many/F");
650 for(
unsigned int i = 0; i <
nTF1coeff; i++)
653 Monolith_TF1->Fill();
656 Monolith_TF1->Write();
658 unsigned int nParamPerEvent = 0;
659 unsigned int nParamPerEvent_tf1 = 0;
661 EventInfo->Branch(
"cpu_nParamPerEvent", &nParamPerEvent,
"cpu_nParamPerEvent/i");
662 EventInfo->Branch(
"cpu_nParamPerEvent_tf1", &nParamPerEvent_tf1,
"cpu_nParamPerEvent_tf1/i");
664 for(
unsigned int i = 0; i < 2*
NEvents; i++)
689 #ifndef Weight_On_SplineBySpline_Basis
720 for (
int j = 0; j <
_nCoeff_; j++) {
725 int Np = spl->
GetNp();
732 MACH3LOG_ERROR(
"This _WILL_ cause problems with GPU splines and _SHOULD_ be fixed!");
741 for(
int i = 0; i < Np; i++) {
745 xArray[i] = float(x);
750 if((xArray[i] == -999) || (manyArray[i*
_nCoeff_] == -999) || (manyArray[i*
_nCoeff_ +1] == -999) || (manyArray[i*
_nCoeff_+2] == -999) || (manyArray[i*
_nCoeff_+3] == -999)){
751 MACH3LOG_ERROR(
"*********** Bad params in getSplineCoeff_SepMany() ************");
752 MACH3LOG_ERROR(
"pre cast to float (x, y, b, c, d) = {:.2f}, {:.2f}, {:.2f}, {:.2f}, {:.2f}", x, y, b, c, d);
753 MACH3LOG_ERROR(
"pre cast to float (x, y, b, c, d) = {:.2f}, {:.2f}, {:.2f}, {:.2f}, {:.2f}", xArray[i], manyArray[i*
_nCoeff_], manyArray[i*
_nCoeff_+1], manyArray[i*
_nCoeff_+2], manyArray[i*
_nCoeff_+3]);
754 MACH3LOG_ERROR(
"This will cause problems when preparing for GPU");
755 MACH3LOG_ERROR(
"***************************************************************");
775 #ifdef Weight_On_SplineBySpline_Basis
816 #pragma omp for simd nowait
818 for (
unsigned int splineNum = 0; splineNum <
NSplines_valid; ++splineNum)
827 const short int segment_X = short(Param*
_max_knots+segment);
850 for (
unsigned int tf1Num = 0; tf1Num <
NTF1_valid; ++tf1Num)
856 const unsigned int TF1_Index = tf1Num *
_nTF1Coeff_;
874 #ifndef Weight_On_SplineBySpline_Basis
876 #pragma omp parallel for
878 for (
unsigned int EventNum = 0; EventNum <
NEvents; ++EventNum)
880 float totalWeight = 1.0f;
882 const unsigned int Offset = 2 * EventNum;
892 for (
unsigned int id = 0;
id < numParams; ++id) {
904 for (
unsigned int id = 0;
id < numParams_tf1; ++id) {
921 #ifdef Weight_On_SplineBySpline_Basis
924 #pragma omp parallel for
947 MACH3LOG_INFO(
"Size of x array = {:.4f} MB",
double(
sizeof(
float)*event_size_max)/1.E6);
@ kTF1_red
Uses TF1_red for interpolation.
@ kTSpline3_red
Uses TSpline3_red for interpolation.
void CleanVector(std::vector< T > &vec)
Generic cleanup function.
constexpr int _nCoeff_
KS: We store coefficients {y,b,c,d} in one array one by one, this is only to define it once rather th...
constexpr int _nTF1Coeff_
KS: For TF1 we store at most 5 coefficients, we could make it more flexible but for now define it her...
Custom exception class for MaCh3 errors.
Class responsible for calculating spline weight on GPU.
__host__ void CleanupGPU_SplineMonolith(float *cpu_total_weights)
This function deallocates the resources allocated for the separate {x} and {ybcd} arrays in the and T...
__host__ void InitGPU_Vals(float **vals)
Allocate memory for spline segments.
__host__ void InitGPU_SplineMonolith(float **cpu_total_weights, int n_events, unsigned int total_nknots, unsigned int n_splines, unsigned int n_tf1, int Eve_size)
Allocate memory on gpu for spline monolith.
__host__ void RunGPU_SplineMonolith(float *cpu_total_weights, float *vals, short int *segment, const unsigned int h_n_splines, const unsigned int h_n_tf1)
Run the GPU code for the separate many arrays, as in separate {x}, {y,b,c,d} arrays. Pass the segment ...
__host__ void InitGPU_Segments(short int **segment)
Allocate memory for spline segments.
__host__ void CopyToGPU_SplineMonolith(SplineMonoStruct *cpu_spline_handler, std::vector< float > cpu_many_array_TF1, std::vector< short int > cpu_paramNo_arr_TF1, int n_events, std::vector< unsigned int > cpu_nParamPerEvent, std::vector< unsigned int > cpu_nParamPerEvent_TF1, int n_params, unsigned int n_splines, short int spline_size, unsigned int total_nknots, unsigned int n_tf1)
Copies data from CPU to GPU for the spline monolith.
__host__ void CleanupGPU_Segments(short int *segment, float *vals)
Clean up pinned variables at CPU.
SMonolithGPU * gpu_spline_handler
KS: Store info about Spline monolith; this allows us to obtain better step time. As all necessary informa...
std::vector< unsigned int > cpu_nParamPerEvent
KS: CPU map keeping track of how many parameters apply to each event; we keep two numbers here {number...
unsigned int NTF1_valid
Number of valid TF1.
std::vector< short int > cpu_nPoints_arr
CPU arrays to hold number of points.
void Evaluate() override
CW: This Eval should be used when using two separate x,{y,a,b,c,d} arrays to store the weights; proba...
unsigned int nTF1coeff
Sum of all coefficients over all TF1.
std::vector< float > cpu_coeff_TF1_many
CPU arrays to hold TF1 coefficients.
void Initialise()
KS: Set everything to null etc.
SplineMonoStruct * cpu_spline_handler
KS: Store info about Spline monolith; this allows us to obtain better step time. As all necessary informa...
float * cpu_weights_tf1_var
CPU arrays to hold weight for each TF1.
void PrintInitialsiation()
KS: Print info about how many knots etc. have been initialised.
std::vector< unsigned int > cpu_nParamPerEvent_tf1
KS: CPU map keeping track of how many parameters apply to each event; we keep two numbers here {number...
void SynchroniseMemTransfer()
KS: After calculations are done on GPU we copy memory to CPU. This operation is asynchronous meaning ...
std::vector< int > index_TF1_cpu
holds the index for good TF1; don't do unsigned since starts with negative value!
void ModifyWeights_GPU()
Conversion from valid splines to all.
float * cpu_weights
The returned gpu weights, read by the GPU.
float * cpu_weights_spline_var
CPU arrays to hold weight for each spline.
std::vector< int > index_spline_cpu
holds the index for good splines; don't do unsigned since starts with negative value!
void PrepareForGPU(std::vector< std::vector< TResponseFunction_red * > > &MasterSpline, const std::vector< RespFuncType > &SplineType)
CW: Prepare the TSpline3_red objects for the GPU.
void CalcSplineWeights() override
CPU-based code which evaluates the weight for each spline.
std::string FastSplineName
Name of Fast Spline to which will be saved.
void MoveToGPU()
CW: The shared initialiser from constructors of TResponseFunction_red.
bool SaveSplineFile
Flag telling whether we are saving spline monolith into handy root file.
float * cpu_total_weights
KS: This holds the total CPU weights that get read in samplePDFND.
unsigned int NSplines_valid
Number of valid splines.
SMonolith(std::vector< std::vector< TResponseFunction_red * > > &MasterSpline, const std::vector< RespFuncType > &SplineType, const bool SaveFlatTree=false, const std::string &_FastSplineName="SplineFile.root")
Constructor.
virtual ~SMonolith()
Destructor for SMonolith class.
void ModifyWeights() override
Calc total event weight.
unsigned int NEvents
Number of events.
void getSplineCoeff_SepMany(TSpline3_red *&spl, int &nPoints, float *&xArray, float *&manyArray)
CW: This loads up coefficients into two arrays: one x array and one yabcd array.
void LoadSplineFile(std::string FileName) override
KS: Load preprocessed spline file.
unsigned int NSplines_total_large
Number of total splines if each event had every parameter's spline.
void ScanMasterSpline(std::vector< std::vector< TResponseFunction_red * > > &MasterSpline, unsigned int &nEvents, short int &MaxPoints, short int &numParams, int &nSplines, unsigned int &NSplinesValid, unsigned int &numKnots, unsigned int &nTF1Valid, unsigned int &nTF1_coeff, const std::vector< RespFuncType > &SplineType)
CW: Function to scan through the MasterSpline of TSpline3.
void PrepareSplineFile(std::string FileName) override
KS: Prepare spline file that can be used for fast loading.
short int _max_knots
Max knots for production.
std::vector< short int > cpu_paramNo_TF1_arr
CW: CPU array with the number of points per spline (not per spline point!)
unsigned int nKnots
Sum of all knots over all splines.
Base class for calculating weight from spline.
short int nParams
Number of parameters that have splines.
void FindSplineSegment()
CW: Code used in step-by-step reweighting; finds the spline segment for each param.
short int * SplineSegments
std::vector< FastSplineInfo > SplineInfoArray
float * ParamValues
Store parameter values; they are not in FastSplineInfo because in the GPU case we need to copy them to G...
void getTF1Coeff(TF1_red *&spl, int &nPoints, float *&coeffs)
CW: Gets the polynomial coefficients for TF1.
void LoadFastSplineInfoDir(std::unique_ptr< TFile > &SplineFile)
KS: Load preprocessed FastSplineInfo.
void PrepareFastSplineInfoDir(std::unique_ptr< TFile > &SplineFile) const
KS: Prepare Fast Spline Info within SplineFile.
CW: A reduced TF1 class only. Only saves parameters for each TF1 and how many parameters each paramet...
int GetSize()
Get the size.
KS: A reduced ResponseFunction Generic function used for evaluating weight.
CW: Reduced TSpline3 class.
void GetKnot(int i, M3::float_t &xtmp, M3::float_t &ytmp)
M3::int_t GetNp() override
CW: Get the number of points.
void GetCoeff(int segment, M3::float_t &x, M3::float_t &y, M3::float_t &b, M3::float_t &c, M3::float_t &d)
CW: Get the coefficient of a given segment.
__host__ void SynchroniseSplines()
Make sure all Cuda threads finished execution.
MaCh3 event-by-event cross-section spline code.
void AddPath(std::string &FilePath)
Prepends the MACH3 environment path to FilePath if it is not already present.
KS: Struct storing information for spline monolith.
std::vector< unsigned int > nKnots_arr
KS: CPU Number of knots per spline.
std::vector< float > coeff_x
KS: CPU arrays to hold X coefficient.
std::vector< float > coeff_many
CPU arrays to hold other coefficients.
std::vector< short int > paramNo_arr
CW: CPU array with the number of points per spline (not per spline point!)