MaCh3/UnbinnedSplineHandler_8cpp_source.html

 #include "UnbinnedSplineHandler.h"


 #ifdef MaCh3_CUDA

 #include "Splines/gpuSplineUtils.cuh"

 #endif


 #pragma GCC diagnostic ignored "-Wuseless-cast"

 #pragma GCC diagnostic ignored "-Wfloat-conversion"


 // *****************************************

 //Set everything to NULL or 0

 void UnbinnedSplineHandler::Initialise() {
 // *****************************************
   // Put the handler into a well-defined empty state. Called by both
   // constructors before any spline information is read.
 #ifdef MaCh3_CUDA
   MACH3LOG_INFO("Using GPU version event by event monolith");
   // Created later, in MoveToGPU()
   gpu_spline_handler = nullptr;
 #endif

   // CPU-side container for the flattened spline coefficients
   cpu_spline_handler = new SplineMonoStruct();

   // Event / knot bookkeeping
   NEvents = 0;
   _max_knots = 0;
   nKnots = 0;
   nTF1coeff = 0;

   // Number of response functions of each kind
   NSplines_valid = 0;
   NTF1_valid = 0;

   // Weight buffers, allocated once the sizes above are known
   cpu_total_weights = nullptr;
   cpu_weights_spline_var = nullptr;
   cpu_weights_tf1_var = nullptr;
 }


 // *****************************************

 UnbinnedSplineHandler::UnbinnedSplineHandler(std::vector<std::vector<TResponseFunction_red*> > &MasterSpline,
                      const std::vector<RespFuncType> &SplineType,
                      const bool SaveFlatTree,
                      const std::string& _FastSplineName) : SplineBase() {
 // *****************************************
   // Build the handler from per-event response functions.
   // MasterSpline    : [event][parameter] response functions (nullptr where an event has none)
   // SplineType      : response-function type for each parameter
   // SaveFlatTree    : if true the flattened monolith is also written to a ROOT file
   // _FastSplineName : name of that ROOT file
   Initialise();

   FastSplineName = _FastSplineName;
   SaveSplineFile = SaveFlatTree;

   MACH3LOG_INFO("-- GPUING WITH arrays and master spline containing TResponseFunction_red");

   // Flatten the response functions into the monolithic arrays
   PrepareForGPU(MasterSpline, SplineType);
 }


 // *****************************************

 // The shared initialiser from constructors of TSpline3 and TSpline3_red

 void UnbinnedSplineHandler::PrepareForGPU(std::vector<std::vector<TResponseFunction_red*> > &MasterSpline, const std::vector<RespFuncType> &SplineType) {

 // *****************************************

   // Scan for the max number of knots, the number of events (number of splines), and number of parameters

   int maxnSplines = 0;

   ScanMasterSpline(MasterSpline,

                    NEvents,

                    _max_knots,

                    nParams,

                    maxnSplines,

                    NSplines_valid,

                    nKnots,

                    NTF1_valid,

                    nTF1coeff,

                    SplineType);


   MACH3LOG_INFO("Found {} events", NEvents);

   MACH3LOG_INFO("Found {} knots at max", _max_knots);

   MACH3LOG_INFO("Found {} parameters", nParams);

   MACH3LOG_INFO("Found {} maximum number of splines in an event", maxnSplines);

   MACH3LOG_INFO("Found total {} knots in all splines", nKnots);

   MACH3LOG_INFO("Number of splines = {}", NSplines_valid);

   MACH3LOG_INFO("Found total {} coeffs in all TF1", nTF1coeff);

   MACH3LOG_INFO("Number of TF1 = {}", NTF1_valid);


   unsigned int event_size_max = _max_knots * nParams;

   // Declare the {x}, {y,b,c,d} arrays for all possible splines which the event has

   // We'll filter off the flat and "disabled" (e.g. CCQE event should not have MARES spline) ones in the next for loop, but need to declare these beasts here


   // Declare the {y,b,c,d} for each knot

   // float because GPU precision (could change to double, but will incur significant speed reduction on GPU unless you're very rich!)

   cpu_spline_handler->coeff_many.resize(nKnots*_nCoeff_); // *4 because we store y,b,c,d parameters in this array

   //KS: For x coeff we assume that for given dial (MAQE) spacing is identical,

   // here we are sloppy and assume each dial has the same number of knots, not a big problem

   cpu_spline_handler->coeff_x.resize(event_size_max, -999);


   //CW: With TF1 we only save the coefficients and the order of the polynomial

   // Makes most sense to have one large monolithic array, but then it becomes impossible to tell apart a coefficient from a "number of points". So have two arrays: one of coefficients and one of number of points

   // Let's first assume all are of _max_knots size

   // Now declare the arrays for each point in the valid splines which the event actually has (i.e. include the splines that the event undergoes)

   // Also make array with the number of points per spline (not per spline point!)

   // float because GPU precision (could change to double, but will incur significant speed reduction on GPU unless you're very rich!)

   cpu_nPoints_arr.resize(NTF1_valid);

   cpu_coeff_TF1_many.resize(nTF1coeff); // *5 because this array holds  a,b,c,d,e parameters


   //KS: Map keeping track how many parameters applies to each event, we keep two numbers here {number of splines per event, index where splines start for a given event}

   cpu_nParamPerEvent.resize(2 * NEvents, -1);

   cpu_nParamPerEvent_tf1.resize(2 * NEvents, -1);


   // Make array with the number of points per spline (not per spline point!)

   cpu_spline_handler->paramNo_arr.resize(NSplines_valid);

   //KS: And array which tells where each spline stars in a big monolith array, sort of knot map

   cpu_spline_handler->nKnots_arr.resize(NSplines_valid);

   cpu_paramNo_TF1_arr.resize(NTF1_valid);


   // Temporary arrays to hold the coefficients for each spline

   // We get one x, one y, one b,... for each point, so only need to be _max_knots big

   //KS: Some params has less splines but this is all right main array will get proper number while this temp will be deleted

   float *x_tmp = new float[_max_knots]();

   float *many_tmp = new float[_max_knots*_nCoeff_]();

   float *temp_coeffs = new float[_nTF1Coeff_]();


   // Count the number of events

   unsigned int KnotCounter = 0;

   unsigned int TF1PointsCounter = 0;

   unsigned int NSplinesCounter = 0;

   unsigned int TF1sCounter = 0;

   int ParamCounter = 0;

   int ParamCounterGlobal = 0;

   int ParamCounter_TF1 = 0;

   int ParamCounterGlobalTF1 = 0;

   // Loop over events and extract the spline coefficients

   for(unsigned int EventCounter = 0; EventCounter < MasterSpline.size(); ++EventCounter) {

     // Structure of MasterSpline is std::vector<std::vector<TSpline3*>>

     // A conventional iterator to count which parameter a given spline should be applied to

     for(unsigned int ParamNumber = 0; ParamNumber < MasterSpline[EventCounter].size(); ++ParamNumber) {

       // If NULL we don't have this spline for the event, so move to next spline

       if (MasterSpline[EventCounter][ParamNumber] == NULL) continue;


       if(SplineType[ParamNumber] == kTSpline3_red)

       {

         //KS: how much knots each spline has

         int nPoints_tmp = 0;

         // Get a pointer to the current spline for this event

         TResponseFunction_red* TespFunc = MasterSpline[EventCounter][ParamNumber];

         TSpline3_red* CurrSpline = static_cast<TSpline3_red*>(TespFunc);


         // If the number of knots are greater than 2 the spline is not a dummy and we should extract coefficients to load onto the GPU

         GetSplineCoeff_SepMany(CurrSpline, nPoints_tmp, x_tmp, many_tmp);


         //KS: One knot means flat spline so ignore

         if (nPoints_tmp == 1) continue;

         for (int j = 0; j < _max_knots; ++j) {

           cpu_spline_handler->coeff_x[ParamNumber*_max_knots + j] = x_tmp[j];

         }

         //KS: Contrary to X coeff we keep for other coeff only filled knots, there is no much gain for doing so for x coeff

         for (int j = 0; j < nPoints_tmp; ++j) {

           for (int k = 0; k < _nCoeff_; k++) {

             cpu_spline_handler->coeff_many[KnotCounter*_nCoeff_ + j*_nCoeff_ + k] = many_tmp[j*_nCoeff_+k];

           }

         }

         // Set the parameter number for this spline

         cpu_spline_handler->paramNo_arr[NSplinesCounter] = short(ParamNumber);

         //KS: Fill map when each spline starts

         cpu_spline_handler->nKnots_arr[NSplinesCounter] = KnotCounter;

         KnotCounter += nPoints_tmp;


         ++ParamCounter;

         // Increment the counter for the number of good splines we have

         ++NSplinesCounter;

       }

       else if (SplineType[ParamNumber] == kTF1_red)

       {

         // Don't actually use this ever -- we give each spline the maximum number of points found in all splines

         int nPoints_tmp = 0;

         // Get a pointer to the current spline for this event

         TF1_red* CurrSpline = dynamic_cast<TF1_red*>(MasterSpline[EventCounter][ParamNumber]);


         // If the number of knots are greater than 2 the spline is not a dummy and we should extract coefficients to load onto the GPU

         GetTF1Coeff(CurrSpline, nPoints_tmp, temp_coeffs);

         for (int j = 0; j < _nTF1Coeff_; ++j) {

           cpu_coeff_TF1_many[TF1PointsCounter+j] = temp_coeffs[j];

         }

         // Save the number of points for this spline

         cpu_nPoints_arr[TF1sCounter] = short(nPoints_tmp);


         TF1PointsCounter += nPoints_tmp;

         // Set the parameter number for this spline

         cpu_paramNo_TF1_arr[TF1sCounter] = short(ParamNumber);

         ++ParamCounter_TF1;

         // Increment the counter for the number of good splines we have

         ++TF1sCounter;

       }

       //KS: Don't delete in debug

       #ifndef MACH3_DEBUG

       delete MasterSpline[EventCounter][ParamNumber];

       MasterSpline[EventCounter][ParamNumber] = nullptr;

       #endif

     } // End the loop over the parameters in the MasterSpline

     cpu_nParamPerEvent[2*EventCounter] = ParamCounter;

     cpu_nParamPerEvent[2*EventCounter+1] = ParamCounterGlobal;

     ParamCounterGlobal += ParamCounter;


     cpu_nParamPerEvent_tf1[2*EventCounter] = ParamCounter_TF1;

     cpu_nParamPerEvent_tf1[2*EventCounter+1] = ParamCounterGlobalTF1;

     ParamCounterGlobalTF1 += ParamCounter_TF1;


     ParamCounter = 0;

     ParamCounter_TF1 = 0;

   } // End the loop over the number of events

   delete[] many_tmp;

   delete[] x_tmp;

   delete[] temp_coeffs;


   int BadXCounter = 0;

   for (unsigned int j = 0; j < event_size_max; j++) {

     if (cpu_spline_handler->coeff_x[j] == -999) BadXCounter++;

     // Perform checks that all entries have been modified from initial values

     if (cpu_spline_handler->coeff_x[j] == -999 && BadXCounter < 5) {

       MACH3LOG_WARN("***** BAD X !! *****");

       MACH3LOG_WARN("Indicates some parameter doesn't have a single spline");

       MACH3LOG_WARN("j = {}", j);

       //throw MaCh3Exception(__FILE__ , __LINE__ );

     }

     if(BadXCounter == 5) MACH3LOG_WARN("There is more unutilised knots although I will stop spamming");

   }


   MACH3LOG_WARN("Found in total {} BAD X", BadXCounter);

   //KS: This is tricky as this variable use both by CPU and GPU, however if use CUDA we use cudaMallocHost

   #ifndef MaCh3_CUDA

   cpu_total_weights = new M3::float_t[NEvents]();

   cpu_weights_spline_var = new float[NSplines_valid]();

   cpu_weights_tf1_var = new float[NTF1_valid]();

   #endif


   // Print some info; could probably make this to a separate function

   PrintInitialsiation();

   if(SaveSplineFile) PrepareSplineFile(FastSplineName);


   MoveToGPU();


   // Can pass the spline segments to the GPU instead of the values

   // Make these here and only refill them for each loop, avoiding unnecessary new/delete on each reconfigure

   SetupSegments();

 }


 // *****************************************

 // Allocate the GPU arrays and copy the spline monolith from the CPU to the GPU (no-op without CUDA)

 void UnbinnedSplineHandler::MoveToGPU() {
 // *****************************************
   // With MaCh3_CUDA defined: report the memory footprint, create the GPU handler,
   // copy the flattened arrays to it and release the CPU copies.
   // Without MaCh3_CUDA this function does nothing.
   #ifdef MaCh3_CUDA
   unsigned int event_size_max = _max_knots * nParams;
   // Sum the three contributions in bytes and convert to MB once
   MACH3LOG_INFO("Total size = {:.2f} MB memory on CPU to move to GPU",
                 (double(sizeof(float) * nKnots * _nCoeff_) + double(sizeof(float) * event_size_max) +
                 double(sizeof(short int) * NSplines_valid)) / 1.E6);
   MACH3LOG_INFO("Total TF1 size = {:.2f} MB memory on CPU to move to GPU",
                 double(sizeof(float) * NTF1_valid * _nTF1Coeff_) / 1.E6);
   MACH3LOG_INFO("GPU weight array (GPU->CPU every step) = {:.2f} MB", static_cast<double>(sizeof(float)) * (NSplines_valid + NTF1_valid) / 1.0e6);
   MACH3LOG_INFO("Since you are running Total event weight mode then GPU weight array (GPU->CPU every step) = {:.2f} MB",
                 double(sizeof(float) * NEvents) / 1.E6);
   MACH3LOG_INFO("Parameter value array (CPU->GPU every step) = {:.4f} MB", double(sizeof(float) * nParams) / 1.E6);
   //CW: Layout: one x array, and one coefficient array holding y,b,c,d in order
   //    (y11,b11,c11,d11; y12,b12,c12,d12;...) where ynm is n = spline number, m = spline point

   gpu_spline_handler = new SplineMonolithGPU();

   // Allocate the GPU-side arrays (and the host buffer behind cpu_total_weights)
   gpu_spline_handler->InitGPU_SplineMonolith(
           &cpu_total_weights,
           NEvents,
           nKnots, // How many entries in coefficient array (*4 for the "many" array)
           NSplines_valid, // Number of splines
           NTF1_valid,
           event_size_max // Max knots times number of parameters
   );

   // Copy the flattened arrays and the size constants to the GPU
   gpu_spline_handler->CopyToGPU_SplineMonolith(
           cpu_spline_handler,

           // TF1 related now
           cpu_coeff_TF1_many,
           cpu_paramNo_TF1_arr,
           NEvents,
           cpu_nParamPerEvent,
           cpu_nParamPerEvent_tf1,
           nParams,
           NSplines_valid,
           _max_knots,
           nKnots,
           NTF1_valid);

   // Delete all the coefficient arrays from the CPU once they are on the GPU
   CleanVector(cpu_coeff_TF1_many);
   CleanVector(cpu_paramNo_TF1_arr);
   CleanVector(cpu_nParamPerEvent);
   CleanVector(cpu_nParamPerEvent_tf1);
   delete cpu_spline_handler;
   cpu_spline_handler = nullptr;
   MACH3LOG_INFO("Good GPU loading");
   #endif
 }


 // Need to specify template functions in header

 // *****************************************

 // Scan the master spline to get the maximum number of knots in any of the TSpline3*

 void UnbinnedSplineHandler::ScanMasterSpline(std::vector<std::vector<TResponseFunction_red*> > & MasterSpline,

                                  unsigned int &nEvents,

                                  short int &MaxPoints,

                                  short int &numParams,

                                  int &nSplines,

                                  unsigned int &NSplinesValid,

                                  unsigned int &numKnots,

                                  unsigned int &nTF1Valid,

                                  unsigned int &nTF1_coeff,

                                  const std::vector<RespFuncType> &SplineType) {

 // *****************************************

   // Need to extract: the total number of events

   //                  number of parameters

   //                  maximum number of knots

   MaxPoints = 0;

   nEvents   = 0;

   numParams   = 0;

   nSplines = 0;

   numKnots = 0;

   NSplinesValid = 0;

   nTF1Valid = 0;

   nTF1_coeff = 0;


   // Check the number of events

   nEvents = int(MasterSpline.size());


   // Maximum number of splines one event can have (scan through and find this number)

   int nMaxSplines_PerEvent = 0;


   //KS: We later check that each event has the same number of splines so this is fine

   numParams = short(MasterSpline[0].size());

   // Initialise

   SplineInfoArray.resize(numParams);


   // Loop over each parameter

   for(unsigned int EventCounter = 0; EventCounter < MasterSpline.size(); ++EventCounter) {

     // Check that each event has each spline saved

     if (numParams > 0) {

       int TempSize = int(MasterSpline[EventCounter].size());

       if (TempSize != numParams) {

         MACH3LOG_ERROR("Found {} parameters for event {}", TempSize, EventCounter);

         MACH3LOG_ERROR("but was expecting {} since that's what I found for the previous event", numParams);

         MACH3LOG_ERROR("Somehow this event has a different number of spline parameters... Please study further!");

         throw MaCh3Exception(__FILE__ , __LINE__ );

       }

     }

     numParams = short(MasterSpline[EventCounter].size());


     int nSplines_SingleEvent = 0;

     int nPoints = 0;

     // Loop over each pointer

     for(unsigned int ParamNumber = 0; ParamNumber < MasterSpline[EventCounter].size(); ++ParamNumber) {

       if (MasterSpline[EventCounter][ParamNumber]) {

         if(SplineType[ParamNumber] == kTSpline3_red)

         {

           TResponseFunction_red* TespFunc = MasterSpline[EventCounter][ParamNumber];

           TSpline3_red* CurrSpline = dynamic_cast<TSpline3_red*>(TespFunc);

           if(CurrSpline){

             nPoints = CurrSpline->GetNp();

           }


           if (nPoints > MaxPoints) {

             MaxPoints = static_cast<short int>(nPoints);

           }

           numKnots += nPoints;

           nSplines_SingleEvent++;


           // Fill the SplineInfoArray entries with information on each splinified parameter

           if (SplineInfoArray[ParamNumber].xPts.size() == 0)

           {

             // Fill the number of points

             SplineInfoArray[ParamNumber].nPts = CurrSpline->GetNp();


             // Fill the x points

             SplineInfoArray[ParamNumber].xPts.resize(SplineInfoArray[ParamNumber].nPts);

             for (M3::int_t k = 0; k < SplineInfoArray[ParamNumber].nPts; ++k)

             {

               M3::float_t xtemp = M3::float_t(-999.99);

               M3::float_t ytemp = M3::float_t(-999.99);

               CurrSpline->GetKnot(k, xtemp, ytemp);

               SplineInfoArray[ParamNumber].xPts[k] = xtemp;

             }

           }

           NSplinesValid++;

         }

         else if (SplineType[ParamNumber] == kTF1_red)

         {

           TResponseFunction_red* TespFunc = MasterSpline[EventCounter][ParamNumber];

           TF1_red* CurrSpline = dynamic_cast<TF1_red*>(TespFunc);

           nPoints = CurrSpline->GetSize();

           nTF1_coeff += nPoints;

           nTF1Valid++;

         }

       } else {

         // If NULL we don't have this spline for the event, so move to next spline

         continue;

       }

     }

     if (nSplines_SingleEvent > nMaxSplines_PerEvent) nMaxSplines_PerEvent = nSplines_SingleEvent;

   }

   nSplines = nMaxSplines_PerEvent;


   int Counter = 0;

   //KS: Sanity check that everything was set correctly

   for (M3::int_t i = 0; i < numParams; ++i)

   {

     // KS: We don't find segment for TF1, so ignore this

     if (SplineType[i] == kTF1_red) continue;


     const M3::int_t nPoints = SplineInfoArray[i].nPts;

     const std::vector<M3::float_t>& xArray = SplineInfoArray[i].xPts;

     if (nPoints == -999 || xArray.size() == 0) {

       Counter++;

       if(Counter < 5) {

         MACH3LOG_WARN("SplineInfoArray[{}] isn't set yet", i);

       }

       continue;

       //throw MaCh3Exception(__FILE__ , __LINE__ );

     }

   }

   MACH3LOG_WARN("In total SplineInfoArray for {} hasn't been initialised", Counter);

 }


 // *****************************************

 // Load SplineFile

 UnbinnedSplineHandler::UnbinnedSplineHandler(const std::string& FileName)
           : SplineBase()
 {
 // *****************************************
   // Build the handler from a spline monolith previously stored in a ROOT file.
   // FileName : name of that file, passed on to LoadSplineFile()
   Initialise();

   MACH3LOG_INFO("-- GPUING WITH {X} and {Y,B,C,D} arrays and master spline containing TSpline3_red");

   // Read the flattened arrays back instead of rebuilding them from response functions
   LoadSplineFile(FileName);
 }


 // *****************************************

 // Load SplineMonolith from ROOT file

 void UnbinnedSplineHandler::LoadSplineFile(std::string FileName) {

 // *****************************************

   M3::AddPath(FileName);

   auto SplineFile = std::make_unique<TFile>(FileName.c_str(), "OPEN");

   TTree *Settings = SplineFile->Get<TTree>("Settings");

   TTree *Monolith_TF1 = SplineFile->Get<TTree>("Monolith_TF1");

   TTree *EventInfo = SplineFile->Get<TTree>("EventInfo");

   TTree *SplineTree = SplineFile->Get<TTree>("SplineTree");


   unsigned int NEvents_temp;

   short int nParams_temp;

   int _max_knots_temp;

   unsigned int nKnots_temp;

   unsigned int NSplines_valid_temp;

   unsigned int nTF1Valid_temp;

   unsigned int nTF1coeff_temp;


   Settings->SetBranchAddress("NEvents", &NEvents_temp);

   Settings->SetBranchAddress("nParams", &nParams_temp);

   Settings->SetBranchAddress("_max_knots", &_max_knots_temp);

   Settings->SetBranchAddress("nKnots", &nKnots_temp);

   Settings->SetBranchAddress("NSplines_valid", &NSplines_valid_temp);

   Settings->SetBranchAddress("NTF1_valid", &nTF1Valid_temp);

   Settings->SetBranchAddress("nTF1coeff", &nTF1coeff_temp);


   Settings->GetEntry(0);


   NEvents = NEvents_temp;

   nParams = nParams_temp;

   _max_knots = static_cast<short int>(_max_knots_temp);

   nKnots = nKnots_temp;

   NSplines_valid = NSplines_valid_temp;

   NTF1_valid = nTF1Valid_temp;

   nTF1coeff = nTF1coeff_temp;


   cpu_nParamPerEvent.resize(2*NEvents);

   cpu_nParamPerEvent_tf1.resize(2*NEvents);

   cpu_coeff_TF1_many.resize(nTF1coeff);


   //KS: This is tricky as this variable use both by CPU and GPU, however if use CUDA we use cudaMallocHost

 #ifndef MaCh3_CUDA

   cpu_total_weights = new M3::float_t[NEvents]();

   cpu_weights_spline_var = new float[NSplines_valid]();

   cpu_weights_tf1_var = new float[NTF1_valid]();

 #endif


   SplineTree->SetBranchAddress("SplineObject", &cpu_spline_handler);

   SplineTree->GetEntry(0);


   float coeff_tf1 = 0.;

   Monolith_TF1->SetBranchAddress("cpu_coeff_TF1_many", &coeff_tf1);

   for(unsigned int i = 0; i < nTF1coeff; i++)

   {

     Monolith_TF1->GetEntry(i);

     cpu_coeff_TF1_many[i] = coeff_tf1;

   }


   unsigned int nParamPerEvent = 0;

   unsigned int nParamPerEvent_tf1 = 0;


   EventInfo->SetBranchAddress("cpu_nParamPerEvent", &nParamPerEvent);

   EventInfo->SetBranchAddress("cpu_nParamPerEvent_tf1", &nParamPerEvent_tf1);

   for(unsigned int i = 0; i < 2*NEvents; i++)

   {

     EventInfo->GetEntry(i);

     cpu_nParamPerEvent[i] = nParamPerEvent;

     cpu_nParamPerEvent_tf1[i] = nParamPerEvent_tf1;

   }


   LoadFastSplineInfoDir(SplineFile);


   SplineFile->Close();


   // Print some info; could probably make this to a separate function

   PrintInitialsiation();


   MoveToGPU();


   SetupSegments();

 }


 // *****************************************

 void UnbinnedSplineHandler::SetupSegments() {

 // *****************************************

   //KS: Since we are going to copy it each step use fancy CUDA memory allocation

   #ifdef MaCh3_CUDA

   gpu_spline_handler->InitGPU_Segments(&SplineSegments);

   gpu_spline_handler->InitGPU_Vals(&ParamValues);

   #else

   SplineSegments = new short int[nParams]();

   ParamValues = new float[nParams]();

   #endif

   for (M3::int_t j = 0; j < nParams; j++)

   {

     SplineSegments[j] = 0;

     ParamValues[j] = -999;

   }

 }


 // *****************************************

 // Save SplineMonolith into ROOT file

 void UnbinnedSplineHandler::PrepareSplineFile(std::string FileName) {

 // *****************************************

   M3::AddPath(FileName);


   auto SplineFile = std::make_unique<TFile>(FileName.c_str(), "recreate");

   TTree *Settings = new TTree("Settings", "Settings");

   TTree *Monolith_TF1 = new TTree("Monolith_TF1", "Monolith_TF1");

   TTree *XKnots = new TTree("XKnots", "XKnots");

   TTree *EventInfo = new TTree("EventInfo", "EventInfo");


   unsigned int NEvents_temp = NEvents;

   short int nParams_temp = nParams;

   int _max_knots_temp = _max_knots;

   unsigned int nKnots_temp = nKnots;

   unsigned int NSplines_valid_temp = NSplines_valid;

   unsigned int nTF1Valid_temp = NTF1_valid;

   unsigned int nTF1coeff_temp = nTF1coeff;


   Settings->Branch("NEvents", &NEvents_temp, "NEvents/i");

   Settings->Branch("nParams", &nParams_temp, "nParams/S");

   Settings->Branch("_max_knots", &_max_knots_temp, "_max_knots/I");

   Settings->Branch("nKnots", &nKnots_temp, "nKnots/i");

   Settings->Branch("NSplines_valid", &NSplines_valid_temp, "NSplines_valid/i");

   Settings->Branch("NTF1_valid", &nTF1Valid_temp, "NTF1_valid/i");

   Settings->Branch("nTF1coeff", &nTF1coeff_temp, "nTF1coeff/i");


   Settings->Fill();


   SplineFile->cd();

   Settings->Write();


   TTree *SplineTree = new TTree("SplineTree", "SplineTree");

   // Create a branch for the SplineMonoStruct object

   SplineTree->Branch("SplineObject", &cpu_spline_handler);

   SplineTree->Fill();

   SplineTree->Write();

   delete SplineTree;


   float coeff_tf1 = 0.;

   Monolith_TF1->Branch("cpu_coeff_TF1_many", &coeff_tf1, "cpu_coeff_TF1_many/F");

   for(unsigned int i = 0; i < nTF1coeff; i++)

   {

     coeff_tf1 = cpu_coeff_TF1_many[i];

     Monolith_TF1->Fill();

   }

   SplineFile->cd();

   Monolith_TF1->Write();


   unsigned int nParamPerEvent = 0;

   unsigned int nParamPerEvent_tf1 = 0;


   EventInfo->Branch("cpu_nParamPerEvent", &nParamPerEvent, "cpu_nParamPerEvent/i");

   EventInfo->Branch("cpu_nParamPerEvent_tf1", &nParamPerEvent_tf1, "cpu_nParamPerEvent_tf1/i");


   for(unsigned int i = 0; i < 2*NEvents; i++)

   {

     nParamPerEvent = cpu_nParamPerEvent[i];

     nParamPerEvent_tf1 = cpu_nParamPerEvent_tf1[i];

     EventInfo->Fill();

   }

   SplineFile->cd();

   EventInfo->Write();


   PrepareFastSplineInfoDir(SplineFile);


   delete Settings;

   delete Monolith_TF1;

   delete XKnots;

   delete EventInfo;

   SplineFile->Close();

 }


 // *****************************************

 // Destructor

 // Cleans up the allocated GPU memory

 UnbinnedSplineHandler::~UnbinnedSplineHandler() {
 // *****************************************
   // Release everything this handler allocated.
   // Deleting a null pointer does nothing, so no explicit null tests are needed.
 #ifdef MaCh3_CUDA
   //KS: These were allocated through the GPU handler, so they are freed through it as well
   gpu_spline_handler->CleanupPinnedMemory(cpu_total_weights, SplineSegments, ParamValues);
   delete gpu_spline_handler;
 #else
   delete[] SplineSegments;
   delete[] ParamValues;
   delete[] cpu_total_weights;
 #endif

   // Per-response-function weight buffers
   delete[] cpu_weights_spline_var;
   delete[] cpu_weights_tf1_var;

   // CPU copy of the spline coefficients
   delete cpu_spline_handler;
 }


 // *****************************************

 // Get the spline coefficients from the TSpline3 so that we can load ONLY these onto the GPU, not the whole TSpline3 object

 // This loads up coefficients into two arrays: one x array and one yabcd array

 // This should maximize our cache hits!

 void UnbinnedSplineHandler::GetSplineCoeff_SepMany(TSpline3_red* &spl, int &nPoints, float *& xArray, float *& manyArray) const {
 // *****************************************
   // Copy the coefficients of one spline into two flat float arrays.
   // spl       : spline to read from
   // nPoints   : (out) number of knots of spl
   // xArray    : (out) knot positions; must hold at least _max_knots entries
   // manyArray : (out) y,b,c,d for each knot, _nCoeff_ entries per knot;
   //             must hold at least _max_knots*_nCoeff_ entries
   // Entries beyond the knots of spl are left at 1.0.
   // Throws MaCh3Exception if spl has more than _max_knots knots.

   // Initialise all arrays to 1.0
   for (int i = 0; i < _max_knots; ++i) {
     xArray[i] = 1.0;
     for (int j = 0; j < _nCoeff_; j++) {
       manyArray[i*_nCoeff_+j] = 1.0;
     }
   }
   // Get number of points in spline and report it to the caller
   int Np = spl->GetNp();
   nPoints = Np;
   // The output arrays are only _max_knots long, refuse anything larger
   if (Np > _max_knots) {
     MACH3LOG_ERROR("Error, number of points is greater than saved {}", _max_knots);
     MACH3LOG_ERROR("This _WILL_ cause problems with GPU splines and _SHOULD_ be fixed!");
     MACH3LOG_ERROR("nPoints = {}, _max_knots = {}", nPoints, _max_knots);
     throw MaCh3Exception(__FILE__ , __LINE__ );
   }

   // The coefficients we're reading into
   M3::float_t x, y, b, c, d;
   // The spline hands out M3::float_t, the monolith stores float, so cast on the way
   for(int i = 0; i < Np; i++) {
     // Get the coefficients from the spline
     spl->GetCoeff(i, x, y, b, c, d);
     // Write the arrays
     xArray[i] = float(x);
     manyArray[i*_nCoeff_] = float(y); // manyArray stores y,b,c,d
     manyArray[i*_nCoeff_+1] = float(b);
     manyArray[i*_nCoeff_+2] = float(c);
     manyArray[i*_nCoeff_+3] = float(d);
     // -999 is the "never filled" marker used for the x array, so a genuine -999 would be misread later
     if((xArray[i] == -999) || (manyArray[i*_nCoeff_] == -999) || (manyArray[i*_nCoeff_ +1] == -999) || (manyArray[i*_nCoeff_+2] == -999) || (manyArray[i*_nCoeff_+3] == -999)){
       MACH3LOG_ERROR("*********** Bad params in {} ************", __func__);
       MACH3LOG_ERROR("pre cast to float (x, y, b, c, d) = {:.2f}, {:.2f}, {:.2f}, {:.2f}, {:.2f}", x, y, b, c, d);
       MACH3LOG_ERROR("post cast to float (x, y, b, c, d) = {:.2f}, {:.2f}, {:.2f}, {:.2f}, {:.2f}", xArray[i], manyArray[i*_nCoeff_], manyArray[i*_nCoeff_+1], manyArray[i*_nCoeff_+2], manyArray[i*_nCoeff_+3]);
       MACH3LOG_ERROR("This will cause problems when preparing for GPU");
       MACH3LOG_ERROR("***************************************************************");
     }
   }
 }


 // *****************************************
 // Evaluate the weights for the current parameter values.
 // The spline segments are always found on the CPU first; the weights are then
 // calculated on the GPU if CUDA is enabled and on the CPU otherwise.
 void UnbinnedSplineHandler::Evaluate() {
 // *****************************************
   // Find the spline segments for the current parameter values
   FindSplineSegment();

 #ifdef MaCh3_CUDA
   // The main call to the GPU, passing the parameter values and their segments
   gpu_spline_handler->RunGPU_SplineMonolith(
           cpu_total_weights,
           ParamValues,
           SplineSegments);
 #else
   //KS: Huge MP loop over all valid splines
   CalcSplineWeights();

   //KS: Huge MP loop over all events calculating total weight per event
   CalcTotalEventWeight();
 #endif
 }


 //*********************************************************
 // CPU evaluation of every valid spline and TF1 response.
 // Reads the current ParamValues and the segments stored in SplineSegments, and
 // fills cpu_weights_spline_var (one entry per valid spline) and
 // cpu_weights_tf1_var (one entry per valid TF1).
 void UnbinnedSplineHandler::CalcSplineWeights() {
 //*********************************************************
   #ifdef MULTITHREAD
   //KS: Open parallel region
   #pragma omp parallel
   {
   #endif
     //KS: First the cubic splines. This loop and the TF1 loop below write to
     // different arrays, so threads do not need to wait in between (nowait)
     #ifdef MULTITHREAD
     #pragma omp for simd nowait
     #endif
     for (unsigned int splineNum = 0; splineNum < NSplines_valid; ++splineNum)
     {
       //CW: Which parameter this spline responds to
       const short int Param = cpu_spline_handler->paramNo_arr[splineNum];

       //CW: The segment is read from SplineSegments, so no binary search is needed here
       const short int segment = SplineSegments[Param];

       //KS: Position in coeff_x is simply parameter*max knots + segment, as each parameter has the same spacing.
       // The sum is evaluated in int and kept in 32 bits: casting it back to short int
       // would wrap to a wrong (possibly negative) index once it exceeds 32767
       const unsigned int segment_X = static_cast<unsigned int>(Param * _max_knots + segment);

       //KS: Find the position of this segment's coefficients in our monolithic structure
       const unsigned int CurrentKnotPos = cpu_spline_handler->nKnots_arr[splineNum]*_nCoeff_+segment*_nCoeff_;

       // Polynomial coefficients {y,b,c,d} of the current segment, stored one after another
       const float fY = cpu_spline_handler->coeff_many[CurrentKnotPos];
       const float fB = cpu_spline_handler->coeff_many[CurrentKnotPos + 1];
       const float fC = cpu_spline_handler->coeff_many[CurrentKnotPos + 2];
       const float fD = cpu_spline_handler->coeff_many[CurrentKnotPos + 3];

       // Distance of the parameter value from the stored knot position (dx)
       const float dx = ParamValues[Param] - cpu_spline_handler->coeff_x[segment_X];

       //CW: Wooow, let's use some fancy intrinsic and pull down the processing time by <1% from normal multiplication! HURRAY
       cpu_weights_spline_var[splineNum] = fmaf(dx, fmaf(dx, fmaf(dx, fD, fC), fB), fY);
       // Or for the more "easy to read" version:
       //cpu_weights_spline_var[splineNum] = (fY+dx*(fB+dx*(fC+dx*fD)));
     }

     //KS: Now the TF1 responses
     #ifdef MULTITHREAD
     #pragma omp for simd
     #endif
     for (unsigned int tf1Num = 0; tf1Num < NTF1_valid; ++tf1Num)
     {
       // For TF1 the parameter value is used directly, no knot position is subtracted
       const float x = ParamValues[cpu_paramNo_TF1_arr[tf1Num]];

       // Each TF1 occupies _nTF1Coeff_ consecutive slots; only the first two are read here
       const unsigned int TF1_Index = tf1Num * _nTF1Coeff_;
       const float a = cpu_coeff_TF1_many[TF1_Index];
       const float b = cpu_coeff_TF1_many[TF1_Index + 1];

       // Linear response, equivalent to a*x + b
       cpu_weights_tf1_var[tf1Num] = fmaf(a, x, b);
     }
   #ifdef MULTITHREAD
   //KS: End parallel region
   }
   #endif
 }


 //*********************************************************

 //KS: Calc total event weight on CPU

 void UnbinnedSplineHandler::CalcTotalEventWeight() {

 //*********************************************************

   #ifdef MULTITHREAD

   #pragma omp parallel for

   #endif

   for (unsigned int EventNum = 0; EventNum < NEvents; ++EventNum)

   {

     float totalWeight = 1.0f; // Initialize total weight for each event


     const unsigned int Offset = 2 * EventNum;


     // Extract the parameters for the current event

     const unsigned int startIndex = cpu_nParamPerEvent[Offset + 1];

     const unsigned int numParams = cpu_nParamPerEvent[Offset];


     // Compute total weight for the current event

     #ifdef MULTITHREAD

     #pragma omp simd reduction(*:totalWeight)

     #endif

     for (unsigned int id = 0; id < numParams; ++id) {

       totalWeight *= cpu_weights_spline_var[startIndex + id];

     }

     //Now TF1

     // Extract the parameters for the current event

     const unsigned int startIndex_tf1 = cpu_nParamPerEvent_tf1[Offset + 1];

     const unsigned int numParams_tf1 = cpu_nParamPerEvent_tf1[Offset];


     // Compute total weight for the current event

     #ifdef MULTITHREAD

     #pragma omp simd reduction(*:totalWeight)

     #endif

     for (unsigned int id = 0; id < numParams_tf1; ++id) {

       totalWeight *= cpu_weights_tf1_var[startIndex_tf1 + id];

     }


     // Store the total weight for the current event

     cpu_total_weights[EventNum] = static_cast<M3::float_t>(totalWeight);

   }

 }


 //*********************************************************
 //KS: Log a summary of what has been initialised: number of events, splines and TF1,
 // together with the sizes of the main arrays in MB
 void UnbinnedSplineHandler::PrintInitialsiation() const {
 //*********************************************************
   // Number of entries of the x array: _max_knots slots for each parameter
   const unsigned int event_size_max = _max_knots * nParams;

   // Average number of responses per event
   const float splinesPerEvent = float(NSplines_valid)/float(NEvents);
   const float tf1PerEvent = float(NTF1_valid)/float(NEvents);

   // Array sizes converted from bytes to MB
   const double xArrayMB = double(sizeof(float)*event_size_max)/1.E6;
   const double coeffArrayMB = double(sizeof(float)*nKnots*_nCoeff_)/1.E6;
   const double paramNoArrayMB = double(sizeof(short int)*NSplines_valid)/1.E6;
   const double tf1CoeffArrayMB = double(sizeof(float)*NTF1_valid*_nTF1Coeff_)/1.E6;

   MACH3LOG_INFO("--- INITIALISED Spline Monolith ---");
   MACH3LOG_INFO("{} events with {} splines", NEvents, NSplines_valid);
   MACH3LOG_INFO("On average {:.2f} splines per event ({}/{})", splinesPerEvent, NSplines_valid, NEvents);
   MACH3LOG_INFO("Size of x array = {:.4f} MB", xArrayMB);
   MACH3LOG_INFO("Size of coefficient (y,b,c,d) array = {:.2f} MB", coeffArrayMB);
   MACH3LOG_INFO("Size of parameter # array = {:.2f} MB", paramNoArrayMB);

   MACH3LOG_INFO("On average {:.2f} TF1 per event ({}/{})", tf1PerEvent, NTF1_valid, NEvents);
   MACH3LOG_INFO("Size of TF1 coefficient (a,b,c,d,e) array = {:.2f} MB", tf1CoeffArrayMB);
 }


 //*********************************************************
 //KS: After the calculations are done on the GPU the memory is copied to the CPU.
 // This operation is asynchronous, meaning other operations are carried out while
 // the memory is being copied. The memory must be copied before the actual
 // reweight; this function makes sure that everything has been copied.
 // Nothing is done when CUDA is not enabled.
 void UnbinnedSplineHandler::SynchroniseMemTransfer() const {
 //*********************************************************
   #if defined(MaCh3_CUDA)
   // Synchronise first, then run the CUDA error check
   SynchroniseSplines();
   CudaCheckError();
   #endif
 }

MACH3LOG_ERROR
#define MACH3LOG_ERROR
Definition: MaCh3Logger.h:37

MACH3LOG_INFO
#define MACH3LOG_INFO
Definition: MaCh3Logger.h:35

MACH3LOG_WARN
#define MACH3LOG_WARN
Definition: MaCh3Logger.h:36

kTF1_red
@ kTF1_red
Uses TF1_red for interpolation.
Definition: ParameterStructs.h:148

kTSpline3_red
@ kTSpline3_red
Uses TSpline3_red for interpolation.
Definition: ParameterStructs.h:147

CleanVector
void CleanVector(T &)
Base case: do nothing for non-vector types.
Definition: ParameterStructs.h:42

_nCoeff_
constexpr int _nCoeff_
KS: We store coefficients {y,b,c,d} in one array one by one, this is only to define it once rather th...
Definition: SplineCommon.h:18

_nTF1Coeff_
constexpr int _nTF1Coeff_
KS: For TF1 we store at most 5 coefficients, we could make it more flexible but for now define it her...
Definition: SplineCommon.h:20

UnbinnedSplineHandler.h

MaCh3Exception
Custom exception class used throughout MaCh3.
Definition: MaCh3Exception.h:23

SplineBase
Base class for calculating weight from spline.
Definition: SplineBase.h:27

SplineBase::GetTF1Coeff
void GetTF1Coeff(TF1_red *&spl, int &nPoints, float *&coeffs) const
CW: Gets the polynomial coefficients for TF1.
Definition: SplineBase.cpp:115

SplineBase::nParams
short int nParams
Number of parameters that have splines.
Definition: SplineBase.h:81

SplineBase::FindSplineSegment
void FindSplineSegment()
CW:Code used in step by step reweighting, Find Spline Segment for each param.
Definition: SplineBase.cpp:44

SplineBase::SplineSegments
short int * SplineSegments
Definition: SplineBase.h:77

SplineBase::SplineInfoArray
std::vector< FastSplineInfo > SplineInfoArray
Definition: SplineBase.h:74

SplineBase::ParamValues
float * ParamValues
Store parameter values they are not in FastSplineInfo as in case of GPU we need to copy paste it to G...
Definition: SplineBase.h:79

SplineBase::LoadFastSplineInfoDir
void LoadFastSplineInfoDir(std::unique_ptr< TFile > &SplineFile)
KS: Load preprocessed FastSplineInfo.
Definition: SplineBase.cpp:167

SplineBase::PrepareFastSplineInfoDir
void PrepareFastSplineInfoDir(std::unique_ptr< TFile > &SplineFile) const
KS: Prepare Fast Spline Info within SplineFile.
Definition: SplineBase.cpp:139

SplineMonolithGPU
Class responsible for calculating spline weight on GPU.
Definition: gpuSplineUtils.cuh:75

SplineMonolithGPU::RunGPU_SplineMonolith
__host__ void RunGPU_SplineMonolith(M3::float_t *cpu_total_weights, float *vals, short int *segment)
Run the GPU code for the separate many arrays. As in separate {x}, {y,b,c,d} arrays Pass the segment ...
Definition: gpuSplineUtils.cu:443

SplineMonolithGPU::InitGPU_Vals
__host__ void InitGPU_Vals(float **vals)
Allocate memory for spline segments.
Definition: gpuSplineUtils.cu:187

SplineMonolithGPU::CleanupPinnedMemory
__host__ void CleanupPinnedMemory(M3::float_t *cpu_total_weights, short int *segment, float *vals)
Clean up pinned variables at CPU.
Definition: gpuSplineUtils.cu:518

SplineMonolithGPU::InitGPU_SplineMonolith
__host__ void InitGPU_SplineMonolith(M3::float_t **cpu_total_weights, int n_events, unsigned int total_nknots, unsigned int n_splines, unsigned int n_tf1, int Eve_size)
Allocate memory on gpu for spline monolith.
Definition: gpuSplineUtils.cu:110

SplineMonolithGPU::CopyToGPU_SplineMonolith
__host__ void CopyToGPU_SplineMonolith(const SplineMonoStruct *cpu_spline_handler, const std::vector< float > &cpu_many_array_TF1, const std::vector< short int > &cpu_paramNo_arr_TF1, const int n_events, const std::vector< unsigned int > &cpu_nParamPerEvent, const std::vector< unsigned int > &cpu_nParamPerEvent_TF1, const int n_params, const unsigned int n_splines, const short int spline_size, const unsigned int total_nknots, const unsigned int n_tf1)
Copies data from CPU to GPU for the spline monolith.
Definition: gpuSplineUtils.cu:200

SplineMonolithGPU::InitGPU_Segments
__host__ void InitGPU_Segments(short int **segment)
Allocate memory for spline segments.
Definition: gpuSplineUtils.cu:178

TF1_red
CW: A reduced TF1 class only. Only saves parameters for each TF1 and how many parameters each paramet...
Definition: SplineStructs.h:151

TF1_red::GetSize
int GetSize() const
Get the size.
Definition: SplineStructs.h:238

TResponseFunction_red
KS: A reduced ResponseFunction Generic function used for evaluating weight.
Definition: SplineStructs.h:134

TSpline3_red
CW: Reduced TSpline3 class.
Definition: SplineStructs.h:268

TSpline3_red::GetKnot
void GetKnot(int i, M3::float_t &xtmp, M3::float_t &ytmp) const
Definition: SplineStructs.h:724

TSpline3_red::GetCoeff
void GetCoeff(int segment, M3::float_t &x, M3::float_t &y, M3::float_t &b, M3::float_t &c, M3::float_t &d) const
CW: Get the coefficient of a given segment.
Definition: SplineStructs.h:730

TSpline3_red::GetNp
M3::int_t GetNp() const override
CW: Get the number of points.
Definition: SplineStructs.h:722

UnbinnedSplineHandler::cpu_weights_tf1_var
float * cpu_weights_tf1_var
CPU arrays to hold weight for each TF1.
Definition: UnbinnedSplineHandler.h:117

UnbinnedSplineHandler::cpu_weights_spline_var
float * cpu_weights_spline_var
CPU arrays to hold weight for each spline.
Definition: UnbinnedSplineHandler.h:115

UnbinnedSplineHandler::nTF1coeff
unsigned int nTF1coeff
Sum of all coefficients over all TF1.
Definition: UnbinnedSplineHandler.h:112

UnbinnedSplineHandler::NTF1_valid
unsigned int NTF1_valid
Number of valid TF1.
Definition: UnbinnedSplineHandler.h:107

UnbinnedSplineHandler::ScanMasterSpline
void ScanMasterSpline(std::vector< std::vector< TResponseFunction_red * > > &MasterSpline, unsigned int &nEvents, short int &MaxPoints, short int &numParams, int &nSplines, unsigned int &NSplinesValid, unsigned int &numKnots, unsigned int &nTF1Valid, unsigned int &nTF1_coeff, const std::vector< RespFuncType > &SplineType)
CW: Function to scan through the MasterSpline of TSpline3.
Definition: UnbinnedSplineHandler.cpp:304

UnbinnedSplineHandler::PrepareSplineFile
void PrepareSplineFile(std::string FileName) final
KS: Prepare spline file that can be used for fast loading.
Definition: UnbinnedSplineHandler.cpp:541

UnbinnedSplineHandler::cpu_nParamPerEvent
std::vector< unsigned int > cpu_nParamPerEvent
KS: CPU map keeping track of how many parameters apply to each event; we keep two numbers here {number...
Definition: UnbinnedSplineHandler.h:122

UnbinnedSplineHandler::Evaluate
void Evaluate() final
CW: This Eval should be used when using two separate x,{y,a,b,c,d} arrays to store the weights; proba...
Definition: UnbinnedSplineHandler.cpp:705

UnbinnedSplineHandler::SynchroniseMemTransfer
void SynchroniseMemTransfer() const final
KS: After calculations are done on GPU we copy memory to CPU. This operation is asynchronous meaning ...
Definition: UnbinnedSplineHandler.cpp:844

UnbinnedSplineHandler::cpu_paramNo_TF1_arr
std::vector< short int > cpu_paramNo_TF1_arr
CW: CPU array with the parameter number of each TF1 (one entry per TF1)
Definition: UnbinnedSplineHandler.h:140

UnbinnedSplineHandler::PrintInitialsiation
void PrintInitialsiation() const
KS: Print info about how much knots etc has been initialised.
Definition: UnbinnedSplineHandler.cpp:827

UnbinnedSplineHandler::cpu_nParamPerEvent_tf1
std::vector< unsigned int > cpu_nParamPerEvent_tf1
KS: CPU map keeping track of how many parameters apply to each event; we keep two numbers here {number...
Definition: UnbinnedSplineHandler.h:125

UnbinnedSplineHandler::gpu_spline_handler
SplineMonolithGPU * gpu_spline_handler
KS: Store info about Spline monolith, this allow to obtain better step time. As all necessary informa...
Definition: UnbinnedSplineHandler.h:131

UnbinnedSplineHandler::NSplines_valid
unsigned int NSplines_valid
Number of valid splines.
Definition: UnbinnedSplineHandler.h:105

UnbinnedSplineHandler::SetupSegments
void SetupSegments()
Definition: UnbinnedSplineHandler.cpp:522

UnbinnedSplineHandler::MoveToGPU
void MoveToGPU()
CW: The shared initialiser from constructors of TResponseFunction_red.
Definition: UnbinnedSplineHandler.cpp:240

UnbinnedSplineHandler::nKnots
unsigned int nKnots
Sum of all knots over all splines.
Definition: UnbinnedSplineHandler.h:110

UnbinnedSplineHandler::CalcSplineWeights
void CalcSplineWeights() final
CPU based code which eval weight for each spline.
Definition: UnbinnedSplineHandler.cpp:720

UnbinnedSplineHandler::_max_knots
short int _max_knots
Max knots for production.
Definition: UnbinnedSplineHandler.h:102

UnbinnedSplineHandler::cpu_nPoints_arr
std::vector< short int > cpu_nPoints_arr
CPU arrays to hold number of points.
Definition: UnbinnedSplineHandler.h:137

UnbinnedSplineHandler::PrepareForGPU
void PrepareForGPU(std::vector< std::vector< TResponseFunction_red * > > &MasterSpline, const std::vector< RespFuncType > &SplineType)
CW: Prepare the TSpline3_red objects for the GPU.
Definition: UnbinnedSplineHandler.cpp:53

UnbinnedSplineHandler::Initialise
void Initialise()
KS: Set everything to null etc.
Definition: UnbinnedSplineHandler.cpp:12

UnbinnedSplineHandler::cpu_spline_handler
SplineMonoStruct * cpu_spline_handler
KS: Store info about Spline monolith, this allow to obtain better step time. As all necessary informa...
Definition: UnbinnedSplineHandler.h:128

UnbinnedSplineHandler::LoadSplineFile
void LoadSplineFile(std::string FileName) final
KS: Load preprocessed spline file.
Definition: UnbinnedSplineHandler.cpp:440

UnbinnedSplineHandler::~UnbinnedSplineHandler
virtual ~UnbinnedSplineHandler()
Destructor for UnbinnedSplineHandler class.
Definition: UnbinnedSplineHandler.cpp:616

UnbinnedSplineHandler::cpu_coeff_TF1_many
std::vector< float > cpu_coeff_TF1_many
CPU arrays to hold TF1 coefficients.
Definition: UnbinnedSplineHandler.h:134

UnbinnedSplineHandler::GetSplineCoeff_SepMany
void GetSplineCoeff_SepMany(TSpline3_red *&spl, int &nPoints, float *&xArray, float *&manyArray) const
CW: This loads up coefficients into two arrays: one x array and one yabcd array.
Definition: UnbinnedSplineHandler.cpp:638

UnbinnedSplineHandler::cpu_total_weights
M3::float_t * cpu_total_weights
KS: This holds the total CPU weights that gets read in SampleHandler.
Definition: UnbinnedSplineHandler.h:119

UnbinnedSplineHandler::UnbinnedSplineHandler
UnbinnedSplineHandler(std::vector< std::vector< TResponseFunction_red * > > &MasterSpline, const std::vector< RespFuncType > &SplineType, const bool SaveFlatTree=false, const std::string &_FastSplineName="SplineFile.root")
Constructor.
Definition: UnbinnedSplineHandler.cpp:36

UnbinnedSplineHandler::SaveSplineFile
bool SaveSplineFile
Flag telling whether we are saving spline monolith into handy root file.
Definition: UnbinnedSplineHandler.h:143

UnbinnedSplineHandler::NEvents
unsigned int NEvents
Number of events.
Definition: UnbinnedSplineHandler.h:100

UnbinnedSplineHandler::FastSplineName
std::string FastSplineName
Name of Fast Spline to which will be saved.
Definition: UnbinnedSplineHandler.h:146

UnbinnedSplineHandler::CalcTotalEventWeight
void CalcTotalEventWeight()
Calc total event weight.
Definition: UnbinnedSplineHandler.cpp:785

SynchroniseSplines
__host__ void SynchroniseSplines()
Make sure all Cuda threads finished execution.
Definition: gpuSplineUtils.cu:50

gpuSplineUtils.cuh
MaCh3 event-by-event cross-section spline code.

CudaCheckError
#define CudaCheckError()
Definition: gpuUtils.cuh:21

M3::float_t
double float_t
Definition: Core.h:37

M3::AddPath
void AddPath(std::string &FilePath)
Prepends the MACH3 environment path to FilePath if it is not already present.
Definition: Monitor.cpp:382

M3::int_t
int int_t
Definition: Core.h:38

EventInfo
Stores info about each MC event used during reweighting routine.
Definition: EventInfo.h:13

SplineMonoStruct
KS: Struct storing information for spline monolith.
Definition: SplineCommon.h:61

SplineMonoStruct::nKnots_arr
std::vector< unsigned int > nKnots_arr
KS: CPU Number of knots per spline.
Definition: SplineCommon.h:73

SplineMonoStruct::coeff_x
std::vector< float > coeff_x
KS: CPU arrays to hold X coefficient.
Definition: SplineCommon.h:67

SplineMonoStruct::coeff_many
std::vector< float > coeff_many
CPU arrays to hold other coefficients.
Definition: SplineCommon.h:70

SplineMonoStruct::paramNo_arr
std::vector< short int > paramNo_arr
CW: CPU array with the parameter number of each spline (one entry per spline, not per spline point!)
Definition: SplineCommon.h:76