MaCh3 2.4.2 Reference Guide
SampleSummary.cpp
2 
3 //this file is chock-full of usage of a ROOT interface that only takes floats; turn this warning off for this compilation unit for now
4 #pragma GCC diagnostic ignored "-Wfloat-conversion"
5 #pragma GCC diagnostic ignored "-Wuseless-cast"
6 
7 // *******************
8 // The constructor
9 SampleSummary::SampleSummary(const int n_Samples, const std::string &Filename, SampleHandlerBase* const sample, const int nSteps) {
10 // *******************
11  MACH3LOG_DEBUG("Making sample summary class...");
12  #ifdef MULTITHREAD
13  MACH3LOG_DEBUG("With OpenMP and {} threads", omp_get_max_threads());
14  #endif
15 
16  StandardFluctuation = true;
17 
18  if(StandardFluctuation) MACH3LOG_INFO("Using standard method of statistical fluctuation");
19  else MACH3LOG_INFO("Using alternative method of statistical fluctuation, which is much slower");
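 //Note: presumably the "standard" option Poisson-fluctuates each bin of the prediction
 //independently, while the alternative re-throws the histogram event by event (hence slower);
 //the choice presumably feeds into MakeFluctuatedHistogram, used in MakeChi2Hists below.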
20 
21  //KS: If true it will print the posterior predictive for every beta parameter; it is quite useful but makes the ROOT file contain a large number of plots
22  DoBetaParam = true;
23  if(DoBetaParam) MACH3LOG_INFO("I will calculate #beta parameters from Barlow-Beeston formalism");
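 //In the Barlow-Beeston formalism each bin carries a scale factor beta reconciling the
 //prediction with its finite MC statistics; GetBetaParameter (called from AddThrow below)
 //presumably returns the beta preferred by the per-bin likelihood given the data, the
 //predicted content and the sum of squared weights (w2) in that bin.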
24 
25  //If true the code will normalise each histogram; this way you can calculate a shape-only error etc. Unfortunately the p-value will then be completely wrong
26  doShapeOnly = false;
27 
28  nChainSteps = nSteps;
29  //KS: nChainSteps == 0 means we run the Prior Predictive
30  if(nChainSteps == 0) isPriorPredictive = true;
31  else isPriorPredictive = false;
32 
33  OutputName = Filename;
34  nSamples = n_Samples;
35  SampleHandler = sample;
36 
37  //Get MaCh3 modes from the manager
39 
40  nThrows = 0;
41  first_pass = true;
42  Outputfile = nullptr;
43  OutputTree = nullptr;
44  rnd = std::make_unique<TRandom3>();
45 
46  DataHist.resize(nSamples);
49  NominalHist.resize(nSamples);
50  PosteriorHist.resize(nSamples);
51  W2NomHist.resize(nSamples);
52  w2Hist.resize(nSamples);
53 
56 
57  if(DoBetaParam) BetaHist.resize(nSamples);
58 
59  maxBins.resize(nSamples);
60 
61  lnLHist_Mean.resize(nSamples);
62  lnLHist_Mode.resize(nSamples);
64  MeanHist.resize(nSamples);
66  ModeHist.resize(nSamples);
67  W2MeanHist.resize(nSamples);
68  W2ModeHist.resize(nSamples);
69  lnLHist_Mean1D.resize(nSamples);
70  lnLHist_Mode1D.resize(nSamples);
74 
75  //KS: When a histogram is created with an axis lower limit greater than or equal to its upper limit, ROOT will automatically adjust the histogram range
76  // https://root.cern.ch/doc/master/classTH1.html#auto-bin
77  lnLHist = std::make_unique<TH1D>("lnLHist_predpredfluc", "lnLHist_predpredfluc", 100, 1, -1);
78  lnLHist->SetDirectory(nullptr);
79  lnLHist->GetXaxis()->SetTitle("-2LLH (Pred Fluc, Pred)");
80  lnLHist->GetYaxis()->SetTitle("Counts");
81 
82  lnLHist_drawdata = std::make_unique<TH1D>("lnLHist_drawdata", "lnLHist_drawdata", 100, 1, -1);
83  lnLHist_drawdata->SetDirectory(nullptr);
84  lnLHist_drawdata->GetXaxis()->SetTitle("-2LLH (Data, Draw)");
85  lnLHist_drawdata->GetYaxis()->SetTitle("Counts");
86 
87  lnLHist_drawfluc = std::make_unique<TH1D>("lnLHist_drawpredfluc", "lnLHist_drawpredfluc", 100, 1, -1);
88  lnLHist_drawfluc->SetDirectory(nullptr);
89  lnLHist_drawfluc->GetXaxis()->SetTitle("-2LLH (Pred Fluc, Draw)");
90  lnLHist_drawfluc->GetYaxis()->SetTitle("Counts");
91 
92  lnLHist_drawflucdraw = std::make_unique<TH1D>("lnLHist_drawflucdraw", "lnLHist_drawflucdraw", 100, 1, -1);
93  lnLHist_drawflucdraw->SetDirectory(nullptr);
94  lnLHist_drawflucdraw->GetXaxis()->SetTitle("-2LLH (Draw Fluc, Draw)");
95  lnLHist_drawflucdraw->GetYaxis()->SetTitle("Counts");
96 
97  lnLDrawHist = std::make_unique<TH2D>("lnLDrawHist", "lnLDrawHist", 50, 1, -1, 50, 1, -1);
98  lnLDrawHist->SetDirectory(nullptr);
99  lnLDrawHist->GetXaxis()->SetTitle("-2LLH_{Pred Fluc, Draw}");
100  lnLDrawHist->GetYaxis()->SetTitle("-2LLH_{Data, Draw}");
101 
102  lnLFlucHist = std::make_unique<TH2D>("lnLFlucHist", "lnLFlucHist", 50, 1, -1, 50, 1, -1);
103  lnLFlucHist->SetDirectory(nullptr);
104  lnLFlucHist->GetXaxis()->SetTitle("-2LLH_{Draw Fluc, Draw}");
105  lnLFlucHist->GetYaxis()->SetTitle("-2LLH_{Data, Draw}");
106 
107  lnLDrawHistRate = std::make_unique<TH2D>("lnLDrawHistRate", "lnLDrawHistRate", 50, 1, -1, 50, 1, -1);
108  lnLDrawHistRate->SetDirectory(nullptr);
109  lnLDrawHistRate->GetXaxis()->SetTitle("-2LLH_{Pred Fluc, Draw}");
110  lnLDrawHistRate->GetYaxis()->SetTitle("-2LLH_{Data, Draw}");
111 
112  //KS: This is silly as it assumes all samples use the same kinematics
113  lnLFlucHist_ProjectX = std::make_unique<TH2D>("lnLFlucHist_ProjectX", "lnLFlucHist_ProjectX", 50, 1, -1, 50, 1, -1);
114  lnLFlucHist_ProjectX->SetDirectory(nullptr);
115  lnLFlucHist_ProjectX->GetXaxis()->SetTitle(("-2LLH_{Draw Fluc, Draw} for " + SampleHandler->GetKinVarName(0, 0)).c_str());
116  lnLFlucHist_ProjectX->GetYaxis()->SetTitle(("-2LLH_{Data, Draw} for " + SampleHandler->GetKinVarName(0, 0)).c_str());
117 
118  // Holds the hist of random number draws, only works for posterior predictive
119  if(!isPriorPredictive)
120  {
121  RandomHist = std::make_unique<TH1D>("RandomHist", "RandomHist", 100, 0, nChainSteps);
122  RandomHist->SetDirectory(nullptr);
123  RandomHist->GetXaxis()->SetTitle("Step");
124  const double binwidth = nChainSteps/RandomHist->GetNbinsX();
125  std::stringstream ss;
126  ss << "Draws/" << binwidth;
127  RandomHist->GetYaxis()->SetTitle(ss.str().c_str());
128  RandomHist->SetLineWidth(2);
129  }
130  else RandomHist = nullptr;
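 //A roughly flat RandomHist is a quick sanity check that the posterior predictive draws
 //sample the chain uniformly across steps.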
131 
132  for (int i = 0; i < nSamples; ++i)
133  {
134  DataHist[i] = nullptr;
135  DataHist_ProjectX[i] = nullptr;
136  DataHist_ProjectY[i] = nullptr;
137  NominalHist[i] = nullptr;
138 
139  MeanHist[i] = nullptr;
140  if(DoBetaParam) MeanHistCorrected[i] = nullptr;
141  W2MeanHist[i] = nullptr;
142  W2ModeHist[i] = nullptr;
143  lnLHist_Mean[i] = nullptr;
144  lnLHist_Mode[i] = nullptr;
145  lnLHist_Mean_ProjectX[i] = nullptr;
146  lnLHist_Mean1D[i] = nullptr;
147  lnLHist_Mode1D[i] = nullptr;
148  lnLHist_Sample_DrawData[i] = nullptr;
149  lnLHist_Sample_DrawflucDraw[i] = nullptr;
150  lnLHist_Sample_PredflucDraw[i] = nullptr;
151  }//end loop over samples
152 
153  DoByModePlots = false;
154  PosteriorHist_ByMode = nullptr;
155 
156  nModelParams = 0;
157 
158  Debug = 0;
159 }
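 //Minimal usage sketch (not part of this file; the vector names are illustrative): add the data
 //and nominal histograms once, call AddThrow once per MCMC draw, then Write():
 //
 //  SampleSummary Summary(nSamples, "PosteriorPredictive.root", SampleHandler, nChainSteps);
 //  Summary.AddData(DataVec);                  // std::vector<TH2Poly*>, one per sample
 //  Summary.AddNominal(NominalVec, NomW2Vec);  // nominal prediction and its sum of w2
 //  for (/* each accepted draw */) Summary.AddThrow(DrawVec, W2Vec, LLHPenalty, Weight, Step);
 //  Summary.Write();                           // builds the predictive spectra and p-values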
160 
161 // *******************
162 // Destructor
163 SampleSummary::~SampleSummary() {
164 // *******************
165  Outputfile->cd();
166 
167  //ROOT is weird: once you write, the TFile claims ownership of the histograms. Best to first delete the histograms and then close the file
168  Outputfile->Close();
169  delete Outputfile;
170 
171  if(DoByModePlots)
172  {
173  for (int i = 0; i < nSamples; ++i)
174  {
175  if(DataHist[i] == nullptr) continue;
176  for (int j = 0; j < Modes->GetNModes()+1; j++)
177  {
178  for (int k = 1; k <= maxBins[i]; ++k)
179  {
180  if(PosteriorHist_ByMode[i][j][k] != nullptr) delete PosteriorHist_ByMode[i][j][k];
181  }
182  delete[] PosteriorHist_ByMode[i][j];
183  if(MeanHist_ByMode[i][j] != nullptr) delete MeanHist_ByMode[i][j];
184  }
185  delete[] PosteriorHist_ByMode[i];
186  }
187  delete[] PosteriorHist_ByMode;
188  }
189 
190  for (int i = 0; i < nSamples; ++i)
191  {
192  if(DataHist[i] == nullptr) continue;
193  if(DataHist[i] != nullptr) delete DataHist[i];
194  if(NominalHist[i] != nullptr) delete NominalHist[i];
195  if(MeanHist[i] != nullptr) delete MeanHist[i];
196  if(ModeHist[i] != nullptr) delete ModeHist[i];
197  if(DoBetaParam && MeanHistCorrected[i] != nullptr) delete MeanHistCorrected[i];
198  if(W2MeanHist[i] != nullptr) delete W2MeanHist[i];
199  if(W2ModeHist[i] != nullptr) delete W2ModeHist[i];
200 
201  if(ViolinHists_ProjectX[i] != nullptr) delete ViolinHists_ProjectX[i];
202  if(ViolinHists_ProjectY[i] != nullptr) delete ViolinHists_ProjectY[i];
203 
204  if(lnLHist_Mean[i] != nullptr) delete lnLHist_Mean[i];
205  if(lnLHist_Mode[i] != nullptr) delete lnLHist_Mode[i];
206  if(lnLHist_Mean_ProjectX[i] != nullptr) delete lnLHist_Mean_ProjectX[i];
207  if(lnLHist_Mean1D[i] != nullptr) delete lnLHist_Mean1D[i];
208  if(lnLHist_Mode1D[i] != nullptr) delete lnLHist_Mode1D[i];
209  if(lnLHist_Sample_DrawData[i] != nullptr) delete lnLHist_Sample_DrawData[i];
210  if(lnLHist_Sample_DrawflucDraw[i] != nullptr) delete lnLHist_Sample_DrawflucDraw[i];
211  if(lnLHist_Sample_PredflucDraw[i] != nullptr) delete lnLHist_Sample_PredflucDraw[i];
212  }
213 }
214 
215 // *******************
216 // Check size of sample against size of vectors
217 bool SampleSummary::CheckSamples(const int Length) {
218 // *******************
219  bool ok = (nSamples == Length);
220  if (!ok) {
221  MACH3LOG_ERROR("Size of SampleVector input != number of defined samples");
222  MACH3LOG_ERROR("Size of SampleVector: {}", Length);
223  MACH3LOG_ERROR("Size of defined samples: {}", nSamples);
224  MACH3LOG_ERROR("Something has gone wrong with making the Samples");
225  throw MaCh3Exception(__FILE__ , __LINE__ );
226  }
227  return ok;
228 }
229 
230 // *******************
231 // Add a data histogram to the list (will have N_samples of these)
232 // Since the data doesn't change when varying the MC
233 void SampleSummary::AddData(std::vector<TH2Poly*> &Data) {
234 // *******************
235  const int Length = int(Data.size());
236  // Check length of samples are OK
237  if (!CheckSamples(Length)) throw MaCh3Exception(__FILE__ , __LINE__ );
238  for (int i = 0; i < Length; ++i) {
239  if (Data[i] == nullptr) {
240  DataHist[i] = nullptr;
241  DataHist_ProjectX[i] = nullptr;
242  DataHist_ProjectY[i] = nullptr;
243  maxBins[i] = 0;
244  } else {
245  std::string classname = std::string(Data[i]->ClassName());
246  if(classname == "TH2Poly")
247  {
248  DataHist[i] = static_cast<TH2Poly*>(Data[i]->Clone());
250  DataHist_ProjectX[i] = ProjectPoly(DataHist[i], true, i);
251  DataHist_ProjectY[i] = ProjectPoly(DataHist[i], false, i);
252  maxBins[i] = DataHist[i]->GetNumberOfBins();
253  } else {
254  MACH3LOG_ERROR("Somehow sample {} doesn't use TH2Poly", SampleHandler->GetSampleTitle(i));
255  MACH3LOG_ERROR("Right now I only support TH2Poly but I am an ambitious piece of code and surely will have more support in the future");
256  throw MaCh3Exception(__FILE__ , __LINE__ );
257  }
258  }
259  }
260 }
261 
262 // *******************
263 // Add the nominal histograms to the list (will have N_samples of these)
264 void SampleSummary::AddNominal(std::vector<TH2Poly*> &Nominal, std::vector<TH2Poly*> &NomW2) {
265 // *******************
266  const int Length = int(Nominal.size());
267  if (!CheckSamples(Length)) throw MaCh3Exception(__FILE__ , __LINE__ );
268 
269  //KS: ROOT is super annoying and you cannot use Clone() with OpenMP, hence we have another loop below
270  for (int i = 0; i < Length; ++i)
271  {
272  if (Nominal[i] == nullptr) {
273  NominalHist[i] = nullptr;
274  W2NomHist[i] = nullptr;
275  lnLHist_Mean[i] = nullptr;
276  lnLHist_Mode[i] = nullptr;
277  lnLHist_Mean_ProjectX[i] = nullptr;
278  MeanHist[i] = nullptr;
279  if(DoBetaParam) MeanHistCorrected[i] = nullptr;
280  ModeHist[i] = nullptr;
281  W2MeanHist[i] = nullptr;
282  W2ModeHist[i] = nullptr;
283  lnLHist_Sample_DrawData[i] = nullptr;
284  lnLHist_Sample_DrawflucDraw[i] = nullptr;
285  lnLHist_Sample_PredflucDraw[i] = nullptr;
286  // If not nullptr it indicates the selection was turned on, so initialise the privates
287  } else {
288  NominalHist[i] = static_cast<TH2Poly*>(Nominal[i]->Clone());
290  W2NomHist[i] = static_cast<TH2Poly*>(NomW2[i]->Clone());
291 
292  lnLHist_Mean[i] = static_cast<TH2Poly*>(NominalHist[i]->Clone());
293  lnLHist_Mean[i]->SetDirectory(nullptr);
294  lnLHist_Mode[i] = static_cast<TH2Poly*>(NominalHist[i]->Clone());
295  lnLHist_Mode[i]->SetDirectory(nullptr);
296  lnLHist_Mean_ProjectX[i] = static_cast<TH1D*>(DataHist_ProjectX[i]->Clone());
297  MeanHist[i] = static_cast<TH2Poly*>(NominalHist[i]->Clone());
298  if(DoBetaParam) MeanHistCorrected[i] = static_cast<TH2Poly*>(NominalHist[i]->Clone());
299  ModeHist[i] = static_cast<TH2Poly*>(NominalHist[i]->Clone());
300  W2MeanHist[i] = static_cast<TH2Poly*>(NominalHist[i]->Clone());
301  W2ModeHist[i] = static_cast<TH2Poly*>(NominalHist[i]->Clone());
302  }
303  }
304 
305  // Loop over the length of nominal and set the initial distributions up
306  //KS: Don't multithread, mostly due to the fact that we initialise histograms
307  for (int i = 0; i < Length; ++i) {
308  // If NULL it indicates the selection was turned off, so initialise all the hists to NULL
309  if (Nominal[i] != nullptr)
310  {
311  std::string name = std::string(NominalHist[i]->GetName());
312  name = name.substr(0, name.find("_nom"));
313 
314  PosteriorHist[i].resize(maxBins[i]+1);
315  w2Hist[i].resize(maxBins[i]+1);
316 
317  if(DoBetaParam) BetaHist[i].resize(maxBins[i]+1);
318 
319  for (int j = 0; j <= maxBins[i]; ++j)
320  {
321  PosteriorHist[i][j] = nullptr;
322  }
323  lnLHist_Mean[i]->SetNameTitle((name+"_MeanlnL").c_str(), (name+"_MeanlnL").c_str());
324  lnLHist_Mean[i]->Reset("");
325  lnLHist_Mean[i]->GetZaxis()->SetTitle("-2lnL_{sample} #times sign(MC-data)");
326 
327  lnLHist_Mode[i]->SetNameTitle((name+"_ModelnL").c_str(), (name+"_ModelnL").c_str());
328  lnLHist_Mode[i]->Reset("");
329  lnLHist_Mode[i]->GetZaxis()->SetTitle("-2lnL_{sample} #times sign(MC-data)");
330 
331  lnLHist_Mean_ProjectX[i]->SetNameTitle((name+"_MeanlnL_ProjectX").c_str(), (name+"_MeanlnL_ProjectX").c_str());
332  lnLHist_Mean_ProjectX[i]->Reset("");
333  lnLHist_Mean_ProjectX[i]->GetYaxis()->SetTitle("-2lnL_{sample} #times sign(MC-data)");
334 
335  MeanHist[i]->SetNameTitle((name+"_mean").c_str(), (name+"_mean").c_str());
336  MeanHist[i]->Reset("");
337  MeanHist[i]->GetZaxis()->SetTitle("Mean");
338 
339  if(DoBetaParam)
340  {
341  MeanHistCorrected[i]->SetNameTitle((name+"_mean_corrected").c_str(), (name+"_mean_corrected").c_str());
342  MeanHistCorrected[i]->Reset("");
343  MeanHistCorrected[i]->GetZaxis()->SetTitle("Mean");
344  }
347 
348  //KS: Y axis is the number of events; to get an estimate of the maximal number we use the integral
349  const int MaxBinning = doShapeOnly ? 1 : int(NoOverflowIntegral(NominalHist[i])/4);
350  ViolinHists_ProjectX[i] = new TH2D((name+"_Violin_ProjectX").c_str(), (name+"_Violin_ProjectX").c_str(), int(xbins.size()-1), &xbins[0] , 400, 0, MaxBinning);
351  ViolinHists_ProjectX[i]->GetYaxis()->SetTitle("Events");
352  ViolinHists_ProjectX[i]->GetXaxis()->SetTitle(std::string(NominalHist[i]->GetXaxis()->GetTitle()).c_str() );
353  ViolinHists_ProjectX[i]->SetDirectory(nullptr);
354 
355  ViolinHists_ProjectY[i] = new TH2D((name+"_Violin_ProjectY").c_str(), (name+"_Violin_ProjectY").c_str(), int(ybins.size()-1), &ybins[0] , 400, 0, MaxBinning);
356  ViolinHists_ProjectY[i]->GetYaxis()->SetTitle("Events");
357  ViolinHists_ProjectY[i]->GetXaxis()->SetTitle(std::string(NominalHist[i]->GetYaxis()->GetTitle()).c_str());
358  ViolinHists_ProjectY[i]->SetDirectory(nullptr);
359 
360  ModeHist[i]->SetNameTitle((name+"_mode").c_str(), (name+"_mode").c_str());
361  ModeHist[i]->Reset("");
362  ModeHist[i]->GetZaxis()->SetTitle("Mode");
363 
364  W2MeanHist[i]->SetNameTitle((name+"_w2mean").c_str(), (name+"_w2mean").c_str());
365  W2MeanHist[i]->Reset("");
366  W2MeanHist[i]->GetZaxis()->SetTitle("W2Mean");
367 
368  W2ModeHist[i]->SetNameTitle((name+"_w2mode").c_str(), (name+"_w2mode").c_str());
369  W2ModeHist[i]->Reset("");
370  W2ModeHist[i]->GetZaxis()->SetTitle("W2Mode");
371 
372  // Declare the lnL histograms
373  lnLHist_Mean1D[i] = new TH1D((name+"_MeanlnL1D").c_str(),(name+"_MeanlnL1D").c_str(), 50, 1, -1);
374  lnLHist_Mean1D[i]->GetXaxis()->SetTitle("-2LLH (Data, Pred)");
375  lnLHist_Mean1D[i]->GetYaxis()->SetTitle("Counts");
376 
377  lnLHist_Mode1D[i] = new TH1D((name+"_ModelnL1D").c_str(),(name+"_ModelnL1D").c_str(), 50, 1, -1);
378  lnLHist_Mode1D[i]->GetXaxis()->SetTitle("-2LLH (Data, Pred)");
379  lnLHist_Mode1D[i]->GetYaxis()->SetTitle("Counts");
380 
381  lnLHist_Sample_DrawData[i] = new TH1D((name+"_lnLdrawdata").c_str(),(name+"_lnL").c_str(), 100, 1, -1);
382  lnLHist_Sample_DrawData[i]->GetXaxis()->SetTitle("-2LLH (Data, Draw)");
383  lnLHist_Sample_DrawData[i]->GetYaxis()->SetTitle("Counts");
384 
385  lnLHist_Sample_DrawflucDraw[i] = new TH1D((name+"_lnLdrawfluc").c_str(),(name+"_lnL").c_str(), 100, 1, -1);
386  lnLHist_Sample_DrawflucDraw[i]->GetXaxis()->SetTitle("-2LLH (Draw Fluc, Draw)");
387  lnLHist_Sample_DrawflucDraw[i]->GetYaxis()->SetTitle("Counts");
388 
389  lnLHist_Sample_PredflucDraw[i] = new TH1D((name+"_lnLpredfluc").c_str(),(name+"_lnL").c_str(), 100, 1, -1);
390  lnLHist_Sample_PredflucDraw[i]->GetXaxis()->SetTitle("-2LLH (Pred Fluc, Draw)");
391  lnLHist_Sample_PredflucDraw[i]->GetYaxis()->SetTitle("Counts");
392  }
393  }
394  //KS: Separate loop for thread-safety reasons
395  for (int i = 0; i < Length; ++i)
396  {
397  //KS: We copied the histograms, so delete the originals
398  delete Nominal[i];
399  delete NomW2[i];
400  }
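 //Note: unlike AddData, which only clones its inputs, AddNominal takes ownership of the
 //histograms passed to it: they are cloned above and the originals deleted here.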
401 }
402 
403 // *******************
404 // Add a throw from the MCMC to the posterior predictive
405 // The input here is nSamples long
406 void SampleSummary::AddThrow(std::vector<TH2Poly*> &SampleVector, std::vector<TH2Poly*> &W2Vec, const double LLHPenalty, const double Weight, const int DrawNumber) {
407 // *******************
408  nThrows++;
409  //KS: Only makes sense for the Posterior Predictive
410  if (!isPriorPredictive) RandomHist->Fill(DrawNumber);
411 
412  const int size = int(SampleVector.size());
413  if (!CheckSamples(size)) throw MaCh3Exception(__FILE__ , __LINE__ );
414 
415  // Push back the throw
416  MCVector.push_back(SampleVector);
417  LLHPenaltyVector.push_back(LLHPenalty);
418  WeightVector.push_back(Weight);
419  W2MCVector.push_back(W2Vec);
420 
421  // Initialise the posterior hist
422  if (first_pass)
423  {
424  for (int SampleNum = 0; SampleNum < nSamples; ++SampleNum)
425  {
426  const int nXBins = 500;
427  //Initialise a TH1D corresponding to each bin in the sample's TH2Poly
428  std::string name = std::string(SampleVector[SampleNum]->GetName());
429  for (int i = 1; i <= maxBins[SampleNum]; ++i)
430  {
431  //Get PolyBin
432  TH2PolyBin* bin = static_cast<TH2PolyBin*>(SampleVector[SampleNum]->GetBins()->At(i-1));
433 
434  // Just make a little fancy name
435  std::stringstream ss2;
436  ss2 << name << "_";
437  ss2 << "p_{#mu} (" << bin->GetXMin() << "-" << bin->GetXMax() << ")";
438  ss2 << " cos#theta_{#mu} (" << bin->GetYMin() << "-" << bin->GetYMax() << ")";
439 
440  PosteriorHist[SampleNum][i] = std::make_unique<TH1D>(ss2.str().c_str(), ss2.str().c_str(),nXBins, 1, -1);
441  PosteriorHist[SampleNum][i]->SetDirectory(nullptr);
442  w2Hist[SampleNum][i] = std::make_unique<TH1D>(("w2_"+ss2.str()).c_str(), ("w2_"+ss2.str()).c_str(),nXBins, 1, -1);
443  w2Hist[SampleNum][i]->SetDirectory(nullptr);
444  if(DoBetaParam)
445  {
446  std::string betaName = "#beta_param_";
447  BetaHist[SampleNum][i] = std::make_unique<TH1D>((betaName + ss2.str()).c_str(), (betaName + ss2.str()).c_str(), 70, 1, -1);
448  BetaHist[SampleNum][i]->SetDirectory(nullptr);
449  BetaHist[SampleNum][i]->GetXaxis()->SetTitle("#beta parameter value");
450  BetaHist[SampleNum][i]->GetYaxis()->SetTitle("Counts");
451  }
452  } //end loop over bins
453  }//end loop over samples
454  }
455  first_pass = false;
456 
457  // Loop over the samples
458  #ifdef MULTITHREAD
459  #pragma omp parallel for
460  #endif
461  for (int SampleNum = 0; SampleNum < nSamples; ++SampleNum)
462  {
463  if (SampleVector[SampleNum] == nullptr) continue;
464  if(doShapeOnly) NormaliseTH2Poly(SampleVector[SampleNum]);
465  // Loop over the distribution and fill the prior/posterior predictive
466  for (int i = 1; i <= maxBins[SampleNum]; ++i) {
467  const double Content = SampleVector[SampleNum]->GetBinContent(i);
468  PosteriorHist[SampleNum][i]->Fill(Content, Weight);
469  const double w2 = W2Vec[SampleNum]->GetBinContent(i);
470  w2Hist[SampleNum][i]->Fill(w2, Weight);
471  if(DoBetaParam)
472  {
473  const double data = DataHist[SampleNum]->GetBinContent(i);
474  const double BetaParam = GetBetaParameter(data, Content, w2, likelihood);
475  BetaHist[SampleNum][i]->Fill(BetaParam, Weight);
476  }
477  } // end bin loop
478  } // end samples loop
479 } // end AddThrow
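 //After many calls, PosteriorHist[sample][bin] holds the (weighted) distribution of that bin's
 //predicted content over all throws; MakePredictive later turns its mean and mode into the
 //posterior (or prior) predictive spectra.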
480 
481 // *******************
482 // Add a throw from the MCMC to the posterior predictive
483 // The input here has dimension [nSamples][nMaCh3Modes]
484 void SampleSummary::AddThrowByMode(std::vector<std::vector<TH2Poly*>> &SampleVector_ByMode) {
485 // *******************
486  MCVectorByMode.push_back(SampleVector_ByMode);
487 
488  //KS: This means this is the first time
489  if(!DoByModePlots)
490  {
491  MACH3LOG_INFO("Turning on reaction breakdown mode, brum brum");
492  PosteriorHist_ByMode = new TH1D***[nSamples];
493  MeanHist_ByMode.resize(nSamples);
494  for (int SampleNum = 0; SampleNum < nSamples; SampleNum++)
495  {
496  if (DataHist[SampleNum] == nullptr) continue;
497 
498  PosteriorHist_ByMode[SampleNum] = new TH1D**[Modes->GetNModes()+1];
499  MeanHist_ByMode[SampleNum].resize(Modes->GetNModes()+1);
500  for (int j = 0; j < Modes->GetNModes()+1; j++)
501  {
502  PosteriorHist_ByMode[SampleNum][j] = new TH1D*[maxBins[SampleNum]+1];
503  constexpr int nXBins = 500;
504 
505  std::string name = std::string(NominalHist[SampleNum]->GetName());
506  name = name.substr(0, name.find("_nom"));
507  name = name + "_"+Modes->GetMaCh3ModeName(j);
508 
509  for (int i = 1; i <= maxBins[SampleNum]; i++)
510  {
511  //Get PolyBin
512  TH2PolyBin* bin = static_cast<TH2PolyBin*>(NominalHist[SampleNum]->GetBins()->At(i-1));
513 
514  // Just make a little fancy name
515  std::stringstream ss2;
516  ss2 << name << "_";
517  ss2 << "p_{#mu} (" << bin->GetXMin() << "-" << bin->GetXMax() << ")";
518  ss2 << " cos#theta_{#mu} (" << bin->GetYMin() << "-" << bin->GetYMax() << ")";
519 
520  //Initialise a TH1D corresponding to each bin in the sample's TH2Poly
521  PosteriorHist_ByMode[SampleNum][j][i] = new TH1D((name+ss2.str()).c_str(),(name+ss2.str()).c_str(),nXBins, 1, -1);
522  }
523  MeanHist_ByMode[SampleNum][j] = static_cast<TH2Poly*>(NominalHist[SampleNum]->Clone());
524  MeanHist_ByMode[SampleNum][j]->SetNameTitle((name+"_mean").c_str(), (name+"_mean").c_str());
525  MeanHist_ByMode[SampleNum][j]->Reset("");
526  MeanHist_ByMode[SampleNum][j]->GetZaxis()->SetTitle("Mean");
527  }
528  }
529  }
530  DoByModePlots = true;
531  // Loop over the samples
532  #ifdef MULTITHREAD
533  #pragma omp parallel for
534  #endif
535  for (int SampleNum = 0; SampleNum < nSamples; SampleNum++)
536  {
537  if (DataHist[SampleNum] == nullptr) continue;
538 
539  for (int j = 0; j < Modes->GetNModes()+1; j++)
540  {
541  if(doShapeOnly) NormaliseTH2Poly(SampleVector_ByMode[SampleNum][j]);
542  // Loop over the distribution and fill the prior/posterior predictive
543  for (int i = 1; i <= maxBins[SampleNum]; ++i)
544  {
545  const double Content = SampleVector_ByMode[SampleNum][j]->GetBinContent(i);
546  const int Entries = int(PosteriorHist_ByMode[SampleNum][j][i]->GetEntries());
547  PosteriorHist_ByMode[SampleNum][j][i]->Fill(Content, WeightVector[Entries]);
548  }
549  }
550  }
551 } // end AddThrowByMode
552 
553 // **********************
554 void SampleSummary::PrepareOutput() {
555 // **********************
556  // Make the output file (the posterior predictive machinery writes to this)
557  std::string TempString = OutputName;
558  TempString.replace(TempString.find(".root"), 5, std::string("_procsW2.root"));
559  Outputfile = M3::Open(TempString, "RECREATE", __FILE__, __LINE__);
560 
561  // The array of doubles we write to the TTree
562  // Data vs Draw
563  llh_data_draw.resize(nSamples);
564  // Fluctuated Draw vs Draw
565  llh_drawfluc_draw.resize(nSamples);
566  // Fluctuated Predictive vs Draw
567  llh_predfluc_draw.resize(nSamples);
568 
569  // Data vs Draw using Rate
570  llh_rate_data_draw.resize(nSamples);
571  // Data vs Fluctuated Predictive using Rate
572  llh_rate_predfluc_draw.resize(nSamples);
573 
574  // Data vs Fluctuated Draw
575  llh_data_drawfluc.resize(nSamples);
576  // Data vs Fluctuated Predictive
577  llh_data_predfluc.resize(nSamples);
578  // Draw vs Predictive
579  llh_draw_pred.resize(nSamples);
580  // Fluctuated Draw vs Predictive
581  llh_drawfluc_pred.resize(nSamples);
582  // Fluctuated Draw vs Fluctuated Predictive
583  llh_drawfluc_predfluc.resize(nSamples);
584 
585  // Fluctuated Predictive vs Predictive
586  llh_predfluc_pred.resize(nSamples);
587  // Fluctuated Data vs Draw
588  llh_datafluc_draw.resize(nSamples);
589 
590  // Data vs Draw for 1D projection
591  llh_data_draw_ProjectX.resize(nSamples);
592  llh_drawfluc_draw_ProjectX.resize(nSamples);
593 
594  // The output tree we're going to write to
595  OutputTree = new TTree("LLH_draws", "LLH_draws");
596  SampleNames.resize(nSamples);
597  // Loop over the samples and set the addresses of the variables to write to file
598  for (int i = 0; i < nSamples; ++i)
599  {
600  // Get the name
601  std::string SampleName = SampleHandler->GetSampleTitle(i);
602  // Strip out spaces
603  while (SampleName.find(" ") != std::string::npos) {
604  SampleName.replace(SampleName.find(" "), 1, std::string("_"));
605  }
606  SampleNames[i] = SampleName;
607  //CW: Also strip out - signs because they mess up TBranches
608  while (SampleName.find("-") != std::string::npos) {
609  SampleName.replace(SampleName.find("-"), 1, std::string("_"));
610  }
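 //Naming convention for the branches below: llh_A_B is the -2lnL with A treated as the "data"
 //and B as the prediction in GetLLH/GetTestStatLLH, where "draw" is a single MCMC draw, "pred"
 //the posterior predictive (mean) spectrum and "fluc" a Poisson-fluctuated copy.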
611 // All LLH below are used for actual p-value calculations
612  OutputTree->Branch((SampleName+"_data_draw").c_str(), &llh_data_draw[i]);
613  OutputTree->Branch((SampleName+"_drawfluc_draw").c_str(), &llh_drawfluc_draw[i]);
614  OutputTree->Branch((SampleName+"_predfluc_draw").c_str(), &llh_predfluc_draw[i]);
615 
616 // All LLH below are used for actual p-value calculations however using rate only
617  OutputTree->Branch((SampleName+"_rate_data_draw").c_str(), &llh_rate_data_draw[i]);
618  OutputTree->Branch((SampleName+"_rate_predfluc_draw").c_str(), &llh_rate_predfluc_draw[i]);
619 
620 // All LLH below are for validation reasons but not used for the final p-value
621  OutputTree->Branch((SampleName+"_data_drawfluc").c_str(), &llh_data_drawfluc[i]);
622  OutputTree->Branch((SampleName+"_data_predfluc").c_str(), &llh_data_predfluc[i]);
623  OutputTree->Branch((SampleName+"_draw_pred").c_str(), &llh_draw_pred[i]);
624  OutputTree->Branch((SampleName+"_drawfluc_pred").c_str(), &llh_drawfluc_pred[i]);
625  OutputTree->Branch((SampleName+"_drawfluc_predfluc").c_str(), &llh_drawfluc_predfluc[i]);
626  OutputTree->Branch((SampleName+"_predfluc_pred").c_str(), &llh_predfluc_pred[i]);
627  OutputTree->Branch((SampleName+"_datafluc_draw").c_str(), &llh_datafluc_draw[i]);
628 
629 // All LLH below are used for calculating p-values but using 1D projections
630  OutputTree->Branch((SampleName+"_data_draw_ProjectX").c_str(), &llh_data_draw_ProjectX[i]);
631  OutputTree->Branch((SampleName+"_drawfluc_draw_ProjectX").c_str(), &llh_drawfluc_draw_ProjectX[i]);
632  }
633 //All LLH below are used for actual p-value calculations
634  OutputTree->Branch("LLH_Penalty", &llh_penalty);
635  OutputTree->Branch("Total_LLH_Data_Draw", &total_llh_data_draw);
636  OutputTree->Branch("Total_LLH_DrawFluc_Draw", &total_llh_drawfluc_draw);
637  OutputTree->Branch("Total_LLH_PredFluc_Draw", &total_llh_predfluc_draw);
638 
639 // All LLH below are used for actual p-value calculations however using rate only
640  OutputTree->Branch("Total_LLH_Rate_PredFluc_Draw", &total_llh_rate_predfluc_draw);
641 
642 //All LLH below are for validation reasons but not used for the final p-value
643  OutputTree->Branch("Total_LLH_Data_DrawFluc", &total_llh_data_drawfluc);
644  OutputTree->Branch("Total_LLH_Data_PredFluc", &total_llh_data_predfluc);
645  OutputTree->Branch("Total_LLH_Draw_Pred", &total_llh_draw_pred);
646  OutputTree->Branch("Total_LLH_DrawFluc_Pred", &total_llh_drawfluc_pred);
647  OutputTree->Branch("Total_LLH_DrawFluc_PredFluc", &total_llh_drawfluc_predfluc);
648  OutputTree->Branch("Total_LLH_PredFluc_Pred", &total_llh_predfluc_pred);
649  OutputTree->Branch("Total_LLH_DataFluc_Draw", &total_llh_datafluc_draw);
650 
651 //All LLH below are used for calculating p-values but using 1D projections
652  OutputTree->Branch("total_llh_data_draw_ProjectX", &total_llh_data_draw_ProjectX);
653  OutputTree->Branch("total_llh_drawfluc_draw_ProjectX", &total_llh_drawfluc_draw_ProjectX);
654 
655  Outputfile->cd();
656  Dir.resize(nSamples);
657  for (int i = 0; i < nSamples; ++i)
658  {
659  // Make a new directory
660  Dir[i] = Outputfile->mkdir((SampleNames[i]).c_str());
661  }
662 }
663 
664 // *******************
665 // Write the contents to the file
666 void SampleSummary::Write() {
667 // *******************
668  // Prepare the output tree
669  PrepareOutput();
670 
671  MACH3LOG_INFO("Summarising {} throws...", nThrows);
672  // After all the throws are added finalise the sample
673  TStopwatch timer;
674  timer.Start();
675  MakePredictive();
676  timer.Stop();
677  MACH3LOG_INFO("Made Prior/Posterior Predictive, it took {:.2f}s, now writing...", timer.RealTime());
678 
679  // Studying information criterion
681 
682  OutputTree->Write();
683 
684  // Make the various distributions
685  lnLHist->Write();
686  lnLHist_drawfluc->Write();
687  lnLHist_drawflucdraw->Write();
688  lnLHist_drawdata->Write();
689  lnLDrawHist->Write();
690  lnLFlucHist->Write();
691  lnLDrawHistRate->Write();
692  //KS: Only available for Posterior Predictive
693  if(!isPriorPredictive) RandomHist->Write();
694 
695  lnLFlucHist_ProjectX->Write();
696 
697  // Loop over each sample and write to file
698  //KS: Multithreading is tempting here but we also write to the ROOT file; separating all the LLH and poly projections from the write could work well
699  for (int i = 0; i < nSamples; ++i)
700  {
701  // Skip the null histograms
702  if (DataHist[i] == nullptr || NoOverflowIntegral(DataHist[i]) == 0) continue;
703  Dir[i]->cd();
704 
705  // Make the data/MC ratio histogram
706  TH2Poly *RatioHistMean = RatioPolys(DataHist[i], MeanHist[i]);
707  RatioHistMean->GetZaxis()->SetTitle("Data/Mean");
708  TH2Poly *RatioHistMode = RatioPolys(DataHist[i], ModeHist[i]);
709  RatioHistMode->GetZaxis()->SetTitle("Data/Mode");
710  TH2Poly *RatioHistNom = RatioPolys(DataHist[i], NominalHist[i]);
711  RatioHistNom->GetZaxis()->SetTitle("Data/Nom");
712 
713  // And the normalised data histogram
714  TH2Poly *DataNormHist = NormalisePoly(DataHist[i]);
715  // Last 'true' refers to whether we project along x or y
716  TH2Poly *MeanNormHist = NormalisePoly(MeanHist[i]);
717  TH2Poly *ModeNormHist = NormalisePoly(ModeHist[i]);
718  TH1D *MeanProjectX = ProjectPoly(MeanHist[i], true, i, true);
719  TH1D *MeanProjectY = ProjectPoly(MeanHist[i], false, i, true);
720  TH1D *ModeProjectX = ProjectPoly(ModeHist[i], true, i, true);
721  TH1D *ModeProjectY = ProjectPoly(ModeHist[i], false, i, true);
722 
723  TH1D *MeanHistCorrectedProjectX = nullptr;
724  if(DoBetaParam) MeanHistCorrectedProjectX = ProjectPoly(MeanHistCorrected[i], true, i, true);
725  TH1D *MeanHistCorrectedProjectY = nullptr;
726  if(DoBetaParam) MeanHistCorrectedProjectY = ProjectPoly(MeanHistCorrected[i], false, i, true);
727 
728  TH1D *W2MeanProjectX = ProjectPoly(W2MeanHist[i], true, i);
729  TH1D *W2MeanProjectY = ProjectPoly(W2MeanHist[i], false, i);
730  TH1D *W2ModeProjectX = ProjectPoly(W2ModeHist[i], true, i);
731  TH1D *W2ModeProjectY = ProjectPoly(W2ModeHist[i], false, i);
732 
733  TH2Poly *NomNormHist = NormalisePoly(NominalHist[i]);
734  TH1D *NomProjectX = ProjectPoly(NominalHist[i], true, i);
735  TH1D *NomProjectY = ProjectPoly(NominalHist[i], false, i);
736 
737  TH1D *W2NomProjectX = ProjectPoly(W2NomHist[i], true, i);
738  TH1D *W2NomProjectY = ProjectPoly(W2NomHist[i], false, i);
739 
740  // Same for the TH2Polys
741  CalcLLH(DataHist[i], NominalHist[i], W2NomHist[i]);
742  CalcLLH(DataHist[i], MeanHist[i], W2MeanHist[i]);
743  CalcLLH(DataHist[i], ModeHist[i], W2ModeHist[i]);
744 
745  // Calculate the log likelihood for the 1D dists
746  // Sets the title of the second TH1D to the -2LLH
747  CalcLLH(DataHist_ProjectX[i], NomProjectX, W2NomProjectX);
748  CalcLLH(DataHist_ProjectX[i], MeanProjectX, W2MeanProjectX);
749  CalcLLH(DataHist_ProjectX[i], ModeProjectX, W2ModeProjectX);
750  CalcLLH(DataHist_ProjectY[i], NomProjectY, W2NomProjectY);
751  CalcLLH(DataHist_ProjectY[i], MeanProjectY, W2MeanProjectY);
752  CalcLLH(DataHist_ProjectY[i], ModeProjectY, W2ModeProjectY);
753 
754  std::string SampleName = SampleNames[i];
755  // Also strip out - signs because they mess up TBranches
756  while (SampleName.find("-") != std::string::npos) {
757  SampleName.replace(SampleName.find("-"), 1, std::string("_"));
758  }
759  OutputTree->Draw((SampleName+"_data_draw:"+SampleName+"_drawfluc_draw>>htemp").c_str());
760  TH2D *TempHistogram = static_cast<TH2D*>(gDirectory->Get("htemp")->Clone());
761  TempHistogram->GetXaxis()->SetTitle("-2LLH(Draw Fluc, Draw)");
762  TempHistogram->GetYaxis()->SetTitle("-2LLH(Data, Draw)");
763  TempHistogram->SetNameTitle((SampleNames[i]+"_drawfluc_draw").c_str(), (SampleNames[i]+"_drawfluc_draw").c_str());
764  Get2DBayesianpValue(TempHistogram);
765  TempHistogram->Write();
766  delete TempHistogram;
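  //The Bayesian p-value extracted from such a 2D histogram is, presumably, the fraction of
  //throws for which the fluctuated prediction fits the draw worse than the data does
  //(entries with x > y).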
767 
768  // Also write the 2D histograms for the p-value
769  OutputTree->Draw((SampleName+"_data_draw:"+SampleName+"_predfluc_draw>>htemp2").c_str());
770  TH2D *TempHistogram2 = static_cast<TH2D*>(gDirectory->Get("htemp2")->Clone());
771  TempHistogram2->GetXaxis()->SetTitle("-2LLH(Pred Fluc, Draw)");
772  TempHistogram2->GetYaxis()->SetTitle("-2LLH(Data, Draw)");
773  TempHistogram2->SetNameTitle((SampleNames[i]+"_predfluc_draw").c_str(), (SampleNames[i]+"_predfluc_draw").c_str());
774  Get2DBayesianpValue(TempHistogram2);
775  TempHistogram2->Write();
776  delete TempHistogram2;
777 
778  // finally p-value using rate only
779  OutputTree->Draw((SampleName+"_rate_data_draw:"+SampleName+"_rate_predfluc_draw>>htemp3").c_str());
780  TH2D *TempHistogram3 = static_cast<TH2D*>(gDirectory->Get("htemp3")->Clone());
781  TempHistogram3->GetXaxis()->SetTitle("-2LLH(Pred Fluc, Draw)");
782  TempHistogram3->GetYaxis()->SetTitle("-2LLH(Data, Draw)");
783  TempHistogram3->SetNameTitle((SampleNames[i]+"_rate_predfluc_draw").c_str(), (SampleNames[i]+"_rate_predfluc_draw").c_str());
784  Get2DBayesianpValue(TempHistogram3);
785  TempHistogram3->Write();
786  delete TempHistogram3;
787 
788  // finally p-value for 1D projection
789  OutputTree->Draw((SampleName+"_data_draw_ProjectX:"+SampleName+"_drawfluc_draw_ProjectX>>htemp4").c_str());
790  TH2D *TempHistogram4 = static_cast<TH2D*>(gDirectory->Get("htemp4")->Clone());
791  TempHistogram4->GetXaxis()->SetTitle(("-2LLH_{Draw Fluc, Draw} for " + SampleHandler->GetKinVarName(i, 0)).c_str());
792  TempHistogram4->GetYaxis()->SetTitle(("-2LLH_{Data, Draw} for " + SampleHandler->GetKinVarName(i, 0)).c_str());
793  TempHistogram4->SetNameTitle((SampleNames[i]+"_drawfluc_draw_ProjectX").c_str(), (SampleNames[i]+"_drawfluc_draw_ProjectX").c_str());
794  Get2DBayesianpValue(TempHistogram4);
795  TempHistogram4->Write();
796  delete TempHistogram4;
797 
798  // Write the Histograms to each folder
799  DataHist[i]->Write();
800  NominalHist[i]->Write();
801  MeanHist[i]->Write();
802  ModeHist[i]->Write();
803  RatioHistMean->Write();
804  RatioHistMode->Write();
805  RatioHistNom->Write();
806  if(DoBetaParam) MeanHistCorrected[i]->Write();
807 
808  W2NomHist[i]->Write();
809  W2MeanHist[i]->Write();
810  W2ModeHist[i]->Write();
811 
812  DataNormHist->Write();
813  NomNormHist->Write();
814  MeanNormHist->Write();
815  ModeNormHist->Write();
816 
817  DataHist_ProjectX[i]->Write();
818  NomProjectX->Write();
819  MeanProjectX->Write();
820  ModeProjectX->Write();
821  if(DoBetaParam) MeanHistCorrectedProjectX->Write();
822  ViolinHists_ProjectX[i]->Write();
823 
824  DataHist_ProjectY[i]->Write();
825  NomProjectY->Write();
826  MeanProjectY->Write();
827  ModeProjectY->Write();
828  if(DoBetaParam) MeanHistCorrectedProjectY->Write();
829  ViolinHists_ProjectY[i]->Write();
830 
831  W2NomProjectX->Write();
832  W2MeanProjectX->Write();
833  W2ModeProjectX->Write();
834 
835  W2NomProjectY->Write();
836  W2MeanProjectY->Write();
837  W2ModeProjectY->Write();
838 
839  //KS: This will dump lots of hists, use it only for debugging
840  if(Debug > 0)
841  {
842  TDirectory* DebugDir = Dir[i]->mkdir("Debug");
843  DebugDir->cd();
844  for (int b = 1; b <= maxBins[i]; ++b)
845  {
846  PosteriorHist[i][b]->Write();
847  std::string Title = PosteriorHist[i][b]->GetName();
848 
849  auto TempLine = std::make_unique<TLine>(NominalHist[i]->GetBinContent(b), PosteriorHist[i][b]->GetMinimum(),
850  NominalHist[i]->GetBinContent(b), PosteriorHist[i][b]->GetMaximum());
851  TempLine->SetLineColor(kRed);
852  TempLine->SetLineWidth(2);
853 
854  auto TempLineData = std::make_unique<TLine>(DataHist[i]->GetBinContent(b), PosteriorHist[i][b]->GetMinimum(),
855  DataHist[i]->GetBinContent(b), PosteriorHist[i][b]->GetMaximum());
856  TempLineData->SetLineColor(kGreen);
857  TempLineData->SetLineWidth(2);
858 
859  // Also fit a Gaussian because why not?
860  TF1 *Fitter = new TF1("Fit", "gaus", PosteriorHist[i][b]->GetBinLowEdge(1), PosteriorHist[i][b]->GetBinLowEdge(PosteriorHist[i][b]->GetNbinsX()+1));
861  PosteriorHist[i][b]->Fit(Fitter, "RQ");
862  Fitter->SetLineColor(kRed-5);
863 
864  auto Legend = std::make_unique<TLegend>(0.4, 0.75, 0.98, 0.90);
865  Legend->SetFillColor(0);
866  Legend->SetFillStyle(0);
867  Legend->SetLineWidth(0);
868  Legend->SetLineColor(0);
869  Legend->AddEntry(TempLineData.get(), Form("Data #mu=%.2f", DataHist[i]->GetBinContent(b)), "l");
870  Legend->AddEntry(TempLine.get(), Form("Prior #mu=%.2f", NominalHist[i]->GetBinContent(b)), "l");
871  Legend->AddEntry(PosteriorHist[i][b].get(), Form("Post, #mu=%.2f#pm%.2f", PosteriorHist[i][b]->GetMean(), PosteriorHist[i][b]->GetRMS()), "l");
872  Legend->AddEntry(Fitter, Form("Gauss, #mu=%.2f#pm%.2f", Fitter->GetParameter(1), Fitter->GetParameter(2)), "l");
873  std::string TempTitle = std::string(PosteriorHist[i][b]->GetName());
874 
875  TempTitle += "_canv";
876  TCanvas *TempCanvas = new TCanvas(TempTitle.c_str(), TempTitle.c_str(), 1024, 1024);
877  TempCanvas->SetGridx();
878  TempCanvas->SetGridy();
879  TempCanvas->SetRightMargin(0.03);
880  TempCanvas->SetBottomMargin(0.08);
881  TempCanvas->SetLeftMargin(0.10);
882  TempCanvas->SetTopMargin(0.06);
883  TempCanvas->cd();
884  PosteriorHist[i][b]->Draw();
885  TempLine->Draw("same");
886  TempLineData->Draw("same");
887  Fitter->Draw("same");
888  Legend->Draw("same");
889  TempCanvas->Write();
890 
891  delete TempCanvas;
892  delete Fitter;
893  //This isn't a useful check; only use it in desperation
894  if(Debug > 1) w2Hist[i][b]->Write();
895  }
896  DebugDir->Close();
897  delete DebugDir;
898  Dir[i]->cd();
899  }
900  lnLHist_Mean[i]->Write();
901  lnLHist_Mode[i]->Write();
902 
903  lnLHist_Mean_ProjectX[i]->Write();
904 
905  lnLHist_Mean1D[i]->Write();
906  lnLHist_Mode1D[i]->Write();
907 
909  lnLHist_Sample_DrawData[i]->Write();
911  lnLHist_Sample_DrawflucDraw[i]->Write();
913  lnLHist_Sample_PredflucDraw[i]->Write();
914 
915  if(DoByModePlots)
916  {
917  for (int j = 0; j < Modes->GetNModes()+1; ++j)
918  {
919  MeanHist_ByMode[i][j]->Write();
920  TH1D *MeanProjectX_ByMode = ProjectPoly(MeanHist_ByMode[i][j], true, i, true);
921  TH1D *MeanProjectY_ByMode = ProjectPoly(MeanHist_ByMode[i][j], false, i, true);
922  MeanProjectX_ByMode->Write();
923  MeanProjectY_ByMode->Write();
924  //KS: This will dump lots of hists, use it only for debugging
925  if(Debug > 0)
926  {
927  for (int b = 1; b <= maxBins[i]; ++b)
928  {
929  PosteriorHist_ByMode[i][j][b]->Write();
930  }
931  }
932  delete MeanProjectX_ByMode;
933  delete MeanProjectY_ByMode;
934  } // End loop over modes
935  }
936  // Delete temporary objects
937  delete RatioHistMean;
938  delete RatioHistMode;
939  delete RatioHistNom;
940 
941  delete DataNormHist;
942  delete MeanNormHist;
943  delete ModeNormHist;
944  delete NomNormHist;
945 
946  delete DataHist_ProjectX[i];
947  delete MeanProjectX;
948  delete ModeProjectX;
949  if(DoBetaParam) delete MeanHistCorrectedProjectX;
950  delete NomProjectX;
951 
952  delete DataHist_ProjectY[i];
953  delete MeanProjectY;
954  delete ModeProjectY;
955  if(DoBetaParam) delete MeanHistCorrectedProjectY;
956  delete NomProjectY;
957 
958  delete W2NomProjectX;
959  delete W2MeanProjectX;
960  delete W2ModeProjectX;
961 
962  delete W2NomProjectY;
963  delete W2MeanProjectY;
964  delete W2ModeProjectY;
965  MACH3LOG_INFO("");
966  } //end loop over samples
968 
970  MACH3LOG_INFO("Wrote to {}", Outputfile->GetName());
971 }
972 
973 // *******************
974 // Make the posterior predictive distributions: fit Poisson etc.
975 void SampleSummary::MakePredictive() {
976 // *******************
977  // First, for every pmu cosmu bin, look at the distribution of its predicted content across throws (the per-bin posterior histograms)
978  double llh_total_temp = 0.0;
979 
980  // Loop over the samples
981  #ifdef MULTITHREAD
982  #pragma omp parallel for reduction(+:llh_total_temp)
983  #endif
984  for (int SampleNum = 0; SampleNum < nSamples; ++SampleNum)
985  {
986  // Skip disabled samples
987  if (DataHist[SampleNum] == nullptr || NoOverflowIntegral(DataHist[SampleNum]) == 0) continue;
988 
989  // Count the -2LLH for each histogram
990  double negLogL_Mean = 0.0;
991  double negLogL_Mode = 0.0;
992 
993  // Loop over each pmu cosmu bin
994  for (int j = 1; j < maxBins[SampleNum]+1; ++j)
995  {
996  TH1D *Projection = PosteriorHist[SampleNum][j].get();
997  TH1D *W2Projection = w2Hist[SampleNum][j].get();
998 
999  // Data content for the jth bin
1000  const double nData = DataHist[SampleNum]->GetBinContent(j);
1001 
1002  // Get the mean for this projection for all the samples
1003  // This is the mean prediction for this given bin
1004  const double nMean = Projection->GetMean();
1005  const double nMeanError = Projection->GetRMS();
1006  const double nMode = Projection->GetBinCenter(Projection->GetMaximumBin());
1007  const double nModeError = GetModeError(Projection);
1008 
1009  const double nW2Mean = W2Projection->GetMean();
1010  const double nW2Mode = W2Projection->GetBinCenter(W2Projection->GetMaximumBin());
1011 
1012  double TempLLH_Mean = 0.0;
1013  double TempLLH_Mode = 0.0;
1014 
1015  //KS: Get LLH contribution; GetTestStatLLH can calculate Barlow-Beeston, IceCube or Poisson
1016  TempLLH_Mean = SampleHandler->GetTestStatLLH(nData, nMean, nW2Mean);
1017  TempLLH_Mode = SampleHandler->GetTestStatLLH(nData, nMode, nW2Mode);
1018 
1019  // Increment -2LLH
1020  //KS: multiply by 2 because BANFF reports chi2
1021  negLogL_Mean += 2*TempLLH_Mean;
1022  negLogL_Mode += 2*TempLLH_Mode;
1023 
1024  // Set the content and error to the mean in the bin
1025  MeanHist[SampleNum]->SetBinContent(j, MeanHist[SampleNum]->GetBinContent(j)+nMean);
1026  // KS: A -1 offset here was only needed for ROOT older than 6.18; for more see https://t2k-experiment.slack.com/archives/CU9CBG6NS/p1714551365661589
1027  MeanHist[SampleNum]->SetBinError(j, nMeanError);
1028 
1029  if(DoBetaParam)
1030  {
1031  TH1D *BetaTemp = BetaHist[SampleNum][j].get();
1032  const double nBetaMean = BetaTemp->GetMean();
1033  const double nBetaMeanError = BetaTemp->GetRMS();
1034  //KS: Here we modify predictions by beta parameter from Barlow-Beeston
1035  MeanHistCorrected[SampleNum]->SetBinContent(j, MeanHistCorrected[SampleNum]->GetBinContent(j)+nMean*nBetaMean);
1036  //KS: Use error propagation to calculate the error
1037  const double ErrorTemp = std::sqrt( (nBetaMean*nMeanError) * (nBetaMean*nMeanError) + (nMean*nBetaMeanError) * (nMean*nBetaMeanError));
1038  // KS: A -1 offset here was only needed for ROOT older than 6.18; for more see https://t2k-experiment.slack.com/archives/CU9CBG6NS/p1714551365661589
1039  MeanHistCorrected[SampleNum]->SetBinError(j, ErrorTemp);
1040  }
1041  // Set the content to the mode in the bin
1042  ModeHist[SampleNum]->SetBinContent(j, ModeHist[SampleNum]->GetBinContent(j)+nMode);
1043  // KS: A -1 offset here was only needed for ROOT older than 6.18; for more see https://t2k-experiment.slack.com/archives/CU9CBG6NS/p1714551365661589
1044  ModeHist[SampleNum]->SetBinError(j, nModeError);
1045  // Set the content to the mean in the bin
1046  W2MeanHist[SampleNum]->SetBinContent(j, W2MeanHist[SampleNum]->GetBinContent(j)+nW2Mean);
1047  // Set the content to the mode in the bin
1048  W2ModeHist[SampleNum]->SetBinContent(j, W2ModeHist[SampleNum]->GetBinContent(j)+nW2Mode);
1049 
1050  // Set the mean and average LLH for this given bin
1051  // Can use these hists to see where the largest -2LLH hists come from
1052  lnLHist_Mean[SampleNum]->SetBinContent(j, 2.0*TempLLH_Mean);
1053  lnLHist_Mode[SampleNum]->SetBinContent(j, 2.0*TempLLH_Mode);
1054 
1055  lnLHist_Mean1D[SampleNum]->Fill(2.0*TempLLH_Mean);
1056  lnLHist_Mode1D[SampleNum]->Fill(2.0*TempLLH_Mode);
1057  } // End loop over bins
1058  if(DoByModePlots)
1059  {
1060  for (int j = 0; j < Modes->GetNModes()+1; j++)
1061  {
1062  // Loop over each pmu cosmu bin
1063  for (int i = 1; i < maxBins[SampleNum]+1; ++i)
1064  {
1065  // Make the posterior/prior predictive projection on z
1066  // The z axis of Predictive is the bin content
1067  // Essentially zooming in on one bin and looking at the mean and mode of that bin
1068  TH1D *Projection = PosteriorHist_ByMode[SampleNum][j][i];
1069 
1070  // Get the mean for this projection for all the samples
1071  const double nMean = Projection->GetMean();
1072  const double nMeanError = Projection->GetRMS();
1073 
1074  // Set the content and error to the mean in the bin
1075  MeanHist_ByMode[SampleNum][j]->SetBinContent(i, MeanHist_ByMode[SampleNum][j]->GetBinContent(i)+nMean);
1076  // KS: A -1 offset here was only needed for ROOT older than 6.18; for more see https://t2k-experiment.slack.com/archives/CU9CBG6NS/p1714551365661589
1077  MeanHist_ByMode[SampleNum][j]->SetBinError(i, nMeanError);
1078  } // End loop over bins
1079  }
1080  }
1081  llh_total_temp += negLogL_Mean;
1082  } // End loop over samples
1083 
1084  // This is not multithreaded because ProjectPoly is not thread-safe
1085  for (int SampleNum = 0; SampleNum < nSamples; ++SampleNum)
1086  {
1087  // Skip disabled samples
1088  if (DataHist[SampleNum] == nullptr || NoOverflowIntegral(DataHist[SampleNum]) == 0) continue;
1089 
1090  //KS: Might consider caching it as we use it once again much later
1091  TH1D *MeanProjectX = ProjectPoly(MeanHist[SampleNum], true, SampleNum, true);
1092  TH1D *W2MeanProjectX = ProjectPoly(W2MeanHist[SampleNum], true, SampleNum);
1093  // Loop over each pmu bin for 1D projection
1094  for (int j = 1; j <= lnLHist_Mean_ProjectX[SampleNum]->GetXaxis()->GetNbins(); ++j)
1095  {
1096  // Data content for the jth bin
1097  const double nData = DataHist_ProjectX[SampleNum]->GetBinContent(j);
1098  const double nMean = MeanProjectX->GetBinContent(j);
1099  const double nW2Mean = W2MeanProjectX->GetBinContent(j);
1100 
1101  double TempLLH_Mean = 0.0;
1102  TempLLH_Mean = SampleHandler->GetTestStatLLH(nData, nMean, nW2Mean);
1103 
1104  //KS: multiply by 2 because BANFF reports chi2
1105  lnLHist_Mean_ProjectX[SampleNum]->SetBinContent(j, 2.0*TempLLH_Mean);
1106  }// End loop over bins
1107 
1108  delete MeanProjectX;
1109  delete W2MeanProjectX;
1110  } // End loop over samples
1111 
1112  llh_total = llh_total_temp;
1113  // Now we have our posterior predictive histogram and its LLH
1114  MACH3LOG_INFO("Prior/Posterior predictive LLH mean (sample only) = {:.2f}", llh_total);
1115  std::stringstream ss;
1116  ss << llh_total;
1117  lnLHist->SetTitle((std::string(lnLHist->GetTitle())+"_"+ss.str()).c_str());
1118 
1119  // Now make the fluctuated hists of the MeanHist and ModeHist
1120  MakeChi2Hists();
1121 
1122  // Get the 1D LLH dists
1123  MakeCutLLH();
1124 } // End MakePredictive() function
1125 
1126 // *******************
1127 // Make the fluctuated histograms (2D and 1D) for the chi2s
1128 // Essentially taking the MCMC draws and calculating their LLH to the Posterior predictive distribution
1129 // And additionally taking the data histogram and calculating the LLH to the predictive distribution
1130 // Additionally we calculate the chi2 of the (fluctuated) draws of the MC with the prior/posterior predictive and plot it against the chi2 from the draws of the MCMC and the data
1131 void SampleSummary::MakeChi2Hists() {
1132 // *******************
1133  MACH3LOG_INFO("Making the chi2 histograms...");
1134  // Have this to signify if we're doing the first pass
1135  first_pass = true;
1136 
1137  double AveragePenalty = 0;
1138 
1139  // Vectors to hold exact LLH
1140  std::vector<double> LLH_PredFluc_V(nThrows);
1141  std::vector<double> LLH_DataDraw_V(nThrows);
1142  std::vector<double> LLH_DrawFlucDraw_V(nThrows);
1143 
1144  // Loop over the draws
1145  // Should look into multi-threading this. Would require temporary THxx structures from arrays
1146  //KS: Update: the above would be ideal, but currently we loop over samples (see loop below), which isn't as efficient as looping over throws but is much, much easier to implement
1147  for (unsigned int i = 0; i < nThrows; ++i)
1148  {
1149  if (i % (nThrows/10) == 0) {
1151  }
1152 
1153  // Set the total LLH to zero to initialise
1154  double total_llh_data_draw_temp = 0.0;
1155  double total_llh_drawfluc_draw_temp = 0.0;
1156  double total_llh_predfluc_draw_temp = 0.0;
1157 
1158  double total_llh_rate_data_draw_temp = 0.0;
1159  double total_llh_rate_predfluc_draw_temp = 0.0;
1160 
1161  double total_llh_data_drawfluc_temp = 0.0;
1162  double total_llh_data_predfluc_temp = 0.0;
1163  double total_llh_draw_pred_temp = 0.0;
1164  double total_llh_drawfluc_pred_temp = 0.0;
1165  double total_llh_drawfluc_predfluc_temp = 0.0;
1166  double total_llh_predfluc_pred_temp = 0.0;
1167  double total_llh_datafluc_draw_temp = 0.0;
1168 
1169  double total_llh_data_draw_ProjectX_temp = 0.0;
1170  double total_llh_drawfluc_draw_ProjectX_temp = 0.0;
1171 
1172  // Save the double that gets written to file
1173  llh_penalty = LLHPenaltyVector[i];
1174  AveragePenalty += llh_penalty;
1175 
1176  // Make the Poisson fluctuated hist
1177  std::vector<TH2Poly*> FluctHist(nSamples);
1178  // Also Poisson fluctuate the drawn MCMC hist
1179  std::vector<TH2Poly*> FluctDrawHist(nSamples);
1180  // Finally Poisson fluctuate the data histogram
1181  std::vector<TH2Poly*> DataFlucHist(nSamples);
1182 
1183  // And the 1D projections: the fluctuated draw, the draw (X and Y) and the draw W2
1184  std::vector<TH1D*> FluctDrawHistProjectX(nSamples);
1185  std::vector<TH1D*> DrawHistProjectX(nSamples);
1186  std::vector<TH1D*> DrawHistProjectY(nSamples);
1187  std::vector<TH1D*> DrawW2HistProjectX(nSamples);
1188 
1189  //KS: We have to clone histograms here to avoid cloning in the MP loop; we have to make sure the binning matches, the content doesn't have to
1190  for (int SampleNum = 0; SampleNum < nSamples; ++SampleNum)
1191  {
1192  FluctHist[SampleNum] = static_cast<TH2Poly*>(MeanHist[SampleNum]->Clone());
1193  FluctDrawHist[SampleNum] = static_cast<TH2Poly*>(MeanHist[SampleNum]->Clone());
1194  DataFlucHist[SampleNum] = static_cast<TH2Poly*>(MeanHist[SampleNum]->Clone());
1195 
1196  FluctDrawHistProjectX[SampleNum] = static_cast<TH1D*>(DataHist_ProjectX[SampleNum]->Clone());
1197 
1198  // Get the ith draw for the jth sample
1199  TH2Poly *DrawHist = MCVector[i][SampleNum];
1200  TH2Poly *DrawW2Hist = W2MCVector[i][SampleNum];
1201 
1202  //ProjectPoly calls new TH1D under the hood; never define a new ROOT object under MP...
1203  DrawHistProjectX[SampleNum] = ProjectPoly(DrawHist, true, SampleNum);
1204  DrawW2HistProjectX[SampleNum] = ProjectPoly(DrawW2Hist, true, SampleNum);
1205  DrawHistProjectY[SampleNum] = ProjectPoly(DrawHist, false, SampleNum);
1206  }
1207  #ifdef MULTITHREAD
1208  //KS: might be the most obscure OMP reduction I have ever made...
1209  #pragma omp parallel for reduction(+:total_llh_data_draw_temp, total_llh_drawfluc_draw_temp, total_llh_predfluc_draw_temp, total_llh_rate_data_draw_temp, total_llh_rate_predfluc_draw_temp, total_llh_data_drawfluc_temp, total_llh_data_predfluc_temp, total_llh_draw_pred_temp, total_llh_drawfluc_pred_temp, total_llh_drawfluc_predfluc_temp, total_llh_predfluc_pred_temp, total_llh_datafluc_draw_temp, total_llh_data_draw_ProjectX_temp, total_llh_drawfluc_draw_ProjectX_temp)
1210  #endif
1211  // Loop over the samples
1212  for (int SampleNum = 0; SampleNum < nSamples; ++SampleNum)
1213  {
1214  // Get the ith draw for the jth sample
1215  TH2Poly *DrawHist = MCVector[i][SampleNum];
1216  TH2Poly *DrawW2Hist = W2MCVector[i][SampleNum];
1217  // Skip empty samples
1218  if (DrawHist == nullptr) continue;
1219 
1220  // Add LLH penalties from the systematics to the LLH that use the drawn histogram
1221  // Data vs Draw
1222  llh_data_draw[SampleNum] = llh_penalty;
1223  // Fluctuated Draw vs Draw
1224  llh_drawfluc_draw[SampleNum] = llh_penalty;
1225  // Fluctuated Predictive vs Draw
1226  llh_predfluc_draw[SampleNum] = llh_penalty;
1227 
1228  // Data vs Draw using rate
1229  llh_rate_data_draw[SampleNum] = llh_penalty;
1230  // Fluctuated Predictive vs Draw using rate
1231  llh_rate_predfluc_draw[SampleNum] = llh_penalty;
1232 
1233  // Data vs Fluctuated Draw
1234  llh_data_drawfluc[SampleNum] = llh_penalty;
1235  // Draw vs Predictive
1236  llh_draw_pred[SampleNum] = llh_penalty;
1237  // Fluctuated Draw vs Predictive
1238  llh_drawfluc_pred[SampleNum] = llh_penalty;
1239  // Fluctuated Draw vs Fluctuated Predictive
1240  llh_drawfluc_predfluc[SampleNum] = llh_penalty;
1241  // Fluctuated Data vs Draw
1242  llh_datafluc_draw[SampleNum] = llh_penalty;
1243 
1244  //Some LLH for 1D projections
1245  llh_data_draw_ProjectX[SampleNum] = llh_penalty;
1247 
1248  //Others get a 0 penalty term
1249  // Fluctuated Predictive vs Predictive
1250  llh_predfluc_pred[SampleNum] = 0.0;
1251  // Data vs Fluctuated Predictive
1252  llh_data_predfluc[SampleNum] = 0.0;
1253 
1254  // Make the Poisson fluctuated hist
1255  MakeFluctuatedHistogram(FluctHist[SampleNum], MeanHist[SampleNum]);
1256  // Also Poisson fluctuate the drawn MCMC hist
1257  MakeFluctuatedHistogram(FluctDrawHist[SampleNum], DrawHist);
1258  // Finally Poisson fluctuate the data histogram
1259  MakeFluctuatedHistogram(DataFlucHist[SampleNum], DataHist[SampleNum]);
1260 
1261  // Likelihood between the drawn histogram and the data
1262  const double DataDrawLLH = GetLLH(DataHist[SampleNum], DrawHist, DrawW2Hist);
1263  llh_data_draw[SampleNum] += DataDrawLLH;
1264  total_llh_data_draw_temp += DataDrawLLH;
1265 
1266  // Likelihood between drawn histogram and fluctuated drawn histogram
1267  const double DrawFlucDrawLLH = GetLLH(FluctDrawHist[SampleNum], DrawHist, DrawW2Hist);
1268  llh_drawfluc_draw[SampleNum] += DrawFlucDrawLLH;
1269  total_llh_drawfluc_draw_temp += DrawFlucDrawLLH;
1270 
1271  // Likelihood between drawn histogram and fluctuated posterior predictive distribution
1272  const double PredFlucDrawLLH = GetLLH(FluctHist[SampleNum], DrawHist, DrawW2Hist);
1273  llh_predfluc_draw[SampleNum] += PredFlucDrawLLH;
1274  total_llh_predfluc_draw_temp += PredFlucDrawLLH;
1275 
1276 //Rate Based p-value
1277  // Likelihood between the drawn histogram and the data
1278  const double RateDataDrawLLH = SampleHandler->GetTestStatLLH(NoOverflowIntegral(DataHist[SampleNum]), NoOverflowIntegral(DrawHist), NoOverflowIntegral(DrawW2Hist));
1279  llh_rate_data_draw[SampleNum] += RateDataDrawLLH;
1280  total_llh_rate_data_draw_temp += RateDataDrawLLH;
1281 
1282  // Likelihood between drawn histogram and fluctuated posterior predictive distribution using rate
1283  const double RatePredFlucDrawLLH = SampleHandler->GetTestStatLLH(NoOverflowIntegral(FluctHist[SampleNum]), NoOverflowIntegral(DrawHist), NoOverflowIntegral(DrawW2Hist));
1284  llh_rate_predfluc_draw[SampleNum] += RatePredFlucDrawLLH;
1285  total_llh_rate_predfluc_draw_temp += RatePredFlucDrawLLH;
1286 
1287 // All LLH below are for validation reason but not used for final P-Value
1288  // Likelihood between the fluctuated drawn histogram and the data
1289  const double DataDrawFlucLLH = GetLLH(DataHist[SampleNum], FluctDrawHist[SampleNum], DrawW2Hist);
1290  llh_data_drawfluc[SampleNum] += DataDrawFlucLLH;
1291  total_llh_data_drawfluc_temp += DataDrawFlucLLH;
1292 
1293  // Likelihood between the data and the fluctuated posterior predictive
1294  const double DataPredFlucLLH = GetLLH(DataHist[SampleNum], FluctHist[SampleNum], W2MeanHist[SampleNum]);
1295  llh_data_predfluc[SampleNum] += DataPredFlucLLH;
1296  total_llh_data_predfluc_temp += DataPredFlucLLH;
1297 
1298  // Likelihood between the drawn hist and the Posterior Predictive
1299  const double DrawPredLLH = GetLLH(DrawHist, MeanHist[SampleNum], W2MeanHist[SampleNum]);
1300  llh_draw_pred[SampleNum] += DrawPredLLH;
1301  total_llh_draw_pred_temp += DrawPredLLH;
1302 
1303  // Likelihood between fluctuated drawn and predictive
1304  const double DrawFlucPredLLH = GetLLH(FluctDrawHist[SampleNum], MeanHist[SampleNum], W2MeanHist[SampleNum]);
1305  llh_drawfluc_pred[SampleNum] += DrawFlucPredLLH;
1306  total_llh_drawfluc_pred_temp += DrawFlucPredLLH;
1307 
1308  // Likelihood between the fluctuated drawn histogram and the fluctuated posterior predictive
1309  const double DrawFlucPredFlucLLH = GetLLH(FluctDrawHist[SampleNum], FluctHist[SampleNum], W2MeanHist[SampleNum]);
1310  llh_drawfluc_predfluc[SampleNum] += DrawFlucPredFlucLLH;
1311  total_llh_drawfluc_predfluc_temp += DrawFlucPredFlucLLH;
1312 
1313  // Likelihood between the fluctuated posterior predictive and the posterior predictive
1314  const double PredFlucPredLLH = GetLLH(FluctHist[SampleNum], MeanHist[SampleNum], W2MeanHist[SampleNum]);
1315  llh_predfluc_pred[SampleNum] += PredFlucPredLLH;
1316  total_llh_predfluc_pred_temp += PredFlucPredLLH;
1317 
1318  // Likelihood between fluctuated data histogram and drawn histogram
1319  const double DataFlucDrawLLH = GetLLH(DataFlucHist[SampleNum], DrawHist, DrawW2Hist);
1320  llh_datafluc_draw[SampleNum] += DataFlucDrawLLH;
1321  total_llh_datafluc_draw_temp += DataFlucDrawLLH;
1322 
1323  lnLHist_Sample_DrawData[SampleNum]->Fill(DataDrawLLH);
1324  lnLHist_Sample_DrawflucDraw[SampleNum]->Fill(DrawFlucDrawLLH);
1325  lnLHist_Sample_PredflucDraw[SampleNum]->Fill(PredFlucDrawLLH);
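  // Store this throw's -2LLH values in the per-sample distributions; the (data, draw) values can
  // then be compared against the (draw fluc, draw) and (pred fluc, draw) references per sample.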
1326 
1327 // Finally, the LLHs for the 1D projections
1328  MakeFluctuatedHistogram(FluctDrawHistProjectX[SampleNum], DrawHistProjectX[SampleNum]);
1329 
1330  // Likelihood between the drawn histogram and the data for muon momentum
1331  const double DataDrawLLH_ProjectX = GetLLH(DataHist_ProjectX[SampleNum], DrawHistProjectX[SampleNum], DrawW2HistProjectX[SampleNum]);
1332  llh_data_draw_ProjectX[SampleNum] += DataDrawLLH_ProjectX;
1333  total_llh_data_draw_ProjectX_temp += DataDrawLLH_ProjectX;
1334 
1335  const double DrawFlucDrawLLH_ProjectX = GetLLH(FluctDrawHistProjectX[SampleNum], DrawHistProjectX[SampleNum], DrawW2HistProjectX[SampleNum]);
1336  llh_drawfluc_draw_ProjectX[SampleNum] += DrawFlucDrawLLH_ProjectX;
1337  total_llh_drawfluc_draw_ProjectX_temp += DrawFlucDrawLLH_ProjectX;
1338 
1339  //KS: This might seem complicated, but we make an X and a Y projection for each sample and add them to the violin plots, giving a nice Gaussian in each kinematic bin of the x and y axes
1340  FastViolinFill(ViolinHists_ProjectX[SampleNum], DrawHistProjectX[SampleNum]);
1341  FastViolinFill(ViolinHists_ProjectY[SampleNum], DrawHistProjectY[SampleNum]);
1342  } // End loop over samples (still looping throws)
1343 
1344  // Delete the temporary histograms
1345  for (int SampleNum = 0; SampleNum < nSamples; ++SampleNum)
1346  {
1347  delete FluctHist[SampleNum];
1348  delete FluctDrawHist[SampleNum];
1349  delete DataFlucHist[SampleNum];
1350  delete FluctDrawHistProjectX[SampleNum];
1351  delete DrawHistProjectX[SampleNum];
1352  delete DrawHistProjectY[SampleNum];
1353  delete DrawW2HistProjectX[SampleNum];
1354  }
1355 
1356  total_llh_data_draw = total_llh_data_draw_temp;
1357  total_llh_drawfluc_draw = total_llh_drawfluc_draw_temp;
1358  total_llh_predfluc_draw = total_llh_predfluc_draw_temp;
1359 
1360  total_llh_rate_data_draw = total_llh_rate_data_draw_temp;
1361  total_llh_rate_predfluc_draw = total_llh_rate_predfluc_draw_temp;
1362 
1363  total_llh_data_drawfluc = total_llh_data_drawfluc_temp;
1364  total_llh_data_predfluc = total_llh_data_predfluc_temp;
1365  total_llh_draw_pred = total_llh_draw_pred_temp;
1366  total_llh_drawfluc_pred = total_llh_drawfluc_pred_temp;
1367  total_llh_drawfluc_predfluc = total_llh_drawfluc_predfluc_temp;
1368  total_llh_predfluc_pred = total_llh_predfluc_pred_temp;
1369  total_llh_datafluc_draw = total_llh_datafluc_draw_temp;
1370 
1371  total_llh_data_draw_ProjectX = total_llh_data_draw_ProjectX_temp;
1372  total_llh_drawfluc_draw_ProjectX = total_llh_drawfluc_draw_ProjectX_temp;
1373 
1374  // Add LLH penalties from the systematics to the LLHs that use the drawn histogram
1378  //Rate based
1381 
1386 
1389 
1394 
1397 
1399 
1401 
1402  // Also save the per-throw LLHs to arrays so the exact p-value can be calculated without binning
1403  LLH_PredFluc_V[i] = total_llh_predfluc_draw;
1404  LLH_DataDraw_V[i] = total_llh_data_draw;
1405  LLH_DrawFlucDraw_V[i] = total_llh_drawfluc_draw;
1406 
1407  // Write to the output tree
1408  OutputTree->Fill();
1409  } // End loop over throws
1410 
1411  AveragePenalty = AveragePenalty/double(nThrows);
1412  MACH3LOG_INFO("Average LLH penalty over toys is {:.2f}", AveragePenalty);
1413  // Calculate exact p-value instead of binned
1414  unsigned int Accept_PredFluc = 0;
1415  unsigned int Accept_DrawFluc = 0;
1416  for (unsigned int i = 0; i < nThrows; ++i)
1417  {
1418  if (LLH_DataDraw_V[i] > LLH_DrawFlucDraw_V[i]) Accept_DrawFluc++;
1419  if (LLH_DataDraw_V[i] > LLH_PredFluc_V[i]) Accept_PredFluc++;
1420  }
1421  const double pvalue_DrawFluc = double(Accept_DrawFluc)/double(nThrows);
1422  const double pvalue_PredFluc = double(Accept_PredFluc)/double(nThrows);
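  // These are posterior-predictive p-values: the fraction of throws in which -2LLH(data, draw)
  // exceeds -2LLH(draw fluc, draw) or -2LLH(pred fluc, draw) respectively.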
1423 
1424  MACH3LOG_INFO("Calculated exact p-value using Fluctuation of Draw: {:.2f}", pvalue_DrawFluc);
1425  MACH3LOG_INFO("Calculated exact p-value using Fluctuation of Prediction: {:.2f}", pvalue_PredFluc);
1426 }
1427 
1428 // *******************
1429 // Make the cut LLH histogram
1430 void SampleSummary::MakeCutLLH() {
1431 // *******************
1432  Outputfile->cd();
1433  MakeCutLLH1D(lnLHist.get());
1437 
1442 }
1443 
1444 // ****************
1445 // Make the 1D cut distribution and give the 1D p-value
1446 void SampleSummary::MakeCutLLH1D(TH1D *Histogram, double llh_ref) {
1447 // ****************
1448  const double TotalIntegral = Histogram->Integral();
1449  double Above = 0.0;
1450  // Get the LLH reference from the total LLH or from the supplied reference value
1451  double llh_reference = 0.0;
1452  if (llh_ref >= 0) {
1453  llh_reference = llh_ref;
1454  } else {
1455  llh_reference = llh_total;
1456  }
1457  for (int i = 0; i < Histogram->GetXaxis()->GetNbins(); ++i) {
1458  const double xvalue = Histogram->GetBinCenter(i+1);
1459  if (xvalue >= llh_reference) {
1460  Above += Histogram->GetBinContent(i+1);
1461  }
1462  }
1463  const double pvalue = Above/TotalIntegral;
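  // The p-value is the fraction of the toy -2LLH distribution lying at or above the reference LLH.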
1464  std::stringstream ss;
1465  ss << int(Above) << "/" << int(TotalIntegral) << "=" << pvalue;
1466  Histogram->SetTitle((std::string(Histogram->GetTitle())+"_"+ss.str()).c_str());
1467 
1468  // Write a TCanvas and make a line and a filled histogram
1469  auto TempLine = std::make_unique<TLine>(llh_reference , Histogram->GetMinimum(), llh_reference, Histogram->GetMaximum());
1470  TempLine->SetLineColor(kBlack);
1471  TempLine->SetLineWidth(2);
1472 
1473  // Make the fill histogram
1474  TH1D *TempHistogram = static_cast<TH1D*>(Histogram->Clone());
1475  TempHistogram->SetFillStyle(1001);
1476  TempHistogram->SetFillColor(kRed);
1477  for (int i = 0; i < TempHistogram->GetNbinsX(); ++i) {
1478  if (TempHistogram->GetBinCenter(i+1) < llh_reference) {
1479  TempHistogram->SetBinContent(i+1, 0.0);
1480  }
1481  }
1482 
1483  auto Legend = std::make_unique<TLegend>(0.6, 0.6, 0.9, 0.9);
1484  Legend->SetFillColor(0);
1485  Legend->SetFillStyle(0);
1486  Legend->SetLineWidth(0);
1487  Legend->SetLineColor(0);
1488  Legend->AddEntry(TempLine.get(), Form("Reference LLH, %.0f, p-value=%.2f", llh_reference, pvalue), "l");
1489  Legend->AddEntry(Histogram, Form("LLH, #mu=%.1f#pm%.1f", Histogram->GetMean(), Histogram->GetRMS()), "l");
1490  std::string Title = Histogram->GetName();
1491  Title += "_canv";
1492  TCanvas *TempCanvas = new TCanvas(Title.c_str(), Title.c_str(), 1024, 1024);
1493  TempCanvas->SetGridx();
1494  TempCanvas->SetGridy();
1495  Histogram->Draw();
1496  TempHistogram->Draw("same");
1497  TempLine->Draw("same");
1498  Legend->Draw("same");
1499 
1500  TempCanvas->Write();
1501 
1502  delete TempHistogram;
1503  delete TempCanvas;
1504 }
1505 
1506 // ****************
1507 // Make the 1D Event Rate Hist
1508 void SampleSummary::MakeCutEventRate(TH1D *Histogram, const double DataRate) {
1509 // ****************
1510  // For the event rate histogram add a TLine to the data rate
1511  auto TempLine = std::make_unique<TLine>(DataRate, Histogram->GetMinimum(), DataRate, Histogram->GetMaximum());
1512  TempLine->SetLineColor(kRed);
1513  TempLine->SetLineWidth(2);
1514  // Also fit a Gaussian because why not?
1515  TF1 *Fitter = new TF1("Fit", "gaus", Histogram->GetBinLowEdge(1), Histogram->GetBinLowEdge(Histogram->GetNbinsX()+1));
1516  Histogram->Fit(Fitter, "RQ");
1517  Fitter->SetLineColor(kRed-5);
1518  // Calculate a p-value
1519  double Above = 0.0;
1520  for (int z = 0; z < Histogram->GetNbinsX(); ++z) {
1521  const double xvalue = Histogram->GetBinCenter(z+1);
1522  if (xvalue >= DataRate) {
1523  Above += Histogram->GetBinContent(z+1);
1524  }
1525  }
1526  const double pvalue = Above/Histogram->Integral();
1527  auto Legend = std::make_unique<TLegend>(0.4, 0.75, 0.98, 0.90);
1528  Legend->SetFillColor(0);
1529  Legend->SetFillStyle(0);
1530  Legend->SetLineWidth(0);
1531  Legend->SetLineColor(0);
1532  Legend->AddEntry(TempLine.get(), Form("Data, %.0f, p-value=%.2f", DataRate, pvalue), "l");
1533  Legend->AddEntry(Histogram, Form("MC, #mu=%.1f#pm%.1f", Histogram->GetMean(), Histogram->GetRMS()), "l");
1534  Legend->AddEntry(Fitter, Form("Gauss, #mu=%.1f#pm%.1f", Fitter->GetParameter(1), Fitter->GetParameter(2)), "l");
1535  std::string TempTitle = std::string(Histogram->GetName());
1536  TempTitle += "_canv";
1537  TCanvas *TempCanvas = new TCanvas(TempTitle.c_str(), TempTitle.c_str(), 1024, 1024);
1538  TempCanvas->SetGridx();
1539  TempCanvas->SetGridy();
1540  TempCanvas->SetRightMargin(0.03);
1541  TempCanvas->SetBottomMargin(0.08);
1542  TempCanvas->SetLeftMargin(0.10);
1543  TempCanvas->SetTopMargin(0.06);
1544  TempCanvas->cd();
1545  Histogram->Draw();
1546  TempLine->Draw("same");
1547  Fitter->Draw("same");
1548  Legend->Draw("same");
1549  TempCanvas->Write();
1550  Histogram->Write();
1551 
1552  delete TempCanvas;
1553  delete Fitter;
1554 }
1555 
1556 // ****************
1557 // Calculate the LLH for TH1D and append the LLH to the title of MCHist
1558 void SampleSummary::CalcLLH(TH1D * const & DatHist, TH1D * const & MCHist, TH1D * const & W2Hist) {
1559 // ****************
1560  const double llh = GetLLH(DatHist, MCHist, W2Hist);
1561  std::stringstream ss;
1562  ss << "_2LLH=" << llh;
1563  MCHist->SetTitle((std::string(MCHist->GetTitle())+ss.str()).c_str());
1564  MACH3LOG_INFO("{:<55} {:<10.2f} {:<10.2f} {:<10.2f}", MCHist->GetName(), DatHist->Integral(), MCHist->Integral(), llh);
1565 }
1566 
1567 // ****************
1568 // Calculate the LLH for TH2Poly and append the LLH to the title of MCHist
1569 void SampleSummary::CalcLLH(TH2Poly * const & DatHist, TH2Poly * const & MCHist, TH2Poly * const & W2Hist) {
1570 // ****************
1571  const double llh = GetLLH(DatHist, MCHist, W2Hist);
1572  std::stringstream ss;
1573  ss << "_2LLH=" << llh;
1574  MCHist->SetTitle((std::string(MCHist->GetTitle())+ss.str()).c_str());
1575  MACH3LOG_INFO("{:<55} {:<10.2f} {:<10.2f} {:<10.2f}", MCHist->GetName(), NoOverflowIntegral(DatHist), NoOverflowIntegral(MCHist), llh);
1576 }
1577 
1578 // ****************
1579 double SampleSummary::GetLLH(TH2Poly * const & DatHist, TH2Poly * const & MCHist, TH2Poly * const & W2Hist) {
1580 // ****************
1581  double llh = 0.0;
1582  for (int i = 1; i < DatHist->GetNumberOfBins()+1; ++i)
1583  {
1584  const double data = DatHist->GetBinContent(i);
1585  const double mc = MCHist->GetBinContent(i);
1586  const double w2 = W2Hist->GetBinContent(i);
1587  llh += SampleHandler->GetTestStatLLH(data, mc, w2);
1588  }
1589  //KS: multiply by 2 because BANFF reports a chi2 (-2*LLH)
1590  return 2*llh;
1591 }
1592 
1593 // ****************
1594 double SampleSummary::GetLLH(TH1D * const & DatHist, TH1D * const & MCHist, TH1D * const & W2Hist) {
1595 // ****************
1596  double llh = 0.0;
1597  for (int i = 1; i <= DatHist->GetXaxis()->GetNbins(); ++i)
1598  {
1599  const double data = DatHist->GetBinContent(i);
1600  const double mc = MCHist->GetBinContent(i);
1601  const double w2 = W2Hist->GetBinContent(i);
1602  llh += SampleHandler->GetTestStatLLH(data, mc, w2);
1603  }
1604  //KS: multiply by 2 because BANFF reports a chi2 (-2*LLH)
1605  return 2*llh;
1606 }
1607 
1608 // ****************
1609 void SampleSummary::PlotBetaParameters() {
1610 // ****************
1611  // Make a new directory
1612  TDirectory *BetaDir = Outputfile->mkdir("BetaParameters");
1613  BetaDir->cd();
1614 
1615  int originalErrorLevel = gErrorIgnoreLevel;
1616 
1617  //To avoid Warning in <Fit>: Fit data is empty
1618  gErrorIgnoreLevel = kFatal;
1619 
1620  MACH3LOG_INFO("Writing Beta parameters");
1621  std::vector<TDirectory *> DirBeta(nSamples);
1622  for (int i = 0; i < nSamples; ++i)
1623  {
1624  // Make a new directory
1625  DirBeta[i] = BetaDir->mkdir((SampleNames[i]).c_str());
1626  DirBeta[i]->cd();
1627 
1628  // Loop over each pmu cosmu bin
1629  for (int j = 1; j < maxBins[i]+1; ++j)
1630  {
1631  const double data = DataHist[i]->GetBinContent(j);
1632  const double mc = NominalHist[i]->GetBinContent(j);
1633  const double w2 = W2NomHist[i]->GetBinContent(j);
1634 
1635  const double BetaPrior = GetBetaParameter(data, mc, w2, likelihood);
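  // In the Barlow-Beeston formalism beta scales the MC prediction in this bin; GetBetaParameter
  // returns the reference (prior) value for the configured test statistic, which is drawn as a line
  // over the posterior beta distribution accumulated in BetaHist.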
1636 
1637  auto TempLine = std::unique_ptr<TLine>(new TLine(BetaPrior, BetaHist[i][j]->GetMinimum(), BetaPrior, BetaHist[i][j]->GetMaximum()));
1638  TempLine->SetLineColor(kRed);
1639  TempLine->SetLineWidth(2);
1640 
1641  // Also fit a Gaussian because why not?
1642  TF1 *Fitter = new TF1("Fit", "gaus", BetaHist[i][j]->GetBinLowEdge(1), BetaHist[i][j]->GetBinLowEdge(BetaHist[i][j]->GetNbinsX()+1));
1643  BetaHist[i][j]->Fit(Fitter, "RQ");
1644  Fitter->SetLineColor(kRed-5);
1645 
1646  auto Legend = std::make_unique<TLegend>(0.4, 0.75, 0.98, 0.90);
1647  Legend->SetFillColor(0);
1648  Legend->SetFillStyle(0);
1649  Legend->SetLineWidth(0);
1650  Legend->SetLineColor(0);
1651  Legend->AddEntry(TempLine.get(), Form("Prior #mu=%.4f, N_{data}=%.0f", BetaPrior, data), "l");
1652  Legend->AddEntry(BetaHist[i][j].get(), Form("Post, #mu=%.4f#pm%.4f", BetaHist[i][j]->GetMean(), BetaHist[i][j]->GetRMS()), "l");
1653  Legend->AddEntry(Fitter, Form("Gauss, #mu=%.4f#pm%.4f", Fitter->GetParameter(1), Fitter->GetParameter(2)), "l");
1654  std::string TempTitle = std::string(BetaHist[i][j]->GetName());
1655 
1656  TempTitle += "_canv";
1657  TCanvas *TempCanvas = new TCanvas(TempTitle.c_str(), TempTitle.c_str(), 1024, 1024);
1658  TempCanvas->SetGridx();
1659  TempCanvas->SetGridy();
1660  TempCanvas->SetRightMargin(0.03);
1661  TempCanvas->SetBottomMargin(0.08);
1662  TempCanvas->SetLeftMargin(0.10);
1663  TempCanvas->SetTopMargin(0.06);
1664  TempCanvas->cd();
1665  BetaHist[i][j]->Draw();
1666  TempLine->Draw("same");
1667  Fitter->Draw("same");
1668  Legend->Draw("same");
1669  TempCanvas->Write();
1670  BetaHist[i][j]->Write();
1671 
1672  delete TempCanvas;
1673  delete Fitter;
1674  }
1675  DirBeta[i]->Write();
1676  delete DirBeta[i];
1677  }
1678  BetaDir->Write();
1679  delete BetaDir;
1680 
1681  gErrorIgnoreLevel = originalErrorLevel;
1682  Outputfile->cd();
1683 }
1684 
1685 // ****************
1686 // Study how correlated the samples and their kinematic bins are
1687 void SampleSummary::StudyKinematicCorrelations() {
1688 // ****************
1689  MACH3LOG_INFO("Calculating Correlations");
1690  TStopwatch timer;
1691  timer.Start();
1692 
1693  // Data vs Draw for 1D projection
1694  std::vector<double> NEvents_Sample(nSamples);
1695  double event_rate = 0.;
1696 
1697  // The output tree we're going to write to
1698  TTree* Event_Rate_Tree = new TTree("Event_Rate_draws", "Event_Rate_draws");
1699  Event_Rate_Tree->Branch("Event_Rate", &event_rate);
1700  // Loop over the samples and set the addresses of the variables to write to file
1701  for (int i = 0; i < nSamples; ++i)
1702  {
1703  // Get the name
1704  std::string SampleName = SampleNames[i];
1705  //CW: Also strip out "-" signs because they mess up TBranch names
1706  while (SampleName.find("-") != std::string::npos) {
1707  SampleName.replace(SampleName.find("-"), 1, std::string("_"));
1708  }
1709  Event_Rate_Tree->Branch((SampleName+"_Event_Rate").c_str(), &NEvents_Sample[i]);
1710  }
1711 
1712  // Holds the total event rate
1713  auto EventHist = std::make_unique<TH1D>("EventHist", "Total Event Rate", 100, 1, -1);
1714  EventHist->SetDirectory(nullptr);
1715  EventHist->GetXaxis()->SetTitle("Total event rate");
1716  EventHist->GetYaxis()->SetTitle("Counts");
1717  EventHist->SetLineWidth(2);
1718 
1719  // Holds the event rate for the distribution
1720  std::vector<std::unique_ptr<TH1D>> SumHist(nSamples);
1721  for (int i = 0; i < nSamples; ++i)
1722  {
1723  std::string name = std::string(NominalHist[i]->GetName());
1724  name = name.substr(0, name.find("_nom"));
1725 
1726  SumHist[i] = std::make_unique<TH1D>((name+"_sum").c_str(),(name+"_sum").c_str(), 100, 1, -1);
1727  SumHist[i]->GetXaxis()->SetTitle("N_{events}");
1728  SumHist[i]->GetYaxis()->SetTitle("Counts");
1729  double Integral = NoOverflowIntegral(DataHist[i]);
1730  std::stringstream ss;
1731  ss << Integral;
1732  SumHist[i]->SetTitle((std::string(SumHist[i]->GetTitle())+"_"+ss.str()).c_str());
1733  }
1734 
1735  for (unsigned int it = 0; it < nThrows; ++it)
1736  {
1737  double event_rate_temp = 0.;
1738  // Loop over the samples
1739  #ifdef MULTITHREAD
1740  #pragma omp parallel for reduction(+:event_rate_temp)
1741  #endif
1742  for (int SampleNum = 0; SampleNum < nSamples; ++SampleNum)
1743  {
1744  NEvents_Sample[SampleNum] = NoOverflowIntegral(MCVector[it][SampleNum]);
1745  // Fill the sum histogram with the integral of the sampled distribution
1746  SumHist[SampleNum]->Fill(NEvents_Sample[SampleNum], WeightVector[it]);
1747 
1748  event_rate_temp += NEvents_Sample[SampleNum];
1749  } // end samples loop
1750  event_rate = event_rate_temp;
1751  EventHist->Fill(event_rate);
1752  Event_Rate_Tree->Fill();
1753  } //end loops over throws
1754  Event_Rate_Tree->Write();
1755  delete Event_Rate_Tree;
1756 
1757  double DataRate = 0.0;
1758  #ifdef MULTITHREAD
1759  #pragma omp parallel for reduction(+:DataRate)
1760  #endif
1761  for (int i = 0; i < nSamples; ++i)
1762  {
1763  DataRate += NoOverflowIntegral(DataHist[i]);
1764  }
1765  MakeCutEventRate(EventHist.get(), DataRate);
1766 
1767  for (int SampleNum = 0; SampleNum < nSamples; ++SampleNum)
1768  {
1769  Dir[SampleNum]->cd();
1770  //Make fancy event rate histogram
1771  MakeCutEventRate(SumHist[SampleNum].get(), NoOverflowIntegral(DataHist[SampleNum]));
1772  }
1773 
1774  // Make a new directory
1775  TDirectory *CorrDir = Outputfile->mkdir("Correlations");
1776  CorrDir->cd();
1777 
1778  TMatrixDSym* SampleCorrelation = new TMatrixDSym(nSamples);
1779  std::vector<std::vector<std::unique_ptr<TH2D>>> SamCorr(nSamples);
1780  for (int i = 0; i < nSamples; ++i)
1781  {
1782  SamCorr[i].resize(nSamples);
1783 
1784  (*SampleCorrelation)(i,i) = 1.0;
1785  const double Min_i = SumHist[i]->GetXaxis()->GetBinLowEdge(1);
1786  const double Max_i = SumHist[i]->GetXaxis()->GetBinUpEdge(SumHist[i]->GetNbinsX()+1);
1787  for (int j = 0; j < nSamples; ++j)
1788  {
1789  const double Min_j = SumHist[j]->GetXaxis()->GetBinLowEdge(1);
1790  const double Max_j = SumHist[j]->GetXaxis()->GetBinUpEdge(SumHist[j]->GetNbinsX()+1);
1791 
1792  // TH2D to hold the Correlation
1793  SamCorr[i][j] = std::make_unique<TH2D>(Form("SamCorr_%i_%i", i, j), Form("SamCorr_%i_%i", i, j), 70, Min_i, Max_i, 70, Min_j, Max_j);
1794  SamCorr[i][j]->SetDirectory(nullptr);
1795  SamCorr[i][j]->SetMinimum(0);
1796  SamCorr[i][j]->GetXaxis()->SetTitle(SampleNames[i].c_str());
1797  SamCorr[i][j]->GetYaxis()->SetTitle(SampleNames[j].c_str());
1798  SamCorr[i][j]->GetZaxis()->SetTitle("Events");
1799  }
1800  }
1801 
1802  // Now we are sure we have the diagonal elements, let's make the off-diagonals
1803  #ifdef MULTITHREAD
1804  #pragma omp parallel for
1805  #endif
1806  for (int i = 0; i < nSamples; ++i)
1807  {
1808  for (int j = 0; j <= i; ++j)
1809  {
1810  // Skip the diagonal elements which we've already done above
1811  if (j == i) continue;
1812 
1813  for (unsigned int it = 0; it < nThrows; ++it)
1814  {
1815  SamCorr[i][j]->Fill(NoOverflowIntegral(MCVector[it][i]), NoOverflowIntegral(MCVector[it][j]));
1816  }
1817  SamCorr[i][j]->Smooth();
1818 
1819  // Get the correlation between these two samples
1820  (*SampleCorrelation)(i,j) = SamCorr[i][j]->GetCorrelationFactor();
1821  (*SampleCorrelation)(j,i) = (*SampleCorrelation)(i,j);
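  // GetCorrelationFactor returns the Pearson correlation of the two axes, i.e. of the per-throw
  // event rates of samples i and j; the matrix is filled symmetrically.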
1822  }// End j loop
1823  }// End i loop
1824 
1825  auto hSamCorr = std::make_unique<TH2D>("Sample Correlation", "Sample Correlation", nSamples, 0, nSamples, nSamples, 0, nSamples);
1826  hSamCorr->SetDirectory(nullptr);
1827  hSamCorr->GetZaxis()->SetTitle("Correlation");
1828  hSamCorr->SetMinimum(-1);
1829  hSamCorr->SetMaximum(1);
1830  hSamCorr->GetXaxis()->SetLabelSize(0.015);
1831  hSamCorr->GetYaxis()->SetLabelSize(0.015);
1832 
1833  // Loop over the correlation matrix entries
1834  for (int i = 0; i < nSamples; ++i)
1835  {
1836  hSamCorr->GetXaxis()->SetBinLabel(i+1, SampleNames[i].c_str());
1837 
1838  for (int j = 0; j < nSamples; ++j)
1839  {
1840  hSamCorr->GetYaxis()->SetBinLabel(j+1, SampleNames[j].c_str());
1841  // The value of the correlation
1842  const double corr = (*SampleCorrelation)(i,j);
1843  hSamCorr->SetBinContent(i+1, j+1, corr);
1844  }
1845  }
1846  hSamCorr->Draw("colz");
1847  hSamCorr->Write("Sample_Corr");
1848 
1849  SampleCorrelation->Write("Sample_Correlation");
1850  delete SampleCorrelation;
1851 
1852  for (int i = 0; i < nSamples; ++i)
1853  {
1854  for (int j = 0; j <= i; ++j)
1855  {
1856  // Skip the diagonal elements which we've already done above
1857  if (j == i) continue;
1858  SamCorr[i][j]->Write();
1859  }// End j loop
1860  }// End i loop
1861 
1862  //KS: This can take ages so better turn it off by default
1863  bool DoPerKinemBin = false;
1864  if(DoPerKinemBin)
1865  {
1866  //KS: Now the same but for kinematic bin of each sample
1867  for (int SampleNum = 0; SampleNum < nSamples; ++SampleNum)
1868  {
1869  TMatrixDSym* KinCorrelation = new TMatrixDSym(maxBins[SampleNum]);
1870  std::vector<std::vector<std::unique_ptr<TH2D>>> KinCorr(maxBins[SampleNum]);
1871  for (int i = 0; i < maxBins[SampleNum]; ++i)
1872  {
1873  KinCorr[i].resize(maxBins[SampleNum]);
1874  (*KinCorrelation)(i,i) = 1.0;
1875 
1876  const double Min_i = PosteriorHist[SampleNum][i+1]->GetXaxis()->GetBinLowEdge(1);
1877  const double Max_i = PosteriorHist[SampleNum][i+1]->GetXaxis()->GetBinUpEdge(PosteriorHist[SampleNum][i+1]->GetNbinsX()+1);
1878 
1879  //Get PolyBin
1880  TH2PolyBin* bin = static_cast<TH2PolyBin*>(NominalHist[SampleNum]->GetBins()->At(i));
1881  // Just make a little fancy name
1882  std::stringstream ss2;
1883  ss2 << "p_{#mu} (" << bin->GetXMin() << "-" << bin->GetXMax() << ")";
1884  ss2 << " cos#theta_{#mu} (" << bin->GetYMin() << "-" << bin->GetYMax() << ")";
1885 
1886  for (int j = 0; j < maxBins[SampleNum]; ++j)
1887  {
1888  const double Min_j = PosteriorHist[SampleNum][j+1]->GetXaxis()->GetBinLowEdge(1);
1889  const double Max_j = PosteriorHist[SampleNum][j+1]->GetXaxis()->GetBinUpEdge(PosteriorHist[SampleNum][j+1]->GetNbinsX()+1);
1890 
1891  // TH2D to hold the Correlation
1892  KinCorr[i][j] = std::make_unique<TH2D>( Form("Kin_%i_%i_%i", SampleNum, i, j),
1893  Form("Kin_%i_%i_%i", SampleNum, i, j), 70, Min_i, Max_i, 70, Min_j, Max_j);
1894  KinCorr[i][j]->SetDirectory(nullptr);
1895  KinCorr[i][j]->SetMinimum(0);
1896 
1897  KinCorr[i][j]->GetXaxis()->SetTitle(ss2.str().c_str());
1898 
1899  bin = static_cast<TH2PolyBin*>(NominalHist[SampleNum]->GetBins()->At(j));
1900  // Just make a little fancy name
1901  std::stringstream ss3;
1902  ss3 << "p_{#mu} (" << bin->GetXMin() << "-" << bin->GetXMax() << ")";
1903  ss3 << " cos#theta_{#mu} (" << bin->GetYMin() << "-" << bin->GetYMax() << ")";
1904  KinCorr[i][j]->GetYaxis()->SetTitle(ss3.str().c_str());
1905  KinCorr[i][j]->GetZaxis()->SetTitle("Events");
1906  }
1907  }
1908  // Now we are sure we have the diagonal elements, let's make the off-diagonals
1909  #ifdef MULTITHREAD
1910  #pragma omp parallel for
1911  #endif
1912  for (int i = 0; i < maxBins[SampleNum]; ++i)
1913  {
1914  for (int j = 0; j <= i; ++j)
1915  {
1916  // Skip the diagonal elements which we've already done above
1917  if (j == i) continue;
1918 
1919  for (unsigned int it = 0; it < nThrows; ++it)
1920  {
1921  KinCorr[i][j]->Fill(MCVector[it][SampleNum]->GetBinContent(i+1), MCVector[it][SampleNum]->GetBinContent(j+1));
1922  }
1923  KinCorr[i][j]->Smooth();
1924 
1925  // Get the correlation between these two kinematic bins
1926  (*KinCorrelation)(i,j) = KinCorr[i][j]->GetCorrelationFactor();
1927  (*KinCorrelation)(j,i) = (*KinCorrelation)(i,j);
1928  }// End j loop
1929  }// End i loop
1930 
1931  auto hKinCorr = std::make_unique<TH2D>(SampleNames[SampleNum].c_str(), SampleNames[SampleNum].c_str(),
1932  maxBins[SampleNum], 0, maxBins[SampleNum], maxBins[SampleNum], 0, maxBins[SampleNum]);
1933  hKinCorr->SetDirectory(nullptr);
1934  hKinCorr->GetZaxis()->SetTitle("Correlation");
1935  hKinCorr->SetMinimum(-1);
1936  hKinCorr->SetMaximum(1);
1937  hKinCorr->GetXaxis()->SetLabelSize(0.015);
1938  hKinCorr->GetYaxis()->SetLabelSize(0.015);
1939 
1940  // Loop over the correlation matrix entries
1941  for (int i = 0; i < maxBins[SampleNum]; ++i)
1942  {
1943  //Get PolyBin
1944  TH2PolyBin* bin = static_cast<TH2PolyBin*>(NominalHist[SampleNum]->GetBins()->At(i));
1945  // Just make a little fancy name
1946  std::stringstream ss2;
1947  ss2 << "p_{#mu} (" << bin->GetXMin() << "-" << bin->GetXMax() << ")";
1948  ss2 << " cos#theta_{#mu} (" << bin->GetYMin() << "-" << bin->GetYMax() << ")";
1949  hKinCorr->GetXaxis()->SetBinLabel(i+1, ss2.str().c_str());
1950 
1951  for (int j = 0; j < maxBins[SampleNum]; ++j)
1952  {
1953  bin = static_cast<TH2PolyBin*>(NominalHist[SampleNum]->GetBins()->At(j));
1954  // Just make a little fancy name
1955  std::stringstream ss3;
1956  ss3 << "p_{#mu} (" << bin->GetXMin() << "-" << bin->GetXMax() << ")";
1957  ss3 << " cos#theta_{#mu} (" << bin->GetYMin() << "-" << bin->GetYMax() << ")";
1958  KinCorr[i][j]->GetYaxis()->SetTitle(ss3.str().c_str());
1959 
1960  hKinCorr->GetYaxis()->SetBinLabel(j+1, ss3.str().c_str());
1961  // The value of the correlation
1962  const double corr = (*KinCorrelation)(i,j);
1963  hKinCorr->SetBinContent(i+1, j+1, corr);
1964  }
1965  }
1966  hKinCorr->Draw("colz");
1967  hKinCorr->Write((SampleNames[SampleNum] + "_Corr").c_str());
1968 
1969  KinCorrelation->Write((SampleNames[SampleNum] + "_Correlation").c_str());
1970  delete KinCorrelation;
1971 
1972  /*
1973  for (int i = 0; i < maxBins[SampleNum]; ++i)
1974  {
1975  for (int j = 0; j <= i; ++j)
1976  {
1977  // Skip the diagonal elements which we've already done above
1978  if (j == i) continue;
1979  KinCorr[i][j]->Write();
1980  }// End j loop
1981  }// End i loop
1982  */
1983  }//end loop over samples
1984  }//end if DoPerKinemBin
1985  else
1986  {
1987  MACH3LOG_INFO("Not calculating correlations per each kinematic bin");
1988  }
1989 
1990  if(DoByModePlots)
1991  {
1992  // Holds the total event rate by mode
1993  std::vector<TH1D*> EventHist_ByMode(Modes->GetNModes()+1);
1994  for (int j = 0; j < Modes->GetNModes()+1; j++)
1995  {
1996  std::string ModeName = Modes->GetMaCh3ModeName(j);
1997  EventHist_ByMode[j] = new TH1D(Form("EventHist_%s", ModeName.c_str()), Form("Total Event Rate %s", ModeName.c_str()), 100, 1, -1);
1998  EventHist_ByMode[j]->GetXaxis()->SetTitle("Total event rate");
1999  EventHist_ByMode[j]->GetYaxis()->SetTitle("Counts");
2000  EventHist_ByMode[j]->SetLineWidth(2);
2001  }
2002 
2003  //KS: Here we calculate total event rates for each mode, maybe not most efficient but can be improved in the future
2004  for (unsigned int it = 0; it < nThrows; ++it)
2005  {
2006  for (int j = 0; j < Modes->GetNModes()+1; j++)
2007  {
2008  double event_rate_temp = 0.;
2009  #ifdef MULTITHREAD
2010  #pragma omp parallel for reduction(+:event_rate_temp)
2011  #endif
2012  for (int SampleNum = 0; SampleNum < nSamples; SampleNum++)
2013  {
2014  event_rate_temp += NoOverflowIntegral(MCVectorByMode[it][SampleNum][j]);
2015  }
2016  EventHist_ByMode[j]->Fill(event_rate_temp);
2017  }
2018  }
2019 
2020  for (int i = 0; i < Modes->GetNModes()+1; ++i)
2021  {
2022  MakeCutEventRate(EventHist_ByMode[i], DataRate);
2023  }
2024 
2025  TMatrixDSym* ModeCorrelation = new TMatrixDSym(Modes->GetNModes()+1);
2026 
2027  TH2D*** ModeCorr = new TH2D**[Modes->GetNModes()+1]();
2028  for (int i = 0; i < Modes->GetNModes()+1; ++i)
2029  {
2030  ModeCorr[i] = new TH2D*[Modes->GetNModes()+1]();
2031 
2032  (*ModeCorrelation)(i,i) = 1.0;
2033 
2034  const double Min_i = EventHist_ByMode[i]->GetXaxis()->GetBinLowEdge(1);
2035  const double Max_i = EventHist_ByMode[i]->GetXaxis()->GetBinUpEdge(EventHist_ByMode[i]->GetNbinsX()+1);
2036  for (int j = 0; j < Modes->GetNModes()+1; ++j)
2037  {
2038  const double Min_j = EventHist_ByMode[j]->GetXaxis()->GetBinLowEdge(1);
2039  const double Max_j = EventHist_ByMode[j]->GetXaxis()->GetBinUpEdge(EventHist_ByMode[j]->GetNbinsX()+1);
2040 
2041  // TH2D to hold the Correlation
2042  ModeCorr[i][j] = new TH2D(Form("ModeCorr_%i_%i",i,j), Form("ModeCorr_%i_%i",i,j), 70, Min_i, Max_i, 70, Min_j, Max_j);
2043  ModeCorr[i][j]->SetDirectory(nullptr);
2044  ModeCorr[i][j]->SetMinimum(0);
2045  ModeCorr[i][j]->GetXaxis()->SetTitle(Modes->GetMaCh3ModeName(i).c_str());
2046  ModeCorr[i][j]->GetYaxis()->SetTitle(Modes->GetMaCh3ModeName(j).c_str());
2047  ModeCorr[i][j]->GetZaxis()->SetTitle("Events");
2048  }
2049  }
2050 
2051  // Now we are sure we have the diagonal elements, let's make the off-diagonals
2052  #ifdef MULTITHREAD
2053  #pragma omp parallel for
2054  #endif
2055  for (int i = 0; i < Modes->GetNModes()+1; ++i)
2056  {
2057  for (int j = 0; j <= i; ++j)
2058  {
2059  // Skip the diagonal elements which we've already done above
2060  if (j == i) continue;
2061 
2062  for (unsigned int it = 0; it < nThrows; ++it)
2063  {
2064  double Integral_X = 0.;
2065  double Integral_Y = 0.;
2066  for (int SampleNum = 0; SampleNum < nSamples; ++SampleNum)
2067  {
2068  Integral_X += NoOverflowIntegral(MCVectorByMode[it][SampleNum][i]);
2069  Integral_Y += NoOverflowIntegral(MCVectorByMode[it][SampleNum][j]);
2070  }
2071  ModeCorr[i][j]->Fill(Integral_X, Integral_Y);
2072  }
2073  ModeCorr[i][j]->Smooth();
2074 
2075  // Get the correlation between these two modes
2076  (*ModeCorrelation)(i,j) = ModeCorr[i][j]->GetCorrelationFactor();
2077  (*ModeCorrelation)(j,i) = (*ModeCorrelation)(i,j);
2078  }// End j loop
2079  }// End i loop
2080 
2081  TH2D* hModeCorr = new TH2D("Mode Correlation", "Mode Correlation", Modes->GetNModes()+1, 0, Modes->GetNModes()+1, Modes->GetNModes()+1, 0, Modes->GetNModes()+1);
2082  hModeCorr->SetDirectory(nullptr);
2083  hModeCorr->GetZaxis()->SetTitle("Correlation");
2084  hModeCorr->SetMinimum(-1);
2085  hModeCorr->SetMaximum(1);
2086  hModeCorr->GetXaxis()->SetLabelSize(0.015);
2087  hModeCorr->GetYaxis()->SetLabelSize(0.015);
2088 
2089  // Loop over the correlation matrix entries
2090  for (int i = 0; i < Modes->GetNModes()+1; ++i)
2091  {
2092  hModeCorr->GetXaxis()->SetBinLabel(i+1, Modes->GetMaCh3ModeName(i).c_str());
2093 
2094  for (int j = 0; j < Modes->GetNModes()+1; ++j)
2095  {
2096  hModeCorr->GetYaxis()->SetBinLabel(j+1, Modes->GetMaCh3ModeName(j).c_str());
2097  // The value of the correlation
2098  const double corr = (*ModeCorrelation)(i,j);
2099  hModeCorr->SetBinContent(i+1, j+1, corr);
2100  }
2101  }
2102  hModeCorr->Draw("colz");
2103  hModeCorr->Write("Mode_Corr");
2104  delete hModeCorr;
2105 
2106  for (int i = 0; i < Modes->GetNModes()+1; ++i)
2107  {
2108  for (int j = 0; j <= i; ++j)
2109  {
2110  // Skip the diagonal elements which we've already done above
2111  if (j == i) continue;
2112  ModeCorr[i][j]->Write();
2113  }// End j loop
2114  }// End i loop
2115 
2116  for (int i = 0; i < Modes->GetNModes()+1; ++i)
2117  {
2118  for (int j = 0; j < Modes->GetNModes()+1; ++j)
2119  {
2120  delete ModeCorr[i][j];
2121  }
2122  delete[] ModeCorr[i];
2123  }
2124  delete[] ModeCorr;
2125  ModeCorrelation->Write("Mode_Correlation");
2126  delete ModeCorrelation;
2127 
2128  for (int j = 0; j < Modes->GetNModes()+1; j++)
2129  {
2130  delete EventHist_ByMode[j];
2131  }
2132  }
2133 
2134  CorrDir->Close();
2135  delete CorrDir;
2136 
2137  timer.Stop();
2138  MACH3LOG_INFO("Calculating correlations took {:.2f}s", timer.RealTime());
2139  Outputfile->cd();
2140 }
2141 
2142 // ****************
2143 // Make a projection
2144 TH1D* SampleSummary::ProjectHist(TH2D* Histogram, const bool ProjectX) {
2145 // ****************
2146  TH1D* Projection = nullptr;
2147  std::string name;
2148  if (ProjectX) {
2149  name = std::string(Histogram->GetName()) + "_x";
2150  Projection = Histogram->ProjectionX(name.c_str(), 1, Histogram->GetYaxis()->GetNbins(), "e");
2151  } else {
2152  name = std::string(Histogram->GetName()) + "_y";
2153  Projection = Histogram->ProjectionY(name.c_str(), 1, Histogram->GetXaxis()->GetNbins(), "e");
2154  }
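  // The "e" option tells ROOT to also compute the bin errors of the projection.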
2155  return Projection;
2156 }
2157 
2158 // ****************
2159 // Make a projection
2160 TH1D* SampleSummary::ProjectPoly(TH2Poly* Histogram, const bool ProjectX, const int selection, const bool MakeErrorHist) {
2161 // ****************
2162  std::vector<double> xbins = SampleHandler->ReturnKinematicParameterBinning(M3::int_t(selection), SampleHandler->GetKinVarName(M3::int_t(selection), 0));
2163  std::vector<double> ybins = SampleHandler->ReturnKinematicParameterBinning(M3::int_t(selection), SampleHandler->GetKinVarName(M3::int_t(selection), 1));
2164 
2165  TH1D* Projection = nullptr;
2166  std::string name;
2167  if (ProjectX) {
2168  name = std::string(Histogram->GetName()) + "_x";
2169  Projection = PolyProjectionX(Histogram, name.c_str(), xbins, MakeErrorHist);
2170  } else {
2171  name = std::string(Histogram->GetName()) + "_y";
2172  Projection = PolyProjectionY(Histogram, name.c_str(), ybins, MakeErrorHist);
2173  }
2174  return Projection;
2175 }
2176 
2177 // ****************
2178 //KS: We have two methods of applying the statistical fluctuation; the standard one is faster and hence the default
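// Both variants produce a Poisson fluctuation of PolyHist into FluctHist using the class's TRandom3 (rnd);
// the alternative exists only as a slower cross-check of the standard method.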
2179 void SampleSummary::MakeFluctuatedHistogram(TH1D *FluctHist, TH1D* PolyHist){
2180 // ****************
2181  if(StandardFluctuation) MakeFluctuatedHistogramStandard(FluctHist, PolyHist, rnd.get());
2182  else MakeFluctuatedHistogramAlternative(FluctHist, PolyHist, rnd.get());
2183 }
2184 
2185 // ****************
2186 //KS: We have two methods of applying the statistical fluctuation; the standard one is faster and hence the default
2187 void SampleSummary::MakeFluctuatedHistogram(TH2Poly *FluctHist, TH2Poly* PolyHist){
2188 // ****************
2189  if(StandardFluctuation) MakeFluctuatedHistogramStandard(FluctHist, PolyHist, rnd.get());
2190  else MakeFluctuatedHistogramAlternative(FluctHist, PolyHist, rnd.get());
2191 }
2192 
2193 
2194 // ****************
2195 void SampleSummary::StudyInformationCriterion(M3::kInfCrit Criterion) {
2196 // ****************
2197  MACH3LOG_INFO("******************************");
2198  switch(Criterion) {
2199  case M3::kInfCrit::kBIC:
2200  // Study Bayesian Information Criterion
2201  StudyBIC();
2202  break;
2203  case M3::kInfCrit::kDIC:
2204  // Study Deviance Information Criterion
2205  StudyDIC();
2206  break;
2207  case M3::kInfCrit::kWAIC:
2208  // Study Watanabe-Akaike information criterion (WAIC)
2209  StudyWAIC();
2210  break;
2211  case M3::kInfCrit::kInfCrits:
2212  MACH3LOG_ERROR("kInfCrits is not a valid kInfCrit!");
2213  throw MaCh3Exception(__FILE__, __LINE__);
2214  default:
2215  MACH3LOG_ERROR("UNKNOWN Information Criterion SPECIFIED!");
2216  MACH3LOG_ERROR("You gave {}", static_cast<int>(Criterion));
2217  throw MaCh3Exception(__FILE__ , __LINE__ );
2218  }
2219  MACH3LOG_INFO("******************************");
2220 }
2221 
2222 // ****************
2223 void SampleSummary::StudyBIC() {
2224 // ****************
2225  // Get the total data rate and the number of analysis bins that enter the BIC
2226  double DataRate = 0.0;
2227  double BinsRate = 0.0;
2228  #ifdef MULTITHREAD
2229  #pragma omp parallel for reduction(+:DataRate, BinsRate)
2230  #endif
2231  for (int i = 0; i < nSamples; ++i)
2232  {
2233  if (DataHist[i] == nullptr) continue;
2234  DataRate += NoOverflowIntegral(DataHist[i]);
2235  BinsRate += maxBins[i];
2236  }
2237 
2238  const double EventRateBIC = GetBIC(llh_total, DataRate, nModelParams);
2239  const double BinBasedBIC = GetBIC(llh_total, BinsRate, nModelParams);
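  // Textbook BIC is k*ln(n) - 2lnL; here k = nModelParams, the -2lnL is supplied via llh_total, and
  // two choices of the sample size n are compared: the total number of data events and the total number of bins.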
2240  MACH3LOG_INFO("Calculated Bayesian Information Criterion using global number of events: {:.2f}", EventRateBIC);
2241  MACH3LOG_INFO("Calculated Bayesian Information Criterion using global number of bins: {:.2f}", BinBasedBIC);
2242  MACH3LOG_INFO("Additional info: nModelParams {} DataRate: {:.2f} BinsRate: {:.2f}", nModelParams, DataRate, BinsRate);
2243 }
2244 
2245 // ****************
2246 // Get the Deviance Information Criterion (DIC)
2247 void SampleSummary::StudyDIC() {
2248 // ****************
2249  //The posterior mean of the deviance
2250  double Dbar = 0.;
2251 
2252  #ifdef MULTITHREAD
2253  #pragma omp parallel for reduction(+:Dbar)
2254  #endif
2255  for (unsigned int i = 0; i < nThrows; ++i)
2256  {
2257  double LLH_temp = 0.;
2258  for (int SampleNum = 0; SampleNum < nSamples; ++SampleNum)
2259  {
2260  // Get -2*log-likelihood
2261  LLH_temp += GetLLH(DataHist[SampleNum], MCVector[i][SampleNum], W2MCVector[i][SampleNum]);
2262  }
2263  Dbar += LLH_temp;
2264  }
2265  Dbar = Dbar / nThrows;
2266 
2267  // A point estimate of the deviance
2268  const double Dhat = llh_total;
2269 
2270  //Effective number of parameters
2271  const double p_D = std::fabs(Dbar - Dhat);
2272 
2273  //Actual test stat
2274  const double DIC_stat = Dhat + 2 * p_D;
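  // This follows the standard DIC construction (Spiegelhalter et al. 2002): DIC = D(theta_hat) + 2*p_D
  // with p_D = Dbar - D(theta_hat), where Dbar is the posterior mean deviance over the throws and
  // D(theta_hat) is taken to be llh_total.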
2275  MACH3LOG_INFO("Effective number of parameters following DIC formalism is equal to: {:.2f}", p_D);
2276  MACH3LOG_INFO("DIC test statistic = {:.2f}", DIC_stat);
2277 }
2278 
2279 // ****************
2280 // Get the Watanabe-Akaike information criterion (WAIC)
2281 void SampleSummary::StudyWAIC() {
2282 // ****************
2283  // log pointwise predictive density
2284  double lppd = 0.;
2285  // effective number of parameters
2286  double p_WAIC = 0.;
2287 
2288  #ifdef MULTITHREAD
2289  #pragma omp parallel for reduction(+:lppd, p_WAIC)
2290  #endif
2291  for (int SampleNum = 0; SampleNum < nSamples; ++SampleNum) {
2292  int nBins = maxBins[SampleNum];
2293  for (int i = 1; i <= nBins; ++i) {
2294  double mean_llh = 0.;
2295  double sum_exp_llh = 0;
2296  double mean_llh_squared = 0.;
2297 
2298  for (unsigned int s = 0; s < nThrows; ++s) {
2299  const double data = DataHist[SampleNum]->GetBinContent(i);
2300  const double mc = MCVector[s][SampleNum]->GetBinContent(i);
2301  const double w2 = W2MCVector[s][SampleNum]->GetBinContent(i);
2302 
2303  // Get the -log-likelihood for this sample and bin
2304  double neg_LLH_temp = SampleHandler->GetTestStatLLH(data, mc, w2);
2305 
2306  // Negate the negative log-likelihood to get the actual log-likelihood
2307  double LLH_temp = -neg_LLH_temp;
2308 
2309  mean_llh += LLH_temp;
2310  mean_llh_squared += LLH_temp * LLH_temp;
2311  sum_exp_llh += std::exp(LLH_temp);
2312  }
2313 
2314  // Compute the mean log-likelihood and the squared mean
2315  mean_llh /= nThrows;
2316  mean_llh_squared /= nThrows;
2317  sum_exp_llh /= nThrows;
2318  sum_exp_llh = std::log(sum_exp_llh);
2319 
2320  // Log pointwise predictive density based on Eq. 4 in Gelman2014
2321  lppd += sum_exp_llh;
2322 
2323  // Compute the effective number of parameters for WAIC
2324  p_WAIC += mean_llh_squared - (mean_llh * mean_llh);
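  // The term just added is the variance of this bin's log-likelihood over the throws,
  // <llh^2> - <llh>^2, summed over bins (the variance-based p_WAIC of Gelman et al. 2014);
  // lppd above takes the log of the mean of exp(llh), per Eq. 4 of the same paper.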
2325  }
2326  }
2327 
2328  // Compute WAIC, see Eq. 13 in Gelman2014
2329  double WAIC = -2 * (lppd - p_WAIC);
2330  MACH3LOG_INFO("Effective number of parameters following WAIC formalism is equal to: {:.2f}", p_WAIC);
2331  MACH3LOG_INFO("WAIC = {:.2f}", WAIC);
2332 }
void MakeFluctuatedHistogramAlternative(TH1D *FluctHist, TH1D *PolyHist, TRandom3 *rand)
Make Poisson fluctuation of TH1D hist using slow method which is only for cross-check.
void NormaliseTH2Poly(TH2Poly *Histogram)
Helper to Normalise histograms.
void MakeFluctuatedHistogramStandard(TH1D *FluctHist, TH1D *PolyHist, TRandom3 *rand)
Make Poisson fluctuation of TH1D hist using default fast method.
TH1D * PolyProjectionX(TObject *poly, std::string TempName, const std::vector< double > &xbins, const bool computeErrors)
WP: Poly Projectors.
void FastViolinFill(TH2D *violin, TH1D *hist_1d)
KS: Fill Violin histogram with entry from a toy.
TH2Poly * NormalisePoly(TH2Poly *Histogram)
WP: Helper to Normalise histograms.
TH2Poly * RatioPolys(TH2Poly *NumHist, TH2Poly *DenomHist)
Helper to make ratio of TH2Polys.
TH1D * PolyProjectionY(TObject *poly, std::string TempName, const std::vector< double > &ybins, const bool computeErrors)
WP: Poly Projectors.
double NoOverflowIntegral(TH2Poly *poly)
WP: Helper function for calculating binned Integral of TH2Poly i.e not including overflow.
#define MACH3LOG_DEBUG
Definition: MaCh3Logger.h:34
#define MACH3LOG_ERROR
Definition: MaCh3Logger.h:37
#define MACH3LOG_INFO
Definition: MaCh3Logger.h:35
double GetBetaParameter(const double data, const double mc, const double w2, const TestStatistic TestStat)
KS: Calculate Beta parameter which will be different based on specified test statistic.
double GetBIC(const double llh, const int data, const int nPars)
Get the Bayesian Information Criterion (BIC) or Schwarz information criterion (also SIC,...
void Get2DBayesianpValue(TH2D *Histogram)
Calculates the 2D Bayesian p-value and generates a visualization.
double GetModeError(TH1D *hpost)
Get the mode error from a TH1D.
Custom exception class used throughout MaCh3.
int GetNModes() const
KS: Get number of modes, keep in mind actual number is +1 greater due to unknown category.
Definition: MaCh3Modes.h:148
std::string GetMaCh3ModeName(const int Index) const
KS: Get normal name of mode, if mode not known you will get UNKNOWN_BAD.
Definition: MaCh3Modes.cpp:156
Class responsible for handling implementation of samples used in analysis, reweighting and returning ...
virtual std::string GetKinVarName(const int iSample, const int Dimension) const =0
Return Kinematic Variable name for specified sample and dimension for example "Reconstructed_Neutrino...
MaCh3Modes * GetMaCh3Modes() const
Return pointer to MaCh3 modes.
double GetTestStatLLH(const double data, const double mc, const double w2) const
Calculate test statistic for a single bin. Calculation depends on setting of fTestStatistic....
virtual std::vector< double > ReturnKinematicParameterBinning(const int Sample, const std::string &KinematicParameter) const =0
Return the binning used to draw a kinematic parameter.
virtual std::string GetSampleTitle(const int Sample) const =0
void StudyInformationCriterion(M3::kInfCrit Criterion)
Information Criterion.
std::vector< TH2Poly * > lnLHist_Mode
The LLH distribution in pmu cosmu for using the mode in each bin.
std::vector< double > LLHPenaltyVector
Vector to hold the penalty term.
std::vector< double > llh_data_draw_ProjectX
Projection X (most likely muon momentum) of LLH.
void MakeCutEventRate(TH1D *Histogram, const double DataRate)
Make the 1D Event Rate Hist.
SampleHandlerBase * SampleHandler
Pointer to SampleHandler object, mostly used to get sample names, binning etc.
~SampleSummary()
Destructor.
std::vector< double > llh_predfluc_draw
Fluctuated Predictive vs Draw.
void AddThrow(std::vector< TH2Poly * > &MCHist, std::vector< TH2Poly * > &W2Hist, const double LLHPenalty=0.0, const double Weight=1.0, const int DrawNumber=0)
KS: Add histograms with throws.
std::vector< std::vector< TH2Poly * > > MCVector
Vector of vectors which holds the loaded MC histograms.
std::vector< TH2Poly * > lnLHist_Mean
The LLH distribution in pmu cosmu for using the mean in each bin.
void CalcLLH(TH2Poly *const &Data, TH2Poly *const &MC, TH2Poly *const &W2)
Helper functions to calculate likelihoods using TH2Poly, will modify MC hist title to include LLH.
double total_llh_predfluc_pred
Fluctuated Predictive vs Predictive.
double total_llh_drawfluc_draw_ProjectX
Fluctuated Draw vs Draw for projection X (most likely muon momentum)
std::vector< std::vector< TH2Poly * > > W2MCVector
Vector of vectors which holds the loaded W2 histograms.
std::vector< TH2Poly * > W2MeanHist
Pointer to the w2 histograms (for mean values).
double total_llh_data_draw
Data vs Draw.
std::unique_ptr< TH1D > RandomHist
Holds the history of which entries have been drawn in the MCMC file.
double total_llh_rate_predfluc_draw
Fluctuated Predictive vs Draw using Rate.
void MakeChi2Hists()
Make the fluctuated histograms (2D and 1D) for the chi2s Essentially taking the MCMC draws and calcul...
std::vector< double > llh_data_predfluc
Data vs Fluctuated Predictive.
void StudyBIC()
Study Bayesian Information Criterion (BIC) .
std::unique_ptr< TRandom3 > rnd
Random number generator.
std::vector< TH1D * > lnLHist_Mode1D
Holds the bin-by-bin LLH for the mode posterior predictive vs the data.
std::string OutputName
Output filename.
void MakePredictive()
Finalise the distributions from the thrown samples.
std::vector< double > llh_predfluc_pred
Fluctuated Predictive vs Predictive.
std::vector< std::vector< std::unique_ptr< TH1D > > > BetaHist
Distribution of beta parameters in Barlow Beeston formalisms.
int nModelParams
Number of parameters.
std::vector< TH2D * > ViolinHists_ProjectX
Posterior predictive but for X projection but as a violin plot.
bool first_pass
KS: Hacky flag to let us know if this is first toy.
std::vector< TH2Poly * > NominalHist
The nominal histogram for the selection.
std::vector< std::vector< std::unique_ptr< TH1D > > > PosteriorHist
The posterior predictive for the whole selection: this gets built after adding in the toys....
double total_llh_draw_pred
Draw vs Predictive.
std::vector< double > llh_drawfluc_draw
Fluctuated Draw vs Draw.
double total_llh_data_draw_ProjectX
Data vs Draw for projection X (most likely muon momentum)
std::vector< TH2Poly * > MeanHistCorrected
The posterior predictive distribution in pmu cosmu using the mean after applying Barlow-Beeston Corre...
std::vector< double > llh_drawfluc_predfluc
Fluctuated Draw vs Fluctuated Predictive.
MaCh3Modes * Modes
MaCh3 Modes.
std::vector< double > llh_data_draw
Data vs Draw.
double total_llh_data_drawfluc
Data vs Fluctuated Draw.
void AddData(std::vector< TH2Poly * > &DataHist)
KS: Add data histograms.
TH1D * ProjectPoly(TH2Poly *Histogram, const bool ProjectX, const int selection, const bool MakeErrorHist=false)
Helper to project TH2Poly onto axis.
std::unique_ptr< TH1D > lnLHist
The histogram containing the lnL for each throw.
std::vector< TH1D * > lnLHist_Mean1D
Holds the bin-by-bin LLH for the mean posterior predictive vs the data.
TTree * OutputTree
TTree which we save useful information to.
bool DoByModePlots
By mode variables.
std::vector< TH1D * > lnLHist_Mean_ProjectX
The LLH distribution in pmu using the mean in each bin.
void PrepareOutput()
KS: Prepare output tree and necessary variables.
SampleSummary(const int n_Samples, const std::string &Filename, SampleHandlerBase *const sample, const int nSteps)
Constructor.
double total_llh_drawfluc_draw
Fluctuated Draw vs Draw.
std::vector< TH2Poly * > DataHist
The data histogram for the selection.
std::vector< double > llh_draw_pred
Draw vs Predictive.
double total_llh_predfluc_draw
Fluctuated Predictive vs Draw.
int Debug
Tells Debug level to save additional histograms.
std::vector< double > llh_drawfluc_draw_ProjectX
bool CheckSamples(const int Length)
Check the length of samples agrees.
std::unique_ptr< TH2D > lnLDrawHist
The 2D lnLhist, showing (draw vs data) and (draw vs fluct), anything above y=x axis is the p-value.
void AddNominal(std::vector< TH2Poly * > &NominalHist, std::vector< TH2Poly * > &W2Nom)
KS: Add prior histograms.
void StudyDIC()
KS: Get the Deviance Information Criterion (DIC) .
void AddThrowByMode(std::vector< std::vector< TH2Poly * >> &SampleVector_ByMode)
KS: Add histograms for each mode.
std::vector< TH2D * > ViolinHists_ProjectY
Posterior predictive but for Y projection but as a violin plot.
std::vector< double > llh_data_drawfluc
Data vs Fluctuated Draw.
std::vector< TH2Poly * > ModeHist
The posterior predictive distribution in pmu cosmu using the mode.
std::vector< std::string > SampleNames
name for each sample
std::vector< TH2Poly * > W2NomHist
Pointer to the w2 histograms (for nominal values).
double total_llh_drawfluc_pred
Fluctuated Draw vs Predictive.
void StudyWAIC()
KS: Get the Watanabe-Akaike information criterion (WAIC) .
std::vector< TDirectory * > Dir
Directory for each sample.
std::vector< double > llh_rate_predfluc_draw
Fluctuated Predictive vs Draw using rate only.
unsigned int nThrows
Number of throws.
std::vector< TH1D * > lnLHist_Sample_DrawData
The histogram containing the lnL (draw vs data) for each throw for each sample.
std::vector< std::vector< std::unique_ptr< TH1D > > > w2Hist
The posterior predictive for the whole selection: this gets built after adding in the toys....
TH1D **** PosteriorHist_ByMode
Histogram which corresponds to each bin in the sample's th2poly.
std::unique_ptr< TH1D > lnLHist_drawflucdraw
The lnLhist for the draw vs draw fluctuated.
TFile * Outputfile
Output filename.
double llh_total
Total LLH for the posterior predictive distribution.
void Write()
KS: Write results into root file.
std::unique_ptr< TH2D > lnLDrawHistRate
The 2D lnLhist, showing (draw vs data) and (draw vs fluct), using rate, anything above y=x axis is th...
double total_llh_rate_data_draw
Rate Data vs Draw.
std::unique_ptr< TH1D > lnLHist_drawdata
The lnLhist for the draw vs data.
TestStatistic likelihood
Type of likelihood for example Poisson, Barlow-Beeston or Ice Cube.
int nSamples
Number of samples.
void MakeCutLLH()
Make the cut LLH histogram.
std::vector< std::vector< std::vector< TH2Poly * > > > MCVectorByMode
Vector of vectors which holds the loaded MC histograms for each mode.
std::vector< double > llh_drawfluc_pred
Fluctuated Draw vs Predictive.
std::vector< std::vector< TH2Poly * > > MeanHist_ByMode
The posterior predictive distribution in pmu cosmu using the mean.
std::vector< double > llh_datafluc_draw
Fluctuated Data vs Draw.
std::vector< TH1D * > DataHist_ProjectX
The data histogram for the selection X projection.
void MakeCutLLH1D(TH1D *Histogram, double llh_ref=-999)
std::vector< TH1D * > DataHist_ProjectY
The data histogram for the selection Y projection.
void PlotBetaParameters()
KS: In Barlow Beeston we have Beta Parameters which scale generated MC.
double GetLLH(TH2Poly *const &Data, TH2Poly *const &MC, TH2Poly *const &W2)
Helper functions to calculate likelihoods using TH2Poly.
std::vector< double > WeightVector
Vector holding weight.
void MakeFluctuatedHistogram(TH1D *FluctHist, TH1D *PolyHist)
Make Poisson fluctuation of TH1D hist.
std::vector< double > llh_rate_data_draw
Data vs Draw using rate only.
bool isPriorPredictive
bool whether we have Prior or Posterior Predictive
bool DoBetaParam
Are we making Beta Histograms.
std::vector< TH2Poly * > W2ModeHist
Pointer to the w2 histograms (for mode values).
std::unique_ptr< TH2D > lnLFlucHist
The 2D lnLHist, showing (draw vs data) and (draw vs draw fluct), anything above y=x axis is the p-val...
bool StandardFluctuation
KS: We have two methods for Poissonian fluctuation.
std::unique_ptr< TH2D > lnLFlucHist_ProjectX
The 2D lnLHist but for ProjectionX histogram (pmu), showing (draw vs data) and (draw vs draw fluct),...
std::unique_ptr< TH1D > lnLHist_drawfluc
The lnLhist for the draw vs MC fluctuated.
bool doShapeOnly
bool whether to normalise each toy to have shape based p-value and pos pred distribution
std::vector< TH1D * > lnLHist_Sample_PredflucDraw
The histogram containing the lnL (draw vs pred fluct) for each throw for each sample.
double total_llh_data_predfluc
Data vs Fluctuated Predictive.
double llh_penalty
LLH penalty for each throw.
std::vector< TH2Poly * > MeanHist
The posterior predictive distribution in pmu cosmu using the mean.
unsigned int nChainSteps
Number of throws by user.
std::vector< int > maxBins
Max Number of Bins per each sample.
TH1D * ProjectHist(TH2D *Histogram, const bool ProjectX)
Helper to project TH2D onto axis.
double total_llh_drawfluc_predfluc
Fluctuated Draw vs Fluctuated Predictive.
double total_llh_datafluc_draw
Fluctuated Data vs Draw.
void StudyKinematicCorrelations()
KS: Study how correlated are sample or kinematic bins.
std::vector< TH1D * > lnLHist_Sample_DrawflucDraw
The histogram containing the lnL (draw vs draw fluct) for each throw for each sample.
kInfCrit
KS: Different Information Criterion tests mostly based Gelman paper.
Definition: SampleSummary.h:10
@ kWAIC
Watanabe-Akaike information criterion.
Definition: SampleSummary.h:13
@ kInfCrits
This only enumerates.
Definition: SampleSummary.h:14
@ kBIC
Bayesian Information Criterion.
Definition: SampleSummary.h:11
@ kDIC
Deviance Information Criterion.
Definition: SampleSummary.h:12
TFile * Open(const std::string &Name, const std::string &Type, const std::string &File, const int Line)
Opens a ROOT file with the given name and mode.
int int_t
Definition: Core.h:38
void PrintProgressBar(const Long64_t Done, const Long64_t All)
KS: Simply print progress bar.
Definition: Monitor.cpp:228