MaCh3  2.5.1
Reference Guide
PredictiveThrower.cpp
Go to the documentation of this file.
1 #include "PredictiveThrower.h"
3 #include "TH3.h"
4 
5 //this file is choc full of usage of a root interface that only takes floats, turn this warning off for this CU for now
6 #pragma GCC diagnostic ignored "-Wfloat-conversion"
7 #pragma GCC diagnostic ignored "-Wuseless-cast"
8 
9 // *************************
11 // *************************
12  AlgorithmName = "PredictiveThrower";
13  if(!CheckNodeExists(fitMan->raw(), "Predictive")) {
14  MACH3LOG_ERROR("Predictive is missing in your main yaml config");
15  throw MaCh3Exception(__FILE__ , __LINE__ );
16  }
17 
18  StandardFluctuation = GetFromManager<bool>(fitMan->raw()["Predictive"]["StandardFluctuation"], true, __FILE__, __LINE__ );
19 
20  if(StandardFluctuation) MACH3LOG_INFO("Using standard method of statistical fluctuation");
21  else MACH3LOG_INFO("Using alternative method of statistical fluctuation, which is much slower");
22 
23  ModelSystematic = nullptr;
24  // Use the full likelihood for the Prior/Posterior predictive pvalue
25  FullLLH = GetFromManager<bool>(fitMan->raw()["Predictive"]["FullLLH"], false, __FILE__, __LINE__ );
26  NModelParams = 0;
27 
28  Is_PriorPredictive = Get<bool>(fitMan->raw()["Predictive"]["PriorPredictive"], __FILE__, __LINE__);
29  Ntoys = Get<int>(fitMan->raw()["Predictive"]["Ntoy"], __FILE__, __LINE__);
30 
31  ReweightWeight.resize(Ntoys);
32  PenaltyTerm.resize(Ntoys);
33 }
34 
35 // *************************
36 // Destructor:
38 // *************************
39 
40 }
41 
42 // *************************
43 void PredictiveThrower::SetParamters(std::vector<std::string>& ParameterGroupsNotVaried,
44  std::unordered_set<int>& ParameterOnlyToVary) {
45 // *************************
46  // WARNING This should be removed in the future
47  auto DoNotThrowLegacyCov = GetFromManager<std::vector<std::string>>(fitMan->raw()["Predictive"]["DoNotThrowLegacyCov"], {}, __FILE__, __LINE__);
49  for (size_t i = 0; i < DoNotThrowLegacyCov.size(); ++i) {
50  for (size_t s = 0; s < systematics.size(); ++s) {
51  if (systematics[s]->GetName() == DoNotThrowLegacyCov[i]) {
52  systematics[s]->SetParameters();
53  break;
54  }
55  }
56  }
57 
58  // Set groups to prefit values if they were set to not be varies
59  if(ModelSystematic && ParameterGroupsNotVaried.size() > 0) {
60  ModelSystematic->SetGroupOnlyParameters(ParameterGroupsNotVaried);
61  }
62 
64  if (ModelSystematic && !ParameterOnlyToVary.empty()) {
65  for (int i = 0; i < ModelSystematic->GetNumParams(); ++i) {
66  // KS: If parameter is in map then we are skipping this, otherwise for params that we don't want to vary we simply set it to prior
67  if (ParameterOnlyToVary.find(i) == ParameterOnlyToVary.end()) {
69  }
70  }
71  }
72 }
73 
74 // *************************
76 // *************************
78  for (size_t iPDF = 0; iPDF < samples.size(); iPDF++) {
79  TotalNumberOfSamples += samples[iPDF]->GetNSamples();
80  }
81 
87 
89 
90 
91  for (int sample = 0; sample < TotalNumberOfSamples; ++sample) {
92  MC_Hist_Toy[sample].resize(Ntoys);
93  W2_Hist_Toy[sample].resize(Ntoys);
94  }
95  int counter = 0;
96  for (size_t iPDF = 0; iPDF < samples.size(); iPDF++) {
97  for (int SampleIndex = 0; SampleIndex < samples[iPDF]->GetNSamples(); ++SampleIndex) {
98  SampleInfo[counter].Name = samples[iPDF]->GetSampleTitle(SampleIndex);
99  SampleInfo[counter].LocalId = SampleIndex;
100  SampleInfo[counter].SamHandler = samples[iPDF];
101  SampleInfo[counter].Dimenstion = SampleInfo[counter].SamHandler->GetNDim(SampleIndex);
102  counter++;
103  }
104  }
105  SampleInfo[TotalNumberOfSamples].Name= "Total";
106 }
107 
108 // *************************
109 // Produce MaCh3 toys:
110 void PredictiveThrower::SetupToyGeneration(std::vector<std::string>& ParameterGroupsNotVaried,
111  std::unordered_set<int>& ParameterOnlyToVary,
112  std::vector<const M3::float_t*>& BoundValuePointer,
113  std::vector<std::pair<double, double>>& ParamBounds) {
114 // *************************
115  int counter = 0;
116  for (size_t s = 0; s < systematics.size(); ++s) {
117  auto* MaCh3Params = dynamic_cast<ParameterHandlerGeneric*>(systematics[s]);
118  if(MaCh3Params) {
119  ModelSystematic = MaCh3Params;
120  counter++;
121  }
122  }
123 
125 
126  if(Is_PriorPredictive) {
127  MACH3LOG_INFO("You've chosen to run Prior Predictive Distribution");
128  } else {
129  auto PosteriorFileName = Get<std::string>(fitMan->raw()["Predictive"]["PosteriorFile"], __FILE__, __LINE__);
130  //KS: We use MCMCProcessor to get names of covariances that were actually used to produce given chain
131  MCMCProcessor Processor(PosteriorFileName);
132  Processor.Initialise();
133 
134  // For throwing FD predictions from ND-only chain we have to allow having different yaml configs
135  auto AllowDifferentConfigs = GetFromManager<bool>(fitMan->raw()["Predictive"]["AllowDifferentConfigs"], false, __FILE__, __LINE__);
136 
138  YAML::Node ConfigInChain = Processor.GetCovConfig(kXSecPar);
139  if(ModelSystematic) {
140  YAML::Node ConfigNow = ModelSystematic->GetConfig();
141  if (!compareYAMLNodes(ConfigNow, ConfigInChain))
142  {
143  if(AllowDifferentConfigs){
144  MACH3LOG_WARN("Yaml configs used for your ParameterHandler for chain you want sample from ({}) and one currently initialised are different", PosteriorFileName);
145  } else {
146  MACH3LOG_ERROR("Yaml configs used for your ParameterHandler for chain you want sample from ({}) and one currently initialised are different", PosteriorFileName);
147  throw MaCh3Exception(__FILE__ , __LINE__ );
148  }
149  }
150  }
151  }
152  if(counter > 1) {
153  MACH3LOG_ERROR("Found {} ParmaterHandler inheriting from ParameterHandlerGeneric, I can accept at most 1", counter);
154  throw MaCh3Exception(__FILE__, __LINE__);
155  }
156 
157  for (size_t s = 0; s < systematics.size(); ++s) {
158  NModelParams += systematics[s]->GetNumParams();
159  }
160 
161  if (ModelSystematic) {
162  auto ThrowParamGroupOnly = GetFromManager<std::vector<std::string>>(fitMan->raw()["Predictive"]["ThrowParamGroupOnly"], {}, __FILE__, __LINE__);
163  auto UniqueParamGroup = ModelSystematic->GetUniqueParameterGroups();
164  auto ParameterOnlyToVaryString = GetFromManager<std::vector<std::string>>(fitMan->raw()["Predictive"]["ThrowSinlgeParams"], {}, __FILE__, __LINE__);
165 
166  if (!ThrowParamGroupOnly.empty() && !ParameterOnlyToVaryString.empty()) {
167  MACH3LOG_ERROR("Can't use ThrowParamGroupOnly and ThrowSinlgeParams at the same time");
168  throw MaCh3Exception(__FILE__, __LINE__);
169  }
170 
171  if (!ParameterOnlyToVaryString.empty()) {
172  MACH3LOG_INFO("I will throw only: {}", fmt::join(ParameterOnlyToVaryString, ", "));
173  std::vector<int> ParameterVary(ParameterOnlyToVaryString.size());
174 
175  for (size_t i = 0; i < ParameterOnlyToVaryString.size(); ++i) {
176  ParameterVary[i] = ModelSystematic->GetParIndex(ParameterOnlyToVaryString[i]);
177  if (ParameterVary[i] == M3::_BAD_INT_) {
178  MACH3LOG_ERROR("Can't proceed if param {} is missing", ParameterOnlyToVaryString[i]);
179  throw MaCh3Exception(__FILE__, __LINE__);
180  }
181  }
182  ParameterOnlyToVary = std::unordered_set<int>(ParameterVary.begin(), ParameterVary.end());
183  } else {
184  MACH3LOG_INFO("I have following parameter groups: {}", fmt::join(UniqueParamGroup, ", "));
185  if (ThrowParamGroupOnly.empty()) {
186  MACH3LOG_INFO("I will vary all");
187  } else {
188  std::unordered_set<std::string> throwOnlySet(ThrowParamGroupOnly.begin(), ThrowParamGroupOnly.end());
189  ParameterGroupsNotVaried.clear();
190 
191  for (const auto& group : UniqueParamGroup) {
192  if (throwOnlySet.find(group) == throwOnlySet.end()) {
193  ParameterGroupsNotVaried.push_back(group);
194  }
195  }
196 
197  MACH3LOG_INFO("I will vary: {}", fmt::join(ThrowParamGroupOnly, ", "));
198  MACH3LOG_INFO("Exclude: {}", fmt::join(ParameterGroupsNotVaried, ", "));
199  }
200  }
201  }
202 
203  auto paramNode = fitMan->raw()["Predictive"]["ParameterBounds"];
204  for (const auto& p : paramNode) {
205  // Extract name
206  std::string name = p[0].as<std::string>();
207 
208  // Extract bounds: min and max
209  double minVal = p[1][0].as<double>();
210  double maxVal = p[1][1].as<double>();
211  ParamBounds.emplace_back(minVal, maxVal);
212 
213  for (size_t s = 0; s < systematics.size(); ++s) {
214  for(int iPar = 0; iPar < systematics[s]->GetNParameters(); iPar++){
215  if(systematics[s]->GetParFancyName(iPar) == name){
216  BoundValuePointer.push_back(systematics[s]->RetPointer(iPar));
217  break;
218  }
219  }
220  }
221  if(ParamBounds.size() != BoundValuePointer.size()){
222  MACH3LOG_ERROR("Ddin't find paramter {}", name);
223  throw MaCh3Exception(__FILE__,__LINE__);
224  }
225  MACH3LOG_INFO("Parameter: {} with : [{}, {}]", name, minVal, maxVal);
226  }
227  if(Is_PriorPredictive && ParamBounds.size() > 0) {
228  MACH3LOG_ERROR("Additional bounds not supported by prior predictive right now");
229  throw MaCh3Exception(__FILE__,__LINE__);
230  }
231 }
232 
233 // *************************
234 // Try loading toys
236 // *************************
237  auto PosteriorFileName = Get<std::string>(fitMan->raw()["Predictive"]["PosteriorFile"], __FILE__, __LINE__);
238  // Open the ROOT file
239  int originalErrorWarning = gErrorIgnoreLevel;
240  gErrorIgnoreLevel = kFatal;
241  TFile* file = TFile::Open(PosteriorFileName.c_str(), "READ");
242 
243  gErrorIgnoreLevel = originalErrorWarning;
244  TDirectory* ToyDir = nullptr;
245  if (!file || file->IsZombie()) {
246  return false;
247  } else {
248  // Check for the "toys" directory
249  if ((ToyDir = file->GetDirectory("Toys"))) {
250  MACH3LOG_INFO("Found toys in Posterior file will attempt toy reading");
251  } else {
252  file->Close();
253  delete file;
254  return false;
255  }
256  }
257 
258  // Finally get the TTree branch with the penalty vectors for each of the toy throws
259  TTree* PenaltyTree = static_cast<TTree*>(file->Get("ToySummary"));
260  if (!PenaltyTree) {
261  MACH3LOG_WARN("ToySummary TTree not found in file.");
262  file->Close();
263  delete file;
264  return false;
265  }
266 
267  Ntoys = static_cast<int>(PenaltyTree->GetEntries());
268  int ConfigNtoys = Get<int>(fitMan->raw()["Predictive"]["Ntoy"], __FILE__, __LINE__);;
269  if (Ntoys != ConfigNtoys) {
270  MACH3LOG_WARN("Found different number of toys in saved file than asked to run!");
271  MACH3LOG_INFO("Will read _ALL_ toys in the file");
272  MACH3LOG_INFO("Ntoys in file: {}", Ntoys);
273  MACH3LOG_INFO("Ntoys specified: {}", ConfigNtoys);
274  }
275 
276  PenaltyTerm.resize(Ntoys);
277  ReweightWeight.resize(Ntoys);
278 
279  double Penalty = 0, Weight = 1;
280  PenaltyTree->SetBranchAddress("Penalty", &Penalty);
281  PenaltyTree->SetBranchAddress("Weight", &Weight);
282  PenaltyTree->SetBranchAddress("NModelParams", &NModelParams);
283 
284  for (int i = 0; i < Ntoys; ++i) {
285  PenaltyTree->GetEntry(i);
286  if (FullLLH) {
287  PenaltyTerm[i] = Penalty;
288  } else {
289  PenaltyTerm[i] = 0.0;
290  }
291 
292  ReweightWeight[i] = Weight;
293  }
294  // Resize all vectors and get sample names
296 
297  for (int sample = 0; sample < TotalNumberOfSamples; ++sample) {
298  TH1* DataHist1D = static_cast<TH1*>(ToyDir->Get((SampleInfo[sample].Name + "_data").c_str()));
299  Data_Hist[sample] = M3::Clone(DataHist1D);
300 
301  TH1* MCHist1D = static_cast<TH1*>(ToyDir->Get((SampleInfo[sample].Name + "_mc").c_str()));
302  MC_Nom_Hist[sample] = M3::Clone(MCHist1D);
303 
304  TH1* W2Hist1D = static_cast<TH1*>(ToyDir->Get((SampleInfo[sample].Name + "_w2").c_str()));
305  W2_Nom_Hist[sample] = M3::Clone(W2Hist1D);
306  }
307 
308 
309  for (int iToy = 0; iToy < Ntoys; ++iToy)
310  {
311  if (iToy % 100 == 0) MACH3LOG_INFO(" Loaded toy {}", iToy);
312 
313  for (int sample = 0; sample < TotalNumberOfSamples; ++sample) {
314  TH1* MCHist1D = static_cast<TH1*>(ToyDir->Get((SampleInfo[sample].Name + "_mc_" + std::to_string(iToy)).c_str()));
315  TH1* W2Hist1D = static_cast<TH1*>(ToyDir->Get((SampleInfo[sample].Name + "_w2_" + std::to_string(iToy)).c_str()));
316 
317  MC_Hist_Toy[sample][iToy] = M3::Clone(MCHist1D);
318  W2_Hist_Toy[sample][iToy] = M3::Clone(W2Hist1D);
319  }
320  }
321 
322  file->Close();
323  delete file;
324  return true;
325 }
326 
327 // *************************
328 std::vector<std::string> PredictiveThrower::GetStoredFancyName(ParameterHandlerBase* Systematics) const {
329 // *************************
330  TDirectory * ogdir = gDirectory;
331 
332  std::vector<std::string> FancyNames;
333  std::string Name = std::string("Config_") + Systematics->GetName();
334  auto PosteriorFileName = Get<std::string>(fitMan->raw()["Predictive"]["PosteriorFile"], __FILE__, __LINE__);
335 
336  TFile* file = TFile::Open(PosteriorFileName.c_str(), "READ");
337  TDirectory* CovarianceFolder = file->GetDirectory("CovarianceFolder");
338 
339  TMacro* FoundMacro = static_cast<TMacro*>(CovarianceFolder->Get(Name.c_str()));
340  if(FoundMacro == nullptr) {
341  file->Close();
342  delete file;
343  if(ogdir){ ogdir->cd(); }
344 
345  return FancyNames;
346  }
347  MACH3LOG_DEBUG("Found config for {}", Name);
348  YAML::Node Settings = TMacroToYAML(*FoundMacro);
349 
350  int params = int(Settings["Systematics"].size());
351  FancyNames.resize(params);
352  int iPar = 0;
353  for (auto const &param : Settings["Systematics"]) {
354  FancyNames[iPar] = Get<std::string>(param["Systematic"]["Names"]["FancyName"], __FILE__ , __LINE__);
355  iPar++;
356  }
357  file->Close();
358  delete file;
359  if(ogdir){ ogdir->cd(); }
360  return FancyNames;
361 }
362 
363 
364 // *************************
365 void PredictiveThrower::WriteToy(TDirectory* ToyDirectory,
366  TDirectory* Toy_1DDirectory,
367  TDirectory* Toy_2DDirectory,
368  const int iToy) {
369 // *************************
370  int SampleCounter = 0;
371  for (size_t iPDF = 0; iPDF < samples.size(); iPDF++)
372  {
373  auto* SampleHandler = samples[iPDF];
374  for (int iSample = 0; iSample < SampleHandler->GetNSamples(); ++iSample)
375  {
376  ToyDirectory->cd();
377 
378  auto SampleName = SampleHandler->GetSampleTitle(iSample);
379  const TH1* MCHist = SampleHandler->GetMCHist(iSample);
380  MC_Hist_Toy[SampleCounter][iToy] = M3::Clone(MCHist, SampleName + "_mc_" + std::to_string(iToy));
381  MC_Hist_Toy[SampleCounter][iToy]->Write();
382 
383  const TH1* W2Hist = SampleHandler->GetW2Hist(iSample);
384  W2_Hist_Toy[SampleCounter][iToy] = M3::Clone(W2Hist, SampleName + "_w2_" + std::to_string(iToy));
385  W2_Hist_Toy[SampleCounter][iToy]->Write();
386 
387  // now get 1D projection for every dimension
388  Toy_1DDirectory->cd();
389  for(int iDim = 0; iDim < SampleHandler->GetNDim(iSample); iDim++) {
390  std::string ProjectionName = SampleHandler->GetKinVarName(iSample, iDim);
391  std::string ProjectionSuffix = "_1DProj" + std::to_string(iDim) + "_" + std::to_string(iToy);
392 
393  auto hist = SampleHandler->Get1DVarHist(iSample, ProjectionName);
394  hist->SetTitle((SampleName + ProjectionSuffix).c_str());
395  hist->SetName((SampleName + ProjectionSuffix).c_str());
396  hist->Write();
397  }
398 
399  Toy_2DDirectory->cd();
400  // now get 2D projection for every combination
401  for(int iDim1 = 0; iDim1 < SampleHandler->GetNDim(iSample); iDim1++) {
402  for (int iDim2 = iDim1 + 1; iDim2 < SampleHandler->GetNDim(iSample); ++iDim2) {
403  // Get the names for the two dimensions
404  std::string XVarName = SampleHandler->GetKinVarName(iSample, iDim1);
405  std::string YVarName = SampleHandler->GetKinVarName(iSample, iDim2);
406 
407  // Get the 2D histogram for this pair
408  auto hist2D = SampleHandler->Get2DVarHist(iSample, XVarName, YVarName);
409 
410  // Write the histogram
411  std::string suffix2D = "_2DProj_" + std::to_string(iDim1) + "_vs_" + std::to_string(iDim2) + "_" + std::to_string(iToy);
412  hist2D->SetTitle((SampleName + suffix2D).c_str());
413  hist2D->SetName((SampleName + suffix2D).c_str());
414  hist2D->Write();
415  }
416  }
417  SampleCounter++;
418  }
419  }
420 }
421 
422 // *************************
423 void PredictiveThrower::WriteByModeToys(TDirectory* ByModeDirectory,
424  const int iToy) {
425 // *************************
426  int SampleCounter = 0;
427  for (size_t iPDF = 0; iPDF < samples.size(); iPDF++)
428  {
429  auto* SampleHandler = samples[iPDF];
430  auto* modes = SampleHandler->GetMaCh3Modes();
431  for (int iSample = 0; iSample < SampleHandler->GetNSamples(); ++iSample)
432  {
433  ByModeDirectory->cd();
434 
435  auto SampleName = SampleHandler->GetSampleTitle(iSample);
436  for (int iMode = 0; iMode < modes->GetNModes()+1; ++iMode) {
437  auto ModeName = modes->GetMaCh3ModeName(iMode);
438  for(int iDim = 0; iDim < SampleHandler->GetNDim(iSample); iDim++) {
439  std::string ProjectionName = SampleHandler->GetKinVarName(iSample, iDim);
440  std::string PlotSuffix = "_1DProj" + std::to_string(iDim) + "_" + ModeName + "_" + std::to_string(iToy);
441 
442  auto hist = SampleHandler->Get1DVarHistByModeAndChannel(iSample, ProjectionName, iMode);
443  hist->SetTitle((SampleName + PlotSuffix).c_str());
444  hist->SetName((SampleName + PlotSuffix).c_str());
445  hist->Write();
446  } // end loop over dimension
447  } // end loop over mode
448  SampleCounter++;
449  } // end loop over sample
450  } // end loop over sample handler objects
451 }
452 
453 // *************************
454 bool CheckBounds(const std::vector<const M3::float_t*>& BoundValuePointer,
455  const std::vector<std::pair<double,double>>& ParamBounds) {
456 // *************************
457  for (size_t i = 0; i < BoundValuePointer.size(); ++i) {
458  const double val = *(BoundValuePointer[i]);
459  const double minVal = ParamBounds[i].first;
460  const double maxVal = ParamBounds[i].second;
461 
462  if (val < minVal || val > maxVal)
463  return false; // out of bounds
464  }
465  return true; // all values are within bounds
466 }
467 
468 // *************************
469 // Produce MaCh3 toys:
471 // *************************
472  // If we found toys then skip process of making new toys
473  if(LoadToys()) return;
474 
476  std::vector<std::string> ParameterGroupsNotVaried;
478  std::unordered_set<int> ParameterOnlyToVary;
479  // For study where one would like to apply bounds
480  std::vector<const M3::float_t*> BoundValuePointer;
481  std::vector<std::pair<double, double>> ParamBounds;
482 
483  // Setup useful information for toy generation
484  SetupToyGeneration(ParameterGroupsNotVaried, ParameterOnlyToVary,
485  BoundValuePointer, ParamBounds);
486 
487  auto PosteriorFileName = Get<std::string>(fitMan->raw()["Predictive"]["PosteriorFile"], __FILE__, __LINE__);
488 
489  MACH3LOG_INFO("Starting {}", __func__);
490 
491  outputFile->cd();
492  double Penalty = 0, Weight = 1;
493  int Draw = 0;
494 
495  TTree *ToyTree = new TTree("ToySummary", "ToySummary");
496  ToyTree->Branch("Penalty", &Penalty, "Penalty/D");
497  ToyTree->Branch("Weight", &Weight, "Weight/D");
498  ToyTree->Branch("Draw", &Draw, "Draw/I");
499  ToyTree->Branch("NModelParams", &NModelParams, "NModelParams/I");
500 
501  // KS: define branches so we can keep track of what params we are throwing
502  std::vector<double> ParamValues(NModelParams);
503  std::vector<const M3::float_t*> ParampPointers(NModelParams);
504  int ParamCounter = 0;
505  for (size_t iSys = 0; iSys < systematics.size(); iSys++)
506  {
507  for (int iPar = 0; iPar < systematics[iSys]->GetNumParams(); iPar++)
508  {
509  ParampPointers[ParamCounter] = systematics[iSys]->RetPointer(iPar);
510  std::string Name = systematics[iSys]->GetParFancyName(iPar);
511  //CW: Also strip out - signs because it messes up TBranches
512  while (Name.find("-") != std::string::npos) {
513  Name.replace(Name.find("-"), 1, std::string("_"));
514  }
515  ToyTree->Branch(Name.c_str(), &ParamValues[ParamCounter], (Name + "/D").c_str());
516  ParamCounter++;
517  }
518  }
519  TDirectory* ToyDirectory = outputFile->mkdir("Toys");
520  ToyDirectory->cd();
521  int SampleCounter = 0;
522  for (size_t iPDF = 0; iPDF < samples.size(); iPDF++)
523  {
524  auto* MaCh3Sample = samples[iPDF];
525  for (int SampleIndex = 0; SampleIndex < MaCh3Sample->GetNSamples(); ++SampleIndex)
526  {
527  // Get nominal spectra and event rates
528  const TH1* DataHist = MaCh3Sample->GetDataHist(SampleIndex);
529  Data_Hist[SampleCounter] = M3::Clone(DataHist, MaCh3Sample->GetSampleTitle(SampleIndex) + "_data");
530  Data_Hist[SampleCounter]->Write((MaCh3Sample->GetSampleTitle(SampleIndex) + "_data").c_str());
531 
532  const TH1* MCHist = MaCh3Sample->GetMCHist(SampleIndex);
533  MC_Nom_Hist[SampleCounter] = M3::Clone(MCHist, MaCh3Sample->GetSampleTitle(SampleIndex) + "_mc");
534  MC_Nom_Hist[SampleCounter]->Write((MaCh3Sample->GetSampleTitle(SampleIndex) + "_mc").c_str());
535 
536  const TH1* W2Hist = MaCh3Sample->GetW2Hist(SampleIndex);
537  W2_Nom_Hist[SampleCounter] = M3::Clone(W2Hist, MaCh3Sample->GetSampleTitle(SampleIndex) + "_w2");
538  W2_Nom_Hist[SampleCounter]->Write((MaCh3Sample->GetSampleTitle(SampleIndex) + "_w2").c_str());
539  SampleCounter++;
540  }
541  }
542 
543  TDirectory* Toy_1DDirectory = outputFile->mkdir("Toys_1DHistVar");
544  TDirectory* Toy_2DDirectory = outputFile->mkdir("Toys_2DHistVar");
545  auto doByMode = GetFromManager<bool>(fitMan->raw()["Predictive"]["ByMode"], false, __FILE__, __LINE__);
546  TDirectory* ByModeDirectory = nullptr;
547  if(doByMode) ByModeDirectory = outputFile->mkdir("Toys_ByMode");
548 
550  std::vector<std::vector<double>> branch_vals(systematics.size());
551  std::vector<std::vector<std::string>> branch_name(systematics.size());
552 
553  TChain* PosteriorFile = nullptr;
554  unsigned int burn_in = 0;
555  unsigned int maxNsteps = 0;
556  unsigned int Step = 0;
557  if(!Is_PriorPredictive)
558  {
559  PosteriorFile = new TChain("posteriors");
560  PosteriorFile->Add(PosteriorFileName.c_str());
561 
562  PosteriorFile->SetBranchAddress("step", &Step);
563  if (PosteriorFile->GetBranch("Weight")) {
564  PosteriorFile->SetBranchStatus("Weight", true);
565  PosteriorFile->SetBranchAddress("Weight", &Weight);
566  } else {
567  MACH3LOG_WARN("Not applying reweighting weight");
568  Weight = 1.0;
569  }
570 
571  for (size_t s = 0; s < systematics.size(); ++s) {
572  auto fancy_names = GetStoredFancyName(systematics[s]);
573  systematics[s]->MatchMaCh3OutputBranches(PosteriorFile, branch_vals[s], branch_name[s], fancy_names);
574  }
575 
576  //Get the burn-in from the config
577  burn_in = Get<unsigned int>(fitMan->raw()["Predictive"]["BurnInSteps"], __FILE__, __LINE__);
578 
579  //DL: Adding sanity check for chains shorter than burn in
580  maxNsteps = static_cast<unsigned int>(PosteriorFile->GetMaximum("step"));
581  if(burn_in >= maxNsteps)
582  {
583  MACH3LOG_ERROR("You are running on a chain shorter than burn in cut");
584  MACH3LOG_ERROR("Maximal value of nSteps: {}, burn in cut {}", maxNsteps, burn_in);
585  MACH3LOG_ERROR("You will run into infinite loop");
586  MACH3LOG_ERROR("You can make new chain or modify burn in cut");
587  throw MaCh3Exception(__FILE__,__LINE__);
588  }
589  }
590 
591  TStopwatch TempClock;
592  TempClock.Start();
593  for(int i = 0; i < Ntoys; i++)
594  {
595  if(Ntoys >= 10 && i % (Ntoys/10) == 0) {
597  }
598  if(!Is_PriorPredictive){
599  int entry = 0;
600  Step = 0;
601  // KS This allow to set additional bounds like mass ordering
602  bool WithinBounds = false;
603  //YSP: Ensures you get an entry from the mcmc even when burn_in is set to zero (Although not advised :p ).
604  //Take 200k burn in steps, WP: Eb C in 1st peaky
605  // If we have combined chains by hadd need to check the step in the chain
606  // Note, entry is not necessarily same as step due to merged ROOT files, so can't choose entry in the range BurnIn - nEntries :(
607  while(Step < burn_in || !WithinBounds) {
608  entry = random->Integer(static_cast<unsigned int>(PosteriorFile->GetEntries()));
609  PosteriorFile->GetEntry(entry);
610  // KS: This might be bit hacky... but BoundValuePointer refer to values in ParameterHandler
611  // so we need to update them
612  if(BoundValuePointer.size() > 0) {
613  for (size_t s = 0; s < systematics.size(); ++s) {
614  systematics[s]->SetParameters(branch_vals[s]);
615  }
616  }
617  WithinBounds = CheckBounds(BoundValuePointer, ParamBounds);
618  }
619  Draw = entry;
620  }
621  for (size_t s = 0; s < systematics.size(); ++s)
622  {
623  //KS: Below line can help you get prior predictive distributions which are helpful for getting pre and post ND fit spectra
624  //YSP: If not set in the config, the code runs SK Posterior Predictive distributions by default. If true, then the code runs SK prior predictive.
625  if(Is_PriorPredictive) {
626  systematics[s]->ThrowParameters();
627  } else {
628  systematics[s]->SetParameters(branch_vals[s]);
629  }
630  }
631 
632  // This set some params to prior value this way you can evaluate errors from subset of errors
633  SetParamters(ParameterGroupsNotVaried, ParameterOnlyToVary);
634 
635  Penalty = 0;
636  if(FullLLH) {
637  for (size_t s = 0; s < systematics.size(); ++s) {
638  //KS: do times 2 because banff reports chi2
639  Penalty = 2.0 * systematics[s]->GetLikelihood();
640  }
641  }
642 
643  PenaltyTerm[i] = Penalty;
644  ReweightWeight[i] = Weight;
645 
646  for (size_t iPDF = 0; iPDF < samples.size(); iPDF++) {
647  samples[iPDF]->Reweight();
648  }
649  // Save histograms to file
650  WriteToy(ToyDirectory, Toy_1DDirectory, Toy_2DDirectory, i);
651  if(doByMode) WriteByModeToys(ByModeDirectory, i);
652 
653  // Fill parameter value so we know throw values
654  for (size_t iPar = 0; iPar < ParamValues.size(); iPar++) {
655  ParamValues[iPar] = *ParampPointers[iPar];
656  }
657 
658  ToyTree->Fill();
659  }//end of toys loop
660  TempClock.Stop();
661 
662  if(PosteriorFile) delete PosteriorFile;
663  ToyDirectory->Close(); delete ToyDirectory;
664  Toy_1DDirectory->Close(); delete Toy_1DDirectory;
665  Toy_2DDirectory->Close(); delete Toy_2DDirectory;
666  if(doByMode){
667  ByModeDirectory->Close();
668  delete ByModeDirectory;
669  }
670 
671  outputFile->cd();
672  ToyTree->Write(); delete ToyTree;
673 
674  MACH3LOG_INFO("{} took {:.2f}s to finish for {} toys", __func__, TempClock.RealTime(), Ntoys);
675 }
676 
677 // *************************
678 void PredictiveThrower::Study1DProjections(const std::vector<TDirectory*>& SampleDirectories) const {
679 // *************************
680  MACH3LOG_INFO("Starting {}", __func__);
681 
682  TDirectory * ogdir = gDirectory;
683  auto PosteriorFileName = Get<std::string>(fitMan->raw()["Predictive"]["PosteriorFile"], __FILE__, __LINE__);
684  // Open the ROOT file
685  int originalErrorWarning = gErrorIgnoreLevel;
686  gErrorIgnoreLevel = kFatal;
687 
688  TFile* file = TFile::Open(PosteriorFileName.c_str(), "READ");
689 
690  gErrorIgnoreLevel = originalErrorWarning;
691  TDirectory* ToyDir = file->GetDirectory("Toys_1DHistVar");
692  // If toys not amiable in posterior file this means they must be in output file
693  if(ToyDir == nullptr) {
694  ToyDir = outputFile->GetDirectory("Toys_1DHistVar");
695  }
696  // [sample], [toy], [dim]
697  std::vector<std::vector<std::vector<std::unique_ptr<TH1D>>>> ProjectionToys(TotalNumberOfSamples);
698  for (int sample = 0; sample < TotalNumberOfSamples; ++sample) {
699  ProjectionToys[sample].resize(Ntoys);
700  const int nDims = SampleInfo[sample].Dimenstion;
701  for (int iToy = 0; iToy < Ntoys; ++iToy) {
702  ProjectionToys[sample][iToy].resize(nDims);
703  }
704  }
705 
706  for (int iToy = 0; iToy < Ntoys; ++iToy) {
707  if (iToy % 100 == 0) MACH3LOG_INFO(" Loaded Projection toys {}", iToy);
708  for (int sample = 0; sample < TotalNumberOfSamples; ++sample) {
709  const int nDims = SampleInfo[sample].Dimenstion;
710  for(int iDim = 0; iDim < nDims; iDim ++){
711  std::string ProjectionSuffix = "_1DProj" + std::to_string(iDim) + "_" + std::to_string(iToy);
712  TH1D* MCHist1D = static_cast<TH1D*>(ToyDir->Get((SampleInfo[sample].Name + ProjectionSuffix).c_str()));
713  ProjectionToys[sample][iToy][iDim] = M3::Clone(MCHist1D);
714  }
715  } // end loop over samples
716  } // end loop over toys
717  file->Close(); delete file;
718  if(ogdir){ ogdir->cd(); }
719 
720  ProduceSpectra(ProjectionToys, SampleDirectories, "mc");
721  for (int sample = 0; sample < TotalNumberOfSamples; ++sample) {
722  const int nDims = SampleInfo[sample].Dimenstion;
723  // KS: We only care about doing projections for 2D, for 1D we have well 1D, for beyond 2D we have flattened TH1D
724  if(nDims == 2){
725  auto hist = Data_Hist[sample].get();
726  SampleDirectories[sample]->cd();
727 
728  std::string nameX = "Data_" + SampleInfo[sample].Name + "_Dim0";
729  std::string nameY = "Data_" + SampleInfo[sample].Name + "_Dim1";
730 
731  if(std::string(hist->ClassName()) == "TH2Poly") {
732  TAxis* xax = ProjectionToys[sample][0][0]->GetXaxis();
733  TAxis* yax = ProjectionToys[sample][0][1]->GetXaxis();
734 
735  std::vector<double> XBinning(xax->GetNbins()+1);
736  std::vector<double> YBinning(yax->GetNbins()+1);
737 
738  for(int i=0;i<=xax->GetNbins();++i)
739  XBinning[i] = xax->GetBinLowEdge(i+1);
740 
741  for(int i=0;i<=yax->GetNbins();++i)
742  YBinning[i] = yax->GetBinLowEdge(i+1);
743 
744  TH1D* ProjectionX = PolyProjectionX(static_cast<TH2Poly*>(hist), nameX.c_str(), XBinning, false);
745  TH1D* ProjectionY = PolyProjectionY(static_cast<TH2Poly*>(hist), nameY.c_str(), YBinning, false);
746 
747  ProjectionX->SetDirectory(nullptr);
748  ProjectionY->SetDirectory(nullptr);
749 
750  ProjectionX->Write(nameX.c_str());
751  ProjectionY->Write(nameY.c_str());
752 
753  delete ProjectionX;
754  delete ProjectionY;
755  } else { //TH2D
756  TH1D* ProjectionX = static_cast<TH2D*>(hist)->ProjectionX(nameX.c_str());
757  TH1D* ProjectionY = static_cast<TH2D*>(hist)->ProjectionY(nameY.c_str());
758 
759  ProjectionX->SetDirectory(nullptr);
760  ProjectionY->SetDirectory(nullptr);
761 
762  ProjectionX->Write(nameX.c_str());
763  ProjectionY->Write(nameY.c_str());
764  delete ProjectionX;
765  delete ProjectionY;
766  }
767  }
768  }
769 }
770 
771 // *************************
772 void PredictiveThrower::StudyByMode1DProjections(const std::vector<TDirectory*>& SampleDirectories) const {
773 // *************************
774  MACH3LOG_INFO("Starting {}", __func__);
775 
776  TDirectory * ogdir = gDirectory;
777  auto PosteriorFileName = Get<std::string>(fitMan->raw()["Predictive"]["PosteriorFile"], __FILE__, __LINE__);
778  // Open the ROOT file
779  int originalErrorWarning = gErrorIgnoreLevel;
780  gErrorIgnoreLevel = kFatal;
781 
782  TFile* file = TFile::Open(PosteriorFileName.c_str(), "READ");
783 
784  gErrorIgnoreLevel = originalErrorWarning;
785  TDirectory* ToyDir = file->GetDirectory("Toys_ByMode");
786  // If toys not amiable in posterior file this means they must be in output file
787  if(ToyDir == nullptr) {
788  ToyDir = outputFile->GetDirectory("Toys_ByMode");
789  }
793  auto* mode = SampleInfo[0].SamHandler->GetMaCh3Modes();
794  auto NModes = mode->GetNModes()+1;
795  // [mode], [sample], [toy], [dim]
796  std::vector<std::vector<std::vector<std::vector<std::unique_ptr<TH1D>>>>> ProjectionToys(NModes);
797  for(int iMode = 0; iMode < NModes; iMode++) {
798  ProjectionToys[iMode].resize(TotalNumberOfSamples);
799  for (int sample = 0; sample < TotalNumberOfSamples; ++sample) {
800  ProjectionToys[iMode][sample].resize(Ntoys);
801  const int nDims = SampleInfo[sample].Dimenstion;
802  for (int iToy = 0; iToy < Ntoys; ++iToy) {
803  ProjectionToys[iMode][sample][iToy].resize(nDims);
804  }
805  }
806  }
807 
808  for (int iToy = 0; iToy < Ntoys; ++iToy) {
809  if (iToy % 100 == 0) MACH3LOG_INFO(" Loaded Projection toys {}", iToy);
810  for(int iMode = 0; iMode < NModes; iMode++) {
811  auto ModeName = mode->GetMaCh3ModeName(iMode);
812  for (int sample = 0; sample < TotalNumberOfSamples; ++sample) {
813  const int nDims = SampleInfo[sample].Dimenstion;
814  for(int iDim = 0; iDim < nDims; iDim ++) {
815  std::string ProjectionSuffix = "_1DProj" + std::to_string(iDim) + "_" + ModeName + "_" + std::to_string(iToy);
816  TH1D* MCHist1D = static_cast<TH1D*>(ToyDir->Get((SampleInfo[sample].Name + ProjectionSuffix).c_str()));
817  ProjectionToys[iMode][sample][iToy][iDim] = M3::Clone(MCHist1D);
818  }
819  }
820  } // end loop over samples
821  } // end loop over toys
822 
823  // ByMode directory
824  std::vector<TDirectory*> ModeDirectory(TotalNumberOfSamples);
825  for(int iSample = 0; iSample < TotalNumberOfSamples; iSample++) {
826  ModeDirectory[iSample] = SampleDirectories[iSample]->mkdir("ByMode");
827  }
828  // Produce By Mode Spectra
829  for(int iMode = 0; iMode < NModes; iMode++) {
830  auto ModeName = mode->GetMaCh3ModeName(iMode);
831  ProduceSpectra(ProjectionToys[iMode], ModeDirectory, ModeName, false);
832  }
833  for(int iSample = 0; iSample < TotalNumberOfSamples; iSample++) {
834  ModeDirectory[iSample]->Close();
835  delete ModeDirectory[iSample];
836  }
837  file->Close(); delete file;
838  if(ogdir){ ogdir->cd(); }
839 }
840 
841 // *************************
842 void PredictiveThrower::ProduceSpectra(const std::vector<std::vector<std::vector<std::unique_ptr<TH1D>>>>& Toys,
843  const std::vector<TDirectory*>& SampleDirectories,
844  const std::string suffix,
845  const bool DoSummary) const {
846 // *************************
847  std::vector<std::vector<double>> MaxValue(TotalNumberOfSamples);
848 
849  // 1. Create Max value
850  for (int sample = 0; sample < TotalNumberOfSamples; ++sample) {
851  const int nDims = SampleInfo[sample].Dimenstion;
852  MaxValue[sample].assign(nDims, 0);
853  }
854 
855  // 2. Find maximum entries over all toys
856  #ifdef MULTITHREAD
857  #pragma omp parallel for
858  #endif
859  for (int sample = 0; sample < TotalNumberOfSamples; ++sample) {
860  for (int toy = 0; toy < Ntoys; ++toy) {
861  const int nDims = SampleInfo[sample].Dimenstion;
862  for (int dim = 0; dim < nDims; dim++) {
863  double max_val = Toys[sample][toy][dim]->GetMaximum();
864  MaxValue[sample][dim] = std::max(MaxValue[sample][dim], max_val);
865  }
866  }
867  }
868 
869  // 3. Make actual spectra histogram (this is because making ROOT histograms is not save)
870  // And we now have actual max values
871  std::vector<std::vector<std::unique_ptr<TH2D>>> Spectra(TotalNumberOfSamples);
872  for (int sample = 0; sample < TotalNumberOfSamples; ++sample) {
873  const int nDims = SampleInfo[sample].Dimenstion;
874  Spectra[sample].resize(nDims);
875  for (int dim = 0; dim < nDims; dim++) {
876  // Get MC histogram x-axis binning
877  TH1D* refHist = Toys[sample][0][dim].get();
878 
879  const int n_bins_x = refHist->GetNbinsX();
880  std::vector<double> x_bin_edges(n_bins_x + 1);
881  for (int b = 0; b < n_bins_x; ++b) {
882  x_bin_edges[b] = refHist->GetXaxis()->GetBinLowEdge(b + 1);
883  }
884  x_bin_edges[n_bins_x] = refHist->GetXaxis()->GetBinUpEdge(n_bins_x);
885 
886  constexpr int n_bins_y = 400;
887  constexpr double y_min = 0.0;
888  const double y_max = MaxValue[sample][dim] * 1.05;
889 
890  // Create TH2D with variable binning on x axis
891  Spectra[sample][dim] = std::make_unique<TH2D>(
892  (SampleInfo[sample].Name + "_" + suffix + "_dim" + std::to_string(dim)).c_str(), // name
893  (SampleInfo[sample].Name + "_" + suffix + "_dim" + std::to_string(dim)).c_str(), // title
894  n_bins_x, x_bin_edges.data(), // x axis bins
895  n_bins_y, y_min, y_max // y axis bins
896  );
897 
898  Spectra[sample][dim]->GetXaxis()->SetTitle(refHist->GetXaxis()->GetTitle());
899  Spectra[sample][dim]->GetYaxis()->SetTitle("Events");
900 
901  Spectra[sample][dim]->SetDirectory(nullptr);
902  Spectra[sample][dim]->Sumw2(true);
903  }
904  }
905 
906  // 4. now we can actually fill our projections
907  #ifdef MULTITHREAD
908  #pragma omp parallel for collapse(2)
909  #endif
910  for (int sample = 0; sample < TotalNumberOfSamples; ++sample) {
911  for (int toy = 0; toy < Ntoys; ++toy) {
912  const int nDims = SampleInfo[sample].Dimenstion;
913  for (int dim = 0; dim < nDims; dim++) {
914  FastViolinFill(Spectra[sample][dim].get(), Toys[sample][toy][dim].get());
915  }
916  }
917  }
918 
919  // 5. Save histograms which is not thread save
920  for (int sample = 0; sample < TotalNumberOfSamples; ++sample) {
921  SampleDirectories[sample]->cd();
922  const int nDims = SampleInfo[sample].Dimenstion;
923  for (long unsigned int dim = 0; dim < Spectra[sample].size(); dim++) {
924  Spectra[sample][dim]->Write();
925  // For case of 2D make additional histograms
926  if(nDims == 2 && DoSummary) {
927  const std::string name = SampleInfo[sample].Name + "_" + suffix+ "_PostPred_dim" + std::to_string(dim);
928  auto Summary = MakeSummaryFromSpectra(Spectra[sample][dim].get(), name);
929  Summary->Write();
930  }
931  }
932  }
933 }
934 
935 // *************************
936 std::string PredictiveThrower::GetBinName(TH1* hist,
937  const bool uniform,
938  const int Dim,
939  const std::vector<int>& bins) const {
940 // *************************
941  std::string BinName = "";
942  if(Dim == 1) { // True 1D distribution using TH1D
943  const int b = bins[0];
944  const TAxis* ax = hist->GetXaxis();
945  const double low = ax->GetBinLowEdge(b);
946  const double up = ax->GetBinUpEdge(b);
947 
948  BinName = fmt::format("Dim0 ({:g}, {:g})", low, up);
949  } else if (Dim == 2) { // True 2D dsitrubitons
950  if(uniform == true) { //using TH2D
951  const int bx = bins[0];
952  const int by = bins[1];
953  const TAxis* ax = hist->GetXaxis();
954  const TAxis* ay = hist->GetYaxis();
955  BinName = fmt::format("Dim0 ({:g}, {:g}), ", ax->GetBinLowEdge(bx), ax->GetBinUpEdge(bx));
956  BinName += fmt::format("Dim1 ({:g}, {:g})", ay->GetBinLowEdge(by), ay->GetBinUpEdge(by));
957  } else { // using TH2Poly
958  TH2PolyBin* bin = static_cast<TH2PolyBin*>(static_cast<TH2Poly*>(hist)->GetBins()->At(bins[0]-1));
959  // Just make a little fancy name
960  BinName += fmt::format("Dim{} ({:g}, {:g})", 0, bin->GetXMin(), bin->GetXMax());
961  BinName += fmt::format("Dim{} ({:g}, {:g})", 1, bin->GetYMin(), bin->GetYMax());
962  }
963  } else { // N-dimensional distribution using flatten TH1D
964  BinName = hist->GetXaxis()->GetBinLabel(bins[0]);
965  }
966  return BinName;
967 }
968 
969 // *************************
970 std::vector<std::unique_ptr<TH1D>> PredictiveThrower::PerBinHistogram(TH1* hist,
971  const int SampleId,
972  const int Dim,
973  const std::string& suffix) const {
974 // *************************
975  std::vector<std::unique_ptr<TH1D>> PosteriorHistVec;
976  constexpr int nBins = 100;
977  const std::string Sample_Name = SampleInfo[SampleId].Name;
978  if (Dim == 2) {
979  if(std::string(hist->ClassName()) == "TH2Poly") {
980  for (int i = 1; i <= static_cast<TH2Poly*>(hist)->GetNumberOfBins(); ++i) {
981  std::string ProjName = fmt::format("{} {} Bin: {}",
982  Sample_Name, suffix,
983  GetBinName(hist, false, Dim, {i}));
984  // KS: When a histogram is created with an axis lower limit greater or equal to its upper limit ROOT will automatically adjust histogram range
985  // https://root.cern.ch/doc/master/classTH1.html#auto-bin
986  auto PosteriorHist = std::make_unique<TH1D>(ProjName.c_str(), ProjName.c_str(), nBins, 1, -1);
987  PosteriorHist->SetDirectory(nullptr);
988  PosteriorHist->GetXaxis()->SetTitle("Events");
989  PosteriorHistVec.push_back(std::move(PosteriorHist));
990  } //end loop over bin
991  } else {
992  int nbinsx = hist->GetNbinsX();
993  int nbinsy = hist->GetNbinsY();
994  for (int iy = 1; iy <= nbinsy; ++iy) {
995  for (int ix = 1; ix <= nbinsx; ++ix) {
996  std::string ProjName = fmt::format("{} {} Bin: {}",
997  Sample_Name, suffix,
998  GetBinName(hist, true, Dim, {ix,iy}));
999  //KS: When a histogram is created with an axis lower limit greater or equal to its upper limit ROOT will automatically adjust histogram range
1000  // https://root.cern.ch/doc/master/classTH1.html#auto-bin
1001  auto PosteriorHist = std::make_unique<TH1D>(ProjName.c_str(), ProjName.c_str(), nBins, 1, -1);
1002  PosteriorHist->SetDirectory(nullptr);
1003  PosteriorHist->GetXaxis()->SetTitle("Events");
1004  PosteriorHistVec.push_back(std::move(PosteriorHist));
1005  }
1006  }
1007  }
1008  } else {
1009  int nbinsx = hist->GetNbinsX();
1010  PosteriorHistVec.reserve(nbinsx);
1011  for (int i = 1; i <= nbinsx; ++i) {
1012  std::string ProjName = fmt::format("{} {} Bin: {}",
1013  Sample_Name, suffix,
1014  GetBinName(hist, true, Dim, {i}));
1015  //KS: When a histogram is created with an axis lower limit greater or equal to its upper limit ROOT will automatically adjust histogram range
1016  // https://root.cern.ch/doc/master/classTH1.html#auto-bin
1017  auto PosteriorHist = std::make_unique<TH1D>(ProjName.c_str(), ProjName.c_str(), nBins, 1, -1);
1018  PosteriorHist->SetDirectory(nullptr);
1019  PosteriorHist->GetXaxis()->SetTitle("Events");
1020  PosteriorHistVec.push_back(std::move(PosteriorHist));
1021  }
1022  }
1023  return PosteriorHistVec;
1024 }
1025 
1026 // *************************
1027 std::vector<std::unique_ptr<TH1>> PredictiveThrower::MakePredictive(const std::vector<std::vector<std::unique_ptr<TH1>>>& Toys,
1028  const std::vector<TDirectory*>& Directory,
1029  const std::string& suffix,
1030  const bool DebugHistograms,
1031  const bool WriteHist) {
1032 // *************************
1033  std::vector<std::unique_ptr<TH1>> PostPred(TotalNumberOfSamples);
1034  std::vector<std::vector<std::unique_ptr<TH1D>>> Posterior_hist(TotalNumberOfSamples);
1035  // 1.initialisation
1036  for (int sample = 0; sample < TotalNumberOfSamples; ++sample) {
1037  const int nDims = SampleInfo[sample].Dimenstion;
1038  const std::string Sample_Name = SampleInfo[sample].Name;
1039  Posterior_hist[sample] = PerBinHistogram(Toys[sample][0].get(), sample, nDims, suffix);
1040  auto PredictiveHist = M3::Clone(Toys[sample][0].get());
1041  // Clear the bin contents
1042  PredictiveHist->Reset();
1043  PredictiveHist->SetName((Sample_Name + "_" + suffix + "_PostPred").c_str());
1044  PredictiveHist->SetTitle((Sample_Name + "_" + suffix + "_PostPred").c_str());
1045  PredictiveHist->SetDirectory(nullptr);
1046  PostPred[sample] = std::move(PredictiveHist);
1047  }
1048 
1050  #ifdef MULTITHREAD
1051  #pragma omp parallel for
1052  #endif
1053  for (int sample = 0; sample < TotalNumberOfSamples; ++sample) {
1054  const int nDims = SampleInfo[sample].Dimenstion;
1055  auto& hist = Toys[sample][0];
1056  for (size_t iToy = 0; iToy < Toys[sample].size(); ++iToy) {
1057  if(nDims == 2) {
1058  if(std::string(hist->ClassName()) == "TH2Poly") {
1059  for (int i = 1; i <= static_cast<TH2Poly*>(hist.get())->GetNumberOfBins(); ++i) {
1060  double content = Toys[sample][iToy]->GetBinContent(i);
1061  Posterior_hist[sample][i-1]->Fill(content, ReweightWeight[iToy]);
1062  }
1063  } else {
1064  int nbinsx = hist->GetNbinsX();
1065  int nbinsy = hist->GetNbinsY();
1066  for (int iy = 1; iy <= nbinsy; ++iy) {
1067  for (int ix = 1; ix <= nbinsx; ++ix) {
1068  int Bin = (iy-1) * nbinsx + (ix-1);
1069  double content = Toys[sample][iToy]->GetBinContent(ix, iy);
1070  Posterior_hist[sample][Bin]->Fill(content, ReweightWeight[iToy]);
1071  } // end loop over X bins
1072  } // end loop over Y bins
1073  }
1074  } else {
1075  int nbinsx = hist->GetNbinsX();
1076  for (int i = 1; i <= nbinsx; ++i) {
1077  double content = Toys[sample][iToy]->GetBinContent(i);
1078  Posterior_hist[sample][i-1]->Fill(content, ReweightWeight[iToy]);
1079  } // end loop over bins
1080  } // end if over dimensions
1081  } // end loop over toys
1082  } // end loop over samples
1083 
1084  // 3.save
1085  for (int sample = 0; sample < TotalNumberOfSamples; ++sample) {
1086  const int nDims = SampleInfo[sample].Dimenstion;
1087  auto& hist = Toys[sample][0];
1088  Directory[sample]->cd();
1089  if(nDims == 2) {
1090  if(std::string(hist->ClassName()) == "TH2Poly") {
1091  for (int i = 1; i <= static_cast<TH2Poly*>(hist.get())->GetNumberOfBins(); ++i) {
1092  PostPred[sample]->SetBinContent(i, Posterior_hist[sample][i-1]->GetMean());
1093  // KS: If ROOT below 6.18 one need -1 only for error due to stupid bug...
1094  PostPred[sample]->SetBinError(i, Posterior_hist[sample][i-1]->GetRMS());
1095  if (DebugHistograms) Posterior_hist[sample][i-1]->Write();
1096  } // end loop over poly bins
1097  } else {
1098  int nbinsx = hist->GetNbinsX();
1099  int nbinsy = hist->GetNbinsY();
1100  for (int iy = 1; iy <= nbinsy; ++iy) {
1101  for (int ix = 1; ix <= nbinsx; ++ix) {
1102  int Bin = (iy-1) * nbinsx + (ix-1);
1103  if (DebugHistograms) Posterior_hist[sample][Bin]->Write();
1104  PostPred[sample]->SetBinContent(ix, iy, Posterior_hist[sample][Bin]->GetMean());
1105  PostPred[sample]->SetBinError(ix, iy, Posterior_hist[sample][Bin]->GetRMS());
1106  } // end loop over x
1107  } // end loop over y
1108  }
1109  } else {
1110  int nbinsx = hist->GetNbinsX();
1111  for (int i = 1; i <= nbinsx; ++i) {
1112  PostPred[sample]->SetBinContent(i, Posterior_hist[sample][i-1]->GetMean());
1113  PostPred[sample]->SetBinError(i, Posterior_hist[sample][i-1]->GetRMS());
1114  if (DebugHistograms) Posterior_hist[sample][i-1]->Write();
1115  }
1116  }
1117  if(WriteHist) PostPred[sample]->Write();
1118  } // end loop over samples
1119  return PostPred;
1120 }
1121 
1122 // *************************
1123 // Perform predictive analysis
1125 // *************************
1126  // Remove not useful stuff
1127  SanitiseInputs();
1128 
1129  MACH3LOG_INFO("Starting {}", __func__);
1130  MACH3LOG_WARN("\033[0;31mCurrent Total RAM usage is {:.2f} GB\033[0m", M3::Utils::getValue("VmRSS") / 1048576.0);
1131  MACH3LOG_WARN("\033[0;31mOut of Total available RAM {:.2f} GB\033[0m", M3::Utils::getValue("MemTotal") / 1048576.0);
1132 
1133  TStopwatch TempClock;
1134  TempClock.Start();
1135 
1136  auto DebugHistograms = GetFromManager<bool>(fitMan->raw()["Predictive"]["DebugHistograms"], false, __FILE__, __LINE__);
1137  auto doByMode = GetFromManager<bool>(fitMan->raw()["Predictive"]["ByMode"], false, __FILE__, __LINE__);
1138 
1139  TDirectory* PredictiveDir = outputFile->mkdir("Predictive");
1140  std::vector<TDirectory*> SampleDirectories;
1141  SampleDirectories.resize(TotalNumberOfSamples+1);
1142 
1143  // open directory for every sample
1144  for (int sample = 0; sample < TotalNumberOfSamples+1; ++sample) {
1145  SampleDirectories[sample] = PredictiveDir->mkdir(SampleInfo[sample].Name.c_str());
1146  }
1147 
1148  // Produce Violin style spectra
1149  Study1DProjections(SampleDirectories);
1150  // Produce Post pred by each mode individually
1151  if(doByMode) StudyByMode1DProjections(SampleDirectories);
1152  // Produce posterior predictive distribution for mc
1153  auto PostPred_mc = MakePredictive(MC_Hist_Toy, SampleDirectories, "mc", DebugHistograms, false);
1154  // Produce posterior predictive distribution for w2
1155  auto PostPred_w2 = MakePredictive(W2_Hist_Toy, SampleDirectories, "w2", false, false);
1156  // Calculate Posterior Predictive LLH
1157  PredictiveLLH(Data_Hist, PostPred_mc, PostPred_w2, SampleDirectories);
1158  // Calculate Posterior Predictive $p$-value
1159  PosteriorPredictivepValue(PostPred_mc, SampleDirectories);
1160  // Check how number of events changed
1161  RateAnalysis(MC_Hist_Toy, SampleDirectories);
1162 
1163  // Close directories
1164  for (int sample = 0; sample < TotalNumberOfSamples+1; ++sample) {
1165  SampleDirectories[sample]->Close();
1166  delete SampleDirectories[sample];
1167  }
1168 
1169  auto StudyBeta = GetFromManager<bool>(fitMan->raw()["Predictive"]["StudyBetaParameters"], true, __FILE__, __LINE__);
1170  auto StudyInfoCriterion = GetFromManager<bool>(fitMan->raw()["Predictive"]["StudyInformationCriterion"], true, __FILE__, __LINE__);
1171  auto StudyCorr = GetFromManager<bool>(fitMan->raw()["Predictive"]["StudyCorrelations"], true, __FILE__, __LINE__);
1172 
1173  // Studying information criterion
1174  if(StudyInfoCriterion) StudyInformationCriterion(M3::kWAIC, PostPred_mc, PostPred_w2);
1175  // Study Prior/Posterior correlations between samples etc.
1176  if(StudyCorr) StudyCorrelations(PredictiveDir, MC_Hist_Toy, DebugHistograms);
1177  // Perform beta analysis for mc statical uncertainty
1178  if(StudyBeta) StudyBetaParameters(PredictiveDir);
1179 
1180  PredictiveDir->Close();
1181  delete PredictiveDir;
1182 
1183  outputFile->cd();
1184 
1185  TempClock.Stop();
1186  MACH3LOG_INFO("{} took {:.2f}s to finish for {} toys", __func__, TempClock.RealTime(), Ntoys);
1187 }
1188 
1189 // *************************
1190 double PredictiveThrower::CalcLLH(const double data,
1191  const double mc,
1192  const double w2,
1193  const SampleHandlerInterface* SampleHandler) const {
1194 // *************************
1195  double llh = SampleHandler->GetTestStatLLH(data, mc, w2);
1196  //KS: do times 2 because banff reports chi2
1197  return 2*llh;
1198 }
1199 
1200 // *************************
1201 double PredictiveThrower::CalcLLH(const TH1* DatHist,
1202  const TH1* MCHist,
1203  const TH1* W2Hist,
1204  const SampleHandlerInterface* SampleHandler) const {
1205 // *************************
1206  // 1D case
1207  if (auto h1 = dynamic_cast<const TH1D*>(DatHist)) {
1208  return GetLLH(h1,
1209  static_cast<const TH1D*>(MCHist),
1210  static_cast<const TH1D*>(W2Hist),
1211  SampleHandler);
1212  }
1213 
1214  // 2D case
1215  if (auto h2 = dynamic_cast<const TH2D*>(DatHist)) {
1216  return GetLLH(h2,
1217  static_cast<const TH2D*>(MCHist),
1218  static_cast<const TH2D*>(W2Hist),
1219  SampleHandler);
1220  }
1221 
1222  // 2D poly case
1223  if (auto h2p = dynamic_cast<const TH2Poly*>(DatHist)) {
1224  return GetLLH(h2p,
1225  static_cast<const TH2Poly*>(MCHist),
1226  static_cast<const TH2Poly*>(W2Hist),
1227  SampleHandler);
1228  }
1229 
1230  MACH3LOG_ERROR("Unsupported histogram type in {}", __func__);
1231  throw MaCh3Exception(__FILE__ , __LINE__ );
1232 }
1233 
1234 // *************************
1235 double PredictiveThrower::GetLLH(const TH1D* DatHist,
1236  const TH1D* MCHist,
1237  const TH1D* W2Hist,
1238  const SampleHandlerInterface* SampleHandler) const {
1239 // *************************
1240  double llh = 0.0;
1241  for (int i = 1; i <= DatHist->GetXaxis()->GetNbins(); ++i)
1242  {
1243  const double data = DatHist->GetBinContent(i);
1244  const double mc = MCHist->GetBinContent(i);
1245  const double w2 = W2Hist->GetBinContent(i);
1246  llh += SampleHandler->GetTestStatLLH(data, mc, w2);
1247  }
1248  //KS: do times 2 because banff reports chi2
1249  return 2*llh;
1250 }
1251 
1252 // *************************
1253 double PredictiveThrower::GetLLH(const TH2Poly* DatHist,
1254  const TH2Poly* MCHist,
1255  const TH2Poly* W2Hist,
1256  const SampleHandlerInterface* SampleHandler) const {
1257 // *************************
1258  double llh = 0.0;
1259  for (int i = 1; i <= DatHist->GetNumberOfBins(); ++i)
1260  {
1261  const double data = DatHist->GetBinContent(i);
1262  const double mc = MCHist->GetBinContent(i);
1263  const double w2 = W2Hist->GetBinContent(i);
1264  llh += SampleHandler->GetTestStatLLH(data, mc, w2);
1265  }
1266  //KS: do times 2 because banff reports chi2
1267  return 2*llh;
1268 }
1269 
1270 // *************************
1271 double PredictiveThrower::GetLLH(const TH2D* DatHist,
1272  const TH2D* MCHist,
1273  const TH2D* W2Hist,
1274  const SampleHandlerInterface* SampleHandler) const {
1275 // *************************
1276  double llh = 0.0;
1277 
1278  const int nBinsX = DatHist->GetXaxis()->GetNbins();
1279  const int nBinsY = DatHist->GetYaxis()->GetNbins();
1280 
1281  for (int i = 1; i <= nBinsX; ++i)
1282  {
1283  for (int j = 1; j <= nBinsY; ++j)
1284  {
1285  const double data = DatHist->GetBinContent(i, j);
1286  const double mc = MCHist->GetBinContent(i, j);
1287  const double w2 = W2Hist->GetBinContent(i, j);
1288 
1289  llh += SampleHandler->GetTestStatLLH(data, mc, w2);
1290  }
1291  }
1292 
1293  // KS: do times 2 because banff reports chi2
1294  return 2 * llh;
1295 }
1296 
1297 // ****************
1298 //KS: We have two methods how to apply statistical fluctuation standard is faster hence is default
1299 void PredictiveThrower::MakeFluctuatedHistogram(TH1* FluctHist, TH1* Hist) {
1300 // ****************
1301  // Determine which fluctuation function to call
1302  auto applyFluctuation = [&](auto* f, auto* h) {
1303  if (StandardFluctuation) {
1305  } else {
1307  }
1308  };
1309 
1310  if (Hist->InheritsFrom(TH2Poly::Class())) {
1311  applyFluctuation(static_cast<TH2Poly*>(FluctHist), static_cast<TH2Poly*>(Hist));
1312  }
1313  else if (Hist->InheritsFrom(TH2D::Class())) {
1314  applyFluctuation(static_cast<TH2D*>(FluctHist), static_cast<TH2D*>(Hist));
1315  }
1316  else if (Hist->InheritsFrom(TH1D::Class())) {
1317  applyFluctuation(static_cast<TH1D*>(FluctHist), static_cast<TH1D*>(Hist));
1318  }
1319  else {
1320  MACH3LOG_ERROR("Unsupported histogram type");
1321  throw MaCh3Exception(__FILE__ , __LINE__ );
1322  }
1323 }
1324 
1325 // *************************
1326 void PredictiveThrower::PosteriorPredictivepValue(const std::vector<std::unique_ptr<TH1>>& PostPred_mc,
1327  const std::vector<TDirectory*>& SampleDir) {
1328 // *************************
1329  // Step 1: Initialize per-toy accumulators once
1330  // [Sample] [Toys]
1331  auto make_matrix = [&](double init = 0.0) {
1332  return std::vector<std::vector<double>>(
1334  std::vector<double>(Ntoys, init));
1335  };
1336  auto chi2_dat = make_matrix();
1337  auto chi2_mc = make_matrix();
1338  auto chi2_pred = make_matrix();
1339  auto chi2_rate_dat = make_matrix();
1340  auto chi2_rate_mc = make_matrix();
1341  auto chi2_rate_pred = make_matrix();
1342 
1343  // 2. Add penalty terms to global bin
1344  for (int iToy = 0; iToy < Ntoys; ++iToy) {
1345  chi2_dat[TotalNumberOfSamples][iToy] = PenaltyTerm[iToy];
1346  chi2_mc[TotalNumberOfSamples][iToy] = PenaltyTerm[iToy];
1347  chi2_pred[TotalNumberOfSamples][iToy] = PenaltyTerm[iToy];
1348 
1349  chi2_rate_dat[TotalNumberOfSamples][iToy] = PenaltyTerm[iToy];
1350  chi2_rate_mc[TotalNumberOfSamples][iToy] = PenaltyTerm[iToy];
1351  chi2_rate_pred[TotalNumberOfSamples][iToy] = PenaltyTerm[iToy];
1352  }
1353 
1355  for (int iSample = 0; iSample < TotalNumberOfSamples; ++iSample) {
1356  auto SampleHandler = SampleInfo[iSample].SamHandler;
1357  for (int iToy = 0; iToy < Ntoys; ++iToy) {
1358  // Clone histograms to avoid modifying originals
1359  auto DrawFluctHist = M3::Clone(MC_Hist_Toy[iSample][iToy].get());
1360  auto PredFluctHist = M3::Clone(PostPred_mc[iSample].get());
1361 
1362  // Apply fluctuations
1363  MakeFluctuatedHistogram(DrawFluctHist.get(), MC_Hist_Toy[iSample][iToy].get());
1364  MakeFluctuatedHistogram(PredFluctHist.get(), PostPred_mc[iSample].get());
1365 
1366  // I. SHAPE + RATE (bin-by-bin likelihood)
1367  chi2_dat[iSample][iToy] = CalcLLH(Data_Hist[iSample].get(), MC_Hist_Toy[iSample][iToy].get(), W2_Hist_Toy[iSample][iToy].get(), SampleHandler);
1368  chi2_mc[iSample][iToy] = CalcLLH(DrawFluctHist.get(), MC_Hist_Toy[iSample][iToy].get(), W2_Hist_Toy[iSample][iToy].get(), SampleHandler);
1369  chi2_pred[iSample][iToy] = CalcLLH(PredFluctHist.get(), MC_Hist_Toy[iSample][iToy].get(), W2_Hist_Toy[iSample][iToy].get(), SampleHandler);
1370 
1371  // II. RATE-ONLY (total normalization)
1372  chi2_rate_dat[iSample][iToy] = CalcLLH(Data_Hist[iSample]->Integral(), MC_Hist_Toy[iSample][iToy]->Integral(), W2_Hist_Toy[iSample][iToy]->Integral(), SampleHandler);
1373  chi2_rate_mc[iSample][iToy] = CalcLLH(DrawFluctHist->Integral(), MC_Hist_Toy[iSample][iToy]->Integral(), W2_Hist_Toy[iSample][iToy]->Integral(), SampleHandler);
1374  chi2_rate_pred[iSample][iToy] = CalcLLH(PredFluctHist->Integral(), MC_Hist_Toy[iSample][iToy]->Integral(), W2_Hist_Toy[iSample][iToy]->Integral(), SampleHandler);
1375 
1376  // III. accumulate global sums ---
1377  chi2_dat[TotalNumberOfSamples][iToy] += chi2_dat[iSample][iToy];
1378  chi2_mc[TotalNumberOfSamples][iToy] += chi2_mc[iSample][iToy];
1379  chi2_pred[TotalNumberOfSamples][iToy] += chi2_pred[iSample][iToy];
1380 
1381  chi2_rate_dat[TotalNumberOfSamples][iToy] += chi2_rate_dat[iSample][iToy];
1382  chi2_rate_mc[TotalNumberOfSamples][iToy] += chi2_rate_mc[iSample][iToy];
1383  chi2_rate_pred[TotalNumberOfSamples][iToy] += chi2_rate_pred[iSample][iToy];
1384  }
1385  }
1386 
1387  // 4. Produce pValue plots
1388  // Shape+rate posterior predictive checks
1389  MakeChi2Plots(chi2_mc, "-2LLH (Draw Fluc, Draw)", chi2_dat, "-2LLH (Data, Draw)", SampleDir, "_drawfluc_draw");
1390  MakeChi2Plots(chi2_pred, "-2LLH (Pred Fluc, Draw)", chi2_dat, "-2LLH (Data, Draw)", SampleDir, "_predfluc_draw");
1391 
1392  // Rate-only posterior predictive checks
1393  MakeChi2Plots(chi2_rate_mc, "-2LLH (Rate Draw Fluc, Draw)", chi2_rate_dat, "-2LLH (Rate Data, Draw)", SampleDir, "_rate_drawfluc_draw");
1394  MakeChi2Plots(chi2_rate_pred, "-2LLH (Rate Pred Fluc, Draw)", chi2_rate_dat, "-2LLH (Rate Data, Draw)", SampleDir, "_rate_predfluc_draw");
1395 }
1396 
1397 // *************************
1398 void PredictiveThrower::PredictiveLLH(const std::vector<std::unique_ptr<TH1>>& Data_histogram,
1399  const std::vector<std::unique_ptr<TH1>>& PostPred_mc,
1400  const std::vector<std::unique_ptr<TH1>>& PostPred_w,
1401  const std::vector<TDirectory*>& SampleDir) {
1402 // *************************
1403  MACH3LOG_INFO("{:<55} {:<10} {:<10} {:<10}", "Sample", "DataInt", "MCInt", "-2LLH");
1404  MACH3LOG_INFO("{:-<55} {:-<10} {:-<10} {:-<10}", "", "", "", "");
1405  for (int iSample = 0; iSample < TotalNumberOfSamples; ++iSample) {
1406  SampleDir[iSample]->cd();
1407  ExtractLLH(Data_histogram[iSample].get(), PostPred_mc[iSample].get(), PostPred_w[iSample].get(), SampleInfo[iSample].SamHandler);
1408  PostPred_mc[iSample]->Write();
1409  }
1410 }
1411 
1412 
1413 // *************************
1414 void PredictiveThrower::MakeChi2Plots(const std::vector<std::vector<double>>& Chi2_x,
1415  const std::string& Chi2_x_title,
1416  const std::vector<std::vector<double>>& Chi2_y,
1417  const std::string& Chi2_y_title,
1418  const std::vector<TDirectory*>& SampleDir,
1419  const std::string Title) {
1420 // *************************
1421  for (int iSample = 0; iSample < TotalNumberOfSamples+1; ++iSample) {
1422  SampleDir[iSample]->cd();
1423 
1424  // Transpose to extract chi2 values for a given sample across all toys
1425  std::vector<double> chi2_y_sample(Ntoys);
1426  std::vector<double> chi2_x_per_sample(Ntoys);
1427 
1428  for (int iToy = 0; iToy < Ntoys; ++iToy) {
1429  chi2_y_sample[iToy] = Chi2_y[iSample][iToy];
1430  chi2_x_per_sample[iToy] = Chi2_x[iSample][iToy];
1431  }
1432 
1433  const double min_val = std::min(*std::min_element(chi2_y_sample.begin(), chi2_y_sample.end()),
1434  *std::min_element(chi2_x_per_sample.begin(), chi2_x_per_sample.end()));
1435  const double max_val = std::max(*std::max_element(chi2_y_sample.begin(), chi2_y_sample.end()),
1436  *std::max_element(chi2_x_per_sample.begin(), chi2_x_per_sample.end()));
1437 
1438  auto chi2_hist = std::make_unique<TH2D>((SampleInfo[iSample].Name+ Title).c_str(),
1439  (SampleInfo[iSample].Name+ Title).c_str(),
1440  50, min_val, max_val, 50, min_val, max_val);
1441  chi2_hist->SetDirectory(nullptr);
1442  chi2_hist->GetXaxis()->SetTitle(Chi2_x_title.c_str());
1443  chi2_hist->GetYaxis()->SetTitle(Chi2_y_title.c_str());
1444 
1445  for (int iToy = 0; iToy < Ntoys; ++iToy) {
1446  chi2_hist->Fill(chi2_x_per_sample[iToy], chi2_y_sample[iToy]);
1447  }
1448 
1449  Get2DBayesianpValue(chi2_hist.get());
1450  chi2_hist->Write();
1451  }
1452 }
1453 
1454 // *************************
1455 // Study Beta Parameters
1456 void PredictiveThrower::StudyBetaParameters(TDirectory* PredictiveDir) {
1457 // *************************
1458  bool StudyBeta = GetFromManager<bool>(fitMan->raw()["Predictive"]["StudyBetaParameters"], true, __FILE__, __LINE__ );
1459  if (StudyBeta == false) return;
1460 
1461  MACH3LOG_INFO("Starting {}", __func__);
1462  TDirectory* BetaDir = PredictiveDir->mkdir("BetaParameters");
1463  std::vector<std::vector<std::unique_ptr<TH1D>>> BetaHist(TotalNumberOfSamples);
1464  std::vector<TDirectory *> DirBeta(TotalNumberOfSamples);
1465  // initialise directory for each sample
1466  for (int sample = 0; sample < TotalNumberOfSamples; ++sample) {
1467  BetaDir->cd();
1468  DirBeta[sample] = BetaDir->mkdir(SampleInfo[sample].Name.c_str());
1469  }
1470 
1472  for (int iSample = 0; iSample < TotalNumberOfSamples; ++iSample) {
1473  const int nDims = SampleInfo[iSample].Dimenstion;
1474  // Use any histogram that defines the binning structure
1475  TH1* RefHist = Data_Hist[iSample].get();
1476  BetaHist[iSample] = PerBinHistogram(RefHist, iSample, nDims, "Beta_Parameter");
1477  // Change x-axis title
1478  for (size_t i = 0; i < BetaHist[iSample].size(); ++i) {
1479  BetaHist[iSample][i]->GetXaxis()->SetTitle("beta parameter");
1480  }
1481  }
1482 
1484  #ifdef MULTITHREAD
1485  #pragma omp parallel for
1486  #endif
1487  for (int iSample = 0; iSample < TotalNumberOfSamples; ++iSample) {
1488  const int nDims = SampleInfo[iSample].Dimenstion;
1489  const auto likelihood = SampleInfo[iSample].SamHandler->GetTestStatistic();
1490  for (int iToy = 0; iToy < Ntoys; ++iToy) {
1491  if (nDims == 2) {
1492  if(std::string(Data_Hist[iSample]->ClassName()) == "TH2Poly") {
1493  for (int i = 1; i <= static_cast<TH2Poly*>(Data_Hist[iSample].get())->GetNumberOfBins(); ++i) {
1494  const double Data = Data_Hist[iSample]->GetBinContent(i);
1495  const double MC = MC_Hist_Toy[iSample][iToy]->GetBinContent(i);
1496  const double w2 = W2_Hist_Toy[iSample][iToy]->GetBinContent(i);
1497 
1498  const double BetaParam = GetBetaParameter(Data, MC, w2, likelihood);
1499  BetaHist[iSample][i-1]->Fill(BetaParam, ReweightWeight[iToy]);
1500  } // end loop over poly bins
1501  } else {
1502  const int nX = Data_Hist[iSample]->GetNbinsX();
1503  const int nY = Data_Hist[iSample]->GetNbinsY();
1504  for (int iy = 1; iy <= nY; ++iy) {
1505  for (int ix = 1; ix <= nX; ++ix) {
1506  const int FlatBin = (iy-1) * nX + (ix-1);
1507 
1508  const double Data = Data_Hist[iSample]->GetBinContent(ix, iy);
1509  const double MC = MC_Hist_Toy[iSample][iToy]->GetBinContent(ix, iy);
1510  const double w2 = W2_Hist_Toy[iSample][iToy]->GetBinContent(ix, iy);
1511 
1512  const double BetaParam = GetBetaParameter(Data, MC, w2, likelihood);
1513  BetaHist[iSample][FlatBin]->Fill(BetaParam, ReweightWeight[iToy]);
1514  }
1515  } // end loop over x
1516  } // end loop over y
1517  } else {
1518  int nbinsx = Data_Hist[iSample]->GetNbinsX();
1519  for (int ix = 1; ix <= nbinsx; ++ix) {
1521  const double Data = Data_Hist[iSample]->GetBinContent(ix);
1522  const double MC = MC_Hist_Toy[iSample][iToy]->GetBinContent(ix);
1523  const double w2 = W2_Hist_Toy[iSample][iToy]->GetBinContent(ix);
1524 
1525  const double BetaParam = GetBetaParameter(Data, MC, w2, likelihood);
1526  BetaHist[iSample][ix-1]->Fill(BetaParam, ReweightWeight[iToy]);
1527  } // end loop over bins
1528  }
1529  } // end loop over toys
1530  } // end loop over samples
1531 
1533  for (int iSample = 0; iSample < TotalNumberOfSamples; ++iSample) {
1534  for (size_t iBin = 0; iBin < BetaHist[iSample].size(); iBin++) {
1535  DirBeta[iSample]->cd();
1536  BetaHist[iSample][iBin]->Write();
1537  }
1538  DirBeta[iSample]->Close();
1539  delete DirBeta[iSample];
1540  }
1541  BetaDir->Close();
1542  delete BetaDir;
1543 
1544  PredictiveDir->cd();
1545 }
1546 
1547 // ****************
1548 // Study Prior/Posterior correlations between samples etc.
1549 void PredictiveThrower::StudyCorrelations(TDirectory* PredictiveDir,
1550  const std::vector<std::vector<std::unique_ptr<TH1>>>& Toys,
1551  const bool DebugHistograms) const {
1552 // ****************
1553  MACH3LOG_INFO("Startin {}", __func__);
1554 
1555  // Make a new directory
1556  TDirectory *CorrDir = PredictiveDir->mkdir("Correlations");
1557  CorrDir->cd();
1558 
1559  std::vector<double> minVals(TotalNumberOfSamples, std::numeric_limits<double>::max());
1560  std::vector<double> maxVals(TotalNumberOfSamples, std::numeric_limits<double>::lowest());
1561  #ifdef MULTITHREAD
1562  #pragma omp parallel for
1563  #endif
1564  for (int i = 0; i < TotalNumberOfSamples; ++i)
1565  {
1566  for (const auto& toyHist : Toys[i])
1567  {
1568  const double val = toyHist->Integral();
1569  if (val < minVals[i]) minVals[i] = val;
1570  if (val > maxVals[i]) maxVals[i] = val;
1571  }
1572  }
1573  auto hSamCorr = std::make_unique<TH2D>("Sample Correlation", "Sample Correlation", TotalNumberOfSamples, 0,
1575  hSamCorr->SetDirectory(nullptr);
1576  hSamCorr->GetZaxis()->SetTitle("Correlation");
1577  hSamCorr->SetMinimum(-1);
1578  hSamCorr->SetMaximum(1);
1579  hSamCorr->GetXaxis()->SetLabelSize(0.015);
1580  hSamCorr->GetYaxis()->SetLabelSize(0.015);
1581  // Loop over the Covariance matrix entries
1582  for (int i = 0; i < TotalNumberOfSamples; ++i) {
1583  hSamCorr->SetBinContent(i+1, i+1, 1.0);
1584  hSamCorr->GetXaxis()->SetBinLabel(i+1, SampleInfo[i].Name.c_str());
1585  for (int j = 0; j < TotalNumberOfSamples; ++j) {
1586  hSamCorr->GetYaxis()->SetBinLabel(j+1, SampleInfo[j].Name.c_str());
1587  }
1588  }
1589 
1590  std::vector<std::vector<std::unique_ptr<TH2D>>> SamCorr(TotalNumberOfSamples);
1591  for (int i = 0; i < TotalNumberOfSamples; ++i)
1592  {
1593  SamCorr[i].resize(TotalNumberOfSamples);
1594  const double Min_i = minVals[i];
1595  const double Max_i = maxVals[i];
1596  for (int j = 0; j < TotalNumberOfSamples; ++j)
1597  {
1598  const double Min_j = minVals[j];
1599  const double Max_j = maxVals[j];
1600  // TH2D to hold the Correlation
1601  std::string name = "SamCorr_" + std::to_string(i) + "_" + std::to_string(j);
1602  SamCorr[i][j] = std::make_unique<TH2D>(name.c_str(), name.c_str(), 70, Min_i, Max_i, 70, Min_j, Max_j);
1603  SamCorr[i][j]->SetDirectory(nullptr);
1604  SamCorr[i][j]->SetMinimum(0);
1605  SamCorr[i][j]->GetXaxis()->SetTitle(SampleInfo[i].Name.c_str());
1606  SamCorr[i][j]->GetYaxis()->SetTitle(SampleInfo[j].Name.c_str());
1607  SamCorr[i][j]->GetZaxis()->SetTitle("Events");
1608  }
1609  }
1610 
1611  // Now we are sure we have the diagonal elements, let's make the off-diagonals
1612  #ifdef MULTITHREAD
1613  #pragma omp parallel for
1614  #endif
1615  for (int i = 0; i < TotalNumberOfSamples; ++i)
1616  {
1617  for (int j = 0; j <= i; ++j)
1618  {
1619  // Skip the diagonal elements which we've already done above
1620  if (j == i) continue;
1621 
1622  for (int iToy = 0; iToy < Ntoys; ++iToy)
1623  {
1624  SamCorr[i][j]->Fill(Toys[i][iToy]->Integral(), Toys[j][iToy]->Integral());
1625  }
1626  SamCorr[i][j]->Smooth();
1627 
1628  // The value of the Covariance
1629  const double corr = SamCorr[i][j]->GetCorrelationFactor();
1630  hSamCorr->SetBinContent(i+1, j+1, corr);
1631  hSamCorr->SetBinContent(j+1, i+1, corr);
1632  }// End j loop
1633  }// End i loop
1634 
1635  hSamCorr->Draw("colz");
1636  hSamCorr->Write("Sample_Corr");
1637 
1638  if(DebugHistograms) {
1639  for (int i = 0; i < TotalNumberOfSamples; ++i){
1640  for (int j = 0; j <= i; ++j) {
1641  // Skip the diagonal elements which we've already done above
1642  if (j == i) continue;
1643  SamCorr[i][j]->Write();
1644  }// End j loop
1645  }// End i loop
1646  } // end if debugHist
1647 
1648  PredictiveDir->cd();
1649 }
1650 
1651 // ****************
1652 // Calculate the LLH for TH1, set the LLH to title of MCHist
1653 void PredictiveThrower::ExtractLLH(TH1* DatHist, TH1* MCHist, TH1* W2Hist, const SampleHandlerInterface* SampleHandler) const {
1654 // ****************
1655  const double llh = CalcLLH(DatHist, MCHist, W2Hist, SampleHandler);
1656  std::stringstream ss;
1657  ss << "_2LLH=" << llh;
1658  MCHist->SetTitle((std::string(MCHist->GetTitle())+ss.str()).c_str());
1659  MACH3LOG_INFO("{:<55} {:<10.2f} {:<10.2f} {:<10.2f}", MCHist->GetName(), DatHist->Integral(), MCHist->Integral(), llh);
1660 }
1661 
1662 // ****************
1663 // Make the 1D Event Rate Hist
1664 void PredictiveThrower::MakeCutEventRate(TH1D *Histogram, const double DataRate) const {
1665 // ****************
1666  // Open the ROOT file
1667  int originalErrorWarning = gErrorIgnoreLevel;
1668  gErrorIgnoreLevel = kFatal;
1669 
1670  // For the event rate histogram add a TLine to the data rate
1671  auto TempLine = std::make_unique<TLine>(DataRate, Histogram->GetMinimum(), DataRate, Histogram->GetMaximum());
1672  TempLine->SetLineColor(kRed);
1673  TempLine->SetLineWidth(2);
1674  // Also fit a Gaussian because why not?
1675  auto Fitter = std::make_unique<TF1>("Fit", "gaus", Histogram->GetBinLowEdge(1), Histogram->GetBinLowEdge(Histogram->GetNbinsX()+1));
1676  Histogram->Fit(Fitter.get(), "RQ");
1677  Fitter->SetLineColor(kRed-5);
1678  // Calculate a p-value
1679  double Above = 0.0;
1680  for (int z = 0; z < Histogram->GetNbinsX(); ++z) {
1681  const double xvalue = Histogram->GetBinCenter(z+1);
1682  if (xvalue >= DataRate) {
1683  Above += Histogram->GetBinContent(z+1);
1684  }
1685  }
1686  const double pvalue = Above/Histogram->Integral();
1687  TLegend Legend(0.4, 0.75, 0.98, 0.90);
1688  Legend.SetFillColor(0);
1689  Legend.SetFillStyle(0);
1690  Legend.SetLineWidth(0);
1691  Legend.SetLineColor(0);
1692  Legend.AddEntry(TempLine.get(), Form("Data, %.0f, p-value=%.2f", DataRate, pvalue), "l");
1693  Legend.AddEntry(Histogram, Form("MC, #mu=%.1f#pm%.1f", Histogram->GetMean(), Histogram->GetRMS()), "l");
1694  Legend.AddEntry(Fitter.get(), Form("Gauss, #mu=%.1f#pm%.1f", Fitter->GetParameter(1), Fitter->GetParameter(2)), "l");
1695  std::string TempTitle = std::string(Histogram->GetName());
1696  TempTitle += "_canv";
1697  TCanvas TempCanvas(TempTitle.c_str(), TempTitle.c_str(), 1024, 1024);
1698  TempCanvas.SetGridx();
1699  TempCanvas.SetGridy();
1700  TempCanvas.SetRightMargin(0.03);
1701  TempCanvas.SetBottomMargin(0.08);
1702  TempCanvas.SetLeftMargin(0.10);
1703  TempCanvas.SetTopMargin(0.06);
1704  TempCanvas.cd();
1705  Histogram->Draw();
1706  TempLine->Draw("same");
1707  Fitter->Draw("same");
1708  Legend.Draw("same");
1709  TempCanvas.Write();
1710  Histogram->Write();
1711  gErrorIgnoreLevel = originalErrorWarning;
1712 }
1713 
1714 // *************************
1715 void PredictiveThrower::RateAnalysis(const std::vector<std::vector<std::unique_ptr<TH1>>>& Toys,
1716  const std::vector<TDirectory*>& SampleDirectories) const {
1717 // *************************
1718  std::vector<std::unique_ptr<TH1D>> EventHist(TotalNumberOfSamples+1);
1719  for (int iSample = 0; iSample < TotalNumberOfSamples+1; ++iSample) {
1720  std::string Title = "EventHist: ";
1721  if (iSample == TotalNumberOfSamples) {
1722  Title = "Total";
1723  } else {
1724  Title = SampleInfo[iSample].Name;
1725  }
1726  Title += "_sum";
1727  //KS: When a histogram is created with an axis lower limit greater or equal to its upper limit ROOT will automatically adjust histogram range
1728  // https://root.cern.ch/doc/master/classTH1.html#auto-bin
1729  EventHist[iSample] = std::make_unique<TH1D>(Title.c_str(), Title.c_str(), 100, 1, -1);
1730  EventHist[iSample]->SetDirectory(nullptr);
1731  EventHist[iSample]->GetXaxis()->SetTitle("Total event rate");
1732  EventHist[iSample]->GetYaxis()->SetTitle("Counts");
1733  EventHist[iSample]->SetLineWidth(2);
1734  }
1735 
1736  // First fill per-sample histograms
1737  #ifdef MULTITHREAD
1738  #pragma omp parallel for
1739  #endif
1740  for (int iSample = 0; iSample < TotalNumberOfSamples; ++iSample) {
1741  for (int iToy = 0; iToy < Ntoys; ++iToy) {
1742  double Count = Toys[iSample][iToy]->Integral();
1743  EventHist[iSample]->Fill(Count);
1744  }
1745  }
1746 
1747  // Now fill total histogram properly (per toy)
1748  for (int iToy = 0; iToy < Ntoys; ++iToy) {
1749  double TotalCount = 0.0;
1750  for (int iSample = 0; iSample < TotalNumberOfSamples; ++iSample) {
1751  TotalCount += Toys[iSample][iToy]->Integral();
1752  }
1753  EventHist[TotalNumberOfSamples]->Fill(TotalCount);
1754  }
1755 
1756  double DataRate = 0.0;
1757  std::vector<double> DataRates(TotalNumberOfSamples+1);
1758  #ifdef MULTITHREAD
1759  #pragma omp parallel for reduction(+:DataRate)
1760  #endif
1761  for (int i = 0; i < TotalNumberOfSamples; ++i) {
1762  DataRates[i] = Data_Hist[i]->Integral();
1763  DataRate += DataRates[i];
1764  }
1765  DataRates[TotalNumberOfSamples] = DataRate;
1766 
1767  for (int SampleNum = 0; SampleNum < TotalNumberOfSamples+1; ++SampleNum) {
1768  SampleDirectories[SampleNum]->cd();
1769  //Make fancy event rate histogram
1770  MakeCutEventRate(EventHist[SampleNum].get(), DataRates[SampleNum]);
1771  }
1772 }
1773 
1774 
1775 // ****************
1777  const std::vector<std::unique_ptr<TH1>>& PostPred_mc,
1778  const std::vector<std::unique_ptr<TH1>>& PostPred_w) {
1779 // ****************
1780  MACH3LOG_INFO("******************************");
1781  switch(Criterion) {
1782  case M3::kInfCrit::kBIC:
1783  // Study Bayesian Information Criterion
1784  StudyBIC(PostPred_mc, PostPred_w);
1785  break;
1786  case M3::kInfCrit::kDIC:
1787  // Study Deviance Information Criterion
1788  StudyDIC(PostPred_mc, PostPred_w);
1789  break;
1790  case M3::kInfCrit::kWAIC:
1791  // Study Watanabe-Akaike information criterion (WAIC)
1792  StudyWAIC();
1793  break;
1795  MACH3LOG_ERROR("kInfCrits is not a valid kInfCrit!");
1796  throw MaCh3Exception(__FILE__, __LINE__);
1797  default:
1798  MACH3LOG_ERROR("UNKNOWN Information Criterion SPECIFIED!");
1799  MACH3LOG_ERROR("You gave {}", static_cast<int>(Criterion));
1800  throw MaCh3Exception(__FILE__ , __LINE__ );
1801  }
1802  MACH3LOG_INFO("******************************");
1803 }
1804 
1805 // ****************
1806 void PredictiveThrower::StudyBIC(const std::vector<std::unique_ptr<TH1>>& PostPred_mc,
1807  const std::vector<std::unique_ptr<TH1>>& PostPred_w) {
1808 // ****************
1809  //make fancy event rate histogram
1810  double DataRate = 0.0;
1811  double BinsRate = 0.0;
1812  double TotalLLH = 0.0;
1813  #ifdef MULTITHREAD
1814  #pragma omp parallel for reduction(+:DataRate, BinsRate, TotalLLH)
1815  #endif
1816  for (int i = 0; i < TotalNumberOfSamples; ++i)
1817  {
1818  auto SampleHandler = SampleInfo[i].SamHandler;
1819  auto* h = Data_Hist[i].get();
1820  DataRate += h->Integral();
1821  if (auto h1 = dynamic_cast<TH1D*>(h)) {
1822  BinsRate += h1->GetNbinsX();
1823  } else if (auto h2 = dynamic_cast<TH2D*>(h)) {
1824  BinsRate += h2->GetNbinsX() * h2->GetNbinsY();
1825  } else if (auto h2poly = dynamic_cast<TH2Poly*>(h)) {
1826  BinsRate += h2poly->GetNumberOfBins();
1827  } else {
1828  MACH3LOG_WARN("Unknown histogram type in DataHist[{}]", i);
1829  }
1830  TotalLLH += CalcLLH(Data_Hist[i].get(), PostPred_mc[i].get(), PostPred_w[i].get(), SampleHandler);
1831  }
1832 
1833  const double EventRateBIC = GetBIC(TotalLLH, DataRate, NModelParams);
1834  const double BinBasedBIC = GetBIC(TotalLLH, BinsRate, NModelParams);
1835  MACH3LOG_INFO("Calculated Bayesian Information Criterion using global number of events: {:.2f}", EventRateBIC);
1836  MACH3LOG_INFO("Calculated Bayesian Information Criterion using global number of bins: {:.2f}", BinBasedBIC);
1837  MACH3LOG_INFO("Additional info: NModelParams: {}, DataRate: {:.2f}, BinsRate: {:.2f}", NModelParams, DataRate, BinsRate);
1838 }
1839 
1840 // ****************
1841 // Get the Deviance Information Criterion (DIC)
1842 void PredictiveThrower::StudyDIC(const std::vector<std::unique_ptr<TH1>>& PostPred_mc,
1843  const std::vector<std::unique_ptr<TH1>>& PostPred_w) {
1844 // ****************
1845  //The posterior mean of the deviance
1846  double Dbar = 0.;
1847  double TotalLLH = 0.0;
1848 
1849  #ifdef MULTITHREAD
1850  #pragma omp parallel for reduction(+:Dbar)
1851  #endif
1852  for (int iSample = 0; iSample < TotalNumberOfSamples; ++iSample)
1853  {
1854  auto SampleHandler = SampleInfo[iSample].SamHandler;
1855  TotalLLH += CalcLLH(Data_Hist[iSample].get(), PostPred_mc[iSample].get(), PostPred_w[iSample].get(), SampleHandler);
1856  double LLH_temp = 0.;
1857  for (int iToy = 0; iToy < Ntoys; ++iToy)
1858  {
1859  LLH_temp += CalcLLH(Data_Hist[iSample].get(), MC_Hist_Toy[iSample][iToy].get(), W2_Hist_Toy[iSample][iToy].get(), SampleHandler);
1860  }
1861  Dbar += LLH_temp;
1862  }
1863  Dbar = Dbar / Ntoys;
1864 
1865  // A point estimate of the deviance
1866  const double Dhat = TotalLLH;
1867 
1868  //Effective number of parameters
1869  const double p_D = std::fabs(Dbar - Dhat);
1870 
1871  //Actual test stat
1872  const double DIC_stat = Dhat + 2 * p_D;
1873  MACH3LOG_INFO("Effective number of parameters following DIC formalism is equal to: {:.2f}", p_D);
1874  MACH3LOG_INFO("DIC test statistic = {:.2f}", DIC_stat);
1875 }
1876 
1877 
1878 // ****************
1879 // Helper: update WAIC accumulators for a single toy/bin
1880 void AccumulateWAICToy(const double neg_LLH_temp,
1881  double& mean_llh,
1882  double& mean_llh_squared,
1883  double& sum_exp_llh) {
1884 // ****************
1885  // Negate the negative log-likelihood to get the actual log-likelihood
1886  double LLH_temp = -neg_LLH_temp;
1887 
1888  mean_llh += LLH_temp;
1889  mean_llh_squared += LLH_temp * LLH_temp;
1890  sum_exp_llh += std::exp(LLH_temp);
1891 }
1892 
1893 // ****************
1894 // Helper function to finalize WAIC contributions for one bin
1895 void AccumulateWAICBin(double& mean_llh, double& mean_llh_squared, double& sum_exp_llh,
1896  const unsigned int Ntoys, double& lppd, double& p_WAIC) {
1897 // ****************
1898  // Compute the mean log-likelihood and the squared mean
1899  mean_llh /= Ntoys;
1900  mean_llh_squared /= Ntoys;
1901  sum_exp_llh /= Ntoys;
1902  sum_exp_llh = std::log(sum_exp_llh);
1903 
1904  // Log pointwise predictive density based on Eq. 4 in Gelman2014
1905  lppd += sum_exp_llh;
1906 
1907  // Compute the effective number of parameters for WAIC
1908  p_WAIC += mean_llh_squared - (mean_llh * mean_llh);
1909 }
1910 
1911 // ****************
1912 // Get the Watanabe-Akaike information criterion (WAIC)
1914 // ****************
1915  // log pointwise predictive density
1916  double lppd = 0.;
1917  // effective number of parameters
1918  double p_WAIC = 0.;
1919 
1920  #ifdef MULTITHREAD
1921  #pragma omp parallel for reduction(+:lppd, p_WAIC)
1922  #endif
1923  for (int iSample = 0; iSample < TotalNumberOfSamples; ++iSample) {
1924  auto SampleHandler = SampleInfo[iSample].SamHandler;
1925  auto* hData = Data_Hist[iSample].get();
1926 
1927  if (auto h2poly = dynamic_cast<TH2Poly*>(hData)) {
1928  // TH2Poly: irregular bins, linear indexing
1929  for (int i = 1; i <= h2poly->GetNumberOfBins(); ++i) {
1930  const double data = Data_Hist[iSample]->GetBinContent(i);
1931  double mean_llh = 0.;
1932  double sum_exp_llh = 0;
1933  double mean_llh_squared = 0.;
1934 
1935  for (int iToy = 0; iToy < Ntoys; ++iToy) {
1936  const double mc = MC_Hist_Toy[iSample][iToy]->GetBinContent(i);
1937  const double w2 = W2_Hist_Toy[iSample][iToy]->GetBinContent(i);
1938  // Get the -log-likelihood for this sample and bin
1939  double neg_LLH_temp = SampleHandler->GetTestStatLLH(data, mc, w2);
1940  AccumulateWAICToy(neg_LLH_temp, mean_llh, mean_llh_squared, sum_exp_llh);
1941  }
1942  AccumulateWAICBin(mean_llh, mean_llh_squared, sum_exp_llh, Ntoys, lppd, p_WAIC);
1943  }
1944  } else if (auto h2 = dynamic_cast<TH2D*>(hData)) {
1945  // TH2D: nested loops over X and Y
1946  for (int ix = 1; ix <= h2->GetNbinsX(); ++ix) {
1947  for (int iy = 1; iy <= h2->GetNbinsY(); ++iy) {
1948  const double data = hData->GetBinContent(ix, iy);
1949  double mean_llh = 0.;
1950  double mean_llh_squared = 0.;
1951  double sum_exp_llh = 0.;
1952  for (int iToy = 0; iToy < Ntoys; ++iToy) {
1953  const double mc = MC_Hist_Toy[iSample][iToy]->GetBinContent(ix, iy);
1954  const double w2 = W2_Hist_Toy[iSample][iToy]->GetBinContent(ix, iy);
1955  // Get the -log-likelihood for this sample and bin
1956  double neg_LLH_temp = SampleHandler->GetTestStatLLH(data, mc, w2);
1957  AccumulateWAICToy(neg_LLH_temp, mean_llh, mean_llh_squared, sum_exp_llh);
1958  }
1959  AccumulateWAICBin(mean_llh, mean_llh_squared, sum_exp_llh, Ntoys, lppd, p_WAIC);
1960  }
1961  }
1962  } else if (auto h1 = dynamic_cast<TH1D*>(hData)) {
1963  // TH1D: 1D histogram
1964  for (int iBin = 1; iBin <= h1->GetNbinsX(); ++iBin) {
1965  const double data = hData->GetBinContent(iBin);
1966  double mean_llh = 0.;
1967  double mean_llh_squared = 0.;
1968  double sum_exp_llh = 0.;
1969  for (int iToy = 0; iToy < Ntoys; ++iToy) {
1970  const double mc = MC_Hist_Toy[iSample][iToy]->GetBinContent(iBin);
1971  const double w2 = W2_Hist_Toy[iSample][iToy]->GetBinContent(iBin);
1972 
1973  // Get the -log-likelihood for this sample and bin
1974  double neg_LLH_temp = SampleHandler->GetTestStatLLH(data, mc, w2);
1975  AccumulateWAICToy(neg_LLH_temp, mean_llh, mean_llh_squared, sum_exp_llh);
1976  }
1977  AccumulateWAICBin(mean_llh, mean_llh_squared, sum_exp_llh, Ntoys, lppd, p_WAIC);
1978  }
1979  }
1980  }
1981 
1982  // Compute WAIC, see Eq. 13 in Gelman2014
1983  double WAIC = -2 * (lppd - p_WAIC);
1984  MACH3LOG_INFO("Effective number of parameters following WAIC formalism is equal to: {:.2f}", p_WAIC);
1985  MACH3LOG_INFO("WAIC = {:.2f}", WAIC);
1986 }
void MakeFluctuatedHistogramAlternative(TH1D *FluctHist, TH1D *PolyHist, TRandom3 *rand)
Make Poisson fluctuation of TH1D hist using slow method which is only for cross-check.
void MakeFluctuatedHistogramStandard(TH1D *FluctHist, TH1D *PolyHist, TRandom3 *rand)
Make Poisson fluctuation of TH1D hist using default fast method.
TH1D * PolyProjectionX(TObject *poly, const std::string &TempName, const std::vector< double > &xbins, const bool computeErrors)
WP: Poly Projectors.
void FastViolinFill(TH2D *violin, TH1D *hist_1d)
KS: Fill Violin histogram with entry from a toy.
TH1D * PolyProjectionY(TObject *poly, const std::string &TempName, const std::vector< double > &ybins, const bool computeErrors)
WP: Poly Projectors.
std::unique_ptr< TH1D > MakeSummaryFromSpectra(const TH2D *Spectra, const std::string &name)
Build a 1D posterior-predictive summary from a violin spectrum.
@ kXSecPar
Definition: MCMCProcessor.h:46
#define MACH3LOG_DEBUG
Definition: MaCh3Logger.h:34
#define MACH3LOG_ERROR
Definition: MaCh3Logger.h:37
#define MACH3LOG_INFO
Definition: MaCh3Logger.h:35
#define MACH3LOG_WARN
Definition: MaCh3Logger.h:36
void AccumulateWAICToy(const double neg_LLH_temp, double &mean_llh, double &mean_llh_squared, double &sum_exp_llh)
bool CheckBounds(const std::vector< const M3::float_t * > &BoundValuePointer, const std::vector< std::pair< double, double >> &ParamBounds)
void AccumulateWAICBin(double &mean_llh, double &mean_llh_squared, double &sum_exp_llh, const unsigned int Ntoys, double &lppd, double &p_WAIC)
int Ntoys
double GetBetaParameter(const double data, const double mc, const double w2, const TestStatistic TestStat)
KS: Calculate Beta parameter which will be different based on specified test statistic.
double GetBIC(const double llh, const int data, const int nPars)
Get the Bayesian Information Criterion (BIC) or Schwarz information criterion (also SIC,...
void Get2DBayesianpValue(TH2D *Histogram)
Calculates the 2D Bayesian p-value and generates a visualization.
YAML::Node TMacroToYAML(const TMacro &macro)
KS: Convert a ROOT TMacro object to a YAML node.
Definition: YamlHelper.h:152
bool compareYAMLNodes(const YAML::Node &node1, const YAML::Node &node2, bool Mute=false)
Compare if yaml nodes are identical.
Definition: YamlHelper.h:186
bool CheckNodeExists(const YAML::Node &node, Args... args)
KS: Wrapper function to call the recursive helper.
Definition: YamlHelper.h:60
Base class for implementing fitting algorithms.
Definition: FitterBase.h:26
std::string GetName() const
Get name of class.
Definition: FitterBase.h:72
std::unique_ptr< TRandom3 > random
Random number.
Definition: FitterBase.h:146
TFile * outputFile
Output.
Definition: FitterBase.h:149
std::string AlgorithmName
Name of fitting algorithm that is being used.
Definition: FitterBase.h:170
std::vector< SampleHandlerInterface * > samples
Sample holder.
Definition: FitterBase.h:131
Manager * fitMan
The manager for configuration handling.
Definition: FitterBase.h:110
void SanitiseInputs()
Remove obsolete memory and make other checks before fit starts.
Definition: FitterBase.cpp:221
std::vector< ParameterHandlerBase * > systematics
Systematic holder.
Definition: FitterBase.h:136
Class responsible for processing MCMC chains, performing diagnostics, generating plots,...
Definition: MCMCProcessor.h:61
void Initialise()
Scan chain, what parameters we have and load information from covariance matrices.
YAML::Node GetCovConfig(const int i) const
Get Yaml config obtained from a Chain.
Custom exception class used throughout MaCh3.
The manager class is responsible for managing configurations and settings.
Definition: Manager.h:16
YAML::Node const & raw() const
Return config.
Definition: Manager.h:47
Base class responsible for handling of systematic error parameters. Capable of using PCA or using ada...
int GetNumParams() const
Get total number of parameters.
void SetParProp(const int i, const double val)
Set proposed parameter value.
int GetParIndex(const std::string &name) const
Get index based on name.
std::string GetName() const
Get name of covariance.
double GetParInit(const int i) const
Get prior parameter value.
YAML::Node GetConfig() const
Getter to return a copy of the YAML node.
Class responsible for handling of systematic error parameters with different types defined in the con...
std::vector< std::string > GetUniqueParameterGroups() const
KS: Get names of all unique parameter groups.
void SetGroupOnlyParameters(const std::string &Group, const std::vector< double > &Pars={})
KS Function to set to prior parameters of a given group or values from vector.
std::vector< double > PenaltyTerm
Penalty term values for each toy by default 0.
virtual ~PredictiveThrower()
Destructor.
void ExtractLLH(TH1 *DatHist, TH1 *MCHist, TH1 *W2Hist, const SampleHandlerInterface *SampleHandler) const
Calculate the LLH for TH1, set the LLH to title of MCHist.
bool FullLLH
KS: Use Full LLH or only sample contribution based on discussion with Asher we almost always only wan...
void WriteToy(TDirectory *ToyDirectory, TDirectory *Toy_1DDirectory, TDirectory *Toy_2DDirectory, const int iToy)
Save histograms for a single MCMC Throw/Toy.
void StudyCorrelations(TDirectory *PredictiveDir, const std::vector< std::vector< std::unique_ptr< TH1 >>> &Toys, const bool DebugHistograms) const
Study Prior/Posterior correlations between samples etc.
void RunPredictiveAnalysis()
Main routine responsible for producing posterior predictive distributions and $p$-value.
bool LoadToys()
Load existing toys.
void PosteriorPredictivepValue(const std::vector< std::unique_ptr< TH1 >> &PostPred_mc, const std::vector< TDirectory * > &SampleDir)
Calculate Posterior Predictive $p$-value Compares observed data to toy datasets generated from:
void WriteByModeToys(TDirectory *ByModeDirectory, const int iToy)
Save mode histograms for a single MCMC Throw/Toy.
void StudyBIC(const std::vector< std::unique_ptr< TH1 >> &PostPred_mc, const std::vector< std::unique_ptr< TH1 >> &PostPred_w)
Study Bayesian Information Criterion (BIC) The BIC is defined as:
void SetupToyGeneration(std::vector< std::string > &ParameterGroupsNotVaried, std::unordered_set< int > &ParameterOnlyToVary, std::vector< const M3::float_t * > &BoundValuePointer, std::vector< std::pair< double, double >> &ParamBounds)
Setup useful variables etc before stating toy generation.
std::string GetBinName(TH1 *hist, const bool uniform, const int Dim, const std::vector< int > &bins) const
Construct a human-readable label describing a specific analysis bin.
std::vector< std::string > GetStoredFancyName(ParameterHandlerBase *Systematics) const
Get Fancy parameters stored in mcmc chains for passed ParameterHandler.
std::vector< std::unique_ptr< TH1 > > W2_Nom_Hist
Vector of W2 histograms.
std::vector< std::vector< std::unique_ptr< TH1 > > > W2_Hist_Toy
bool Is_PriorPredictive
Whether it is Prior or Posterior predictive.
int NModelParams
KS: Count total number of model parameters which can be used for stuff like BIC.
void MakeCutEventRate(TH1D *Histogram, const double DataRate) const
Make the 1D Event Rate Hist.
void StudyInformationCriterion(M3::kInfCrit Criterion, const std::vector< std::unique_ptr< TH1 >> &PostPred_mc, const std::vector< std::unique_ptr< TH1 >> &PostPred_w)
Information Criterion.
int Ntoys
Number of toys we are generating analysing.
void StudyByMode1DProjections(const std::vector< TDirectory * > &SampleDirectories) const
Load 1D projections by mode and produce post pred for each.
void PredictiveLLH(const std::vector< std::unique_ptr< TH1 >> &Data_histogram, const std::vector< std::unique_ptr< TH1 >> &PostPred_mc, const std::vector< std::unique_ptr< TH1 >> &PostPred_w, const std::vector< TDirectory * > &SampleDir)
Calculate Posterior Predictive LLH.
void RateAnalysis(const std::vector< std::vector< std::unique_ptr< TH1 >>> &Toys, const std::vector< TDirectory * > &SampleDirectories) const
Produce distribution of number of events for each sample.
void MakeChi2Plots(const std::vector< std::vector< double >> &Chi2_x, const std::string &Chi2_x_title, const std::vector< std::vector< double >> &Chi2_y, const std::string &Chi2_y_title, const std::vector< TDirectory * > &SampleDir, const std::string Title)
Produce Chi2 plot for a single sample based on which $p$-value is calculated.
void SetParamters(std::vector< std::string > &ParameterGroupsNotVaried, std::unordered_set< int > &ParameterOnlyToVary)
This set some params to prior value this way you can evaluate errors from subset of errors.
void StudyWAIC()
KS: Get the Watanabe-Akaike information criterion (WAIC)
void Study1DProjections(const std::vector< TDirectory * > &SampleDirectories) const
Load 1D projections and later produce violin plots for each.
void SetupSampleInformation()
Setup sample information.
std::vector< std::unique_ptr< TH1 > > MC_Nom_Hist
Vector of MC histograms.
int TotalNumberOfSamples
Number of toys we are generating analysing.
void ProduceToys()
Produce toys by throwing from MCMC.
void StudyBetaParameters(TDirectory *PredictiveDir)
Evaluate prior/post predictive distribution for beta parameters (used for evaluating impact MC statis...
double CalcLLH(const double data, const double mc, const double w2, const SampleHandlerInterface *SampleHandler) const
Calculates the -2LLH (likelihood) for a single sample.
std::vector< std::unique_ptr< TH1 > > Data_Hist
Vector of Data histograms.
bool StandardFluctuation
KS: We have two methods for Poissonian fluctuation.
ParameterHandlerGeneric * ModelSystematic
Pointer to El Generico.
std::vector< double > ReweightWeight
Reweighting factors applied for each toy, by default 1.
std::vector< std::unique_ptr< TH1 > > MakePredictive(const std::vector< std::vector< std::unique_ptr< TH1 >>> &Toys, const std::vector< TDirectory * > &Director, const std::string &suffix, const bool DebugHistograms, const bool WriteHist)
Produce posterior predictive distribution.
PredictiveThrower(Manager *const fitMan)
Constructor.
void MakeFluctuatedHistogram(TH1 *FluctHist, TH1 *PolyHist)
Make Poisson fluctuation of TH1D hist.
std::vector< std::vector< std::unique_ptr< TH1 > > > MC_Hist_Toy
void StudyDIC(const std::vector< std::unique_ptr< TH1 >> &PostPred_mc, const std::vector< std::unique_ptr< TH1 >> &PostPred_w)
KS: Get the Deviance Information Criterion (DIC) The deviance is defined as:
double GetLLH(const TH1D *DatHist, const TH1D *MCHist, const TH1D *W2Hist, const SampleHandlerInterface *SampleHandler) const
Helper functions to calculate likelihoods using TH1D.
void ProduceSpectra(const std::vector< std::vector< std::vector< std::unique_ptr< TH1D >>>> &Toys, const std::vector< TDirectory * > &Director, const std::string suffix, const bool DoSummary=true) const
Produce Violin style spectra.
std::vector< std::unique_ptr< TH1D > > PerBinHistogram(TH1 *hist, const int SampleId, const int Dim, const std::string &suffix) const
Create per-bin posterior histograms for a given sample.
Class responsible for handling implementation of samples used in analysis, reweighting and returning ...
double GetTestStatLLH(const double data, const double mc, const double w2) const
Calculate test statistic for a single bin. Calculation depends on setting of fTestStatistic....
int getValue(const std::string &Type)
CW: Get info like RAM.
Definition: Monitor.cpp:252
void PrintProgressBar(const Long64_t Done, const Long64_t All)
KS: Simply print progress bar.
Definition: Monitor.cpp:229
kInfCrit
KS: Different Information Criterion tests mostly based Gelman paper.
@ kWAIC
Watanabe-Akaike information criterion.
@ kInfCrits
This only enumerates.
@ kBIC
Bayesian Information Criterion.
@ kDIC
Deviance Information Criterion.
std::unique_ptr< ObjectType > Clone(const ObjectType *obj, const std::string &name="")
KS: Creates a copy of a ROOT-like object and wraps it in a smart pointer.
TFile * Open(const std::string &Name, const std::string &Type, const std::string &File, const int Line)
Opens a ROOT file with the given name and mode.
constexpr static const int _BAD_INT_
Default value used for int initialisation.
Definition: Core.h:55
KS: Store info about MC sample.
Definition: SampleInfo.h:40