MaCh3  2.4.2
Reference Guide
PredictivePlotting.cpp
Go to the documentation of this file.
1 //MaCh3 Includes
2 #include "PlottingUtils/PlottingUtils.h"
3 #include "PlottingUtils/PlottingManager.h"
4 
5 //this file has lots of usage of the ROOT plotting interface that only takes floats, turn this warning off for this CU for now
6 #pragma GCC diagnostic ignored "-Wfloat-conversion"
7 #pragma GCC diagnostic ignored "-Wconversion"
8 
12 
14 MaCh3Plotting::PlottingManager* PlotMan;
15 
16 std::vector<std::string> FindSamples(const std::string& File)
17 {
18  TFile *file = M3::Open(File, "READ", __FILE__, __LINE__);
19  TDirectoryFile *PredicitveDir = file->Get<TDirectoryFile>("Predictive");
20 
21  std::vector<std::string> SampleNames;
22  //Get all entries in input file
23  TIter next(PredicitveDir->GetListOfKeys());
24  TKey *key = nullptr;
25 
26  // Loop through all entries
27  while ((key = static_cast<TKey*>(next()))) {
28  // get directory names, ignore flux
29  auto classname = std::string(key->GetClassName());
30  auto dirname = std::string(key->GetName());
31 
32  if (classname != "TDirectoryFile") continue;
33  dirname = std::string(key->GetName());
34 
35  if(dirname == "Total") continue;
36  if(dirname == "BetaParameters") continue;
37 
38  SampleNames.push_back(dirname);
39  MACH3LOG_DEBUG("Entering Sample {}", dirname);
40  }
41 
42  file->Close();
43  delete file;
44  return SampleNames;
45 }
46 
47 std::vector<int> FindDimensions(const std::string& File, const std::vector<std::string>& Samples)
48 {
49  TFile *file = M3::Open(File, "READ", __FILE__, __LINE__);
50  TDirectoryFile *PredicitveDir = file->Get<TDirectoryFile>("Predictive");
51 
52  std::vector<int> SampleDimension;
53  for (const auto& sample : Samples)
54  {
55  // Get directory for this sample
56  TDirectoryFile* SampleDir = PredicitveDir->Get<TDirectoryFile>(sample.c_str());
57 
58  int Dimension = 0;
59 
60  while (true)
61  {
62  // Construct name Tutorial_mc_dimX
63  std::string histName = fmt::format("{}_mc_dim{}", sample, Dimension);
64 
65  TH2D* hist = SampleDir->Get<TH2D>(histName.c_str());
66  if (!hist) break; // stop when next dimension does not exist
67 
68  Dimension++;
69  }
70 
71  MACH3LOG_DEBUG("Sample '{}' has dimension {}", sample, Dimension);
72  SampleDimension.push_back(Dimension);
73  }
74 
75  file->Close();
76  delete file;
77 
78  return SampleDimension;
79 }
80 
81 double GetPValue(const TH2D* hist)
82 {
83  double pvalue = 0;
84  std::string TempTile = hist->GetTitle();
85  std::string temp = "=";
86 
87  std::string::size_type SizeType = TempTile.find(temp);
88  TempTile.erase(0, SizeType+1);
89  pvalue = atof(TempTile.c_str());
90  return pvalue;
91 }
92 
93 void PrintPosteriorPValue(const YAML::Node& Settings,
94  const std::vector<TFile*>& InputFiles,
95  const std::vector<std::string>& SampleNames)
96 {
97  MACH3LOG_INFO("Starting {}", __func__);
98  auto Titles = Get<std::vector<std::string>>(Settings["FileTitle"], __FILE__, __LINE__);
99  std::vector<std::vector<double>> FlucDrawVec(InputFiles.size());
100 
101  //KS: P-values per each sample
102  std::cout<<"\\begin{table}[htb]"<<std::endl;
103  std::cout<<"\\centering"<<std::endl;
104  std::cout<<"\\begin{tabular}{ | l | ";
105 
106 
107  for(unsigned int f = 0; f < InputFiles.size(); f++)
108  {
109  std::cout<<"c | ";
110  }
111 
112  std::cout<<"} \\hline"<<std::endl;
113  std::cout<<"Sample ";
114  for(unsigned int f = 0; f < InputFiles.size(); f++)
115  {
116  std::cout<<"& \\multicolumn{1}{| c |}{" + Titles[f] +" p-value} ";
117  }
118  std::cout<<"\\\\"<<std::endl;
119  for(unsigned int f = 0; f < InputFiles.size(); f++)
120  {
121  std::cout<<" & Fluctuation of Prediction ";
122  }
123  std::cout<<"\\\\ \\hline"<<std::endl;
124  for(unsigned int i = 0; i < SampleNames.size(); i++)
125  {
126  std::cout<<SampleNames[i];
127  for(unsigned int f = 0; f < InputFiles.size(); f++)
128  {
129  std::string TempString = "Predictive/" + SampleNames[i]+"/"+SampleNames[i]+"_predfluc_draw";
130  TH2D *hist2D = InputFiles[f]->Get<TH2D>(TempString.c_str());
131  double FlucDraw = GetPValue(hist2D);
132  std::cout<<" & "<<FlucDraw;
133  FlucDrawVec[f].push_back(FlucDraw);
134  }
135  std::cout<<" \\\\"<<std::endl;
136  }
137  std::cout<<"Total ";
138  for(unsigned int f = 0; f < InputFiles.size(); f++)
139  {
140  TH2D *hFlucPred = InputFiles[f]->Get<TH2D>("Predictive/Total/Total_predfluc_draw");
141  double FlucDraw = GetPValue(hFlucPred);
142  std::cout<<" & "<<FlucDraw;
143  }
144  std::cout<<" \\\\ \\hline"<<std::endl;
145  std::cout<<"\\hline"<<std::endl;
146  std::cout<<"\\end{tabular}"<<std::endl;
147  std::cout<<"\\end{table}"<<std::endl;
148 
149 
150  auto Threshold = GetFromManager<double>(Settings["Significance"], 0.05);
151  for(unsigned int f = 0; f < InputFiles.size(); f++)
152  {
153  MACH3LOG_INFO("Calculating Shape for file {}", Titles[f]);
154 
155  CheckBonferoniCorrectedpValue(SampleNames, FlucDrawVec[f], Threshold);
156  MACH3LOG_INFO("Combined pvalue following Fisher method: {:.4f}", FisherCombinedPValue(FlucDrawVec[f]));
157  }
158 }
159 
160 void OverlayViolin(const YAML::Node& Settings,
161  const std::vector<TFile*>& InputFiles,
162  const std::vector<std::string>& SampleNames,
163  const std::vector<int>& SampleDimension,
164  const std::unique_ptr<TCanvas>& canv)
165 {
166  MACH3LOG_INFO("Starting {}", __func__);
167  canv->Clear();
168 
169  canv->SetTopMargin(0.10);
170  canv->SetBottomMargin(0.12);
171  canv->SetRightMargin(0.075);
172  canv->SetLeftMargin(0.14);
173 
174  auto PosteriorColor = Get<std::vector<Color_t >>(Settings["PosteriorColor"], __FILE__, __LINE__);
175  auto Titles = Get<std::vector<std::string>>(Settings["FileTitle"], __FILE__, __LINE__);
176  const int nFiles = static_cast<int>(InputFiles.size());
177 
178  //KS: No idea why but ROOT changed treatment of violin in R6. If you have non uniform binning this will results in very hard to see violin plots.
179  TCandle::SetScaledViolin(false);
180  for(size_t iSample = 0; iSample < SampleNames.size(); iSample++)
181  {
182  for(int iDim = 0; iDim < SampleDimension[iSample]; iDim++)
183  {
184  std::vector<std::unique_ptr<TH2D>> ViolinHist(nFiles);
185  for(int iFile = 0; iFile < nFiles; iFile++)
186  {
187  InputFiles[iFile]->cd();
188  ViolinHist[iFile] = M3::Clone(InputFiles[iFile]->Get<TH2D>(("Predictive/" + SampleNames[iSample]
189  + "/" + SampleNames[iSample] + "_mc_dim" + iDim).Data()));
190  ViolinHist[iFile]->SetTitle(PlotMan->style().prettifySampleName(SampleNames[iSample]).c_str());
191  ViolinHist[iFile]->SetLineColor(PosteriorColor[iFile]);
192  ViolinHist[iFile]->SetMarkerColor(PosteriorColor[iFile]);
193  ViolinHist[iFile]->SetFillColorAlpha(PosteriorColor[iFile], 0.35);
194  ViolinHist[iFile]->SetFillStyle(1001);
195  ViolinHist[iFile]->GetXaxis()->SetTitle(PlotMan->style().prettifyKinematicName(
196  ViolinHist[iFile]->GetXaxis()->GetTitle()).c_str());
197  ViolinHist[iFile]->GetYaxis()->SetTitle("Events");
198  }
199 
200  ViolinHist[0]->Draw("violinX(03100300)");
201  for(int iFile = 1; iFile < nFiles; iFile++) {
202  ViolinHist[iFile]->Draw("violinX(03100300) same");
203  }
204 
205  TLegend legend(0.50, 0.52, 0.90, 0.88);
206  for(int ig = 0; ig < nFiles; ig++) {
207  legend.AddEntry(ViolinHist[ig].get(), Form("%s", Titles[ig].c_str()), "lpf");
208  }
209  legend.SetLineStyle(0);
210  legend.SetTextSize(0.03);
211  legend.Draw();
212 
213  canv->Print("Overlay_Predictive.pdf", "pdf");
214  }
215  }
216 }
217 
218 void OverlayPredicitve(const YAML::Node& Settings,
219  const std::vector<TFile*>& InputFiles,
220  const std::vector<std::string>& SampleNames,
221  const std::unique_ptr<TCanvas>& canv)
222 {
223  MACH3LOG_INFO("Starting {}", __func__);
224  canv->Clear();
225 
226  TPad* pad1 = new TPad("pad1","pad1",0,0.25,1,1);
227  pad1->AppendPad();
228  TPad* pad2 = new TPad("pad2","pad2",0,0,1,0.25);
229  pad2->AppendPad();
230 
231  pad1->SetGrid();
232  pad2->SetGrid();
233 
234  pad1->SetLeftMargin(canv->GetLeftMargin());
235  pad1->SetRightMargin(canv->GetRightMargin());
236  pad1->SetTopMargin(canv->GetTopMargin());
237  pad1->SetBottomMargin(0);
238 
239  pad2->SetLeftMargin(canv->GetLeftMargin());
240  pad2->SetRightMargin(canv->GetRightMargin());
241  pad2->SetTopMargin(0);
242  pad2->SetBottomMargin(0.28);
243 
244  auto PosteriorColor = Get<std::vector<Color_t >>(Settings["PosteriorColor"], __FILE__, __LINE__);
245  auto Titles = Get<std::vector<std::string>>(Settings["FileTitle"], __FILE__, __LINE__);
246 
247  if(Titles.size() < InputFiles.size() || PosteriorColor.size() < InputFiles.size()){
248  MACH3LOG_ERROR("Passed {} files, while only {} titles and {} colors", InputFiles.size(), Titles.size(), PosteriorColor.size());
249  throw MaCh3Exception(__FILE__, __LINE__);
250  }
251  for(size_t iSample = 0; iSample < SampleNames.size(); iSample++)
252  {
253  const int nFiles = static_cast<int>(InputFiles.size());
254  TH1D* hist = InputFiles[0]->Get<TH1D>(("SampleFolder/data_" + SampleNames[iSample]).c_str());
255  if(!hist) {
256  MACH3LOG_WARN("Couldn't find hist for {}, most likely it is using 2D", SampleNames[iSample]);
257  MACH3LOG_WARN("Currently only 1D, sorry");
258  continue;
259  }
260  std::unique_ptr<TH1D> DataHist = M3::Clone(hist);
261  DataHist->SetLineColor(kBlack);
262  //KS: +1 for data, we want to get integral before scaling of the histogram
263  std::vector<double> Integral(nFiles+1);
264  Integral[nFiles] = DataHist->Integral();
265  std::vector<std::unique_ptr<TH1D>> PredHist(nFiles);
266 
267  for(int iFile = 0; iFile < nFiles; iFile++)
268  {
269  InputFiles[iFile]->cd();
270  PredHist[iFile] = M3::Clone(InputFiles[iFile]->Get<TH1D>(("Predictive/" + SampleNames[iSample] + "/" +
271  SampleNames[iSample] + "_mc_PostPred").c_str()));
272  Integral[iFile] = PredHist[iFile]->Integral();
273  PredHist[iFile]->SetTitle(PlotMan->style().prettifySampleName(SampleNames[iSample]).c_str());
274  PredHist[iFile]->SetLineColor(PosteriorColor[iFile]);
275  PredHist[iFile]->SetMarkerColor(PosteriorColor[iFile]);
276  PredHist[iFile]->SetFillColorAlpha(PosteriorColor[iFile], 0.35);
277  PredHist[iFile]->SetFillStyle(1001);
278  PredHist[iFile]->GetYaxis()->SetTitle("Events");
279  }
280  pad1->cd();
281 
282  PredHist[0]->Draw("p e2");
283  for(int iFile = 1; iFile < nFiles; iFile++)
284  {
285  PredHist[iFile]->Draw("p e2 same");
286  }
287  DataHist->Draw("he same");
288 
289  auto legend = std::make_unique<TLegend>(0.50,0.52,0.90,0.88);
290  legend->AddEntry(DataHist.get(), Form("Data, #int=%.0f", Integral[nFiles]),"le");
291  for(int ig = 0; ig < nFiles; ig++ )
292  {
293  legend->AddEntry(PredHist[ig].get(), Form("%s, #int=%.2f", Titles[ig].c_str(), Integral[ig]), "lpf");
294  }
295  legend->SetLineStyle(0);
296  legend->SetTextSize(0.03);
297  legend->Draw();
298 
300  pad2->cd();
301 
302  auto line = std::make_unique<TLine>(PredHist[0]->GetXaxis()->GetBinLowEdge(PredHist[0]->GetXaxis()->GetFirst()), 1.0, PredHist[0]->GetXaxis()->GetBinUpEdge(PredHist[0]->GetXaxis()->GetLast()), 1.0);
303 
304  line->SetLineWidth(2);
305  line->SetLineColor(kBlack);
306  line->Draw("");
307 
308  std::unique_ptr<TH1D> RatioPlotData = M3::Clone(DataHist.get());
309  std::vector<std::unique_ptr<TH1D>> RatioPlot(nFiles);
310 
311  for(int ig = 0; ig < nFiles; ig++ )
312  {
313  RatioPlot[ig] = M3::Clone(DataHist.get());
314  RatioPlot[ig]->SetLineColor(PosteriorColor[ig]);
315  RatioPlot[ig]->SetMarkerColor(PosteriorColor[ig]);
316  RatioPlot[ig]->SetFillColorAlpha(PosteriorColor[ig], 0.35);
317  RatioPlot[ig]->SetFillStyle(1001);
318  RatioPlot[ig]->GetYaxis()->SetTitle("Data/MC");
319  auto PrettyX = PlotMan->style().prettifyKinematicName(PredHist[0]->GetXaxis()->GetTitle());
320  RatioPlot[ig]->GetXaxis()->SetTitle(PrettyX.c_str());
321  RatioPlot[ig]->SetBit(TH1D::kNoTitle);
322  RatioPlot[ig]->GetXaxis()->SetTitleSize(0.12);
323  RatioPlot[ig]->GetYaxis()->SetTitleOffset(0.4);
324  RatioPlot[ig]->GetYaxis()->SetTitleSize(0.10);
325 
326  RatioPlot[ig]->GetXaxis()->SetLabelSize(0.10);
327  RatioPlot[ig]->GetYaxis()->SetLabelSize(0.10);
328 
329  RatioPlot[ig]->Divide(PredHist[ig].get());
330  PassErrorToRatioPlot(RatioPlot[ig].get(), PredHist[ig].get(), DataHist.get());
331  }
332 
333  RatioPlotData->Divide(DataHist.get());
334  PassErrorToRatioPlot(RatioPlotData.get(), DataHist.get(), DataHist.get());
335 
336  double maxz = -999;
337  double minz = +999;
338  for (int j = 0; j < nFiles; j++) {
339  for (int i = 1; i < RatioPlot[0]->GetXaxis()->GetNbins(); i++)
340  {
341  maxz = std::max(maxz, RatioPlot[j]->GetBinContent(i));
342  minz = std::min(minz, RatioPlot[j]->GetBinContent(i));
343  }
344  }
345  maxz = maxz*1.001;
346  minz = minz*1.001;
347 
348  if (std::fabs(1 - maxz) > std::fabs(1-minz))
349  RatioPlot[0]->GetYaxis()->SetRangeUser(1-std::fabs(1-maxz),1+std::fabs(1-maxz));
350  else
351  RatioPlot[0]->GetYaxis()->SetRangeUser(1-std::fabs(1-minz),1+std::fabs(1-minz));
352 
353  RatioPlot[0]->Draw("p e2");
354  for(int ig = 1; ig < nFiles; ig++ )
355  {
356  RatioPlot[ig]->Draw("p e2 same");
357  }
358  RatioPlotData->Draw("he same");
359 
360  canv->Print("Overlay_Predictive.pdf", "pdf");
361  }
362 
363  delete pad1;
364  delete pad2;
365 }
366 
367 void PredictivePlotting(const std::string& ConfigName,
368  const std::vector<std::string>& FileNames)
369 {
370  auto canvas = std::make_unique<TCanvas>("canv", "canv", 1080, 1080);
371  // set the paper & margin sizes
372  canvas->SetTopMargin(0.11);
373  canvas->SetBottomMargin(0.16);
374  canvas->SetRightMargin(0.075);
375  canvas->SetLeftMargin(0.12);
376  canvas->SetGrid();
377 
378  gStyle->SetOptStat(0); //Set 0 to disable statistic box
379  gStyle->SetPalette(51);
380  gStyle->SetLegendBorderSize(0); //This option disables legends borders
381  gStyle->SetFillStyle(0);
382 
383  //To avoid TCanvas::Print> messages
384  gErrorIgnoreLevel = kWarning;
385 
386  auto Samples = FindSamples(FileNames[0]);
387  auto Dimensions = FindDimensions(FileNames[0], Samples);
388 
389  std::vector<TFile*> InputFiles(FileNames.size());
390  for(size_t i = 0; i < FileNames.size(); i++)
391  {
392  InputFiles[i] = M3::Open(FileNames[i], "READ", __FILE__, __LINE__);
393  }
394 
395  // Load the YAML file
396  YAML::Node Config = M3OpenConfig(ConfigName);
397  // Access the "MatrixPlotter" section
398  YAML::Node settings = Config["PredictivePlotting"];
399  canvas->Print("Overlay_Predictive.pdf[", "pdf");
400 
401  // Make overlay of 1D hists
402  OverlayPredicitve(settings, InputFiles, Samples, canvas);
403  // Make overlay of violin plots
404  OverlayViolin(settings, InputFiles, Samples, Dimensions, canvas);
405  // Get PValue per sample
406  PrintPosteriorPValue(settings, InputFiles, Samples);
407 
408  canvas->Print("Overlay_Predictive.pdf]", "pdf");
409 
410  for(size_t i = 0; i < FileNames.size(); i++)
411  {
412  InputFiles[i]->Close();
413  delete InputFiles[i];
414  }
415 }
416 
417 
418 int main(int argc, char **argv)
419 {
421  if (argc < 3)
422  {
423  MACH3LOG_ERROR("Need at least two arguments, {} <Config.Yaml> <Prior/Post_PredOutput.root>", argv[0]);
424  throw MaCh3Exception(__FILE__, __LINE__);
425  }
426  std::string ConfigName = std::string(argv[1]);
427  // Collect all remaining arguments as file names
428  std::vector<std::string> FileNames;
429  for (int i = 2; i < argc; ++i) {
430  FileNames.emplace_back(argv[i]);
431  }
432 
433  PlotMan = new MaCh3Plotting::PlottingManager();
434  PlotMan->initialise();
435 
436  PredictivePlotting(ConfigName, FileNames);
437 
438  if(PlotMan) delete PlotMan;
439  return 0;
440 }
TCanvas * canv
#define MACH3LOG_DEBUG
Definition: MaCh3Logger.h:34
#define MACH3LOG_ERROR
Definition: MaCh3Logger.h:37
#define MACH3LOG_INFO
Definition: MaCh3Logger.h:35
void SetMaCh3LoggerFormat()
Set messaging format of the logger.
Definition: MaCh3Logger.h:61
#define MACH3LOG_WARN
Definition: MaCh3Logger.h:36
double GetPValue(const TH2D *hist)
void PrintPosteriorPValue(const YAML::Node &Settings, const std::vector< TFile * > &InputFiles, const std::vector< std::string > &SampleNames)
void PredictivePlotting(const std::string &ConfigName, const std::vector< std::string > &FileNames)
int main(int argc, char **argv)
MaCh3Plotting::PlottingManager * PlotMan
std::vector< std::string > FindSamples(const std::string &File)
void OverlayViolin(const YAML::Node &Settings, const std::vector< TFile * > &InputFiles, const std::vector< std::string > &SampleNames, const std::vector< int > &SampleDimension, const std::unique_ptr< TCanvas > &canv)
void OverlayPredicitve(const YAML::Node &Settings, const std::vector< TFile * > &InputFiles, const std::vector< std::string > &SampleNames, const std::unique_ptr< TCanvas > &canv)
std::vector< int > FindDimensions(const std::string &File, const std::vector< std::string > &Samples)
int nFiles
Definition: ProcessMCMC.cpp:27
std::vector< std::string > FileNames
Definition: ProcessMCMC.cpp:28
double FisherCombinedPValue(const std::vector< double > &pvalues)
KS: Combine p-values using Fisher's method.
void PassErrorToRatioPlot(TH1D *RatioHist, TH1D *Hist1, TH1D *DataHist)
void CheckBonferoniCorrectedpValue(const std::vector< std::string > &SampleNameVec, const std::vector< double > &PValVec, const double Threshold)
KS: For more see https://www.t2k.org/docs/technotes/429/TN429_v8#page=63.
#define M3OpenConfig(filename)
Macro to simplify calling LoadYaml with file and line info.
Definition: YamlHelper.h:589
Custom exception class used throughout MaCh3.
std::unique_ptr< ObjectType > Clone(const ObjectType *obj, const std::string &name="")
KS: Creates a copy of a ROOT-like object and wraps it in a smart pointer.
TFile * Open(const std::string &Name, const std::string &Type, const std::string &File, const int Line)
Opens a ROOT file with the given name and mode.