MaCh3  2.5.1
Reference Guide
StatisticalUtils.cpp
Go to the documentation of this file.
1 //MaCh3 includes
3 #include <numeric>
4 
5 // **************************
6 std::string GetJeffreysScale(const double BayesFactor){
7 // **************************
8  std::string JeffreysScale = "";
9  //KS: Get fancy Jeffreys Scale as I am to lazy to look into table every time
10  if(BayesFactor < 0) JeffreysScale = "Negative";
11  else if( 5 > BayesFactor) JeffreysScale = "Barely worth mentioning";
12  else if( 10 > BayesFactor) JeffreysScale = "Substantial";
13  else if( 15 > BayesFactor) JeffreysScale = "Strong";
14  else if( 20 > BayesFactor) JeffreysScale = "Very strong";
15  else JeffreysScale = "Decisive";
16 
17  return JeffreysScale;
18 }
19 
20 // **************************
21 std::string GetDunneKaboth(const double BayesFactor){
22 // **************************
23  std::string DunneKaboth = "";
24  //KS: Get fancy DunneKaboth Scale as I am to lazy to look into table every time
25 
26  if(2.125 > BayesFactor) DunneKaboth = "< 1 #sigma";
27  else if( 20.74 > BayesFactor) DunneKaboth = "> 1 #sigma";
28  else if( 369.4 > BayesFactor) DunneKaboth = "> 2 #sigma";
29  else if( 15800 > BayesFactor) DunneKaboth = "> 3 #sigma";
30  else if( 1745000 > BayesFactor) DunneKaboth = "> 4 #sigma";
31  else DunneKaboth = "> 5 #sigma";
32 
33  return DunneKaboth;
34 }
35 
36 // *********************
37 double GetSigmaValue(const int sigma) {
38 // *********************
39  double width = 0;
40  switch (std::abs(sigma))
41  {
42  case 1:
43  width = 0.682689492137;
44  break;
45  case 2:
46  width = 0.954499736104;
47  break;
48  case 3:
49  width = 0.997300203937;
50  break;
51  case 4:
52  width = 0.999936657516;
53  break;
54  case 5:
55  width = 0.999999426697;
56  break;
57  case 6:
58  width = 0.999999998027;
59  break;
60  default:
61  MACH3LOG_ERROR("{} is unsupported value of sigma", sigma);
62  throw MaCh3Exception(__FILE__ , __LINE__ );
63  break;
64  }
65  return width;
66 }
67 
68 // ****************
69 double GetBIC(const double llh, const int data, const int nPars){
70 // ****************
71  if(nPars == 0)
72  {
73  MACH3LOG_ERROR("You haven't passed number of model parameters as it is still zero");
74  throw MaCh3Exception(__FILE__ , __LINE__ );
75  }
76  const double BIC = double(nPars * logl(data) + llh);
77 
78  return BIC;
79 }
80 
81 // ****************
82 double GetNeffective(const int N1, const int N2) {
83 // ****************
84  const double Nominator = (N1+N2);
85  const double Denominator = (N1*N2);
86  const double N_e = Nominator/Denominator;
87  return N_e;
88 }
89 
90 // ****************
91 void CheckBonferoniCorrectedpValue(const std::vector<std::string>& SampleNameVec,
92  const std::vector<double>& PValVec,
93  const double Threshold) {
94 // ****************
95  MACH3LOG_INFO("");
96  if(SampleNameVec.size() != PValVec.size())
97  {
98  MACH3LOG_ERROR("Size of vectors do not match");
99  throw MaCh3Exception(__FILE__ , __LINE__ );
100  }
101  const size_t NumberOfStatisticalTests = SampleNameVec.size();
102  //KS: 0.05 or 5% is value used by T2K.
103  const double StatisticalSignificanceDown = Threshold / double(NumberOfStatisticalTests);
104  const double StatisticalSignificanceUp = 1 - StatisticalSignificanceDown;
105  MACH3LOG_INFO("Bonferroni-corrected statistical significance level: {:.2f}", StatisticalSignificanceDown);
106 
107  int Counter = 0;
108  for(unsigned int i = 0; i < SampleNameVec.size(); i++)
109  {
110  if( (PValVec[i] < 0.5 && PValVec[i] < StatisticalSignificanceDown) ) {
111  MACH3LOG_INFO("Sample {} indicates disagreement between the model predictions and the data", SampleNameVec[i]);
112  MACH3LOG_INFO("Bonferroni-corrected statistical significance level: {:.2f} p-value: {:.2f}", StatisticalSignificanceDown, PValVec[i]);
113  Counter++;
114  } else if( (PValVec[i] > 0.5 && PValVec[i] > StatisticalSignificanceUp) ) {
115  MACH3LOG_INFO("Sample {} indicates disagreement between the model predictions and the data", SampleNameVec[i]);
116  MACH3LOG_INFO("Bonferroni-corrected statistical significance level: {:.2f} p-value: {:.2f}", StatisticalSignificanceUp, PValVec[i]);
117  Counter++;
118  }
119  }
120  if(Counter == 0) {
121  MACH3LOG_INFO("Every sample passed Bonferroni-corrected statistical significance level test");
122  } else {
123  MACH3LOG_WARN("{} samples didn't pass Bonferroni-corrected statistical significance level test", Counter);
124  }
125  MACH3LOG_INFO("");
126 }
127 
128 // ****************
129 double GetAndersonDarlingTestStat(const double CumulativeData, const double CumulativeMC, const double CumulativeJoint) {
130 // ****************
131  double ADstat = std::fabs(CumulativeData - CumulativeMC)/ std::sqrt(CumulativeJoint*(1 - CumulativeJoint));
132 
133  if( std::isinf(ADstat) || std::isnan(ADstat)) return 0;
134  return ADstat;
135 }
136 
137 // ****************
138 int GetNumberOfRuns(const std::vector<int>& GroupClasifier) {
139 // ****************
140  int NumberOfRuns = 0;
141  int PreviousGroup = -999;
142 
143  //KS: If group changed increment run
144  for (unsigned int i = 0; i < GroupClasifier.size(); i++)
145  {
146  if(GroupClasifier[i] != PreviousGroup)
147  NumberOfRuns++;
148  PreviousGroup = GroupClasifier[i];
149  }
150 
151  return NumberOfRuns;
152 }
153 
154 // ****************
155 double GetBetaParameter(const double data, const double mc, const double w2, const TestStatistic TestStat) {
156 // ****************
157  double Beta = 0.0;
158 
159  if (TestStat == kDembinskiAbdelmotteleb) {
160  //the so-called effective count
161  const double k = mc*mc / w2;
162  //Calculate beta which is scaling factor between true and generated MC
163  Beta = (data + k) / (mc + k);
164  }
165  //KS: Below is technically only true for Cowan's BB, which will not be true for Poisson or IceCube, because why not...
166  else {
167  // CW: Barlow-Beeston uses fractional uncertainty on MC, so sqrt(sum[w^2])/mc
168  const double fractional = std::sqrt(w2)/mc;
169  // CW: -b/2a in quadratic equation
170  const double temp = mc*fractional*fractional-1;
171  // CW: b^2 - 4ac in quadratic equation
172  const double temp2 = temp*temp + 4*data*fractional*fractional;
173  if (temp2 < 0) {
174  MACH3LOG_ERROR("Negative square root in Barlow Beeston coefficient calculation!");
175  throw MaCh3Exception(__FILE__ , __LINE__ );
176  }
177  // CW: Solve for the positive beta
178  Beta = (-1*temp+std::sqrt(temp2))/2.;
179  }
180  return Beta;
181 }
182 
183 
184 // *********************
185 double GetSubOptimality(const std::vector<double>& EigenValues, const int TotalTarameters) {
186 // *********************
187  double sum_eigenvalues_squared_inv = 0.0;
188  double sum_eigenvalues_inv = 0.0;
189  for (unsigned int j = 0; j < EigenValues.size(); j++)
190  {
191  //KS: IF Eigen values are super small skip them
192  //if(EigenValues[j] < 0.0000001) continue;
193  sum_eigenvalues_squared_inv += std::pow(EigenValues[j], -2);
194  sum_eigenvalues_inv += 1.0 / EigenValues[j];
195  }
196  const double SubOptimality = TotalTarameters * sum_eigenvalues_squared_inv / std::pow(sum_eigenvalues_inv, 2);
197  return SubOptimality;
198 }
199 
200 
201 // **************************
202 void GetArithmetic(TH1D * const hist, double& Mean, double& Error) {
203 // **************************
204  Mean = hist->GetMean();
205  Error = hist->GetRMS();
206 }
207 
208 // **************************
209 void GetGaussian(TH1D*& hist, TF1* gauss, double& Mean, double& Error) {
210 // **************************
211  // Supress spammy ROOT messages
212  int originalErrorLevel = gErrorIgnoreLevel;
213  gErrorIgnoreLevel = kFatal;
214 
215  const double meanval = hist->GetMean();
216  const double err = hist->GetRMS();
217  const double peakval = hist->GetBinCenter(hist->GetMaximumBin());
218 
219  // Set the range for the Gaussian fit
220  gauss->SetRange(meanval - 1.5*err , meanval + 1.5*err);
221  // Set the starting parameters close to RMS and peaks of the histograms
222  gauss->SetParameters(hist->GetMaximum()*err*std::sqrt(2*3.14), peakval, err);
223 
224  // Perform the fit
225  hist->Fit(gauss->GetName(),"Rq");
226  hist->SetStats(0);
227 
228  Mean = gauss->GetParameter(1);
229  Error = gauss->GetParameter(2);
230 
231  // restore original warning setting
232  gErrorIgnoreLevel = originalErrorLevel;
233 }
234 
235 // ***************
236 void GetHPD(TH1D* const hist, double& Mean, double& Error, double& Error_p, double& Error_m, const double coverage) {
237 // ****************
238  // Get the bin which has the largest posterior density
239  const int MaxBin = hist->GetMaximumBin();
240  // And it's value
241  const double peakval = hist->GetBinCenter(MaxBin);
242 
243  // The total integral of the posterior
244  const long double Integral = hist->Integral();
245  //KS: and integral of left handed and right handed parts
246  const long double LowIntegral = hist->Integral(1, MaxBin-1) + hist->GetBinContent(MaxBin)/2.0;
247  const long double HighIntegral = hist->Integral(MaxBin+1, hist->GetNbinsX()) + hist->GetBinContent(MaxBin)/2.0;
248 
249  // Keep count of how much area we're covering
250  //KS: Take only half content of HPD bin as one half goes for right handed error and the other for left handed error
251  long double sum = hist->GetBinContent(MaxBin)/2.0;
252 
253  // Counter for current bin
254  int CurrBin = MaxBin;
255  while (sum/HighIntegral < coverage && CurrBin < hist->GetNbinsX()) {
256  CurrBin++;
257  sum += hist->GetBinContent(CurrBin);
258  }
259  const double sigma_p = std::fabs(hist->GetBinCenter(MaxBin)-hist->GetXaxis()->GetBinUpEdge(CurrBin));
260  // Reset the sum
261  //KS: Take only half content of HPD bin as one half goes for right handed error and the other for left handed error
262  sum = hist->GetBinContent(MaxBin)/2.0;
263 
264  // Reset the bin counter
265  CurrBin = MaxBin;
266  // Counter for current bin
267  while (sum/LowIntegral < coverage && CurrBin > 1) {
268  CurrBin--;
269  sum += hist->GetBinContent(CurrBin);
270  }
271  const double sigma_m = std::fabs(hist->GetBinCenter(CurrBin)-hist->GetBinLowEdge(MaxBin));
272 
273  // Now do the double sided HPD
274  //KS: Start sum from the HPD
275  sum = hist->GetBinContent(MaxBin);
276  int LowBin = MaxBin;
277  int HighBin = MaxBin;
278  long double LowCon = 0.0;
279  long double HighCon = 0.0;
280 
281  while (sum/Integral < coverage && (LowBin > 0 || HighBin < hist->GetNbinsX()+1))
282  {
283  LowCon = 0.0;
284  HighCon = 0.0;
285  //KS:: Move further only if you haven't reached histogram end
286  if(LowBin > 1)
287  {
288  LowBin--;
289  LowCon = hist->GetBinContent(LowBin);
290  }
291  if(HighBin < hist->GetNbinsX())
292  {
293  HighBin++;
294  HighCon = hist->GetBinContent(HighBin);
295  }
296 
297  // If we're on the last slice and the lower contour is larger than the upper
298  if ((sum+LowCon+HighCon)/Integral > coverage && LowCon > HighCon) {
299  sum += LowCon;
300  break;
301  // If we're on the last slice and the upper contour is larger than the lower
302  } else if ((sum+LowCon+HighCon)/Integral > coverage && HighCon >= LowCon) {
303  sum += HighCon;
304  break;
305  } else {
306  sum += LowCon + HighCon;
307  }
308  }
309 
310  double sigma_hpd = 0.0;
311  if (LowCon > HighCon) {
312  sigma_hpd = std::fabs(hist->GetBinLowEdge(LowBin)-hist->GetBinCenter(MaxBin));
313  } else {
314  sigma_hpd = std::fabs(hist->GetXaxis()->GetBinUpEdge(HighBin)-hist->GetBinCenter(MaxBin));
315  }
316 
317  Mean = peakval;
318  Error = sigma_hpd;
319  Error_p = sigma_p;
320  Error_m = sigma_m;
321 }
322 
323 // ***************
324 void GetCredibleInterval(const std::unique_ptr<TH1D>& hist, std::unique_ptr<TH1D>& hpost_copy, const double coverage) {
325 // ***************
326  if(coverage > 1)
327  {
328  MACH3LOG_ERROR("Specified Credible Interval is greater that 1 and equal to {} Should be between 0 and 1", coverage);
329  throw MaCh3Exception(__FILE__ , __LINE__ );
330  }
331  //KS: Reset first copy of histogram
332  hpost_copy->Reset("");
333  hpost_copy->Fill(0.0, 0.0);
334 
335  //KS: Temporary structure to be thread save
336  std::vector<double> hist_copy(hist->GetXaxis()->GetNbins()+1);
337  std::vector<bool> hist_copy_fill(hist->GetXaxis()->GetNbins()+1);
338  for (int i = 0; i <= hist->GetXaxis()->GetNbins(); ++i)
339  {
340  hist_copy[i] = hist->GetBinContent(i);
341  hist_copy_fill[i] = false;
342  }
343 
345  const long double Integral = hist->Integral();
346  long double sum = 0;
347 
348  while ((sum / Integral) < coverage)
349  {
351  int max_entry_bin = 0;
352  double max_entries = 0.;
353  for (int i = 0; i <= hist->GetXaxis()->GetNbins(); ++i)
354  {
355  if (hist_copy[i] > max_entries)
356  {
357  max_entries = hist_copy[i];
358  max_entry_bin = i;
359  }
360  }
362  hist_copy[max_entry_bin] = -1.;
363  hist_copy_fill[max_entry_bin] = true;
364 
365  sum += max_entries;
366  }
367  //KS: Now fill our copy only for bins which got included in coverage region
368  for(int i = 0; i <= hist->GetXaxis()->GetNbins(); ++i)
369  {
370  if(hist_copy_fill[i]) hpost_copy->SetBinContent(i, hist->GetBinContent(i));
371  }
372 }
373 
374 // ***************
375 void GetCredibleIntervalSig(const std::unique_ptr<TH1D>& hist, std::unique_ptr<TH1D>& hpost_copy, const bool CredibleInSigmas, const double coverage) {
376 // ***************
377  //KS: Slightly different approach depending if intervals are in percentage or sigmas
378  if(CredibleInSigmas) {
379  //KS: Convert sigmas into percentage
380  const double CredReg = GetSigmaValue(int(std::round(coverage)));
381  GetCredibleInterval(hist, hpost_copy, CredReg);
382  } else {
383  GetCredibleInterval(hist, hpost_copy, coverage);
384  }
385 }
386 
387 // ***************
388 void GetCredibleRegion(std::unique_ptr<TH2D>& hist2D, const double coverage) {
389 // ***************
390  if(coverage > 1)
391  {
392  MACH3LOG_ERROR("Specified Credible Region is greater than 1 and equal to {:.2f} Should be between 0 and 1", coverage);
393  throw MaCh3Exception(__FILE__ , __LINE__ );
394  }
395 
396  //KS: Temporary structure to be thread save
397  std::vector<std::vector<double>> hist_copy(hist2D->GetXaxis()->GetNbins()+1,
398  std::vector<double>(hist2D->GetYaxis()->GetNbins()+1));
399  for (int i = 0; i <= hist2D->GetXaxis()->GetNbins(); ++i) {
400  for (int j = 0; j <= hist2D->GetYaxis()->GetNbins(); ++j) {
401  hist_copy[i][j] = hist2D->GetBinContent(i, j);
402  }
403  }
404 
406  const long double Integral = hist2D->Integral();
407  long double sum = 0;
408 
409  //We need to as ROOT requires array to set to contour
410  double Contour[1];
411  while ((sum / Integral) < coverage)
412  {
414  int max_entry_bin_x = 0;
415  int max_entry_bin_y = 0;
416  double max_entries = 0.;
417  for (int i = 0; i <= hist2D->GetXaxis()->GetNbins(); ++i)
418  {
419  for (int j = 0; j <= hist2D->GetYaxis()->GetNbins(); ++j)
420  {
421  if (hist_copy[i][j] > max_entries)
422  {
423  max_entries = hist_copy[i][j];
424  max_entry_bin_x = i;
425  max_entry_bin_y = j;
426  }
427  }
428  }
430  hist_copy[max_entry_bin_x][max_entry_bin_y] = -1.;
431 
432  sum += max_entries;
433  Contour[0] = max_entries;
434  }
435  hist2D->SetContour(1, Contour);
436 }
437 
438 // ***************
439 void GetCredibleRegionSig(std::unique_ptr<TH2D>& hist2D, const bool CredibleInSigmas, const double coverage) {
440 // ***************
441  if(CredibleInSigmas) {
442  //KS: Convert sigmas into percentage
443  const double CredReg = GetSigmaValue(int(std::round(coverage)));
444  GetCredibleRegion(hist2D, CredReg);
445  } else {
446  GetCredibleRegion(hist2D, coverage);
447  }
448 }
449 
450 // *********************
451 double GetIQR(TH1D *Hist) {
452 // *********************
453  if(Hist->Integral() == 0) return 0.0;
454 
455  constexpr double quartiles_x[3] = {0.25, 0.5, 0.75};
456  double quartiles[3];
457 
458  Hist->GetQuantiles(3, quartiles, quartiles_x);
459 
460  return quartiles[2] - quartiles[0];
461 }
462 
463 // ********************
464 double ComputeKLDivergence(const std::vector<double>& Data,
465  const std::vector<double>& MC) {
466 // ********************
467  double klDivergence = 0.0;
468  double DataIntegral = std::accumulate(Data.begin(), Data.end(), 0.0);
469  double MCIntegral = std::accumulate(MC.begin(), MC.end(), 0.0);
470  for (size_t i = 0; i < Data.size(); ++i)
471  {
472  if (Data[i] > 0 && MC[i] > 0) {
473  klDivergence += Data[i] / DataIntegral *
474  std::log((Data[i] / DataIntegral) / ( MC[i] / MCIntegral));
475  }
476  }
477  return klDivergence;
478 }
479 
480 // ********************
481 double ComputeKLDivergence(TH2Poly* DataPoly, TH2Poly* PolyMC) {
482 // *********************
483  int nBins = DataPoly->GetNumberOfBins();
484  std::vector<double> Data(nBins);
485  std::vector<double> MC(nBins);
486 
487  for (int i = 0; i < nBins; ++i) {
488  Data[i] = DataPoly->GetBinContent(i+1);
489  MC[i] = PolyMC->GetBinContent(i+1);
490  }
491 
492  return ComputeKLDivergence(Data, MC);
493 }
494 
495 // ********************
496 double FisherCombinedPValue(const std::vector<double>& pvalues) {
497 // ********************
498  double testStatistic = 0;
499  for(size_t i = 0; i < pvalues.size(); i++)
500  {
501  const double pval = std::max(0.00001, pvalues[i]);
502  testStatistic += -2.0 * std::log(pval);
503  }
504  // Degrees of freedom is twice the number of p-values
505  int degreesOfFreedom = int(2 * pvalues.size());
506  double pValue = TMath::Prob(testStatistic, degreesOfFreedom);
507 
508  return pValue;
509 }
510 
511 // ********************
512 void ThinningMCMC(const std::string& FilePath, const int ThinningCut) {
513 // ********************
514  std::string FilePathNowRoot = FilePath;
515  if (FilePath.size() >= 5 && FilePath.substr(FilePath.size() - 5) == ".root") {
516  FilePathNowRoot = FilePath.substr(0, FilePath.size() - 5);
517  }
518  std::string TempFilePath = FilePathNowRoot + "_thinned.root";
519  int ret = system(("cp " + FilePath + " " + TempFilePath).c_str());
520  if (ret != 0) {
521  MACH3LOG_WARN("System call to copy file failed with code {}", ret);
522  }
523 
524  TFile *inFile = M3::Open(TempFilePath, "RECREATE", __FILE__, __LINE__);
525  TTree *inTree = inFile->Get<TTree>("posteriors");
526  if (!inTree) {
527  MACH3LOG_ERROR("TTree 'posteriors' not found in file.");
528  inFile->ls();
529  inFile->Close();
530  throw MaCh3Exception(__FILE__, __LINE__);
531  }
532 
533  // Clone the structure without data
534  TTree *outTree = inTree->CloneTree(0);
535 
536  // Loop over entries and apply thinning
537  Long64_t nEntries = inTree->GetEntries();
538  double retainedPercentage = (double(nEntries) / ThinningCut) / double(nEntries) * 100;
539  MACH3LOG_INFO("Thinning will retain {:.2f}% of chains", retainedPercentage);
540  for (Long64_t i = 0; i < nEntries; i++) {
541  if (i % (nEntries/10) == 0) {
542  M3::Utils::PrintProgressBar(i, nEntries);
543  }
544  if (i % ThinningCut == 0) {
545  inTree->GetEntry(i);
546  outTree->Fill();
547  }
548  }
549  inFile->WriteTObject(outTree, "posteriors", "kOverwrite");
550  inFile->Close();
551  delete inFile;
552 
553  MACH3LOG_INFO("Thinned TTree saved and overwrote original in: {}", TempFilePath);
554 }
555 
556 // ********************
557 double GetZScore(const double value, const double mean, const double stddev) {
558 // ********************
559  return (value - mean) / stddev;
560 }
561 
562 // ********************
563 double GetPValueFromZScore(const double zScore) {
564 // ********************
565  return 0.5 * std::erfc(-zScore / std::sqrt(2));
566 }
567 
568 // ****************
569 // Get the mode error from a TH1D
570 double GetModeError(TH1D* hpost) {
571 // ****************
572  // Get the bin which has the largest posterior density
573  int MaxBin = hpost->GetMaximumBin();
574 
575  // The total integral of the posterior
576  const double Integral = hpost->Integral();
577 
578  int LowBin = MaxBin;
579  int HighBin = MaxBin;
580  double sum = hpost->GetBinContent(MaxBin);;
581  double LowCon = 0.0;
582  double HighCon = 0.0;
583  while (sum/Integral < 0.6827 && (LowBin > 0 || HighBin < hpost->GetNbinsX()+1) )
584  {
585  LowCon = 0.0;
586  HighCon = 0.0;
587  //KS:: Move further only if you haven't reached histogram end
588  if(LowBin > 1)
589  {
590  LowBin--;
591  LowCon = hpost->GetBinContent(LowBin);
592  }
593  if(HighBin < hpost->GetNbinsX())
594  {
595  HighBin++;
596  HighCon = hpost->GetBinContent(HighBin);
597  }
598 
599  // If we're on the last slice and the lower contour is larger than the upper
600  if ((sum+LowCon+HighCon)/Integral > 0.6827 && LowCon > HighCon) {
601  sum += LowCon;
602  break;
603  // If we're on the last slice and the upper contour is larger than the lower
604  } else if ((sum+LowCon+HighCon)/Integral > 0.6827 && HighCon >= LowCon) {
605  sum += HighCon;
606  break;
607  } else {
608  sum += LowCon + HighCon;
609  }
610  }
611 
612  double Mode_Error = 0.0;
613  if (LowCon > HighCon) {
614  Mode_Error = std::fabs(hpost->GetBinCenter(LowBin)-hpost->GetBinCenter(MaxBin));
615  } else {
616  Mode_Error = std::fabs(hpost->GetBinCenter(HighBin)-hpost->GetBinCenter(MaxBin));
617  }
618 
619  return Mode_Error;
620 }
621 
622 // ****************
623 // Make the 2D cut distribution and give the 2D p-value
624 void Get2DBayesianpValue(TH2D *Histogram) {
625 // ****************
626  const double TotalIntegral = Histogram->Integral();
627  // Count how many fills are above y=x axis
628  // This is the 2D p-value
629  double Above = 0.0;
630  for (int i = 0; i < Histogram->GetXaxis()->GetNbins(); ++i) {
631  const double xvalue = Histogram->GetXaxis()->GetBinCenter(i+1);
632  for (int j = 0; j < Histogram->GetYaxis()->GetNbins(); ++j) {
633  const double yvalue = Histogram->GetYaxis()->GetBinCenter(j+1);
634  // We're only interested in being _ABOVE_ the y=x axis
635  if (xvalue >= yvalue) {
636  Above += Histogram->GetBinContent(i+1, j+1);
637  }
638  }
639  }
640  const double pvalue = Above/TotalIntegral;
641  std::stringstream ss;
642  ss << int(Above) << "/" << int(TotalIntegral) << "=" << pvalue;
643  Histogram->SetTitle((std::string(Histogram->GetTitle())+"_"+ss.str()).c_str());
644 
645  // Now add the TLine going diagonally
646  double minimum = Histogram->GetXaxis()->GetBinLowEdge(1);
647  if (Histogram->GetYaxis()->GetBinLowEdge(1) > minimum) {
648  minimum = Histogram->GetYaxis()->GetBinLowEdge(1);
649  }
650  double maximum = Histogram->GetXaxis()->GetBinLowEdge(Histogram->GetXaxis()->GetNbins());
651  if (Histogram->GetYaxis()->GetBinLowEdge(Histogram->GetYaxis()->GetNbins()) < maximum) {
652  maximum = Histogram->GetYaxis()->GetBinLowEdge(Histogram->GetYaxis()->GetNbins());
653  //KS: Extend by bin width to perfectly fit canvas
654  maximum += Histogram->GetYaxis()->GetBinWidth(Histogram->GetYaxis()->GetNbins());
655  }
656  else maximum += Histogram->GetXaxis()->GetBinWidth(Histogram->GetXaxis()->GetNbins());
657  auto TempLine = std::make_unique<TLine>(minimum, minimum, maximum, maximum);
658  TempLine->SetLineColor(kRed);
659  TempLine->SetLineWidth(2);
660 
661  std::string Title = Histogram->GetName();
662  Title += "_canv";
663  auto TempCanvas = std::make_unique<TCanvas>(Title.c_str(), Title.c_str(), 1024, 1024);
664  TempCanvas->SetGridx();
665  TempCanvas->SetGridy();
666  TempCanvas->cd();
667  gStyle->SetPalette(81);
668  Histogram->SetMinimum(-0.01);
669  Histogram->Draw("colz");
670  TempLine->Draw("same");
671 
672  TempCanvas->Write();
673 }
674 
675 // ****************
676 // Converts posterior likelihood to dchi2
677 std::unique_ptr<TH1D> GetDeltaChi2(TH1D* posterior_probability_hist) {
678 // ****************
679  auto delta_chi2 = M3::Clone(posterior_probability_hist);
680  delta_chi2->GetYaxis()->SetTitle("#Delta#chi^{2}");
681 
682  int max_bin = delta_chi2->GetMaximumBin();
683  double max_content = delta_chi2->GetBinContent(max_bin);
684  if (max_content == 0) {
685  MACH3LOG_ERROR("Histogram {}, has larges bin with 0", delta_chi2->GetTitle());
686  MACH3LOG_ERROR("This suggest you skewed binning for posterior probability or something else");
687  throw MaCh3Exception(__FILE__, __LINE__);
688  }
689 
690  double NewMaximum = M3::_BAD_DOUBLE_;
691  for(int iBin = 1; iBin < delta_chi2->GetNbinsX()+1; iBin++) {
692  double bin_content = delta_chi2->GetBinContent(iBin);
693  if(bin_content == 0) bin_content = 1.0 ;
694 
695  double chi2_likelihood = -2*std::log(bin_content/max_content);
696  delta_chi2->SetBinContent(iBin, chi2_likelihood);
697  NewMaximum = std::max(NewMaximum, chi2_likelihood);
698  }
699  delta_chi2->SetMaximum(NewMaximum*1.1);
700  return delta_chi2;
701 }
702 
703 // ****************
704 void PassErrorToRatioPlot(TH1D* RatioHist, TH1D* Hist1, TH1D* DataHist) {
705 // ****************
706  for (int j = 0; j <= RatioHist->GetNbinsX(); ++j)
707  {
708  if(DataHist->GetBinContent(j) > 0)
709  {
710  double dx = Hist1->GetBinError(j) / DataHist->GetBinContent(j);
711  RatioHist->SetBinError(j, dx);
712  }
713  }
714 }
#define MACH3LOG_ERROR
Definition: MaCh3Logger.h:37
#define MACH3LOG_INFO
Definition: MaCh3Logger.h:35
#define MACH3LOG_WARN
Definition: MaCh3Logger.h:36
double ** Mean
Definition: RHat.cpp:63
TestStatistic
Make an enum of the test statistic that we're using.
@ kDembinskiAbdelmotteleb
Based on .
double GetSigmaValue(const int sigma)
KS: Convert sigma from normal distribution into percentage.
double GetBetaParameter(const double data, const double mc, const double w2, const TestStatistic TestStat)
KS: Calculate Beta parameter which will be different based on specified test statistic.
double ComputeKLDivergence(const std::vector< double > &Data, const std::vector< double > &MC)
Compute the Kullback-Leibler divergence between two TH2Poly histograms.
double GetSubOptimality(const std::vector< double > &EigenValues, const int TotalTarameters)
Based on .
double GetBIC(const double llh, const int data, const int nPars)
Get the Bayesian Information Criterion (BIC) or Schwarz information criterion (also SIC,...
double GetPValueFromZScore(const double zScore)
Compute the P-value from a given Z-score.
void Get2DBayesianpValue(TH2D *Histogram)
Calculates the 2D Bayesian p-value and generates a visualization.
double GetZScore(const double value, const double mean, const double stddev)
Compute the Z-score for a given value.
void GetGaussian(TH1D *&hist, TF1 *gauss, double &Mean, double &Error)
CW: Fit Gaussian to posterior.
void GetCredibleRegion(std::unique_ptr< TH2D > &hist2D, const double coverage)
KS: Set 2D contour within some coverage.
double GetModeError(TH1D *hpost)
Get the mode error from a TH1D.
double GetAndersonDarlingTestStat(const double CumulativeData, const double CumulativeMC, const double CumulativeJoint)
void GetCredibleIntervalSig(const std::unique_ptr< TH1D > &hist, std::unique_ptr< TH1D > &hpost_copy, const bool CredibleInSigmas, const double coverage)
KS: Get 1D histogram within credible interval, hpost_copy has to have the same binning,...
double FisherCombinedPValue(const std::vector< double > &pvalues)
KS: Combine p-values using Fisher's method.
void PassErrorToRatioPlot(TH1D *RatioHist, TH1D *Hist1, TH1D *DataHist)
double GetIQR(TH1D *Hist)
Interquartile Range (IQR)
void ThinningMCMC(const std::string &FilePath, const int ThinningCut)
Thin MCMC Chain, to save space and maintain low autocorrelations.
std::string GetJeffreysScale(const double BayesFactor)
KS: Following H. Jeffreys .
double GetNeffective(const int N1, const int N2)
KS: See 14.3.10 in Numerical Recipes in C .
void GetHPD(TH1D *const hist, double &Mean, double &Error, double &Error_p, double &Error_m, const double coverage)
Get Highest Posterior Density (HPD)
void GetCredibleRegionSig(std::unique_ptr< TH2D > &hist2D, const bool CredibleInSigmas, const double coverage)
KS: Set 2D contour within some coverage.
void GetArithmetic(TH1D *const hist, double &Mean, double &Error)
CW: Get Arithmetic mean from posterior.
void GetCredibleInterval(const std::unique_ptr< TH1D > &hist, std::unique_ptr< TH1D > &hpost_copy, const double coverage)
KS: Get 1D histogram within credible interval, hpost_copy has to have the same binning,...
void CheckBonferoniCorrectedpValue(const std::vector< std::string > &SampleNameVec, const std::vector< double > &PValVec, const double Threshold)
KS: For more see https://www.t2k.org/docs/technotes/429/TN429_v8#page=63.
std::string GetDunneKaboth(const double BayesFactor)
Convert a Bayes factor into an approximate particle-physics significance level using the Dunne–Kaboth...
int GetNumberOfRuns(const std::vector< int > &GroupClasifier)
KS: https://esjeevanand.uccollege.edu.in/wp-content/uploads/sites/114/2020/08/NON-PARAMTERIC-TEST-6....
std::unique_ptr< TH1D > GetDeltaChi2(TH1D *posterior_probability_hist)
Convert a posterior probability histogram into a distribution. Using the likelihood-ratio definition...
Utility functions for statistical interpretations in MaCh3.
Custom exception class used throughout MaCh3.
void PrintProgressBar(const Long64_t Done, const Long64_t All)
KS: Simply print progress bar.
Definition: Monitor.cpp:229
std::unique_ptr< ObjectType > Clone(const ObjectType *obj, const std::string &name="")
KS: Creates a copy of a ROOT-like object and wraps it in a smart pointer.
constexpr static const double _BAD_DOUBLE_
Default value used for double initialisation.
Definition: Core.h:53
TFile * Open(const std::string &Name, const std::string &Type, const std::string &File, const int Line)
Opens a ROOT file with the given name and mode.