7 std::string JeffreysScale =
"";
9 if(BayesFactor < 0) JeffreysScale =
"Negative";
10 else if( 5 > BayesFactor) JeffreysScale =
"Barely worth mentioning";
11 else if( 10 > BayesFactor) JeffreysScale =
"Substantial";
12 else if( 15 > BayesFactor) JeffreysScale =
"Strong";
13 else if( 20 > BayesFactor) JeffreysScale =
"Very strong";
14 else JeffreysScale =
"Decisive";
22 std::string DunneKaboth =
"";
25 if(2.125 > BayesFactor) DunneKaboth =
"< 1 #sigma";
26 else if( 20.74 > BayesFactor) DunneKaboth =
"> 1 #sigma";
27 else if( 369.4 > BayesFactor) DunneKaboth =
"> 2 #sigma";
28 else if( 15800 > BayesFactor) DunneKaboth =
"> 3 #sigma";
29 else if( 1745000 > BayesFactor) DunneKaboth =
"> 4 #sigma";
30 else DunneKaboth =
"> 5 #sigma";
39 switch (std::abs(sigma))
42 width = 0.682689492137;
45 width = 0.954499736104;
48 width = 0.997300203937;
51 width = 0.999936657516;
54 width = 0.999999426697;
57 width = 0.999999998027;
68 double GetBIC(
const double llh,
const int data,
const int nPars){
72 MACH3LOG_ERROR(
"You haven't passed number of model parameters as it is still zero");
75 const double BIC = double(nPars * logl(data) + llh);
83 const double Nominator = (N1+N2);
84 const double Denominator = (N1*N2);
85 const double N_e = Nominator/Denominator;
91 const std::vector<double>& PValVec,
92 const double Threshold) {
95 if(SampleNameVec.size() != PValVec.size())
100 const size_t NumberOfStatisticalTests = SampleNameVec.size();
102 const double StatisticalSignificanceDown = Threshold / double(NumberOfStatisticalTests);
103 const double StatisticalSignificanceUp = 1 - StatisticalSignificanceDown;
104 MACH3LOG_INFO(
"Bonferroni-corrected statistical significance level: {:.2f}", StatisticalSignificanceDown);
107 for(
unsigned int i = 0; i < SampleNameVec.size(); i++)
109 if( (PValVec[i] < 0.5 && PValVec[i] < StatisticalSignificanceDown) ) {
110 MACH3LOG_INFO(
"Sample {} indicates disagreement between the model predictions and the data", SampleNameVec[i]);
111 MACH3LOG_INFO(
"Bonferroni-corrected statistical significance level: {:.2f} p-value: {:.2f}", StatisticalSignificanceDown, PValVec[i]);
113 }
else if( (PValVec[i] > 0.5 && PValVec[i] > StatisticalSignificanceUp) ) {
114 MACH3LOG_INFO(
"Sample {} indicates disagreement between the model predictions and the data", SampleNameVec[i]);
115 MACH3LOG_INFO(
"Bonferroni-corrected statistical significance level: {:.2f} p-value: {:.2f}", StatisticalSignificanceUp, PValVec[i]);
120 MACH3LOG_INFO(
"Every sample passed Bonferroni-corrected statistical significance level test");
122 MACH3LOG_WARN(
"{} samples didn't pass Bonferroni-corrected statistical significance level test", Counter);
130 double ADstat = std::fabs(CumulativeData - CumulativeMC)/ std::sqrt(CumulativeJoint*(1 - CumulativeJoint));
132 if( std::isinf(ADstat) || std::isnan(ADstat))
return 0;
139 int NumberOfRuns = 0;
140 int PreviousGroup = -999;
143 for (
unsigned int i = 0; i < GroupClasifier.size(); i++)
145 if(GroupClasifier[i] != PreviousGroup)
147 PreviousGroup = GroupClasifier[i];
160 const double k = mc*mc / w2;
162 Beta = (data + k) / (mc + k);
167 const double fractional = std::sqrt(w2)/mc;
169 const double temp = mc*fractional*fractional-1;
171 const double temp2 = temp*temp + 4*data*fractional*fractional;
173 MACH3LOG_ERROR(
"Negative square root in Barlow Beeston coefficient calculation!");
177 Beta = (-1*temp+std::sqrt(temp2))/2.;
184 double GetSubOptimality(
const std::vector<double>& EigenValues,
const int TotalTarameters) {
186 double sum_eigenvalues_squared_inv = 0.0;
187 double sum_eigenvalues_inv = 0.0;
188 for (
unsigned int j = 0; j < EigenValues.size(); j++)
192 sum_eigenvalues_squared_inv += std::pow(EigenValues[j], -2);
193 sum_eigenvalues_inv += 1.0 / EigenValues[j];
195 const double SubOptimality = TotalTarameters * sum_eigenvalues_squared_inv / std::pow(sum_eigenvalues_inv, 2);
196 return SubOptimality;
203 Mean = hist->GetMean();
204 Error = hist->GetRMS();
211 int originalErrorLevel = gErrorIgnoreLevel;
212 gErrorIgnoreLevel = kFatal;
214 const double meanval = hist->GetMean();
215 const double err = hist->GetRMS();
216 const double peakval = hist->GetBinCenter(hist->GetMaximumBin());
219 gauss->SetRange(meanval - 1.5*err , meanval + 1.5*err);
221 gauss->SetParameters(hist->GetMaximum()*err*std::sqrt(2*3.14), peakval, err);
224 hist->Fit(gauss->GetName(),
"Rq");
227 Mean = gauss->GetParameter(1);
228 Error = gauss->GetParameter(2);
231 gErrorIgnoreLevel = originalErrorLevel;
235 void GetHPD(TH1D*
const hist,
double&
Mean,
double& Error,
double& Error_p,
double& Error_m,
const double coverage) {
238 const int MaxBin = hist->GetMaximumBin();
240 const double peakval = hist->GetBinCenter(MaxBin);
243 const long double Integral = hist->Integral();
245 const long double LowIntegral = hist->Integral(1, MaxBin-1) + hist->GetBinContent(MaxBin)/2.0;
246 const long double HighIntegral = hist->Integral(MaxBin+1, hist->GetNbinsX()) + hist->GetBinContent(MaxBin)/2.0;
250 long double sum = hist->GetBinContent(MaxBin)/2.0;
253 int CurrBin = MaxBin;
254 while (sum/HighIntegral < coverage && CurrBin < hist->GetNbinsX()) {
256 sum += hist->GetBinContent(CurrBin);
258 const double sigma_p = std::fabs(hist->GetBinCenter(MaxBin)-hist->GetXaxis()->GetBinUpEdge(CurrBin));
261 sum = hist->GetBinContent(MaxBin)/2.0;
266 while (sum/LowIntegral < coverage && CurrBin > 1) {
268 sum += hist->GetBinContent(CurrBin);
270 const double sigma_m = std::fabs(hist->GetBinCenter(CurrBin)-hist->GetBinLowEdge(MaxBin));
274 sum = hist->GetBinContent(MaxBin);
276 int HighBin = MaxBin;
277 long double LowCon = 0.0;
278 long double HighCon = 0.0;
280 while (sum/Integral < coverage && (LowBin > 0 || HighBin < hist->GetNbinsX()+1))
288 LowCon = hist->GetBinContent(LowBin);
290 if(HighBin < hist->GetNbinsX())
293 HighCon = hist->GetBinContent(HighBin);
297 if ((sum+LowCon+HighCon)/Integral > coverage && LowCon > HighCon) {
301 }
else if ((sum+LowCon+HighCon)/Integral > coverage && HighCon >= LowCon) {
305 sum += LowCon + HighCon;
309 double sigma_hpd = 0.0;
310 if (LowCon > HighCon) {
311 sigma_hpd = std::fabs(hist->GetBinLowEdge(LowBin)-hist->GetBinCenter(MaxBin));
313 sigma_hpd = std::fabs(hist->GetXaxis()->GetBinUpEdge(HighBin)-hist->GetBinCenter(MaxBin));
323 void GetCredibleInterval(
const std::unique_ptr<TH1D>& hist, std::unique_ptr<TH1D>& hpost_copy,
const double coverage) {
327 MACH3LOG_ERROR(
"Specified Credible Interval is greater that 1 and equal to {} Should be between 0 and 1", coverage);
331 hpost_copy->Reset(
"");
332 hpost_copy->Fill(0.0, 0.0);
335 std::vector<double> hist_copy(hist->GetXaxis()->GetNbins()+1);
336 std::vector<bool> hist_copy_fill(hist->GetXaxis()->GetNbins()+1);
337 for (
int i = 0; i <= hist->GetXaxis()->GetNbins(); ++i)
339 hist_copy[i] = hist->GetBinContent(i);
340 hist_copy_fill[i] =
false;
344 const long double Integral = hist->Integral();
347 while ((sum / Integral) < coverage)
350 int max_entry_bin = 0;
351 double max_entries = 0.;
352 for (
int i = 0; i <= hist->GetXaxis()->GetNbins(); ++i)
354 if (hist_copy[i] > max_entries)
356 max_entries = hist_copy[i];
361 hist_copy[max_entry_bin] = -1.;
362 hist_copy_fill[max_entry_bin] =
true;
367 for(
int i = 0; i <= hist->GetXaxis()->GetNbins(); ++i)
369 if(hist_copy_fill[i]) hpost_copy->SetBinContent(i, hist->GetBinContent(i));
374 void GetCredibleIntervalSig(
const std::unique_ptr<TH1D>& hist, std::unique_ptr<TH1D>& hpost_copy,
const bool CredibleInSigmas,
const double coverage) {
377 if(CredibleInSigmas) {
379 const double CredReg =
GetSigmaValue(
int(std::round(coverage)));
391 MACH3LOG_ERROR(
"Specified Credible Region is greater than 1 and equal to {:.2f} Should be between 0 and 1", coverage);
396 std::vector<std::vector<double>> hist_copy(hist2D->GetXaxis()->GetNbins()+1,
397 std::vector<double>(hist2D->GetYaxis()->GetNbins()+1));
398 for (
int i = 0; i <= hist2D->GetXaxis()->GetNbins(); ++i) {
399 for (
int j = 0; j <= hist2D->GetYaxis()->GetNbins(); ++j) {
400 hist_copy[i][j] = hist2D->GetBinContent(i, j);
405 const long double Integral = hist2D->Integral();
410 while ((sum / Integral) < coverage)
413 int max_entry_bin_x = 0;
414 int max_entry_bin_y = 0;
415 double max_entries = 0.;
416 for (
int i = 0; i <= hist2D->GetXaxis()->GetNbins(); ++i)
418 for (
int j = 0; j <= hist2D->GetYaxis()->GetNbins(); ++j)
420 if (hist_copy[i][j] > max_entries)
422 max_entries = hist_copy[i][j];
429 hist_copy[max_entry_bin_x][max_entry_bin_y] = -1.;
432 Contour[0] = max_entries;
434 hist2D->SetContour(1, Contour);
440 if(CredibleInSigmas) {
442 const double CredReg =
GetSigmaValue(
int(std::round(coverage)));
452 if(Hist->Integral() == 0)
return 0.0;
454 constexpr
double quartiles_x[3] = {0.25, 0.5, 0.75};
457 Hist->GetQuantiles(3, quartiles, quartiles_x);
459 return quartiles[2] - quartiles[0];
465 double klDivergence = 0.0;
468 for (
int i = 1; i < DataPoly->GetNumberOfBins()+1; ++i)
470 if (DataPoly->GetBinContent(i) > 0 && PolyMC->GetBinContent(i) > 0) {
471 klDivergence += DataPoly->GetBinContent(i) / DataIntegral *
472 std::log((DataPoly->GetBinContent(i) / DataIntegral) / ( PolyMC->GetBinContent(i) / MCIntegral));
480 double testStatistic = 0;
481 for(
size_t i = 0; i < pvalues.size(); i++)
483 const double pval = std::max(0.00001, pvalues[i]);
484 testStatistic += -2.0 * std::log(pval);
487 int degreesOfFreedom = int(2 * pvalues.size());
488 double pValue = TMath::Prob(testStatistic, degreesOfFreedom);
494 void ThinningMCMC(
const std::string& FilePath,
const int ThinningCut) {
496 std::string FilePathNowRoot = FilePath;
497 if (FilePath.size() >= 5 && FilePath.substr(FilePath.size() - 5) ==
".root") {
498 FilePathNowRoot = FilePath.substr(0, FilePath.size() - 5);
500 std::string TempFilePath = FilePathNowRoot +
"_thinned.root";
501 int ret = system((
"cp " + FilePath +
" " + TempFilePath).c_str());
503 MACH3LOG_WARN(
"System call to copy file failed with code {}", ret);
506 TFile *inFile =
M3::Open(TempFilePath,
"RECREATE", __FILE__, __LINE__);
507 TTree *inTree = inFile->Get<TTree>(
"posteriors");
516 TTree *outTree = inTree->CloneTree(0);
519 Long64_t nEntries = inTree->GetEntries();
520 double retainedPercentage = (double(nEntries) / ThinningCut) /
double(nEntries) * 100;
521 MACH3LOG_INFO(
"Thinning will retain {:.2f}% of chains", retainedPercentage);
522 for (Long64_t i = 0; i < nEntries; i++) {
523 if (i % (nEntries/10) == 0) {
526 if (i % ThinningCut == 0) {
531 inFile->WriteTObject(outTree,
"posteriors",
"kOverwrite");
535 MACH3LOG_INFO(
"Thinned TTree saved and overwrote original in: {}", TempFilePath);
539 double GetZScore(
const double value,
const double mean,
const double stddev) {
541 return (value - mean) / stddev;
547 return 0.5 * std::erfc(-zScore / std::sqrt(2));
555 int MaxBin = hpost->GetMaximumBin();
558 const double Integral = hpost->Integral();
561 int HighBin = MaxBin;
562 double sum = hpost->GetBinContent(MaxBin);;
564 double HighCon = 0.0;
565 while (sum/Integral < 0.6827 && (LowBin > 0 || HighBin < hpost->GetNbinsX()+1) )
573 LowCon = hpost->GetBinContent(LowBin);
575 if(HighBin < hpost->GetNbinsX())
578 HighCon = hpost->GetBinContent(HighBin);
582 if ((sum+LowCon+HighCon)/Integral > 0.6827 && LowCon > HighCon) {
586 }
else if ((sum+LowCon+HighCon)/Integral > 0.6827 && HighCon >= LowCon) {
590 sum += LowCon + HighCon;
594 double Mode_Error = 0.0;
595 if (LowCon > HighCon) {
596 Mode_Error = std::fabs(hpost->GetBinCenter(LowBin)-hpost->GetBinCenter(MaxBin));
598 Mode_Error = std::fabs(hpost->GetBinCenter(HighBin)-hpost->GetBinCenter(MaxBin));
608 const double TotalIntegral = Histogram->Integral();
612 for (
int i = 0; i < Histogram->GetXaxis()->GetNbins(); ++i) {
613 const double xvalue = Histogram->GetXaxis()->GetBinCenter(i+1);
614 for (
int j = 0; j < Histogram->GetYaxis()->GetNbins(); ++j) {
615 const double yvalue = Histogram->GetYaxis()->GetBinCenter(j+1);
617 if (xvalue >= yvalue) {
618 Above += Histogram->GetBinContent(i+1, j+1);
622 const double pvalue = Above/TotalIntegral;
623 std::stringstream ss;
624 ss << int(Above) <<
"/" << int(TotalIntegral) <<
"=" << pvalue;
625 Histogram->SetTitle((std::string(Histogram->GetTitle())+
"_"+ss.str()).c_str());
628 double minimum = Histogram->GetXaxis()->GetBinLowEdge(1);
629 if (Histogram->GetYaxis()->GetBinLowEdge(1) > minimum) {
630 minimum = Histogram->GetYaxis()->GetBinLowEdge(1);
632 double maximum = Histogram->GetXaxis()->GetBinLowEdge(Histogram->GetXaxis()->GetNbins());
633 if (Histogram->GetYaxis()->GetBinLowEdge(Histogram->GetYaxis()->GetNbins()) < maximum) {
634 maximum = Histogram->GetYaxis()->GetBinLowEdge(Histogram->GetYaxis()->GetNbins());
636 maximum += Histogram->GetYaxis()->GetBinWidth(Histogram->GetYaxis()->GetNbins());
638 else maximum += Histogram->GetXaxis()->GetBinWidth(Histogram->GetXaxis()->GetNbins());
639 auto TempLine = std::make_unique<TLine>(minimum, minimum, maximum, maximum);
640 TempLine->SetLineColor(kRed);
641 TempLine->SetLineWidth(2);
643 std::string Title = Histogram->GetName();
645 auto TempCanvas = std::make_unique<TCanvas>(Title.c_str(), Title.c_str(), 1024, 1024);
646 TempCanvas->SetGridx();
647 TempCanvas->SetGridy();
649 gStyle->SetPalette(81);
650 Histogram->SetMinimum(-0.01);
651 Histogram->Draw(
"colz");
652 TempLine->Draw(
"same");
662 for (
int j = 0; j <= RatioHist->GetNbinsX(); ++j)
664 if(DataHist->GetBinContent(j) > 0)
666 double dx = Hist1->GetBinError(j) / DataHist->GetBinContent(j);
667 RatioHist->SetBinError(j, dx);
double NoOverflowIntegral(TH2Poly *poly)
WP: Helper function for calculating binned Integral of TH2Poly i.e not including overflow.
TestStatistic
Make an enum of the test statistic that we're using.
@ kDembinskiAbdelmotteleb
Based on .
double GetSigmaValue(const int sigma)
KS: Convert sigma from normal distribution into percentage.
double ComputeKLDivergence(TH2Poly *DataPoly, TH2Poly *PolyMC)
Compute the Kullback-Leibler divergence between two TH2Poly histograms.
double GetBetaParameter(const double data, const double mc, const double w2, const TestStatistic TestStat)
KS: Calculate Beta parameter which will be different based on specified test statistic.
double GetSubOptimality(const std::vector< double > &EigenValues, const int TotalTarameters)
Based on .
double GetBIC(const double llh, const int data, const int nPars)
Get the Bayesian Information Criterion (BIC) or Schwarz information criterion (also SIC,...
double GetPValueFromZScore(const double zScore)
Compute the P-value from a given Z-score.
void Get2DBayesianpValue(TH2D *Histogram)
Calculates the 2D Bayesian p-value and generates a visualization.
double GetZScore(const double value, const double mean, const double stddev)
Compute the Z-score for a given value.
void GetGaussian(TH1D *&hist, TF1 *gauss, double &Mean, double &Error)
CW: Fit Gaussian to posterior.
void GetCredibleRegion(std::unique_ptr< TH2D > &hist2D, const double coverage)
KS: Set 2D contour within some coverage.
double GetModeError(TH1D *hpost)
Get the mode error from a TH1D.
double GetAndersonDarlingTestStat(const double CumulativeData, const double CumulativeMC, const double CumulativeJoint)
void GetCredibleIntervalSig(const std::unique_ptr< TH1D > &hist, std::unique_ptr< TH1D > &hpost_copy, const bool CredibleInSigmas, const double coverage)
KS: Get 1D histogram within credible interval, hpost_copy has to have the same binning,...
double FisherCombinedPValue(const std::vector< double > &pvalues)
KS: Combine p-values using Fisher's method.
void PassErrorToRatioPlot(TH1D *RatioHist, TH1D *Hist1, TH1D *DataHist)
double GetIQR(TH1D *Hist)
Interquartile Range (IQR)
void ThinningMCMC(const std::string &FilePath, const int ThinningCut)
Thin MCMC Chain, to save space and maintain low autocorrelations.
std::string GetJeffreysScale(const double BayesFactor)
KS: Following H. Jeffreys .
double GetNeffective(const int N1, const int N2)
KS: See 14.3.10 in Numerical Recipes in C .
void GetHPD(TH1D *const hist, double &Mean, double &Error, double &Error_p, double &Error_m, const double coverage)
Get Highest Posterior Density (HPD)
void GetCredibleRegionSig(std::unique_ptr< TH2D > &hist2D, const bool CredibleInSigmas, const double coverage)
KS: Set 2D contour within some coverage.
void GetArithmetic(TH1D *const hist, double &Mean, double &Error)
CW: Get Arithmetic mean from posterior.
void GetCredibleInterval(const std::unique_ptr< TH1D > &hist, std::unique_ptr< TH1D > &hpost_copy, const double coverage)
KS: Get 1D histogram within credible interval, hpost_copy has to have the same binning,...
void CheckBonferoniCorrectedpValue(const std::vector< std::string > &SampleNameVec, const std::vector< double > &PValVec, const double Threshold)
KS: For more see https://www.t2k.org/docs/technotes/429/TN429_v8#page=63.
std::string GetDunneKaboth(const double BayesFactor)
KS: Based on Table 1 in https://www.t2k.org/docs/technotes/435.
int GetNumberOfRuns(const std::vector< int > &GroupClasifier)
KS: https://esjeevanand.uccollege.edu.in/wp-content/uploads/sites/114/2020/08/NON-PARAMTERIC-TEST-6....
Utility functions for statistical interpretations in MaCh3.
Custom exception class used throughout MaCh3.
TFile * Open(const std::string &Name, const std::string &Type, const std::string &File, const int Line)
Opens a ROOT file with the given name and mode.
void PrintProgressBar(const Long64_t Done, const Long64_t All)
KS: Simply print progress bar.