8 std::string JeffreysScale =
"";
10 if(BayesFactor < 0) JeffreysScale =
"Negative";
11 else if( 5 > BayesFactor) JeffreysScale =
"Barely worth mentioning";
12 else if( 10 > BayesFactor) JeffreysScale =
"Substantial";
13 else if( 15 > BayesFactor) JeffreysScale =
"Strong";
14 else if( 20 > BayesFactor) JeffreysScale =
"Very strong";
15 else JeffreysScale =
"Decisive";
23 std::string DunneKaboth =
"";
26 if(2.125 > BayesFactor) DunneKaboth =
"< 1 #sigma";
27 else if( 20.74 > BayesFactor) DunneKaboth =
"> 1 #sigma";
28 else if( 369.4 > BayesFactor) DunneKaboth =
"> 2 #sigma";
29 else if( 15800 > BayesFactor) DunneKaboth =
"> 3 #sigma";
30 else if( 1745000 > BayesFactor) DunneKaboth =
"> 4 #sigma";
31 else DunneKaboth =
"> 5 #sigma";
40 switch (std::abs(sigma))
43 width = 0.682689492137;
46 width = 0.954499736104;
49 width = 0.997300203937;
52 width = 0.999936657516;
55 width = 0.999999426697;
58 width = 0.999999998027;
69 double GetBIC(
const double llh,
const int data,
const int nPars){
73 MACH3LOG_ERROR(
"You haven't passed number of model parameters as it is still zero");
76 const double BIC = double(nPars * logl(data) + llh);
84 const double Nominator = (N1+N2);
85 const double Denominator = (N1*N2);
86 const double N_e = Nominator/Denominator;
92 const std::vector<double>& PValVec,
93 const double Threshold) {
96 if(SampleNameVec.size() != PValVec.size())
101 const size_t NumberOfStatisticalTests = SampleNameVec.size();
103 const double StatisticalSignificanceDown = Threshold / double(NumberOfStatisticalTests);
104 const double StatisticalSignificanceUp = 1 - StatisticalSignificanceDown;
105 MACH3LOG_INFO(
"Bonferroni-corrected statistical significance level: {:.2f}", StatisticalSignificanceDown);
108 for(
unsigned int i = 0; i < SampleNameVec.size(); i++)
110 if( (PValVec[i] < 0.5 && PValVec[i] < StatisticalSignificanceDown) ) {
111 MACH3LOG_INFO(
"Sample {} indicates disagreement between the model predictions and the data", SampleNameVec[i]);
112 MACH3LOG_INFO(
"Bonferroni-corrected statistical significance level: {:.2f} p-value: {:.2f}", StatisticalSignificanceDown, PValVec[i]);
114 }
else if( (PValVec[i] > 0.5 && PValVec[i] > StatisticalSignificanceUp) ) {
115 MACH3LOG_INFO(
"Sample {} indicates disagreement between the model predictions and the data", SampleNameVec[i]);
116 MACH3LOG_INFO(
"Bonferroni-corrected statistical significance level: {:.2f} p-value: {:.2f}", StatisticalSignificanceUp, PValVec[i]);
121 MACH3LOG_INFO(
"Every sample passed Bonferroni-corrected statistical significance level test");
123 MACH3LOG_WARN(
"{} samples didn't pass Bonferroni-corrected statistical significance level test", Counter);
131 double ADstat = std::fabs(CumulativeData - CumulativeMC)/ std::sqrt(CumulativeJoint*(1 - CumulativeJoint));
133 if( std::isinf(ADstat) || std::isnan(ADstat))
return 0;
140 int NumberOfRuns = 0;
141 int PreviousGroup = -999;
144 for (
unsigned int i = 0; i < GroupClasifier.size(); i++)
146 if(GroupClasifier[i] != PreviousGroup)
148 PreviousGroup = GroupClasifier[i];
161 const double k = mc*mc / w2;
163 Beta = (data + k) / (mc + k);
168 const double fractional = std::sqrt(w2)/mc;
170 const double temp = mc*fractional*fractional-1;
172 const double temp2 = temp*temp + 4*data*fractional*fractional;
174 MACH3LOG_ERROR(
"Negative square root in Barlow Beeston coefficient calculation!");
178 Beta = (-1*temp+std::sqrt(temp2))/2.;
185 double GetSubOptimality(
const std::vector<double>& EigenValues,
const int TotalTarameters) {
187 double sum_eigenvalues_squared_inv = 0.0;
188 double sum_eigenvalues_inv = 0.0;
189 for (
unsigned int j = 0; j < EigenValues.size(); j++)
193 sum_eigenvalues_squared_inv += std::pow(EigenValues[j], -2);
194 sum_eigenvalues_inv += 1.0 / EigenValues[j];
196 const double SubOptimality = TotalTarameters * sum_eigenvalues_squared_inv / std::pow(sum_eigenvalues_inv, 2);
197 return SubOptimality;
204 Mean = hist->GetMean();
205 Error = hist->GetRMS();
212 int originalErrorLevel = gErrorIgnoreLevel;
213 gErrorIgnoreLevel = kFatal;
215 const double meanval = hist->GetMean();
216 const double err = hist->GetRMS();
217 const double peakval = hist->GetBinCenter(hist->GetMaximumBin());
220 gauss->SetRange(meanval - 1.5*err , meanval + 1.5*err);
222 gauss->SetParameters(hist->GetMaximum()*err*std::sqrt(2*3.14), peakval, err);
225 hist->Fit(gauss->GetName(),
"Rq");
228 Mean = gauss->GetParameter(1);
229 Error = gauss->GetParameter(2);
232 gErrorIgnoreLevel = originalErrorLevel;
236 void GetHPD(TH1D*
const hist,
double&
Mean,
double& Error,
double& Error_p,
double& Error_m,
const double coverage) {
239 const int MaxBin = hist->GetMaximumBin();
241 const double peakval = hist->GetBinCenter(MaxBin);
244 const long double Integral = hist->Integral();
246 const long double LowIntegral = hist->Integral(1, MaxBin-1) + hist->GetBinContent(MaxBin)/2.0;
247 const long double HighIntegral = hist->Integral(MaxBin+1, hist->GetNbinsX()) + hist->GetBinContent(MaxBin)/2.0;
251 long double sum = hist->GetBinContent(MaxBin)/2.0;
254 int CurrBin = MaxBin;
255 while (sum/HighIntegral < coverage && CurrBin < hist->GetNbinsX()) {
257 sum += hist->GetBinContent(CurrBin);
259 const double sigma_p = std::fabs(hist->GetBinCenter(MaxBin)-hist->GetXaxis()->GetBinUpEdge(CurrBin));
262 sum = hist->GetBinContent(MaxBin)/2.0;
267 while (sum/LowIntegral < coverage && CurrBin > 1) {
269 sum += hist->GetBinContent(CurrBin);
271 const double sigma_m = std::fabs(hist->GetBinCenter(CurrBin)-hist->GetBinLowEdge(MaxBin));
275 sum = hist->GetBinContent(MaxBin);
277 int HighBin = MaxBin;
278 long double LowCon = 0.0;
279 long double HighCon = 0.0;
281 while (sum/Integral < coverage && (LowBin > 0 || HighBin < hist->GetNbinsX()+1))
289 LowCon = hist->GetBinContent(LowBin);
291 if(HighBin < hist->GetNbinsX())
294 HighCon = hist->GetBinContent(HighBin);
298 if ((sum+LowCon+HighCon)/Integral > coverage && LowCon > HighCon) {
302 }
else if ((sum+LowCon+HighCon)/Integral > coverage && HighCon >= LowCon) {
306 sum += LowCon + HighCon;
310 double sigma_hpd = 0.0;
311 if (LowCon > HighCon) {
312 sigma_hpd = std::fabs(hist->GetBinLowEdge(LowBin)-hist->GetBinCenter(MaxBin));
314 sigma_hpd = std::fabs(hist->GetXaxis()->GetBinUpEdge(HighBin)-hist->GetBinCenter(MaxBin));
324 void GetCredibleInterval(
const std::unique_ptr<TH1D>& hist, std::unique_ptr<TH1D>& hpost_copy,
const double coverage) {
328 MACH3LOG_ERROR(
"Specified Credible Interval is greater that 1 and equal to {} Should be between 0 and 1", coverage);
332 hpost_copy->Reset(
"");
333 hpost_copy->Fill(0.0, 0.0);
336 std::vector<double> hist_copy(hist->GetXaxis()->GetNbins()+1);
337 std::vector<bool> hist_copy_fill(hist->GetXaxis()->GetNbins()+1);
338 for (
int i = 0; i <= hist->GetXaxis()->GetNbins(); ++i)
340 hist_copy[i] = hist->GetBinContent(i);
341 hist_copy_fill[i] =
false;
345 const long double Integral = hist->Integral();
348 while ((sum / Integral) < coverage)
351 int max_entry_bin = 0;
352 double max_entries = 0.;
353 for (
int i = 0; i <= hist->GetXaxis()->GetNbins(); ++i)
355 if (hist_copy[i] > max_entries)
357 max_entries = hist_copy[i];
362 hist_copy[max_entry_bin] = -1.;
363 hist_copy_fill[max_entry_bin] =
true;
368 for(
int i = 0; i <= hist->GetXaxis()->GetNbins(); ++i)
370 if(hist_copy_fill[i]) hpost_copy->SetBinContent(i, hist->GetBinContent(i));
375 void GetCredibleIntervalSig(
const std::unique_ptr<TH1D>& hist, std::unique_ptr<TH1D>& hpost_copy,
const bool CredibleInSigmas,
const double coverage) {
378 if(CredibleInSigmas) {
380 const double CredReg =
GetSigmaValue(
int(std::round(coverage)));
392 MACH3LOG_ERROR(
"Specified Credible Region is greater than 1 and equal to {:.2f} Should be between 0 and 1", coverage);
397 std::vector<std::vector<double>> hist_copy(hist2D->GetXaxis()->GetNbins()+1,
398 std::vector<double>(hist2D->GetYaxis()->GetNbins()+1));
399 for (
int i = 0; i <= hist2D->GetXaxis()->GetNbins(); ++i) {
400 for (
int j = 0; j <= hist2D->GetYaxis()->GetNbins(); ++j) {
401 hist_copy[i][j] = hist2D->GetBinContent(i, j);
406 const long double Integral = hist2D->Integral();
411 while ((sum / Integral) < coverage)
414 int max_entry_bin_x = 0;
415 int max_entry_bin_y = 0;
416 double max_entries = 0.;
417 for (
int i = 0; i <= hist2D->GetXaxis()->GetNbins(); ++i)
419 for (
int j = 0; j <= hist2D->GetYaxis()->GetNbins(); ++j)
421 if (hist_copy[i][j] > max_entries)
423 max_entries = hist_copy[i][j];
430 hist_copy[max_entry_bin_x][max_entry_bin_y] = -1.;
433 Contour[0] = max_entries;
435 hist2D->SetContour(1, Contour);
441 if(CredibleInSigmas) {
443 const double CredReg =
GetSigmaValue(
int(std::round(coverage)));
453 if(Hist->Integral() == 0)
return 0.0;
455 constexpr
double quartiles_x[3] = {0.25, 0.5, 0.75};
458 Hist->GetQuantiles(3, quartiles, quartiles_x);
460 return quartiles[2] - quartiles[0];
465 const std::vector<double>& MC) {
467 double klDivergence = 0.0;
468 double DataIntegral = std::accumulate(Data.begin(), Data.end(), 0.0);
469 double MCIntegral = std::accumulate(MC.begin(), MC.end(), 0.0);
470 for (
size_t i = 0; i < Data.size(); ++i)
472 if (Data[i] > 0 && MC[i] > 0) {
473 klDivergence += Data[i] / DataIntegral *
474 std::log((Data[i] / DataIntegral) / ( MC[i] / MCIntegral));
483 int nBins = DataPoly->GetNumberOfBins();
484 std::vector<double> Data(nBins);
485 std::vector<double> MC(nBins);
487 for (
int i = 0; i < nBins; ++i) {
488 Data[i] = DataPoly->GetBinContent(i+1);
489 MC[i] = PolyMC->GetBinContent(i+1);
498 double testStatistic = 0;
499 for(
size_t i = 0; i < pvalues.size(); i++)
501 const double pval = std::max(0.00001, pvalues[i]);
502 testStatistic += -2.0 * std::log(pval);
505 int degreesOfFreedom = int(2 * pvalues.size());
506 double pValue = TMath::Prob(testStatistic, degreesOfFreedom);
512 void ThinningMCMC(
const std::string& FilePath,
const int ThinningCut) {
514 std::string FilePathNowRoot = FilePath;
515 if (FilePath.size() >= 5 && FilePath.substr(FilePath.size() - 5) ==
".root") {
516 FilePathNowRoot = FilePath.substr(0, FilePath.size() - 5);
518 std::string TempFilePath = FilePathNowRoot +
"_thinned.root";
519 int ret = system((
"cp " + FilePath +
" " + TempFilePath).c_str());
521 MACH3LOG_WARN(
"System call to copy file failed with code {}", ret);
524 TFile *inFile =
M3::Open(TempFilePath,
"RECREATE", __FILE__, __LINE__);
525 TTree *inTree = inFile->Get<TTree>(
"posteriors");
534 TTree *outTree = inTree->CloneTree(0);
537 Long64_t nEntries = inTree->GetEntries();
538 double retainedPercentage = (double(nEntries) / ThinningCut) /
double(nEntries) * 100;
539 MACH3LOG_INFO(
"Thinning will retain {:.2f}% of chains", retainedPercentage);
540 for (Long64_t i = 0; i < nEntries; i++) {
541 if (i % (nEntries/10) == 0) {
544 if (i % ThinningCut == 0) {
549 inFile->WriteTObject(outTree,
"posteriors",
"kOverwrite");
553 MACH3LOG_INFO(
"Thinned TTree saved and overwrote original in: {}", TempFilePath);
557 double GetZScore(
const double value,
const double mean,
const double stddev) {
559 return (value - mean) / stddev;
565 return 0.5 * std::erfc(-zScore / std::sqrt(2));
573 int MaxBin = hpost->GetMaximumBin();
576 const double Integral = hpost->Integral();
579 int HighBin = MaxBin;
580 double sum = hpost->GetBinContent(MaxBin);;
582 double HighCon = 0.0;
583 while (sum/Integral < 0.6827 && (LowBin > 0 || HighBin < hpost->GetNbinsX()+1) )
591 LowCon = hpost->GetBinContent(LowBin);
593 if(HighBin < hpost->GetNbinsX())
596 HighCon = hpost->GetBinContent(HighBin);
600 if ((sum+LowCon+HighCon)/Integral > 0.6827 && LowCon > HighCon) {
604 }
else if ((sum+LowCon+HighCon)/Integral > 0.6827 && HighCon >= LowCon) {
608 sum += LowCon + HighCon;
612 double Mode_Error = 0.0;
613 if (LowCon > HighCon) {
614 Mode_Error = std::fabs(hpost->GetBinCenter(LowBin)-hpost->GetBinCenter(MaxBin));
616 Mode_Error = std::fabs(hpost->GetBinCenter(HighBin)-hpost->GetBinCenter(MaxBin));
626 const double TotalIntegral = Histogram->Integral();
630 for (
int i = 0; i < Histogram->GetXaxis()->GetNbins(); ++i) {
631 const double xvalue = Histogram->GetXaxis()->GetBinCenter(i+1);
632 for (
int j = 0; j < Histogram->GetYaxis()->GetNbins(); ++j) {
633 const double yvalue = Histogram->GetYaxis()->GetBinCenter(j+1);
635 if (xvalue >= yvalue) {
636 Above += Histogram->GetBinContent(i+1, j+1);
640 const double pvalue = Above/TotalIntegral;
641 std::stringstream ss;
642 ss << int(Above) <<
"/" << int(TotalIntegral) <<
"=" << pvalue;
643 Histogram->SetTitle((std::string(Histogram->GetTitle())+
"_"+ss.str()).c_str());
646 double minimum = Histogram->GetXaxis()->GetBinLowEdge(1);
647 if (Histogram->GetYaxis()->GetBinLowEdge(1) > minimum) {
648 minimum = Histogram->GetYaxis()->GetBinLowEdge(1);
650 double maximum = Histogram->GetXaxis()->GetBinLowEdge(Histogram->GetXaxis()->GetNbins());
651 if (Histogram->GetYaxis()->GetBinLowEdge(Histogram->GetYaxis()->GetNbins()) < maximum) {
652 maximum = Histogram->GetYaxis()->GetBinLowEdge(Histogram->GetYaxis()->GetNbins());
654 maximum += Histogram->GetYaxis()->GetBinWidth(Histogram->GetYaxis()->GetNbins());
656 else maximum += Histogram->GetXaxis()->GetBinWidth(Histogram->GetXaxis()->GetNbins());
657 auto TempLine = std::make_unique<TLine>(minimum, minimum, maximum, maximum);
658 TempLine->SetLineColor(kRed);
659 TempLine->SetLineWidth(2);
661 std::string Title = Histogram->GetName();
663 auto TempCanvas = std::make_unique<TCanvas>(Title.c_str(), Title.c_str(), 1024, 1024);
664 TempCanvas->SetGridx();
665 TempCanvas->SetGridy();
667 gStyle->SetPalette(81);
668 Histogram->SetMinimum(-0.01);
669 Histogram->Draw(
"colz");
670 TempLine->Draw(
"same");
679 auto delta_chi2 =
M3::Clone(posterior_probability_hist);
680 delta_chi2->GetYaxis()->SetTitle(
"#Delta#chi^{2}");
682 int max_bin = delta_chi2->GetMaximumBin();
683 double max_content = delta_chi2->GetBinContent(max_bin);
684 if (max_content == 0) {
685 MACH3LOG_ERROR(
"Histogram {}, has larges bin with 0", delta_chi2->GetTitle());
686 MACH3LOG_ERROR(
"This suggest you skewed binning for posterior probability or something else");
691 for(
int iBin = 1; iBin < delta_chi2->GetNbinsX()+1; iBin++) {
692 double bin_content = delta_chi2->GetBinContent(iBin);
693 if(bin_content == 0) bin_content = 1.0 ;
695 double chi2_likelihood = -2*std::log(bin_content/max_content);
696 delta_chi2->SetBinContent(iBin, chi2_likelihood);
697 NewMaximum = std::max(NewMaximum, chi2_likelihood);
699 delta_chi2->SetMaximum(NewMaximum*1.1);
706 for (
int j = 0; j <= RatioHist->GetNbinsX(); ++j)
708 if(DataHist->GetBinContent(j) > 0)
710 double dx = Hist1->GetBinError(j) / DataHist->GetBinContent(j);
711 RatioHist->SetBinError(j, dx);
TestStatistic
Make an enum of the test statistic that we're using.
@ kDembinskiAbdelmotteleb
Based on .
double GetSigmaValue(const int sigma)
KS: Convert sigma from normal distribution into percentage.
double GetBetaParameter(const double data, const double mc, const double w2, const TestStatistic TestStat)
KS: Calculate Beta parameter which will be different based on specified test statistic.
double ComputeKLDivergence(const std::vector< double > &Data, const std::vector< double > &MC)
Compute the Kullback-Leibler divergence between two TH2Poly histograms.
double GetSubOptimality(const std::vector< double > &EigenValues, const int TotalTarameters)
Based on .
double GetBIC(const double llh, const int data, const int nPars)
Get the Bayesian Information Criterion (BIC) or Schwarz information criterion (also SIC,...
double GetPValueFromZScore(const double zScore)
Compute the P-value from a given Z-score.
void Get2DBayesianpValue(TH2D *Histogram)
Calculates the 2D Bayesian p-value and generates a visualization.
double GetZScore(const double value, const double mean, const double stddev)
Compute the Z-score for a given value.
void GetGaussian(TH1D *&hist, TF1 *gauss, double &Mean, double &Error)
CW: Fit Gaussian to posterior.
void GetCredibleRegion(std::unique_ptr< TH2D > &hist2D, const double coverage)
KS: Set 2D contour within some coverage.
double GetModeError(TH1D *hpost)
Get the mode error from a TH1D.
double GetAndersonDarlingTestStat(const double CumulativeData, const double CumulativeMC, const double CumulativeJoint)
void GetCredibleIntervalSig(const std::unique_ptr< TH1D > &hist, std::unique_ptr< TH1D > &hpost_copy, const bool CredibleInSigmas, const double coverage)
KS: Get 1D histogram within credible interval, hpost_copy has to have the same binning,...
double FisherCombinedPValue(const std::vector< double > &pvalues)
KS: Combine p-values using Fisher's method.
void PassErrorToRatioPlot(TH1D *RatioHist, TH1D *Hist1, TH1D *DataHist)
double GetIQR(TH1D *Hist)
Interquartile Range (IQR)
void ThinningMCMC(const std::string &FilePath, const int ThinningCut)
Thin MCMC Chain, to save space and maintain low autocorrelations.
std::string GetJeffreysScale(const double BayesFactor)
KS: Following H. Jeffreys .
double GetNeffective(const int N1, const int N2)
KS: See 14.3.10 in Numerical Recipes in C .
void GetHPD(TH1D *const hist, double &Mean, double &Error, double &Error_p, double &Error_m, const double coverage)
Get Highest Posterior Density (HPD)
void GetCredibleRegionSig(std::unique_ptr< TH2D > &hist2D, const bool CredibleInSigmas, const double coverage)
KS: Set 2D contour within some coverage.
void GetArithmetic(TH1D *const hist, double &Mean, double &Error)
CW: Get Arithmetic mean from posterior.
void GetCredibleInterval(const std::unique_ptr< TH1D > &hist, std::unique_ptr< TH1D > &hpost_copy, const double coverage)
KS: Get 1D histogram within credible interval, hpost_copy has to have the same binning,...
void CheckBonferoniCorrectedpValue(const std::vector< std::string > &SampleNameVec, const std::vector< double > &PValVec, const double Threshold)
KS: For more see https://www.t2k.org/docs/technotes/429/TN429_v8#page=63.
std::string GetDunneKaboth(const double BayesFactor)
Convert a Bayes factor into an approximate particle-physics significance level using the Dunne–Kaboth...
int GetNumberOfRuns(const std::vector< int > &GroupClasifier)
KS: https://esjeevanand.uccollege.edu.in/wp-content/uploads/sites/114/2020/08/NON-PARAMTERIC-TEST-6....
std::unique_ptr< TH1D > GetDeltaChi2(TH1D *posterior_probability_hist)
Convert a posterior probability histogram into a distribution. Using the likelihood-ratio definition...
Utility functions for statistical interpretations in MaCh3.
Custom exception class used throughout MaCh3.
void PrintProgressBar(const Long64_t Done, const Long64_t All)
KS: Simply print progress bar.
std::unique_ptr< ObjectType > Clone(const ObjectType *obj, const std::string &name="")
KS: Creates a copy of a ROOT-like object and wraps it in a smart pointer.
constexpr static const double _BAD_DOUBLE_
Default value used for double initialisation.
TFile * Open(const std::string &Name, const std::string &Type, const std::string &File, const int Line)
Opens a ROOT file with the given name and mode.