MaCh3 2.2.1
Reference Guide
Loading...
Searching...
No Matches
Functions
StatisticalUtils.cpp File Reference
#include "Fitters/StatisticalUtils.h"
Include dependency graph for StatisticalUtils.cpp:

Go to the source code of this file.

Functions

std::string GetJeffreysScale (const double BayesFactor)
 KS: Following H. Jeffreys [17].
 
std::string GetDunneKaboth (const double BayesFactor)
 KS: Based on Table 1 in https://www.t2k.org/docs/technotes/435.
 
double GetSigmaValue (const int sigma)
 KS: Convert sigma from normal distribution into percentage.
 
double GetBIC (const double llh, const int data, const int nPars)
 Get the Bayesian Information Criterion (BIC) or Schwarz information criterion (also SIC, SBC, SBIC)
 
double GetNeffective (const int N1, const int N2)
 KS: See 14.3.10 in Numerical Recipes in C [23].
 
void CheckBonferoniCorrectedpValue (const std::vector< std::string > &SampleNameVec, const std::vector< double > &PValVec, const double Threshold)
 KS: For more see https://www.t2k.org/docs/technotes/429/TN429_v8#page=63.
 
double GetAndersonDarlingTestStat (const double CumulativeData, const double CumulativeMC, const double CumulativeJoint)
 
int GetNumberOfRuns (const std::vector< int > &GroupClasifier)
 KS: https://esjeevanand.uccollege.edu.in/wp-content/uploads/sites/114/2020/08/NON-PARAMTERIC-TEST-6.pdf.
 
double GetBetaParameter (const double data, const double mc, const double w2, TestStatistic TestStat)
 KS: Calculate Beta parameter which will be different based on specified test statistic.
 
double GetSubOptimality (const std::vector< double > &EigenValues, const int TotalTarameters)
 Based on [24].
 
void GetArithmetic (TH1D *const hist, double &Mean, double &Error)
 CW: Get Arithmetic mean from posterior.
 
void GetGaussian (TH1D *&hist, TF1 *gauss, double &Mean, double &Error)
 CW: Fit Gaussian to posterior.
 
void GetHPD (TH1D *const hist, double &Mean, double &Error, double &Error_p, double &Error_m, const double coverage)
 Get Highest Posterior Density (HPD)
 
void GetCredibleInterval (const std::unique_ptr< TH1D > &hist, std::unique_ptr< TH1D > &hpost_copy, const double coverage)
 KS: Get 1D histogram within credible interval, hpost_copy has to have the same binning, I don't do Copy() as this will lead to problems if this is used under multithreading.
 
void GetCredibleIntervalSig (const std::unique_ptr< TH1D > &hist, std::unique_ptr< TH1D > &hpost_copy, const bool CredibleInSigmas, const double coverage)
 KS: Get 1D histogram within credible interval, hpost_copy has to have the same binning, I don't do Copy() as this will lead to problems if this is used under multithreading.
 
void GetCredibleRegion (std::unique_ptr< TH2D > &hist2D, const double coverage)
 KS: Set 2D contour within some coverage.
 
void GetCredibleRegionSig (std::unique_ptr< TH2D > &hist2D, const bool CredibleInSigmas, const double coverage)
 KS: Set 2D contour within some coverage.
 
double GetIQR (TH1D *Hist)
 Interquartile Range (IQR)
 
double ComputeKLDivergence (TH2Poly *DataPoly, TH2Poly *PolyMC)
 Compute the Kullback-Leibler divergence between two TH2Poly histograms.
 
double FisherCombinedPValue (const std::vector< double > &pvalues)
 KS: Combine p-values using Fisher's method.
 
void ThinningMCMC (const std::string &FilePath, const int ThinningCut)
 Thin MCMC Chain, to save space and maintain low autocorrelations.
 
double GetZScore (const double value, const double mean, const double stddev)
 Compute the Z-score for a given value.
 
double GetPValueFromZScore (const double zScore)
 Compute the P-value from a given Z-score.
 
double GetModeError (TH1D *hpost)
 Get the mode error from a TH1D.
 
void Get2DBayesianpValue (TH2D *Histogram)
 Calculates the 2D Bayesian p-value and generates a visualization.
 

Function Documentation

◆ CheckBonferoniCorrectedpValue()

void CheckBonferoniCorrectedpValue ( const std::vector< std::string > &  SampleNameVec,
const std::vector< double > &  PValVec,
const double  Threshold = 0.05 
)

KS: For more see https://www.t2k.org/docs/technotes/429/TN429_v8#page=63.

Parameters
SampleNameVecvector of sample names
PValVecpvalue for each sample
Thresholdpvalue accepted threshold, usually 5%

Definition at line 90 of file StatisticalUtils.cpp.

92 {
93// ****************
94 MACH3LOG_INFO("");
95 if(SampleNameVec.size() != PValVec.size())
96 {
97 MACH3LOG_ERROR("Size of vectors do not match");
98 throw MaCh3Exception(__FILE__ , __LINE__ );
99 }
100 const size_t NumberOfStatisticalTests = SampleNameVec.size();
101 //KS: 0.05 or 5% is value used by T2K.
102 const double StatisticalSignificanceDown = Threshold / double(NumberOfStatisticalTests);
103 const double StatisticalSignificanceUp = 1 - StatisticalSignificanceDown;
104 MACH3LOG_INFO("Bonferroni-corrected statistical significance level: {:.2f}", StatisticalSignificanceDown);
105
106 int Counter = 0;
107 for(unsigned int i = 0; i < SampleNameVec.size(); i++)
108 {
109 if( (PValVec[i] < 0.5 && PValVec[i] < StatisticalSignificanceDown) ) {
110 MACH3LOG_INFO("Sample {} indicates disagreement between the model predictions and the data", SampleNameVec[i]);
111 MACH3LOG_INFO("Bonferroni-corrected statistical significance level: {:.2f} p-value: {:.2f}", StatisticalSignificanceDown, PValVec[i]);
112 Counter++;
113 } else if( (PValVec[i] > 0.5 && PValVec[i] > StatisticalSignificanceUp) ) {
114 MACH3LOG_INFO("Sample {} indicates disagreement between the model predictions and the data", SampleNameVec[i]);
115 MACH3LOG_INFO("Bonferroni-corrected statistical significance level: {:.2f} p-value: {:.2f}", StatisticalSignificanceUp, PValVec[i]);
116 Counter++;
117 }
118 }
119 if(Counter == 0) {
120 MACH3LOG_INFO("Every sample passed Bonferroni-corrected statistical significance level test");
121 } else {
122 MACH3LOG_WARN("{} samples didn't pass Bonferroni-corrected statistical significance level test", Counter);
123 }
124 MACH3LOG_INFO("");
125}
#define MACH3LOG_ERROR
Definition: MaCh3Logger.h:25
#define MACH3LOG_INFO
Definition: MaCh3Logger.h:23
#define MACH3LOG_WARN
Definition: MaCh3Logger.h:24
Custom exception class for MaCh3 errors.

◆ ComputeKLDivergence()

double ComputeKLDivergence ( TH2Poly *  DataPoly,
TH2Poly *  PolyMC 
)

Compute the Kullback-Leibler divergence between two TH2Poly histograms.

Parameters
DataPolyPointer to the data histogram (TH2Poly).
PolyMCPointer to the Monte Carlo histogram (TH2Poly).
Returns
The Kullback-Leibler divergence value. Returns 0 if the data or MC integral is zero.

Definition at line 456 of file StatisticalUtils.cpp.

456 {
457// *********************
458 double klDivergence = 0.0;
459 double DataIntegral = NoOverflowIntegral(DataPoly);
460 double MCIntegral = NoOverflowIntegral(PolyMC);
461 for (int i = 1; i < DataPoly->GetNumberOfBins()+1; ++i)
462 {
463 if (DataPoly->GetBinContent(i) > 0 && PolyMC->GetBinContent(i) > 0) {
464 klDivergence += DataPoly->GetBinContent(i) / DataIntegral *
465 std::log((DataPoly->GetBinContent(i) / DataIntegral) / ( PolyMC->GetBinContent(i) / MCIntegral));
466 }
467 }
468 return klDivergence;
469}
double NoOverflowIntegral(TH2Poly *poly)
WP: Helper function for calculating binned Integral of TH2Poly i.e not including overflow.

◆ FisherCombinedPValue()

double FisherCombinedPValue ( const std::vector< double > &  pvalues)

KS: Combine p-values using Fisher's method.

Parameters
pvaluesA vector of individual p-values to combine.
Returns
The combined p-value, representing the overall significance.

Definition at line 471 of file StatisticalUtils.cpp.

471 {
472// ********************
473 double testStatistic = 0;
474 for(size_t i = 0; i < pvalues.size(); i++)
475 {
476 const double pval = std::max(0.00001, pvalues[i]);
477 testStatistic += -2.0 * std::log(pval);
478 }
479 // Degrees of freedom is twice the number of p-values
480 int degreesOfFreedom = int(2 * pvalues.size());
481 double pValue = TMath::Prob(testStatistic, degreesOfFreedom);
482
483 return pValue;
484}

◆ Get2DBayesianpValue()

void Get2DBayesianpValue ( TH2D *  Histogram)

Calculates the 2D Bayesian p-value and generates a visualization.

Parameters
HistogramA pointer to a TH2D histogram object. The function modifies the histogram's title to include the p-value information.
Warning
The canvas is saved to the current ROOT file using TempCanvas->Write().

Definition at line 596 of file StatisticalUtils.cpp.

596 {
597// ****************
598 const double TotalIntegral = Histogram->Integral();
599 // Count how many fills are above y=x axis
600 // This is the 2D p-value
601 double Above = 0.0;
602 for (int i = 0; i < Histogram->GetXaxis()->GetNbins(); ++i) {
603 const double xvalue = Histogram->GetXaxis()->GetBinCenter(i+1);
604 for (int j = 0; j < Histogram->GetYaxis()->GetNbins(); ++j) {
605 const double yvalue = Histogram->GetYaxis()->GetBinCenter(j+1);
606 // We're only interested in being _ABOVE_ the y=x axis
607 if (xvalue >= yvalue) {
608 Above += Histogram->GetBinContent(i+1, j+1);
609 }
610 }
611 }
612 const double pvalue = Above/TotalIntegral;
613 std::stringstream ss;
614 ss << int(Above) << "/" << int(TotalIntegral) << "=" << pvalue;
615 Histogram->SetTitle((std::string(Histogram->GetTitle())+"_"+ss.str()).c_str());
616
617 // Now add the TLine going diagonally
618 double minimum = Histogram->GetXaxis()->GetBinLowEdge(1);
619 if (Histogram->GetYaxis()->GetBinLowEdge(1) > minimum) {
620 minimum = Histogram->GetYaxis()->GetBinLowEdge(1);
621 }
622 double maximum = Histogram->GetXaxis()->GetBinLowEdge(Histogram->GetXaxis()->GetNbins());
623 if (Histogram->GetYaxis()->GetBinLowEdge(Histogram->GetYaxis()->GetNbins()) < maximum) {
624 maximum = Histogram->GetYaxis()->GetBinLowEdge(Histogram->GetYaxis()->GetNbins());
625 //KS: Extend by bin width to perfectly fit canvas
626 maximum += Histogram->GetYaxis()->GetBinWidth(Histogram->GetYaxis()->GetNbins());
627 }
628 else maximum += Histogram->GetXaxis()->GetBinWidth(Histogram->GetXaxis()->GetNbins());
629 auto TempLine = std::make_unique<TLine>(minimum, minimum, maximum, maximum);
630 TempLine->SetLineColor(kRed);
631 TempLine->SetLineWidth(2);
632
633 std::string Title = Histogram->GetName();
634 Title += "_canv";
635 auto TempCanvas = std::make_unique<TCanvas>(Title.c_str(), Title.c_str(), 1024, 1024);
636 TempCanvas->SetGridx();
637 TempCanvas->SetGridy();
638 TempCanvas->cd();
639 gStyle->SetPalette(81);
640 Histogram->SetMinimum(-0.01);
641 Histogram->Draw("colz");
642 TempLine->Draw("same");
643
644 TempCanvas->Write();
645}

◆ GetAndersonDarlingTestStat()

double GetAndersonDarlingTestStat ( const double  CumulativeData,
const double  CumulativeMC,
const double  CumulativeJoint 
)
Parameters
CumulativeDataValue of CDF for data
CumulativeMCValue of CDF for MC
CumulativeJointValue of CDF for joint data and MC distribution

Definition at line 128 of file StatisticalUtils.cpp.

128 {
129// ****************
130 double ADstat = std::fabs(CumulativeData - CumulativeMC)/ std::sqrt(CumulativeJoint*(1 - CumulativeJoint));
131
132 if( std::isinf(ADstat) || std::isnan(ADstat)) return 0;
133 return ADstat;
134}

◆ GetArithmetic()

void GetArithmetic ( TH1D *const  hist,
double &  Mean,
double &  Error 
)

CW: Get Arithmetic mean from posterior.

Parameters
histhistograms from which we extract arithmetic mean
MeanArithmetic Mean value
ErrorArithmetic Error value

Definition at line 201 of file StatisticalUtils.cpp.

201 {
202// **************************
203 Mean = hist->GetMean();
204 Error = hist->GetRMS();
205}
double ** Mean
Definition: RHat.cpp:58

◆ GetBetaParameter()

double GetBetaParameter ( const double  data,
const double  mc,
const double  w2,
TestStatistic  TestStat 
)

KS: Calculate Beta parameter which will be different based on specified test statistic.

Parameters
dataNumber of data events in a bin
mcNumber of MC events in a bin
w2Value of weight squared in a bin
TestStatTest statistic based on which we calculate beta

Definition at line 154 of file StatisticalUtils.cpp.

154 {
155// ****************
156 double Beta = 0.0;
157
158 if (TestStat == kDembinskiAbdelmotteleb) {
159 //the so-called effective count
160 const double k = mc*mc / w2;
161 //Calculate beta which is scaling factor between true and generated MC
162 Beta = (data + k) / (mc + k);
163 }
164 //KS: Below is technically only true for Cowan's BB, which will not be true for Poisson or IceCube, because why not...
165 else {
166 // CW: Barlow-Beeston uses fractional uncertainty on MC, so sqrt(sum[w^2])/mc
167 const double fractional = std::sqrt(w2)/mc;
168 // CW: -b/2a in quadratic equation
169 const double temp = mc*fractional*fractional-1;
170 // CW: b^2 - 4ac in quadratic equation
171 const double temp2 = temp*temp + 4*data*fractional*fractional;
172 if (temp2 < 0) {
173 MACH3LOG_ERROR("Negative square root in Barlow Beeston coefficient calculation!");
174 throw MaCh3Exception(__FILE__ , __LINE__ );
175 }
176 // CW: Solve for the positive beta
177 Beta = (-1*temp+std::sqrt(temp2))/2.;
178 }
179 return Beta;
180}
@ kDembinskiAbdelmotteleb
Based on .

◆ GetBIC()

double GetBIC ( const double  llh,
const int  data,
const int  nPars 
)

Get the Bayesian Information Criterion (BIC) or Schwarz information criterion (also SIC, SBC, SBIC)

Definition at line 68 of file StatisticalUtils.cpp.

68 {
69// ****************
70 if(nPars == 0)
71 {
72 MACH3LOG_ERROR("You haven't passed number of model parameters as it is still zero");
73 throw MaCh3Exception(__FILE__ , __LINE__ );
74 }
75 const double BIC = double(nPars * logl(data) + llh);
76
77 return BIC;
78}

◆ GetCredibleInterval()

void GetCredibleInterval ( const std::unique_ptr< TH1D > &  hist,
std::unique_ptr< TH1D > &  hpost_copy,
const double  coverage = 0.6827 
)

KS: Get 1D histogram within credible interval, hpost_copy has to have the same binning, I don't do Copy() as this will lead to problems if this is used under multithreading.

Parameters
histhistograms based on which we calculate credible interval
coverageWhat is defined coverage, by default 0.6827 (1 sigma)

Loop over histogram bins with highest number of entries until covered 90 or 68.3%

Get bin of highest content and save the number of entries reached so far

Replace bin value by -1 so it is not looped over as being maximum bin again

Definition at line 316 of file StatisticalUtils.cpp.

316 {
317// ***************
318 if(coverage > 1)
319 {
320 MACH3LOG_ERROR("Specified Credible Interval is greater that 1 and equal to {} Should be between 0 and 1", coverage);
321 throw MaCh3Exception(__FILE__ , __LINE__ );
322 }
323 //KS: Reset first copy of histogram
324 hpost_copy->Reset("");
325 hpost_copy->Fill(0.0, 0.0);
326
327 //KS: Temporary structure to be thread save
328 std::vector<double> hist_copy(hist->GetXaxis()->GetNbins()+1);
329 std::vector<bool> hist_copy_fill(hist->GetXaxis()->GetNbins()+1);
330 for (int i = 0; i <= hist->GetXaxis()->GetNbins(); ++i)
331 {
332 hist_copy[i] = hist->GetBinContent(i);
333 hist_copy_fill[i] = false;
334 }
335
337 const long double Integral = hist->Integral();
338 long double sum = 0;
339
340 while ((sum / Integral) < coverage)
341 {
343 int max_entry_bin = 0;
344 double max_entries = 0.;
345 for (int i = 0; i <= hist->GetXaxis()->GetNbins(); ++i)
346 {
347 if (hist_copy[i] > max_entries)
348 {
349 max_entries = hist_copy[i];
350 max_entry_bin = i;
351 }
352 }
354 hist_copy[max_entry_bin] = -1.;
355 hist_copy_fill[max_entry_bin] = true;
356
357 sum += max_entries;
358 }
359 //KS: Now fill our copy only for bins which got included in coverage region
360 for(int i = 0; i <= hist->GetXaxis()->GetNbins(); ++i)
361 {
362 if(hist_copy_fill[i]) hpost_copy->SetBinContent(i, hist->GetBinContent(i));
363 }
364}

◆ GetCredibleIntervalSig()

void GetCredibleIntervalSig ( const std::unique_ptr< TH1D > &  hist,
std::unique_ptr< TH1D > &  hpost_copy,
const bool  CredibleInSigmas,
const double  coverage = 0.6827 
)

KS: Get 1D histogram within credible interval, hpost_copy has to have the same binning, I don't do Copy() as this will lead to problems if this is used under multithreading.

Parameters
histhistograms based on which we calculate credible interval
CredibleInSigmasWhether interval is in sigmas or percentage
coverageWhat is defined coverage, by default 0.6827 (1 sigma)

Definition at line 367 of file StatisticalUtils.cpp.

367 {
368// ***************
369 //KS: Slightly different approach depending if intervals are in percentage or sigmas
370 if(CredibleInSigmas) {
371 //KS: Convert sigmas into percentage
372 const double CredReg = GetSigmaValue(int(std::round(coverage)));
373 GetCredibleInterval(hist, hpost_copy, CredReg);
374 } else {
375 GetCredibleInterval(hist, hpost_copy, coverage);
376 }
377}
double GetSigmaValue(const int sigma)
KS: Convert sigma from normal distribution into percentage.
void GetCredibleInterval(const std::unique_ptr< TH1D > &hist, std::unique_ptr< TH1D > &hpost_copy, const double coverage)
KS: Get 1D histogram within credible interval, hpost_copy has to have the same binning,...

◆ GetCredibleRegion()

void GetCredibleRegion ( std::unique_ptr< TH2D > &  hist2D,
const double  coverage = 0.6827 
)

KS: Set 2D contour within some coverage.

Parameters
hist2Dhistograms based on which we calculate credible regions
coverageWhat is defined coverage, by default 0.6827 (1 sigma)

Loop over histogram bins with highest number of entries until covered 90 or 68.3%

Get bin of highest content and save the number of entries reached so far

Replace bin value by -1 so it is not looped over as being maximum bin again

Definition at line 380 of file StatisticalUtils.cpp.

380 {
381// ***************
382 if(coverage > 1)
383 {
384 MACH3LOG_ERROR("Specified Credible Region is greater than 1 and equal to {:.2f} Should be between 0 and 1 {}", coverage);
385 throw MaCh3Exception(__FILE__ , __LINE__ );
386 }
387
388 //KS: Temporary structure to be thread save
389 std::vector<std::vector<double>> hist_copy(hist2D->GetXaxis()->GetNbins()+1,
390 std::vector<double>(hist2D->GetYaxis()->GetNbins()+1));
391 for (int i = 0; i <= hist2D->GetXaxis()->GetNbins(); ++i) {
392 for (int j = 0; j <= hist2D->GetYaxis()->GetNbins(); ++j) {
393 hist_copy[i][j] = hist2D->GetBinContent(i, j);
394 }
395 }
396
398 const long double Integral = hist2D->Integral();
399 long double sum = 0;
400
401 //We need to as ROOT requires array to set to contour
402 double Contour[1];
403 while ((sum / Integral) < coverage)
404 {
406 int max_entry_bin_x = 0;
407 int max_entry_bin_y = 0;
408 double max_entries = 0.;
409 for (int i = 0; i <= hist2D->GetXaxis()->GetNbins(); ++i)
410 {
411 for (int j = 0; j <= hist2D->GetYaxis()->GetNbins(); ++j)
412 {
413 if (hist_copy[i][j] > max_entries)
414 {
415 max_entries = hist_copy[i][j];
416 max_entry_bin_x = i;
417 max_entry_bin_y = j;
418 }
419 }
420 }
422 hist_copy[max_entry_bin_x][max_entry_bin_y] = -1.;
423
424 sum += max_entries;
425 Contour[0] = max_entries;
426 }
427 hist2D->SetContour(1, Contour);
428}

◆ GetCredibleRegionSig()

void GetCredibleRegionSig ( std::unique_ptr< TH2D > &  hist2D,
const bool  CredibleInSigmas,
const double  coverage = 0.6827 
)

KS: Set 2D contour within some coverage.

Parameters
hist2Dhistograms based on which we calculate credible regions
CredibleInSigmasWhether interval is in sigmas or percentage
coverageWhat is defined coverage, by default 0.6827 (1 sigma)

Definition at line 431 of file StatisticalUtils.cpp.

431 {
432// ***************
433 if(CredibleInSigmas) {
434 //KS: Convert sigmas into percentage
435 const double CredReg = GetSigmaValue(int(std::round(coverage)));
436 GetCredibleRegion(hist2D, CredReg);
437 } else {
438 GetCredibleRegion(hist2D, coverage);
439 }
440}
void GetCredibleRegion(std::unique_ptr< TH2D > &hist2D, const double coverage)
KS: Set 2D contour within some coverage.

◆ GetDunneKaboth()

std::string GetDunneKaboth ( const double  BayesFactor)

KS: Based on Table 1 in https://www.t2k.org/docs/technotes/435.

Parameters
BayesFactorObtained value of Bayes factor

Definition at line 20 of file StatisticalUtils.cpp.

20 {
21// **************************
22 std::string DunneKaboth = "";
23 //KS: Get fancy DunneKaboth Scale as I am to lazy to look into table every time
24
25 if(2.125 > BayesFactor) DunneKaboth = "< 1 #sigma";
26 else if( 20.74 > BayesFactor) DunneKaboth = "> 1 #sigma";
27 else if( 369.4 > BayesFactor) DunneKaboth = "> 2 #sigma";
28 else if( 15800 > BayesFactor) DunneKaboth = "> 3 #sigma";
29 else if( 1745000 > BayesFactor) DunneKaboth = "> 4 #sigma";
30 else DunneKaboth = "> 5 #sigma";
31
32 return DunneKaboth;
33}

◆ GetGaussian()

void GetGaussian ( TH1D *&  hist,
TF1 *  gauss,
double &  Mean,
double &  Error 
)

CW: Fit Gaussian to posterior.

Parameters
histhistograms to which we fit gaussian
gausstf1 with gaussian, we pass pointer to make things faster
MeanGaussian Mean value
ErrorGaussian Error value

Definition at line 208 of file StatisticalUtils.cpp.

208 {
209// **************************
210 const double meanval = hist->GetMean();
211 const double err = hist->GetRMS();
212 const double peakval = hist->GetBinCenter(hist->GetMaximumBin());
213
214 // Set the range for the Gaussian fit
215 gauss->SetRange(meanval - 1.5*err , meanval + 1.5*err);
216 // Set the starting parameters close to RMS and peaks of the histograms
217 gauss->SetParameters(hist->GetMaximum()*err*std::sqrt(2*3.14), peakval, err);
218
219 // Perform the fit
220 hist->Fit(gauss->GetName(),"Rq");
221 hist->SetStats(0);
222
223 Mean = gauss->GetParameter(1);
224 Error = gauss->GetParameter(2);
225}

◆ GetHPD()

void GetHPD ( TH1D *const  hist,
double &  Mean,
double &  Error,
double &  Error_p,
double &  Error_m,
const double  coverage = 0.6827 
)

Get Highest Posterior Density (HPD)

Parameters
histhistograms from which we HPD
MeanHPD Mean value
ErrorHPD Error value
Error_pHPD Negative (left hand side) Error value
Error_mHPD Positive (right hand side) Error value
coverageWhat is defined coverage, by default 0.6827 (1 sigma)

Definition at line 228 of file StatisticalUtils.cpp.

228 {
229// ****************
230 // Get the bin which has the largest posterior density
231 const int MaxBin = hist->GetMaximumBin();
232 // And it's value
233 const double peakval = hist->GetBinCenter(MaxBin);
234
235 // The total integral of the posterior
236 const long double Integral = hist->Integral();
237 //KS: and integral of left handed and right handed parts
238 const long double LowIntegral = hist->Integral(1, MaxBin-1) + hist->GetBinContent(MaxBin)/2.0;
239 const long double HighIntegral = hist->Integral(MaxBin+1, hist->GetNbinsX()) + hist->GetBinContent(MaxBin)/2.0;
240
241 // Keep count of how much area we're covering
242 //KS: Take only half content of HPD bin as one half goes for right handed error and the other for left handed error
243 long double sum = hist->GetBinContent(MaxBin)/2.0;
244
245 // Counter for current bin
246 int CurrBin = MaxBin;
247 while (sum/HighIntegral < coverage && CurrBin < hist->GetNbinsX()) {
248 CurrBin++;
249 sum += hist->GetBinContent(CurrBin);
250 }
251 const double sigma_p = std::fabs(hist->GetBinCenter(MaxBin)-hist->GetXaxis()->GetBinUpEdge(CurrBin));
252 // Reset the sum
253 //KS: Take only half content of HPD bin as one half goes for right handed error and the other for left handed error
254 sum = hist->GetBinContent(MaxBin)/2.0;
255
256 // Reset the bin counter
257 CurrBin = MaxBin;
258 // Counter for current bin
259 while (sum/LowIntegral < coverage && CurrBin > 1) {
260 CurrBin--;
261 sum += hist->GetBinContent(CurrBin);
262 }
263 const double sigma_m = std::fabs(hist->GetBinCenter(CurrBin)-hist->GetBinLowEdge(MaxBin));
264
265 // Now do the double sided HPD
266 //KS: Start sum from the HPD
267 sum = hist->GetBinContent(MaxBin);
268 int LowBin = MaxBin;
269 int HighBin = MaxBin;
270 long double LowCon = 0.0;
271 long double HighCon = 0.0;
272
273 while (sum/Integral < coverage && (LowBin > 0 || HighBin < hist->GetNbinsX()+1))
274 {
275 LowCon = 0.0;
276 HighCon = 0.0;
277 //KS:: Move further only if you haven't reached histogram end
278 if(LowBin > 1)
279 {
280 LowBin--;
281 LowCon = hist->GetBinContent(LowBin);
282 }
283 if(HighBin < hist->GetNbinsX())
284 {
285 HighBin++;
286 HighCon = hist->GetBinContent(HighBin);
287 }
288
289 // If we're on the last slice and the lower contour is larger than the upper
290 if ((sum+LowCon+HighCon)/Integral > coverage && LowCon > HighCon) {
291 sum += LowCon;
292 break;
293 // If we're on the last slice and the upper contour is larger than the lower
294 } else if ((sum+LowCon+HighCon)/Integral > coverage && HighCon >= LowCon) {
295 sum += HighCon;
296 break;
297 } else {
298 sum += LowCon + HighCon;
299 }
300 }
301
302 double sigma_hpd = 0.0;
303 if (LowCon > HighCon) {
304 sigma_hpd = std::fabs(hist->GetBinLowEdge(LowBin)-hist->GetBinCenter(MaxBin));
305 } else {
306 sigma_hpd = std::fabs(hist->GetXaxis()->GetBinUpEdge(HighBin)-hist->GetBinCenter(MaxBin));
307 }
308
309 Mean = peakval;
310 Error = sigma_hpd;
311 Error_p = sigma_p;
312 Error_m = sigma_m;
313}

◆ GetIQR()

double GetIQR ( TH1D *  Hist)

Interquartile Range (IQR)

Parameters
histhistograms from which we IQR

Definition at line 443 of file StatisticalUtils.cpp.

443 {
444// *********************
445 if(Hist->Integral() == 0) return 0.0;
446
447 constexpr double quartiles_x[3] = {0.25, 0.5, 0.75};
448 double quartiles[3];
449
450 Hist->GetQuantiles(3, quartiles, quartiles_x);
451
452 return quartiles[2] - quartiles[0];
453}

◆ GetJeffreysScale()

std::string GetJeffreysScale ( const double  BayesFactor)

KS: Following H. Jeffreys [17].

Parameters
BayesFactorObtained value of Bayes factor

Definition at line 5 of file StatisticalUtils.cpp.

5 {
6// **************************
7 std::string JeffreysScale = "";
8 //KS: Get fancy Jeffreys Scale as I am to lazy to look into table every time
9 if(BayesFactor < 0) JeffreysScale = "Negative";
10 else if( 5 > BayesFactor) JeffreysScale = "Barely worth mentioning";
11 else if( 10 > BayesFactor) JeffreysScale = "Substantial";
12 else if( 15 > BayesFactor) JeffreysScale = "Strong";
13 else if( 20 > BayesFactor) JeffreysScale = "Very strong";
14 else JeffreysScale = "Decisive";
15
16 return JeffreysScale;
17}

◆ GetModeError()

double GetModeError ( TH1D *  hpost)

Get the mode error from a TH1D.

Parameters
hposthist from which we extract mode error

Definition at line 542 of file StatisticalUtils.cpp.

542 {
543// ****************
544 // Get the bin which has the largest posterior density
545 int MaxBin = hpost->GetMaximumBin();
546
547 // The total integral of the posterior
548 const double Integral = hpost->Integral();
549
550 int LowBin = MaxBin;
551 int HighBin = MaxBin;
552 double sum = hpost->GetBinContent(MaxBin);;
553 double LowCon = 0.0;
554 double HighCon = 0.0;
555 while (sum/Integral < 0.6827 && (LowBin > 0 || HighBin < hpost->GetNbinsX()+1) )
556 {
557 LowCon = 0.0;
558 HighCon = 0.0;
559 //KS:: Move further only if you haven't reached histogram end
560 if(LowBin > 1)
561 {
562 LowBin--;
563 LowCon = hpost->GetBinContent(LowBin);
564 }
565 if(HighBin < hpost->GetNbinsX())
566 {
567 HighBin++;
568 HighCon = hpost->GetBinContent(HighBin);
569 }
570
571 // If we're on the last slice and the lower contour is larger than the upper
572 if ((sum+LowCon+HighCon)/Integral > 0.6827 && LowCon > HighCon) {
573 sum += LowCon;
574 break;
575 // If we're on the last slice and the upper contour is larger than the lower
576 } else if ((sum+LowCon+HighCon)/Integral > 0.6827 && HighCon >= LowCon) {
577 sum += HighCon;
578 break;
579 } else {
580 sum += LowCon + HighCon;
581 }
582 }
583
584 double Mode_Error = 0.0;
585 if (LowCon > HighCon) {
586 Mode_Error = std::fabs(hpost->GetBinCenter(LowBin)-hpost->GetBinCenter(MaxBin));
587 } else {
588 Mode_Error = std::fabs(hpost->GetBinCenter(HighBin)-hpost->GetBinCenter(MaxBin));
589 }
590
591 return Mode_Error;
592}

◆ GetNeffective()

double GetNeffective ( const int  N1,
const int  N2 
)

KS: See 14.3.10 in Numerical Recipes in C [23].

Definition at line 81 of file StatisticalUtils.cpp.

81 {
82// ****************
83 const double Nominator = (N1+N2);
84 const double Denominator = (N1*N2);
85 const double N_e = Nominator/Denominator;
86 return N_e;
87}

◆ GetNumberOfRuns()

int GetNumberOfRuns ( const std::vector< int > &  GroupClasifier)

KS: https://esjeevanand.uccollege.edu.in/wp-content/uploads/sites/114/2020/08/NON-PARAMTERIC-TEST-6.pdf.

Definition at line 137 of file StatisticalUtils.cpp.

137 {
138// ****************
139 int NumberOfRuns = 0;
140 int PreviousGroup = -999;
141
142 //KS: If group changed increment run
143 for (unsigned int i = 0; i < GroupClasifier.size(); i++)
144 {
145 if(GroupClasifier[i] != PreviousGroup)
146 NumberOfRuns++;
147 PreviousGroup = GroupClasifier[i];
148 }
149
150 return NumberOfRuns;
151}

◆ GetPValueFromZScore()

double GetPValueFromZScore ( const double  zScore)

Compute the P-value from a given Z-score.

The P-value represents the probability of observing a value as extreme as the given Z-score under the null hypothesis of a standard normal distribution.

Parameters
zScoreThe Z-score for which to compute the P-value.
Returns
The P-value corresponding to the given Z-score.

Definition at line 535 of file StatisticalUtils.cpp.

535 {
536// ********************
537 return 0.5 * std::erfc(-zScore / std::sqrt(2));
538}

◆ GetSigmaValue()

double GetSigmaValue ( const int  sigma)

KS: Convert sigma from normal distribution into percentage.

Definition at line 36 of file StatisticalUtils.cpp.

36 {
37// *********************
38 double width = 0;
39 switch (std::abs(sigma))
40 {
41 case 1:
42 width = 0.682689492137;
43 break;
44 case 2:
45 width = 0.954499736104;
46 break;
47 case 3:
48 width = 0.997300203937;
49 break;
50 case 4:
51 width = 0.999936657516;
52 break;
53 case 5:
54 width = 0.999999426697;
55 break;
56 case 6:
57 width = 0.999999998027;
58 break;
59 default:
60 MACH3LOG_ERROR("{} is unsupported value of sigma", sigma);
61 throw MaCh3Exception(__FILE__ , __LINE__ );
62 break;
63 }
64 return width;
65}

◆ GetSubOptimality()

double GetSubOptimality ( const std::vector< double > &  EigenValues,
const int  TotalTarameters 
)

Based on [24].

Parameters
EigenValuesEigen values of covariance matrix

Definition at line 184 of file StatisticalUtils.cpp.

184 {
185// *********************
186 double sum_eigenvalues_squared_inv = 0.0;
187 double sum_eigenvalues_inv = 0.0;
188 for (unsigned int j = 0; j < EigenValues.size(); j++)
189 {
190 //KS: IF Eigen values are super small skip them
191 //if(EigenValues[j] < 0.0000001) continue;
192 sum_eigenvalues_squared_inv += std::pow(EigenValues[j], -2);
193 sum_eigenvalues_inv += 1.0 / EigenValues[j];
194 }
195 const double SubOptimality = TotalTarameters * sum_eigenvalues_squared_inv / std::pow(sum_eigenvalues_inv, 2);
196 return SubOptimality;
197}

◆ GetZScore()

double GetZScore ( const double  value,
const double  mean,
const double  stddev 
)

Compute the Z-score for a given value.

The Z-score indicates how many standard deviations a value is from the mean. A positive Z-score means the value is above the mean, while a negative Z-score means it is below the mean.

Parameters
valueThe data point for which to compute the Z-score.
meanThe mean of the data set.
stddevThe standard deviation of the data set. Must be non-zero.
Returns
The Z-score of the given value.
Warning
Ensure that stddev is not zero to avoid division by zero.

Definition at line 529 of file StatisticalUtils.cpp.

529 {
530// ********************
531 return (value - mean) / stddev;
532}

◆ ThinningMCMC()

void ThinningMCMC ( const std::string &  FilePath,
const int  ThinningCut 
)

Thin MCMC Chain, to save space and maintain low autocorrelations.

Parameters
FilePathPath to MCMC chain you want to thin
ThinningCutevery which entry you want to thin [19]
Warning
Thinning is done over entry not steps, it may now work very well for merged chains

Definition at line 487 of file StatisticalUtils.cpp.

487 {
488// ********************
489 // Define the path for the temporary thinned file
490 std::string TempFilePath = "Thinned_" + FilePath;
491 int ret = system(("cp " + FilePath + " " + TempFilePath).c_str());
492 if (ret != 0) {
493 MACH3LOG_WARN("Error: system call to copy file failed with code {}", ret);
494 }
495
496 TFile *inFile = M3::Open(TempFilePath, "RECREATE", __FILE__, __LINE__);
497 TTree *inTree = inFile->Get<TTree>("posteriors");
498 if (!inTree) {
499 MACH3LOG_ERROR("Error: TTree 'posteriors' not found in file.");
500 inFile->ls();
501 inFile->Close();
502 throw MaCh3Exception(__FILE__, __LINE__);
503 }
504
505 // Clone the structure without data
506 TTree *outTree = inTree->CloneTree(0);
507
508 // Loop over entries and apply thinning
509 Long64_t nEntries = inTree->GetEntries();
510 double retainedPercentage = (double(nEntries) / ThinningCut) / double(nEntries) * 100;
511 MACH3LOG_INFO("Thinning will retain {:.2f}% of chains", retainedPercentage);
512 for (Long64_t i = 0; i < nEntries; i++) {
513 if (i % (nEntries/10) == 0) {
514 MaCh3Utils::PrintProgressBar(i, nEntries);
515 }
516 if (i % ThinningCut == 0) {
517 inTree->GetEntry(i);
518 outTree->Fill();
519 }
520 }
521 inFile->WriteTObject(outTree, "posteriors", "kOverwrite");
522 inFile->Close();
523 delete inFile;
524
525 MACH3LOG_INFO("Thinned TTree saved and overwrote original in: {}", TempFilePath);
526}
TFile * Open(const std::string &Name, const std::string &Type, const std::string &File, const int Line)
Opens a ROOT file with the given name and mode.
void PrintProgressBar(const Long64_t Done, const Long64_t All)
KS: Simply print progress bar.
Definition: Monitor.cpp:212