MaCh3  2.2.3
Reference Guide
StatisticalUtils.cpp File Reference
#include "Fitters/StatisticalUtils.h"
Include dependency graph for StatisticalUtils.cpp:


Functions

std::string GetJeffreysScale (const double BayesFactor)
 KS: Following H. Jeffreys [19]. More...
 
std::string GetDunneKaboth (const double BayesFactor)
 KS: Based on Table 1 in https://www.t2k.org/docs/technotes/435. More...
 
double GetSigmaValue (const int sigma)
 KS: Convert sigma from normal distribution into percentage. More...
 
double GetBIC (const double llh, const int data, const int nPars)
 Get the Bayesian Information Criterion (BIC) or Schwarz information criterion (also SIC, SBC, SBIC) More...
 
double GetNeffective (const int N1, const int N2)
 KS: See 14.3.10 in Numerical Recipes in C [26]. More...
 
void CheckBonferoniCorrectedpValue (const std::vector< std::string > &SampleNameVec, const std::vector< double > &PValVec, const double Threshold)
 KS: For more see https://www.t2k.org/docs/technotes/429/TN429_v8#page=63. More...
 
double GetAndersonDarlingTestStat (const double CumulativeData, const double CumulativeMC, const double CumulativeJoint)
 
int GetNumberOfRuns (const std::vector< int > &GroupClasifier)
 KS: https://esjeevanand.uccollege.edu.in/wp-content/uploads/sites/114/2020/08/NON-PARAMTERIC-TEST-6.pdf. More...
 
double GetBetaParameter (const double data, const double mc, const double w2, const TestStatistic TestStat)
 KS: Calculate Beta parameter which will be different based on specified test statistic. More...
 
double GetSubOptimality (const std::vector< double > &EigenValues, const int TotalTarameters)
 Based on [27]. More...
 
void GetArithmetic (TH1D *const hist, double &Mean, double &Error)
 CW: Get Arithmetic mean from posterior. More...
 
void GetGaussian (TH1D *&hist, TF1 *gauss, double &Mean, double &Error)
 CW: Fit Gaussian to posterior. More...
 
void GetHPD (TH1D *const hist, double &Mean, double &Error, double &Error_p, double &Error_m, const double coverage)
 Get Highest Posterior Density (HPD) More...
 
void GetCredibleInterval (const std::unique_ptr< TH1D > &hist, std::unique_ptr< TH1D > &hpost_copy, const double coverage)
 KS: Get 1D histogram within a credible interval; hpost_copy has to have the same binning. Copy() is not used, as it would lead to problems under multithreading. More...
 
void GetCredibleIntervalSig (const std::unique_ptr< TH1D > &hist, std::unique_ptr< TH1D > &hpost_copy, const bool CredibleInSigmas, const double coverage)
 KS: Get 1D histogram within a credible interval; hpost_copy has to have the same binning. Copy() is not used, as it would lead to problems under multithreading. More...
 
void GetCredibleRegion (std::unique_ptr< TH2D > &hist2D, const double coverage)
 KS: Set 2D contour within some coverage. More...
 
void GetCredibleRegionSig (std::unique_ptr< TH2D > &hist2D, const bool CredibleInSigmas, const double coverage)
 KS: Set 2D contour within some coverage. More...
 
double GetIQR (TH1D *Hist)
 Interquartile Range (IQR) More...
 
double ComputeKLDivergence (TH2Poly *DataPoly, TH2Poly *PolyMC)
 Compute the Kullback-Leibler divergence between two TH2Poly histograms. More...
 
double FisherCombinedPValue (const std::vector< double > &pvalues)
 KS: Combine p-values using Fisher's method. More...
 
void ThinningMCMC (const std::string &FilePath, const int ThinningCut)
 Thin the MCMC chain to save space and maintain low autocorrelation. More...
 
double GetZScore (const double value, const double mean, const double stddev)
 Compute the Z-score for a given value. More...
 
double GetPValueFromZScore (const double zScore)
 Compute the P-value from a given Z-score. More...
 
double GetModeError (TH1D *hpost)
 Get the mode error from a TH1D. More...
 
void Get2DBayesianpValue (TH2D *Histogram)
 Calculates the 2D Bayesian p-value and generates a visualization. More...
 

Function Documentation

◆ CheckBonferoniCorrectedpValue()

void CheckBonferoniCorrectedpValue ( const std::vector< std::string > &  SampleNameVec,
const std::vector< double > &  PValVec,
const double  Threshold = 0.05 
)

KS: For more see https://www.t2k.org/docs/technotes/429/TN429_v8#page=63.

Parameters
SampleNameVec: vector of sample names
PValVec: p-value for each sample
Threshold: accepted p-value threshold, usually 5%

Definition at line 90 of file StatisticalUtils.cpp.

92  {
93 // ****************
94  MACH3LOG_INFO("");
95  if(SampleNameVec.size() != PValVec.size())
96  {
97  MACH3LOG_ERROR("Size of vectors do not match");
98  throw MaCh3Exception(__FILE__ , __LINE__ );
99  }
100  const size_t NumberOfStatisticalTests = SampleNameVec.size();
101  //KS: 0.05 or 5% is value used by T2K.
102  const double StatisticalSignificanceDown = Threshold / double(NumberOfStatisticalTests);
103  const double StatisticalSignificanceUp = 1 - StatisticalSignificanceDown;
104  MACH3LOG_INFO("Bonferroni-corrected statistical significance level: {:.2f}", StatisticalSignificanceDown);
105 
106  int Counter = 0;
107  for(unsigned int i = 0; i < SampleNameVec.size(); i++)
108  {
109  if( (PValVec[i] < 0.5 && PValVec[i] < StatisticalSignificanceDown) ) {
110  MACH3LOG_INFO("Sample {} indicates disagreement between the model predictions and the data", SampleNameVec[i]);
111  MACH3LOG_INFO("Bonferroni-corrected statistical significance level: {:.2f} p-value: {:.2f}", StatisticalSignificanceDown, PValVec[i]);
112  Counter++;
113  } else if( (PValVec[i] > 0.5 && PValVec[i] > StatisticalSignificanceUp) ) {
114  MACH3LOG_INFO("Sample {} indicates disagreement between the model predictions and the data", SampleNameVec[i]);
115  MACH3LOG_INFO("Bonferroni-corrected statistical significance level: {:.2f} p-value: {:.2f}", StatisticalSignificanceUp, PValVec[i]);
116  Counter++;
117  }
118  }
119  if(Counter == 0) {
120  MACH3LOG_INFO("Every sample passed Bonferroni-corrected statistical significance level test");
121  } else {
122  MACH3LOG_WARN("{} samples didn't pass Bonferroni-corrected statistical significance level test", Counter);
123  }
124  MACH3LOG_INFO("");
125 }
#define MACH3LOG_ERROR
Definition: MaCh3Logger.h:27
#define MACH3LOG_INFO
Definition: MaCh3Logger.h:25
#define MACH3LOG_WARN
Definition: MaCh3Logger.h:26
Custom exception class for MaCh3 errors.
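
A minimal usage sketch (hypothetical sample names, p-values, and wrapper function; assumes the MaCh3 headers are available):

#include <string>
#include <vector>
#include "Fitters/StatisticalUtils.h"

// Hypothetical per-sample p-values from separate goodness-of-fit tests
void CheckSamplesExample() {
  const std::vector<std::string> SampleNameVec = {"FHC_numu", "RHC_numu", "FHC_nue"};
  const std::vector<double> PValVec = {0.12, 0.04, 0.67};
  // With three tests the Bonferroni-corrected threshold becomes 0.05/3 = 0.017,
  // so none of the p-values above would be flagged
  CheckBonferoniCorrectedpValue(SampleNameVec, PValVec, 0.05);
}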

◆ ComputeKLDivergence()

double ComputeKLDivergence ( TH2Poly *  DataPoly,
TH2Poly *  PolyMC 
)

Compute the Kullback-Leibler divergence between two TH2Poly histograms.

Parameters
DataPoly: Pointer to the data histogram (TH2Poly).
PolyMC: Pointer to the Monte Carlo histogram (TH2Poly).
Returns
The Kullback-Leibler divergence value. Returns 0 if the data or MC integral is zero.

Definition at line 456 of file StatisticalUtils.cpp.

456  {
457 // *********************
458  double klDivergence = 0.0;
459  double DataIntegral = NoOverflowIntegral(DataPoly);
460  double MCIntegral = NoOverflowIntegral(PolyMC);
461  for (int i = 1; i < DataPoly->GetNumberOfBins()+1; ++i)
462  {
463  if (DataPoly->GetBinContent(i) > 0 && PolyMC->GetBinContent(i) > 0) {
464  klDivergence += DataPoly->GetBinContent(i) / DataIntegral *
465  std::log((DataPoly->GetBinContent(i) / DataIntegral) / ( PolyMC->GetBinContent(i) / MCIntegral));
466  }
467  }
468  return klDivergence;
469 }
double NoOverflowIntegral(TH2Poly *poly)
WP: Helper function for calculating the binned integral of a TH2Poly, i.e. not including overflow.
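
For reference, writing the normalised bin contents as p_i (data) and q_i (MC), the sum computed above is the discrete Kullback-Leibler divergence, with bins where either content is non-positive skipped:

\[ D_{\mathrm{KL}}(p\,\|\,q) = \sum_i p_i \ln\frac{p_i}{q_i}, \qquad p_i = \frac{d_i}{\sum_j d_j}, \quad q_i = \frac{m_i}{\sum_j m_j} \]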

◆ FisherCombinedPValue()

double FisherCombinedPValue ( const std::vector< double > &  pvalues)

KS: Combine p-values using Fisher's method.

Parameters
pvalues: A vector of individual p-values to combine.
Returns
The combined p-value, representing the overall significance.

Definition at line 471 of file StatisticalUtils.cpp.

471  {
472 // ********************
473  double testStatistic = 0;
474  for(size_t i = 0; i < pvalues.size(); i++)
475  {
476  const double pval = std::max(0.00001, pvalues[i]);
477  testStatistic += -2.0 * std::log(pval);
478  }
479  // Degrees of freedom is twice the number of p-values
480  int degreesOfFreedom = int(2 * pvalues.size());
481  double pValue = TMath::Prob(testStatistic, degreesOfFreedom);
482 
483  return pValue;
484 }
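
For reference, Fisher's method combines k independent p-values into the statistic below, which under the null hypothesis follows a chi-square distribution with 2k degrees of freedom; TMath::Prob then returns the corresponding upper-tail probability (individual p-values are clamped at 1e-5 before taking the log):

\[ X^2 = -2 \sum_{i=1}^{k} \ln p_i \;\sim\; \chi^2_{2k} \]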

◆ Get2DBayesianpValue()

void Get2DBayesianpValue ( TH2D *  Histogram)

Calculates the 2D Bayesian p-value and generates a visualization.

Parameters
Histogram: A pointer to a TH2D histogram object. The function modifies the histogram's title to include the p-value information.
Warning
The canvas is saved to the current ROOT file using TempCanvas->Write().

Definition at line 599 of file StatisticalUtils.cpp.

599  {
600 // ****************
601  const double TotalIntegral = Histogram->Integral();
602  // Count how many fills are above y=x axis
603  // This is the 2D p-value
604  double Above = 0.0;
605  for (int i = 0; i < Histogram->GetXaxis()->GetNbins(); ++i) {
606  const double xvalue = Histogram->GetXaxis()->GetBinCenter(i+1);
607  for (int j = 0; j < Histogram->GetYaxis()->GetNbins(); ++j) {
608  const double yvalue = Histogram->GetYaxis()->GetBinCenter(j+1);
609  // We're only interested in being _ABOVE_ the y=x axis
610  if (xvalue >= yvalue) {
611  Above += Histogram->GetBinContent(i+1, j+1);
612  }
613  }
614  }
615  const double pvalue = Above/TotalIntegral;
616  std::stringstream ss;
617  ss << int(Above) << "/" << int(TotalIntegral) << "=" << pvalue;
618  Histogram->SetTitle((std::string(Histogram->GetTitle())+"_"+ss.str()).c_str());
619 
620  // Now add the TLine going diagonally
621  double minimum = Histogram->GetXaxis()->GetBinLowEdge(1);
622  if (Histogram->GetYaxis()->GetBinLowEdge(1) > minimum) {
623  minimum = Histogram->GetYaxis()->GetBinLowEdge(1);
624  }
625  double maximum = Histogram->GetXaxis()->GetBinLowEdge(Histogram->GetXaxis()->GetNbins());
626  if (Histogram->GetYaxis()->GetBinLowEdge(Histogram->GetYaxis()->GetNbins()) < maximum) {
627  maximum = Histogram->GetYaxis()->GetBinLowEdge(Histogram->GetYaxis()->GetNbins());
628  //KS: Extend by bin width to perfectly fit canvas
629  maximum += Histogram->GetYaxis()->GetBinWidth(Histogram->GetYaxis()->GetNbins());
630  }
631  else maximum += Histogram->GetXaxis()->GetBinWidth(Histogram->GetXaxis()->GetNbins());
632  auto TempLine = std::make_unique<TLine>(minimum, minimum, maximum, maximum);
633  TempLine->SetLineColor(kRed);
634  TempLine->SetLineWidth(2);
635 
636  std::string Title = Histogram->GetName();
637  Title += "_canv";
638  auto TempCanvas = std::make_unique<TCanvas>(Title.c_str(), Title.c_str(), 1024, 1024);
639  TempCanvas->SetGridx();
640  TempCanvas->SetGridy();
641  TempCanvas->cd();
642  gStyle->SetPalette(81);
643  Histogram->SetMinimum(-0.01);
644  Histogram->Draw("colz");
645  TempLine->Draw("same");
646 
647  TempCanvas->Write();
648 }

◆ GetAndersonDarlingTestStat()

double GetAndersonDarlingTestStat ( const double  CumulativeData,
const double  CumulativeMC,
const double  CumulativeJoint 
)
Parameters
CumulativeData: Value of CDF for data
CumulativeMC: Value of CDF for MC
CumulativeJoint: Value of CDF for the joint data and MC distribution

Definition at line 128 of file StatisticalUtils.cpp.

128  {
129 // ****************
130  double ADstat = std::fabs(CumulativeData - CumulativeMC)/ std::sqrt(CumulativeJoint*(1 - CumulativeJoint));
131 
132  if( std::isinf(ADstat) || std::isnan(ADstat)) return 0;
133  return ADstat;
134 }
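
For reference, the per-bin statistic computed above is

\[ \mathrm{AD} = \frac{\left| F_{\mathrm{data}} - F_{\mathrm{MC}} \right|}{\sqrt{F_{\mathrm{joint}}\,\bigl(1 - F_{\mathrm{joint}}\bigr)}} \]

with non-finite values (which occur when the joint CDF is exactly 0 or 1) returned as 0.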

◆ GetArithmetic()

void GetArithmetic ( TH1D *const  hist,
double &  Mean,
double &  Error 
)

CW: Get Arithmetic mean from posterior.

Parameters
hist: histogram from which we extract the arithmetic mean
Mean: arithmetic mean value
Error: arithmetic error value (the histogram RMS)

Definition at line 201 of file StatisticalUtils.cpp.

201  {
202 // **************************
203  Mean = hist->GetMean();
204  Error = hist->GetRMS();
205 }

◆ GetBetaParameter()

double GetBetaParameter ( const double  data,
const double  mc,
const double  w2,
TestStatistic  TestStat 
)

KS: Calculate Beta parameter which will be different based on specified test statistic.

Parameters
data: Number of data events in a bin
mc: Number of MC events in a bin
w2: Value of weight squared in a bin
TestStat: Test statistic based on which we calculate beta

Definition at line 154 of file StatisticalUtils.cpp.

154  {
155 // ****************
156  double Beta = 0.0;
157 
158  if (TestStat == kDembinskiAbdelmotteleb) {
159  //the so-called effective count
160  const double k = mc*mc / w2;
161  //Calculate beta which is scaling factor between true and generated MC
162  Beta = (data + k) / (mc + k);
163  }
164  //KS: Below is technically only true for Cowan's BB, which will not be true for Poisson or IceCube, because why not...
165  else {
166  // CW: Barlow-Beeston uses fractional uncertainty on MC, so sqrt(sum[w^2])/mc
167  const double fractional = std::sqrt(w2)/mc;
168  // CW: -b/2a in quadratic equation
169  const double temp = mc*fractional*fractional-1;
170  // CW: b^2 - 4ac in quadratic equation
171  const double temp2 = temp*temp + 4*data*fractional*fractional;
172  if (temp2 < 0) {
173  MACH3LOG_ERROR("Negative square root in Barlow Beeston coefficient calculation!");
174  throw MaCh3Exception(__FILE__ , __LINE__ );
175  }
176  // CW: Solve for the positive beta
177  Beta = (-1*temp+std::sqrt(temp2))/2.;
178  }
179  return Beta;
180 }
@ kDembinskiAbdelmotteleb
Based on .
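
For reference, with d = data, m = mc and f = sqrt(w2)/mc the fractional MC uncertainty, the two branches above correspond to

\[ \beta_{\mathrm{DA}} = \frac{d + k}{m + k}, \quad k = \frac{m^2}{w_2}, \qquad\qquad \beta^2 + \left(m f^2 - 1\right)\beta - d f^2 = 0, \quad \beta_{\mathrm{BB}} = \frac{-\left(m f^2 - 1\right) + \sqrt{\left(m f^2 - 1\right)^2 + 4 d f^2}}{2} \]

i.e. the Dembinski-Abdelmotteleb scaling with the effective count k, and the positive root of the Barlow-Beeston-style quadratic.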

◆ GetBIC()

double GetBIC ( const double  llh,
const int  data,
const int  nPars 
)

Get the Bayesian Information Criterion (BIC) or Schwarz information criterion (also SIC, SBC, SBIC)

Definition at line 68 of file StatisticalUtils.cpp.

68  {
69 // ****************
70  if(nPars == 0)
71  {
72  MACH3LOG_ERROR("You haven't passed number of model parameters as it is still zero");
73  throw MaCh3Exception(__FILE__ , __LINE__ );
74  }
75  const double BIC = double(nPars * logl(data) + llh);
76 
77  return BIC;
78 }
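
For reference, the usual definition is given below, with k the number of model parameters and n the number of data points; the implementation above matches it when the llh argument is interpreted as the -2 ln L term.

\[ \mathrm{BIC} = k \ln n - 2 \ln \hat{L} \]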

◆ GetCredibleInterval()

void GetCredibleInterval ( const std::unique_ptr< TH1D > &  hist,
std::unique_ptr< TH1D > &  hpost_copy,
const double  coverage = 0.6827 
)

KS: Get 1D histogram within a credible interval; hpost_copy has to have the same binning. Copy() is not used, as it would lead to problems under multithreading.

Parameters
hist: histogram from which we calculate the credible interval
hpost_copy: user-supplied copy of the histogram, used to keep the code thread safe
coverage: desired coverage, by default 0.6827 (1 sigma)

Loop over the highest-content histogram bins until the requested coverage (e.g. 68.3% or 90%) is reached

Find the bin with the highest content and add its entries to the running sum

Replace the bin content with -1 so it is not selected as the maximum again

Definition at line 316 of file StatisticalUtils.cpp.

316  {
317 // ***************
318  if(coverage > 1)
319  {
320  MACH3LOG_ERROR("Specified Credible Interval is greater that 1 and equal to {} Should be between 0 and 1", coverage);
321  throw MaCh3Exception(__FILE__ , __LINE__ );
322  }
323  //KS: Reset first copy of histogram
324  hpost_copy->Reset("");
325  hpost_copy->Fill(0.0, 0.0);
326 
327  //KS: Temporary structure to be thread save
328  std::vector<double> hist_copy(hist->GetXaxis()->GetNbins()+1);
329  std::vector<bool> hist_copy_fill(hist->GetXaxis()->GetNbins()+1);
330  for (int i = 0; i <= hist->GetXaxis()->GetNbins(); ++i)
331  {
332  hist_copy[i] = hist->GetBinContent(i);
333  hist_copy_fill[i] = false;
334  }
335 
337  const long double Integral = hist->Integral();
338  long double sum = 0;
339 
340  while ((sum / Integral) < coverage)
341  {
343  int max_entry_bin = 0;
344  double max_entries = 0.;
345  for (int i = 0; i <= hist->GetXaxis()->GetNbins(); ++i)
346  {
347  if (hist_copy[i] > max_entries)
348  {
349  max_entries = hist_copy[i];
350  max_entry_bin = i;
351  }
352  }
354  hist_copy[max_entry_bin] = -1.;
355  hist_copy_fill[max_entry_bin] = true;
356 
357  sum += max_entries;
358  }
359  //KS: Now fill our copy only for bins which got included in coverage region
360  for(int i = 0; i <= hist->GetXaxis()->GetNbins(); ++i)
361  {
362  if(hist_copy_fill[i]) hpost_copy->SetBinContent(i, hist->GetBinContent(i));
363  }
364 }
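
A minimal usage sketch (hypothetical histogram names and wrapper function; assumes ROOT and the MaCh3 headers are available). The clone is made once by the caller, outside any parallel region, and the utility resets and refills it with only the bins forming the requested highest-density interval:

#include <memory>
#include "TH1D.h"
#include "Fitters/StatisticalUtils.h"

void CredibleIntervalExample(const std::unique_ptr<TH1D>& hpost) {
  // Copy with identical binning, supplied by the user to keep the utility thread safe
  std::unique_ptr<TH1D> hpost_copy(static_cast<TH1D*>(hpost->Clone("hpost_cred90")));
  // Keep only the bins inside the 90% credible interval
  GetCredibleInterval(hpost, hpost_copy, 0.90);
}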

◆ GetCredibleIntervalSig()

void GetCredibleIntervalSig ( const std::unique_ptr< TH1D > &  hist,
std::unique_ptr< TH1D > &  hpost_copy,
const bool  CredibleInSigmas,
const double  coverage = 0.6827 
)

KS: Get 1D histogram within a credible interval; hpost_copy has to have the same binning. Copy() is not used, as it would lead to problems under multithreading.

Parameters
hist: histogram from which we calculate the credible interval
hpost_copy: user-supplied copy of the histogram, used to keep the code thread safe
CredibleInSigmas: whether the interval is given in sigmas or as a percentage
coverage: desired coverage, by default 0.6827 (1 sigma)

Definition at line 367 of file StatisticalUtils.cpp.

367  {
368 // ***************
369  //KS: Slightly different approach depending if intervals are in percentage or sigmas
370  if(CredibleInSigmas) {
371  //KS: Convert sigmas into percentage
372  const double CredReg = GetSigmaValue(int(std::round(coverage)));
373  GetCredibleInterval(hist, hpost_copy, CredReg);
374  } else {
375  GetCredibleInterval(hist, hpost_copy, coverage);
376  }
377 }
double GetSigmaValue(const int sigma)
KS: Convert sigma from normal distribution into percentage.
void GetCredibleInterval(const std::unique_ptr< TH1D > &hist, std::unique_ptr< TH1D > &hpost_copy, const double coverage)
KS: Get 1D histogram within credible interval, hpost_copy has to have the same binning,...

◆ GetCredibleRegion()

void GetCredibleRegion ( std::unique_ptr< TH2D > &  hist2D,
const double  coverage = 0.6827 
)

KS: Set 2D contour within some coverage.

Parameters
hist2D: histogram from which we calculate credible regions
coverage: desired coverage, by default 0.6827 (1 sigma)

Loop over the highest-content histogram bins until the requested coverage (e.g. 68.3% or 90%) is reached

Find the bin with the highest content and add its entries to the running sum

Replace the bin content with -1 so it is not selected as the maximum again

Definition at line 380 of file StatisticalUtils.cpp.

380  {
381 // ***************
382  if(coverage > 1)
383  {
384  MACH3LOG_ERROR("Specified Credible Region is greater than 1 and equal to {:.2f} Should be between 0 and 1 {}", coverage);
385  throw MaCh3Exception(__FILE__ , __LINE__ );
386  }
387 
388  //KS: Temporary structure to be thread save
389  std::vector<std::vector<double>> hist_copy(hist2D->GetXaxis()->GetNbins()+1,
390  std::vector<double>(hist2D->GetYaxis()->GetNbins()+1));
391  for (int i = 0; i <= hist2D->GetXaxis()->GetNbins(); ++i) {
392  for (int j = 0; j <= hist2D->GetYaxis()->GetNbins(); ++j) {
393  hist_copy[i][j] = hist2D->GetBinContent(i, j);
394  }
395  }
396 
398  const long double Integral = hist2D->Integral();
399  long double sum = 0;
400 
 401  //We need an array because ROOT requires one to set the contour
402  double Contour[1];
403  while ((sum / Integral) < coverage)
404  {
406  int max_entry_bin_x = 0;
407  int max_entry_bin_y = 0;
408  double max_entries = 0.;
409  for (int i = 0; i <= hist2D->GetXaxis()->GetNbins(); ++i)
410  {
411  for (int j = 0; j <= hist2D->GetYaxis()->GetNbins(); ++j)
412  {
413  if (hist_copy[i][j] > max_entries)
414  {
415  max_entries = hist_copy[i][j];
416  max_entry_bin_x = i;
417  max_entry_bin_y = j;
418  }
419  }
420  }
422  hist_copy[max_entry_bin_x][max_entry_bin_y] = -1.;
423 
424  sum += max_entries;
425  Contour[0] = max_entries;
426  }
427  hist2D->SetContour(1, Contour);
428 }

◆ GetCredibleRegionSig()

void GetCredibleRegionSig ( std::unique_ptr< TH2D > &  hist2D,
const bool  CredibleInSigmas,
const double  coverage = 0.6827 
)

KS: Set 2D contour within some coverage.

Parameters
hist2D: histogram from which we calculate credible regions
CredibleInSigmas: whether the region is given in sigmas or as a percentage
coverage: desired coverage, by default 0.6827 (1 sigma)

Definition at line 431 of file StatisticalUtils.cpp.

431  {
432 // ***************
433  if(CredibleInSigmas) {
434  //KS: Convert sigmas into percentage
435  const double CredReg = GetSigmaValue(int(std::round(coverage)));
436  GetCredibleRegion(hist2D, CredReg);
437  } else {
438  GetCredibleRegion(hist2D, coverage);
439  }
440 }
void GetCredibleRegion(std::unique_ptr< TH2D > &hist2D, const double coverage)
KS: Set 2D contour within some coverage.

◆ GetDunneKaboth()

std::string GetDunneKaboth ( const double  BayesFactor)

KS: Based on Table 1 in https://www.t2k.org/docs/technotes/435.

Parameters
BayesFactor: Obtained value of the Bayes factor

Definition at line 20 of file StatisticalUtils.cpp.

20  {
21 // **************************
22  std::string DunneKaboth = "";
 23  //KS: Get fancy DunneKaboth Scale as I am too lazy to look into the table every time
24 
25  if(2.125 > BayesFactor) DunneKaboth = "< 1 #sigma";
26  else if( 20.74 > BayesFactor) DunneKaboth = "> 1 #sigma";
27  else if( 369.4 > BayesFactor) DunneKaboth = "> 2 #sigma";
28  else if( 15800 > BayesFactor) DunneKaboth = "> 3 #sigma";
29  else if( 1745000 > BayesFactor) DunneKaboth = "> 4 #sigma";
30  else DunneKaboth = "> 5 #sigma";
31 
32  return DunneKaboth;
33 }

◆ GetGaussian()

void GetGaussian ( TH1D *&  hist,
TF1 *  gauss,
double &  Mean,
double &  Error 
)

CW: Fit Gaussian to posterior.

Parameters
hist: histogram to which we fit the Gaussian
gauss: TF1 with a Gaussian; we pass a pointer to make things faster
Mean: Gaussian mean value
Error: Gaussian error (sigma) value

Definition at line 208 of file StatisticalUtils.cpp.

208  {
209 // **************************
210  const double meanval = hist->GetMean();
211  const double err = hist->GetRMS();
212  const double peakval = hist->GetBinCenter(hist->GetMaximumBin());
213 
214  // Set the range for the Gaussian fit
215  gauss->SetRange(meanval - 1.5*err , meanval + 1.5*err);
216  // Set the starting parameters close to RMS and peaks of the histograms
217  gauss->SetParameters(hist->GetMaximum()*err*std::sqrt(2*3.14), peakval, err);
218 
219  // Perform the fit
220  hist->Fit(gauss->GetName(),"Rq");
221  hist->SetStats(0);
222 
223  Mean = gauss->GetParameter(1);
224  Error = gauss->GetParameter(2);
225 }

◆ GetHPD()

void GetHPD ( TH1D *const  hist,
double &  Mean,
double &  Error,
double &  Error_p,
double &  Error_m,
const double  coverage = 0.6827 
)

Get Highest Posterior Density (HPD)

Parameters
hist: histogram from which we extract the HPD
Mean: HPD Mean value
Error: HPD Error value
Error_p: HPD positive (right-hand side) error value
Error_m: HPD negative (left-hand side) error value
coverage: desired coverage, by default 0.6827 (1 sigma)

Definition at line 228 of file StatisticalUtils.cpp.

228  {
229 // ****************
230  // Get the bin which has the largest posterior density
231  const int MaxBin = hist->GetMaximumBin();
232  // And it's value
233  const double peakval = hist->GetBinCenter(MaxBin);
234 
235  // The total integral of the posterior
236  const long double Integral = hist->Integral();
237  //KS: and integral of left handed and right handed parts
238  const long double LowIntegral = hist->Integral(1, MaxBin-1) + hist->GetBinContent(MaxBin)/2.0;
239  const long double HighIntegral = hist->Integral(MaxBin+1, hist->GetNbinsX()) + hist->GetBinContent(MaxBin)/2.0;
240 
241  // Keep count of how much area we're covering
242  //KS: Take only half content of HPD bin as one half goes for right handed error and the other for left handed error
243  long double sum = hist->GetBinContent(MaxBin)/2.0;
244 
245  // Counter for current bin
246  int CurrBin = MaxBin;
247  while (sum/HighIntegral < coverage && CurrBin < hist->GetNbinsX()) {
248  CurrBin++;
249  sum += hist->GetBinContent(CurrBin);
250  }
251  const double sigma_p = std::fabs(hist->GetBinCenter(MaxBin)-hist->GetXaxis()->GetBinUpEdge(CurrBin));
252  // Reset the sum
253  //KS: Take only half content of HPD bin as one half goes for right handed error and the other for left handed error
254  sum = hist->GetBinContent(MaxBin)/2.0;
255 
256  // Reset the bin counter
257  CurrBin = MaxBin;
258  // Counter for current bin
259  while (sum/LowIntegral < coverage && CurrBin > 1) {
260  CurrBin--;
261  sum += hist->GetBinContent(CurrBin);
262  }
263  const double sigma_m = std::fabs(hist->GetBinCenter(CurrBin)-hist->GetBinLowEdge(MaxBin));
264 
265  // Now do the double sided HPD
266  //KS: Start sum from the HPD
267  sum = hist->GetBinContent(MaxBin);
268  int LowBin = MaxBin;
269  int HighBin = MaxBin;
270  long double LowCon = 0.0;
271  long double HighCon = 0.0;
272 
273  while (sum/Integral < coverage && (LowBin > 0 || HighBin < hist->GetNbinsX()+1))
274  {
275  LowCon = 0.0;
276  HighCon = 0.0;
277  //KS:: Move further only if you haven't reached histogram end
278  if(LowBin > 1)
279  {
280  LowBin--;
281  LowCon = hist->GetBinContent(LowBin);
282  }
283  if(HighBin < hist->GetNbinsX())
284  {
285  HighBin++;
286  HighCon = hist->GetBinContent(HighBin);
287  }
288 
289  // If we're on the last slice and the lower contour is larger than the upper
290  if ((sum+LowCon+HighCon)/Integral > coverage && LowCon > HighCon) {
291  sum += LowCon;
292  break;
293  // If we're on the last slice and the upper contour is larger than the lower
294  } else if ((sum+LowCon+HighCon)/Integral > coverage && HighCon >= LowCon) {
295  sum += HighCon;
296  break;
297  } else {
298  sum += LowCon + HighCon;
299  }
300  }
301 
302  double sigma_hpd = 0.0;
303  if (LowCon > HighCon) {
304  sigma_hpd = std::fabs(hist->GetBinLowEdge(LowBin)-hist->GetBinCenter(MaxBin));
305  } else {
306  sigma_hpd = std::fabs(hist->GetXaxis()->GetBinUpEdge(HighBin)-hist->GetBinCenter(MaxBin));
307  }
308 
309  Mean = peakval;
310  Error = sigma_hpd;
311  Error_p = sigma_p;
312  Error_m = sigma_m;
313 }
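
A minimal usage sketch (hypothetical posterior histogram and wrapper function; assumes ROOT and the MaCh3 headers are available):

#include "TH1D.h"
#include "Fitters/StatisticalUtils.h"

void SummarisePosterior(TH1D* const hpost) {
  double Mean = 0.0, Error = 0.0, Error_p = 0.0, Error_m = 0.0;
  GetHPD(hpost, Mean, Error, Error_p, Error_m, 0.6827);
  // Mean    : centre of the highest-density bin
  // Error   : single double-sided 68.27% error
  // Error_p : one-sided error towards larger parameter values
  // Error_m : one-sided error towards smaller parameter values
}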

◆ GetIQR()

double GetIQR ( TH1D *  Hist)

Interquartile Range (IQR)

Parameters
Hist: histogram from which we compute the IQR

Definition at line 443 of file StatisticalUtils.cpp.

443  {
444 // *********************
445  if(Hist->Integral() == 0) return 0.0;
446 
447  constexpr double quartiles_x[3] = {0.25, 0.5, 0.75};
448  double quartiles[3];
449 
450  Hist->GetQuantiles(3, quartiles, quartiles_x);
451 
452  return quartiles[2] - quartiles[0];
453 }

◆ GetJeffreysScale()

std::string GetJeffreysScale ( const double  BayesFactor)

KS: Following H. Jeffreys [19].

Parameters
BayesFactor: Obtained value of the Bayes factor

Definition at line 5 of file StatisticalUtils.cpp.

5  {
6 // **************************
7  std::string JeffreysScale = "";
 8  //KS: Get fancy Jeffreys Scale as I am too lazy to look into the table every time
9  if(BayesFactor < 0) JeffreysScale = "Negative";
10  else if( 5 > BayesFactor) JeffreysScale = "Barely worth mentioning";
11  else if( 10 > BayesFactor) JeffreysScale = "Substantial";
12  else if( 15 > BayesFactor) JeffreysScale = "Strong";
13  else if( 20 > BayesFactor) JeffreysScale = "Very strong";
14  else JeffreysScale = "Decisive";
15 
16  return JeffreysScale;
17 }

◆ GetModeError()

double GetModeError ( TH1D *  hpost)

Get the mode error from a TH1D.

Parameters
hpost: histogram from which we extract the mode error

Definition at line 545 of file StatisticalUtils.cpp.

545  {
546 // ****************
547  // Get the bin which has the largest posterior density
548  int MaxBin = hpost->GetMaximumBin();
549 
550  // The total integral of the posterior
551  const double Integral = hpost->Integral();
552 
553  int LowBin = MaxBin;
554  int HighBin = MaxBin;
 555  double sum = hpost->GetBinContent(MaxBin);
556  double LowCon = 0.0;
557  double HighCon = 0.0;
558  while (sum/Integral < 0.6827 && (LowBin > 0 || HighBin < hpost->GetNbinsX()+1) )
559  {
560  LowCon = 0.0;
561  HighCon = 0.0;
562  //KS:: Move further only if you haven't reached histogram end
563  if(LowBin > 1)
564  {
565  LowBin--;
566  LowCon = hpost->GetBinContent(LowBin);
567  }
568  if(HighBin < hpost->GetNbinsX())
569  {
570  HighBin++;
571  HighCon = hpost->GetBinContent(HighBin);
572  }
573 
574  // If we're on the last slice and the lower contour is larger than the upper
575  if ((sum+LowCon+HighCon)/Integral > 0.6827 && LowCon > HighCon) {
576  sum += LowCon;
577  break;
578  // If we're on the last slice and the upper contour is larger than the lower
579  } else if ((sum+LowCon+HighCon)/Integral > 0.6827 && HighCon >= LowCon) {
580  sum += HighCon;
581  break;
582  } else {
583  sum += LowCon + HighCon;
584  }
585  }
586 
587  double Mode_Error = 0.0;
588  if (LowCon > HighCon) {
589  Mode_Error = std::fabs(hpost->GetBinCenter(LowBin)-hpost->GetBinCenter(MaxBin));
590  } else {
591  Mode_Error = std::fabs(hpost->GetBinCenter(HighBin)-hpost->GetBinCenter(MaxBin));
592  }
593 
594  return Mode_Error;
595 }

◆ GetNeffective()

double GetNeffective ( const int  N1,
const int  N2 
)

KS: See 14.3.10 in Numerical Recipes in C [26].

Definition at line 81 of file StatisticalUtils.cpp.

81  {
82 // ****************
83  const double Nominator = (N1+N2);
84  const double Denominator = (N1*N2);
85  const double N_e = Nominator/Denominator;
86  return N_e;
87 }
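
For comparison, Numerical Recipes defines the effective number of data points for the two-sample Kolmogorov-Smirnov test as shown below; note that the function as written returns (N1 + N2)/(N1 N2), i.e. the reciprocal of this expression.

\[ N_e = \frac{N_1 N_2}{N_1 + N_2} \]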

◆ GetNumberOfRuns()

int GetNumberOfRuns ( const std::vector< int > &  GroupClasifier)

KS: https://esjeevanand.uccollege.edu.in/wp-content/uploads/sites/114/2020/08/NON-PARAMTERIC-TEST-6.pdf.

Definition at line 137 of file StatisticalUtils.cpp.

137  {
138 // ****************
139  int NumberOfRuns = 0;
140  int PreviousGroup = -999;
141 
142  //KS: If group changed increment run
143  for (unsigned int i = 0; i < GroupClasifier.size(); i++)
144  {
145  if(GroupClasifier[i] != PreviousGroup)
146  NumberOfRuns++;
147  PreviousGroup = GroupClasifier[i];
148  }
149 
150  return NumberOfRuns;
151 }
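
A small worked example (hypothetical wrapper function; assumes the MaCh3 headers are available). The sequence below groups as (1 1)(2 2)(1)(2 2 2), i.e. four runs:

#include <vector>
#include "Fitters/StatisticalUtils.h"

void RunsExample() {
  const std::vector<int> GroupClassifier = {1, 1, 2, 2, 1, 2, 2, 2};
  // Each change of group starts a new run, so this returns 4
  const int NumberOfRuns = GetNumberOfRuns(GroupClassifier);
  (void)NumberOfRuns;
}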

◆ GetPValueFromZScore()

double GetPValueFromZScore ( const double  zScore)

Compute the P-value from a given Z-score.

The P-value represents the probability of observing a value as extreme as the given Z-score under the null hypothesis of a standard normal distribution.

Parameters
zScore: The Z-score for which to compute the P-value.
Returns
The P-value corresponding to the given Z-score.

Definition at line 538 of file StatisticalUtils.cpp.

538  {
539 // ********************
540  return 0.5 * std::erfc(-zScore / std::sqrt(2));
541 }

◆ GetSigmaValue()

double GetSigmaValue ( const int  sigma)

KS: Convert sigma from normal distribution into percentage.

Definition at line 36 of file StatisticalUtils.cpp.

36  {
37 // *********************
38  double width = 0;
39  switch (std::abs(sigma))
40  {
41  case 1:
42  width = 0.682689492137;
43  break;
44  case 2:
45  width = 0.954499736104;
46  break;
47  case 3:
48  width = 0.997300203937;
49  break;
50  case 4:
51  width = 0.999936657516;
52  break;
53  case 5:
54  width = 0.999999426697;
55  break;
56  case 6:
57  width = 0.999999998027;
58  break;
59  default:
60  MACH3LOG_ERROR("{} is unsupported value of sigma", sigma);
61  throw MaCh3Exception(__FILE__ , __LINE__ );
62  break;
63  }
64  return width;
65 }
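
The hard-coded values are the standard normal coverage probabilities,

\[ P\left(|X| \le n\sigma\right) = \mathrm{erf}\!\left(\frac{n}{\sqrt{2}}\right), \]

e.g. erf(1/sqrt(2)) = 0.682689... for n = 1.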

◆ GetSubOptimality()

double GetSubOptimality ( const std::vector< double > &  EigenValues,
const int  TotalTarameters 
)

Based on [27].

Parameters
EigenValues: Eigen values of covariance matrix
TotalTarameters: Total number of parameters, needed to correctly calculate suboptimality

Definition at line 184 of file StatisticalUtils.cpp.

184  {
185 // *********************
186  double sum_eigenvalues_squared_inv = 0.0;
187  double sum_eigenvalues_inv = 0.0;
188  for (unsigned int j = 0; j < EigenValues.size(); j++)
189  {
190  //KS: IF Eigen values are super small skip them
191  //if(EigenValues[j] < 0.0000001) continue;
192  sum_eigenvalues_squared_inv += std::pow(EigenValues[j], -2);
193  sum_eigenvalues_inv += 1.0 / EigenValues[j];
194  }
195  const double SubOptimality = TotalTarameters * sum_eigenvalues_squared_inv / std::pow(sum_eigenvalues_inv, 2);
196  return SubOptimality;
197 }
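
For reference, with eigenvalues lambda_i of the covariance matrix and d the total number of parameters, the returned diagnostic is

\[ b = d\, \frac{\sum_i \lambda_i^{-2}}{\left(\sum_i \lambda_i^{-1}\right)^2} \ge 1, \]

with equality exactly when all eigenvalues are equal.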

◆ GetZScore()

double GetZScore ( const double  value,
const double  mean,
const double  stddev 
)

Compute the Z-score for a given value.

The Z-score indicates how many standard deviations a value is from the mean. A positive Z-score means the value is above the mean, while a negative Z-score means it is below the mean.

Parameters
value: The data point for which to compute the Z-score.
mean: The mean of the data set.
stddev: The standard deviation of the data set. Must be non-zero.
Returns
The Z-score of the given value.
Warning
Ensure that stddev is not zero to avoid division by zero.

Definition at line 532 of file StatisticalUtils.cpp.

532  {
533 // ********************
534  return (value - mean) / stddev;
535 }

◆ ThinningMCMC()

void ThinningMCMC ( const std::string &  FilePath,
const int  ThinningCut 
)

Thin the MCMC chain to save space and maintain low autocorrelation.

Parameters
FilePath: Path to the MCMC chain you want to thin
ThinningCut: thinning interval, i.e. only every ThinningCut-th entry is kept [21]
Warning
Thinning is done per entry, not per step, so it may not work well for merged chains

Definition at line 487 of file StatisticalUtils.cpp.

487  {
488 // ********************
489  std::string FilePathNowRoot = FilePath;
490  if (FilePath.size() >= 5 && FilePath.substr(FilePath.size() - 5) == ".root") {
491  FilePathNowRoot = FilePath.substr(0, FilePath.size() - 5);
492  }
493  std::string TempFilePath = FilePathNowRoot + "_thinned.root";
494  int ret = system(("cp " + FilePath + " " + TempFilePath).c_str());
495  if (ret != 0) {
496  MACH3LOG_WARN("System call to copy file failed with code {}", ret);
497  }
498 
499  TFile *inFile = M3::Open(TempFilePath, "RECREATE", __FILE__, __LINE__);
500  TTree *inTree = inFile->Get<TTree>("posteriors");
501  if (!inTree) {
502  MACH3LOG_ERROR("TTree 'posteriors' not found in file.");
503  inFile->ls();
504  inFile->Close();
505  throw MaCh3Exception(__FILE__, __LINE__);
506  }
507 
508  // Clone the structure without data
509  TTree *outTree = inTree->CloneTree(0);
510 
511  // Loop over entries and apply thinning
512  Long64_t nEntries = inTree->GetEntries();
513  double retainedPercentage = (double(nEntries) / ThinningCut) / double(nEntries) * 100;
514  MACH3LOG_INFO("Thinning will retain {:.2f}% of chains", retainedPercentage);
515  for (Long64_t i = 0; i < nEntries; i++) {
516  if (i % (nEntries/10) == 0) {
517  MaCh3Utils::PrintProgressBar(i, nEntries);
518  }
519  if (i % ThinningCut == 0) {
520  inTree->GetEntry(i);
521  outTree->Fill();
522  }
523  }
524  inFile->WriteTObject(outTree, "posteriors", "kOverwrite");
525  inFile->Close();
526  delete inFile;
527 
528  MACH3LOG_INFO("Thinned TTree saved and overwrote original in: {}", TempFilePath);
529 }
TFile * Open(const std::string &Name, const std::string &Type, const std::string &File, const int Line)
Opens a ROOT file with the given name and mode.
void PrintProgressBar(const Long64_t Done, const Long64_t All)
KS: Simply print progress bar.
Definition: Monitor.cpp:213
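
A minimal usage sketch (hypothetical file name and wrapper function; assumes the MaCh3 headers are available). The thinned copy is written alongside the original with a "_thinned" suffix:

#include "Fitters/StatisticalUtils.h"

void ThinExample() {
  // Keep every 10th entry of the chain; output goes to MaCh3_chain_thinned.root
  ThinningMCMC("MaCh3_chain.root", 10);
}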