MaCh3  2.2.3
Reference Guide
SampleHandlerBase.cpp
Go to the documentation of this file.
1 #include "SampleHandlerBase.h"
2 
3 // ***************************************************************************
5 // ***************************************************************************
6  nEvents = 0;
7  nSamples = 0;
8 }
9 
10 // ***************************************************************************
12 // ***************************************************************************
13 
14 }
15 
16 // ***************************************************************************
17 // Poisson likelihood calc for data and MC event rates
18 double SampleHandlerBase::GetPoissonLLH(const double data, const double mc) const {
19 // ***************************************************************************
20  // Return MC if there are no data, returns 0 for data == 0 && mc == 0
21  if ( data == 0 ) return mc;
22 
23  // If there are some data, but the prediction falls below the MC bound => return Poisson LogL for the low MC bound
24  if ( mc < M3::_LOW_MC_BOUND_ ) {
25  if ( data > M3::_LOW_MC_BOUND_ ) return ( M3::_LOW_MC_BOUND_ - data + data * std::log( data/M3::_LOW_MC_BOUND_ ) );
26  else if ( data >= mc ) return 0.;
27  }
28 
29  // Otherwise, just return usual Poisson LogL using Stirling's approximation
30  // http://hyperphysics.phy-astr.gsu.edu/hbase/math/stirling.html
31  return ( mc - data + data * std::log( data / mc ) );
32 }
33 
34 // *************************
35 // data is data, mc is mc, w2 is Sum(w_{i}^2) (sum of weights squared), which is sigma^2_{MC stats}
36 double SampleHandlerBase::GetTestStatLLH(const double data, const double mc, const double w2) const {
37 // *************************
38  switch (fTestStatistic)
39  {
40  //CW: Not full Barlow-Beeston or what is referred to as "light": we're not introducing any more parameters
41  // Assume the MC has a Gaussian distribution around generated
42  // As in https://arxiv.org/abs/1103.0354 eq 10, 11
43  //CW: Calculate the Barlow-Beeston likelihood contribution from MC statistics
44  // Assumes the beta scaling parameters are Gaussian distributed
45  // Follows arXiv:1103.0354 section 5 and equation 8, 9, 10, 11 on page 4/5
46  // Essentially solves equation 11
47  case (kBarlowBeeston):
48  {
49  // The MC used in the likelihood calculation is allowed to be changed by Barlow Beeston beta parameters
50  double newmc = mc;
51 
52  // If MC falls below the low MC bound, use low MC bound for newmc
53  if ( mc < M3::_LOW_MC_BOUND_ ) {
54  if ( data > M3::_LOW_MC_BOUND_ ) newmc = M3::_LOW_MC_BOUND_;
55  else if ( data >= mc ) return 0.;
56  }
57 
58  // Barlow-Beeston uses fractional uncertainty on MC, so sqrt(sum[w^2])/mc
59  const double fractional = std::sqrt( w2 ) / newmc;
60  // fractional^2 to avoid doing same operation several times
61  const double fractional2 = fractional * fractional;
62  // b in quadratic equation
63  const double temp = newmc * fractional2 - 1;
64  // b^2 - 4ac in quadratic equation
65  const double temp2 = temp * temp + 4 * data * fractional2;
66  if ( temp2 < 0 ) {
67  MACH3LOG_ERROR("Negative square root in Barlow Beeston coefficient calculation!");
68  throw MaCh3Exception(__FILE__ , __LINE__ );
69  }
70  // Solve for the positive beta
71  const double beta = ( -1 * temp + sqrt( temp2 ) ) / 2.;
72 
73  // If there is no data, test-stat shall return only MC*beta
74  double stat = mc * beta;
75  // With data, test-stat shall return LogL for newMC*beta which includes the low MC bound
76  if ( data > 0 ) {
77  newmc *= beta;
78  stat = newmc - data + data * std::log( data / newmc );
79  }
80 
81  // Now, MC stat penalty
82  // The penalty from MC statistics using Conways approach (https://cds.cern.ch/record/1333496?)
83  double penalty = 0;
84  if ( fractional > 0 ) penalty = ( beta - 1 ) * ( beta - 1 ) / ( 2 * fractional2 );
85 
86  // Returns test-stat plus the MC stat penalty
87  return stat+penalty;
88  }
89  break;
90  //KS: Alternative calculation of Barlow-Beeston following Hans Dembinski and Ahmed Abdelmottele arXiv:2206.12346v2
92  {
93  //KS: code follows authors implementation from:
94  //https://github.com/scikit-hep/iminuit/blob/059d06b00cae097ebf340b218b4eb57357111df8/src/iminuit/cost.py#L274-L300
95 
96  // If there is no MC stat error for any reason, return Poisson LogL
97  if ( w2 == 0 ) return GetPoissonLLH(data,mc);
98 
99  // The MC can be changed
100  double newmc = mc;
101 
102  // If MC falls below the low MC bound, use low MC bound for newmc
103  if ( mc < M3::_LOW_MC_BOUND_ ) {
104  if ( data > M3::_LOW_MC_BOUND_ ) newmc = M3::_LOW_MC_BOUND_;
105  else if ( data >= mc ) return 0.;
106  }
107 
108  //the so-called effective count
109  const double k = newmc * newmc / w2;
110  //Calculate beta which is scaling factor between true and generated MC
111  const double beta = ( data + k ) / ( newmc + k );
112 
113  newmc *= beta;
114 
115  // And penalise the movement in beta relative the mc uncertainty
116  const double penalty = k * beta - k + k * std::log( k / ( k * beta ) );
117 
118  // If there are no data, this shall return newmc
119  double stat = newmc;
120  // All likelihood calculations may use the bare Poisson likelihood, so calculate here
121  // Only if there are some data
122  if ( data > 0 ) stat = newmc - data + data * std::log( data / newmc );
123 
124  // Return the statistical contribution and penalty
125  return stat+penalty;
126  }
127  break;
128  //CW: Also try the IceCube likelihood
129  // It does not modify the MC content
130  // https://arxiv.org/abs/1901.04645
131  // Argüelles, C.A., Schneider, A. & Yuan, T. J. High Energ. Phys. (2019) 2019: 30. https://doi.org/10.1007/JHEP06(2019)030
132  // We essentially construct eq 3.16 and take the logarithm
133  // in eq 3.16, mu is MC, sigma2 is w2, k is data
134  case (kIceCube):
135  {
136  // IceCube low MC bound is implemented to return Poisson(data, _LOW_MC_BOUND_)
137  // up until the IceCube(data, mc) test-statistic is less than Poisson(data, _LOW_MC_BOUND_)
138  // The 0 MC limit is set to Poisson(data, 0.) as there is no way to get a non-diverging and reasonable guess on w2
139 
140  // If there is 0 MC uncertainty (technically also when MC is 0) => Return Poisson(data, mc)
141  if ( w2 == 0 ) return GetPoissonLLH(data,mc);
142 
143  // Auxiliary variables
144  const long double b = mc / w2;
145  const long double a = mc * b + 1;
146 
147  // Use C99's implementation of log of gamma function to not be C++11 dependent
148  const double stat = double( -1 * ( a * logl( b ) + lgammal( data + a ) - lgammal( data + 1 ) - ( ( data + a ) * log1pl( b ) ) - lgammal( a ) ) );
149 
150  // Check whether the stat is more than Poisson-like bound for low mc (mc < data)
151  // TN: I believe this might get some extra optimization
152  if ( mc <= data ) {
153  if ( data <= M3::_LOW_MC_BOUND_ ) return 0.;
154  const double poisson = GetPoissonLLH(data, M3::_LOW_MC_BOUND_);
155  if ( stat > poisson ) return poisson;
156  }
157 
158  // Otherwise, return IceCube test-stat
159  return stat;
160  }
161  break;
162  //KS: Pearson works on assumption that event distribution in each bin is described by a Gaussian which in our case is not fulfilled for all bins, hence use it at your own risk
163  case (kPearson):
164  {
165  //KS: 2 is because this function returns -LLH not -2LLH
166  // With no data return the MC/2.
167  if ( data == 0 ) return mc/2.;
168 
169  // If MC is lower than the low MC bound, return the test-stat at the bound
170  if ( mc < M3::_LOW_MC_BOUND_ ) {
171  if ( data > M3::_LOW_MC_BOUND_ ) return ( data - M3::_LOW_MC_BOUND_ ) * ( data - M3::_LOW_MC_BOUND_ ) / ( 2. * M3::_LOW_MC_BOUND_ );
172  else if ( data >= mc ) return 0.;
173  }
174 
175  // Return the Pearson metric
176  return ( data - mc ) * ( data - mc ) / ( 2 * mc );
177  }
178  break;
179  case (kPoisson):
180  {
181  //Just call GetPoissonLLH which doesn't take in weights
182  //and is a Poisson likelihood comparison.
183  return GetPoissonLLH(data, mc);
184  }
185  break;
187  MACH3LOG_ERROR("kNTestStatistics is not a valid TestStatistic!");
188  throw MaCh3Exception(__FILE__, __LINE__);
189  default:
190  MACH3LOG_ERROR("Couldn't find TestStatistic {} exiting!", static_cast<int>(fTestStatistic));
191  throw MaCh3Exception(__FILE__ , __LINE__ );
192  } // end switch
193 }
194 
195 // ***************************************************************************
196 // CW: Silence cout and cerr. Last is risky but psyche persists on spamming both
198 // ***************************************************************************
199  #if DEBUG > 0
200  return;
201  #else
202  buf = std::cout.rdbuf();
203  errbuf = std::cerr.rdbuf();
204  std::cout.rdbuf( nullptr );
205  std::cerr.rdbuf( nullptr );
206  #endif
207 }
208 
209 // ***************************************************************************
210 // CW: Reset cout and cerr
212 // ***************************************************************************
213  #if DEBUG > 0
214  return;
215  #else
216  std::cout.rdbuf(buf);
217  std::cerr.rdbuf(errbuf);
218  #endif
219 }
#define MACH3LOG_ERROR
Definition: MaCh3Logger.h:27
@ kNTestStatistics
Number of test statistics.
@ kPearson
Standard Pearson likelihood .
@ kBarlowBeeston
Barlow-Beeston () following Conway approximation ()
@ kIceCube
Based on .
@ kDembinskiAbdelmotteleb
Based on .
@ kPoisson
Standard Poisson likelihood .
Custom exception class for MaCh3 errors.
virtual ~SampleHandlerBase()
destructor
void NowTalk()
CW: Redirect std::cout to silence some experiment specific libraries.
std::streambuf * buf
Keep the cout buffer.
SampleHandlerBase()
The main constructor.
std::streambuf * errbuf
Keep the cerr buffer.
TestStatistic fTestStatistic
Test statistic tells what kind of likelihood sample is using.
M3::int_t nSamples
Contains how many samples we've got.
unsigned int nEvents
Number of MC events are there.
void QuietPlease()
CW: Redirect std::cout to silence some experiment specific libraries.
double GetTestStatLLH(const double data, const double mc, const double w2) const
Calculate test statistic for a single bin. Calculation depends on setting of fTestStatistic....
double GetPoissonLLH(const double data, const double mc) const
Calculate test statistic for a single bin using Poisson.
constexpr static const double _LOW_MC_BOUND_
MC prediction lower bound in bin to identify problematic binning definitions and handle LogL calculat...
Definition: Core.h:76