MaCh3  2.4.2
Reference Guide
SampleHandlerBase.cpp
Go to the documentation of this file.
1 #include "SampleHandlerBase.h"
2 
3 // ***************************************************************************
5 // ***************************************************************************
6  nEvents = 0;
7  nSamples = 0;
8 }
9 
10 // ***************************************************************************
12 // ***************************************************************************
13 }
14 
15 // ***************************************************************************
16 // Poisson likelihood calc for data and MC event rates
17 double SampleHandlerBase::GetPoissonLLH(const double data, const double mc) const {
18 // ***************************************************************************
19  // Return MC if there are no data, returns 0 for data == 0 && mc == 0
20  if ( data == 0 ) return mc;
21 
22  // If there are some data, but the prediction falls below the MC bound => return Poisson LogL for the low MC bound
23  if ( mc < M3::_LOW_MC_BOUND_ ) {
24  if ( data > M3::_LOW_MC_BOUND_ ) return ( M3::_LOW_MC_BOUND_ - data + data * std::log( data/M3::_LOW_MC_BOUND_ ) );
25  else if ( data >= mc ) return 0.;
26  }
27 
28  // Otherwise, just return usual Poisson LogL using Stirling's approximation
29  // http://hyperphysics.phy-astr.gsu.edu/hbase/math/stirling.html
30  return ( mc - data + data * std::log( data / mc ) );
31 }
32 
33 // *************************
34 // data is data, mc is mc, w2 is Sum(w_{i}^2) (sum of weights squared), which is sigma^2_{MC stats}
35 double SampleHandlerBase::GetTestStatLLH(const double data, const double mc, const double w2) const {
36 // *************************
37  switch (fTestStatistic)
38  {
39  //CW: Not full Barlow-Beeston or what is referred to as "light": we're not introducing any more parameters
40  // Assume the MC has a Gaussian distribution around generated
41  // As in https://arxiv.org/abs/1103.0354 eq 10, 11
42  //CW: Calculate the Barlow-Beeston likelihood contribution from MC statistics
43  // Assumes the beta scaling parameters are Gaussian distributed
44  // Follows arXiv:1103.0354 section 5 and equation 8, 9, 10, 11 on page 4/5
45  // Essentially solves equation 11
46  case (kBarlowBeeston):
47  {
48  // The MC used in the likelihood calculation is allowed to be changed by Barlow Beeston beta parameters
49  double newmc = mc;
50 
51  // If MC falls below the low MC bound, use low MC bound for newmc
52  if ( mc < M3::_LOW_MC_BOUND_ ) {
53  if ( data > M3::_LOW_MC_BOUND_ ) newmc = M3::_LOW_MC_BOUND_;
54  else if ( data >= mc ) return 0.;
55  }
56 
57  // Barlow-Beeston uses fractional uncertainty on MC, so sqrt(sum[w^2])/mc
58  const double fractional = std::sqrt( w2 ) / newmc;
59  // fractional^2 to avoid doing same operation several times
60  const double fractional2 = fractional * fractional;
61  // b in quadratic equation
62  const double temp = newmc * fractional2 - 1;
63  // b^2 - 4ac in quadratic equation
64  const double temp2 = temp * temp + 4 * data * fractional2;
65  if ( temp2 < 0 ) {
66  MACH3LOG_ERROR("Negative square root in Barlow Beeston coefficient calculation!");
67  throw MaCh3Exception(__FILE__ , __LINE__ );
68  }
69  // Solve for the positive beta
70  const double beta = ( -1 * temp + sqrt( temp2 ) ) / 2.;
71 
72  // If there is no data, test-stat shall return only MC*beta
73  double stat = mc * beta;
74  // With data, test-stat shall return LogL for newMC*beta which includes the low MC bound
75  if ( data > 0 ) {
76  newmc *= beta;
77  stat = newmc - data + data * std::log( data / newmc );
78  }
79 
80  // Now, MC stat penalty
81  // The penalty from MC statistics using Conways approach (https://cds.cern.ch/record/1333496?)
82  double penalty = 0;
83  if ( fractional > 0 ) penalty = ( beta - 1 ) * ( beta - 1 ) / ( 2 * fractional2 );
84 
85  // Returns test-stat plus the MC stat penalty
86  return stat+penalty;
87  }
88  break;
89  //KS: Alternative calculation of Barlow-Beeston following Hans Dembinski and Ahmed Abdelmottele arXiv:2206.12346v2
91  {
92  //KS: code follows authors implementation from:
93  //https://github.com/scikit-hep/iminuit/blob/059d06b00cae097ebf340b218b4eb57357111df8/src/iminuit/cost.py#L274-L300
94 
95  // If there is no MC stat error for any reason, return Poisson LogL
96  if ( w2 == 0 ) return GetPoissonLLH(data,mc);
97 
98  // The MC can be changed
99  double newmc = mc;
100 
101  // If MC falls below the low MC bound, use low MC bound for newmc
102  if ( mc < M3::_LOW_MC_BOUND_ ) {
103  if ( data > M3::_LOW_MC_BOUND_ ) newmc = M3::_LOW_MC_BOUND_;
104  else if ( data >= mc ) return 0.;
105  }
106 
107  //the so-called effective count
108  const double k = newmc * newmc / w2;
109  //Calculate beta which is scaling factor between true and generated MC
110  const double beta = ( data + k ) / ( newmc + k );
111 
112  newmc *= beta;
113 
114  // And penalise the movement in beta relative the mc uncertainty
115  const double penalty = k * beta - k + k * std::log( k / ( k * beta ) );
116 
117  // If there are no data, this shall return newmc
118  double stat = newmc;
119  // All likelihood calculations may use the bare Poisson likelihood, so calculate here
120  // Only if there are some data
121  if ( data > 0 ) stat = newmc - data + data * std::log( data / newmc );
122 
123  // Return the statistical contribution and penalty
124  return stat+penalty;
125  }
126  break;
127  //CW: Also try the IceCube likelihood
128  // It does not modify the MC content
129  // https://arxiv.org/abs/1901.04645
130  // Argüelles, C.A., Schneider, A. & Yuan, T. J. High Energ. Phys. (2019) 2019: 30. https://doi.org/10.1007/JHEP06(2019)030
131  // We essentially construct eq 3.16 and take the logarithm
132  // in eq 3.16, mu is MC, sigma2 is w2, k is data
133  case (kIceCube):
134  {
135  // IceCube low MC bound is implemented to return Poisson(data, _LOW_MC_BOUND_)
136  // up until the IceCube(data, mc) test-statistic is less than Poisson(data, _LOW_MC_BOUND_)
137  // The 0 MC limit is set to Poisson(data, 0.) as there is no way to get a non-diverging and reasonable guess on w2
138 
139  // If there is 0 MC uncertainty (technically also when MC is 0) => Return Poisson(data, mc)
140  if ( w2 == 0 ) return GetPoissonLLH(data,mc);
141 
142  // Auxiliary variables
143  const long double b = mc / w2;
144  const long double a = mc * b + 1;
145 
146  // Use C99's implementation of log of gamma function to not be C++11 dependent
147  const double stat = double( -1 * ( a * logl( b ) + lgammal( data + a ) - lgammal( data + 1 ) - ( ( data + a ) * log1pl( b ) ) - lgammal( a ) ) );
148 
149  // Check whether the stat is more than Poisson-like bound for low mc (mc < data)
150  // TN: I believe this might get some extra optimization
151  if ( mc <= data ) {
152  if ( data <= M3::_LOW_MC_BOUND_ ) return 0.;
153  const double poisson = GetPoissonLLH(data, M3::_LOW_MC_BOUND_);
154  if ( stat > poisson ) return poisson;
155  }
156 
157  // Otherwise, return IceCube test-stat
158  return stat;
159  }
160  break;
161  //KS: Pearson works on assumption that event distribution in each bin is described by a Gaussian which in our case is not fulfilled for all bins, hence use it at your own risk
162  case (kPearson):
163  {
164  //KS: 2 is because this function returns -LLH not -2LLH
165  // With no data return the MC/2.
166  if ( data == 0 ) return mc/2.;
167 
168  // If MC is lower than the low MC bound, return the test-stat at the bound
169  if ( mc < M3::_LOW_MC_BOUND_ ) {
170  if ( data > M3::_LOW_MC_BOUND_ ) return ( data - M3::_LOW_MC_BOUND_ ) * ( data - M3::_LOW_MC_BOUND_ ) / ( 2. * M3::_LOW_MC_BOUND_ );
171  else if ( data >= mc ) return 0.;
172  }
173 
174  // Return the Pearson metric
175  return ( data - mc ) * ( data - mc ) / ( 2 * mc );
176  }
177  break;
178  case (kPoisson):
179  {
180  //Just call GetPoissonLLH which doesn't take in weights
181  //and is a Poisson likelihood comparison.
182  return GetPoissonLLH(data, mc);
183  }
184  break;
186  MACH3LOG_ERROR("kNTestStatistics is not a valid TestStatistic!");
187  throw MaCh3Exception(__FILE__, __LINE__);
188  default:
189  MACH3LOG_ERROR("Couldn't find TestStatistic {} exiting!", static_cast<int>(fTestStatistic));
190  throw MaCh3Exception(__FILE__ , __LINE__ );
191  } // end switch
192 }
193 
194 // ***************************************************************************
195 // CW: Silence cout and cerr. Last is risky but psyche persists on spamming both
197 // ***************************************************************************
198  #if DEBUG > 0
199  return;
200  #else
201  buf = std::cout.rdbuf();
202  errbuf = std::cerr.rdbuf();
203  std::cout.rdbuf( nullptr );
204  std::cerr.rdbuf( nullptr );
205  #endif
206 }
207 
208 // ***************************************************************************
209 // CW: Reset cout and cerr
211 // ***************************************************************************
212  #if DEBUG > 0
213  return;
214  #else
215  std::cout.rdbuf(buf);
216  std::cerr.rdbuf(errbuf);
217  #endif
218 }
#define MACH3LOG_ERROR
Definition: MaCh3Logger.h:37
@ kNTestStatistics
Number of test statistics.
@ kPearson
Standard Pearson likelihood .
@ kBarlowBeeston
Barlow-Beeston () following Conway approximation ()
@ kIceCube
Based on .
@ kDembinskiAbdelmotteleb
Based on .
@ kPoisson
Standard Poisson likelihood .
Custom exception class used throughout MaCh3.
virtual ~SampleHandlerBase()
destructor
void NowTalk()
CW: Redirect std::cout to silence some experiment specific libraries.
std::streambuf * buf
Keep the cout buffer.
double GetTestStatLLH(const double data, const double mc, const double w2) const
Calculate test statistic for a single bin. Calculation depends on setting of fTestStatistic....
SampleHandlerBase()
The main constructor.
std::streambuf * errbuf
Keep the cerr buffer.
TestStatistic fTestStatistic
Test statistic tells what kind of likelihood sample is using.
M3::int_t nSamples
Contains how many samples we've got.
unsigned int nEvents
Number of MC events are there.
void QuietPlease()
CW: Redirect std::cout to silence some experiment specific libraries.
double GetPoissonLLH(const double data, const double mc) const
Calculate test statistic for a single bin using Poisson.
constexpr static const double _LOW_MC_BOUND_
MC prediction lower bound in bin to identify problematic binning definitions and handle LogL calculat...
Definition: Core.h:83