MaCh3 2.2.1
Reference Guide
Loading...
Searching...
No Matches
SampleHandlerBase.cpp
Go to the documentation of this file.
1#include "SampleHandlerBase.h"
2
3// ***************************************************************************
5// ***************************************************************************
6 nEvents = 0;
7 nSamples = 0;
8}
9
10// ***************************************************************************
12// ***************************************************************************
13
14}
15
16// ***************************************************************************
17// Poisson likelihood calc for data and MC event rates
18double SampleHandlerBase::GetTestStatLLH(const double data, const double mc) const {
19// ***************************************************************************
20 // Return MC if there are no data, returns 0 for data == 0 && mc == 0
21 if ( data == 0 ) return mc;
22
23 // If there are some data, but the prediction falls below the MC bound => return Poisson LogL for the low MC bound
24 if ( mc < M3::_LOW_MC_BOUND_ ) {
25 if ( data > M3::_LOW_MC_BOUND_ ) return ( M3::_LOW_MC_BOUND_ - data + data * std::log( data/M3::_LOW_MC_BOUND_ ) );
26 else if ( data >= mc ) return 0.;
27 }
28
29 // Otherwise, just return usual Poisson LogL using Stirling's approximation
30 // http://hyperphysics.phy-astr.gsu.edu/hbase/math/stirling.html
31 return ( mc - data + data * std::log( data / mc ) );
32}
33
34// *************************
35// data is data, mc is mc, w2 is Sum(w_{i}^2) (sum of weights squared), which is sigma^2_{MC stats}
36double SampleHandlerBase::GetTestStatLLH(const double data, const double mc, const double w2) const {
37// *************************
38 switch (fTestStatistic)
39 {
40 //CW: Not full Barlow-Beeston or what is referred to as "light": we're not introducing any more parameters
41 // Assume the MC has a Gaussian distribution around generated
42 // As in https://arxiv.org/abs/1103.0354 eq 10, 11
43 //CW: Calculate the Barlow-Beeston likelihood contribution from MC statistics
44 // Assumes the beta scaling parameters are Gaussian distributed
45 // Follows arXiv:1103.0354 section 5 and equation 8, 9, 10, 11 on page 4/5
46 // Essentially solves equation 11
47 case (kBarlowBeeston):
48 {
49 // The MC used in the likelihood calculation is allowed to be changed by Barlow Beeston beta parameters
50 double newmc = mc;
51
52 // If MC falls below the low MC bound, use low MC bound for newmc
53 if ( mc < M3::_LOW_MC_BOUND_ ) {
54 if ( data > M3::_LOW_MC_BOUND_ ) newmc = M3::_LOW_MC_BOUND_;
55 else if ( data >= mc ) return 0.;
56 }
57
58 // Barlow-Beeston uses fractional uncertainty on MC, so sqrt(sum[w^2])/mc
59 const double fractional = std::sqrt( w2 ) / newmc;
60 // fractional^2 to avoid doing same operation several times
61 const double fractional2 = fractional * fractional;
62 // b in quadratic equation
63 const double temp = newmc * fractional2 - 1;
64 // b^2 - 4ac in quadratic equation
65 const double temp2 = temp * temp + 4 * data * fractional2;
66 if ( temp2 < 0 ) {
67 MACH3LOG_ERROR("Negative square root in Barlow Beeston coefficient calculation!");
68 throw MaCh3Exception(__FILE__ , __LINE__ );
69 }
70 // Solve for the positive beta
71 const double beta = ( -1 * temp + sqrt( temp2 ) ) / 2.;
72
73 // If there is no data, test-stat shall return only MC*beta
74 double stat = mc * beta;
75 // With data, test-stat shall return LogL for newMC*beta which includes the low MC bound
76 if ( data > 0 ) {
77 newmc *= beta;
78 stat = newmc - data + data * std::log( data / newmc );
79 }
80
81 // Now, MC stat penalty
82 // The penalty from MC statistics using Conways approach (https://cds.cern.ch/record/1333496?)
83 double penalty = 0;
84 if ( fractional > 0 ) penalty = ( beta - 1 ) * ( beta - 1 ) / ( 2 * fractional2 );
85
86 // Returns test-stat plus the MC stat penalty
87 return stat+penalty;
88 }
89 break;
90 //KS: Alternative calculation of Barlow-Beeston following Hans Dembinski and Ahmed Abdelmottele arXiv:2206.12346v2
92 {
93 //KS: code follows authors implementation from:
94 //https://github.com/scikit-hep/iminuit/blob/059d06b00cae097ebf340b218b4eb57357111df8/src/iminuit/cost.py#L274-L300
95
96 // If there is no MC stat error for any reason, return Poisson LogL
97 if ( w2 == 0 ) return GetTestStatLLH(data,mc);
98
99 // The MC can be changed
100 double newmc = mc;
101
102 // If MC falls below the low MC bound, use low MC bound for newmc
103 if ( mc < M3::_LOW_MC_BOUND_ ) {
104 if ( data > M3::_LOW_MC_BOUND_ ) newmc = M3::_LOW_MC_BOUND_;
105 else if ( data >= mc ) return 0.;
106 }
107
108 //the so-called effective count
109 const double k = newmc * newmc / w2;
110 //Calculate beta which is scaling factor between true and generated MC
111 const double beta = ( data + k ) / ( newmc + k );
112
113 newmc *= beta;
114
115 // And penalise the movement in beta relative the mc uncertainty
116 const double penalty = k * beta - k + k * std::log( k / ( k * beta ) );
117
118 // If there are no data, this shall return newmc
119 double stat = newmc;
120 // All likelihood calculations may use the bare Poisson likelihood, so calculate here
121 // Only if there are some data
122 if ( data > 0 ) stat = newmc - data + data * std::log( data / newmc );
123
124 // Return the statistical contribution and penalty
125 return stat+penalty;
126 }
127 break;
128 //CW: Also try the IceCube likelihood
129 // It does not modify the MC content
130 // https://arxiv.org/abs/1901.04645
131 // Argüelles, C.A., Schneider, A. & Yuan, T. J. High Energ. Phys. (2019) 2019: 30. https://doi.org/10.1007/JHEP06(2019)030
132 // We essentially construct eq 3.16 and take the logarithm
133 // in eq 3.16, mu is MC, sigma2 is w2, k is data
134 case (kIceCube):
135 {
136 // IceCube low MC bound is implemented to return Poisson(data, _LOW_MC_BOUND_)
137 // up until the IceCube(data, mc) test-statistic is less than Poisson(data, _LOW_MC_BOUND_)
138 // The 0 MC limit is set to Poisson(data, 0.) as there is no way to get a non-diverging and reasonable guess on w2
139
140 // If there is 0 MC uncertainty (technically also when MC is 0) => Return Poisson(data, mc)
141 if ( w2 == 0 ) return GetTestStatLLH(data,mc);
142
143 // Auxiliary variables
144 const long double b = mc / w2;
145 const long double a = mc * b + 1;
146
147 // Use C99's implementation of log of gamma function to not be C++11 dependent
148 const double stat = double( -1 * ( a * logl( b ) + lgammal( data + a ) - lgammal( data + 1 ) - ( ( data + a ) * log1pl( b ) ) - lgammal( a ) ) );
149
150 // Check whether the stat is more than Poisson-like bound for low mc (mc < data)
151 // TN: I believe this might get some extra optimization
152 if ( mc <= data ) {
153 if ( data <= M3::_LOW_MC_BOUND_ ) return 0.;
154 const double poisson = GetTestStatLLH(data, M3::_LOW_MC_BOUND_);
155 if ( stat > poisson ) return poisson;
156 }
157
158 // Otherwise, return IceCube test-stat
159 return stat;
160 }
161 break;
162 //KS: Pearson works on assumption that event distribution in each bin is described by a Gaussian which in our case is not fulfilled for all bins, hence use it at your own risk
163 case (kPearson):
164 {
165 //KS: 2 is because this function returns -LLH not -2LLH
166 // With no data return the MC/2.
167 if ( data == 0 ) return mc/2.;
168
169 // If MC is lower than the low MC bound, return the test-stat at the bound
170 if ( mc < M3::_LOW_MC_BOUND_ ) {
171 if ( data > M3::_LOW_MC_BOUND_ ) return ( data - M3::_LOW_MC_BOUND_ ) * ( data - M3::_LOW_MC_BOUND_ ) / ( 2. * M3::_LOW_MC_BOUND_ );
172 else if ( data >= mc ) return 0.;
173 }
174
175 // Return the Pearson metric
176 return ( data - mc ) * ( data - mc ) / ( 2 * mc );
177 }
178 break;
179 case (kPoisson):
180 {
181 //Just call GetTestStatLLH which doesn't take in weights
182 //and is a Poisson likelihood comparison.
183 return GetTestStatLLH(data, mc);
184 }
185 break;
186 case TestStatistic::kNTestStatistics:
187 MACH3LOG_ERROR("kNTestStatistics is not a valid TestStatistic!");
188 throw MaCh3Exception(__FILE__, __LINE__);
189 default:
190 MACH3LOG_ERROR("Couldn't find TestStatistic {} exiting!", static_cast<int>(fTestStatistic));
191 throw MaCh3Exception(__FILE__ , __LINE__ );
192 } // end switch
193}
194
195// ***************************************************************************
196// CW: Silence cout and cerr. Last is risky but psyche persists on spamming both
198// ***************************************************************************
199 #if DEBUG > 0
200 return;
201 #else
202 buf = std::cout.rdbuf();
203 errbuf = std::cerr.rdbuf();
204 std::cout.rdbuf( nullptr );
205 std::cerr.rdbuf( nullptr );
206 #endif
207}
208
209// ***************************************************************************
210// CW: Reset cout and cerr
212// ***************************************************************************
213 #if DEBUG > 0
214 return;
215 #else
216 std::cout.rdbuf(buf);
217 std::cerr.rdbuf(errbuf);
218 #endif
219}
#define MACH3LOG_ERROR
Definition: MaCh3Logger.h:25
@ kPearson
Standard Pearson likelihood .
@ kBarlowBeeston
Barlow-Beeston () following Conway approximation ()
@ kIceCube
Based on .
@ kDembinskiAbdelmotteleb
Based on .
@ kPoisson
Standard Poisson likelihood .
Custom exception class for MaCh3 errors.
virtual ~SampleHandlerBase()
destructor
void NowTalk()
CW: Redirect std::cout to silence some experiment specific libraries.
std::streambuf * buf
Keep the cout buffer.
SampleHandlerBase()
The main constructor.
std::streambuf * errbuf
Keep the cerr buffer.
TestStatistic fTestStatistic
Test statistic tells what kind of likelihood sample is using.
M3::int_t nSamples
Contains how many samples we've got.
unsigned int nEvents
Number of MC events are there.
void QuietPlease()
CW: Redirect std::cout to silence some experiment specific libraries.
double GetTestStatLLH(double data, double mc) const
Calculate test statistic for a single bin using Poisson.
static constexpr const double _LOW_MC_BOUND_
MC prediction lower bound in bin to identify problematic binning definitions and handle LogL calculat...
Definition: Core.h:73