MaCh3  2.5.1
Reference Guide
BinningHandler.cpp
Go to the documentation of this file.
2 
3 // ************************************************
5 // ************************************************
6 }
7 
8 // ************************************************
36 std::vector<std::vector<double>> UniformBinEdgeConfigParser(YAML::Node const &bin_edges_node,
37  bool &found_range_specifier) {
38 // ************************************************
39  if (bin_edges_node.IsMap()) {
40  found_range_specifier = true;
41  return std::vector<std::vector<double>>{
42  BinRangeToBinEdges(bin_edges_node),
43  };
44  } else if (bin_edges_node.IsSequence()) {
45  if (!bin_edges_node.size()) {
46  std::stringstream ss;
47  ss << bin_edges_node;
48  MACH3LOG_ERROR("When parsing binning, found an empty sequence:\n{}",
49  ss.str());
50  throw MaCh3Exception(__FILE__, __LINE__);
51  }
52  auto const &first_el = bin_edges_node[0];
53  if (first_el.IsScalar() || first_el.IsMap()) { // 1D binning
54  return std::vector<std::vector<double>>{
55  BuildBinEdgesFromNode(bin_edges_node, found_range_specifier),
56  };
57  }
58  // ND binning
59  std::vector<std::vector<double>> dims;
60  for (auto const &dim_node : bin_edges_node) {
61  dims.push_back(BuildBinEdgesFromNode(dim_node, found_range_specifier));
62  }
63  return dims;
64  } else {
65  std::stringstream ss;
66  ss << bin_edges_node;
68  "When parsing binning, expected to find a YAML map or sequence, "
69  "but found:\n{}",
70  ss.str());
71  throw MaCh3Exception(__FILE__, __LINE__);
72  }
73 }
74 
75 // ************************************************
76 // Function to setup the binning of your sample histograms and the underlying
77 // arrays that get handled in fillArray() and fillArray_MP().
78 // The Binning.XBinEdges are filled in the daughter class from the sample config file.
79 // This "passing" can be removed.
80 void BinningHandler::SetupSampleBinning(const YAML::Node& Settings, SampleInfo& SingleSample) {
81 // ************************************************
82  MACH3LOG_INFO("Setting up Sample Binning");
83  //Binning
84  SingleSample.VarStr = (Settings["VarStr"] && Settings["VarStr"].IsScalar())
85  ? std::vector<std::string>{Get<std::string>(
86  Settings["VarStr"], __FILE__, __LINE__)}
87  : Get<std::vector<std::string>>(Settings["VarStr"],
88  __FILE__, __LINE__);
89  SingleSample.nDimensions = static_cast<int>(SingleSample.VarStr.size());
90 
91  SampleBinningInfo SingleBinning;
92  bool Uniform = Get<bool>(Settings["Uniform"], __FILE__ , __LINE__);
93  bool found_range_specifier = false;
94  if(Uniform == false) {
95  if(Settings["Bins"].IsSequence()) {
96  SingleBinning.InitNonUniform(Get<std::vector<std::vector<std::vector<double>>>>(Settings["Bins"], __FILE__, __LINE__));
97  } else if(Settings["Bins"].IsMap()){
98  // Load binning from external file
99  auto file = Get<std::string>(Settings["Bins"]["File"], __FILE__, __LINE__);
100  M3::AddPath(file);
101  auto key = Get<std::string>(Settings["Bins"]["Key"], __FILE__, __LINE__);
102  auto binfile = LoadYamlConfig(file, __FILE__, __LINE__);
103  SingleBinning.InitNonUniform(Get<std::vector<std::vector<std::vector<double>>>>(binfile[key], __FILE__, __LINE__));
104  } else {
105  std::stringstream ss;
106  ss << Settings["Bins"];
108  "When parsing binning, expected to find a YAML map or sequence, "
109  "but found:\n{}",
110  ss.str());
111  throw MaCh3Exception(__FILE__, __LINE__);
112  }
113  } else {
114  YAML::Node const & bin_edges_node = Settings["BinEdges"] ? Settings["BinEdges"] : Settings["VarBins"];
115  if(!bin_edges_node){
116  MACH3LOG_ERROR("When setting up Uniform sample binning, didn't find expected key: BinEdges (or VarBins for backward compatibility).");
117  throw MaCh3Exception(__FILE__, __LINE__);
118  }
119  SingleBinning.InitUniform(UniformBinEdgeConfigParser(bin_edges_node, found_range_specifier));
120  }
121  if(SingleSample.VarStr.size() != SingleBinning.BinEdges.size()) {
122  MACH3LOG_ERROR("Number of variables ({}) does not match number of bin edge sets ({}) in sample config '{}'",
123  SingleSample.VarStr.size(), SingleBinning.BinEdges.size(),SingleSample.SampleTitle);
124  if(found_range_specifier){
125  std::stringstream ss;
126  ss << (Settings["BinEdges"] ? Settings["BinEdges"] : Settings["VarBins"]);
127  MACH3LOG_ERROR(R"(A bin range specifier was found in node:
128 
129  {}
130 
131  Please carefully check the number of square brackets used, a 2D binning
132  comprised of just 2 range specifiers must explicitly include the axis
133  list specifier like:
134 
135  BinEdges: [ [ {{linspace: {{nb:10, low:0, up: 10}}}} ], [ {{linspace: {{nb:5, low:10, up: 100}}}} ] ]
136  )", ss.str());
137  }
138  throw MaCh3Exception(__FILE__, __LINE__);
139  }
140 
141  SampleBinning.emplace_back(SingleBinning);
142 
143  // now setup global numbering
145 }
146 
147 // ************************************************
148 int BinningHandler::FindGlobalBin(const int NomSample,
149  const std::vector<const double*>& KinVar,
150  const std::vector<int>& NomBin) const {
151 // ************************************************
152  //DB Find the relevant bin in the PDF for each event
153  const int Dim = static_cast<int>(KinVar.size());
154  const SampleBinningInfo& _restrict_ Binning = SampleBinning[NomSample];
155  int GlobalBin = 0;
156 
157  for(int i = 0; i < Dim; ++i) {
158  const double Var = *KinVar[i];
159  const int Bin = Binning.FindBin(i, Var, NomBin[i]);
160  // KS: If we are outside of range in only one dimension this mean out of bounds, we can simply quickly finish
161  if(Bin < 0) return M3::UnderOverFlowBin;
162  // KS: inline GetBin computation to avoid any memory allocation, which in reweight loop is very costly
163  GlobalBin += Bin * Binning.Strides[i];
164  }
165 
166  if(Binning.Uniform) {
167  GlobalBin += static_cast<int>(Binning.GlobalOffset);
168  return GlobalBin;
169  } else {
170  const auto& _restrict_ BinMapping = Binning.BinGridMapping[GlobalBin];
171  const size_t nNonUniBins = BinMapping.size();
172  for(size_t iBin = 0; iBin < nNonUniBins; iBin++) {
173  const int BinNumber = BinMapping[iBin];
174  const auto& _restrict_ NonUniBin = Binning.Bins[BinNumber];
175  if(NonUniBin.IsEventInside(KinVar)){
176  return BinNumber + Binning.GlobalOffset;
177  }
178  }
179  MACH3LOG_DEBUG("Didn't find any bin so returning UnderOverFlowBin");
180  return M3::UnderOverFlowBin;
181  }
182 }
183 
184 // ************************************************
185 int BinningHandler::FindNominalBin(const int iSample,
186  const int iDim,
187  const double Var) const {
188 // ************************************************
189  const SampleBinningInfo& info = SampleBinning[iSample];
190 
191  const auto& edges = info.BinEdges[iDim];
192 
193  // Outside binning range
194  if (Var < edges.front() || Var >= edges.back()) {
195  return M3::UnderOverFlowBin;
196  }
197  return static_cast<int>(std::distance(edges.begin(), std::upper_bound(edges.begin(), edges.end(), Var)) - 1);
198 }
199 
200 // ************************************************
201 int BinningHandler::GetBinSafe(const int Sample, const std::vector<int>& Bins) const {
202 // ************************************************
203  const int GlobalBin = SampleBinning[Sample].GetBinSafe(Bins);
204  return GlobalBin;
205 }
206 
207 // ************************************************
208 int BinningHandler::GetGlobalBinSafe(const int Sample, const std::vector<int>& Bins) const {
209 // ************************************************
210  const int GlobalBin = SampleBinning[Sample].GetBinSafe(Bins) + static_cast<int>(SampleBinning[Sample].GlobalOffset);
211  return GlobalBin;
212 }
213 
214 // ************************************************
215 int BinningHandler::GetSampleStartBin(const int Sample) const {
216 // ************************************************
217  return static_cast<int>(SampleBinning[Sample].GlobalOffset);
218 }
219 
220 // ************************************************
221 int BinningHandler::GetSampleEndBin(const int Sample) const {
222 // ************************************************
223  if (Sample == static_cast<int>(SampleBinning.size()) - 1) {
224  return GetNBins();
225  } else {
226  return static_cast<int>(SampleBinning[Sample+1].GlobalOffset);
227  }
228 }
229 
230 // ************************************************
233 // ************************************************
234  if (SampleBinning.empty()) {
235  MACH3LOG_ERROR("No binning samples provided.");
236  throw MaCh3Exception(__FILE__, __LINE__);
237  }
238 
239  int GlobalOffsetCounter = 0;
240  for(size_t iSample = 0; iSample < SampleBinning.size(); iSample++){
241  SampleBinning[iSample].GlobalOffset = GlobalOffsetCounter;
242  GlobalOffsetCounter += SampleBinning[iSample].nBins;
243  }
244  // lastly modify total number of bins
245  TotalNumberOfBins = GlobalOffsetCounter;
246 }
247 
248 // ************************************************
249 // Get fancy name for a given bin, to help match it with global properties
250 std::string BinningHandler::GetBinName(const int iSample, const std::vector<int>& Bins) const {
251 // ************************************************
252  const auto& Binning = SampleBinning[iSample];
253  if(!Binning.Uniform) {
254  MACH3LOG_ERROR("When using Non-Uniform binning for sample {} please use One bin instead of Axis bins", iSample);
255  throw MaCh3Exception(__FILE__, __LINE__);
256  }
257  return GetBinName(iSample, GetBinSafe(iSample, Bins));
258 }
259 
260 // ************************************************
261 // Get fancy name for a given bin, to help match it with global properties
262 std::string BinningHandler::GetBinName(const int iSample, const int SampleBin) const {
263 // ************************************************
264  const auto& Binning = SampleBinning[iSample];
265 
266  // Safety checks
267  if (SampleBin < 0 || SampleBin >= static_cast<int>(Binning.nBins)) {
268  MACH3LOG_ERROR("Requested bin {} is out of range for sample {}", SampleBin, iSample);
269  throw MaCh3Exception(__FILE__, __LINE__);
270  }
271  std::string BinName;
272 
273  if(Binning.Uniform) {
274  int Dim = static_cast<int>(Binning.Strides.size());
275  std::vector<int> Bins(Dim, 0);
276  int Remaining = SampleBin;
277 
278  // Convert the flat/global bin index into per-dimension indices
279  // Dim0 is the fastest-changing axis, Dim1 the next, etc.
280  //
281  // For example (2D):
282  // x = bin % Nx
283  // y = bin / Nx
284  //
285  // For 3D:
286  // x = bin % Nx
287  // y = (bin / Nx) % Ny
288  // z = bin / (Nx * Ny)
289  for (int i = 0; i < Dim; ++i) {
290  const int nBinsDim = static_cast<int>(Binning.BinEdges[i].size()) - 1;
291  Bins[i] = Remaining % nBinsDim;
292  Remaining /= nBinsDim;
293  }
294 
295  for (int i = 0; i < Dim; ++i) {
296  if (i > 0) BinName += ", ";
297  const double min = Binning.BinEdges[i].at(Bins[i]);
298  const double max = Binning.BinEdges[i].at(Bins[i] + 1);
299  BinName += fmt::format("Dim{} ({:g}, {:g})", i, min, max);
300  }
301  } else{
302  const BinInfo& bin = Binning.Bins[SampleBin];
303  const int Dim = static_cast<int>(bin.Extent.size());
304 
305  for (int i = 0; i < Dim; ++i) {
306  if (i > 0) BinName += ", ";
307  const double min = bin.Extent[i][0];
308  const double max = bin.Extent[i][1];
309  BinName += fmt::format("Dim{} ({:g}, {:g})", i, min, max);
310  }
311  }
312  return BinName;
313 }
314 
315 // ************************************************
316 std::string BinningHandler::GetBinName(const int GlobalBin) const {
317 // ************************************************
318  int SampleBin = GetSampleIndex(GlobalBin);
319  int LocalBin = GetLocalBinFromGlobalBin(SampleBinning, GlobalBin);
320  return GetBinName(SampleBin, LocalBin);
321 }
322 
323 // ************************************************
324 int BinningHandler::GetSampleIndex(const int GlobalBin) const {
325 // ************************************************
326  int SampleBin = GetSampleFromGlobalBin(SampleBinning, GlobalBin);
327  return SampleBin;
328 }
329 
330 // ************************************************
331 int BinningHandler::GetNAxisBins(const int iSample, const int iDim) const {
332 // ************************************************
333  const auto& Binning = SampleBinning[iSample];
334  if(!Binning.Uniform) {
335  MACH3LOG_ERROR("When using Non-Uniform binning for sample {} please use global bin instead of {}", iSample, __func__);
336  throw MaCh3Exception(__FILE__, __LINE__);
337  } else{
338  return static_cast<int>(Binning.AxisNBins.at(iDim));
339  }
340 }
341 
342 // ************************************************
343 bool BinningHandler::IsUniform(const int iSample) const {
344 // ************************************************
345  const auto& Binning = SampleBinning[iSample];
346  return Binning.Uniform;
347 }
348 
349 // ************************************************
350 const std::vector<BinInfo> BinningHandler::GetNonUniformBins(const int iSample) const {
351 // ************************************************
352  const auto& Binning = SampleBinning[iSample];
353  if(!Binning.Uniform) {
354  return Binning.Bins;
355  } else{
356  MACH3LOG_ERROR("{} for sample {} will not work becasue binnin is unfiorm", __func__, iSample);
357  throw MaCh3Exception(__FILE__, __LINE__);
358  }
359 }
360 
std::vector< std::vector< double > > UniformBinEdgeConfigParser(YAML::Node const &bin_edges_node, bool &found_range_specifier)
Parses YAML node describing multidim uniform binning.
#define _restrict_
KS: Using restrict limits the effects of pointer aliasing, aiding optimizations. While reading I foun...
Definition: Core.h:108
std::vector< double > BuildBinEdgesFromNode(YAML::Node const &bin_edges_node, bool &found_range_specifier)
Builds a single dimension's bin edges from YAML::Node.
std::vector< double > BinRangeToBinEdges(YAML::Node const &bin_range)
Converts a range (linspace/logspace) to a std::vector<double>
#define MACH3LOG_DEBUG
Definition: MaCh3Logger.h:34
#define MACH3LOG_ERROR
Definition: MaCh3Logger.h:37
#define MACH3LOG_INFO
Definition: MaCh3Logger.h:35
int GetSampleFromGlobalBin(const std::vector< SampleBinningInfo > &BinningInfo, const int GlobalBin)
Get the sample index corresponding to a global bin number.
int GetLocalBinFromGlobalBin(const std::vector< SampleBinningInfo > &BinningInfo, const int GlobalBin)
Get the local (sample) bin index from a global bin number.
YAML::Node LoadYamlConfig(const std::string &filename, const std::string &File, const int Line)
Open YAML file.
Definition: YamlHelper.h:356
Type Get(const YAML::Node &node, const std::string File, const int Line)
Get content of config file.
Definition: YamlHelper.h:291
const std::vector< BinInfo > GetNonUniformBins(const int iSample) const
Return NonUnifomr bins to for example check extent etc.
void SetGlobalBinNumbers()
Sets the GlobalOffset for each SampleBinningInfo to enable linearization of multiple 2D binning sampl...
int GetSampleStartBin(const int iSample) const
Get bin number corresponding to where given sample starts.
BinningHandler()
Constructor.
std::vector< SampleBinningInfo > SampleBinning
Binning info for individual sample.
int FindGlobalBin(const int iSample, const std::vector< const double * > &KinVar, const std::vector< int > &NomBin) const
Find Global bin including.
void SetupSampleBinning(const YAML::Node &Settings, SampleInfo &SingleSample)
Function to setup the binning of your sample histograms and the underlying arrays that get handled in...
int GetBinSafe(const int iSample, const std::vector< int > &Bins) const
Get gloabl bin based on sample, and dimension of each sample without any safety checks.
int GetNBins() const
Get total number of bins over all samples/kinematic bins etc.
std::string GetBinName(const int GlobalBin) const
Get fancy name for a given bin, to help match it with global properties.
int FindNominalBin(const int iSample, const int iDim, const double Var) const
Find the nominal bin for a given variable in a given sample and dimension.
int GetGlobalBinSafe(const int iSample, const std::vector< int > &Bins) const
Get gloabl bin based on sample, and dimension of each sample with additional checks.
bool IsUniform(const int iSample) const
Tells whether given sample is using unform binning.
int GetSampleIndex(const int GlobalBin) const
Returns sample index based on global bin.
int GetSampleEndBin(const int iSample) const
Get bin number corresponding to where given sample ends.
int TotalNumberOfBins
Total number of bins.
int GetNAxisBins(const int iSample, const int iDim) const
Get Number of N-axis bins for a given sample.
Custom exception class used throughout MaCh3.
constexpr static const int UnderOverFlowBin
Mark bin which is overflow or underflow in MaCh3 binning.
Definition: Core.h:91
void AddPath(std::string &FilePath)
Prepends the MACH3 environment path to FilePath if it is not already present.
Definition: Monitor.cpp:382
KS: This hold bin extents in N-Dimensions allowing to check if Bin falls into.
std::vector< std::array< double, 2 > > Extent
KS: Struct storing all information required for sample binning.
void InitUniform(const std::vector< std::vector< double >> &InputEdges)
Initialise Uniform Binning.
void InitNonUniform(const std::vector< std::vector< std::vector< double >>> &InputBins)
Initialise Non-Uniform Binning.
std::vector< std::vector< double > > BinEdges
Vector to hold N-axis bin-edges.
KS: Store info about MC sample.
Definition: SampleInfo.h:40
int nDimensions
Keep track of the dimensions of the sample binning.
Definition: SampleInfo.h:60
std::vector< std::string > VarStr
the strings associated with the variables used for the binning e.g. "RecoNeutrinoEnergy"
Definition: SampleInfo.h:52
std::string SampleTitle
the name of this sample e.g."muon-like" used for printing
Definition: SampleInfo.h:55