MaCh3  2.5.0
Reference Guide
BinningHandler.cpp
Go to the documentation of this file.
2 
3 // ************************************************
5 // ************************************************
6 }
7 
8 // ************************************************
9 auto BinRangeToBinEdges(YAML::Node const &bin_range) {
10 // ************************************************
11  bool is_lin = true;
12  YAML::Node bin_range_specifier;
13  if (bin_range["linspace"]) {
14  bin_range_specifier = bin_range["linspace"];
15  } else if (bin_range["logspace"]) {
16  is_lin = false;
17  bin_range_specifier = bin_range["logspace"];
18  } else {
19  std::stringstream ss;
20  ss << bin_range;
21  MACH3LOG_ERROR("When parsing binning, expected bin range specifier with "
22  "key linspace or logspace, but found,\n{}",
23  ss.str());
24  throw MaCh3Exception(__FILE__, __LINE__);
25  }
26 
27  auto nb = Get<int>(bin_range_specifier["nb"], __FILE__, __LINE__);
28  auto low = Get<double>(bin_range_specifier["low"], __FILE__, __LINE__);
29  auto up = Get<double>(bin_range_specifier["up"], __FILE__, __LINE__);
30 
31  std::vector<double> edges(nb + 1, low);
32  // force the last bin to be exactly as parsed to avoid numerical instabilities
33  // not quite lining back up with the end of the range, which could
34  // cause spurious errors or infinitesimally small bins for specifications
35  // like: [ { logspace: { nb: 10, 1E-1, 10}, 10, 11} ]
36  edges.back() = up;
37 
38  if (is_lin) {
39  double bw = (up - low) / nb;
40  for (int i = 0; i < (nb - 1); ++i) {
41  edges[i + 1] = edges[i] + bw;
42  }
43  } else {
44  double llow = std::log10(low);
45  double lup = std::log10(up);
46  double lbw = (lup - llow) / nb;
47  for (int i = 0; i < (nb - 1); ++i) {
48  edges[i + 1] = std::pow(10, llow + (i + 1) * lbw);
49  }
50  }
51 
52  return edges;
53 }
54 
55 // ************************************************
63 auto BuildBinEdgesFromNode(YAML::Node const &bin_edges_node,
64  bool &found_range_specifier) {
65 // ************************************************
66  if (bin_edges_node.IsMap()) {
67  found_range_specifier = true;
68  return BinRangeToBinEdges(bin_edges_node);
69  }
70  std::vector<double> edges_builder;
71  if (!bin_edges_node.IsSequence()) {
72  std::stringstream ss;
73  ss << bin_edges_node;
75  "When parsing binning, expected to find a YAML map or sequence, "
76  "but found:\n{}",
77  ss.str());
78  throw MaCh3Exception(__FILE__, __LINE__);
79  }
80  for (auto const &it : bin_edges_node) {
81  if (it.IsScalar()) {
82  edges_builder.push_back(it.as<double>());
83  } else if (it.IsMap()) {
84  found_range_specifier = true;
85  auto range_edges = BinRangeToBinEdges(it);
86  std::copy(range_edges.begin(), range_edges.end(),
87  std::back_inserter(edges_builder));
88  } else {
89  std::stringstream ss;
90  ss << bin_edges_node;
92  "When parsing binning, expected elements in outer sequence to all be "
93  "either scalars or maps, but found:\n{}",
94  ss.str());
95  throw MaCh3Exception(__FILE__, __LINE__);
96  }
97  }
98 
99  // Check for duplicates or out-of-order bins
100  std::vector<double> edges;
101  for (size_t eb_it = 0; eb_it < edges_builder.size(); ++eb_it) {
102  if (edges.size()) {
103  if (edges_builder[eb_it] == edges.back()) { // remove duplicate edges
104  continue;
105  } else if (edges_builder[eb_it] < edges.back()) {
106  std::stringstream ss;
107  ss << "[ ";
108  for(auto const & e : edges_builder){
109  ss << fmt::format("{:.3g} ", e);
110  }
111  ss << "]";
113  "When parsing binning, found edges that were not monotonically "
114  "increasing, problem bin at index: {}:\n{}",
115  eb_it, ss.str());
116  throw MaCh3Exception(__FILE__, __LINE__);
117  }
118  }
119  edges.push_back(edges_builder[eb_it]);
120  }
121 
122  return edges;
123 }
124 
125 // ************************************************
153 auto UniformBinEdgeConfigParser(YAML::Node const &bin_edges_node,
154  bool &found_range_specifier) {
155 // ************************************************
156  if (bin_edges_node.IsMap()) {
157  found_range_specifier = true;
158  return std::vector<std::vector<double>>{
159  BinRangeToBinEdges(bin_edges_node),
160  };
161  } else if (bin_edges_node.IsSequence()) {
162  if (!bin_edges_node.size()) {
163  std::stringstream ss;
164  ss << bin_edges_node;
165  MACH3LOG_ERROR("When parsing binning, found an empty sequence:\n{}",
166  ss.str());
167  throw MaCh3Exception(__FILE__, __LINE__);
168  }
169  auto const &first_el = bin_edges_node[0];
170  if (first_el.IsScalar() || first_el.IsMap()) { // 1D binning
171  return std::vector<std::vector<double>>{
172  BuildBinEdgesFromNode(bin_edges_node, found_range_specifier),
173  };
174  }
175  // ND binning
176  std::vector<std::vector<double>> dims;
177  for (auto const &dim_node : bin_edges_node) {
178  dims.push_back(BuildBinEdgesFromNode(dim_node, found_range_specifier));
179  }
180  return dims;
181  } else {
182  std::stringstream ss;
183  ss << bin_edges_node;
185  "When parsing binning, expected to find a YAML map or sequence, "
186  "but found:\n{}",
187  ss.str());
188  throw MaCh3Exception(__FILE__, __LINE__);
189  }
190 }
191 
192 // ************************************************
193 // Function to setup the binning of your sample histograms and the underlying
194 // arrays that get handled in fillArray() and fillArray_MP().
195 // The Binning.XBinEdges are filled in the daughter class from the sample config file.
196 // This "passing" can be removed.
197 void BinningHandler::SetupSampleBinning(const YAML::Node& Settings, SampleInfo& SingleSample) {
198 // ************************************************
199  MACH3LOG_INFO("Setting up Sample Binning");
200  //Binning
201  SingleSample.VarStr = (Settings["VarStr"] && Settings["VarStr"].IsScalar())
202  ? std::vector<std::string>{Get<std::string>(
203  Settings["VarStr"], __FILE__, __LINE__)}
204  : Get<std::vector<std::string>>(Settings["VarStr"],
205  __FILE__, __LINE__);
206  SingleSample.nDimensions = static_cast<int>(SingleSample.VarStr.size());
207 
208  SampleBinningInfo SingleBinning;
209  bool Uniform = Get<bool>(Settings["Uniform"], __FILE__ , __LINE__);
210  bool found_range_specifier = false;
211  if(Uniform == false) {
212  if(Settings["Bins"].IsSequence()) {
213  SingleBinning.InitNonUniform(Get<std::vector<std::vector<std::vector<double>>>>(Settings["Bins"], __FILE__, __LINE__));
214  } else if(Settings["Bins"].IsMap()){
215  // Load binning from external file
216  auto file = Get<std::string>(Settings["Bins"]["File"], __FILE__, __LINE__);
217  M3::AddPath(file);
218  auto key = Get<std::string>(Settings["Bins"]["Key"], __FILE__, __LINE__);
219  auto binfile = LoadYamlConfig(file, __FILE__, __LINE__);
220  SingleBinning.InitNonUniform(Get<std::vector<std::vector<std::vector<double>>>>(binfile[key], __FILE__, __LINE__));
221  } else {
222  std::stringstream ss;
223  ss << Settings["Bins"];
225  "When parsing binning, expected to find a YAML map or sequence, "
226  "but found:\n{}",
227  ss.str());
228  throw MaCh3Exception(__FILE__, __LINE__);
229  }
230  } else {
231  YAML::Node const & bin_edges_node = Settings["BinEdges"] ? Settings["BinEdges"] : Settings["VarBins"];
232  if(!bin_edges_node){
233  MACH3LOG_ERROR("When setting up Uniform sample binning, didn't find expected key: BinEdges (or VarBins for backward compatibility).");
234  throw MaCh3Exception(__FILE__, __LINE__);
235  }
236  SingleBinning.InitUniform(UniformBinEdgeConfigParser(bin_edges_node, found_range_specifier));
237  }
238  if(SingleSample.VarStr.size() != SingleBinning.BinEdges.size()) {
239  MACH3LOG_ERROR("Number of variables ({}) does not match number of bin edge sets ({}) in sample config '{}'",
240  SingleSample.VarStr.size(), SingleBinning.BinEdges.size(),SingleSample.SampleTitle);
241  if(found_range_specifier){
242  std::stringstream ss;
243  ss << (Settings["BinEdges"] ? Settings["BinEdges"] : Settings["VarBins"]);
244  MACH3LOG_ERROR(R"(A bin range specifier was found in node:
245 
246  {}
247 
248 Please carefully check the number of square brackets used, a 2D binning
249  comprised of just 2 range specifiers must explicitly include the axis
250  list specifier like:
251 
252  BinEdges: [ [ {{linspace: {{nb:10, low:0, up: 10}}}} ], [ {{linspace: {{nb:5, low:10, up: 100}}}} ] ]
253 )", ss.str());
254  }
255  throw MaCh3Exception(__FILE__, __LINE__);
256  }
257 
258  SampleBinning.emplace_back(SingleBinning);
259 
260  // now setup global numbering
262 }
263 
264 // ************************************************
265 int BinningHandler::FindGlobalBin(const int NomSample,
266  const std::vector<const double*>& KinVar,
267  const std::vector<int>& NomBin) const {
268 // ************************************************
269  //DB Find the relevant bin in the PDF for each event
270  const int Dim = static_cast<int>(KinVar.size());
271  const SampleBinningInfo& _restrict_ Binning = SampleBinning[NomSample];
272  int GlobalBin = 0;
273 
274  for(int i = 0; i < Dim; ++i) {
275  const double Var = *KinVar[i];
276  const int Bin = Binning.FindBin(i, Var, NomBin[i]);
277  // KS: If we are outside of range in only one dimension this mean out of bounds, we can simply quickly finish
278  if(Bin < 0) return M3::UnderOverFlowBin;
279  // KS: inline GetBin computation to avoid any memory allocation, which in reweight loop is very costly
280  GlobalBin += Bin * Binning.Strides[i];
281  }
282 
283  if(Binning.Uniform) {
284  GlobalBin += static_cast<int>(Binning.GlobalOffset);
285  return GlobalBin;
286  } else {
287  const auto& _restrict_ BinMapping = Binning.BinGridMapping[GlobalBin];
288  const size_t nNonUniBins = BinMapping.size();
289  for(size_t iBin = 0; iBin < nNonUniBins; iBin++) {
290  const int BinNumber = BinMapping[iBin];
291  const auto& _restrict_ NonUniBin = Binning.Bins[BinNumber];
292  if(NonUniBin.IsEventInside(KinVar)){
293  return BinNumber + Binning.GlobalOffset;
294  }
295  }
296  MACH3LOG_DEBUG("Didn't find any bin so returning UnderOverFlowBin");
297  return M3::UnderOverFlowBin;
298  }
299 }
300 
301 // ************************************************
302 int BinningHandler::FindNominalBin(const int iSample,
303  const int iDim,
304  const double Var) const {
305 // ************************************************
306  const SampleBinningInfo& info = SampleBinning[iSample];
307 
308  const auto& edges = info.BinEdges[iDim];
309 
310  // Outside binning range
311  if (Var < edges.front() || Var >= edges.back()) {
312  return M3::UnderOverFlowBin;
313  }
314  return static_cast<int>(std::distance(edges.begin(), std::upper_bound(edges.begin(), edges.end(), Var)) - 1);
315 }
316 
317 // ************************************************
318 int BinningHandler::GetBinSafe(const int Sample, const std::vector<int>& Bins) const {
319 // ************************************************
320  const int GlobalBin = SampleBinning[Sample].GetBinSafe(Bins);
321  return GlobalBin;
322 }
323 
324 // ************************************************
325 int BinningHandler::GetGlobalBinSafe(const int Sample, const std::vector<int>& Bins) const {
326 // ************************************************
327  const int GlobalBin = SampleBinning[Sample].GetBinSafe(Bins) + static_cast<int>(SampleBinning[Sample].GlobalOffset);
328  return GlobalBin;
329 }
330 
331 // ************************************************
332 int BinningHandler::GetSampleStartBin(const int Sample) const {
333 // ************************************************
334  return static_cast<int>(SampleBinning[Sample].GlobalOffset);
335 }
336 
337 // ************************************************
338 int BinningHandler::GetSampleEndBin(const int Sample) const {
339 // ************************************************
340  if (Sample == static_cast<int>(SampleBinning.size()) - 1) {
341  return GetNBins();
342  } else {
343  return static_cast<int>(SampleBinning[Sample+1].GlobalOffset);
344  }
345 }
346 
347 // ************************************************
350 // ************************************************
351  if (SampleBinning.empty()) {
352  MACH3LOG_ERROR("No binning samples provided.");
353  throw MaCh3Exception(__FILE__, __LINE__);
354  }
355 
356  int GlobalOffsetCounter = 0;
357  for(size_t iSample = 0; iSample < SampleBinning.size(); iSample++){
358  SampleBinning[iSample].GlobalOffset = GlobalOffsetCounter;
359  GlobalOffsetCounter += SampleBinning[iSample].nBins;
360  }
361  // lastly modify total number of bins
362  TotalNumberOfBins = GlobalOffsetCounter;
363 }
364 
365 // ************************************************
366 // Get fancy name for a given bin, to help match it with global properties
367 std::string BinningHandler::GetBinName(const int iSample, const std::vector<int>& Bins) const {
368 // ************************************************
369  const auto& Binning = SampleBinning[iSample];
370  if(!Binning.Uniform) {
371  MACH3LOG_ERROR("When using Non-Uniform binning for sample {} please use One bin instead of Axis bins", iSample);
372  throw MaCh3Exception(__FILE__, __LINE__);
373  }
374  return GetBinName(iSample, GetBinSafe(iSample, Bins));
375 }
376 
377 // ************************************************
378 // Get fancy name for a given bin, to help match it with global properties
379 std::string BinningHandler::GetBinName(const int iSample, const int SampleBin) const {
380 // ************************************************
381  const auto& Binning = SampleBinning[iSample];
382 
383  // Safety checks
384  if (SampleBin < 0 || SampleBin >= static_cast<int>(Binning.nBins)) {
385  MACH3LOG_ERROR("Requested bin {} is out of range for sample {}", SampleBin, iSample);
386  throw MaCh3Exception(__FILE__, __LINE__);
387  }
388  std::string BinName;
389 
390  if(Binning.Uniform) {
391  int Dim = static_cast<int>(Binning.Strides.size());
392  std::vector<int> Bins(Dim, 0);
393  int Remaining = SampleBin;
394 
395  // Convert the flat/global bin index into per-dimension indices
396  // Dim0 is the fastest-changing axis, Dim1 the next, etc.
397  //
398  // For example (2D):
399  // x = bin % Nx
400  // y = bin / Nx
401  //
402  // For 3D:
403  // x = bin % Nx
404  // y = (bin / Nx) % Ny
405  // z = bin / (Nx * Ny)
406  for (int i = 0; i < Dim; ++i) {
407  const int nBinsDim = static_cast<int>(Binning.BinEdges[i].size()) - 1;
408  Bins[i] = Remaining % nBinsDim;
409  Remaining /= nBinsDim;
410  }
411 
412  for (int i = 0; i < Dim; ++i) {
413  if (i > 0) BinName += ", ";
414  const double min = Binning.BinEdges[i].at(Bins[i]);
415  const double max = Binning.BinEdges[i].at(Bins[i] + 1);
416  BinName += fmt::format("Dim{} ({:g}, {:g})", i, min, max);
417  }
418  } else{
419  const BinInfo& bin = Binning.Bins[SampleBin];
420  const int Dim = static_cast<int>(bin.Extent.size());
421 
422  for (int i = 0; i < Dim; ++i) {
423  if (i > 0) BinName += ", ";
424  const double min = bin.Extent[i][0];
425  const double max = bin.Extent[i][1];
426  BinName += fmt::format("Dim{} ({:g}, {:g})", i, min, max);
427  }
428  }
429  return BinName;
430 }
431 
432 // ************************************************
433 std::string BinningHandler::GetBinName(const int GlobalBin) const {
434 // ************************************************
435  int SampleBin = GetSampleFromGlobalBin(SampleBinning, GlobalBin);
436  int LocalBin = GetLocalBinFromGlobalBin(SampleBinning, GlobalBin);
437  return GetBinName(SampleBin, LocalBin);
438 }
439 
440 // ************************************************
441 int BinningHandler::GetNAxisBins(const int iSample, const int iDim) const {
442 // ************************************************
443  const auto& Binning = SampleBinning[iSample];
444  if(!Binning.Uniform) {
445  MACH3LOG_ERROR("When using Non-Uniform binning for sample {} please use global bin instead of {}", iSample, __func__);
446  throw MaCh3Exception(__FILE__, __LINE__);
447  } else{
448  return static_cast<int>(Binning.AxisNBins.at(iDim));
449  }
450 }
451 
452 // ************************************************
453 bool BinningHandler::IsUniform(const int iSample) const {
454 // ************************************************
455  const auto& Binning = SampleBinning[iSample];
456  return Binning.Uniform;
457 }
458 
459 // ************************************************
460 const std::vector<BinInfo> BinningHandler::GetNonUniformBins(const int iSample) const {
461 // ************************************************
462  const auto& Binning = SampleBinning[iSample];
463  if(!Binning.Uniform) {
464  return Binning.Bins;
465  } else{
466  MACH3LOG_ERROR("{} for sample {} will not work becasue binnin is unfiorm", __func__, iSample);
467  throw MaCh3Exception(__FILE__, __LINE__);
468  }
469 }
470 
auto BinRangeToBinEdges(YAML::Node const &bin_range)
auto BuildBinEdgesFromNode(YAML::Node const &bin_edges_node, bool &found_range_specifier)
Builds a single dimension's bin edges from YAML::Node.
auto UniformBinEdgeConfigParser(YAML::Node const &bin_edges_node, bool &found_range_specifier)
Parses YAML node describing multidim uniform binning.
#define _restrict_
KS: Using restrict limits the effects of pointer aliasing, aiding optimizations. While reading I foun...
Definition: Core.h:108
#define MACH3LOG_DEBUG
Definition: MaCh3Logger.h:34
#define MACH3LOG_ERROR
Definition: MaCh3Logger.h:37
#define MACH3LOG_INFO
Definition: MaCh3Logger.h:35
int GetSampleFromGlobalBin(const std::vector< SampleBinningInfo > &BinningInfo, const int GlobalBin)
Get the sample index corresponding to a global bin number.
int GetLocalBinFromGlobalBin(const std::vector< SampleBinningInfo > &BinningInfo, const int GlobalBin)
Get the local (sample) bin index from a global bin number.
YAML::Node LoadYamlConfig(const std::string &filename, const std::string &File, const int Line)
Open YAML file.
Definition: YamlHelper.h:356
Type Get(const YAML::Node &node, const std::string File, const int Line)
Get content of config file.
Definition: YamlHelper.h:291
const std::vector< BinInfo > GetNonUniformBins(const int iSample) const
Return NonUnifomr bins to for example check extent etc.
void SetGlobalBinNumbers()
Sets the GlobalOffset for each SampleBinningInfo to enable linearization of multiple 2D binning sampl...
int GetSampleStartBin(const int iSample) const
Get bin number corresponding to where given sample starts.
BinningHandler()
Constructor.
std::vector< SampleBinningInfo > SampleBinning
Binning info for individual sample.
int FindGlobalBin(const int iSample, const std::vector< const double * > &KinVar, const std::vector< int > &NomBin) const
Find Global bin including.
void SetupSampleBinning(const YAML::Node &Settings, SampleInfo &SingleSample)
Function to setup the binning of your sample histograms and the underlying arrays that get handled in...
int GetBinSafe(const int iSample, const std::vector< int > &Bins) const
Get gloabl bin based on sample, and dimension of each sample without any safety checks.
int GetNBins() const
Get total number of bins over all samples/kinematic bins etc.
std::string GetBinName(const int GlobalBin) const
Get fancy name for a given bin, to help match it with global properties.
int FindNominalBin(const int iSample, const int iDim, const double Var) const
Find the nominal bin for a given variable in a given sample and dimension.
int GetGlobalBinSafe(const int iSample, const std::vector< int > &Bins) const
Get gloabl bin based on sample, and dimension of each sample with additional checks.
bool IsUniform(const int iSample) const
Tells whether given sample is using unform binning.
int GetSampleEndBin(const int iSample) const
Get bin number corresponding to where given sample ends.
int TotalNumberOfBins
Total number of bins.
int GetNAxisBins(const int iSample, const int iDim) const
Get Number of N-axis bins for a given sample.
Custom exception class used throughout MaCh3.
constexpr static const int UnderOverFlowBin
Mark bin which is overflow or underflow in MaCh3 binning.
Definition: Core.h:91
void AddPath(std::string &FilePath)
Prepends the MACH3 environment path to FilePath if it is not already present.
Definition: Monitor.cpp:382
KS: This hold bin extents in N-Dimensions allowing to check if Bin falls into.
std::vector< std::array< double, 2 > > Extent
KS: Struct storing all information required for sample binning.
void InitUniform(const std::vector< std::vector< double >> &InputEdges)
Initialise Uniform Binning.
void InitNonUniform(const std::vector< std::vector< std::vector< double >>> &InputBins)
Initialise Non-Uniform Binning.
std::vector< std::vector< double > > BinEdges
Vector to hold N-axis bin-edges.
KS: Store info about MC sample.
int nDimensions
Keep track of the dimensions of the sample binning.
std::vector< std::string > VarStr
the strings associated with the variables used for the binning e.g. "RecoNeutrinoEnergy"
std::string SampleTitle
the name of this sample e.g."muon-like"