MaCh3  2.4.2
Reference Guide
Functions | Variables
CombineMaCh3Chains.cpp File Reference

Combine chain files produced by MCMC, enforcing the condition that all the files to combine were made using the exact same software versions and config files. More...

#include <unistd.h>
#include "Manager/Manager.h"
#include "Samples/SampleStructs.h"
#include "Samples/HistogramUtils.h"
#include "TList.h"
#include "TFile.h"
#include "TMacro.h"
#include "TTree.h"
#include "TMD5.h"
#include "TFileMerger.h"
#include "TKey.h"
#include "TROOT.h"
Include dependency graph for CombineMaCh3Chains.cpp:

Go to the source code of this file.

Functions

bool ShouldSkipLine (const std::string &line, const std::vector< std::string > &SkipVector)
 KS: This allows us to skip the output name etc. in the config. We expect the output name to be different, but this doesn't invalidate chain merging. More...
 
bool CompareTwoConfigs (const std::string &File1, const std::string &File2, const std::vector< std::string > &SkipVector)
 Make sure two configs are identical, but skip the specified fields. For example, when comparing two chains, NSteps or the output name might be different and the chains are still fine to merge. More...
 
bool checkSoftwareVersions (TFile *file, TFile *prevFile, const std::string &ConfigName, const std::vector< std::string > &SkipVector={})
 EM: Will compare the version header contained in the two provided files and shout if they don't match. More...
 
void CopyDir (TDirectory *source)
 The chains we merge contain TDirectory objects, and ROOT does not provide a method for copying them, so here we have this bad boy. More...
 
bool CompareHistograms (const TH1 *h1, const TH1 *h2, const std::string &histName, const std::string &folderName)
 Check whether two histograms are identical. More...
 
bool CheckFolder (TFile *file, TFile *prevFile, const std::string &FolderName, const std::vector< std::string > &SkipVector={})
 Loop through TH1 and TMacro objects in FolderName in 'file' and compare with those in 'prevFile'. More...
 
void FastMergeTTrees (const std::vector< std::string > &files, const std::string &outFile, const std::string &TTreeName)
 Custom function for merging TTrees; should be similar to what hadd uses. More...
 
void CombineChain ()
 
void usage ()
 
void ParseArg (int argc, char *argv[])
 
int main (int argc, char *argv[])
 

Variables

std::string OutFileName = ""
 
int targetCompression = 1
 
std::vector< std::string > inpFileList
 
bool forceOverwrite = false
 
bool forceMerge = false
 

Detailed Description

Combine chain files produced by MCMC, enforcing the condition that all the files to combine were made using the exact same software versions and config files.

Author
Ewan Miller
Kamil Skwarczynski

Definition in file CombineMaCh3Chains.cpp.

Function Documentation

◆ CheckFolder()

bool CheckFolder ( TFile *  file,
TFile *  prevFile,
const std::string &  FolderName,
const std::vector< std::string > &  SkipVector = {} 
)

Loop through TH1 and TMacro objects in FolderName in 'file' and compare with those in 'prevFile'.

Definition at line 163 of file CombineMaCh3Chains.cpp.

163  {})
164 {
165  bool mismatch = false;
166  TDirectory* dir = file->GetDirectory(FolderName.c_str());
167  TDirectory* prevDir = prevFile->GetDirectory(FolderName.c_str());
168 
169  if (!dir || !prevDir) {
170  MACH3LOG_ERROR("Could not find folder '{}' in one or both files", FolderName);
171  return true;
172  }
173 
174  TIter nextKey(dir->GetListOfKeys());
175  TKey* key;
176 
177  while ((key = static_cast<TKey*>(nextKey()))) {
178  const std::string objName = key->GetName();
179  TObject* obj = key->ReadObj();
180  if (!obj) continue;
181 
182  // Handle TH1 comparison
183  if (obj->InheritsFrom("TH1")) {
184  TH1* hist = static_cast<TH1*>(obj);
185  TH1* prevHist = dynamic_cast<TH1*>(prevDir->Get(objName.c_str()));
186  if (!prevHist) {
187  MACH3LOG_ERROR("Missing histogram '{}' in previous file (folder '{}')", objName, FolderName);
188  mismatch = true;
189  continue;
190  }
191  if (!CompareHistograms(hist, prevHist, objName, FolderName)) {
192  mismatch = true;
193  }
194  }
195  // Handle TMacro comparison
196  else if (obj->InheritsFrom("TMacro")) {
197  TMacro* macro = static_cast<TMacro*>(obj);
198  TMacro* prevMacro = dynamic_cast<TMacro*>(prevDir->Get(objName.c_str()));
199  if (!prevMacro) {
200  MACH3LOG_ERROR("Missing TMacro '{}' in previous file (folder '{}')", objName, FolderName);
201  mismatch = true;
202  continue;
203  }
204  if (!CompareTwoConfigs(TMacroToString(*macro), TMacroToString(*prevMacro), SkipVector)) {
205  mismatch = true;
206  }
207  }
208  }
209  return mismatch;
210 }
bool CompareHistograms(const TH1 *h1, const TH1 *h2, const std::string &histName, const std::string &folderName)
Check whether two histograms are identical.
bool CompareTwoConfigs(const std::string &File1, const std::string &File2, const std::vector< std::string > &SkipVector)
Make sure two configs are identical, but skip the specified fields. For example, when comparing two chains ...
#define MACH3LOG_ERROR
Definition: MaCh3Logger.h:37
std::string TMacroToString(const TMacro &macro)
KS: Convert a ROOT TMacro object to a string representation.
Definition: YamlHelper.h:123

◆ checkSoftwareVersions()

bool checkSoftwareVersions ( TFile *  file,
TFile *  prevFile,
const std::string &  ConfigName,
const std::vector< std::string > &  SkipVector = {} 
)

EM: Will compare the version header contained in the two provided files and shout if they don't match.

Definition at line 85 of file CombineMaCh3Chains.cpp.

85  {})
86 {
87  bool weirdFile = false;
88 
89  TMacro *versionHeader = file->Get<TMacro>(ConfigName.c_str());
90  TMacro *prevVersionHeader = prevFile->Get<TMacro>(ConfigName.c_str());
91 
92  // EM: compare the digest of the version header file in this file, with the previous one
93  if(!CompareTwoConfigs(TMacroToString(*versionHeader), TMacroToString(*prevVersionHeader), SkipVector)){
94  MACH3LOG_ERROR("Looks like the {} embedded config for file {} is different to the previous ones", ConfigName, file->GetName());
95  MACH3LOG_ERROR("This strongly suggests that this file was made with different software versions than the previous ones");
96  weirdFile = true;
97  }
98 
99  return weirdFile;
100 }

◆ CombineChain()

void CombineChain ( )

Definition at line 228 of file CombineMaCh3Chains.cpp.

229 {
230  std::string outFileOption;
231  if(forceOverwrite) outFileOption = "RECREATE";
232  else outFileOption = "CREATE";
233 
234  TFile *prevFile = nullptr;
235 
236  // EM: loop through all the files in the provided list, compare the embedded version and config files
237  // If they match, we add the file to the list of files to be merged.
238  // If not, we throw an error and provide a (hopefully) helpful message telling the user why the files couldn't be merged.
239  for(uint fileId = 0; fileId < inpFileList.size(); fileId++)
240  {
241  std::string fileName = inpFileList[fileId];
242  TFile *file = new TFile(fileName.c_str());
243 
244  if(file->Get<TTree>("posteriors")->GetEntries() == 0){
245  MACH3LOG_WARN("Hmmm, file {} Doesn't seem to have any entries", fileName.c_str());
246  MACH3LOG_WARN("That's weird but I guess there's no rule that says a file can't be empty");
247  MACH3LOG_WARN("I'll skip it but maybe double check that this doesn't indicate some deeper problem");
248  continue;
249  }
250 
251  // EM: need to set this in the initial case
252  if(prevFile == nullptr) {
253  prevFile = file;
254  }
255 
256  MACH3LOG_DEBUG("############ File {} #############", fileId);
257 
258  bool weirdFile = false;
259  if(checkSoftwareVersions(file, prevFile, "MaCh3Engine/version_header")) weirdFile = true;
260  if(checkSoftwareVersions(file, prevFile, "MaCh3_Config", {"OutputFile:", "NSteps:"})) weirdFile = true;
261  if(CheckFolder(file, prevFile, "SampleFolder")) weirdFile = true;
262  if(CheckFolder(file, prevFile, "CovarianceFolder")) weirdFile = true;
263 
264  if(weirdFile && !forceMerge){
265  MACH3LOG_ERROR("");
266  MACH3LOG_ERROR("=====================================================================================");
267  MACH3LOG_ERROR("This is not a great idea and could lead to weird outputs and cause some big headaches");
268  MACH3LOG_ERROR("further down the road. But if you reeeeally wanna do it and you know what you're");
269  MACH3LOG_ERROR("doing you can come here and remove the 'throw'");
270  MACH3LOG_ERROR("Or use -m option");
271  MACH3LOG_ERROR("{}:{}", __FILE__, __LINE__ + 2);
272  MACH3LOG_ERROR("=====================================================================================");
273  throw MaCh3Exception(__FILE__ , __LINE__ );
274  }
275 
276  if(prevFile != file) {
277  prevFile->Close();
278  delete prevFile;
279  }
280 
281  // EM: set these for the next iteration
282  prevFile = file;
283  }
284 
285  if (!forceOverwrite && access(OutFileName.c_str(), F_OK) != -1) {
286  MACH3LOG_ERROR("Output file '{}' already exists. Use -f to force overwrite.", OutFileName);
287  throw MaCh3Exception(__FILE__, __LINE__);
288  }
289  //KS: Create new file
290  TFile* outputFile = M3::Open(OutFileName, "recreate", __FILE__, __LINE__);
291  outputFile->Close();
292  delete outputFile;
293 
294  TStopwatch clock;
295  clock.Start();
296 
297  MACH3LOG_INFO("Starting merging");
298  FastMergeTTrees(inpFileList, OutFileName, "posteriors");
300 
301  clock.Stop();
302  MACH3LOG_INFO("Merging of took {:.2f}s to finish", clock.RealTime());
303 
304  //KS: Sadly we need to open file to save TDirectories to not have weird copy of several obejcts there...
305  outputFile = M3::Open(OutFileName, "UPDATE", __FILE__, __LINE__);
306  outputFile->cd();
307 
308  // EM: Write out the version and config files to the combined file
309  std::vector<std::string> configNames = {"MaCh3_Config", "Reweight_Config", "Smearing_Config"};
310  for (std::size_t i = 0; i < configNames.size(); ++i) {
311  const std::string& name = configNames[i];
312  TMacro* macro = prevFile->Get<TMacro>(name.c_str());
313  if (macro != nullptr) {
314  macro->Write();
315  delete macro;
316  }
317  }
318 
319  // Get the source directory
320  TDirectory *MaCh3EngineDir = prevFile->Get<TDirectory>("MaCh3Engine");
321  TDirectory *CovarianceFolderDir = prevFile->Get<TDirectory>("CovarianceFolder");
322  TDirectory *SampleFolderDir = prevFile->Get<TDirectory>("SampleFolder");
323 
324  CopyDir(MaCh3EngineDir);
325  CopyDir(CovarianceFolderDir);
326  CopyDir(SampleFolderDir);
327 
328  outputFile->Close();
329  delete outputFile;
330 
331  delete prevFile;
332  MACH3LOG_INFO("Done!");
333 }
std::string OutFileName
bool forceMerge
void CopyDir(TDirectory *source)
The chains we merge contain TDirectory objects, and ROOT does not provide a method for copying them, so here we have thi...
bool forceOverwrite
bool checkSoftwareVersions(TFile *file, TFile *prevFile, const std::string &ConfigName, const std::vector< std::string > &SkipVector={})
EM: Will compare the version header contained in the two provided files and shout if they don't match...
void FastMergeTTrees(const std::vector< std::string > &files, const std::string &outFile, const std::string &TTreeName)
Custom function for merging TTrees; should be similar to what hadd uses.
std::vector< std::string > inpFileList
bool CheckFolder(TFile *file, TFile *prevFile, const std::string &FolderName, const std::vector< std::string > &SkipVector={})
Loop through TH1 and TMacro objects in FolderName in 'file' and compare with those in 'prevFile'.
#define MACH3LOG_DEBUG
Definition: MaCh3Logger.h:34
#define MACH3LOG_INFO
Definition: MaCh3Logger.h:35
#define MACH3LOG_WARN
Definition: MaCh3Logger.h:36
Custom exception class used throughout MaCh3.
TFile * Open(const std::string &Name, const std::string &Type, const std::string &File, const int Line)
Opens a ROOT file with the given name and mode.

◆ CompareHistograms()

bool CompareHistograms ( const TH1 *  h1,
const TH1 *  h2,
const std::string &  histName,
const std::string &  folderName 
)

Check whether two histograms are identical (currently only their integrals are compared).

Todo:
add checks for stuff like bin content etc

Definition at line 145 of file CombineMaCh3Chains.cpp.

146 {
147  if (!h1 || !h2) {
148  MACH3LOG_ERROR("Null pointer passed to CompareHistograms for '{}'", histName);
149  return false;
150  }
151 
152  const double int1 = h1->Integral();
153  const double int2 = h2->Integral();
154  if (std::abs(int1 - int2) > 1e-6) {
155  MACH3LOG_ERROR("Histogram '{}' in folder '{}' has different integrals: current = {}, previous = {}",
156  histName, folderName, int1, int2);
157  return false;
158  }
159  return true;
160 }

◆ CompareTwoConfigs()

bool CompareTwoConfigs ( const std::string &  File1,
const std::string &  File2,
const std::vector< std::string > &  SkipVector 
)

Make sure two configs are identical, but skip the specified fields. For example, when comparing two chains, NSteps or the output name might be different and the chains are still fine to merge.

Parameters
File1: Config from chain 1
File2: Config from chain 2
SkipVector: Fields in the YAML file to skip

Definition at line 51 of file CombineMaCh3Chains.cpp.

51  {
52  std::istringstream file1(File1);
53  std::istringstream file2(File2);
54 
55  std::string line1, line2;
56  int lineNumber = 1;
57  bool areEqual = true;
58 
59  while (std::getline(file1, line1) && std::getline(file2, line2)) {
60  if (ShouldSkipLine(line1, SkipVector) || ShouldSkipLine(line2, SkipVector)) {
61  ++lineNumber;
62  continue;
63  }
64  if (line1 != line2) {
65  areEqual = false;
66  MACH3LOG_WARN("Difference found on line {}:", lineNumber);
67  MACH3LOG_WARN("Config1: {}", line1);
68  MACH3LOG_WARN("Config2: {}", line2);
69  }
70  ++lineNumber;
71  }
72  // Check if one file has extra lines
73  while (std::getline(file1, line1)) {
74  MACH3LOG_WARN("Extra line in {} on line {}: {}", File1, lineNumber, line1);
75  ++lineNumber;
76  }
77  while (std::getline(file2, line2)) {
78  MACH3LOG_WARN("Extra line in {} on line {}: {}", File2, lineNumber, line2);
79  ++lineNumber;
80  }
81  return areEqual;
82 }
bool ShouldSkipLine(const std::string &line, const std::vector< std::string > &SkipVector)
KS: This allows us to skip the output name etc. in the config. We expect the output name to be different, but this...

◆ CopyDir()

void CopyDir ( TDirectory *  source)

The chains we merge contain TDirectory objects, and ROOT does not provide a method for copying them, so here we have this bad boy.

Definition at line 103 of file CombineMaCh3Chains.cpp.

103  {
104  //copy all objects and subdirs of directory source as a subdir of the current directory
105  source->ls();
106  TDirectory *savdir = gDirectory;
107  TDirectory *adir = savdir->Get<TDirectory>(source->GetName());
108  // if directory doesn't exist make it
109  if (!adir) {
110  adir = savdir->mkdir(source->GetName());
111  }
112  adir->cd();
113  //loop on all entries of this directory
114  TKey *key;
115  TIter nextkey(source->GetListOfKeys());
116  while ((key = static_cast<TKey*>(nextkey()))) {
117  const char *classname = key->GetClassName();
118  TClass *cl = gROOT->GetClass(classname);
119  if (!cl) continue;
120  if (cl->InheritsFrom("TDirectory")) {
121  source->cd(key->GetName());
122  TDirectory *subdir = gDirectory;
123  adir->cd();
124  CopyDir(subdir);
125  adir->cd();
126  } else if (cl->InheritsFrom("TTree")) {
127  TTree *T = source->Get<TTree>(key->GetName());
128  adir->cd();
129  TTree *newT = T->CloneTree();
130  newT->Write();
131  } else {
132  source->cd();
133  TObject *obj = key->ReadObj();
134  adir->cd();
135  obj->Write();
136  delete obj;
137  }
138  }
139  adir->SaveSelf(kTRUE);
140  savdir->cd();
141 }

◆ FastMergeTTrees()

void FastMergeTTrees ( const std::vector< std::string > &  files,
const std::string &  outFile,
const std::string &  TTreeName 
)

Custom function for merging TTrees; should be similar to what hadd uses.

Warning
KS: for some reason, if "fast" is enabled then the output cannot be opened in ROOT5; no one should be using ROOT5 at this point.

Definition at line 214 of file CombineMaCh3Chains.cpp.

214  {
215  TChain chain(TTreeName.c_str());
216  for (const auto& f : files) chain.Add(f.c_str());
217 
218  TFile* outF = TFile::Open(outFile.c_str(), "UPDATE");
219 
220  TTree* newTree = chain.CloneTree(-1, "fast");
221  newTree->SetName(TTreeName.c_str());
222  outF->cd();
223  newTree->Write("", TObject::kOverwrite);
224  outF->Close();
225  delete outF;
226 }

◆ main()

int main ( int  argc,
char *  argv[] 
)

Definition at line 411 of file CombineMaCh3Chains.cpp.

412 {
415  ParseArg(argc, argv);
416  CombineChain();
417  return 0;
418 }
void ParseArg(int argc, char *argv[])
void CombineChain()
void SetMaCh3LoggerFormat()
Set messaging format of the logger.
Definition: MaCh3Logger.h:61
void MaCh3Welcome()
KS: Prints welcome message with MaCh3 logo.
Definition: Monitor.cpp:12

◆ ParseArg()

void ParseArg ( int  argc,
char *  argv[] 
)

Definition at line 347 of file CombineMaCh3Chains.cpp.

347  {
348  if(argc < 2){
349  MACH3LOG_ERROR("Too few arguments!!");
350  MACH3LOG_ERROR("USAGE:");
351  usage();
352  throw MaCh3Exception(__FILE__ , __LINE__ );
353  }
354 
355  int c;
356  for(;;) {
357  c = getopt(argc, argv, "o:c:mhf");
358  if (c == -1){ // loop over the remaining arguments
359  while (optind < argc){
360  // any non option input is assumed to be a root file
361  std::string fName = std::string(argv[optind]);
362  MACH3LOG_DEBUG("adding {} to file list", fName.c_str());
363  inpFileList.push_back(fName);
364  optind ++;
365  }
366  break;
367  }
368  else{
369  switch (c) {
370  case 'o': {
371  OutFileName = optarg;
372  break;
373  }
374  case 'f': {
375  forceOverwrite = true;
376  break;
377  }
378  case 'c': {
379  targetCompression = atoi(optarg);
380  break;
381  }
382  case 'm': {
383  forceMerge = true;
384  break;
385  }
386  case 'h': {
387  usage();
388  exit(0);
389  }
390  default: {
391  MACH3LOG_ERROR("Unrecognised option");
392  usage();
393  exit(1);
394  }
395  }
396  }
397  }
398 
399  if(OutFileName == ""){
400  MACH3LOG_INFO("Using first file in list as output: ", inpFileList[0].c_str());
402  inpFileList.erase(inpFileList.begin());
403  }
404 
405  if(forceOverwrite){
406  MACH3LOG_INFO("Will overwrite {} if it exists already", OutFileName.c_str());
407  }
408  MACH3LOG_INFO("Combining a total of {} files into {}", inpFileList.size(), OutFileName.c_str());
409 }
void usage()
int targetCompression

◆ ShouldSkipLine()

bool ShouldSkipLine ( const std::string &  line,
const std::vector< std::string > &  SkipVector 
)

KS: This allows us to skip the output name etc. in the config. We expect the output name to be different, but this doesn't invalidate chain merging.

Definition at line 35 of file CombineMaCh3Chains.cpp.

35  {
36  // Otherwise, check if the line contains any word from SkipVector
37  for (const auto& word : SkipVector) {
38  MACH3LOG_TRACE("{} : {}",line, word);
39  if (line.find(word) != std::string::npos) {
40  MACH3LOG_TRACE("Found matching word, therefore Skipping");
41  return true;
42  }
43  }
44  return false;
45 }
#define MACH3LOG_TRACE
Definition: MaCh3Logger.h:33

◆ usage()

void usage ( )

Definition at line 335 of file CombineMaCh3Chains.cpp.

335  {
336  MACH3LOG_INFO("Combine MaCh3 Chains files, very similar to hadd, but will compare embedded version info in the files to avoid accidentally combining files made with different software versions. Also avoids having a hige dump of separate version files in the output that happens with hadd.");
337  MACH3LOG_INFO("Cmd line syntax should be:");
338  MACH3LOG_INFO("CombineMaCh3Chains [-h] [-c [0-9]] [-f] [-o <output file>] input1.root [input2.root, input3.root ...]");
339  MACH3LOG_INFO("inputX.root : names of individual spline files to combine, can specify any number, need at least one");
340  MACH3LOG_INFO("output file : name of combined spline file. optional: if not specified, the app will just use the first input file as the output, the same as hadd'");
341  MACH3LOG_INFO("-c : target compression level for the combined file, default is 1, in line with hadd");
342  MACH3LOG_INFO("-f : force overwrite the output file if it exists already");
343  MACH3LOG_INFO("-m : merge files in-spite of differences");
344  MACH3LOG_INFO("-h : print this message and exit");
345 }

Variable Documentation

◆ forceMerge

bool forceMerge = false

Definition at line 32 of file CombineMaCh3Chains.cpp.

◆ forceOverwrite

bool forceOverwrite = false

Definition at line 31 of file CombineMaCh3Chains.cpp.

◆ inpFileList

std::vector<std::string> inpFileList

Definition at line 30 of file CombineMaCh3Chains.cpp.

◆ OutFileName

std::string OutFileName = ""

Definition at line 28 of file CombineMaCh3Chains.cpp.

◆ targetCompression

int targetCompression = 1

Definition at line 29 of file CombineMaCh3Chains.cpp.