MaCh3  2.4.2
Reference Guide
CombineMaCh3Chains.cpp
Go to the documentation of this file.
1 // C++ includes
2 #include <unistd.h>
3 
4 // MaCh3 includes
5 #include "Manager/Manager.h"
8 
10 // ROOT includes
11 #include "TList.h"
12 #include "TFile.h"
13 #include "TMacro.h"
14 #include "TTree.h"
15 #include "TMD5.h"
16 #include "TFileMerger.h"
17 #include "TKey.h"
18 #include "TROOT.h"
20 
27 
/// Name of the merged output file (set by -o, or taken from the first positional argument in ParseArg).
std::string OutFileName{};
/// Target compression level parsed from -c; the help text documents 1 as the default.
/// NOTE(review): this declaration is referenced by ParseArg but was absent from the visible
/// listing; confirm it is not also declared elsewhere before applying.
int targetCompression{1};
/// Input ROOT files collected from the positional command-line arguments.
std::vector<std::string> inpFileList{};
/// Set by -f: allow an existing output file to be replaced.
bool forceOverwrite{false};
/// Set by -m: merge even when the consistency checks report differences.
bool forceMerge{false};
33 
35 bool ShouldSkipLine(const std::string& line, const std::vector<std::string>& SkipVector) {
36  // Otherwise, check if the line contains any word from SkipVector
37  for (const auto& word : SkipVector) {
38  MACH3LOG_TRACE("{} : {}",line, word);
39  if (line.find(word) != std::string::npos) {
40  MACH3LOG_TRACE("Found matching word, therefore Skipping");
41  return true;
42  }
43  }
44  return false;
45 }
46 
51 bool CompareTwoConfigs(const std::string& File1, const std::string& File2, const std::vector<std::string>& SkipVector) {
52  std::istringstream file1(File1);
53  std::istringstream file2(File2);
54 
55  std::string line1, line2;
56  int lineNumber = 1;
57  bool areEqual = true;
58 
59  while (std::getline(file1, line1) && std::getline(file2, line2)) {
60  if (ShouldSkipLine(line1, SkipVector) || ShouldSkipLine(line2, SkipVector)) {
61  ++lineNumber;
62  continue;
63  }
64  if (line1 != line2) {
65  areEqual = false;
66  MACH3LOG_WARN("Difference found on line {}:", lineNumber);
67  MACH3LOG_WARN("Config1: {}", line1);
68  MACH3LOG_WARN("Config2: {}", line2);
69  }
70  ++lineNumber;
71  }
72  // Check if one file has extra lines
73  while (std::getline(file1, line1)) {
74  MACH3LOG_WARN("Extra line in {} on line {}: {}", File1, lineNumber, line1);
75  ++lineNumber;
76  }
77  while (std::getline(file2, line2)) {
78  MACH3LOG_WARN("Extra line in {} on line {}: {}", File2, lineNumber, line2);
79  ++lineNumber;
80  }
81  return areEqual;
82 }
83 
85 bool checkSoftwareVersions(TFile *file, TFile *prevFile, const std::string& ConfigName, const std::vector<std::string>& SkipVector = {})
86 {
87  bool weirdFile = false;
88 
89  TMacro *versionHeader = file->Get<TMacro>(ConfigName.c_str());
90  TMacro *prevVersionHeader = prevFile->Get<TMacro>(ConfigName.c_str());
91 
92  // EM: compare the digest of the version header file in this file, with the previous one
93  if(!CompareTwoConfigs(TMacroToString(*versionHeader), TMacroToString(*prevVersionHeader), SkipVector)){
94  MACH3LOG_ERROR("Looks like the {} embedded config for file {} is different to the previous ones", ConfigName, file->GetName());
95  MACH3LOG_ERROR("This strongly suggests that this file was made with different software versions than the previous ones");
96  weirdFile = true;
97  }
98 
99  return weirdFile;
100 }
101 
103 void CopyDir(TDirectory *source) {
104  //copy all objects and subdirs of directory source as a subdir of the current directory
105  source->ls();
106  TDirectory *savdir = gDirectory;
107  TDirectory *adir = savdir->Get<TDirectory>(source->GetName());
108  // if directory doesn't exist make it
109  if (!adir) {
110  adir = savdir->mkdir(source->GetName());
111  }
112  adir->cd();
113  //loop on all entries of this directory
114  TKey *key;
115  TIter nextkey(source->GetListOfKeys());
116  while ((key = static_cast<TKey*>(nextkey()))) {
117  const char *classname = key->GetClassName();
118  TClass *cl = gROOT->GetClass(classname);
119  if (!cl) continue;
120  if (cl->InheritsFrom("TDirectory")) {
121  source->cd(key->GetName());
122  TDirectory *subdir = gDirectory;
123  adir->cd();
124  CopyDir(subdir);
125  adir->cd();
126  } else if (cl->InheritsFrom("TTree")) {
127  TTree *T = source->Get<TTree>(key->GetName());
128  adir->cd();
129  TTree *newT = T->CloneTree();
130  newT->Write();
131  } else {
132  source->cd();
133  TObject *obj = key->ReadObj();
134  adir->cd();
135  obj->Write();
136  delete obj;
137  }
138  }
139  adir->SaveSelf(kTRUE);
140  savdir->cd();
141 }
142 
145 bool CompareHistograms(const TH1* h1, const TH1* h2, const std::string& histName, const std::string& folderName)
146 {
147  if (!h1 || !h2) {
148  MACH3LOG_ERROR("Null pointer passed to CompareHistograms for '{}'", histName);
149  return false;
150  }
151 
152  const double int1 = h1->Integral();
153  const double int2 = h2->Integral();
154  if (std::abs(int1 - int2) > 1e-6) {
155  MACH3LOG_ERROR("Histogram '{}' in folder '{}' has different integrals: current = {}, previous = {}",
156  histName, folderName, int1, int2);
157  return false;
158  }
159  return true;
160 }
161 
163 bool CheckFolder(TFile* file, TFile* prevFile, const std::string& FolderName, const std::vector<std::string>& SkipVector = {})
164 {
165  bool mismatch = false;
166  TDirectory* dir = file->GetDirectory(FolderName.c_str());
167  TDirectory* prevDir = prevFile->GetDirectory(FolderName.c_str());
168 
169  if (!dir || !prevDir) {
170  MACH3LOG_ERROR("Could not find folder '{}' in one or both files", FolderName);
171  return true;
172  }
173 
174  TIter nextKey(dir->GetListOfKeys());
175  TKey* key;
176 
177  while ((key = static_cast<TKey*>(nextKey()))) {
178  const std::string objName = key->GetName();
179  TObject* obj = key->ReadObj();
180  if (!obj) continue;
181 
182  // Handle TH1 comparison
183  if (obj->InheritsFrom("TH1")) {
184  TH1* hist = static_cast<TH1*>(obj);
185  TH1* prevHist = dynamic_cast<TH1*>(prevDir->Get(objName.c_str()));
186  if (!prevHist) {
187  MACH3LOG_ERROR("Missing histogram '{}' in previous file (folder '{}')", objName, FolderName);
188  mismatch = true;
189  continue;
190  }
191  if (!CompareHistograms(hist, prevHist, objName, FolderName)) {
192  mismatch = true;
193  }
194  }
195  // Handle TMacro comparison
196  else if (obj->InheritsFrom("TMacro")) {
197  TMacro* macro = static_cast<TMacro*>(obj);
198  TMacro* prevMacro = dynamic_cast<TMacro*>(prevDir->Get(objName.c_str()));
199  if (!prevMacro) {
200  MACH3LOG_ERROR("Missing TMacro '{}' in previous file (folder '{}')", objName, FolderName);
201  mismatch = true;
202  continue;
203  }
204  if (!CompareTwoConfigs(TMacroToString(*macro), TMacroToString(*prevMacro), SkipVector)) {
205  mismatch = true;
206  }
207  }
208  }
209  return mismatch;
210 }
211 
214 void FastMergeTTrees(const std::vector<std::string>& files, const std::string& outFile, const std::string& TTreeName) {
215  TChain chain(TTreeName.c_str());
216  for (const auto& f : files) chain.Add(f.c_str());
217 
218  TFile* outF = TFile::Open(outFile.c_str(), "UPDATE");
219 
220  TTree* newTree = chain.CloneTree(-1, "fast");
221  newTree->SetName(TTreeName.c_str());
222  outF->cd();
223  newTree->Write("", TObject::kOverwrite);
224  outF->Close();
225  delete outF;
226 }
227 
229 {
230  std::string outFileOption;
231  if(forceOverwrite) outFileOption = "RECREATE";
232  else outFileOption = "CREATE";
233 
234  TFile *prevFile = nullptr;
235 
236  // EM: loop through all the files in the provided list, compare the embedded version and config files
237  // If they match, we add the file to the list of files to be merged.
238  // If not, we throw an error and provide a (hopefully) helpful message telling the user why the files couldn't be merged.
239  for(uint fileId = 0; fileId < inpFileList.size(); fileId++)
240  {
241  std::string fileName = inpFileList[fileId];
242  TFile *file = new TFile(fileName.c_str());
243 
244  if(file->Get<TTree>("posteriors")->GetEntries() == 0){
245  MACH3LOG_WARN("Hmmm, file {} Doesn't seem to have any entries", fileName.c_str());
246  MACH3LOG_WARN("That's weird but I guess there's no rule that says a file can't be empty");
247  MACH3LOG_WARN("I'll skip it but maybe double check that this doesn't indicate some deeper problem");
248  continue;
249  }
250 
251  // EM: need to set this in the initial case
252  if(prevFile == nullptr) {
253  prevFile = file;
254  }
255 
256  MACH3LOG_DEBUG("############ File {} #############", fileId);
257 
258  bool weirdFile = false;
259  if(checkSoftwareVersions(file, prevFile, "MaCh3Engine/version_header")) weirdFile = true;
260  if(checkSoftwareVersions(file, prevFile, "MaCh3_Config", {"OutputFile:", "NSteps:"})) weirdFile = true;
261  if(CheckFolder(file, prevFile, "SampleFolder")) weirdFile = true;
262  if(CheckFolder(file, prevFile, "CovarianceFolder")) weirdFile = true;
263 
264  if(weirdFile && !forceMerge){
265  MACH3LOG_ERROR("");
266  MACH3LOG_ERROR("=====================================================================================");
267  MACH3LOG_ERROR("This is not a great idea and could lead to weird outputs and cause some big headaches");
268  MACH3LOG_ERROR("further down the road. But if you reeeeally wanna do it and you know what you're");
269  MACH3LOG_ERROR("doing you can come here and remove the 'throw'");
270  MACH3LOG_ERROR("Or use -m option");
271  MACH3LOG_ERROR("{}:{}", __FILE__, __LINE__ + 2);
272  MACH3LOG_ERROR("=====================================================================================");
273  throw MaCh3Exception(__FILE__ , __LINE__ );
274  }
275 
276  if(prevFile != file) {
277  prevFile->Close();
278  delete prevFile;
279  }
280 
281  // EM: set these for the next iteration
282  prevFile = file;
283  }
284 
285  if (!forceOverwrite && access(OutFileName.c_str(), F_OK) != -1) {
286  MACH3LOG_ERROR("Output file '{}' already exists. Use -f to force overwrite.", OutFileName);
287  throw MaCh3Exception(__FILE__, __LINE__);
288  }
289  //KS: Create new file
290  TFile* outputFile = M3::Open(OutFileName, "recreate", __FILE__, __LINE__);
291  outputFile->Close();
292  delete outputFile;
293 
294  TStopwatch clock;
295  clock.Start();
296 
297  MACH3LOG_INFO("Starting merging");
298  FastMergeTTrees(inpFileList, OutFileName, "posteriors");
300 
301  clock.Stop();
302  MACH3LOG_INFO("Merging of took {:.2f}s to finish", clock.RealTime());
303 
304  //KS: Sadly we need to open file to save TDirectories to not have weird copy of several obejcts there...
305  outputFile = M3::Open(OutFileName, "UPDATE", __FILE__, __LINE__);
306  outputFile->cd();
307 
308  // EM: Write out the version and config files to the combined file
309  std::vector<std::string> configNames = {"MaCh3_Config", "Reweight_Config", "Smearing_Config"};
310  for (std::size_t i = 0; i < configNames.size(); ++i) {
311  const std::string& name = configNames[i];
312  TMacro* macro = prevFile->Get<TMacro>(name.c_str());
313  if (macro != nullptr) {
314  macro->Write();
315  delete macro;
316  }
317  }
318 
319  // Get the source directory
320  TDirectory *MaCh3EngineDir = prevFile->Get<TDirectory>("MaCh3Engine");
321  TDirectory *CovarianceFolderDir = prevFile->Get<TDirectory>("CovarianceFolder");
322  TDirectory *SampleFolderDir = prevFile->Get<TDirectory>("SampleFolder");
323 
324  CopyDir(MaCh3EngineDir);
325  CopyDir(CovarianceFolderDir);
326  CopyDir(SampleFolderDir);
327 
328  outputFile->Close();
329  delete outputFile;
330 
331  delete prevFile;
332  MACH3LOG_INFO("Done!");
333 }
334 
335 void usage(){
336  MACH3LOG_INFO("Combine MaCh3 Chains files, very similar to hadd, but will compare embedded version info in the files to avoid accidentally combining files made with different software versions. Also avoids having a hige dump of separate version files in the output that happens with hadd.");
337  MACH3LOG_INFO("Cmd line syntax should be:");
338  MACH3LOG_INFO("CombineMaCh3Chains [-h] [-c [0-9]] [-f] [-o <output file>] input1.root [input2.root, input3.root ...]");
339  MACH3LOG_INFO("inputX.root : names of individual spline files to combine, can specify any number, need at least one");
340  MACH3LOG_INFO("output file : name of combined spline file. optional: if not specified, the app will just use the first input file as the output, the same as hadd'");
341  MACH3LOG_INFO("-c : target compression level for the combined file, default is 1, in line with hadd");
342  MACH3LOG_INFO("-f : force overwrite the output file if it exists already");
343  MACH3LOG_INFO("-m : merge files in-spite of differences");
344  MACH3LOG_INFO("-h : print this message and exit");
345 }
346 
347 void ParseArg(int argc, char *argv[]){
348  if(argc < 2){
349  MACH3LOG_ERROR("Too few arguments!!");
350  MACH3LOG_ERROR("USAGE:");
351  usage();
352  throw MaCh3Exception(__FILE__ , __LINE__ );
353  }
354 
355  int c;
356  for(;;) {
357  c = getopt(argc, argv, "o:c:mhf");
358  if (c == -1){ // loop over the remaining arguments
359  while (optind < argc){
360  // any non option input is assumed to be a root file
361  std::string fName = std::string(argv[optind]);
362  MACH3LOG_DEBUG("adding {} to file list", fName.c_str());
363  inpFileList.push_back(fName);
364  optind ++;
365  }
366  break;
367  }
368  else{
369  switch (c) {
370  case 'o': {
371  OutFileName = optarg;
372  break;
373  }
374  case 'f': {
375  forceOverwrite = true;
376  break;
377  }
378  case 'c': {
379  targetCompression = atoi(optarg);
380  break;
381  }
382  case 'm': {
383  forceMerge = true;
384  break;
385  }
386  case 'h': {
387  usage();
388  exit(0);
389  }
390  default: {
391  MACH3LOG_ERROR("Unrecognised option");
392  usage();
393  exit(1);
394  }
395  }
396  }
397  }
398 
399  if(OutFileName == ""){
400  MACH3LOG_INFO("Using first file in list as output: ", inpFileList[0].c_str());
402  inpFileList.erase(inpFileList.begin());
403  }
404 
405  if(forceOverwrite){
406  MACH3LOG_INFO("Will overwrite {} if it exists already", OutFileName.c_str());
407  }
408  MACH3LOG_INFO("Combining a total of {} files into {}", inpFileList.size(), OutFileName.c_str());
409 }
410 
411 int main(int argc, char *argv[])
412 {
415  ParseArg(argc, argv);
416  CombineChain();
417  return 0;
418 }
std::string OutFileName
bool forceMerge
int main(int argc, char *argv[])
void CopyDir(TDirectory *source)
When we merge two chains they contain TDirectory objects; ROOT doesn't provide a method for copying these, so here we have thi...
bool CompareHistograms(const TH1 *h1, const TH1 *h2, const std::string &histName, const std::string &folderName)
Check whether two histograms agree (their integrals are compared).
void usage()
bool forceOverwrite
void ParseArg(int argc, char *argv[])
bool checkSoftwareVersions(TFile *file, TFile *prevFile, const std::string &ConfigName, const std::vector< std::string > &SkipVector={})
EM: Will compare the version header contained in the two provided files and shout if they don't match...
int targetCompression
void FastMergeTTrees(const std::vector< std::string > &files, const std::string &outFile, const std::string &TTreeName)
custom function for merging TTree, should be similar to what HADD is using
std::vector< std::string > inpFileList
void CombineChain()
bool CompareTwoConfigs(const std::string &File1, const std::string &File2, const std::vector< std::string > &SkipVector)
make sure two configs are identical but skip specified fields. For example when comparing two chains ...
bool ShouldSkipLine(const std::string &line, const std::vector< std::string > &SkipVector)
KS: This allows us to skip the output name etc. in the config. We expect the output name to be different, but this...
bool CheckFolder(TFile *file, TFile *prevFile, const std::string &FolderName, const std::vector< std::string > &SkipVector={})
Loop through TH1 and TMacro objects in FolderName in 'file' and compare with those in 'prevFile'.
#define _MaCh3_Safe_Include_Start_
KS: Avoiding warning checking for headers.
Definition: Core.h:126
#define _MaCh3_Safe_Include_End_
KS: Restore warning checking after including external headers.
Definition: Core.h:140
#define MACH3LOG_DEBUG
Definition: MaCh3Logger.h:34
#define MACH3LOG_ERROR
Definition: MaCh3Logger.h:37
#define MACH3LOG_INFO
Definition: MaCh3Logger.h:35
void SetMaCh3LoggerFormat()
Set messaging format of the logger.
Definition: MaCh3Logger.h:61
#define MACH3LOG_WARN
Definition: MaCh3Logger.h:36
#define MACH3LOG_TRACE
Definition: MaCh3Logger.h:33
std::string TMacroToString(const TMacro &macro)
KS: Convert a ROOT TMacro object to a string representation.
Definition: YamlHelper.h:123
Custom exception class used throughout MaCh3.
TFile * Open(const std::string &Name, const std::string &Type, const std::string &File, const int Line)
Opens a ROOT file with the given name and mode.
void MaCh3Welcome()
KS: Prints welcome message with MaCh3 logo.
Definition: Monitor.cpp:12