MaCh3  2.2.3
Reference Guide
CombineMaCh3Chains.cpp
Go to the documentation of this file.
1 // C++ includes
2 #include <unistd.h>
3 
4 // MaCh3 includes
5 #include "Manager/Manager.h"
7 
9 // ROOT includes
10 #include "TList.h"
11 #include "TFile.h"
12 #include "TMacro.h"
13 #include "TTree.h"
14 #include "TMD5.h"
15 #include "TFileMerger.h"
16 #include "TKey.h"
17 #include "TROOT.h"
19 
24 
/// Name of the combined output file; when left empty the first positional argument is used instead
std::string OutFileName = "";
/// Target compression level handed to the merger's output file (set with -c)
int targetCompression = 1;
/// Input chain files collected from the positional command line arguments
std::vector<std::string> inpFileList;
/// When true the output file is opened with "RECREATE" instead of "CREATE" (set with -f)
bool forceOverwrite = false;
/// When true files are merged even if the consistency checks report differences
bool forceMerge = false;
30 
31 
32 
34 bool ShouldSkipLine(const std::string& line, const std::vector<std::string>& SkipVector) {
35  // Otherwise, check if the line contains any word from SkipVector
36  for (const auto& word : SkipVector) {
37  MACH3LOG_TRACE("{} : {}",line, word);
38  if (line.find(word) != std::string::npos) {
39  MACH3LOG_TRACE("Found matching word, therefore Skipping");
40  return true;
41  }
42  }
43  return false;
44 }
45 
46 bool CompareTwoConfigs(const std::string& File1, const std::string& File2, const std::vector<std::string>& SkipVector) {
47  std::istringstream file1(File1);
48  std::istringstream file2(File2);
49 
50  std::string line1, line2;
51  int lineNumber = 1;
52  bool areEqual = true;
53 
54  while (std::getline(file1, line1) && std::getline(file2, line2)) {
55  if (ShouldSkipLine(line1, SkipVector) || ShouldSkipLine(line2, SkipVector)) {
56  ++lineNumber;
57  continue;
58  }
59  if (line1 != line2) {
60  areEqual = false;
61  MACH3LOG_WARN("Difference found on line {}:", lineNumber);
62  MACH3LOG_WARN("Config1: {}", line1);
63  MACH3LOG_WARN("Config2: {}", line2);
64  }
65  ++lineNumber;
66  }
67  // Check if one file has extra lines
68  while (std::getline(file1, line1)) {
69  MACH3LOG_WARN("Extra line in {} on line {}: {}", File1, lineNumber, line1);
70  ++lineNumber;
71  }
72  while (std::getline(file2, line2)) {
73  MACH3LOG_WARN("Extra line in {} on line {}: {}", File2, lineNumber, line2);
74  ++lineNumber;
75  }
76  return areEqual;
77 }
78 
80 bool checkSoftwareVersions(TFile *file, TFile *prevFile, const std::string& ConfigName, const std::vector<std::string>& SkipVector = {})
81 {
82  bool weirdFile = false;
83 
84  TMacro *versionHeader = file->Get<TMacro>(ConfigName.c_str());
85  TMacro *prevVersionHeader = prevFile->Get<TMacro>(ConfigName.c_str());
86 
87  // EM: compare the digest of the version header file in this file, with the previous one
88  if(!CompareTwoConfigs(TMacroToString(*versionHeader), TMacroToString(*prevVersionHeader), SkipVector)){
89  MACH3LOG_ERROR("Looks like the {} embedded config for file {} is different to the previous ones", ConfigName, file->GetName());
90  MACH3LOG_ERROR("This strongly suggests that this file was made with different software versions than the previous ones");
91  weirdFile = true;
92  }
93 
94  return weirdFile;
95 }
96 
97 void CopyDir(TDirectory *source) {
98  //copy all objects and subdirs of directory source as a subdir of the current directory
99  source->ls();
100  TDirectory *savdir = gDirectory;
101  TDirectory *adir = savdir->Get<TDirectory>(source->GetName());
102  adir->cd();
103  //loop on all entries of this directory
104  TKey *key;
105  TIter nextkey(source->GetListOfKeys());
106  while ((key = static_cast<TKey*>(nextkey()))) {
107  const char *classname = key->GetClassName();
108  TClass *cl = gROOT->GetClass(classname);
109  if (!cl) continue;
110  if (cl->InheritsFrom("TDirectory")) {
111  source->cd(key->GetName());
112  TDirectory *subdir = gDirectory;
113  adir->cd();
114  CopyDir(subdir);
115  adir->cd();
116  } else if (cl->InheritsFrom("TTree")) {
117  TTree *T = source->Get<TTree>(key->GetName());
118  adir->cd();
119  TTree *newT = T->CloneTree();
120  newT->Write();
121  } else {
122  source->cd();
123  TObject *obj = key->ReadObj();
124  adir->cd();
125  obj->Write();
126  delete obj;
127  }
128  }
129  adir->SaveSelf(kTRUE);
130  savdir->cd();
131 }
132 
135 bool CompareHistograms(const TH1* h1, const TH1* h2, const std::string& histName, const std::string& folderName)
136 {
137  if (!h1 || !h2) {
138  MACH3LOG_ERROR("Null pointer passed to CompareHistograms for '{}'", histName);
139  return false;
140  }
141 
142  const double int1 = h1->Integral();
143  const double int2 = h2->Integral();
144  if (std::abs(int1 - int2) > 1e-6) {
145  MACH3LOG_ERROR("Histogram '{}' in folder '{}' has different integrals: current = {}, previous = {}",
146  histName, folderName, int1, int2);
147  return false;
148  }
149  return true;
150 }
151 
153 bool CheckFolder(TFile* file, TFile* prevFile, const std::string& FolderName, const std::vector<std::string>& SkipVector = {})
154 {
155  bool mismatch = false;
156  TDirectory* dir = file->GetDirectory(FolderName.c_str());
157  TDirectory* prevDir = prevFile->GetDirectory(FolderName.c_str());
158 
159  if (!dir || !prevDir) {
160  MACH3LOG_ERROR("Could not find folder '{}' in one or both files", FolderName);
161  return true;
162  }
163 
164  TIter nextKey(dir->GetListOfKeys());
165  TKey* key;
166 
167  while ((key = static_cast<TKey*>(nextKey()))) {
168  const std::string objName = key->GetName();
169  TObject* obj = key->ReadObj();
170  if (!obj) continue;
171 
172  // Handle TH1 comparison
173  if (obj->InheritsFrom("TH1")) {
174  TH1* hist = static_cast<TH1*>(obj);
175  TH1* prevHist = dynamic_cast<TH1*>(prevDir->Get(objName.c_str()));
176  if (!prevHist) {
177  MACH3LOG_ERROR("Missing histogram '{}' in previous file (folder '{}')", objName, FolderName);
178  mismatch = true;
179  continue;
180  }
181  if (!CompareHistograms(hist, prevHist, objName, FolderName)) {
182  mismatch = true;
183  }
184  }
185  // Handle TMacro comparison
186  else if (obj->InheritsFrom("TMacro")) {
187  TMacro* macro = static_cast<TMacro*>(obj);
188  TMacro* prevMacro = dynamic_cast<TMacro*>(prevDir->Get(objName.c_str()));
189  if (!prevMacro) {
190  MACH3LOG_ERROR("Missing TMacro '{}' in previous file (folder '{}')", objName, FolderName);
191  mismatch = true;
192  continue;
193  }
194  if (!CompareTwoConfigs(TMacroToString(*macro), TMacroToString(*prevMacro), SkipVector)) {
195  mismatch = true;
196  }
197  }
198  }
199  return mismatch;
200 }
201 
203 {
204  TFileMerger *fileMerger = new TFileMerger();
205 
206  // EM: If we ever add new trees to the chain files they will need to be added here too
207  fileMerger->AddObjectNames("posteriors");
208  fileMerger->AddObjectNames("Settings");
209 
210  MACH3LOG_INFO("These objects will be merged: {}", fileMerger->GetObjectNames());
211 
212  std::string outFileOption;
213  if(forceOverwrite) outFileOption = "RECREATE";
214  else outFileOption = "CREATE";
215 
216  // EM: Attempt to open the output file
217  bool openedFile = fileMerger->OutputFile(OutFileName.c_str(), outFileOption.c_str(), targetCompression);
218  if (!openedFile){
219  MACH3LOG_ERROR("Failed to create output file.");
220  throw MaCh3Exception(__FILE__ , __LINE__ );
221  }
222 
223  TFile *prevFile = nullptr;
224 
225  // EM: loop through all the files in the provided list, compare the embedded version and config files
226  // If they match, we add the file to the list of files to be merged.
227  // If not, we throw an error and provide a (hopefully) helpful message telling the user why the files couldn't be merged.
228  for(uint fileId = 0; fileId < inpFileList.size(); fileId++)
229  {
230  std::string fileName = inpFileList[fileId];
231  TFile *file = new TFile(fileName.c_str());
232 
233  if(file->Get<TTree>("posteriors")->GetEntries() == 0){
234  MACH3LOG_WARN("Hmmm, file {} Doesn't seem to have any entries", fileName.c_str());
235  MACH3LOG_WARN("That's weird but I guess there's no rule that says a file can't be empty");
236  MACH3LOG_WARN("I'll skip it but maybe double check that this doesn't indicate some deeper problem");
237  continue;
238  }
239 
240  // EM: need to set this in the initial case
241  if(prevFile == nullptr) {
242  prevFile = file;
243  }
244 
245  MACH3LOG_DEBUG("############ File {} #############", fileId);
246 
247  bool weirdFile = false;
248  if(checkSoftwareVersions(file, prevFile, "MaCh3Engine/version_header")) weirdFile = true;
249  if(checkSoftwareVersions(file, prevFile, "MaCh3_Config", {"OutputFile:", "NSteps:"})) weirdFile = true;
250  if(CheckFolder(file, prevFile, "SampleFolder")) weirdFile = true;
251  if(CheckFolder(file, prevFile, "CovarianceFolder")) weirdFile = true;
252 
253  if(weirdFile && !forceMerge){
254  MACH3LOG_ERROR("");
255  MACH3LOG_ERROR("=====================================================================================");
256  MACH3LOG_ERROR("This is not a great idea and could lead to weird outputs and cause some big headaches");
257  MACH3LOG_ERROR("further down the road. But if you reeeeally wanna do it and you know what you're");
258  MACH3LOG_ERROR("doing you can come here and remove the 'throw'");
259  MACH3LOG_ERROR("Or use -m option");
260  MACH3LOG_ERROR("{}:{}", __FILE__, __LINE__ + 2);
261  MACH3LOG_ERROR("=====================================================================================");
262  throw MaCh3Exception(__FILE__ , __LINE__ );
263  }
264  // EM: file seems good, we'll add the trees to the lists
265  fileMerger->AddFile(file);
266 
267  // EM: set these for the next iteration
268  prevFile = file;
269  }
270 
271  TFile *outputFile = fileMerger->GetOutputFile();
272  outputFile->cd();
273 
274  // EM: Write out the version and config files to the combined file
275  std::vector<std::string> configNames = {"MaCh3_Config", "Reweight_Config", "Smearing_Config"};
276  for (std::size_t i = 0; i < configNames.size(); ++i) {
277  const std::string& name = configNames[i];
278  TMacro* macro = prevFile->Get<TMacro>(name.c_str());
279  if (macro != nullptr) {
280  macro->Write();
281  delete macro;
282  }
283  }
284 
285  // EM: now let's combine all the trees and write to the output file
286  bool mergeSuccess = fileMerger->PartialMerge(TFileMerger::kRegular | TFileMerger::kAll | TFileMerger::kOnlyListed);
287  if(mergeSuccess){
288  MACH3LOG_INFO("Files merged successfully");
289  } else{
290  MACH3LOG_ERROR("Failed to merge files");
291  }
292  delete fileMerger;
293 
294  //KS: Sadly we need to open file to save TDirectories to not have weird copy of several obejcts there...
295  outputFile = new TFile(OutFileName.c_str(), "UPDATE");
296 
297  // Get the source directory
298  TDirectory *MaCh3EngineDir = prevFile->Get<TDirectory>("MaCh3Engine");
299  TDirectory *CovarianceFolderDir = prevFile->Get<TDirectory>("CovarianceFolder");
300  TDirectory *SampleFolderDir = prevFile->Get<TDirectory>("SampleFolder");
301 
302  outputFile->cd();
303  CopyDir(MaCh3EngineDir);
304  CopyDir(CovarianceFolderDir);
305  CopyDir(SampleFolderDir);
306 
307  delete prevFile;
308  MACH3LOG_INFO("Done!");
309 }
310 
311 void usage(){
312  MACH3LOG_INFO("Combine MaCh3 Chains files, very similar to hadd, but will compare embedded version info in the files to avoid accidentally combining files made with different software versions. Also avoids having a hige dump of separate version files in the output that happens with hadd.");
313  MACH3LOG_INFO("Cmd line syntax should be:");
314  MACH3LOG_INFO("CombineMaCh3Chains [-h] [-c [0-9]] [-f] [-o <output file>] input1.root [input2.root, input3.root ...]");
315  MACH3LOG_INFO("inputX.root : names of individual spline files to combine, can specify any number, need at least one");
316  MACH3LOG_INFO("output file : name of combined spline file. optional: if not specified, the app will just use the first input file as the output, the same as hadd'");
317  MACH3LOG_INFO("-c : target compression level for the combined file, default is 1, in line with hadd");
318  MACH3LOG_INFO("-f : force overwrite the output file if it exists already");
319  MACH3LOG_INFO("-m : merge files in-spite of differences");
320  MACH3LOG_INFO("-h : print this message and exit");
321 }
322 
323 void ParseArg(int argc, char *argv[]){
324  if(argc < 2){
325  MACH3LOG_ERROR("Too few arguments!!");
326  MACH3LOG_ERROR("USAGE:");
327  usage();
328  throw MaCh3Exception(__FILE__ , __LINE__ );
329  }
330 
331  int c;
332  for(;;) {
333  c = getopt(argc, argv, "o:c:hf");
334  if (c == -1){ // loop over the remaining arguments
335  while (optind < argc){
336  // any non option input is assumed to be a root file
337  std::string fName = std::string(argv[optind]);
338  MACH3LOG_DEBUG("adding {} to file list", fName.c_str());
339  inpFileList.push_back(fName);
340  optind ++;
341  }
342  break;
343  }
344  else{
345  switch (c) {
346  case 'o': {
347  OutFileName = optarg;
348  break;
349  }
350  case 'f': {
351  forceOverwrite = true;
352  break;
353  }
354  case 'c': {
355  targetCompression = atoi(optarg);
356  break;
357  }
358  case 'm': {
359  forceMerge = true;
360  break;
361  }
362  case 'h': {
363  usage();
364  exit(0);
365  }
366  default: {
367  MACH3LOG_ERROR("Un recognised option");
368  usage();
369  exit(1);
370  }
371  }
372  }
373  }
374 
375  if(OutFileName == ""){
376  MACH3LOG_INFO("Using first file in list as output: ", inpFileList[0].c_str());
378  inpFileList.erase(inpFileList.begin());
379  }
380 
381  if(forceOverwrite){
382  MACH3LOG_INFO("Will overwrite {} if it exists already", OutFileName.c_str());
383  }
384  MACH3LOG_INFO("Combining a total of {} files into {}", inpFileList.size(), OutFileName.c_str());
385 }
386 
387 int main(int argc, char *argv[])
388 {
391  ParseArg(argc, argv);
392  CombineChain();
393  return 0;
394 }
std::string OutFileName
bool forceMerge
int main(int argc, char *argv[])
void CopyDir(TDirectory *source)
bool CompareHistograms(const TH1 *h1, const TH1 *h2, const std::string &histName, const std::string &folderName)
Compare two histograms to check whether they are identical.
void usage()
bool forceOverwrite
void ParseArg(int argc, char *argv[])
bool checkSoftwareVersions(TFile *file, TFile *prevFile, const std::string &ConfigName, const std::vector< std::string > &SkipVector={})
EM: Will compare the version header contained in the two provided files and shout if they don't match...
int targetCompression
std::vector< std::string > inpFileList
void CombineChain()
bool CompareTwoConfigs(const std::string &File1, const std::string &File2, const std::vector< std::string > &SkipVector)
bool ShouldSkipLine(const std::string &line, const std::vector< std::string > &SkipVector)
KS: This allows us to skip the output name etc. in the config. We expect the output name will be different but this...
bool CheckFolder(TFile *file, TFile *prevFile, const std::string &FolderName, const std::vector< std::string > &SkipVector={})
Loop through TH1 and TMacro objects in FolderName in 'file' and compare with those in 'prevFile'.
#define _MaCh3_Safe_Include_Start_
KS: Avoiding warning checking for headers.
Definition: Core.h:109
#define _MaCh3_Safe_Include_End_
KS: Restore warning checking after including external headers.
Definition: Core.h:120
#define MACH3LOG_DEBUG
Definition: MaCh3Logger.h:24
#define MACH3LOG_ERROR
Definition: MaCh3Logger.h:27
#define MACH3LOG_INFO
Definition: MaCh3Logger.h:25
void SetMaCh3LoggerFormat()
Set messaging format of the logger.
Definition: MaCh3Logger.h:51
#define MACH3LOG_WARN
Definition: MaCh3Logger.h:26
#define MACH3LOG_TRACE
Definition: MaCh3Logger.h:23
std::string TMacroToString(const TMacro &macro)
KS: Convert a ROOT TMacro object to a string representation.
Definition: YamlHelper.h:118
Custom exception class for MaCh3 errors.
void MaCh3Welcome()
KS: Prints welcome message with MaCh3 logo.
Definition: Monitor.cpp:12