MaCh3  2.2.3
Reference Guide
Functions | Variables
CombineMaCh3Chains.cpp File Reference

Combine chain files produced by MCMC, enforcing the condition that all files to be combined were made using exactly the same software versions and config files. More...

#include <unistd.h>
#include "Manager/Manager.h"
#include "Samples/SampleStructs.h"
#include "TList.h"
#include "TFile.h"
#include "TMacro.h"
#include "TTree.h"
#include "TMD5.h"
#include "TFileMerger.h"
#include "TKey.h"
#include "TROOT.h"
Include dependency graph for CombineMaCh3Chains.cpp:

Go to the source code of this file.

Functions

bool ShouldSkipLine (const std::string &line, const std::vector< std::string > &SkipVector)
 KS: This allows us to skip the output name etc. in the config. We expect the output name to differ between chains, but this doesn't invalidate chain merging. More...
 
bool CompareTwoConfigs (const std::string &File1, const std::string &File2, const std::vector< std::string > &SkipVector)
 
bool checkSoftwareVersions (TFile *file, TFile *prevFile, const std::string &ConfigName, const std::vector< std::string > &SkipVector={})
 EM: Will compare the version header contained in the two provided files and shout if they don't match. More...
 
void CopyDir (TDirectory *source)
 
bool CompareHistograms (const TH1 *h1, const TH1 *h2, const std::string &histName, const std::string &folderName)
 Check whether two histograms are identical. More...
 
bool CheckFolder (TFile *file, TFile *prevFile, const std::string &FolderName, const std::vector< std::string > &SkipVector={})
 Loop through TH1 and TMacro objects in FolderName in 'file' and compare with those in 'prevFile'. More...
 
void CombineChain ()
 
void usage ()
 
void ParseArg (int argc, char *argv[])
 
int main (int argc, char *argv[])
 

Variables

std::string OutFileName = ""
 
int targetCompression = 1
 
std::vector< std::string > inpFileList
 
bool forceOverwrite = false
 
bool forceMerge = false
 

Detailed Description

Combine chain files produced by MCMC, enforcing the condition that all files to be combined were made using exactly the same software versions and config files.

Author
Ewan Miller
Kamil Skwarczynski

Definition in file CombineMaCh3Chains.cpp.

Function Documentation

◆ CheckFolder()

bool CheckFolder ( TFile *  file,
TFile *  prevFile,
const std::string &  FolderName,
const std::vector< std::string > &  SkipVector = {} 
)

Loop through TH1 and TMacro objects in FolderName in 'file' and compare with those in 'prevFile'.

Definition at line 153 of file CombineMaCh3Chains.cpp.

153  {})
154 {
155  bool mismatch = false;
156  TDirectory* dir = file->GetDirectory(FolderName.c_str());
157  TDirectory* prevDir = prevFile->GetDirectory(FolderName.c_str());
158 
159  if (!dir || !prevDir) {
160  MACH3LOG_ERROR("Could not find folder '{}' in one or both files", FolderName);
161  return true;
162  }
163 
164  TIter nextKey(dir->GetListOfKeys());
165  TKey* key;
166 
167  while ((key = static_cast<TKey*>(nextKey()))) {
168  const std::string objName = key->GetName();
169  TObject* obj = key->ReadObj();
170  if (!obj) continue;
171 
172  // Handle TH1 comparison
173  if (obj->InheritsFrom("TH1")) {
174  TH1* hist = static_cast<TH1*>(obj);
175  TH1* prevHist = dynamic_cast<TH1*>(prevDir->Get(objName.c_str()));
176  if (!prevHist) {
177  MACH3LOG_ERROR("Missing histogram '{}' in previous file (folder '{}')", objName, FolderName);
178  mismatch = true;
179  continue;
180  }
181  if (!CompareHistograms(hist, prevHist, objName, FolderName)) {
182  mismatch = true;
183  }
184  }
185  // Handle TMacro comparison
186  else if (obj->InheritsFrom("TMacro")) {
187  TMacro* macro = static_cast<TMacro*>(obj);
188  TMacro* prevMacro = dynamic_cast<TMacro*>(prevDir->Get(objName.c_str()));
189  if (!prevMacro) {
190  MACH3LOG_ERROR("Missing TMacro '{}' in previous file (folder '{}')", objName, FolderName);
191  mismatch = true;
192  continue;
193  }
194  if (!CompareTwoConfigs(TMacroToString(*macro), TMacroToString(*prevMacro), SkipVector)) {
195  mismatch = true;
196  }
197  }
198  }
199  return mismatch;
200 }
bool CompareHistograms(const TH1 *h1, const TH1 *h2, const std::string &histName, const std::string &folderName)
Check whether two histograms are identical.
bool CompareTwoConfigs(const std::string &File1, const std::string &File2, const std::vector< std::string > &SkipVector)
#define MACH3LOG_ERROR
Definition: MaCh3Logger.h:27
std::string TMacroToString(const TMacro &macro)
KS: Convert a ROOT TMacro object to a string representation.
Definition: YamlHelper.h:118

◆ checkSoftwareVersions()

bool checkSoftwareVersions ( TFile *  file,
TFile *  prevFile,
const std::string &  ConfigName,
const std::vector< std::string > &  SkipVector = {} 
)

EM: Will compare the version header contained in the two provided files and shout if they don't match.

Definition at line 80 of file CombineMaCh3Chains.cpp.

80  {})
81 {
82  bool weirdFile = false;
83 
84  TMacro *versionHeader = file->Get<TMacro>(ConfigName.c_str());
85  TMacro *prevVersionHeader = prevFile->Get<TMacro>(ConfigName.c_str());
86 
87  // EM: compare the digest of the version header file in this file, with the previous one
88  if(!CompareTwoConfigs(TMacroToString(*versionHeader), TMacroToString(*prevVersionHeader), SkipVector)){
89  MACH3LOG_ERROR("Looks like the {} embedded config for file {} is different to the previous ones", ConfigName, file->GetName());
90  MACH3LOG_ERROR("This strongly suggests that this file was made with different software versions than the previous ones");
91  weirdFile = true;
92  }
93 
94  return weirdFile;
95 }

◆ CombineChain()

void CombineChain ( )

Definition at line 202 of file CombineMaCh3Chains.cpp.

203 {
204  TFileMerger *fileMerger = new TFileMerger();
205 
206  // EM: If we ever add new trees to the chain files they will need to be added here too
207  fileMerger->AddObjectNames("posteriors");
208  fileMerger->AddObjectNames("Settings");
209 
210  MACH3LOG_INFO("These objects will be merged: {}", fileMerger->GetObjectNames());
211 
212  std::string outFileOption;
213  if(forceOverwrite) outFileOption = "RECREATE";
214  else outFileOption = "CREATE";
215 
216  // EM: Attempt to open the output file
217  bool openedFile = fileMerger->OutputFile(OutFileName.c_str(), outFileOption.c_str(), targetCompression);
218  if (!openedFile){
219  MACH3LOG_ERROR("Failed to create output file.");
220  throw MaCh3Exception(__FILE__ , __LINE__ );
221  }
222 
223  TFile *prevFile = nullptr;
224 
225  // EM: loop through all the files in the provided list, compare the embedded version and config files
226  // If they match, we add the file to the list of files to be merged.
227  // If not, we throw an error and provide a (hopefully) helpful message telling the user why the files couldn't be merged.
228  for(uint fileId = 0; fileId < inpFileList.size(); fileId++)
229  {
230  std::string fileName = inpFileList[fileId];
231  TFile *file = new TFile(fileName.c_str());
232 
233  if(file->Get<TTree>("posteriors")->GetEntries() == 0){
234  MACH3LOG_WARN("Hmmm, file {} Doesn't seem to have any entries", fileName.c_str());
235  MACH3LOG_WARN("That's weird but I guess there's no rule that says a file can't be empty");
236  MACH3LOG_WARN("I'll skip it but maybe double check that this doesn't indicate some deeper problem");
237  continue;
238  }
239 
240  // EM: need to set this in the initial case
241  if(prevFile == nullptr) {
242  prevFile = file;
243  }
244 
245  MACH3LOG_DEBUG("############ File {} #############", fileId);
246 
247  bool weirdFile = false;
248  if(checkSoftwareVersions(file, prevFile, "MaCh3Engine/version_header")) weirdFile = true;
249  if(checkSoftwareVersions(file, prevFile, "MaCh3_Config", {"OutputFile:", "NSteps:"})) weirdFile = true;
250  if(CheckFolder(file, prevFile, "SampleFolder")) weirdFile = true;
251  if(CheckFolder(file, prevFile, "CovarianceFolder")) weirdFile = true;
252 
253  if(weirdFile && !forceMerge){
254  MACH3LOG_ERROR("");
255  MACH3LOG_ERROR("=====================================================================================");
256  MACH3LOG_ERROR("This is not a great idea and could lead to weird outputs and cause some big headaches");
257  MACH3LOG_ERROR("further down the road. But if you reeeeally wanna do it and you know what you're");
258  MACH3LOG_ERROR("doing you can come here and remove the 'throw'");
259  MACH3LOG_ERROR("Or use -m option");
260  MACH3LOG_ERROR("{}:{}", __FILE__, __LINE__ + 2);
261  MACH3LOG_ERROR("=====================================================================================");
262  throw MaCh3Exception(__FILE__ , __LINE__ );
263  }
264  // EM: file seems good, we'll add the trees to the lists
265  fileMerger->AddFile(file);
266 
267  // EM: set these for the next iteration
268  prevFile = file;
269  }
270 
271  TFile *outputFile = fileMerger->GetOutputFile();
272  outputFile->cd();
273 
274  // EM: Write out the version and config files to the combined file
275  std::vector<std::string> configNames = {"MaCh3_Config", "Reweight_Config", "Smearing_Config"};
276  for (std::size_t i = 0; i < configNames.size(); ++i) {
277  const std::string& name = configNames[i];
278  TMacro* macro = prevFile->Get<TMacro>(name.c_str());
279  if (macro != nullptr) {
280  macro->Write();
281  delete macro;
282  }
283  }
284 
285  // EM: now let's combine all the trees and write to the output file
286  bool mergeSuccess = fileMerger->PartialMerge(TFileMerger::kRegular | TFileMerger::kAll | TFileMerger::kOnlyListed);
287  if(mergeSuccess){
288  MACH3LOG_INFO("Files merged successfully");
289  } else{
290  MACH3LOG_ERROR("Failed to merge files");
291  }
292  delete fileMerger;
293 
294  //KS: Sadly we need to open file to save TDirectories to not have weird copy of several obejcts there...
295  outputFile = new TFile(OutFileName.c_str(), "UPDATE");
296 
297  // Get the source directory
298  TDirectory *MaCh3EngineDir = prevFile->Get<TDirectory>("MaCh3Engine");
299  TDirectory *CovarianceFolderDir = prevFile->Get<TDirectory>("CovarianceFolder");
300  TDirectory *SampleFolderDir = prevFile->Get<TDirectory>("SampleFolder");
301 
302  outputFile->cd();
303  CopyDir(MaCh3EngineDir);
304  CopyDir(CovarianceFolderDir);
305  CopyDir(SampleFolderDir);
306 
307  delete prevFile;
308  MACH3LOG_INFO("Done!");
309 }
std::string OutFileName
bool forceMerge
void CopyDir(TDirectory *source)
bool forceOverwrite
bool checkSoftwareVersions(TFile *file, TFile *prevFile, const std::string &ConfigName, const std::vector< std::string > &SkipVector={})
EM: Will compare the version header contained in the two provided files and shout if they don't match...
int targetCompression
std::vector< std::string > inpFileList
bool CheckFolder(TFile *file, TFile *prevFile, const std::string &FolderName, const std::vector< std::string > &SkipVector={})
Loop through TH1 and TMacro objects in FolderName in 'file' and compare with those in 'prevFile'.
#define MACH3LOG_DEBUG
Definition: MaCh3Logger.h:24
#define MACH3LOG_INFO
Definition: MaCh3Logger.h:25
#define MACH3LOG_WARN
Definition: MaCh3Logger.h:26
Custom exception class for MaCh3 errors.

◆ CompareHistograms()

bool CompareHistograms ( const TH1 *  h1,
const TH1 *  h2,
const std::string &  histName,
const std::string &  folderName 
)

Check whether two histograms are identical.

Todo:
add checks for stuff like bin content etc

Definition at line 135 of file CombineMaCh3Chains.cpp.

136 {
137  if (!h1 || !h2) {
138  MACH3LOG_ERROR("Null pointer passed to CompareHistograms for '{}'", histName);
139  return false;
140  }
141 
142  const double int1 = h1->Integral();
143  const double int2 = h2->Integral();
144  if (std::abs(int1 - int2) > 1e-6) {
145  MACH3LOG_ERROR("Histogram '{}' in folder '{}' has different integrals: current = {}, previous = {}",
146  histName, folderName, int1, int2);
147  return false;
148  }
149  return true;
150 }

◆ CompareTwoConfigs()

bool CompareTwoConfigs ( const std::string &  File1,
const std::string &  File2,
const std::vector< std::string > &  SkipVector 
)

Definition at line 46 of file CombineMaCh3Chains.cpp.

46  {
47  std::istringstream file1(File1);
48  std::istringstream file2(File2);
49 
50  std::string line1, line2;
51  int lineNumber = 1;
52  bool areEqual = true;
53 
54  while (std::getline(file1, line1) && std::getline(file2, line2)) {
55  if (ShouldSkipLine(line1, SkipVector) || ShouldSkipLine(line2, SkipVector)) {
56  ++lineNumber;
57  continue;
58  }
59  if (line1 != line2) {
60  areEqual = false;
61  MACH3LOG_WARN("Difference found on line {}:", lineNumber);
62  MACH3LOG_WARN("Config1: {}", line1);
63  MACH3LOG_WARN("Config2: {}", line2);
64  }
65  ++lineNumber;
66  }
67  // Check if one file has extra lines
68  while (std::getline(file1, line1)) {
69  MACH3LOG_WARN("Extra line in {} on line {}: {}", File1, lineNumber, line1);
70  ++lineNumber;
71  }
72  while (std::getline(file2, line2)) {
73  MACH3LOG_WARN("Extra line in {} on line {}: {}", File2, lineNumber, line2);
74  ++lineNumber;
75  }
76  return areEqual;
77 }
bool ShouldSkipLine(const std::string &line, const std::vector< std::string > &SkipVector)
KS: This allow us to skip output name etc in config. We expect Output name will be different but this...

◆ CopyDir()

void CopyDir ( TDirectory *  source)

Definition at line 97 of file CombineMaCh3Chains.cpp.

97  {
98  //copy all objects and subdirs of directory source as a subdir of the current directory
99  source->ls();
100  TDirectory *savdir = gDirectory;
101  TDirectory *adir = savdir->Get<TDirectory>(source->GetName());
102  adir->cd();
103  //loop on all entries of this directory
104  TKey *key;
105  TIter nextkey(source->GetListOfKeys());
106  while ((key = static_cast<TKey*>(nextkey()))) {
107  const char *classname = key->GetClassName();
108  TClass *cl = gROOT->GetClass(classname);
109  if (!cl) continue;
110  if (cl->InheritsFrom("TDirectory")) {
111  source->cd(key->GetName());
112  TDirectory *subdir = gDirectory;
113  adir->cd();
114  CopyDir(subdir);
115  adir->cd();
116  } else if (cl->InheritsFrom("TTree")) {
117  TTree *T = source->Get<TTree>(key->GetName());
118  adir->cd();
119  TTree *newT = T->CloneTree();
120  newT->Write();
121  } else {
122  source->cd();
123  TObject *obj = key->ReadObj();
124  adir->cd();
125  obj->Write();
126  delete obj;
127  }
128  }
129  adir->SaveSelf(kTRUE);
130  savdir->cd();
131 }

◆ main()

int main ( int  argc,
char *  argv[] 
)

Definition at line 387 of file CombineMaCh3Chains.cpp.

388 {
391  ParseArg(argc, argv);
392  CombineChain();
393  return 0;
394 }
void ParseArg(int argc, char *argv[])
void CombineChain()
void SetMaCh3LoggerFormat()
Set messaging format of the logger.
Definition: MaCh3Logger.h:51
void MaCh3Welcome()
KS: Prints welcome message with MaCh3 logo.
Definition: Monitor.cpp:12

◆ ParseArg()

void ParseArg ( int  argc,
char *  argv[] 
)

Definition at line 323 of file CombineMaCh3Chains.cpp.

323  {
324  if(argc < 2){
325  MACH3LOG_ERROR("Too few arguments!!");
326  MACH3LOG_ERROR("USAGE:");
327  usage();
328  throw MaCh3Exception(__FILE__ , __LINE__ );
329  }
330 
331  int c;
332  for(;;) {
333  c = getopt(argc, argv, "o:c:hf");
334  if (c == -1){ // loop over the remaining arguments
335  while (optind < argc){
336  // any non option input is assumed to be a root file
337  std::string fName = std::string(argv[optind]);
338  MACH3LOG_DEBUG("adding {} to file list", fName.c_str());
339  inpFileList.push_back(fName);
340  optind ++;
341  }
342  break;
343  }
344  else{
345  switch (c) {
346  case 'o': {
347  OutFileName = optarg;
348  break;
349  }
350  case 'f': {
351  forceOverwrite = true;
352  break;
353  }
354  case 'c': {
355  targetCompression = atoi(optarg);
356  break;
357  }
358  case 'm': {
359  forceMerge = true;
360  break;
361  }
362  case 'h': {
363  usage();
364  exit(0);
365  }
366  default: {
367  MACH3LOG_ERROR("Un recognised option");
368  usage();
369  exit(1);
370  }
371  }
372  }
373  }
374 
375  if(OutFileName == ""){
376  MACH3LOG_INFO("Using first file in list as output: ", inpFileList[0].c_str());
378  inpFileList.erase(inpFileList.begin());
379  }
380 
381  if(forceOverwrite){
382  MACH3LOG_INFO("Will overwrite {} if it exists already", OutFileName.c_str());
383  }
384  MACH3LOG_INFO("Combining a total of {} files into {}", inpFileList.size(), OutFileName.c_str());
385 }
void usage()

◆ ShouldSkipLine()

bool ShouldSkipLine ( const std::string &  line,
const std::vector< std::string > &  SkipVector 
)

KS: This allows us to skip the output name etc. in the config. We expect the output name to differ between chains, but this doesn't invalidate chain merging.

Definition at line 34 of file CombineMaCh3Chains.cpp.

34  {
35  // Otherwise, check if the line contains any word from SkipVector
36  for (const auto& word : SkipVector) {
37  MACH3LOG_TRACE("{} : {}",line, word);
38  if (line.find(word) != std::string::npos) {
39  MACH3LOG_TRACE("Found matching word, therefore Skipping");
40  return true;
41  }
42  }
43  return false;
44 }
#define MACH3LOG_TRACE
Definition: MaCh3Logger.h:23

◆ usage()

void usage ( )

Definition at line 311 of file CombineMaCh3Chains.cpp.

311  {
312  MACH3LOG_INFO("Combine MaCh3 Chains files, very similar to hadd, but will compare embedded version info in the files to avoid accidentally combining files made with different software versions. Also avoids having a hige dump of separate version files in the output that happens with hadd.");
313  MACH3LOG_INFO("Cmd line syntax should be:");
314  MACH3LOG_INFO("CombineMaCh3Chains [-h] [-c [0-9]] [-f] [-o <output file>] input1.root [input2.root, input3.root ...]");
315  MACH3LOG_INFO("inputX.root : names of individual spline files to combine, can specify any number, need at least one");
316  MACH3LOG_INFO("output file : name of combined spline file. optional: if not specified, the app will just use the first input file as the output, the same as hadd'");
317  MACH3LOG_INFO("-c : target compression level for the combined file, default is 1, in line with hadd");
318  MACH3LOG_INFO("-f : force overwrite the output file if it exists already");
319  MACH3LOG_INFO("-m : merge files in-spite of differences");
320  MACH3LOG_INFO("-h : print this message and exit");
321 }

Variable Documentation

◆ forceMerge

bool forceMerge = false

Definition at line 29 of file CombineMaCh3Chains.cpp.

◆ forceOverwrite

bool forceOverwrite = false

Definition at line 28 of file CombineMaCh3Chains.cpp.

◆ inpFileList

std::vector<std::string> inpFileList

Definition at line 27 of file CombineMaCh3Chains.cpp.

◆ OutFileName

std::string OutFileName = ""

Definition at line 25 of file CombineMaCh3Chains.cpp.

◆ targetCompression

int targetCompression = 1

Definition at line 26 of file CombineMaCh3Chains.cpp.