MaCh3 2.2.1
Reference Guide
Loading...
Searching...
No Matches
Functions | Variables
CombineMaCh3Chains.cpp File Reference

Combine chain files produced by MCMC, enforcing the condition that all the files to be combined were made using exactly the same software versions and config files. More...

#include <unistd.h>
#include "Manager/Manager.h"
#include "Samples/SampleStructs.h"
#include "TList.h"
#include "TFile.h"
#include "TMacro.h"
#include "TTree.h"
#include "TMD5.h"
#include "TFileMerger.h"
#include "TKey.h"
#include "TROOT.h"
Include dependency graph for CombineMaCh3Chains.cpp:

Go to the source code of this file.

Functions

bool ShouldSkipLine (const std::string &line, const std::vector< std::string > &SkipVector)
 KS: This allows us to skip the output name etc. in the config. We expect the output name to be different, but this doesn't invalidate chain merging.
 
bool CompareTwoConfigs (const std::string &File1, const std::string &File2, const std::vector< std::string > &SkipVector)
 
bool checkSoftwareVersions (TFile *file, TFile *prevFile, const std::string &ConfigName, const std::vector< std::string > &SkipVector={})
 EM: Will compare the version header contained in the two provided files and shout if they don't match.
 
void CopyDir (TDirectory *source)
 
bool CompareHistograms (const TH1 *h1, const TH1 *h2, const std::string &histName, const std::string &folderName)
 Check whether two histograms are identical.
 
bool CheckFolder (TFile *file, TFile *prevFile, const std::string &FolderName, const std::vector< std::string > &SkipVector={})
 Loop through TH1 and TMacro objects in FolderName in 'file' and compare with those in 'prevFile'.
 
void CombineChain ()
 
void usage ()
 
void ParseArg (int argc, char *argv[])
 
int main (int argc, char *argv[])
 

Variables

std::string OutFileName = ""
 
int targetCompression = 1
 
std::vector< std::string > inpFileList
 
bool forceOverwrite = false
 
bool forceMerge = false
 

Detailed Description

Combine chain files produced by MCMC, enforcing the condition that all the files to be combined were made using exactly the same software versions and config files.

Author
Ewan Miller
Kamil Skwarczynski

Definition in file CombineMaCh3Chains.cpp.

Function Documentation

◆ CheckFolder()

bool CheckFolder ( TFile *  file,
TFile *  prevFile,
const std::string &  FolderName,
const std::vector< std::string > &  SkipVector = {} 
)

Loop through TH1 and TMacro objects in FolderName in 'file' and compare with those in 'prevFile'.

Definition at line 153 of file CombineMaCh3Chains.cpp.

153 {})
154{
155 bool mismatch = false;
156 TDirectory* dir = file->GetDirectory(FolderName.c_str());
157 TDirectory* prevDir = prevFile->GetDirectory(FolderName.c_str());
158
159 if (!dir || !prevDir) {
160 MACH3LOG_ERROR("Could not find folder '{}' in one or both files", FolderName);
161 return true;
162 }
163
164 TIter nextKey(dir->GetListOfKeys());
165 TKey* key;
166
167 while ((key = static_cast<TKey*>(nextKey()))) {
168 const std::string objName = key->GetName();
169 TObject* obj = key->ReadObj();
170 if (!obj) continue;
171
172 // Handle TH1 comparison
173 if (obj->InheritsFrom("TH1")) {
174 TH1* hist = static_cast<TH1*>(obj);
175 TH1* prevHist = dynamic_cast<TH1*>(prevDir->Get(objName.c_str()));
176 if (!prevHist) {
177 MACH3LOG_ERROR("Missing histogram '{}' in previous file (folder '{}')", objName, FolderName);
178 mismatch = true;
179 continue;
180 }
181 if (!CompareHistograms(hist, prevHist, objName, FolderName)) {
182 mismatch = true;
183 }
184 }
185 // Handle TMacro comparison
186 else if (obj->InheritsFrom("TMacro")) {
187 TMacro* macro = static_cast<TMacro*>(obj);
188 TMacro* prevMacro = dynamic_cast<TMacro*>(prevDir->Get(objName.c_str()));
189 if (!prevMacro) {
190 MACH3LOG_ERROR("Missing TMacro '{}' in previous file (folder '{}')", objName, FolderName);
191 mismatch = true;
192 continue;
193 }
194 if (!CompareTwoConfigs(TMacroToString(*macro), TMacroToString(*prevMacro), SkipVector)) {
195 mismatch = true;
196 }
197 }
198 }
199 return mismatch;
200}
bool CompareHistograms(const TH1 *h1, const TH1 *h2, const std::string &histName, const std::string &folderName)
Compare two histograms if they are identical.
bool CompareTwoConfigs(const std::string &File1, const std::string &File2, const std::vector< std::string > &SkipVector)
#define MACH3LOG_ERROR
Definition: MaCh3Logger.h:25
std::string TMacroToString(const TMacro &macro)
KS: Convert a ROOT TMacro object to a string representation.
Definition: YamlHelper.h:117

◆ checkSoftwareVersions()

bool checkSoftwareVersions ( TFile *  file,
TFile *  prevFile,
const std::string &  ConfigName,
const std::vector< std::string > &  SkipVector = {} 
)

EM: Will compare the version header contained in the two provided files and shout if they don't match.

Definition at line 80 of file CombineMaCh3Chains.cpp.

80 {})
81{
82 bool weirdFile = false;
83
84 TMacro *versionHeader = file->Get<TMacro>(ConfigName.c_str());
85 TMacro *prevVersionHeader = prevFile->Get<TMacro>(ConfigName.c_str());
86
87 // EM: compare the digest of the version header file in this file, with the previous one
88 if(!CompareTwoConfigs(TMacroToString(*versionHeader), TMacroToString(*prevVersionHeader), SkipVector)){
89 MACH3LOG_ERROR("Looks like the {} embedded config for file {} is different to the previous ones", ConfigName, file->GetName());
90 MACH3LOG_ERROR("This strongly suggests that this file was made with different software versions than the previous ones");
91 weirdFile = true;
92 }
93
94 return weirdFile;
95}

◆ CombineChain()

void CombineChain ( )

Definition at line 202 of file CombineMaCh3Chains.cpp.

203{
204 TFileMerger *fileMerger = new TFileMerger();
205
206 // EM: If we ever add new trees to the chain files they will need to be added here too
207 fileMerger->AddObjectNames("posteriors");
208 fileMerger->AddObjectNames("Settings");
209
210 MACH3LOG_INFO("These objects will be merged: {}", fileMerger->GetObjectNames());
211
212 std::string outFileOption;
213 if(forceOverwrite) outFileOption = "RECREATE";
214 else outFileOption = "CREATE";
215
216 // EM: Attempt to open the output file
217 bool openedFile = fileMerger->OutputFile(OutFileName.c_str(), outFileOption.c_str(), targetCompression);
218 if (!openedFile){
219 MACH3LOG_ERROR("Failed to create output file.");
220 throw MaCh3Exception(__FILE__ , __LINE__ );
221 }
222
223 TFile *prevFile = nullptr;
224
225 // EM: loop through all the files in the provided list, compare the embedded version and config files
226 // If they match, we add the file to the list of files to be merged.
227 // If not, we throw an error and provide a (hopefully) helpful message telling the user why the files couldn't be merged.
228 for(uint fileId = 0; fileId < inpFileList.size(); fileId++)
229 {
230 std::string fileName = inpFileList[fileId];
231 TFile *file = new TFile(fileName.c_str());
232
233 if(file->Get<TTree>("posteriors")->GetEntries() == 0){
234 MACH3LOG_WARN("Hmmm, file {} Doesn't seem to have any entries", fileName.c_str());
235 MACH3LOG_WARN("That's weird but I guess there's no rule that says a file can't be empty");
236 MACH3LOG_WARN("I'll skip it but maybe double check that this doesn't indicate some deeper problem");
237 continue;
238 }
239
240 // EM: need to set this in the initial case
241 if(prevFile == nullptr) {
242 prevFile = file;
243 }
244
245 MACH3LOG_DEBUG("############ File {} #############", fileId);
246
247 bool weirdFile = false;
248 if(checkSoftwareVersions(file, prevFile, "MaCh3Engine/version_header")) weirdFile = true;
249 if(checkSoftwareVersions(file, prevFile, "MaCh3_Config", {"OutputFile:", "NSteps:"})) weirdFile = true;
250 if(CheckFolder(file, prevFile, "SampleFolder")) weirdFile = true;
251 if(CheckFolder(file, prevFile, "CovarianceFolder")) weirdFile = true;
252
253 if(weirdFile && !forceMerge){
254 MACH3LOG_ERROR("");
255 MACH3LOG_ERROR("=====================================================================================");
256 MACH3LOG_ERROR("This is not a great idea and could lead to weird outputs and cause some big headaches");
257 MACH3LOG_ERROR("further down the road. But if you reeeeally wanna do it and you know what you're");
258 MACH3LOG_ERROR("doing you can come here and remove the 'throw'");
259 MACH3LOG_ERROR("Or use -m option");
260 MACH3LOG_ERROR("{}:{}", __FILE__, __LINE__ + 2);
261 MACH3LOG_ERROR("=====================================================================================");
262 throw MaCh3Exception(__FILE__ , __LINE__ );
263 }
264 // EM: file seems good, we'll add the trees to the lists
265 fileMerger->AddFile(file);
266
267 // EM: set these for the next iteration
268 prevFile = file;
269 }
270
271 TFile *outputFile = fileMerger->GetOutputFile();
272 outputFile->cd();
273
274 // EM: write out the version and config files to the combined file
275 TMacro *MaCh3_Config = prevFile->Get<TMacro>("MaCh3_Config");
276
277 if(MaCh3_Config != NULL) MaCh3_Config->Write();
278 delete MaCh3_Config;
279
280 // EM: now let's combine all the trees and write to the output file
281 bool mergeSuccess = fileMerger->PartialMerge(TFileMerger::kRegular | TFileMerger::kAll | TFileMerger::kOnlyListed);
282 if(mergeSuccess){
283 MACH3LOG_INFO("Files merged successfully");
284 } else{
285 MACH3LOG_ERROR("Failed to merge files");
286 }
287 delete fileMerger;
288
289 //KS: Sadly we need to open file to save TDirectories to not have weird copy of several obejcts there...
290 outputFile = new TFile(OutFileName.c_str(), "UPDATE");
291
292 // Get the source directory
293 TDirectory *MaCh3EngineDir = prevFile->Get<TDirectory>("MaCh3Engine");
294 TDirectory *CovarianceFolderDir = prevFile->Get<TDirectory>("CovarianceFolder");
295 TDirectory *SampleFolderDir = prevFile->Get<TDirectory>("SampleFolder");
296
297 outputFile->cd();
298 CopyDir(MaCh3EngineDir);
299 CopyDir(CovarianceFolderDir);
300 CopyDir(SampleFolderDir);
301
302 delete prevFile;
303 MACH3LOG_INFO("Done!");
304}
std::string OutFileName
bool forceMerge
void CopyDir(TDirectory *source)
bool forceOverwrite
bool checkSoftwareVersions(TFile *file, TFile *prevFile, const std::string &ConfigName, const std::vector< std::string > &SkipVector={})
EM: Will compare the version header contained in the two provided files and shout if they don't match...
int targetCompression
std::vector< std::string > inpFileList
bool CheckFolder(TFile *file, TFile *prevFile, const std::string &FolderName, const std::vector< std::string > &SkipVector={})
Loop through TH1 and TMacro objects in FolderName in 'file' and compare with those in 'prevFile'.
#define MACH3LOG_DEBUG
Definition: MaCh3Logger.h:22
#define MACH3LOG_INFO
Definition: MaCh3Logger.h:23
#define MACH3LOG_WARN
Definition: MaCh3Logger.h:24
Custom exception class for MaCh3 errors.

◆ CompareHistograms()

bool CompareHistograms ( const TH1 *  h1,
const TH1 *  h2,
const std::string &  histName,
const std::string &  folderName 
)

Check whether two histograms are identical.

Todo:
add checks for stuff like bin content etc

Definition at line 135 of file CombineMaCh3Chains.cpp.

136{
137 if (!h1 || !h2) {
138 MACH3LOG_ERROR("Null pointer passed to CompareHistograms for '{}'", histName);
139 return false;
140 }
141
142 const double int1 = h1->Integral();
143 const double int2 = h2->Integral();
144 if (std::abs(int1 - int2) > 1e-6) {
145 MACH3LOG_ERROR("Histogram '{}' in folder '{}' has different integrals: current = {}, previous = {}",
146 histName, folderName, int1, int2);
147 return false;
148 }
149 return true;
150}

◆ CompareTwoConfigs()

bool CompareTwoConfigs ( const std::string &  File1,
const std::string &  File2,
const std::vector< std::string > &  SkipVector 
)

Definition at line 46 of file CombineMaCh3Chains.cpp.

46 {
47 std::istringstream file1(File1);
48 std::istringstream file2(File2);
49
50 std::string line1, line2;
51 int lineNumber = 1;
52 bool areEqual = true;
53
54 while (std::getline(file1, line1) && std::getline(file2, line2)) {
55 if (ShouldSkipLine(line1, SkipVector) || ShouldSkipLine(line2, SkipVector)) {
56 ++lineNumber;
57 continue;
58 }
59 if (line1 != line2) {
60 areEqual = false;
61 MACH3LOG_WARN("Difference found on line {}:", lineNumber);
62 MACH3LOG_WARN("Config1: {}", line1);
63 MACH3LOG_WARN("Config2: {}", line2);
64 }
65 ++lineNumber;
66 }
67 // Check if one file has extra lines
68 while (std::getline(file1, line1)) {
69 MACH3LOG_WARN("Extra line in {} on line {}: {}", File1, lineNumber, line1);
70 ++lineNumber;
71 }
72 while (std::getline(file2, line2)) {
73 MACH3LOG_WARN("Extra line in {} on line {}: {}", File2, lineNumber, line2);
74 ++lineNumber;
75 }
76 return areEqual;
77}
bool ShouldSkipLine(const std::string &line, const std::vector< std::string > &SkipVector)
KS: This allow us to skip output name etc in config. We expect Output name will be different but this...

◆ CopyDir()

void CopyDir ( TDirectory *  source)

Definition at line 97 of file CombineMaCh3Chains.cpp.

97 {
98 //copy all objects and subdirs of directory source as a subdir of the current directory
99 source->ls();
100 TDirectory *savdir = gDirectory;
101 TDirectory *adir = savdir->Get<TDirectory>(source->GetName());
102 adir->cd();
103 //loop on all entries of this directory
104 TKey *key;
105 TIter nextkey(source->GetListOfKeys());
106 while ((key = static_cast<TKey*>(nextkey()))) {
107 const char *classname = key->GetClassName();
108 TClass *cl = gROOT->GetClass(classname);
109 if (!cl) continue;
110 if (cl->InheritsFrom("TDirectory")) {
111 source->cd(key->GetName());
112 TDirectory *subdir = gDirectory;
113 adir->cd();
114 CopyDir(subdir);
115 adir->cd();
116 } else if (cl->InheritsFrom("TTree")) {
117 TTree *T = source->Get<TTree>(key->GetName());
118 adir->cd();
119 TTree *newT = T->CloneTree();
120 newT->Write();
121 } else {
122 source->cd();
123 TObject *obj = key->ReadObj();
124 adir->cd();
125 obj->Write();
126 delete obj;
127 }
128 }
129 adir->SaveSelf(kTRUE);
130 savdir->cd();
131}

◆ main()

int main ( int  argc,
char *  argv[] 
)

Definition at line 382 of file CombineMaCh3Chains.cpp.

383{
386 ParseArg(argc, argv);
387 CombineChain();
388 return 0;
389}
void ParseArg(int argc, char *argv[])
void CombineChain()
void SetMaCh3LoggerFormat()
Set messaging format of the logger.
Definition: MaCh3Logger.h:30
void MaCh3Welcome()
KS: Prints welcome message with MaCh3 logo.
Definition: Monitor.cpp:11

◆ ParseArg()

void ParseArg ( int  argc,
char *  argv[] 
)

Definition at line 318 of file CombineMaCh3Chains.cpp.

318 {
319 if(argc < 2){
320 MACH3LOG_ERROR("Too few arguments!!");
321 MACH3LOG_ERROR("USAGE:");
322 usage();
323 throw MaCh3Exception(__FILE__ , __LINE__ );
324 }
325
326 int c;
327 for(;;) {
328 c = getopt(argc, argv, "o:c:hf");
329 if (c == -1){ // loop over the remaining arguments
330 while (optind < argc){
331 // any non option input is assumed to be a root file
332 std::string fName = std::string(argv[optind]);
333 MACH3LOG_DEBUG("adding {} to file list", fName.c_str());
334 inpFileList.push_back(fName);
335 optind ++;
336 }
337 break;
338 }
339 else{
340 switch (c) {
341 case 'o': {
342 OutFileName = optarg;
343 break;
344 }
345 case 'f': {
346 forceOverwrite = true;
347 break;
348 }
349 case 'c': {
350 targetCompression = atoi(optarg);
351 break;
352 }
353 case 'm': {
354 forceMerge = true;
355 break;
356 }
357 case 'h': {
358 usage();
359 exit(0);
360 }
361 default: {
362 MACH3LOG_ERROR("Un recognised option");
363 usage();
364 exit(1);
365 }
366 }
367 }
368 }
369
370 if(OutFileName == ""){
371 MACH3LOG_INFO("Using first file in list as output: ", inpFileList[0].c_str());
373 inpFileList.erase(inpFileList.begin());
374 }
375
376 if(forceOverwrite){
377 MACH3LOG_INFO("Will overwrite {} if it exists already", OutFileName.c_str());
378 }
379 MACH3LOG_INFO("Combining a total of {} files into {}", inpFileList.size(), OutFileName.c_str());
380}
void usage()

◆ ShouldSkipLine()

bool ShouldSkipLine ( const std::string &  line,
const std::vector< std::string > &  SkipVector 
)

KS: This allows us to skip the output name etc. in the config. We expect the output name to be different, but this doesn't invalidate chain merging.

Definition at line 34 of file CombineMaCh3Chains.cpp.

34 {
35 // Otherwise, check if the line contains any word from SkipVector
36 for (const auto& word : SkipVector) {
37 MACH3LOG_TRACE("{} : {}",line, word);
38 if (line.find(word) != std::string::npos) {
39 MACH3LOG_TRACE("Found matching word, therefore Skipping");
40 return true;
41 }
42 }
43 return false;
44}
#define MACH3LOG_TRACE
Definition: MaCh3Logger.h:21

◆ usage()

void usage ( )

Definition at line 306 of file CombineMaCh3Chains.cpp.

306 {
307 MACH3LOG_INFO("Combine MaCh3 Chains files, very similar to hadd, but will compare embedded version info in the files to avoid accidentally combining files made with different software versions. Also avoids having a hige dump of separate version files in the output that happens with hadd.");
308 MACH3LOG_INFO("Cmd line syntax should be:");
309 MACH3LOG_INFO("CombineMaCh3Chains [-h] [-c [0-9]] [-f] [-o <output file>] input1.root [input2.root, input3.root ...]");
310 MACH3LOG_INFO("inputX.root : names of individual spline files to combine, can specify any number, need at least one");
311 MACH3LOG_INFO("output file : name of combined spline file. optional: if not specified, the app will just use the first input file as the output, the same as hadd'");
312 MACH3LOG_INFO("-c : target compression level for the combined file, default is 1, in line with hadd");
313 MACH3LOG_INFO("-f : force overwrite the output file if it exists already");
314 MACH3LOG_INFO("-m : merge files in-spite of differences");
315 MACH3LOG_INFO("-h : print this message and exit");
316}

Variable Documentation

◆ forceMerge

bool forceMerge = false

Definition at line 29 of file CombineMaCh3Chains.cpp.

◆ forceOverwrite

bool forceOverwrite = false

Definition at line 28 of file CombineMaCh3Chains.cpp.

◆ inpFileList

std::vector<std::string> inpFileList

Definition at line 27 of file CombineMaCh3Chains.cpp.

◆ OutFileName

std::string OutFileName = ""

Definition at line 25 of file CombineMaCh3Chains.cpp.

◆ targetCompression

int targetCompression = 1

Definition at line 26 of file CombineMaCh3Chains.cpp.