MaCh3 2.2.1
Reference Guide
Loading...
Searching...
No Matches
CombineMaCh3Chains.cpp
Go to the documentation of this file.
1// C++ includes
2#include <unistd.h>
3
4// MaCh3 includes
5#include "Manager/Manager.h"
7
9// ROOT includes
10#include "TList.h"
11#include "TFile.h"
12#include "TMacro.h"
13#include "TTree.h"
14#include "TMD5.h"
15#include "TFileMerger.h"
16#include "TKey.h"
17#include "TROOT.h"
19
24
/// Name of the combined output file (set with -o, otherwise taken from the first input file)
std::string OutFileName = "";
/// Target compression level of the combined file (set with -c); 1 matches the default advertised in usage()
int targetCompression = 1;
/// Input chain files that will be combined
std::vector<std::string> inpFileList;
/// If true, an already existing output file is overwritten (-f)
bool forceOverwrite = false;
/// If true, files are merged even when their embedded configs differ (-m)
bool forceMerge = false;
30
31
32
34bool ShouldSkipLine(const std::string& line, const std::vector<std::string>& SkipVector) {
35 // Otherwise, check if the line contains any word from SkipVector
36 for (const auto& word : SkipVector) {
37 MACH3LOG_TRACE("{} : {}",line, word);
38 if (line.find(word) != std::string::npos) {
39 MACH3LOG_TRACE("Found matching word, therefore Skipping");
40 return true;
41 }
42 }
43 return false;
44}
45
46bool CompareTwoConfigs(const std::string& File1, const std::string& File2, const std::vector<std::string>& SkipVector) {
47 std::istringstream file1(File1);
48 std::istringstream file2(File2);
49
50 std::string line1, line2;
51 int lineNumber = 1;
52 bool areEqual = true;
53
54 while (std::getline(file1, line1) && std::getline(file2, line2)) {
55 if (ShouldSkipLine(line1, SkipVector) || ShouldSkipLine(line2, SkipVector)) {
56 ++lineNumber;
57 continue;
58 }
59 if (line1 != line2) {
60 areEqual = false;
61 MACH3LOG_WARN("Difference found on line {}:", lineNumber);
62 MACH3LOG_WARN("Config1: {}", line1);
63 MACH3LOG_WARN("Config2: {}", line2);
64 }
65 ++lineNumber;
66 }
67 // Check if one file has extra lines
68 while (std::getline(file1, line1)) {
69 MACH3LOG_WARN("Extra line in {} on line {}: {}", File1, lineNumber, line1);
70 ++lineNumber;
71 }
72 while (std::getline(file2, line2)) {
73 MACH3LOG_WARN("Extra line in {} on line {}: {}", File2, lineNumber, line2);
74 ++lineNumber;
75 }
76 return areEqual;
77}
78
80bool checkSoftwareVersions(TFile *file, TFile *prevFile, const std::string& ConfigName, const std::vector<std::string>& SkipVector = {})
81{
82 bool weirdFile = false;
83
84 TMacro *versionHeader = file->Get<TMacro>(ConfigName.c_str());
85 TMacro *prevVersionHeader = prevFile->Get<TMacro>(ConfigName.c_str());
86
87 // EM: compare the digest of the version header file in this file, with the previous one
88 if(!CompareTwoConfigs(TMacroToString(*versionHeader), TMacroToString(*prevVersionHeader), SkipVector)){
89 MACH3LOG_ERROR("Looks like the {} embedded config for file {} is different to the previous ones", ConfigName, file->GetName());
90 MACH3LOG_ERROR("This strongly suggests that this file was made with different software versions than the previous ones");
91 weirdFile = true;
92 }
93
94 return weirdFile;
95}
96
97void CopyDir(TDirectory *source) {
98 //copy all objects and subdirs of directory source as a subdir of the current directory
99 source->ls();
100 TDirectory *savdir = gDirectory;
101 TDirectory *adir = savdir->Get<TDirectory>(source->GetName());
102 adir->cd();
103 //loop on all entries of this directory
104 TKey *key;
105 TIter nextkey(source->GetListOfKeys());
106 while ((key = static_cast<TKey*>(nextkey()))) {
107 const char *classname = key->GetClassName();
108 TClass *cl = gROOT->GetClass(classname);
109 if (!cl) continue;
110 if (cl->InheritsFrom("TDirectory")) {
111 source->cd(key->GetName());
112 TDirectory *subdir = gDirectory;
113 adir->cd();
114 CopyDir(subdir);
115 adir->cd();
116 } else if (cl->InheritsFrom("TTree")) {
117 TTree *T = source->Get<TTree>(key->GetName());
118 adir->cd();
119 TTree *newT = T->CloneTree();
120 newT->Write();
121 } else {
122 source->cd();
123 TObject *obj = key->ReadObj();
124 adir->cd();
125 obj->Write();
126 delete obj;
127 }
128 }
129 adir->SaveSelf(kTRUE);
130 savdir->cd();
131}
132
135bool CompareHistograms(const TH1* h1, const TH1* h2, const std::string& histName, const std::string& folderName)
136{
137 if (!h1 || !h2) {
138 MACH3LOG_ERROR("Null pointer passed to CompareHistograms for '{}'", histName);
139 return false;
140 }
141
142 const double int1 = h1->Integral();
143 const double int2 = h2->Integral();
144 if (std::abs(int1 - int2) > 1e-6) {
145 MACH3LOG_ERROR("Histogram '{}' in folder '{}' has different integrals: current = {}, previous = {}",
146 histName, folderName, int1, int2);
147 return false;
148 }
149 return true;
150}
151
153bool CheckFolder(TFile* file, TFile* prevFile, const std::string& FolderName, const std::vector<std::string>& SkipVector = {})
154{
155 bool mismatch = false;
156 TDirectory* dir = file->GetDirectory(FolderName.c_str());
157 TDirectory* prevDir = prevFile->GetDirectory(FolderName.c_str());
158
159 if (!dir || !prevDir) {
160 MACH3LOG_ERROR("Could not find folder '{}' in one or both files", FolderName);
161 return true;
162 }
163
164 TIter nextKey(dir->GetListOfKeys());
165 TKey* key;
166
167 while ((key = static_cast<TKey*>(nextKey()))) {
168 const std::string objName = key->GetName();
169 TObject* obj = key->ReadObj();
170 if (!obj) continue;
171
172 // Handle TH1 comparison
173 if (obj->InheritsFrom("TH1")) {
174 TH1* hist = static_cast<TH1*>(obj);
175 TH1* prevHist = dynamic_cast<TH1*>(prevDir->Get(objName.c_str()));
176 if (!prevHist) {
177 MACH3LOG_ERROR("Missing histogram '{}' in previous file (folder '{}')", objName, FolderName);
178 mismatch = true;
179 continue;
180 }
181 if (!CompareHistograms(hist, prevHist, objName, FolderName)) {
182 mismatch = true;
183 }
184 }
185 // Handle TMacro comparison
186 else if (obj->InheritsFrom("TMacro")) {
187 TMacro* macro = static_cast<TMacro*>(obj);
188 TMacro* prevMacro = dynamic_cast<TMacro*>(prevDir->Get(objName.c_str()));
189 if (!prevMacro) {
190 MACH3LOG_ERROR("Missing TMacro '{}' in previous file (folder '{}')", objName, FolderName);
191 mismatch = true;
192 continue;
193 }
194 if (!CompareTwoConfigs(TMacroToString(*macro), TMacroToString(*prevMacro), SkipVector)) {
195 mismatch = true;
196 }
197 }
198 }
199 return mismatch;
200}
201
203{
204 TFileMerger *fileMerger = new TFileMerger();
205
206 // EM: If we ever add new trees to the chain files they will need to be added here too
207 fileMerger->AddObjectNames("posteriors");
208 fileMerger->AddObjectNames("Settings");
209
210 MACH3LOG_INFO("These objects will be merged: {}", fileMerger->GetObjectNames());
211
212 std::string outFileOption;
213 if(forceOverwrite) outFileOption = "RECREATE";
214 else outFileOption = "CREATE";
215
216 // EM: Attempt to open the output file
217 bool openedFile = fileMerger->OutputFile(OutFileName.c_str(), outFileOption.c_str(), targetCompression);
218 if (!openedFile){
219 MACH3LOG_ERROR("Failed to create output file.");
220 throw MaCh3Exception(__FILE__ , __LINE__ );
221 }
222
223 TFile *prevFile = nullptr;
224
225 // EM: loop through all the files in the provided list, compare the embedded version and config files
226 // If they match, we add the file to the list of files to be merged.
227 // If not, we throw an error and provide a (hopefully) helpful message telling the user why the files couldn't be merged.
228 for(uint fileId = 0; fileId < inpFileList.size(); fileId++)
229 {
230 std::string fileName = inpFileList[fileId];
231 TFile *file = new TFile(fileName.c_str());
232
233 if(file->Get<TTree>("posteriors")->GetEntries() == 0){
234 MACH3LOG_WARN("Hmmm, file {} Doesn't seem to have any entries", fileName.c_str());
235 MACH3LOG_WARN("That's weird but I guess there's no rule that says a file can't be empty");
236 MACH3LOG_WARN("I'll skip it but maybe double check that this doesn't indicate some deeper problem");
237 continue;
238 }
239
240 // EM: need to set this in the initial case
241 if(prevFile == nullptr) {
242 prevFile = file;
243 }
244
245 MACH3LOG_DEBUG("############ File {} #############", fileId);
246
247 bool weirdFile = false;
248 if(checkSoftwareVersions(file, prevFile, "MaCh3Engine/version_header")) weirdFile = true;
249 if(checkSoftwareVersions(file, prevFile, "MaCh3_Config", {"OutputFile:", "NSteps:"})) weirdFile = true;
250 if(CheckFolder(file, prevFile, "SampleFolder")) weirdFile = true;
251 if(CheckFolder(file, prevFile, "CovarianceFolder")) weirdFile = true;
252
253 if(weirdFile && !forceMerge){
254 MACH3LOG_ERROR("");
255 MACH3LOG_ERROR("=====================================================================================");
256 MACH3LOG_ERROR("This is not a great idea and could lead to weird outputs and cause some big headaches");
257 MACH3LOG_ERROR("further down the road. But if you reeeeally wanna do it and you know what you're");
258 MACH3LOG_ERROR("doing you can come here and remove the 'throw'");
259 MACH3LOG_ERROR("Or use -m option");
260 MACH3LOG_ERROR("{}:{}", __FILE__, __LINE__ + 2);
261 MACH3LOG_ERROR("=====================================================================================");
262 throw MaCh3Exception(__FILE__ , __LINE__ );
263 }
264 // EM: file seems good, we'll add the trees to the lists
265 fileMerger->AddFile(file);
266
267 // EM: set these for the next iteration
268 prevFile = file;
269 }
270
271 TFile *outputFile = fileMerger->GetOutputFile();
272 outputFile->cd();
273
274 // EM: write out the version and config files to the combined file
275 TMacro *MaCh3_Config = prevFile->Get<TMacro>("MaCh3_Config");
276
277 if(MaCh3_Config != NULL) MaCh3_Config->Write();
278 delete MaCh3_Config;
279
280 // EM: now let's combine all the trees and write to the output file
281 bool mergeSuccess = fileMerger->PartialMerge(TFileMerger::kRegular | TFileMerger::kAll | TFileMerger::kOnlyListed);
282 if(mergeSuccess){
283 MACH3LOG_INFO("Files merged successfully");
284 } else{
285 MACH3LOG_ERROR("Failed to merge files");
286 }
287 delete fileMerger;
288
289 //KS: Sadly we need to open file to save TDirectories to not have weird copy of several obejcts there...
290 outputFile = new TFile(OutFileName.c_str(), "UPDATE");
291
292 // Get the source directory
293 TDirectory *MaCh3EngineDir = prevFile->Get<TDirectory>("MaCh3Engine");
294 TDirectory *CovarianceFolderDir = prevFile->Get<TDirectory>("CovarianceFolder");
295 TDirectory *SampleFolderDir = prevFile->Get<TDirectory>("SampleFolder");
296
297 outputFile->cd();
298 CopyDir(MaCh3EngineDir);
299 CopyDir(CovarianceFolderDir);
300 CopyDir(SampleFolderDir);
301
302 delete prevFile;
303 MACH3LOG_INFO("Done!");
304}
305
306void usage(){
307 MACH3LOG_INFO("Combine MaCh3 Chains files, very similar to hadd, but will compare embedded version info in the files to avoid accidentally combining files made with different software versions. Also avoids having a hige dump of separate version files in the output that happens with hadd.");
308 MACH3LOG_INFO("Cmd line syntax should be:");
309 MACH3LOG_INFO("CombineMaCh3Chains [-h] [-c [0-9]] [-f] [-o <output file>] input1.root [input2.root, input3.root ...]");
310 MACH3LOG_INFO("inputX.root : names of individual spline files to combine, can specify any number, need at least one");
311 MACH3LOG_INFO("output file : name of combined spline file. optional: if not specified, the app will just use the first input file as the output, the same as hadd'");
312 MACH3LOG_INFO("-c : target compression level for the combined file, default is 1, in line with hadd");
313 MACH3LOG_INFO("-f : force overwrite the output file if it exists already");
314 MACH3LOG_INFO("-m : merge files in-spite of differences");
315 MACH3LOG_INFO("-h : print this message and exit");
316}
317
318void ParseArg(int argc, char *argv[]){
319 if(argc < 2){
320 MACH3LOG_ERROR("Too few arguments!!");
321 MACH3LOG_ERROR("USAGE:");
322 usage();
323 throw MaCh3Exception(__FILE__ , __LINE__ );
324 }
325
326 int c;
327 for(;;) {
328 c = getopt(argc, argv, "o:c:hf");
329 if (c == -1){ // loop over the remaining arguments
330 while (optind < argc){
331 // any non option input is assumed to be a root file
332 std::string fName = std::string(argv[optind]);
333 MACH3LOG_DEBUG("adding {} to file list", fName.c_str());
334 inpFileList.push_back(fName);
335 optind ++;
336 }
337 break;
338 }
339 else{
340 switch (c) {
341 case 'o': {
342 OutFileName = optarg;
343 break;
344 }
345 case 'f': {
346 forceOverwrite = true;
347 break;
348 }
349 case 'c': {
350 targetCompression = atoi(optarg);
351 break;
352 }
353 case 'm': {
354 forceMerge = true;
355 break;
356 }
357 case 'h': {
358 usage();
359 exit(0);
360 }
361 default: {
362 MACH3LOG_ERROR("Un recognised option");
363 usage();
364 exit(1);
365 }
366 }
367 }
368 }
369
370 if(OutFileName == ""){
371 MACH3LOG_INFO("Using first file in list as output: ", inpFileList[0].c_str());
373 inpFileList.erase(inpFileList.begin());
374 }
375
376 if(forceOverwrite){
377 MACH3LOG_INFO("Will overwrite {} if it exists already", OutFileName.c_str());
378 }
379 MACH3LOG_INFO("Combining a total of {} files into {}", inpFileList.size(), OutFileName.c_str());
380}
381
382int main(int argc, char *argv[])
383{
386 ParseArg(argc, argv);
387 CombineChain();
388 return 0;
389}
std::string OutFileName
bool forceMerge
int main(int argc, char *argv[])
void CopyDir(TDirectory *source)
bool CompareHistograms(const TH1 *h1, const TH1 *h2, const std::string &histName, const std::string &folderName)
Check whether two histograms are identical.
void usage()
bool forceOverwrite
void ParseArg(int argc, char *argv[])
bool checkSoftwareVersions(TFile *file, TFile *prevFile, const std::string &ConfigName, const std::vector< std::string > &SkipVector={})
EM: Will compare the version header contained in the two provided files and shout if they don't match...
int targetCompression
std::vector< std::string > inpFileList
void CombineChain()
bool CompareTwoConfigs(const std::string &File1, const std::string &File2, const std::vector< std::string > &SkipVector)
bool ShouldSkipLine(const std::string &line, const std::vector< std::string > &SkipVector)
KS: This allows us to skip the output name etc. in the config. We expect the output name will be different, but this...
bool CheckFolder(TFile *file, TFile *prevFile, const std::string &FolderName, const std::vector< std::string > &SkipVector={})
Loop through TH1 and TMacro objects in FolderName in 'file' and compare with those in 'prevFile'.
#define _MaCh3_Safe_Include_Start_
KS: Avoiding warning checking for headers.
Definition: Core.h:106
#define _MaCh3_Safe_Include_End_
KS: Restore warning checking after including external headers.
Definition: Core.h:117
#define MACH3LOG_DEBUG
Definition: MaCh3Logger.h:22
#define MACH3LOG_ERROR
Definition: MaCh3Logger.h:25
#define MACH3LOG_INFO
Definition: MaCh3Logger.h:23
void SetMaCh3LoggerFormat()
Set messaging format of the logger.
Definition: MaCh3Logger.h:30
#define MACH3LOG_WARN
Definition: MaCh3Logger.h:24
#define MACH3LOG_TRACE
Definition: MaCh3Logger.h:21
std::string TMacroToString(const TMacro &macro)
KS: Convert a ROOT TMacro object to a string representation.
Definition: YamlHelper.h:117
Custom exception class for MaCh3 errors.
void MaCh3Welcome()
KS: Prints welcome message with MaCh3 logo.
Definition: Monitor.cpp:11