Combine chain files produced by MCMC, enforcing the condition that all the files to be combined were made using exactly the same software versions and config files.
More...
#include <unistd.h>
#include "Manager/Manager.h"
#include "Samples/SampleStructs.h"
#include "Samples/HistogramUtils.h"
#include "TList.h"
#include "TFile.h"
#include "TMacro.h"
#include "TTree.h"
#include "TMD5.h"
#include "TFileMerger.h"
#include "TKey.h"
#include "TROOT.h"
Go to the source code of this file.
|
| bool | ShouldSkipLine (const std::string &line, const std::vector< std::string > &SkipVector) |
| | KS: This allows us to skip fields such as the output name in the config. We expect the output name to be different, but this doesn't invalidate chain merging. More...
|
| |
| bool | CompareTwoConfigs (const std::string &File1, const std::string &File2, const std::vector< std::string > &SkipVector) |
| | Make sure two configs are identical, ignoring the specified fields. For example, when comparing two chains, nsteps or the output name might be different, and the chains are still fine to merge. More...
|
| |
| bool | checkSoftwareVersions (TFile *file, TFile *prevFile, const std::string &ConfigName, const std::vector< std::string > &SkipVector={}) |
| | EM: Will compare the version header contained in the two provided files and shout if they don't match. More...
|
| |
| void | CopyDir (TDirectory *source) |
| | When we merge two chains they contain TDirectory objects; ROOT didn't provide a method for this, so here we have this bad boy. More...
|
| |
| bool | CompareHistograms (const TH1 *h1, const TH1 *h2, const std::string &histName, const std::string &folderName) |
| | Check whether two histograms are identical. More...
|
| |
| bool | CheckFolder (TFile *file, TFile *prevFile, const std::string &FolderName, const std::vector< std::string > &SkipVector={}) |
| | Loop through TH1 and TMacro objects in FolderName in 'file' and compare with those in 'prevFile'. More...
|
| |
| void | FastMergeTTrees (const std::vector< std::string > &files, const std::string &outFile, const std::string &TTreeName) |
| | Custom function for merging TTrees; should be similar to what hadd uses. More...
|
| |
| void | CombineChain () |
| |
| void | usage () |
| |
| void | ParseArg (int argc, char *argv[]) |
| |
| int | main (int argc, char *argv[]) |
| |
Combine chain files produced by MCMC, enforcing the condition that all the files to be combined were made using exactly the same software versions and config files.
- Author
- Ewan Miller
- Kamil Skwarczynski
Definition in file CombineMaCh3Chains.cpp.
◆ CheckFolder()
| bool CheckFolder |
( |
TFile * |
file, |
|
|
TFile * |
prevFile, |
|
|
const std::string & |
FolderName, |
|
|
const std::vector< std::string > & |
SkipVector = {} |
|
) |
| |
Loop through TH1 and TMacro objects in FolderName in 'file' and compare with those in 'prevFile'.
Definition at line 163 of file CombineMaCh3Chains.cpp.
165 bool mismatch =
false;
166 TDirectory* dir = file->GetDirectory(FolderName.c_str());
167 TDirectory* prevDir = prevFile->GetDirectory(FolderName.c_str());
169 if (!dir || !prevDir) {
170 MACH3LOG_ERROR(
"Could not find folder '{}' in one or both files", FolderName);
174 TIter nextKey(dir->GetListOfKeys());
177 while ((key =
static_cast<TKey*
>(nextKey()))) {
178 const std::string objName = key->GetName();
179 TObject* obj = key->ReadObj();
183 if (obj->InheritsFrom(
"TH1")) {
184 TH1* hist =
static_cast<TH1*
>(obj);
185 TH1* prevHist =
dynamic_cast<TH1*
>(prevDir->Get(objName.c_str()));
187 MACH3LOG_ERROR(
"Missing histogram '{}' in previous file (folder '{}')", objName, FolderName);
196 else if (obj->InheritsFrom(
"TMacro")) {
197 TMacro* macro =
static_cast<TMacro*
>(obj);
198 TMacro* prevMacro =
dynamic_cast<TMacro*
>(prevDir->Get(objName.c_str()));
200 MACH3LOG_ERROR(
"Missing TMacro '{}' in previous file (folder '{}')", objName, FolderName);
bool CompareHistograms(const TH1 *h1, const TH1 *h2, const std::string &histName, const std::string &folderName)
Check whether two histograms are identical.
bool CompareTwoConfigs(const std::string &File1, const std::string &File2, const std::vector< std::string > &SkipVector)
make sure two configs are identical but skip specified fields. For example when comparing two chains ...
std::string TMacroToString(const TMacro &macro)
KS: Convert a ROOT TMacro object to a string representation.
◆ checkSoftwareVersions()
| bool checkSoftwareVersions |
( |
TFile * |
file, |
|
|
TFile * |
prevFile, |
|
|
const std::string & |
ConfigName, |
|
|
const std::vector< std::string > & |
SkipVector = {} |
|
) |
| |
EM: Will compare the version header contained in the two provided files and shout if they don't match.
Definition at line 85 of file CombineMaCh3Chains.cpp.
87 bool weirdFile =
false;
89 TMacro *versionHeader = file->Get<TMacro>(ConfigName.c_str());
90 TMacro *prevVersionHeader = prevFile->Get<TMacro>(ConfigName.c_str());
94 MACH3LOG_ERROR(
"Looks like the {} embedded config for file {} is different to the previous ones", ConfigName, file->GetName());
95 MACH3LOG_ERROR(
"This strongly suggests that this file was made with different software versions than the previous ones");
◆ CombineChain()
Definition at line 228 of file CombineMaCh3Chains.cpp.
230 std::string outFileOption;
232 else outFileOption =
"CREATE";
234 TFile *prevFile =
nullptr;
239 for(uint fileId = 0; fileId <
inpFileList.size(); fileId++)
242 TFile *file =
new TFile(fileName.c_str());
244 if(file->Get<TTree>(
"posteriors")->GetEntries() == 0){
245 MACH3LOG_WARN(
"Hmmm, file {} Doesn't seem to have any entries", fileName.c_str());
246 MACH3LOG_WARN(
"That's weird but I guess there's no rule that says a file can't be empty");
247 MACH3LOG_WARN(
"I'll skip it but maybe double check that this doesn't indicate some deeper problem");
252 if(prevFile ==
nullptr) {
258 bool weirdFile =
false;
260 if(
checkSoftwareVersions(file, prevFile,
"MaCh3_Config", {
"OutputFile:",
"NSteps:"})) weirdFile =
true;
261 if(
CheckFolder(file, prevFile,
"SampleFolder")) weirdFile =
true;
262 if(
CheckFolder(file, prevFile,
"CovarianceFolder")) weirdFile =
true;
266 MACH3LOG_ERROR(
"=====================================================================================");
267 MACH3LOG_ERROR(
"This is not a great idea and could lead to weird outputs and cause some big headaches");
268 MACH3LOG_ERROR(
"further down the road. But if you reeeeally wanna do it and you know what you're");
272 MACH3LOG_ERROR(
"=====================================================================================");
276 if(prevFile != file) {
302 MACH3LOG_INFO(
"Merging of took {:.2f}s to finish", clock.RealTime());
309 std::vector<std::string> configNames = {
"MaCh3_Config",
"Reweight_Config",
"Smearing_Config"};
310 for (std::size_t i = 0; i < configNames.size(); ++i) {
311 const std::string& name = configNames[i];
312 TMacro* macro = prevFile->Get<TMacro>(name.c_str());
313 if (macro !=
nullptr) {
320 TDirectory *MaCh3EngineDir = prevFile->Get<TDirectory>(
"MaCh3Engine");
321 TDirectory *CovarianceFolderDir = prevFile->Get<TDirectory>(
"CovarianceFolder");
322 TDirectory *SampleFolderDir = prevFile->Get<TDirectory>(
"SampleFolder");
void CopyDir(TDirectory *source)
When we merge two chains they have TDirectory ROOT didn't provide method for this so here we have thi...
bool checkSoftwareVersions(TFile *file, TFile *prevFile, const std::string &ConfigName, const std::vector< std::string > &SkipVector={})
EM: Will compare the version header contained in the two provided files and shout if they don't match...
void FastMergeTTrees(const std::vector< std::string > &files, const std::string &outFile, const std::string &TTreeName)
custom function for merging TTree, should be similar to what HADD is using
std::vector< std::string > inpFileList
bool CheckFolder(TFile *file, TFile *prevFile, const std::string &FolderName, const std::vector< std::string > &SkipVector={})
Loop through TH1 and TMacro objects in FolderName in 'file' and compare with those in 'prevFile'.
Custom exception class used throughout MaCh3.
TFile * Open(const std::string &Name, const std::string &Type, const std::string &File, const int Line)
Opens a ROOT file with the given name and mode.
◆ CompareHistograms()
| bool CompareHistograms |
( |
const TH1 * |
h1, |
|
|
const TH1 * |
h2, |
|
|
const std::string & |
histName, |
|
|
const std::string & |
folderName |
|
) |
| |
Check whether two histograms are identical.
- Todo:
- add checks for stuff like bin content etc
Definition at line 145 of file CombineMaCh3Chains.cpp.
148 MACH3LOG_ERROR(
"Null pointer passed to CompareHistograms for '{}'", histName);
152 const double int1 = h1->Integral();
153 const double int2 = h2->Integral();
154 if (std::abs(int1 - int2) > 1e-6) {
155 MACH3LOG_ERROR(
"Histogram '{}' in folder '{}' has different integrals: current = {}, previous = {}",
156 histName, folderName, int1, int2);
◆ CompareTwoConfigs()
| bool CompareTwoConfigs |
( |
const std::string & |
File1, |
|
|
const std::string & |
File2, |
|
|
const std::vector< std::string > & |
SkipVector |
|
) |
| |
Make sure two configs are identical, ignoring the specified fields. For example, when comparing two chains, nsteps or the output name might be different, and the chains are still fine to merge.
- Parameters
-
| File1 | Config from chain1 |
| File2 | Config from chain2 |
| SkipVector | Fields in yaml file to skip |
Definition at line 51 of file CombineMaCh3Chains.cpp.
52 std::istringstream file1(File1);
53 std::istringstream file2(File2);
55 std::string line1, line2;
59 while (std::getline(file1, line1) && std::getline(file2, line2)) {
73 while (std::getline(file1, line1)) {
74 MACH3LOG_WARN(
"Extra line in {} on line {}: {}", File1, lineNumber, line1);
77 while (std::getline(file2, line2)) {
78 MACH3LOG_WARN(
"Extra line in {} on line {}: {}", File2, lineNumber, line2);
bool ShouldSkipLine(const std::string &line, const std::vector< std::string > &SkipVector)
KS: This allows us to skip fields such as the output name in the config. We expect the output name to be different, but this...
◆ CopyDir()
| void CopyDir |
( |
TDirectory * |
source | ) |
|
When we merge two chains they contain TDirectory objects; ROOT didn't provide a method for this, so here we have this bad boy.
Definition at line 103 of file CombineMaCh3Chains.cpp.
106 TDirectory *savdir = gDirectory;
107 TDirectory *adir = savdir->Get<TDirectory>(source->GetName());
110 adir = savdir->mkdir(source->GetName());
115 TIter nextkey(source->GetListOfKeys());
116 while ((key =
static_cast<TKey*
>(nextkey()))) {
117 const char *classname = key->GetClassName();
118 TClass *cl = gROOT->GetClass(classname);
120 if (cl->InheritsFrom(
"TDirectory")) {
121 source->cd(key->GetName());
122 TDirectory *subdir = gDirectory;
126 }
else if (cl->InheritsFrom(
"TTree")) {
127 TTree *T = source->Get<TTree>(key->GetName());
129 TTree *newT = T->CloneTree();
133 TObject *obj = key->ReadObj();
139 adir->SaveSelf(kTRUE);
◆ FastMergeTTrees()
| void FastMergeTTrees |
( |
const std::vector< std::string > & |
files, |
|
|
const std::string & |
outFile, |
|
|
const std::string & |
TTreeName |
|
) |
| |
Custom function for merging TTrees; should be similar to what hadd uses.
- Warning
- KS: for some reason, if "fast" is enabled then I cannot open it in ROOT5; no one should be using ROOT5 at this point.
Definition at line 214 of file CombineMaCh3Chains.cpp.
215 TChain chain(TTreeName.c_str());
216 for (
const auto& f : files) chain.Add(f.c_str());
218 TFile* outF =
TFile::Open(outFile.c_str(),
"UPDATE");
220 TTree* newTree = chain.CloneTree(-1,
"fast");
221 newTree->SetName(TTreeName.c_str());
223 newTree->Write(
"", TObject::kOverwrite);
◆ main()
| int main |
( |
int |
argc, |
|
|
char * |
argv[] |
|
) |
| |
Definition at line 411 of file CombineMaCh3Chains.cpp.
void ParseArg(int argc, char *argv[])
void SetMaCh3LoggerFormat()
Set messaging format of the logger.
void MaCh3Welcome()
KS: Prints welcome message with MaCh3 logo.
◆ ParseArg()
| void ParseArg |
( |
int |
argc, |
|
|
char * |
argv[] |
|
) |
| |
Definition at line 347 of file CombineMaCh3Chains.cpp.
357 c = getopt(argc, argv,
"o:c:mhf");
359 while (optind < argc){
361 std::string fName = std::string(argv[optind]);
◆ ShouldSkipLine()
| bool ShouldSkipLine |
( |
const std::string & |
line, |
|
|
const std::vector< std::string > & |
SkipVector |
|
) |
| |
KS: This allows us to skip fields such as the output name in the config. We expect the output name to be different, but this doesn't invalidate chain merging.
Definition at line 35 of file CombineMaCh3Chains.cpp.
37 for (
const auto& word : SkipVector) {
39 if (line.find(word) != std::string::npos) {
◆ usage()
Definition at line 335 of file CombineMaCh3Chains.cpp.
336 MACH3LOG_INFO(
"Combine MaCh3 Chains files, very similar to hadd, but will compare embedded version info in the files to avoid accidentally combining files made with different software versions. Also avoids having a hige dump of separate version files in the output that happens with hadd.");
338 MACH3LOG_INFO(
"CombineMaCh3Chains [-h] [-c [0-9]] [-f] [-o <output file>] input1.root [input2.root, input3.root ...]");
339 MACH3LOG_INFO(
"inputX.root : names of individual spline files to combine, can specify any number, need at least one");
340 MACH3LOG_INFO(
"output file : name of combined spline file. optional: if not specified, the app will just use the first input file as the output, the same as hadd'");
341 MACH3LOG_INFO(
"-c : target compression level for the combined file, default is 1, in line with hadd");
342 MACH3LOG_INFO(
"-f : force overwrite the output file if it exists already");
◆ forceMerge
◆ forceOverwrite
| bool forceOverwrite = false |
◆ inpFileList
| std::vector<std::string> inpFileList |
◆ OutFileName
| std::string OutFileName = "" |
◆ targetCompression
| int targetCompression = 1 |