CbmRoot
Loading...
Searching...
No Matches
CbmQaCheckerObjectDB.cxx
Go to the documentation of this file.
1/* Copyright (C) 2023-2024 GSI Helmholtzzentrum fuer Schwerionenforschung, Darmstadt
2 SPDX-License-Identifier: GPL-3.0-only
3 Authors: Sergei Zharko [committer] */
4
9
11
12#include "Logger.h"
13#include "TDirectory.h"
14#include "TFile.h"
15#include "TH1.h"
16#include "TString.h"
17
18#include <algorithm>
19#include <regex>
20#include <sstream>
21
22#include <yaml-cpp/yaml.h>
23
25
26// ---------------------------------------------------------------------------------------------------------------------
27//
29{
30 fvDatasets.clear();
31 fvFiles.clear();
32 fvFileLabels.clear();
33 fvObjects.clear();
34 fvVersionLabels.clear();
35 fvVersionPaths.clear();
37}
38
39// ---------------------------------------------------------------------------------------------------------------------
40//
41void ObjectDB::AddVersion(const char* label, const char* path)
42{
43 fvVersionLabels.push_back(label);
44 fvVersionPaths.push_back(path);
45}
46
47// ---------------------------------------------------------------------------------------------------------------------
48//
49void ObjectDB::AddDataset(const char* dataset) { fvDatasets.push_back(dataset); }
50
51// ---------------------------------------------------------------------------------------------------------------------
52//
53std::string ObjectDB::GetInputFileName(int iVersion, int iFile, int iDataset) const
54{
55 std::string res = fvFiles[iFile];
56 res = std::regex_replace(res, std::regex("\\%v"), fvVersionPaths[iVersion]);
57 res = std::regex_replace(res, std::regex("\\%d"), fvDatasets[iDataset]);
58 return res;
59}
60
61
62// ---------------------------------------------------------------------------------------------------------------------
63//
65{
66 // ----- Check consistency of input values
67 LOG_IF(fatal, !GetNofObjects()) << "ObjectDB: No objects were passed to the checker";
68 LOG_IF(fatal, GetNofDatasets() < 1) << "ObjectDB: No datasets were found, at least one dataset should be provided";
69 LOG_IF(fatal, GetNofVersions() < 2) << "ObjectDB: File handler should have at least two versions to compare ("
70 << GetNofVersions() << " were provided)";
71
72
73 // ----- Define output file
74 if (fsOutputPath.empty()) {
75 fsOutputPath = "QaCheckerOutput.root";
76 }
77
78 // ----- Define default version index
79 if (fsDefaultLabel.size()) {
80 auto it = std::find(fvVersionLabels.cbegin(), fvVersionLabels.cend(), fsDefaultLabel);
81 if (it == fvVersionLabels.cend()) {
82 std::stringstream msg;
83 msg << "ObjectDB: registered default label \"" << fsDefaultLabel << "\" is not found among the version labels:\n";
84 for (const auto& label : fvVersionLabels) {
85 msg << "\t- " << label << '\n';
86 }
87 LOG(fatal) << msg.str();
88 }
89 fDefVersionID = it - fvVersionLabels.cbegin();
90 }
91 else {
92 fDefVersionID = 0;
93 LOG(warn) << "ObjectDB: default version was not registered. Using the first version as the default one (\""
94 << fvVersionLabels[fDefVersionID] << "\")";
95 }
96
97 // ----- Read object list from file
98 for (size_t iFile = 0; iFile < fvObjects.size(); ++iFile) {
99 if (fvObjects[iFile].size() == 0) {
100 this->ReadObjectList(iFile);
101 }
102 }
103
104 LOG(info) << this->ToString();
105
106 // ----- Init the object index vector
108 fvObjectFirstGlobIndex.resize(fvObjects.size() + 1, 0);
109 for (size_t iFile = 1; iFile <= fvObjects.size(); ++iFile) {
110 fvObjectFirstGlobIndex[iFile] = fvObjectFirstGlobIndex[iFile - 1] + fvObjects[iFile - 1].size();
111 }
112
113 // ----- Add root path of input, if it were defined
114 auto regexSlashes = std::regex("(/+)"); // regular expression for a sequence of consecutive slashes
115 for (auto& path : fvVersionPaths) {
116 if (fsInputRootPath.size()) {
117 path = fsInputRootPath + "/" + path;
118 }
119 path = std::regex_replace(path, regexSlashes, "/"); // replace all consecutive slashes with a single one
120 }
121}
122
123// ---------------------------------------------------------------------------------------------------------------------
124//
125void ObjectDB::ReadFromYAML(const char* configName)
126{
127 // ----- Open input file
128 YAML::Node config;
129 try {
130 config = YAML::LoadFile(configName)["checker"];
131 }
132 catch (const YAML::BadFile& exc) {
133 LOG(fatal) << "ObjectDB: configuration file " << configName << " does not exist";
134 }
135 catch (const YAML::ParserException& exc) {
136 LOG(fatal) << "ObjectDB: configuration file " << configName << " is badly formatted";
137 }
138
139 // ----- Define file-object map
140 if (const auto& node = config["files"]) {
141 if (fvObjectFirstGlobIndex.size()) {
142 LOG(warn) << "ObjectDB: file-object map was defined before. Redefining it from the config file " << configName;
143 fvFiles.clear();
144 fvFileLabels.clear();
145 fvObjects.clear();
146 }
147 try {
148 // Calculate total number of objects and files
149 size_t nFiles = node.size();
150 fvFiles.reserve(nFiles);
151 fvFileLabels.reserve(nFiles);
152 fvObjects.reserve(nFiles);
153
154 // Fill vectors
155 for (const auto& fileNode : node) {
156 const auto& objectsNode = fileNode["objects"];
157 int nObjects = objectsNode ? objectsNode.size() : 0;
158 auto& objectsInFile = fvObjects.emplace_back();
159 objectsInFile.reserve(nObjects);
160 if (nObjects > 0) {
161 for (const auto& objectNode : objectsNode) {
162 objectsInFile.push_back(objectNode.as<std::string>());
163 }
164 }
165 fvFiles.push_back(fileNode["name"].as<std::string>());
166 fvFileLabels.push_back(fileNode["label"].as<std::string>());
167 }
168 }
169 catch (const YAML::InvalidNode& exc) {
170 LOG(fatal) << "ObjectDB: error while reading checker/files node from the config " << configName;
171 }
172 }
173 else {
174 LOG(warn) << "ObjectDB: node checker/inputformat is not defined in the config " << configName;
175 }
176
177 // ----- Define dataset names
178 if (const auto& node = config["datasets"]) {
179 LOG_IF(fatal, fvDatasets.size())
180 << "ObjectDB: dataset names were defined before. Please, use only one initialisation method:"
181 << " either configuration file, or setters of the checker::Core class";
182 try {
183 fvDatasets.reserve(node.size());
184 for (const auto& datasetNode : node) {
185 fvDatasets.push_back(datasetNode.as<std::string>());
186 }
187 }
188 catch (const YAML::InvalidNode& exc) {
189 LOG(fatal) << "ObjectDB:: error while reading checker/datasets node from the config " << configName;
190 }
191 }
192 else {
193 LOG(warn) << "ObjectDB: node checker/datasets is not defined in the config " << configName;
194 }
195
196 // ----- Define version names
197 if (const auto& node = config["versions"]) {
198 LOG_IF(fatal, fvVersionLabels.size())
199 << "ObjectDB: dataset names were defined before. Attempt to redefine dataset names from config " << configName;
200 try {
201 fvVersionLabels.reserve(node.size());
202 fvVersionPaths.reserve(node.size());
203 for (const auto& versionNode : node) {
204 fvVersionLabels.push_back(versionNode["label"].as<std::string>());
205 fvVersionPaths.push_back(versionNode["path"].as<std::string>());
206 }
207 }
208 catch (const YAML::InvalidNode& exc) {
209 LOG(fatal) << "ObjectDB:: error while reading checker/versions node from the config " << configName;
210 }
211 }
212 else {
213 LOG(warn) << "ObjectDB: node checker/versions is not defined in the config " << configName;
214 }
215
216 // ----- Define default version
217 if (const auto& node = config["default_label"]) {
218 try {
219 SetDefaultLabel(node.as<std::string>().c_str());
220 }
221 catch (const YAML::InvalidNode& exc) {
222 LOG(fatal) << "ObjectDB:: error while reading checker/default_label node from the config " << configName;
223 }
224 }
225
226 // ----- Define the comparison parameters
227 if (const auto& node = config["settings"]) {
228 try {
229 double ratioMin = node["ratio_min"].as<double>(fRatioMin);
230 double ratioMax = node["ratio_max"].as<double>(fRatioMax);
231 SetRatioRange(ratioMin, ratioMax);
232
233 double pValThresh = node["pval_threshold"].as<double>(fPvalThresh);
234 SetPvalThreshold(pValThresh);
235 }
236 catch (const YAML::InvalidNode& exc) {
237 LOG(fatal) << "ObjectDB:: error while reading checker/versions node from the config " << configName;
238 }
239 }
240}
241
242// ---------------------------------------------------------------------------------------------------------------------
243//
244void ObjectDB::CollectObjectPaths(TDirectory* pDir, const TString& parentPath, std::set<std::string>& paths)
245{
246 for (auto&& pKey : *(pDir->GetListOfKeys())) {
247 TString sName = parentPath + pKey->GetName();
248 if (gFile->Get<TH1>(sName)) {
249 paths.insert(sName.Data());
250 }
251 else if (auto* pSubDir = gFile->Get<TDirectory>(sName)) {
252 CollectObjectPaths(pSubDir, sName + "/", paths);
253 }
254 }
255}
256
257// ---------------------------------------------------------------------------------------------------------------------
258//
260{
261 // TODO: test performance, probably unordered_set will fit better
262 std::set<std::string> objectPaths;
263 LOG(info) << "Reading object list from files: ...";
264 for (int iDs = 0; iDs < static_cast<int>(fvDatasets.size()); ++iDs) {
265 TString fileName = this->GetInputFileName(fDefVersionID, iFile, iDs);
266 LOG(info) << "- file: " << fileName;
267 TFile fileIn{fileName, "READONLY"};
268 fileIn.cd();
269 CollectObjectPaths(&fileIn, "", objectPaths);
270 fileIn.Close();
271 }
272 fvObjects[iFile].clear();
273 fvObjects[iFile].reserve(objectPaths.size());
274 fvObjects[iFile].insert(fvObjects[iFile].begin(), objectPaths.begin(), objectPaths.end());
275 LOG(info) << "Reading object list from files: done";
276}
277
278// ---------------------------------------------------------------------------------------------------------------------
279//
281{
282 if (pVal <= 0 || pVal >= 1) {
283 LOG(fatal) << "ObjectDB::SetPvalThreshold(): p-value threshold runs out the range (0, 1): " << pVal;
284 }
285 fPvalThresh = pVal;
286}
287
288// ---------------------------------------------------------------------------------------------------------------------
289//
290void ObjectDB::SetRatioRange(double min, double max)
291{
292 if (min > max || min < 0) {
293 LOG(fatal) << "ObjectDB::SetPvalThreshold(): min and max for ratio run out the range: min = " << min
294 << ", max = " << max;
295 }
296 fRatioMin = min;
297 fRatioMax = max;
298}
299
300
301// ---------------------------------------------------------------------------------------------------------------------
302//
303std::string ObjectDB::ToString(int verbose) const
304{
305 std::stringstream msg;
306 if (verbose > 0) {
307 msg << '\n';
308 msg << " ********************\n";
309 msg << " ** CBM QA-Checker **\n";
310 msg << " ********************\n\n";
311
312 msg << "\e[1mVersions\e[0m:\n";
313 for (size_t iV = 0; iV < fvVersionLabels.size(); ++iV) {
314 if (iV == (size_t) fDefVersionID) {
315 msg << "\t- " << fvVersionLabels[iV] << " (path: " << fvVersionPaths[iV] << ") -> \e[1;33mDEFAULT\e[0m\n";
316 }
317 else {
318 msg << "\t- " << fvVersionLabels[iV] << " (path: " << fvVersionPaths[iV] << ")\n";
319 }
320 }
321 msg << "\e[1mDatasets\e[0m:\n";
322 for (const auto& dataset : fvDatasets) {
323 msg << "\t- " << dataset << "\n";
324 }
325 msg << "\e[1mFiles\e[0m:\n";
326 for (size_t iF = 0; iF < fvFiles.size(); ++iF) {
327 msg << "\t- " << fvFiles[iF];
328 if (verbose > 1) {
329 msg << " with objects:\n";
330 for (const auto& object : fvObjects[iF]) {
331 msg << "\t\t- " << object << '\n';
332 }
333 }
334 else {
335 msg << '\n';
336 }
337 }
338 }
339 return msg.str();
340}
Database for processed objects in the QA checker framework (implementation)
friend fscal max(fscal x, fscal y)
friend fscal min(fscal x, fscal y)
static constexpr size_t size()
Definition KfSimdPseudo.h:2
A data base class for processed objects.
std::vector< std::string > fvFileLabels
Container of file labels (used in output)
int GetNofObjects() const
Gets total number of objects.
void ReadObjectList(int iFile)
Reads list of histograms from file.
std::string ToString(int verbose=1) const
String representation of the content.
static void CollectObjectPaths(TDirectory *pDir, const TString &parentPah, std::set< std::string > &paths)
Loops over ROOT-file and collects object paths.
std::string GetInputFileName(int iVersion, int iFile, int iDataset) const
Gets name of file from indexes of version, file and dataset.
std::vector< int > fvObjectFirstGlobIndex
First global index of object in a file.
std::vector< std::string > fvVersionPaths
Container of version paths.
void AddDataset(const char *dataset)
Adds dataset.
int GetNofDatasets() const
Gets number of datasets.
std::string fsInputRootPath
Root path for input files.
std::vector< std::vector< std::string > > fvObjects
Container of object names vs file id.
void SetPvalThreshold(double pVal)
Sets P-value threshold.
void SetRatioRange(double min, double max)
Sets ratio accepted range.
std::vector< std::string > fvDatasets
Container of dataset names.
int GetNofVersions() const
Gets number of versions.
std::vector< std::string > fvFiles
Container of file names.
void AddVersion(const char *label, const char *path)
Adds version.
std::vector< std::string > fvVersionLabels
Container of version labels.
double fRatioMin
Lower boundary for ratio deviation.
void SetDefaultLabel(const char *defaultLabel)
Sets default version label.
double fPvalThresh
P-value threshold for histograms equality.
double fRatioMax
Upper boundary for ratio deviation.
void ReadFromYAML(const char *configName)
Reads DB from the "checker" node of a YAML configuration file.
int fDefVersionID
Index of default version.
void Init()
Initializes the database.
std::string fsDefaultLabel
Name of default version label.
std::string fsOutputPath
Path to the output file.