CbmRoot
Loading...
Searching...
No Matches
CbmQaCheckerObjectDB.cxx
Go to the documentation of this file.
1/* Copyright (C) 2023-2024 GSI Helmholtzzentrum fuer Schwerionenforschung, Darmstadt
2 SPDX-License-Identifier: GPL-3.0-only
3 Authors: Sergei Zharko [committer] */
4
9
11
12#include "Logger.h"
13#include "TDirectory.h"
14#include "TFile.h"
15#include "TH1.h"
16#include "TString.h"
17
18#include <algorithm>
19#include <regex>
20#include <sstream>
21
22#include <yaml-cpp/yaml.h>
23
25
26// ---------------------------------------------------------------------------------------------------------------------
27//
29{
30 fvDatasets.clear();
31 fvFiles.clear();
32 fvFileLabels.clear();
33 fvObjects.clear();
34 fvVersionLabels.clear();
35 fvVersionPaths.clear();
37}
38
39// ---------------------------------------------------------------------------------------------------------------------
40//
41void ObjectDB::AddVersion(const char* label, const char* path)
42{
43 fvVersionLabels.push_back(label);
44 fvVersionPaths.push_back(path);
45}
46
47// ---------------------------------------------------------------------------------------------------------------------
48//
49void ObjectDB::AddDataset(const char* dataset) { fvDatasets.push_back(dataset); }
50
51// ---------------------------------------------------------------------------------------------------------------------
52//
53std::string ObjectDB::GetInputFileName(int iVersion, int iFile, int iDataset) const
54{
55 std::string res = fvFiles[iFile];
56 res = std::regex_replace(res, std::regex("\\%v"), fvVersionPaths[iVersion]);
57 res = std::regex_replace(res, std::regex("\\%d"), fvDatasets[iDataset]);
58 return res;
59}
60
61
62// ---------------------------------------------------------------------------------------------------------------------
63//
65{
66 // ----- Check consistency of input values
67 LOG_IF(fatal, !GetNofObjects()) << "ObjectDB: No objects were passed to the checker";
68 LOG_IF(fatal, GetNofDatasets() < 1) << "ObjectDB: No datasets were found, at least one dataset should be provided";
69 LOG_IF(fatal, GetNofVersions() < 2) << "ObjectDB: File handler should have at least two versions to compare ("
70 << GetNofVersions() << " were provided)";
71
72
73 // ----- Define output file
74 if (fsOutputPath.empty()) {
75 fsOutputPath = "QaCheckerOutput.root";
76 }
77
78 // ----- Define default version index
79 if (fsDefaultLabel.size()) {
80 auto it = std::find(fvVersionLabels.cbegin(), fvVersionLabels.cend(), fsDefaultLabel);
81 if (it == fvVersionLabels.cend()) {
82 std::stringstream msg;
83 msg << "ObjectDB: registered default label \"" << fsDefaultLabel << "\" is not found among the version labels:\n";
84 for (const auto& label : fvVersionLabels) {
85 msg << "\t- " << label << '\n';
86 }
87 LOG(fatal) << msg.str();
88 }
89 fDefVersionID = it - fvVersionLabels.cbegin();
90 }
91 else {
92 fDefVersionID = 0;
93 LOG(warn) << "ObjectDB: default version was not registered. Using the first version as the default one (\""
94 << fvVersionLabels[fDefVersionID] << "\")";
95 }
96 //fvObjects.clear();
97 //fvObjectParameters.clear();
98 fvObjects.resize(fvFiles.size()); // resize the vector of objects in files
99
100 // ----- Read object list from file
101 for (size_t iFile = 0; iFile < fvObjects.size(); ++iFile) {
102 if (fvObjects[iFile].size() == 0) {
103 this->ReadObjectList(iFile);
104 }
105 }
106
107 LOG(info) << this->ToString();
108
109 // ----- Init the object index vector
111 fvObjectFirstGlobIndex.resize(fvObjects.size() + 1, 0);
112 for (size_t iFile = 1; iFile <= fvObjects.size(); ++iFile) {
113 fvObjectFirstGlobIndex[iFile] = fvObjectFirstGlobIndex[iFile - 1] + fvObjects[iFile - 1].size();
114 }
115
116 // ----- Add root path of input, if it were defined
117 auto regexSlashes = std::regex("(/+)"); // regular expression for a sequence of consecutive slashes
118 for (auto& path : fvVersionPaths) {
119 if (fsInputRootPath.size()) {
120 path = fsInputRootPath + "/" + path;
121 }
122 path = std::regex_replace(path, regexSlashes, "/"); // replace all consecutive slashes with a single one
123 }
124}
125
126// ---------------------------------------------------------------------------------------------------------------------
127//
128void ObjectDB::ReadFromYAML(const char* configName)
129{
130 // ----- Open input file
131 YAML::Node config;
132 try {
133 config = YAML::LoadFile(configName)["checker"];
134 }
135 catch (const YAML::BadFile& exc) {
136 LOG(fatal) << "ObjectDB: configuration file " << configName << " does not exist";
137 }
138 catch (const YAML::ParserException& exc) {
139 LOG(fatal) << "ObjectDB: configuration file " << configName << " is badly formatted";
140 }
141 // --- Parse comparison settings into fvObjectParameters
142 std::unordered_map<std::string, uint32_t> mParIndexMap; // temporary map to store parameter labels and their IDs
143
144 //fvObjectParameters.clear();
145 if (const auto& settingsNode = config["settings"]) {
146 for (const auto& paramEntry : settingsNode) {
147 ObjectParameters params;
148 const std::string label = paramEntry.first.as<std::string>();
149 const auto& node = paramEntry.second;
150
151 params.ratioMin = node["ratio_min"].as<double>(fRatioMin);
152 params.ratioMax = node["ratio_max"].as<double>(fRatioMax);
153 params.pvalThreshold = node["pval_threshold"].as<double>(fPvalThresh);
154 params.compMethod = node["comp_method"].as<std::string>("E");
155
156 mParIndexMap[label] = fvObjectParameters.size();
157 fvObjectParameters.push_back(std::move(params));
158 }
159 }
160 // ----- Define file-object map
161 if (const auto& node = config["files"]) {
162 if (fvObjectFirstGlobIndex.size()) {
163 LOG(warn) << "ObjectDB: file-object map was defined before. Redefining it from the config file " << configName;
164 fvFiles.clear();
165 fvFileLabels.clear();
166 fvObjects.clear();
167 }
168 try {
169 // Calculate total number of objects and files
170 size_t nFiles = node.size();
171 fvFiles.reserve(nFiles);
172 fvFileLabels.reserve(nFiles);
173 fvObjects.reserve(nFiles);
174
175 // Fill vectors
176 for (const auto& fileNode : node) {
177 const auto& objectsNode = fileNode["objects"];
178 int nObjects = objectsNode ? objectsNode.size() : 0;
179 std::vector<std::pair<std::string, uint32_t>> objectsInFile;
180
181 if (nObjects > 0) {
182 for (const auto& objectNode : objectsNode) {
183 uint32_t parId = 0;
184 std::string name = objectNode["name"].as<std::string>();
185 // look up setting to parameter ID
186 if (auto parLabelNode = objectNode["setting"]) {
187 auto parKey = parLabelNode.as<std::string>();
188 auto keyIdIt = mParIndexMap.find(parKey);
189 if (keyIdIt != mParIndexMap.end()) {
190 parId = keyIdIt->second;
191 }
192 else {
193 std::stringstream msg;
194 msg << "Error: object '" << name << "' uses undefined setting '" << parKey << "'";
195 throw std::runtime_error(msg.str());
196 }
197 }
198 objectsInFile.emplace_back(name, parId);
199 }
200 }
201 fvFiles.push_back(fileNode["name"].as<std::string>());
202 fvFileLabels.push_back(fileNode["label"].as<std::string>());
203 fvObjects.push_back(std::move(objectsInFile));
204 }
205 }
206 catch (const YAML::InvalidNode& exc) {
207 LOG(fatal) << "ObjectDB: error while reading checker/files node from the config " << configName;
208 }
209 }
210 else {
211 LOG(warn) << "ObjectDB: node checker/inputformat is not defined in the config " << configName;
212 }
213
214 // ----- Define dataset names
215 if (const auto& node = config["datasets"]) {
216 LOG_IF(fatal, fvDatasets.size())
217 << "ObjectDB: dataset names were defined before. Please, use only one initialisation method:"
218 << " either configuration file, or setters of the checker::Core class";
219 try {
220 fvDatasets.reserve(node.size());
221 for (const auto& datasetNode : node) {
222 fvDatasets.push_back(datasetNode.as<std::string>());
223 }
224 }
225 catch (const YAML::InvalidNode& exc) {
226 LOG(fatal) << "ObjectDB:: error while reading checker/datasets node from the config " << configName;
227 }
228 }
229 else {
230 LOG(warn) << "ObjectDB: node checker/datasets is not defined in the config " << configName;
231 }
232
233 // ----- Define version names
234 if (const auto& node = config["versions"]) {
235 LOG_IF(fatal, fvVersionLabels.size())
236 << "ObjectDB: dataset names were defined before. Attempt to redefine dataset names from config " << configName;
237 try {
238 fvVersionLabels.reserve(node.size());
239 fvVersionPaths.reserve(node.size());
240 for (const auto& versionNode : node) {
241 fvVersionLabels.push_back(versionNode["label"].as<std::string>());
242 fvVersionPaths.push_back(versionNode["path"].as<std::string>());
243 }
244 }
245 catch (const YAML::InvalidNode& exc) {
246 LOG(fatal) << "ObjectDB:: error while reading checker/versions node from the config " << configName;
247 }
248 }
249 else {
250 LOG(warn) << "ObjectDB: node checker/versions is not defined in the config " << configName;
251 }
252
253 // ----- Define default version
254 if (const auto& node = config["default_label"]) {
255 try {
256 SetDefaultLabel(node.as<std::string>().c_str());
257 }
258 catch (const YAML::InvalidNode& exc) {
259 LOG(fatal) << "ObjectDB:: error while reading checker/default_label node from the config " << configName;
260 }
261 }
262
263
264}
265
266// ---------------------------------------------------------------------------------------------------------------------
267//
268void ObjectDB::CollectObjectPaths(TDirectory* pDir, const TString& parentPath, std::set<std::string>& paths)
269{
270 for (auto&& pKey : *(pDir->GetListOfKeys())) {
271 TString sName = parentPath + pKey->GetName();
272 if (gFile->Get<TH1>(sName)) {
273 paths.insert(sName.Data());
274 }
275 else if (auto* pSubDir = gFile->Get<TDirectory>(sName)) {
276 CollectObjectPaths(pSubDir, sName + "/", paths);
277 }
278 }
279}
280
281// ---------------------------------------------------------------------------------------------------------------------
282//
283
285{
286 // TODO: test performance, probably unordered_set will fit better
287 std::set<std::string> objectPaths;
288 LOG(info) << "Reading object list from files: ...";
289
290 for (int iDs = 0; iDs < static_cast<int>(fvDatasets.size()); ++iDs) {
291 TString fileName = this->GetInputFileName(fDefVersionID, iFile, iDs);
292 LOG(info) << "- file: " << fileName;
293 TFile fileIn{fileName, "READONLY"};
294 fileIn.cd();
295 CollectObjectPaths(&fileIn, "", objectPaths);
296 fileIn.Close();
297 }
298
299 // Prepare a temporary container of <path, param> pairs
300 std::vector<std::pair<std::string, uint32_t>> objectsInFile;
301 objectsInFile.reserve(objectPaths.size());
302
303 for (const auto& path : objectPaths) {
304 objectsInFile.emplace_back(path, 0); // Default param is 0
305 }
306
307 // Move the result to fvObjects[iFile]
308 fvObjects[iFile] = std::move(objectsInFile);
309
310 LOG(info) << "Reading object list from files: done";
311}
312
313
314// ---------------------------------------------------------------------------------------------------------------------
315//
317{
318 if (pVal <= 0 || pVal >= 1) {
319 LOG(fatal) << "ObjectDB::SetPvalThreshold(): p-value threshold runs out the range (0, 1): " << pVal;
320 }
321 fPvalThresh = pVal;
322}
323
324// ---------------------------------------------------------------------------------------------------------------------
325//
326void ObjectDB::SetRatioRange(double min, double max)
327{
328 if (min > max || min < 0) {
329 LOG(fatal) << "ObjectDB::SetPvalThreshold(): min and max for ratio run out the range: min = " << min
330 << ", max = " << max;
331 }
332 fRatioMin = min;
333 fRatioMax = max;
334}
335
336
337// ---------------------------------------------------------------------------------------------------------------------
338//
339std::string ObjectDB::ToString(int verbose) const
340{
341 std::stringstream msg;
342 if (verbose > 0) {
343 msg << '\n';
344 msg << " ********************\n";
345 msg << " ** CBM QA-Checker **\n";
346 msg << " ********************\n\n";
347
348 msg << "\e[1mVersions\e[0m:\n";
349 for (size_t iV = 0; iV < fvVersionLabels.size(); ++iV) {
350 if (iV == static_cast<size_t>(fDefVersionID)) {
351 msg << "\t- " << fvVersionLabels[iV] << " (path: " << fvVersionPaths[iV] << ") -> \e[1;33mDEFAULT\e[0m\n";
352 }
353 else {
354 msg << "\t- " << fvVersionLabels[iV] << " (path: " << fvVersionPaths[iV] << ")\n";
355 }
356 }
357
358 msg << "\e[1mDatasets\e[0m:\n";
359 for (const auto& dataset : fvDatasets) {
360 msg << "\t- " << dataset << "\n";
361 }
362
363 msg << "\e[1mFiles\e[0m:\n";
364 for (size_t iF = 0; iF < fvFiles.size(); ++iF) {
365 msg << "\t- " << fvFiles[iF];
366 if (verbose > 1) {
367 msg << " with objects:\n";
368 for (const auto& object : fvObjects[iF]) {
369 msg << "\t\t- " << object.first << " (param: " << object.second << ")\n";
370 }
371 }
372 else {
373 msg << '\n';
374 }
375 }
376 }
377 return msg.str();
378}
Database for processed objects in the QA checker framework (implementation)
friend fscal max(fscal x, fscal y)
friend fscal min(fscal x, fscal y)
static constexpr size_t size()
Definition KfSimdPseudo.h:2
void Clear()
Clears content.
A data base class for processed objects.
std::vector< std::string > fvFileLabels
Container of file labels (used in output)
int GetNofObjects() const
Gets total number of objects.
void ReadObjectList(int iFile)
Reads list of histograms from file.
std::string ToString(int verbose=1) const
String representation of the content.
static void CollectObjectPaths(TDirectory *pDir, const TString &parentPah, std::set< std::string > &paths)
Loops over ROOT-file and collects object paths.
std::string GetInputFileName(int iVersion, int iFile, int iDataset) const
Gets name of file from indexes of version, file and dataset.
std::vector< int > fvObjectFirstGlobIndex
First global index of object in a file.
std::vector< std::string > fvVersionPaths
Container of version paths.
void AddDataset(const char *dataset)
Adds dataset.
int GetNofDatasets() const
Gets number of datasets.
std::vector< ObjectParameters > fvObjectParameters
Container of object parameters.
std::string fsInputRootPath
Root path for input files.
void SetPvalThreshold(double pVal)
Sets P-value threshold.
void SetRatioRange(double min, double max)
Sets ratio accepted range.
std::vector< std::string > fvDatasets
Container of dataset names.
int GetNofVersions() const
Gets number of versions.
std::vector< std::string > fvFiles
Container of file names.
void AddVersion(const char *label, const char *path)
Adds version.
std::vector< std::string > fvVersionLabels
Container of version labels.
double fRatioMin
Lower boundary for ratio deviation.
void SetDefaultLabel(const char *defaultLabel)
Sets default version label.
double fPvalThresh
P-value threshold for histograms equality.
double fRatioMax
Upper boundary for ratio deviation.
void ReadFromYAML(const char *configName)
Reads DB from YAML node.
int fDefVersionID
Index of default version.
void Init()
Initializes the database.
std::string fsDefaultLabel
Name of default version label.
std::vector< std::vector< std::pair< std::string, uint32_t > > > fvObjects
Container of object names and corresponding parameter IDs in fvObjectParameters.
std::string fsOutputPath
Path to the output file.