CbmRoot
Loading...
Searching...
No Matches
TimingsFormat.cxx
Go to the documentation of this file.
1/* Copyright (C) 2023-2024 FIAS Frankfurt Institute for Advanced Studies, Frankfurt / Main
2 SPDX-License-Identifier: GPL-3.0-only
3 Authors: Felix Weiglhofer [committer] */
4#include "TimingsFormat.h"
5
6#include <iomanip>
7#include <sstream>
8
9#include <fmt/format.h>
10#include <xpu/host.h>
11#include <yaml-cpp/emitter.h>
12
13// Helper functions
14namespace
15{
16 // Remove the cbm::algo:: prefix from kernel names
17 std::string_view KernelNameStripped(const xpu::kernel_timings& kt)
18 {
19 constexpr std::string_view prefix = "cbm::algo::";
20 if (kt.name().compare(0, prefix.size(), prefix) == 0) {
21 return kt.name().substr(prefix.size());
22 }
23 else {
24 return kt.name();
25 }
26 }
27
28 void MakeReportYamlEntry(std::string_view name, double time, double throughput, YAML::Emitter& ss)
29 {
30 if (!std::isnormal(throughput)) {
31 throughput = 0;
32 }
33 ss << YAML::Key << std::string{name};
34 ss << YAML::Flow << YAML::Value;
35 ss << YAML::BeginMap;
36 ss << YAML::Key << "time" << YAML::Value << time;
37 ss << YAML::Key << "throughput" << YAML::Value << throughput;
38 ss << YAML::EndMap;
39 }
40
41 void MakeReportYamlTimer(const xpu::timings& t, YAML::Emitter& ss)
42 {
43 ss << YAML::BeginMap;
44 MakeReportYamlEntry("wall", t.wall(), t.throughput(), ss);
45 MakeReportYamlEntry("memcpy_h2d", t.copy(xpu::h2d), t.throughput_copy(xpu::h2d), ss);
46 MakeReportYamlEntry("memcpy_d2h", t.copy(xpu::d2h), t.throughput_copy(xpu::d2h), ss);
47 MakeReportYamlEntry("memset", t.memset(), t.throughput_memset(), ss);
48 for (const auto& st : t.children()) {
49 if (st.kernels().empty() && st.children().empty()) {
50 MakeReportYamlEntry(st.name(), st.wall(), st.throughput(), ss);
51 }
52 else {
53 ss << YAML::Key << std::string{st.name()} << YAML::Value;
54 MakeReportYamlTimer(st, ss);
55 }
56 }
57 for (const auto& kt : t.kernels()) {
58 MakeReportYamlEntry(KernelNameStripped(kt), kt.total(), kt.throughput(), ss);
59 }
60
61 ss << YAML::EndMap;
62 }
63
64} // namespace
65
66namespace cbm::algo
67{
68
70
71 public:
72 void Begin(size_t align)
73 {
74 fAlign = align;
75 fSS = std::stringstream();
76 }
77
78 void Title(std::string_view title)
79 {
80 Indent();
81 fSS << fmt::format("{:<{}}\n", title, fAlign);
82 }
83
84 std::string Finalize() { return fSS.str(); }
85
86 void Fmt(const xpu::timings& t)
87 {
88 fIndent += 2;
89 Measurement("Memcpy(h2d)", t.copy(xpu::h2d), t.throughput_copy(xpu::h2d));
90 NewLine();
91 Measurement("Memcpy(d2h)", t.copy(xpu::d2h), t.throughput_copy(xpu::d2h));
92 NewLine();
93 Measurement("Memset", t.memset(), t.throughput_memset());
94 NewLine();
95
96 // Merge subtimers with identical names
97 // Useful eg in unpacking, where unpacker might be called multiple times per TS
98 std::unordered_map<std::string, xpu::timings> subtimers;
99 for (xpu::timings& st : t.children()) {
100 subtimers[std::string(st.name())].merge(st);
101 }
102
103 for (auto& [name, st] : subtimers) {
104 if (st.kernels().empty() && st.children().empty()) {
105 Measurement(name, st.wall(), st.throughput());
106 NewLine();
107 }
108 else {
109 Title(name);
110 Fmt(st);
111 NewLine();
112 }
113 }
114
115 for (xpu::kernel_timings& kt : t.kernels()) {
116 Measurement(KernelNameStripped(kt), kt.total(), kt.throughput());
117 NewLine();
118 }
119
120 if (!t.kernels().empty()) {
121 Measurement("Kernel time", t.kernel_time(), t.throughput_kernels());
122 NewLine();
123 }
124 Measurement("Wall time", t.wall(), t.throughput());
125 fIndent -= 2;
126 }
127
128 void FmtSubtimers(const xpu::timings& t)
129 {
130 const auto subtimes = t.children();
131 for (auto it = subtimes.begin(); it != subtimes.end(); ++it) {
132 Title(it->name());
133 Fmt(*it);
134 if (std::next(it) != subtimes.end()) {
135 NewLine();
136 }
137 }
138 }
139
140 void FmtSummary(const xpu::timings& t)
141 {
142 fIndent += 2;
143 Measurement("Memcpy(h2d)", t.copy(xpu::h2d), t.throughput_copy(xpu::h2d));
144 NewLine();
145 Measurement("Memcpy(d2h)", t.copy(xpu::d2h), t.throughput_copy(xpu::d2h));
146 NewLine();
147 Measurement("Memset", t.memset(), t.throughput_memset());
148 NewLine();
149 Measurement("Kernel time", t.kernel_time(), t.throughput_kernels());
150 NewLine();
151 Measurement("Wall time", t.wall(), t.throughput());
152 fIndent -= 2;
153 }
154
155 void NewLine() { fSS << "\n"; }
156
157 private:
158 void Measurement(std::string_view name, f64 time, f64 throughput)
159 {
160 Indent();
161 fSS << std::setw(fAlign) << std::setfill(' ') << std::left << fmt::format("{}:", name);
162 Real(time, 10, 3, "ms");
163 if (std::isnormal(throughput)) {
164 fSS << " (";
165 Real(throughput, 7, 3, "GB/s");
166 fSS << ")";
167 }
168 }
169
170 void Real(double x, int width, int precision, std::string_view unit)
171 {
172 fSS << std::setw(width) << std::setfill(' ') << std::right << std::fixed << std::setprecision(precision) << x
173 << " " << unit;
174 }
175
176 void Indent() { fSS << std::setw(fIndent) << std::setfill(' ') << std::left << ""; }
177
size_t fAlign{0};       // Field width used for titles and measurement labels, set by Begin()
size_t fIndent{0};      // Number of spaces written by Indent(); raised and lowered by 2 while formatting
std::stringstream fSS;  // Collects the report text returned by Finalize()
181
182 }; // class TimingsFormat
183
184 std::string MakeReport(std::string_view title, const xpu::timings& t, size_t align)
185 {
186 TimingsFormat tf;
187 tf.Begin(align);
188 tf.Title(title);
189 tf.Fmt(t);
190 return tf.Finalize();
191 }
192
193 std::string MakeReportSubtimers(std::string_view title, const xpu::timings& t, size_t align)
194 {
195 TimingsFormat tf;
196 tf.Begin(align);
197 tf.Title(title);
198 tf.FmtSubtimers(t);
199 return tf.Finalize();
200 }
201
202 std::string MakeReportSummary(std::string_view title, const xpu::timings& t, size_t align)
203 {
204 TimingsFormat tf;
205 tf.Begin(align);
206 tf.Title(title);
207 tf.FmtSummary(t);
208 return tf.Finalize();
209 }
210
211 std::string MakeReportYaml(const xpu::timings& t)
212 {
213 YAML::Emitter ss;
214 ss << YAML::BeginDoc;
215 ss << YAML::Precision(6);
216 ss << YAML::BeginMap;
217 for (const auto& subtimer : t.children()) {
218 ss << YAML::Key << std::string{subtimer.name()};
219 ss << YAML::Value;
220 MakeReportYamlTimer(subtimer, ss);
221 }
222 ss << YAML::EndMap;
223 ss << YAML::EndDoc;
224 return ss.c_str();
225 }
226
227} // namespace cbm::algo
void Begin(size_t align)
void FmtSubtimers(const xpu::timings &t)
void Measurement(std::string_view name, f64 time, f64 throughput)
void Title(std::string_view title)
void Fmt(const xpu::timings &t)
void Real(double x, int width, int precision, std::string_view unit)
void FmtSummary(const xpu::timings &t)
std::string MakeReportSubtimers(std::string_view title, const xpu::timings &t, size_t align)
Print timings from subtimers.
std::string MakeReportYaml(const xpu::timings &t)
Print timings in YAML format.
std::string MakeReport(std::string_view title, const xpu::timings &t, size_t align)
Print timings from top-level times and subtimers.
double f64
Definition Definitions.h:25
std::string MakeReportSummary(std::string_view title, const xpu::timings &t, size_t align)
Only print the top-level times (Elapsed time, total kernel time, memcpy and memset times)....