11#include <yaml-cpp/emitter.h>
// Strips the "cbm::algo::" namespace prefix from a kernel's name.
// If kt.name() starts with the prefix, the remainder of the name is returned.
// NOTE(review): the closing braces and the return used for names without the
// prefix fall on listing lines that are not part of this excerpt.
17 std::string_view KernelNameStripped(
const xpu::kernel_timings& kt)
19 constexpr std::string_view prefix =
"cbm::algo::";
// compare(0, n, prefix) == 0 holds exactly when the first n characters of the name equal the prefix.
20 if (kt.name().compare(0, prefix.size(), prefix) == 0) {
21 return kt.name().substr(prefix.size());
// Writes one report entry to the YAML emitter `ss`: the key `name`, followed by
// a value that carries the keys "time" and "throughput".
// NOTE(review): the body of the isnormal check and the map begin/end emissions
// are on listing lines not shown here; presumably the check substitutes a
// default for unusable throughput values. Confirm against the full file.
28 void MakeReportYamlEntry(std::string_view name,
double time,
double throughput, YAML::Emitter& ss)
// std::isnormal is false for zero, subnormal, infinite and NaN values.
30 if (!std::isnormal(throughput)) {
// The name is copied into a std::string before being handed to the emitter.
33 ss << YAML::Key << std::string{name};
// YAML::Flow requests flow (inline) style for the value that follows.
34 ss << YAML::Flow << YAML::Value;
36 ss << YAML::Key <<
"time" << YAML::Value << time;
37 ss << YAML::Key <<
"throughput" << YAML::Value << throughput;
// Writes the timings of `t` to the YAML emitter `ss` and recurses into its children.
// Emitted in order: the fixed entries "wall", "memcpy_h2d", "memcpy_d2h" and
// "memset", then one item per child timer, then one entry per kernel.
// NOTE(review): the map begin/end emissions, the `else` keyword and the closing
// braces are on listing lines not shown here.
41 void MakeReportYamlTimer(
const xpu::timings& t, YAML::Emitter& ss)
44 MakeReportYamlEntry(
"wall", t.wall(), t.throughput(), ss);
45 MakeReportYamlEntry(
"memcpy_h2d", t.copy(xpu::h2d), t.throughput_copy(xpu::h2d), ss);
46 MakeReportYamlEntry(
"memcpy_d2h", t.copy(xpu::d2h), t.throughput_copy(xpu::d2h), ss);
47 MakeReportYamlEntry(
"memset", t.memset(), t.throughput_memset(), ss);
// A child with neither kernels nor children of its own is written as a single
// time/throughput entry under its name.
48 for (
const auto& st : t.children()) {
49 if (st.kernels().empty() && st.children().empty()) {
50 MakeReportYamlEntry(st.name(), st.wall(), st.throughput(), ss);
// Presumably the else branch: the child's name becomes a key whose value is
// produced by the recursive call.
53 ss << YAML::Key << std::string{st.name()} << YAML::Value;
54 MakeReportYamlTimer(st, ss);
// Kernels are listed under their name with the "cbm::algo::" prefix stripped.
57 for (
const auto& kt : t.kernels()) {
58 MakeReportYamlEntry(KernelNameStripped(kt), kt.total(), kt.throughput(), ss);
75 fSS = std::stringstream();
// Appends `title` to fSS, left-aligned and padded to fAlign columns, followed by a newline.
// NOTE(review): other statements of this method may sit on listing lines not shown here.
78 void Title(std::string_view title)
// "{:<{}}" takes the field width from the second argument (fAlign).
81 fSS << fmt::format(
"{:<{}}\n", title,
fAlign);
// Formats the timings of `t` as a sequence of Measurement lines: the copy and
// memset times first, then the merged subtimers are visited, then each kernel,
// then the kernel total (if any kernels exist) and the wall time.
// NOTE(review): several listing lines of this method, including the body of the
// subtimer loop, are not part of this excerpt.
86 void Fmt(
const xpu::timings& t)
89 Measurement(
"Memcpy(h2d)", t.copy(xpu::h2d), t.throughput_copy(xpu::h2d));
91 Measurement(
"Memcpy(d2h)", t.copy(xpu::d2h), t.throughput_copy(xpu::d2h));
93 Measurement(
"Memset", t.memset(), t.throughput_memset());
// Children that share a name are merged into a single timings object, keyed by that name.
98 std::unordered_map<std::string, xpu::timings> subtimers;
99 for (xpu::timings& st : t.children()) {
100 subtimers[std::string(st.name())].merge(st);
// std::unordered_map iteration order is unspecified, so the merged subtimers
// are not visited in any guaranteed order.
103 for (
auto& [name, st] : subtimers) {
104 if (st.kernels().empty() && st.children().empty()) {
// Each kernel is reported under its name with the "cbm::algo::" prefix stripped.
115 for (xpu::kernel_timings& kt : t.kernels()) {
116 Measurement(KernelNameStripped(kt), kt.total(), kt.throughput());
// The kernel total is reported only when there is at least one kernel.
120 if (!t.kernels().empty()) {
121 Measurement(
"Kernel time", t.kernel_time(), t.throughput_kernels());
124 Measurement(
"Wall time", t.wall(), t.throughput());
130 const auto subtimes = t.children();
131 for (
auto it = subtimes.begin(); it != subtimes.end(); ++it) {
134 if (std::next(it) != subtimes.end()) {
143 Measurement(
"Memcpy(h2d)", t.copy(xpu::h2d), t.throughput_copy(xpu::h2d));
145 Measurement(
"Memcpy(d2h)", t.copy(xpu::d2h), t.throughput_copy(xpu::d2h));
147 Measurement(
"Memset", t.memset(), t.throughput_memset());
149 Measurement(
"Kernel time", t.kernel_time(), t.throughput_kernels());
151 Measurement(
"Wall time", t.wall(), t.throughput());
161 fSS << std::setw(
fAlign) << std::setfill(
' ') << std::left << fmt::format(
"{}:", name);
162 Real(time, 10, 3,
"ms");
163 if (std::isnormal(throughput)) {
165 Real(throughput, 7, 3,
"GB/s");
// Streams `x` into fSS right-aligned in a field of `width` characters (space
// filled), in fixed-point notation with `precision` digits after the decimal point.
// NOTE(review): the statement continues on listing lines not shown here; how
// `unit` is appended cannot be confirmed from this excerpt.
170 void Real(
double x,
int width,
int precision, std::string_view unit)
172 fSS << std::setw(width) << std::setfill(
' ') << std::right << std::fixed << std::setprecision(precision) <<
x
184 std::string
MakeReport(std::string_view title,
const xpu::timings& t,
size_t align)
214 ss << YAML::BeginDoc;
215 ss << YAML::Precision(6);
216 ss << YAML::BeginMap;
217 for (
const auto& subtimer : t.children()) {
218 ss << YAML::Key << std::string{subtimer.name()};
220 MakeReportYamlTimer(subtimer, ss);
std::string MakeReportSubtimers(std::string_view title, const xpu::timings &t, size_t align)
Print timings from subtimers.
std::string MakeReportYaml(const xpu::timings &t)
Print timings in YAML format.
std::string MakeReport(std::string_view title, const xpu::timings &t, size_t align)
Print timings from top-level times and subtimers.
std::string MakeReportSummary(std::string_view title, const xpu::timings &t, size_t align)
Only print the top-level times (Elapsed time, total kernel time, memcpy and memset times)....