// NOTE(review): extraction artifact — original source line numbers are embedded
// in the text and several intermediate lines are elided; code kept byte-identical.
// Derive per-timeslice sizes: the detector has nModules modules, each processed
// as two independent "sides" (hence nModuleSides = 2 * nModules).
57 size_t nModules =
fPars->setup.modules.size();
58 size_t nModuleSides = nModules * 2;
59 size_t nDigisTotal = digis.size();
// Static-allocation guards: when memory is not dynamic the digi counts must fit
// the preallocated buffers, otherwise the timeslice is rejected with a
// ProcessingError. NOTE(review): lines between the IsDynamic() test and the
// first throw are elided here — presumably the per-module digi count check;
// confirm against the full source.
66 if (
fPars->memory.IsDynamic())
70 throw ProcessingError(
"STS Hitfinder Chain: Too many digis per module for static allocation: {} > {}",
// Second guard: total digis in the timeslice must not exceed the static budget.
73 if (nDigisTotal >
fPars->memory.maxNDigisPerTS) {
74 throw ProcessingError(
"STS Hitfinder Chain: Too many digis per timeslice for static allocation: {} > {}",
75 nDigisTotal,
fPars->memory.maxNDigisPerTS);
// Trace-level gate: with FairLogger the console severity is queried directly;
// otherwise the chain's own Opts().LogLevel() is consulted.
81#ifdef CBM_ONLINE_USE_FAIRLOGGER
82 if (fair::Logger::GetConsoleSeverity() == fair::Severity::trace) {
84 if (
Opts().LogLevel() == trace) {
// Clamp the expected hit count to the allocated capacity so downstream code
// never indexes past the per-module hit buffer; log the truncation as an error.
94 if (hfc.maxHitsPerModule > hfc.hitsAllocatedPerModule) {
95 L_(error) << fmt::format(
"STS Hitfinder Chain: Too many hits per module for static allocation: {} > {}",
96 hfc.maxHitsPerModule, hfc.hitsAllocatedPerModule);
97 hfc.maxHitsPerModule = hfc.hitsAllocatedPerModule;
// Reset all per-timeslice device counters/state to zero before processing.
// These are asynchronous memsets enqueued on the xpu queue.
102 L_(debug) <<
"STS Hitfinder Chain: Clearing buffers...";
107 queue.memset(hfc.monitor, 0);
108 queue.memset(hfc.digiConnectorsPerModule, 0);
109 queue.memset(hfc.channelOffsetPerModule, 0);
114 queue.memset(hfc.nClustersPerModule, 0);
116 queue.memset(hfc.nHitsPerModule, 0);
117 queue.memset(hfc.maxClusterTimeErrorByModuleSide, 0);
// Upload the flattened digis to the device. The host/device pointer comparison
// skips the copy on backends where both views alias the same memory
// (presumably the CPU backend — confirm against xpu semantics).
119 L_(debug) <<
"STS Hitfinder Chain: Copy digis buffers...";
124 const CbmStsDigi* digisH = xpu::h_view(hfc.digisPerModule).data();
125 CbmStsDigi* digisD = hfc.digisPerModule.get();
126 if (digisH != digisD) queue.copy(digisH, digisD, nDigisTotal);
127 queue.copy(hfc.digiOffsetPerModule, xpu::h2d);
// Launch the SortDigis kernel with one block per module side, then record the
// processed byte volume for the kernel's throughput accounting.
129 L_(debug) <<
"STS Hitfinder Chain: Sort Digis...";
132 queue.launch<
SortDigis>(xpu::n_blocks(nModuleSides));
133 xpu::k_add_bytes<SortDigis>(nDigisTotal *
sizeof(
CbmStsDigi));
// Debug-only verification: at trace level, copy the (now sorted) digis and
// their offsets back to the host so the sort order can be checked.
134#ifdef CBM_ONLINE_USE_FAIRLOGGER
135 if (fair::Logger::GetConsoleSeverity() == fair::Severity::trace) {
137 if (
Opts().LogLevel() == trace) {
139 L_(trace) <<
"Ensuring STS digis are sorted...";
140 queue.copy(hfc.digisPerModule, xpu::d2h);
141 queue.copy(hfc.digiOffsetPerModule, xpu::d2h);
// Cluster finding: when the multi-kernel variant is disabled, a single fused
// FindClusters kernel runs over all module sides. The k_add_bytes calls below
// attribute the digi byte volume to the three sub-kernels of the multi-kernel
// path (ChannelOffsets, CreateDigiConnections, CreateClusters).
// NOTE(review): the multi-kernel launch branch itself is elided in this view.
146 L_(debug) <<
"STS Hitfinder Chain: Find Clusters...";
147 if (!
Params().
sts.findClustersMultiKernels) {
148 queue.launch<
FindClusters>(xpu::n_blocks(nModuleSides));
152 xpu::k_add_bytes<ChannelOffsets>(nDigisTotal *
sizeof(
CbmStsDigi));
154 xpu::k_add_bytes<CreateDigiConnections>(nDigisTotal *
sizeof(
CbmStsDigi));
156 xpu::k_add_bytes<CreateClusters>(nDigisTotal *
sizeof(
CbmStsDigi));
// Debug-only verification at trace level: pull channel offsets and cluster
// data back to the host for consistency checks (the checks themselves are
// elided in this view).
158#ifdef CBM_ONLINE_USE_FAIRLOGGER
159 if (fair::Logger::GetConsoleSeverity() == fair::Severity::trace) {
161 if (
Opts().LogLevel() == trace) {
163 L_(trace) <<
"Ensuring STS channel offsets correct...";
// buffer_prop exposes the element count of the device buffer so the host
// mirror can be sized to match before the copy.
164 xpu::buffer_prop propsOffset{hfc.channelOffsetPerModule};
165 std::vector<u32> channelOffsetPerModule;
166 channelOffsetPerModule.resize(propsOffset.size());
167 queue.copy(hfc.channelOffsetPerModule.get(), channelOffsetPerModule.data(), propsOffset.size());
171 L_(trace) <<
"Ensuring STS clusters are ok...";
172 xpu::buffer_prop props{hfc.clusterIdxPerModule};
174 std::vector<ClusterIdx> clusterIdxPerModule;
175 clusterIdxPerModule.resize(props.size());
// One counter per module side (modules * 2), zero-initialized.
176 std::vector<PaddedToCacheLine<int>> nClustersPerModule;
177 nClustersPerModule.resize(
fPars->setup.modules.size() * 2, 0);
179 queue.copy(hfc.clusterIdxPerModule.get(), clusterIdxPerModule.data(), props.size());
180 queue.copy(hfc.nClustersPerModule.get(), nClustersPerModule.data(), nClustersPerModule.size());
// Sort clusters (one block per module side), then read back the per-module-side
// cluster counts — they are needed on the host to size the FindHits launch.
185 L_(debug) <<
"STS Hitfinder Chain: Sort Clusters...";
186 queue.launch<
SortClusters>(xpu::n_blocks(nModuleSides));
188 L_(debug) <<
"STS Hitfinder Chain: Find Hits...";
189 queue.copy(hfc.nClustersPerModule, xpu::d2h);
// Total clusters across all module sides; feeds SortClusters byte accounting.
// NOTE(review): std::accumulate with an int-literal initial value (0)
// accumulates in int despite the size_t result — potential overflow for very
// large counts; consider size_t{0} in the full source.
191 xpu::h_view nClusters(hfc.nClustersPerModule);
192 size_t nClustersTotal = std::accumulate(nClusters.begin(), nClusters.end(), 0);
193 xpu::k_add_bytes<SortClusters>(nClustersTotal *
sizeof(
ClusterIdx));
195 size_t nClustersFront = std::accumulate(nClusters.begin(), nClusters.begin() + nModules, 0);
195 size_t nClustersFront = std::accumulate(nClusters.begin(), nClusters.begin() + nModules, 0);
197 bool isCpu = xpu::device::active().backend() == xpu::cpu;
200 xpu::grid findHitsG = xpu::n_threads(isCpu ? threadsCPU : nClustersFront);
202 xpu::k_add_bytes<FindHits>(nClustersTotal *
sizeof(
sts::Cluster));
204 queue.copy(hfc.nHitsPerModule, xpu::d2h);
208 queue.copy(hfc.monitor, xpu::d2h);
214 xpu::scoped_timer hitStreams_{
"Sort Hits"};
// Trace-level gate (body elided in this view).
219#ifdef CBM_ONLINE_USE_FAIRLOGGER
220 if (fair::Logger::GetConsoleSeverity() == fair::Severity::trace) {
222 if (
Opts().LogLevel() == trace) {
// Cluster bucket overflow handling: the device-side monitor counts clusters
// that did not fit their module-side bucket. Report the global count, then
// per module side report and clamp so later reads stay within allocation.
228 xpu::h_view monitor(hfc.monitor);
232 if (monitor[0].nClusterBucketOverflow > 0) {
233 L_(error) <<
"STS Hitfinder Chain: Cluster bucket overflow! " << monitor[0].nClusterBucketOverflow
234 <<
" clusters were discarded!";
236 for (
size_t m = 0; m < nModules * 2; m++) {
237 if (
static_cast<size_t>(nClusters[m]) > hfc.maxClustersPerModule) {
238 L_(error) <<
"STS Hitfinder Chain: Cluster bucket overflow in module " << m <<
" with " << *nClusters[m]
239 <<
" (of " << hfc.maxClustersPerModule <<
" max)"
// Clamp to capacity: the excess clusters were never stored on the device.
241 nClusters[m] = hfc.maxClustersPerModule;
// Hit bucket overflow handling, mirroring the cluster case above: report the
// global discarded-hit count, then per module report occupancy context
// (clusters and hits-per-cluster ratio) and clamp the count to capacity.
246 xpu::h_view nHits(hfc.nHitsPerModule);
247 if (monitor[0].nHitBucketOverflow > 0) {
248 L_(error) <<
"STS Hitfinder Chain: Hit bucket overflow! " << monitor[0].nHitBucketOverflow
249 <<
" hits were discarded!";
251 for (
size_t m = 0; m < nModules; m++) {
252 const size_t nHitsModule = nHits[m];
253 const size_t nClustersModule = nClusters[m];
// Guard against divide-by-zero when a module produced no clusters.
254 const float hitsPerCluster = nClustersModule > 0 ? nHitsModule / float(nClustersModule) : 0;
255 if (nHitsModule > hfc.maxHitsPerModule) {
256 L_(error) <<
"STS Hitfinder Chain: Hit bucket overflow in module " << m <<
" with " << nHitsModule <<
" (of "
257 << hfc.maxHitsPerModule <<
" max) hits! "
258 <<
"Module has " << nClustersModule <<
" clusters. " << hitsPerCluster <<
" hits per cluster.";
// Clamp to capacity: overflowed hits were never stored on the device.
259 nHits[m] = hfc.maxHitsPerModule;
// Summarize the timeslice and hand the cluster payload to the result object.
// NOTE(review): same int-initial-value accumulate caveat as for nClustersTotal.
267 size_t nHitsTotal = std::accumulate(nHits.begin(), nHits.end(), 0);
268 L_(debug) <<
"Timeslice contains " << nHitsTotal <<
" STS hits and " << nClustersTotal <<
" STS clusters";
271 result.
clusters = std::move(clusters);
// One-time setup upload of detector constants to the device.
// nChannels is halved: presumably channels per module *side* — TODO confirm.
288 const int nChannels =
fPars->setup.nChannels / 2;
289 const int nModules =
fPars->setup.modules.size();
290 const int nModuleSides = 2 * nModules;
// Landau energy-loss table: allocate an io buffer and copy the host values.
300 size_t nLandauTableEntries =
fPars->setup.landauTable.values.size();
301 fHitfinder.landauTableSize = nLandauTableEntries;
303 fHitfinder.landauTable.reset(nLandauTableEntries, xpu::buf_io);
304 q.copy(
fPars->setup.landauTable.values.data(),
fHitfinder.landauTable.get(), nLandauTableEntries);
// Per-module local→global transforms, stored flat: 9 floats (3x3 rotation)
// and 3 floats (translation) per module.
307 fHitfinder.localToGlobalRotationByModule.reset(9 * nModules, xpu::buf_io);
308 fHitfinder.localToGlobalTranslationByModule.reset(3 * nModules, xpu::buf_io);
311 xpu::h_view hRot(
fHitfinder.localToGlobalRotationByModule);
312 xpu::h_view hTrans(
fHitfinder.localToGlobalTranslationByModule);
313 for (
int m = 0; m < nModules; m++) {
314 const auto&
module = fPars->setup.modules.at(m);
315 std::copy_n(module.localToGlobal.rotation.data(), 9, &hRot[m * 9]);
316 std::copy_n(module.localToGlobal.translation.data(), 3, &hTrans[m * 3]);
320 q.copy(
fHitfinder.localToGlobalRotationByModule, xpu::h2d);
321 q.copy(
fHitfinder.localToGlobalTranslationByModule, xpu::h2d);
// Per-module sensor geometry parameters copied field-by-field into the
// GPU-side struct layout.
324 fHitfinder.sensorPars.reset(nModules, xpu::buf_io);
325 xpu::h_view hSensorPars(
fHitfinder.sensorPars);
326 for (
int m = 0; m < nModules; m++) {
327 const auto&
module = fPars->setup.modules.at(m);
328 auto& gpuPars = hSensorPars[m];
329 gpuPars.dY =
module.dY;
330 gpuPars.pitch =
module.pitch;
331 gpuPars.stereoF =
module.stereoF;
332 gpuPars.stereoB =
module.stereoB;
333 gpuPars.lorentzF =
module.lorentzF;
334 gpuPars.lorentzB =
module.lorentzB;
// Device-only scratch: one max-cluster-time-error slot per module side.
339 fHitfinder.maxClusterTimeErrorByModuleSide.reset(nModuleSides, xpu::buf_device);
// Dynamic (per-timeslice) buffer allocation sized from the actual digi counts.
348 L_(debug) <<
"STS Hitfinder Chain: Allocating dynamic memory for " << maxNDigisPerModule <<
" digis per module and "
349 << nDigisTotal <<
" digis in total";
352 xpu::scoped_timer t_(
"Allocate");
// Record the sizes this allocation was made for, so later timeslices can be
// checked against them (see the static-allocation guards in RunHitFinder).
357 fPars->memory.maxNDigisPerModule = maxNDigisPerModule;
358 fPars->memory.maxNDigisPerTS = nDigisTotal;
361 const int nChannels =
fPars->setup.nChannels / 2;
362 const int nModules =
fPars->setup.modules.size();
363 const int nModuleSides = 2 * nModules;
// Cluster/hit capacities are derived from the memory model, not the raw counts.
365 const size_t maxNClustersPerModule =
fPars->memory.MaxNClustersPerModule();
366 const size_t maxNHitsPerModule =
fPars->memory.MaxNHitsPerModule();
369 const size_t maxHitsTotal = maxNHitsPerModule * nModules;
// Digi buffers: offsets have one extra entry (exclusive prefix-sum end marker).
372 fHitfinder.digiOffsetPerModule.reset(nModuleSides + 1, xpu::buf_io);
373 fHitfinder.digisPerModule.reset(nDigisTotal, xpu::buf_io);
375 fHitfinder.digisPerModuleTmp.reset(nDigisTotal, xpu::buf_device);
376 fHitfinder.digiConnectorsPerModule.reset(nDigisTotal, xpu::buf_device);
378 fHitfinder.channelOffsetPerModule.reset(nModuleSides * nChannels, xpu::buf_device);
// Cluster buffers, one bucket of maxNClustersPerModule per module side.
381 fHitfinder.maxClustersPerModule = maxNClustersPerModule;
383 fHitfinder.clusterIdxPerModule.reset(maxNClustersPerModule * nModuleSides, xpu::buf_device);
384 fHitfinder.clusterIdxPerModuleTmp.reset(maxNClustersPerModule * nModuleSides, xpu::buf_device);
385 fHitfinder.clusterIdxSortedPerModule.reset(nModuleSides, xpu::buf_device);
387 fHitfinder.clusterDataPerModule.reset(maxNClustersPerModule * nModuleSides, xpu::buf_io);
388 fHitfinder.nClustersPerModule.reset(nModuleSides, xpu::buf_io);
// Hit buffers are per module (not per side); hitsFlat is pinned for fast d2h.
391 fHitfinder.hitsAllocatedPerModule = maxNHitsPerModule;
392 fHitfinder.hitsPerModule.reset(maxNHitsPerModule * nModules, xpu::buf_device);
393 fHitfinder.nHitsPerModule.reset(nModules, xpu::buf_io);
396 fHitfinder.hitsFlat.reset(maxHitsTotal, xpu::buf_pinned);
// Flatten the timeslice digis into one contiguous array, bucketed by module
// side (front sides first, then back sides), in parallel across threads.
479 L_(debug) <<
"STS Hitfinder Chain: Flattening digis";
480 xpu::scoped_timer t_(
"Flatten Digis");
481 xpu::t_add_bytes(digis.size_bytes());
483 const auto& modules =
fPars->setup.modules;
485 size_t nModules = modules.size();
486 size_t nModuleSides = nModules * 2;
487 int nChannelsPerSide =
fPars->setup.nChannels / 2;
488 xpu::h_view pDigisFlat(
fHitfinder.digisPerModule);
490 xpu::h_view pMdigiOffset(
fHitfinder.digiOffsetPerModule);
// Exclusive prefix sum over per-module-side digi counts → bucket start offsets.
// (pMdigiOffset[0] is presumably set to 0 in an elided line — confirm.)
492 for (
size_t m = 1; m < nModuleSides + 1; m++) {
493 pMdigiOffset[m] = pMdigiOffset[m - 1] + digiMap.
nDigisPerModule.at(m - 1);
// Per-thread write cursors: each thread gets its own offset table so threads
// can scatter digis into the shared flat array without synchronization.
497 for (
auto&
v : digiOffsetsPerThread)
498 v.resize(nModuleSides);
499 for (
size_t i = 1; i < digiOffsetsPerThread.size(); i++) {
500 for (
size_t m = 0; m < nModuleSides; m++) {
501 for (
size_t j = 0; j < i; j++) {
// Scatter phase (runs per thread): route each digi to its module-side bucket.
510 auto& digiOffsets = digiOffsetsPerThread.at(threadId);
513 for (
size_t i = 0; i < digis.size(); i++) {
514 const auto& digi = digis[i];
// Channel number below nChannelsPerSide ⇒ front side; back side buckets are
// offset by nModules in the flat layout.
520 bool isFront = digi.GetChannel() < nChannelsPerSide;
521 moduleIndex += isFront ? 0 : nModules;
523 size_t moduleOffset = pMdigiOffset[moduleIndex];
524 size_t digiOffset = digiOffsets[moduleIndex]++;
526 pDigisFlat[moduleOffset + digiOffset] = digi;
// Back-side digis keep their global channel number until here; rebase them to
// a 0-based per-side channel index for the kernels.
535 for (
size_t i = pMdigiOffset[nModules]; i < pMdigiOffset[nModuleSides]; i++) {
536 auto& digi = pDigisFlat[i];
537 digi.SetChannel(digi.GetChannel() - nChannelsPerSide);
// Compact the per-module-side cluster buckets (each of fixed capacity
// maxClustersPerModule) into one dense host-side vector.
619 xpu::h_view nClusters(hfc.nClustersPerModule);
620 int maxClustersPerModule = hfc.maxClustersPerModule;
621 sts::Cluster* clusters = hfc.clusterDataPerModule.get();
623 int nModuleSides = nClusters.size();
625 L_(debug) <<
"Copy clusters from " << nModuleSides <<
" modules sides";
// Address table: the module address list is emitted twice (front sides then
// back sides), matching the flat module-side ordering.
627 std::vector<u32> addresses;
628 for (
size_t i = 0; i < 2; i++) {
629 for (
auto& m :
fPars->setup.modules)
630 addresses.push_back(m.address);
633 size_t nClustersTotal = std::accumulate(nClusters.begin(), nClusters.end(), 0);
637 std::vector<sts::Cluster> clustersFlat;
638 clustersFlat.resize(nClustersTotal);
// Copy each bucket's valid prefix; buckets are strided by maxClustersPerModule
// on the device while the destination is densely packed via `offset`.
640 for (
int m = 0; m < nModuleSides; m++) {
641 size_t nClustersInModule = nClusters[m];
642 queue.copy(clusters + maxClustersPerModule * m, clustersFlat.data() + offset, nClustersInModule);
643 offset += nClustersInModule;
647 std::vector<size_t> sizes(nClusters.begin(), nClusters.end());
// Consistency check: recompute the expected per-channel digi offsets from the
// (sorted) flat digi array and compare them to the offsets the GPU produced;
// any mismatch is fatal.
782 xpu::h_view digisPerModule(
fHitfinder.digisPerModule);
783 xpu::h_view digiOffsetPerModule(
fHitfinder.digiOffsetPerModule);
784 for (
size_t m = 0; m <
fPars->setup.modules.size() * 2; m++) {
785 int nChannels =
fPars->setup.nChannels / 2;
787 std::vector<u32> expectedChannelOffsets(nChannels, 0);
// Slice out this module side's digis and its nChannels-long offset table.
789 int offset = digiOffsetPerModule[m];
790 int nDigis = digiOffsetPerModule[m + 1] - offset;
791 gsl::span<CbmStsDigi> digis(&digisPerModule[offset], nDigis);
792 gsl::span<u32> channelOffsets = channelOffsetsByModule.subspan(m * nChannels, nChannels);
// Empty module sides have nothing to verify.
794 if (nDigis == 0)
continue;
796 if (channelOffsets[0] != 0) {
797 throw FatalError(
"Module {}: First channel offset is not 0", m);
// Walk the digis (sorted by channel); each time the channel advances, record
// the current index as the offset of every channel skipped over.
800 int chan = digis[0].GetChannel();
801 for (
int i = 0; i < nDigis; i++) {
802 if (digis[i].GetChannel() != chan) {
803 while (chan < digis[i].GetChannel()) {
805 expectedChannelOffsets[chan] = i;
// Channels past the last digi's channel all point to the end of the range.
809 while (chan < nChannels) {
811 expectedChannelOffsets[chan] = nDigis;
// Compare against the GPU-computed offsets element by element.
814 for (
int i = 0; i < nChannels; i++) {
815 if (channelOffsets[i] != expectedChannelOffsets[i]) {
816 throw FatalError(
"Module {}: Channel offset for channel {} is {} but should be {}", m, i, channelOffsets[i],
817 expectedChannelOffsets[i]);