54 int iModule = ctx.block_idx_x();
55 CbmStsDigi* digis = &digisPerModule[digiOffsetPerModule[iModule]];
56 CbmStsDigi* digisTmp = &digisPerModuleTmp[digiOffsetPerModule[iModule]];
57 int nDigis = GetNDigis(iModule);
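// Tail of SortDigisInSpaceAndTime: if the block sort left its output in the temporary buffer, copy the sorted digis back into digisPerModule.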
65 if (digisOut == digisTmp) {
67 for (int i = ctx.thread_idx_x(); i < nDigis; i += ctx.block_dim_x()) {
68 digis[i] = digisTmp[i];
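// FindClustersSingleStep: run the three clustering phases in sequence, separated by block-wide barriers.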
75 CalculateOffsetsParallel(ctx);
76 xpu::barrier(ctx.pos());
77 FindClustersParallel(ctx);
78 xpu::barrier(ctx.pos());
79 CalculateClustersParallel(ctx);
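// CalculateChannelOffsets: channelOffsets[c] is the index of the first digi of channel c. Threads scan adjacent digi pairs in parallel; channels past the last occupied one point to nDigis.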
93 unsigned int* channelOffsets, unsigned int const nDigis) const
95 channelOffsets[0] = 0;
97 for (unsigned int pos = ctx.thread_idx_x(); pos < nDigis - 1; pos += (unsigned int) ctx.block_dim_x()) {
100 if (currChannel != nextChannel) {
101 for (int i = currChannel + 1; i <= nextChannel; i++) {
102 channelOffsets[i] = pos + 1;
107 for (int c = digis[nDigis - 1].GetChannel() + 1; c < nChannels; c++) {
108 channelOffsets[c] = nDigis;
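// CalculateOffsetsParallel: one block per module side; build the channel-offset table for its digis.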
122 int const iModule = ctx.block_idx_x();
123 CbmStsDigi* digis = &digisPerModule[digiOffsetPerModule[iModule]];
124 auto const nDigis = GetNDigis(iModule);
126 if (nDigis == 0) return;
128 auto* channelOffsets = &channelOffsetPerModule[iModule * nChannels];
130 CalculateChannelOffsets(ctx, digis, channelOffsets, nDigis);
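// FindClustersParallel: one thread per digi over all module sides. Locate the owning module via digiOffsetPerModule, then link each digi to a time-matching digi in the next channel through DigiConnector.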
143 const i32 iGThread = ctx.block_dim_x() * ctx.block_idx_x() + ctx.thread_idx_x();
144 const i32 nDigisTotal = digiOffsetPerModule[2 * nModules];
145 if (iGThread >= nDigisTotal) {
150 while (size_t(iGThread) >= digiOffsetPerModule[iModule + 1]) {
154 i32 iLocal = iGThread - digiOffsetPerModule[iModule];
156 const CbmStsDigi myDigi = digisPerModule[iGThread];
158 u32 nDigis = GetNDigis(iModule);
159 if (nDigis == 0) return;
161 xpu::view digis(&digisPerModule[digiOffsetPerModule[iModule]], nDigis);
162 xpu::view digiConnector(&digiConnectorsPerModule[digiOffsetPerModule[iModule]], nDigis);
163 xpu::view channelOffsets(&channelOffsetPerModule[iModule * nChannels], nChannels);
166 if (channel == nChannels - 1) return;
169 const f32 tResol = GetTimeResolution(iModule, channel);
173 const u32 nextChannelEnd = (channel + 2 < nChannels) ? channelOffsets[channel + 2] : nDigis;
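// Binary search over the (time-sorted) digis of the next channel to find the first candidate within the time window.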
176 u32 start = channelOffsets[channel + 1];
177 u32 end = nextChannelEnd;
178 while (start < end) {
179 u32 mid = (start + end) / 2;
180 real timeDiffDigis = GetTimeDiff(myDigi, digis[mid]);
181 if (deltaT < timeDiffDigis) {
189 for (auto nextIter = start; nextIter < nextChannelEnd; nextIter++) {
196 real timeDiffDigis = xpu::abs(GetTimeDiff(myDigi, otherDigi));
197 if (deltaT < timeDiffDigis) {
204 digiConnector[iLocal].SetNext(nextIter);
205 digiConnector[nextIter].SetHasPrevious(true);
220 const int nModuleSides = 2 * nModules;
223 int iThread = ctx.block_dim_x() * ctx.block_idx_x() + ctx.thread_idx_x();
225 for (; iModule < nModuleSides; iModule++) {
226 i32 nDigis = GetNDigis(iModule);
227 if (iThread < nDigis) {
233 if (iModule >= nModuleSides) {
237 const CbmStsDigi* digis = &digisPerModule[digiOffsetPerModule[iModule]];
238 auto* digiConnector = &digiConnectorsPerModule[digiOffsetPerModule[iModule]];
241 const int iDigi = iThread;
243 if (digiConnector[iDigi].HasPrevious()) {
247 if (!digiConnector[iDigi].HasNext()) {
249 CreateClusterFromConnectors1(iModule, digis, iDigi);
251 else if (!digiConnector[digiConnector[iDigi].next()].HasNext()) {
253 CreateClusterFromConnectors2(iModule, digis, digiConnector, iDigi);
257 CreateClusterFromConnectorsN(iModule, digis, digiConnector, iDigi);
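// Presumably CalculateClustersDigiWise: all threads of a block sweep the module's digis and start a cluster at every chain head (digi without a predecessor).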
285 int const iModule = ctx.block_idx_x();
286 for (unsigned int currIter = ctx.thread_idx_x(); currIter < nDigis; currIter += (unsigned int) ctx.block_dim_x()) {
288 if (digiConnector[currIter].HasPrevious()) continue;
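// CreateClusterFromConnectors1: single-digi cluster; position is the digi channel (shifted by nChannels on the backside), with a fixed position error and the ASIC time resolution as time error.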
298 const float timeError = asic.timeResolution;
303 .fPosition = float(digi.GetChannel()) + (IsBackside(iModule) ? nChannels : 0.f),
304 .fPositionError = 1.f / xpu::sqrt(24.f),
306 .fTimeError = timeError,
309 SaveMaxError(timeError, iModule);
311 AddCluster(iModule, time, cluster);
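// CreateClusterFromConnectors2: two-digi cluster. Noise, digitisation and Landau-fluctuation terms enter the charge uncertainties used below.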
321 float eNoiseSq = asic.noise * asic.noise;
323 float chargePerAdc = asic.dynamicRange / float(asic.nAdc);
324 float eDigitSq = chargePerAdc * chargePerAdc / 12.f;
332 x1 -= float(nChannels);
336 float width1 = LandauWidth(q1);
337 float eq1sq = width1 * width1 + eNoiseSq + eDigitSq;
338 float width2 = LandauWidth(q2);
339 float eq2sq = width2 * width2 + eNoiseSq + eDigitSq;
343 float timeError = asic.timeResolution * 0.70710678f;
346 float x = x1 + 0.5f + (q2 - q1) / 3.f / xpu::max(q1, q2);
350 x += float(nChannels);
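// Position uncertainty from three terms (charge asymmetry plus the two charge errors); the branch normalises to the larger of the two charges.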
358 ex0sq = (q2 - q1) * (q2 - q1) / q2 / q2 / 72.f;
359 ex1sq = eq1sq / q2 / q2 / 9.f;
360 ex2sq = eq2sq * q1 * q1 / q2 / q2 / q2 / q2 / 9.f;
363 ex0sq = (q2 - q1) * (q2 - q1) / q1 / q1 / 72.f;
364 ex1sq = eq1sq * q2 * q2 / q1 / q1 / q1 / q1 / 9.f;
365 ex2sq = eq2sq / q1 / q1 / 9.f;
367 float xError = xpu::sqrt(ex0sq + ex1sq + ex2sq);
370 float charge = q1 + q2;
372 if (IsBackside(iModule)) x += nChannels;
378 .fPositionError = xError,
379 .fTime = uint32_t(time),
380 .fTimeError = timeError,
383 SaveMaxError(timeError, iModule);
384 AddCluster(iModule, time, cls);
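// CreateClusterFromConnectorsN: clusters with three or more digis. The lambda below accumulates first/middle/last charges (qF, qM, qL) and their squared errors in cProps while the connector chain is walked.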
393 auto calculateClusterCharges = [this, &cProps, &digis, &digiConnector](int index) {
395 float eNoiseSq = asic.noise * asic.noise;
396 float chargePerAdc = asic.dynamicRange / float(asic.nAdc);
397 float eDigitSq = chargePerAdc * chargePerAdc / 12.f;
401 float lWidth = LandauWidth(charge);
402 float eChargeSq = lWidth * lWidth + eNoiseSq + eDigitSq;
404 if (!digiConnector[index].HasPrevious()) {
408 cProps.eqFsq = eChargeSq;
410 else if (!digiConnector[index].HasNext()) {
414 cProps.eqLsq = eChargeSq;
418 cProps.eqMsq += eChargeSq;
423 calculateClusterCharges(digiIndex);
426 while (digiConnector[digiIndex].HasNext()) {
427 digiIndex = digiConnector[digiIndex].next();
428 calculateClusterCharges(digiIndex);
435 float nDigis = ChanDist(cProps.chanF, cProps.chanL) + 1;
437 float timeError = (cProps.tResolSum / nDigis) / xpu::sqrt(nDigis);
438 float qSum = cProps.qF + cProps.qM + cProps.qL;
441 cProps.qM /= (nDigis - 2.f);
442 cProps.eqMsq /= (nDigis - 2.f);
445 float x = 0.5f * (float(cProps.chanF + cProps.chanL) + (cProps.qL - cProps.qF) / cProps.qM);
449 x += float(nChannels);
453 float exFsq = cProps.eqFsq / cProps.qM / cProps.qM / 4.f;
454 float exMsq = cProps.eqMsq * (cProps.qL - cProps.qF) * (cProps.qL - cProps.qF) / cProps.qM / cProps.qM / cProps.qM
456 float exLsq = cProps.eqLsq / cProps.qM / cProps.qM / 4.f;
457 float xError = xpu::sqrt(exFsq + exMsq + exLsq);
461 x = cProps.xSum / qSum;
464 if (IsBackside(iModule)) {
470 .fSize = int(nDigis),
472 .fPositionError = xError,
473 .fTime = uint32_t(cProps.tSum),
474 .fTimeError = timeError,
477 SaveMaxError(timeError, iModule);
478 AddCluster(iModule, cProps.tSum, cls);
486 int firstModule = ctx.block_idx_x();
487 int lastModule = ctx.block_idx_x();
488 if (ctx.grid_dim_x() == 1) {
490 lastModule = 2 * nModules - 1;
493 for (int iModule = firstModule; iModule <= lastModule; iModule++) {
494 size_t offset = iModule * maxClustersPerModule;
495 ClusterIdx* clusterIdx = &clusterIdxPerModule[offset];
496 ClusterIdx* clusterIdxTmp = &clusterIdxPerModuleTmp[offset];
497 int nClusters = nClustersPerModule[iModule];
502 if (nClusters == 0) return;
509 clusterIdx = clsSort.sort(clusterIdx, nClusters, clusterIdxTmp, [](const ClusterIdx a) { return a.fTime; });
515 if (ctx.thread_idx_x() == 0) clusterIdxSortedPerModule[iModule] = clusterIdx;
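// FindHits: each thread owns one front-side cluster and pairs it with time-compatible back-side clusters of the same module.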
522 int iThread = ctx.block_dim_x() * ctx.block_idx_x() + ctx.thread_idx_x();
527 for (; iModule < nModules; iModule++) {
528 if (iThread < nClustersPerModule[iModule]) break;
529 iThread -= nClustersPerModule[iModule];
533 if (iModule >= nModules) {
537 int iModuleF = iModule;
538 int iModuleB = iModuleF + nModules;
539 size_t offsetF = iModuleF * maxClustersPerModule;
540 size_t offsetB = iModuleB * maxClustersPerModule;
541 ClusterIdx* clusterIdxF = clusterIdxSortedPerModule[iModuleF];
542 ClusterIdx* clusterIdxB = clusterIdxSortedPerModule[iModuleB];
543 sts::Cluster* clusterDataF = &clusterDataPerModule[offsetF];
544 sts::Cluster* clusterDataB = &clusterDataPerModule[offsetB];
545 int nClustersF = nClustersPerModule[iModuleF];
546 int nClustersB = nClustersPerModule[iModuleB];
547 size_t nHitsWritten = nHitsPerModule[iModule];
556 if (nClustersF == 0 || nClustersB == 0) {
566 iChunk * nClustersPerChunk;
570 if (iClusterF >= nClustersF) {
580 if (nHitsWritten > 2 * maxHitsPerModule) {
597 pars.dX = float(nChannels) * pars.pitch;
600 float maxTerrF = maxClusterTimeErrorByModuleSide[iModuleF];
601 float maxTerrB = maxClusterTimeErrorByModuleSide[iModuleB];
603 float maxSigmaBoth = 4.f * xpu::sqrt(maxTerrF * maxTerrF + maxTerrB * maxTerrB);
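// Binary search over the time-sorted back-side clusters for the first candidate inside the combined time window maxSigmaBoth.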
610 int end = nClustersB;
611 while (end - start > 1) {
612 int mid = (start + end) / 2;
613 auto timeDiffCluster = GetTimeDiff(clusterIdxF[iClusterF], clusterIdxB[mid]);
614 if (maxSigmaBoth < timeDiffCluster) {
625 xpu::min(iClusterF + nClustersPerChunk, nClustersF);
630 for (; iClusterF < endClusterF; iClusterF++) {
634 float maxSigma = 4.f * xpu::sqrt(clsDataF.fTimeError * clsDataF.fTimeError + maxTerrB * maxTerrB);
636 for (int iClusterB = startB; iClusterB < nClustersB; iClusterB++) {
640 auto timeDiff = GetTimeDiff(clsIdxF, clsIdxB);
642 if (timeDiff > 0 && timeDiff > maxSigmaBoth) {
646 else if (timeDiff > 0 && timeDiff > maxSigma) {
649 else if (timeDiff < 0 && xpu::abs(timeDiff) > maxSigma) {
656 if (doChargeCorrelation) {
658 if (xpu::abs(chargeDiff) > ChargeDelta) {
663 float timeCut = -1.f;
672 if (xpu::abs(timeDiff) > timeCut) {
676 IntersectClusters(iModule, pars, clsIdxF, clsDataF, clsIdxB, clsDataB);
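// IntersectClusters: convert both cluster centres to strip coordinates, loop over possible strip wrap-arounds and create a hit for every intersection that lies inside the sensor.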
688 float xF = GetClusterPosition(pars, clsF.fPosition, true);
690 float du = exF * xpu::cos(xpu::deg_to_rad() * pars.stereoF);
691 float xB = GetClusterPosition(pars, clsB.fPosition, false);
693 float dv = exB * xpu::cos(xpu::deg_to_rad() * pars.stereoB);
696 if (xF < 0.f || xF > pars.dX || xB < 0.f || xB > pars.dX) {
713 int nF1 = xpu::min(0, nF);
714 int nF2 = xpu::max(0, nF);
715 int nB1 = xpu::min(0, nB);
716 int nB2 = xpu::max(0, nB);
724 for (int iF = nF1; iF <= nF2; iF++) {
725 float xFi = xF - float(iF) * pars.dX;
726 for (int iB = nB1; iB <= nB2; iB++) {
727 float xBi = xB - float(iB) * pars.dX;
730 bool found = Intersect(pars, xFi, exF, xBi, exB, xC, yC, varX, varY, varXY);
731 if (not found) continue;
735 xC -= 0.5f * pars.dX;
736 yC -= 0.5f * pars.dY;
738 CreateHit(iBlock, xC, yC, varX, varY, varXY, idxF, clsF, idxB, clsB, du, dv);
747 int iChannel = int(centre);
748 float xDif = centre - float(iChannel);
752 int iStrip = iChannel - (isFront ? 0 : nChannels);
756 float xCluster = (float(iStrip) + xDif + 0.5f) * pars.pitch;
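// Intersect: compute the crossing point of a front- and a back-side strip together with its covariance; (near-)zero stereo angles are handled as special cases.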
770 float& y, float& varX, float& varY, float& varXY) const
789 if (xpu::abs(pars.stereoF) < 0.001f) {
795 return IsInside(pars, x - pars.dX / 2.f, y - pars.dY / 2.f);
799 if (xpu::abs(pars.stereoB) < 0.001f) {
805 return IsInside(pars, x - pars.dX / 2.f, y - pars.dY / 2.f);
814 varY = pars.errorFac * (exF * exF + exB * exB);
817 return IsInside(pars, x - pars.dX / 2.f, y - pars.dY / 2.f);
823 return x >= -pars.dX / 2.f
824 && x <= pars.dX / 2.f
825 && y >= -pars.dY / 2.f
826 && y <= pars.dY / 2.f;
832 const Cluster& clsB, float du, float dv) const
835 float globalX, globalY, globalZ;
836 ToGlobal(iModule, xLocal, yLocal, 0.f, globalX, globalY, globalZ);
841 float errX = xpu::sqrt(varX);
842 float errY = xpu::sqrt(varY);
845 float hitTime = 0.5f * (float(idxF.fTime) + float(idxB.fTime));
848 float hitTimeError = 0.5f * xpu::sqrt(etF * etF + etB * etB);
854 .fTime = static_cast<u32>(hitTime),
858 .fTimeError = hitTimeError,
862 .fFrontClusterId = idxF.fIdx,
863 .fBackClusterId = idxB.fIdx,
866 int idx = xpu::atomic_add(&nHitsPerModule[iModule], 1);
868 if (size_t(idx) >= maxHitsPerModule) {
869 xpu::atomic_add(&monitor->nHitBucketOverflow, 1);
873 hitsPerModule[iModule * hitsAllocatedPerModule + idx] = hit;
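// LandauWidth: clamp the charge to the table range, then interpolate linearly between neighbouring entries of the precomputed Landau-width table.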
878 if (charge <= landauStepSize) return landauTable[0];
879 if (charge > landauStepSize * (landauTableSize - 1)) return landauTable[landauTableSize - 1];
881 int tableIdx = xpu::ceil(charge / landauStepSize);
882 float e2 = tableIdx * landauStepSize;
883 float v2 = landauTable[tableIdx];
885 float e1 = tableIdx * landauStepSize;
886 float v1 = landauTable[tableIdx];
887 return v1 + (charge - e1) * (v2 - v1) / (e2 - e1);
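// ToGlobal: apply the module's 3x3 rotation (row-major) and translation to map local to global coordinates.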
892 const float* tr = &localToGlobalTranslationByModule[iModule * 3];
893 const float* rot = &localToGlobalRotationByModule[iModule * 9];
895 gx = tr[0] + lx * rot[0] + ly * rot[1] + lz * rot[2];
896 gy = tr[1] + lx * rot[3] + ly * rot[4] + lz * rot[5];
897 gz = tr[2] + lx * rot[6] + ly * rot[7] + lz * rot[8];