// SortDigisInSpaceAndTime: after the block sort, copy the digis back if the sorted
// output ended up in the temporary buffer.
int iModule = ctx.block_idx_x();
CbmStsDigi* digis = &digisPerModule[digiOffsetPerModule[iModule]];
CbmStsDigi* digisTmp = &digisPerModuleTmp[digiOffsetPerModule[iModule]];
int nDigis = GetNDigis(iModule);
// ...
if (digisOut == digisTmp) {
  for (int i = ctx.thread_idx_x(); i < nDigis; i += ctx.block_dim_x()) {
    digis[i] = digisTmp[i];
  }
}
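// The kernel above relies on xpu::block_sort on the device. The later offset and cluster steps
// assume the digis of a module are ordered by channel and, within a channel, by time. As a point
// of reference, a minimal host-side sketch of that ordering with std::sort (the helper name is
// ours, not part of the original file):
#include <algorithm>
#include <vector>

static void sortDigisByChannelAndTime(std::vector<CbmStsDigi>& digis)
{
  std::sort(digis.begin(), digis.end(), [](const CbmStsDigi& a, const CbmStsDigi& b) {
    if (a.GetChannel() != b.GetChannel()) return a.GetChannel() < b.GetChannel();
    return a.GetTimeU32() < b.GetTimeU32();
  });
}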
// FindClustersSingleStep: the three clustering phases, separated by block-wide barriers.
CalculateOffsetsParallel(ctx);
xpu::barrier(ctx.pos());
FindClustersParallel(ctx);
xpu::barrier(ctx.pos());
CalculateClustersParallel(ctx);
XPU_D void CalculateChannelOffsets(FindClusters::context& ctx, CbmStsDigi* digis,
                                   unsigned int* channelOffsets, unsigned int const nDigis) const
{
  channelOffsets[0] = 0;
  // ...
  for (unsigned int pos = ctx.thread_idx_x(); pos < nDigis - 1; pos += (unsigned int) ctx.block_dim_x()) {
    // ... (currChannel / nextChannel are the channels of digis[pos] and digis[pos + 1])
    if (currChannel != nextChannel) {
      for (int i = currChannel + 1; i <= nextChannel; i++) {
        channelOffsets[i] = pos + 1;
      }
    }
  }
  // Channels above the last occupied channel point past the end of the digi array.
  for (int c = digis[nDigis - 1].GetChannel() + 1; c < nChannels; c++) {
    channelOffsets[c] = nDigis;
  }
}
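// For reference, a single-threaded restatement of what CalculateChannelOffsets computes: with the
// digis of one module sorted by channel (and by time within a channel), channelOffsets[c] becomes
// the index of the first digi of channel c, and channels without digis point to the next occupied
// position (or nDigis at the end). Illustrative helper only; the name is ours.
static void calculateChannelOffsetsSequential(const CbmStsDigi* digis, unsigned int* channelOffsets,
                                              unsigned int nDigis, int nChannels)
{
  if (nDigis == 0) return;
  channelOffsets[0] = 0;
  for (unsigned int pos = 0; pos + 1 < nDigis; pos++) {
    int currChannel = digis[pos].GetChannel();
    int nextChannel = digis[pos + 1].GetChannel();
    for (int c = currChannel + 1; c <= nextChannel; c++) channelOffsets[c] = pos + 1;
  }
  for (int c = digis[nDigis - 1].GetChannel() + 1; c < nChannels; c++) channelOffsets[c] = nDigis;
}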
// CalculateOffsetsParallel: one block per module side.
int const iModule = ctx.block_idx_x();
CbmStsDigi* digis = &digisPerModule[digiOffsetPerModule[iModule]];
auto const nDigis = GetNDigis(iModule);
// ...
if (nDigis == 0) return;
// ...
auto* channelOffsets = &channelOffsetPerModule[iModule * nChannels];
// ...
CalculateChannelOffsets(ctx, digis, channelOffsets, nDigis);
// Cluster finding: each thread takes one digi and connects it via the DigiConnector links
// to matching digis in the neighbouring channel.
const i32 iGThread = ctx.block_dim_x() * ctx.block_idx_x() + ctx.thread_idx_x();
const i32 nDigisTotal = digiOffsetPerModule[2 * nModules];
if (iGThread >= nDigisTotal) {
  return;
}
// Find the module side this digi belongs to.
while (size_t(iGThread) >= digiOffsetPerModule[iModule + 1]) {
  // ...
}
i32 iLocal = iGThread - digiOffsetPerModule[iModule];
// ...
const CbmStsDigi myDigi = digisPerModule[iGThread];
// ...
u32 nDigis = GetNDigis(iModule);
if (nDigis == 0) return;
// ...
xpu::view digis(&digisPerModule[digiOffsetPerModule[iModule]], nDigis);
xpu::view digiConnector(&digiConnectorsPerModule[digiOffsetPerModule[iModule]], nDigis);
xpu::view channelOffsets(&channelOffsetPerModule[iModule * nChannels], nChannels);
// ...
if (channel == nChannels - 1) return;
// ...
const f32 tResol = GetTimeResolution(iModule, channel);
// ...
const u32 nextChannelEnd = (channel + 2 < nChannels) ? channelOffsets[channel + 2] : nDigis;

// Binary search for the first digi in the next channel that can still match in time.
float firstPossibleTime = float(myDigi.GetTimeU32()) - deltaT;
// ...
u32 start = channelOffsets[channel + 1];
u32 end = nextChannelEnd;
while (start < end) {
  u32 mid = (start + end) / 2;
  if (float(digis[mid].GetTimeU32()) < firstPossibleTime) {
    // ...
  }
  // ...
}
// ...
for (auto nextIter = start; nextIter < nextChannelEnd; nextIter++) {
  // ...
  digiConnector[iLocal].SetNext(nextIter);
  digiConnector[nextIter].SetHasPrevious(true);
}
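// The while loop above is a lower-bound binary search over the time-sorted digis of the
// neighbouring channel: it returns the first digi whose time is still within deltaT of myDigi.
// A host-side equivalent using the standard library (illustrative helper, name ours):
#include <algorithm>

static unsigned int firstDigiInTimeWindow(const CbmStsDigi* digis, unsigned int begin,
                                          unsigned int end, float firstPossibleTime)
{
  const CbmStsDigi* it =
    std::lower_bound(digis + begin, digis + end, firstPossibleTime,
                     [](const CbmStsDigi& d, float t) { return float(d.GetTimeU32()) < t; });
  return static_cast<unsigned int>(it - digis);
}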
// Cluster calculation: a digi without a predecessor starts a cluster; depending on the chain
// length it becomes a 1-, 2- or N-digi cluster.
const int nModuleSides = 2 * nModules;
// ...
int iThread = ctx.block_dim_x() * ctx.block_idx_x() + ctx.thread_idx_x();
// ...
// Map the flat thread index onto a module side and a local digi index.
for (; iModule < nModuleSides; iModule++) {
  i32 nDigis = GetNDigis(iModule);
  if (iThread < nDigis) {
    break;
  }
  // ...
}
// ...
if (iModule >= nModuleSides) {
  return;
}
// ...
const CbmStsDigi* digis = &digisPerModule[digiOffsetPerModule[iModule]];
auto* digiConnector = &digiConnectorsPerModule[digiOffsetPerModule[iModule]];
// ...
const int iDigi = iThread;
// ...
if (digiConnector[iDigi].HasPrevious()) {
  return;  // not the first digi of a cluster
}
// ...
if (!digiConnector[iDigi].HasNext()) {
  CreateClusterFromConnectors1(iModule, digis, iDigi);
}
else if (!digiConnector[digiConnector[iDigi].next()].HasNext()) {
  CreateClusterFromConnectors2(iModule, digis, digiConnector, iDigi);
}
else {
  CreateClusterFromConnectorsN(iModule, digis, digiConnector, iDigi);
}
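// The loop above (and the analogous one in FindHits further down) walks the per-module counts to
// translate a flat thread index into a (module side, local index) pair. A sequential restatement
// as a standalone helper (our own names and signature):
static bool threadToModuleAndDigi(int iThread, int nModuleSides, const int* nDigisPerModuleSide,
                                  int& iModule, int& iDigi)
{
  for (iModule = 0; iModule < nModuleSides; iModule++) {
    if (iThread < nDigisPerModuleSide[iModule]) {
      iDigi = iThread;
      return true;
    }
    iThread -= nDigisPerModuleSide[iModule];
  }
  return false;  // this thread has no digi to process
}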
// Digi-wise cluster calculation: every digi without a predecessor starts a cluster.
int const iModule = ctx.block_idx_x();
for (unsigned int currIter = ctx.thread_idx_x(); currIter < nDigis; currIter += (unsigned int) ctx.block_dim_x()) {
  if (digiConnector[currIter].HasPrevious()) continue;
  // ...
}

// Single-digi cluster: the position is the digi channel (shifted by nChannels on the back side).
const float timeError = asic.timeResolution;
// ...
sts::Cluster cluster {
  // ...
  .fPosition      = float(digi.GetChannel()) + (IsBackside(iModule) ? nChannels : 0.f),
  .fPositionError = 1.f / xpu::sqrt(24.f),
  // ...
  .fTimeError     = timeError,
  // ...
};
// ...
SaveMaxError(timeError, iModule);
// ...
AddCluster(iModule, time, cluster);
// CreateClusterFromConnectors2: cluster made of two digis in neighbouring channels.
float eNoiseSq = asic.noise * asic.noise;
// ...
float chargePerAdc = asic.dynamicRange / float(asic.nAdc);
float eDigitSq = chargePerAdc * chargePerAdc / 12.f;
// ...
x1 -= float(nChannels);
// ...
float width1 = LandauWidth(q1);
float eq1sq = width1 * width1 + eNoiseSq + eDigitSq;
float width2 = LandauWidth(q2);
float eq2sq = width2 * width2 + eNoiseSq + eDigitSq;
// ...
float timeError = asic.timeResolution * 0.70710678f;  // 1/sqrt(2)
// ...
float x = x1 + 0.5f + (q2 - q1) / 3.f / xpu::max(q1, q2);
// ...
x += float(nChannels);
// ...
ex0sq = (q2 - q1) * (q2 - q1) / q2 / q2 / 72.f;
ex1sq = eq1sq / q2 / q2 / 9.f;
ex2sq = eq2sq * q1 * q1 / q2 / q2 / q2 / q2 / 9.f;
// ...
ex0sq = (q2 - q1) * (q2 - q1) / q1 / q1 / 72.f;
ex1sq = eq1sq * q2 * q2 / q1 / q1 / q1 / q1 / 9.f;
ex2sq = eq2sq / q1 / q1 / 9.f;
// ...
float xError = xpu::sqrt(ex0sq + ex1sq + ex2sq);
// ...
float charge = q1 + q2;
// ...
if (IsBackside(iModule)) x += nChannels;
// ...
sts::Cluster cls {
  // ...
  .fPositionError = xError,
  .fTime          = uint32_t(time),
  .fTimeError     = timeError,
  // ...
};
// ...
SaveMaxError(timeError, iModule);
AddCluster(iModule, time, cls);
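// The two-digi position and its error, restated as a standalone helper so the formulas above can
// be read in one place. The helper name and signature are ours; the split between the two error
// branches is elided in the excerpt and is assumed here to be q1 < q2. q1, q2 are the strip
// charges, x1 the channel of the first strip, eq1sq/eq2sq the squared charge errors from above.
#include <algorithm>
#include <cmath>

struct TwoStripCluster { float x; float xError; };

static TwoStripCluster twoStripPosition(float x1, float q1, float q2, float eq1sq, float eq2sq)
{
  float x = x1 + 0.5f + (q2 - q1) / 3.f / std::max(q1, q2);
  float ex0sq, ex1sq, ex2sq;
  if (q1 < q2) {  // assumed branch condition
    ex0sq = (q2 - q1) * (q2 - q1) / q2 / q2 / 72.f;
    ex1sq = eq1sq / q2 / q2 / 9.f;
    ex2sq = eq2sq * q1 * q1 / q2 / q2 / q2 / q2 / 9.f;
  }
  else {
    ex0sq = (q2 - q1) * (q2 - q1) / q1 / q1 / 72.f;
    ex1sq = eq1sq * q2 * q2 / q1 / q1 / q1 / q1 / 9.f;
    ex2sq = eq2sq / q1 / q1 / 9.f;
  }
  return {x, std::sqrt(ex0sq + ex1sq + ex2sq)};
}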
// CreateClusterFromConnectorsN: cluster with three or more digis. Charges of the first (F),
// middle (M, averaged) and last (L) strips are accumulated by a lambda while walking the
// DigiConnector chain.
auto calculateClusterCharges = [this, &cProps, &digis, &digiConnector](int index) {
  float eNoiseSq = asic.noise * asic.noise;
  float chargePerAdc = asic.dynamicRange / float(asic.nAdc);
  float eDigitSq = chargePerAdc * chargePerAdc / 12.f;
  // ...
  float lWidth = LandauWidth(charge);
  float eChargeSq = lWidth * lWidth + eNoiseSq + eDigitSq;
  // ...
  if (!digiConnector[index].HasPrevious()) {
    // ...
    cProps.eqFsq = eChargeSq;
  }
  else if (!digiConnector[index].HasNext()) {
    // ...
    cProps.eqLsq = eChargeSq;
  }
  else {
    // ...
    cProps.eqMsq += eChargeSq;
  }
};

// Walk the digi chain.
calculateClusterCharges(digiIndex);
// ...
while (digiConnector[digiIndex].HasNext()) {
  digiIndex = digiConnector[digiIndex].next();
  calculateClusterCharges(digiIndex);
}

// ...
float nDigis = ChanDist(cProps.chanF, cProps.chanL) + 1;
// ...
float timeError = (cProps.tResolSum / nDigis) / xpu::sqrt(nDigis);
float qSum = cProps.qF + cProps.qM + cProps.qL;
// ...
cProps.qM /= (nDigis - 2.f);
cProps.eqMsq /= (nDigis - 2.f);
// Centre-of-gravity estimate from the first, mean-middle and last charge.
float x = 0.5f * (float(cProps.chanF + cProps.chanL) + (cProps.qL - cProps.qF) / cProps.qM);
// ...
x += float(nChannels);
// ...
float exFsq = cProps.eqFsq / cProps.qM / cProps.qM / 4.f;
float exMsq = cProps.eqMsq * (cProps.qL - cProps.qF) * (cProps.qL - cProps.qF) / cProps.qM / cProps.qM / cProps.qM
              /* ... */;
float exLsq = cProps.eqLsq / cProps.qM / cProps.qM / 4.f;
float xError = xpu::sqrt(exFsq + exMsq + exLsq);
// ...
x = cProps.xSum / qSum;
// ...
if (IsBackside(iModule)) {
  // ...
}
// ...
sts::Cluster cls {
  // ...
  .fSize          = int(nDigis),
  // ...
  .fPositionError = xError,
  .fTime          = uint32_t(cProps.tSum),
  .fTimeError     = timeError,
  // ...
};
// ...
SaveMaxError(timeError, iModule);
AddCluster(iModule, cProps.tSum, cls);
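// The centre-of-gravity formula for clusters with three or more strips, on its own (our helper;
// chanF/chanL are the first and last channel, qF/qL the edge charges, qM the mean middle charge
// as accumulated above):
static float nStripClusterCentre(int chanF, int chanL, float qF, float qM, float qL)
{
  return 0.5f * (float(chanF + chanL) + (qL - qF) / qM);
}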
// SortClusters: sort the clusters of each module side by time.
int firstModule = ctx.block_idx_x();
int lastModule = ctx.block_idx_x();
if (ctx.grid_dim_x() == 1) {
  // ...
  lastModule = 2 * nModules - 1;
}
// ...
for (int iModule = firstModule; iModule <= lastModule; iModule++) {
  size_t offset = iModule * maxClustersPerModule;
  ClusterIdx* clusterIdx = &clusterIdxPerModule[offset];
  ClusterIdx* clusterIdxTmp = &clusterIdxPerModuleTmp[offset];
  int nClusters = nClustersPerModule[iModule];
  // ...
  if (nClusters == 0) return;
  // ...
  clusterIdx = clsSort.sort(clusterIdx, nClusters, clusterIdxTmp,
                            [](const ClusterIdx a) { return a.fTime; });
  // ...
  if (ctx.thread_idx_x() == 0) clusterIdxSortedPerModule[iModule] = clusterIdx;
}
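// clsSort is an xpu::block_sort keyed on the cluster time. On the host, the same ordering would
// be obtained with std::sort (illustrative analogue only; helper name ours):
#include <algorithm>

static void sortClustersByTime(ClusterIdx* clusterIdx, int nClusters)
{
  std::sort(clusterIdx, clusterIdx + nClusters,
            [](const ClusterIdx& a, const ClusterIdx& b) { return a.fTime < b.fTime; });
}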
// FindHits: combine time-matched front-side and back-side clusters of a module into hits.
int iThread = ctx.block_dim_x() * ctx.block_idx_x() + ctx.thread_idx_x();
// ...
// Map the flat thread index onto a module and a local cluster index.
for (; iModule < nModules; iModule++) {
  if (iThread < nClustersPerModule[iModule]) break;
  iThread -= nClustersPerModule[iModule];
}
// ...
if (iModule >= nModules) {
  return;
}
// ...
int iModuleF = iModule;
int iModuleB = iModuleF + nModules;
size_t offsetF = iModuleF * maxClustersPerModule;
size_t offsetB = iModuleB * maxClustersPerModule;
ClusterIdx* clusterIdxF = clusterIdxSortedPerModule[iModuleF];
ClusterIdx* clusterIdxB = clusterIdxSortedPerModule[iModuleB];
sts::Cluster* clusterDataF = &clusterDataPerModule[offsetF];
sts::Cluster* clusterDataB = &clusterDataPerModule[offsetB];
int nClustersF = nClustersPerModule[iModuleF];
int nClustersB = nClustersPerModule[iModuleB];
size_t nHitsWritten = nHitsPerModule[iModule];
// ...
if (nClustersF == 0 || nClustersB == 0) {
  return;
}
// ...
int iClusterF = iChunk * nClustersPerChunk;
// ...
if (iClusterF >= nClustersF) {
  return;
}
// ...
if (nHitsWritten > 2 * maxHitsPerModule) {
  return;
}
// ...
pars.dX = float(nChannels) * pars.pitch;
// ...
float maxTerrF = maxClusterTimeErrorByModuleSide[iModuleF];
float maxTerrB = maxClusterTimeErrorByModuleSide[iModuleB];
// ...
float maxSigmaBoth = 4.f * xpu::sqrt(maxTerrF * maxTerrF + maxTerrB * maxTerrB);

// Binary search for the first back-side cluster that can still be in time with the first
// front-side cluster of this chunk.
float firstPossibleTime = float(clusterIdxF[iClusterF].fTime) - maxSigmaBoth;
// ...
int end = nClustersB;
while (end - start > 1) {
  int mid = (start + end) / 2;
  if (float(clusterIdxB[mid].fTime) < firstPossibleTime) {
    // ...
  }
  // ...
}
// ...
int endClusterF = xpu::min(iClusterF + nClustersPerChunk, nClustersF);
// ...
for (; iClusterF < endClusterF; iClusterF++) {
  // ...
  float maxSigma = 4.f * xpu::sqrt(clsDataF.fTimeError * clsDataF.fTimeError + maxTerrB * maxTerrB);
  // ...
  for (int iClusterB = startB; iClusterB < nClustersB; iClusterB++) {
    // ...
    float timeDiff = float(clsIdxF.fTime) - float(clsIdxB.fTime);
    // ...
    if (timeDiff > 0 && timeDiff > maxSigmaBoth) {
      // ...
    }
    else if (timeDiff > 0 && timeDiff > maxSigma) {
      // ...
    }
    else if (timeDiff < 0 && xpu::abs(timeDiff) > maxSigma) {
      // ...
    }
    // ...
    if (doChargeCorrelation) {
      // ...
      if (xpu::abs(chargeDiff) > ChargeDelta) {
        // ...
      }
    }
    // ...
    float timeCut = -1.f;
    // ...
    if (xpu::abs(float(clsIdxF.fTime) - float(clsIdxB.fTime)) > timeCut) {
      // ...
    }
    // ...
    IntersectClusters(iModule, pars, clsIdxF, clsDataF, clsIdxB, clsDataB);
  }
}
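// The early rejections above compare the front/back time difference against a 4-sigma window
// built from the cluster time errors. A compact restatement of that window test (our own helper;
// the final timeCut value is set in code elided from the excerpt):
#include <cmath>

static bool clustersCoincideInTime(float tF, float errF, float tB, float errB)
{
  float maxSigma = 4.f * std::sqrt(errF * errF + errB * errB);
  return std::fabs(tF - tB) < maxSigma;
}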
// IntersectClusters: convert the front- and back-side cluster positions to local coordinates and
// intersect the corresponding strips.
float xF = GetClusterPosition(pars, clsF.fPosition, true);
// ...
float du = exF * xpu::cos(xpu::deg_to_rad() * pars.stereoF);
float xB = GetClusterPosition(pars, clsB.fPosition, false);
// ...
float dv = exB * xpu::cos(xpu::deg_to_rad() * pars.stereoB);
// ...
if (xF < 0.f || xF > pars.dX || xB < 0.f || xB > pars.dX) {
  return;
}
// ...
int nF1 = xpu::min(0, nF);
int nF2 = xpu::max(0, nF);
int nB1 = xpu::min(0, nB);
int nB2 = xpu::max(0, nB);
// ...
for (int iF = nF1; iF <= nF2; iF++) {
  float xFi = xF - float(iF) * pars.dX;
  for (int iB = nB1; iB <= nB2; iB++) {
    float xBi = xB - float(iB) * pars.dX;
    // ...
    bool found = Intersect(pars, xFi, exF, xBi, exB, xC, yC, varX, varY, varXY);
    if (not found) continue;
    // Shift into the coordinate system with the origin at the sensor centre.
    xC -= 0.5f * pars.dX;
    yC -= 0.5f * pars.dY;
    // ...
    CreateHit(iBlock, xC, yC, varX, varY, varXY, idxF, clsF, idxB, clsB, du, dv);
  }
}
// GetClusterPosition: convert a cluster centre in channel units into a local coordinate.
int iChannel = int(centre);
float xDif = centre - float(iChannel);
// ...
int iStrip = iChannel - (isFront ? 0 : nChannels);
// ...
float xCluster = (float(iStrip) + xDif + 0.5f) * pars.pitch;
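// The same conversion as a standalone helper (name and signature ours): strip index plus the
// in-strip fraction, shifted by half a strip and scaled by the strip pitch.
static float clusterPositionLocal(float centre, bool isFront, int nChannels, float pitch)
{
  int iChannel = int(centre);
  float xDif = centre - float(iChannel);
  int iStrip = iChannel - (isFront ? 0 : nChannels);
  return (float(iStrip) + xDif + 0.5f) * pitch;
}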
XPU_D bool Intersect(const HitfinderCache& pars, float xF, float exF, float xB, float exB,
                     float& x, float& y, float& varX, float& varY, float& varXY) const
{
  // Front side without stereo angle.
  if (xpu::abs(pars.stereoF) < 0.001f) {
    // ...
    return IsInside(pars, x - pars.dX / 2.f, y - pars.dY / 2.f);
  }
  // Back side without stereo angle.
  if (xpu::abs(pars.stereoB) < 0.001f) {
    // ...
    return IsInside(pars, x - pars.dX / 2.f, y - pars.dY / 2.f);
  }
  // General case: both sides under a stereo angle.
  // ...
  varY = pars.errorFac * (exF * exF + exB * exB);
  // ...
  return IsInside(pars, x - pars.dX / 2.f, y - pars.dY / 2.f);
}
// IsInside: check that the point lies within the active sensor area (origin at the sensor centre).
return x >= -pars.dX / 2.f
    && x <= pars.dX / 2.f
    && y >= -pars.dY / 2.f
    && y <= pars.dY / 2.f;
XPU_D void CreateHit(int iBlocks, float xLocal, float yLocal, float varX, float varY, float varXY,
                     const ClusterIdx& idxF, const Cluster& clsF, const ClusterIdx& idxB,
                     const Cluster& clsB, float du, float dv) const
{
  // Transform the local hit position into the global coordinate system.
  float globalX, globalY, globalZ;
  ToGlobal(iModule, xLocal, yLocal, 0.f, globalX, globalY, globalZ);
  // ...
  float errX = xpu::sqrt(varX);
  float errY = xpu::sqrt(varY);
  // ...
  float hitTime = 0.5f * (float(idxF.fTime) + float(idxB.fTime));
  // ...
  float hitTimeError = 0.5f * xpu::sqrt(etF * etF + etB * etB);
  // ...
  sts::Hit hit {
    // ...
    .fTime           = static_cast<u32>(hitTime),
    // ...
    .fTimeError      = hitTimeError,
    // ...
    .fFrontClusterId = idxF.fIdx,
    .fBackClusterId  = idxB.fIdx,
    // ...
  };
  // Reserve a slot in the per-module hit bucket; count overflows instead of writing out of bounds.
  int idx = xpu::atomic_add(&nHitsPerModule[iModule], 1);
  // ...
  if (size_t(idx) >= maxHitsPerModule) {
    xpu::atomic_add(&monitor->nHitBucketOverflow, 1);
    return;
  }
  // ...
  hitsPerModule[iModule * hitsAllocatedPerModule + idx] = hit;
}
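// The hit time is the mean of the two cluster times and its error half the quadratic sum of the
// two cluster time errors. Restated as a standalone helper (name and signature ours):
#include <cmath>

static void combineHitTime(float tF, float etF, float tB, float etB,
                           float& hitTime, float& hitTimeError)
{
  hitTime = 0.5f * (tF + tB);
  hitTimeError = 0.5f * std::sqrt(etF * etF + etB * etB);
}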
// LandauWidth: clamped linear interpolation in the pre-computed Landau width table.
if (charge <= landauStepSize) return landauTable[0];
if (charge > landauStepSize * (landauTableSize - 1)) return landauTable[landauTableSize - 1];
// ...
int tableIdx = xpu::ceil(charge / landauStepSize);
float e2 = tableIdx * landauStepSize;
float v2 = landauTable[tableIdx];
tableIdx--;
float e1 = tableIdx * landauStepSize;
float v1 = landauTable[tableIdx];
return v1 + (charge - e1) * (v2 - v1) / (e2 - e1);
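// A host-side sketch of the same clamped table interpolation, assuming the index is decremented
// between reading the upper and the lower sample as above (helper name ours):
#include <cmath>
#include <vector>

static float interpolateLandauWidth(float charge, const std::vector<float>& table, float stepSize)
{
  if (charge <= stepSize) return table.front();
  if (charge > stepSize * (table.size() - 1)) return table.back();
  int tableIdx = int(std::ceil(charge / stepSize));
  float e2 = tableIdx * stepSize;
  float v2 = table[tableIdx];
  tableIdx--;
  float e1 = tableIdx * stepSize;
  float v1 = table[tableIdx];
  return v1 + (charge - e1) * (v2 - v1) / (e2 - e1);
}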
// ToGlobal: apply the module's local-to-global transformation (row-major 3x3 rotation plus translation).
const float* tr = &localToGlobalTranslationByModule[iModule * 3];
const float* rot = &localToGlobalRotationByModule[iModule * 9];
// ...
gx = tr[0] + lx * rot[0] + ly * rot[1] + lz * rot[2];
gy = tr[1] + lx * rot[3] + ly * rot[4] + lz * rot[5];
gz = tr[2] + lx * rot[6] + ly * rot[7] + lz * rot[8];
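// The same rigid transformation written against standalone types (our own sketch, not the
// module-indexed buffers used above):
struct Transform3D {
  float tr[3];   // translation
  float rot[9];  // row-major 3x3 rotation matrix
};

static void localToGlobal(const Transform3D& t, const float local[3], float global[3])
{
  for (int r = 0; r < 3; r++) {
    global[r] = t.tr[r] + local[0] * t.rot[3 * r + 0] + local[1] * t.rot[3 * r + 1]
                + local[2] * t.rot[3 * r + 2];
  }
}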
XPU_EXPORT(sts::TheHitfinder);
XPU_EXPORT(cbm::algo::Params);
Referenced declarations:

CbmStsDigi (data class for a single-channel message in the STS):
  XPU_D uint16_t GetChannel() const    (channel number in module)
  XPU_D uint16_t GetChargeU16() const
  XPU_D uint32_t GetTimeU32() const

DigiConnector:
  XPU_D unsigned int next() const

sts::Cluster fields:
  u32  fTime             (cluster time, average of digi times, [ns])
  real fTimeError        (error of cluster time [ns])
  real fPosition         (cluster centre in channel number units)
  real fPositionError    (cluster centre error, r.m.s., in channel number units)
  real fCharge           (total charge)

sts::Hitfinder device methods:
  XPU_D void SortDigisInSpaceAndTime(SortDigis::context&) const
  XPU_D void FindClustersSingleStep(FindClusters::context&) const
  XPU_D void CalculateOffsetsParallel(FindClusters::context&) const
  XPU_D void CalculateChannelOffsets(FindClusters::context& ctx, CbmStsDigi* digis, unsigned int* channelOffsets, unsigned int nDigis) const
  XPU_D void FindClustersParallel(FindClusters::context&) const
  XPU_D void CalculateClustersParallel(FindClusters::context&) const
  XPU_D void CalculateClustersDigiWise(FindClusters::context& ctx, CbmStsDigi* digis, DigiConnector* digiConnector, unsigned int const nDigis) const
  XPU_D void CreateClusterFromConnectors1(int const iModule, const CbmStsDigi* digis, int const digiIndex) const
  XPU_D void CreateClusterFromConnectors2(int const iModule, const CbmStsDigi* digis, DigiConnector* digiConnector, int const digiIndex) const
  XPU_D void CreateClusterFromConnectorsN(int const iModule, const CbmStsDigi* digis, DigiConnector* digiConnector, int const digiIndex) const
  XPU_D void SortClusters(SortClusters::context&) const
  XPU_D void FindHits(FindHits::context&) const
  XPU_D void IntersectClusters(int iBlock, const HitfinderCache& pars, const ClusterIdx& idxF, const sts::Cluster& clsF, const ClusterIdx& idxB, const sts::Cluster& clsB) const
  XPU_D float GetClusterPosition(const HitfinderCache& pars, float centre, bool isFront) const
  XPU_D bool Intersect(const HitfinderCache& pars, float xF, float exF, float xB, float exB, float& x, float& y, float& varX, float& varY, float& varXY) const
  XPU_D bool IsInside(const HitfinderCache& pars, float x, float y) const
  XPU_D void CreateHit(int iBlocks, float xLocal, float yLocal, float varX, float varY, float varXY, const ClusterIdx& idxF, const Cluster& clsF, const ClusterIdx& idxB, const sts::Cluster& clsB, float du, float dv) const
  XPU_D float LandauWidth(float charge) const
  XPU_D void ToGlobal(int iModule, float lx, float ly, float lz, float& gx, float& gy, float& gz) const

Kernel types and constants:
  XPU_D void operator()(context&)
  xpu::kernel_context<shared_memory, constants> context
  xpu::block_sort<u64, CbmStsDigi, kSortDigisBlockSize, kSortDigisItemsPerThread> sort_t    (SortDigis)
  xpu::block_sort<u32, ClusterIdx, kSortClustersBlockSize, kSortClustersItemsPerThread> sort_t    (SortClusters)
  kFindHitsChunksPerModule
  f32 chargeCorrelationDelta