16{
17#ifdef MAYAFLUX_ARCH_X64
18 const __m256 vals = _mm256_loadu_ps(v);
19 const __m256
threshold = _mm256_set1_ps(iso);
20 const __m256 cmp = _mm256_cmp_ps(vals, threshold, _CMP_LT_OS);
21 return static_cast<uint32_t>(_mm256_movemask_ps(cmp));
22#elif defined(MAYAFLUX_ARCH_ARM64)
23 const float32x4_t lo = vld1q_f32(v);
24 const float32x4_t hi = vld1q_f32(v + 4);
25 const float32x4_t thresh = vdupq_n_f32(iso);
26
27
28
29
30 const uint32x4_t lo_mask = vcltq_f32(lo, thresh);
31 const uint32x4_t hi_mask = vcltq_f32(hi, thresh);
32
33
34
35 alignas(16) static const uint32_t k_lo_shifts[4] = { 31, 30, 29, 28 };
36 alignas(16) static const uint32_t k_hi_shifts[4] = { 27, 26, 25, 24 };
37
38 const uint32x4_t lo_bits = vshlq_u32(vshrq_n_u32(lo_mask, 31),
39 vld1q_s32(reinterpret_cast<const int32_t*>(k_lo_shifts)));
40 const uint32x4_t hi_bits = vshlq_u32(vshrq_n_u32(hi_mask, 31),
41 vld1q_s32(reinterpret_cast<const int32_t*>(k_hi_shifts)));
42
43 const uint32x4_t combined = vorrq_u32(lo_bits, hi_bits);
44 return vaddvq_u32(combined);
45#else
46 uint32_t idx = 0;
47 idx |= static_cast<uint32_t>(v[0] < iso) << 0;
48 idx |= static_cast<uint32_t>(v[1] < iso) << 1;
49 idx |= static_cast<uint32_t>(v[2] < iso) << 2;
50 idx |= static_cast<uint32_t>(v[3] < iso) << 3;
51 idx |= static_cast<uint32_t>(v[4] < iso) << 4;
52 idx |= static_cast<uint32_t>(v[5] < iso) << 5;
53 idx |= static_cast<uint32_t>(v[6] < iso) << 6;
54 idx |= static_cast<uint32_t>(v[7] < iso) << 7;
55 return idx;
56#endif
57}
Tendency< D, float > threshold(const Tendency< D, float > &t, float thresh)
Zeroes the output below the threshold and passes it through unchanged above the threshold.