Skip to content

Commit

Permalink
fix(simd.h): gather_mask was wrong for no-simd fallback
Browse files Browse the repository at this point in the history
The operation it's supposed to be performing is to merge in the
gathered value where the mask is set, and leave mask=0 lanes
unchanged. But for our "no SIMD" fallback (i.e., no AVX2 for 4- and
8-wide), we incorrectly implemented it as setting the unmasked lanes
to 0!

This was quite wrong, and unfortunately masked by the fact that our
test of the function initialized a variable to 0. Change to a better
number. The best number.

Signed-off-by: Larry Gritz <[email protected]>
  • Loading branch information
lgritz committed Mar 12, 2024
1 parent 12be3e8 commit 3b597b6
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 7 deletions.
8 changes: 4 additions & 4 deletions src/include/OpenImageIO/simd.h
Original file line number Diff line number Diff line change
Expand Up @@ -4411,7 +4411,7 @@ vint4::gather_mask (const vbool_t& mask, const value_t *baseptr, const vint_t& v
#if OIIO_SIMD_AVX >= 2
m_simd = _mm_mask_i32gather_epi32 (m_simd, baseptr, vindex, _mm_cvtps_epi32(mask), scale);
#else
SIMD_CONSTRUCT (mask[i] ? *(const value_t *)((const char *)baseptr + vindex[i]*scale) : 0);
SIMD_DO (if (mask[i]) m_val[i] = *(const value_t *)((const char *)baseptr + vindex[i]*scale));
#endif
}

Expand Down Expand Up @@ -5294,7 +5294,7 @@ vint8::gather_mask (const vbool_t& mask, const value_t *baseptr, const vint_t& v
#if OIIO_SIMD_AVX >= 2
m_simd = _mm256_mask_i32gather_epi32 (m_simd, baseptr, vindex, _mm256_cvtps_epi32(mask), scale);
#else
SIMD_CONSTRUCT (mask[i] ? *(const value_t *)((const char *)baseptr + vindex[i]*scale) : 0);
SIMD_DO (if (mask[i]) m_val[i] = *(const value_t *)((const char *)baseptr + vindex[i]*scale));
#endif
}

Expand Down Expand Up @@ -7058,7 +7058,7 @@ vfloat4::gather_mask (const vbool_t& mask, const value_t *baseptr, const vint_t&
#if OIIO_SIMD_AVX >= 2
m_simd = _mm_mask_i32gather_ps (m_simd, baseptr, vindex, mask, scale);
#else
SIMD_CONSTRUCT (mask[i] ? *(const value_t *)((const char *)baseptr + vindex[i]*scale) : 0);
SIMD_DO (if (mask[i]) m_val[i] = *(const value_t *)((const char *)baseptr + vindex[i]*scale));
#endif
}

Expand Down Expand Up @@ -8966,7 +8966,7 @@ vfloat8::gather_mask (const vbool_t& mask, const value_t *baseptr, const vint_t&
#if OIIO_SIMD_AVX >= 2
m_simd = _mm256_mask_i32gather_ps (m_simd, baseptr, vindex, mask, scale);
#else
SIMD_CONSTRUCT (mask[i] ? *(const value_t *)((const char *)baseptr + vindex[i]*scale) : 0);
SIMD_DO (if (mask[i]) m_val[i] = *(const value_t *)((const char *)baseptr + vindex[i]*scale));
#endif
}

Expand Down
6 changes: 3 additions & 3 deletions src/libutil/simd_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -607,9 +607,9 @@ test_gatherscatter()
OIIO_CHECK_SIMD_EQUAL(g, VEC::Iota());

BOOL mask = BOOL::from_bitmask(0x55555555); // every other one
ELEM every_other_iota[] = { 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0 };
gm = 0;
gm = 42;
gm.gather_mask (mask, gather_source.data(), indices);
ELEM every_other_iota[] = { 0, 42, 2, 42, 4, 42, 6, 42, 8, 42, 10, 42, 12, 42, 14, 42 };
OIIO_CHECK_SIMD_EQUAL (gm, VEC(every_other_iota));

std::vector<ELEM> scatter_out (bufsize, (ELEM)-1);
Expand All @@ -622,7 +622,7 @@ test_gatherscatter()
OIIO_CHECK_EQUAL (scatter_out[i], ((i%3) == 1 && (i&1) ? i/3 : -1));

benchmark ("gather", [&](const ELEM *d){ VEC v; v.gather (d, indices); return v; }, gather_source.data());
benchmark ("gather_mask", [&](const ELEM *d){ VEC v; v.gather_mask (mask, d, indices); return v; }, gather_source.data());
benchmark ("gather_mask", [&](const ELEM *d){ VEC v = ELEM(0); v.gather_mask (mask, d, indices); return v; }, gather_source.data());
benchmark ("scatter", [&](ELEM *d){ g.scatter (d, indices); return g; }, scatter_out.data());
benchmark ("scatter_mask", [&](ELEM *d){ g.scatter_mask (mask, d, indices); return g; }, scatter_out.data());
}
Expand Down

0 comments on commit 3b597b6

Please sign in to comment.