Skip to content

Commit

Permalink
Fix bugs in CharToColumnMapper (#16787)
Browse files Browse the repository at this point in the history
Aside from overall simplifying `CharToColumnMapper` this fixes 2 bugs:
* The backward search loop may have iterated 1 column too far,
  because it didn't stop at `*current <= *target`, but rather at
  `*(current - 1) <= *target`. This issue was only apparent when
  surrogate pairs were being used in a row.
* When the target offset is that of a trailing surrogate pair
  the forward search loop may have iterated 1 column too far.
  It's somewhat unlikely for this to happen since this code is
  only used through ICU, but you never know.

This is a continuation of PR #16775.
  • Loading branch information
lhecker authored Feb 29, 2024
1 parent b780d8a commit 043d5cd
Show file tree
Hide file tree
Showing 6 changed files with 101 additions and 36 deletions.
56 changes: 22 additions & 34 deletions src/buffer/out/Row.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,47 +92,35 @@ CharToColumnMapper::CharToColumnMapper(const wchar_t* chars, const uint16_t* cha

// If given a position (`offset`) inside the ROW's text, this function will return the corresponding column.
// This function in particular returns the glyph's first column.
til::CoordType CharToColumnMapper::GetLeadingColumnAt(ptrdiff_t offset) noexcept
til::CoordType CharToColumnMapper::GetLeadingColumnAt(ptrdiff_t targetOffset) noexcept
{
offset = clamp(offset, 0, _lastCharOffset);
targetOffset = clamp(targetOffset, 0, _lastCharOffset);

// This code needs to fulfill two conditions on top of the obvious (a forward/backward search):
// A: We never want to stop on a column that is marked with CharOffsetsTrailer (= "GetLeadingColumn").
// B: With these parameters we always want to stop at currentOffset=4:
// _charOffsets={4, 6}
// currentOffset=4 *OR* 6
// targetOffset=5
// This is because we're being asked for a "LeadingColumn", while the caller gave us the offset of a
// trailing surrogate pair or similar. Returning the column of the leading half is the correct choice.

auto col = _currentColumn;
const auto currentOffset = _charOffsets[col] & CharOffsetsMask;
auto currentOffset = _charOffsets[col];

// Goal: Move the _currentColumn cursor to a cell which contains the given target offset.
// Depending on where the target offset is we have to either search forward or backward.
if (offset < currentOffset)
// A plain forward-search until we find our targetOffset.
// This loop may iterate too far and thus violate our example in condition B, however...
while (targetOffset > (currentOffset & CharOffsetsMask))
{
// Backward search.
// Goal: Find the first preceding column where the offset is <= the target offset. This results in the first
// cell that contains our target offset, even if that offset is in the middle of a long grapheme.
//
// We abuse the fact that the trailing half of wide glyphs is marked with CharOffsetsTrailer to our advantage.
// Since they're >0x8000, the `offset < _charOffsets[col]` check will always be true and ensure we iterate over them.
//
// Since _charOffsets cannot contain negative values and because offset has been
// clamped to be positive we naturally exit when reaching the first column.
for (; offset < _charOffsets[col - 1]; --col)
{
}
currentOffset = _charOffsets[++col];
}
else if (offset > currentOffset)
// This backward-search is not just a counter-part to the above, but simultaneously also handles conditions A and B.
// It abuses the fact that columns marked with CharOffsetsTrailer are >0x8000 and targetOffset is always <0x8000.
// This means we skip all "trailer" columns when iterating backwards, and only stop on a non-trailer (= condition A).
// Condition B is fixed simply because we iterate backwards after the forward-search (in that exact order).
while (targetOffset < currentOffset)
{
// Forward search.
// Goal: Find the first subsequent column where the offset is > the target offset.
// We stop 1 column before that however so that the next loop works correctly.
// It's the inverse of the loop above.
//
// Since offset has been clamped to be at most 1 less than the maximum
// _charOffsets value the loop naturally exits before hitting the end.
for (; offset >= (_charOffsets[col + 1] & CharOffsetsMask); ++col)
{
}
// Now that we found the cell that definitely includes this char offset,
// we have to iterate back to the cell's starting column.
for (; WI_IsFlagSet(_charOffsets[col], CharOffsetsTrailer); --col)
{
}
currentOffset = _charOffsets[--col];
}

_currentColumn = col;
Expand Down
2 changes: 1 addition & 1 deletion src/buffer/out/Row.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ struct CharToColumnMapper
{
CharToColumnMapper(const wchar_t* chars, const uint16_t* charOffsets, ptrdiff_t lastCharOffset, til::CoordType currentColumn) noexcept;

til::CoordType GetLeadingColumnAt(ptrdiff_t offset) noexcept;
til::CoordType GetLeadingColumnAt(ptrdiff_t targetOffset) noexcept;
til::CoordType GetTrailingColumnAt(ptrdiff_t offset) noexcept;
til::CoordType GetLeadingColumnAt(const wchar_t* str) noexcept;
til::CoordType GetTrailingColumnAt(const wchar_t* str) noexcept;
Expand Down
3 changes: 2 additions & 1 deletion src/buffer/out/ut_textbuffer/TextBuffer.Unit.Tests.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
<ClCompile Include="ReflowTests.cpp" />
<ClCompile Include="TextColorTests.cpp" />
<ClCompile Include="TextAttributeTests.cpp" />
<ClCompile Include="UTextAdapterTests.cpp" />
<ClCompile Include="precomp.cpp">
<PrecompiledHeader>Create</PrecompiledHeader>
</ClCompile>
Expand Down Expand Up @@ -41,4 +42,4 @@
<Import Project="$(SolutionDir)src\common.build.post.props" />
<Import Project="$(SolutionDir)src\common.build.tests.props" />
<Import Project="$(SolutionDir)src\common.nugetversions.targets" />
</Project>
</Project>
63 changes: 63 additions & 0 deletions src/buffer/out/ut_textbuffer/UTextAdapterTests.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.

#include "precomp.h"

#include "WexTestClass.h"
#include "../textBuffer.hpp"
#include "../../renderer/inc/DummyRenderer.hpp"

template<>
class WEX::TestExecution::VerifyOutputTraits<std::vector<til::point_span>>
{
public:
static WEX::Common::NoThrowString ToString(const std::vector<til::point_span>& vec)
{
WEX::Common::NoThrowString str;
str.Append(L"{ ");
for (size_t i = 0; i < vec.size(); ++i)
{
const auto& s = vec[i];
if (i != 0)
{
str.Append(L", ");
}
str.AppendFormat(L"{(%d, %d), (%d, %d)}", s.start.x, s.start.y, s.end.x, s.end.y);
}
str.Append(L" }");
return str;
}
};

class UTextAdapterTests
{
TEST_CLASS(UTextAdapterTests);

TEST_METHOD(Unicode)
{
DummyRenderer renderer;
TextBuffer buffer{ til::size{ 24, 1 }, TextAttribute{}, 0, false, renderer };

RowWriteState state{
.text = L"abc 𝒶𝒷𝒸 abc ネコちゃん",
};
buffer.Write(0, TextAttribute{}, state);
VERIFY_IS_TRUE(state.text.empty());

static constexpr auto s = [](til::CoordType beg, til::CoordType end) -> til::point_span {
return { { beg, 0 }, { end, 0 } };
};

auto expected = std::vector{ s(0, 2), s(8, 10) };
auto actual = buffer.SearchText(L"abc", false);
VERIFY_ARE_EQUAL(expected, actual);

expected = std::vector{ s(5, 5) };
actual = buffer.SearchText(L"𝒷", false);
VERIFY_ARE_EQUAL(expected, actual);

expected = std::vector{ s(12, 15) };
actual = buffer.SearchText(L"ネコ", false);
VERIFY_ARE_EQUAL(expected, actual);
}
};
1 change: 1 addition & 0 deletions src/buffer/out/ut_textbuffer/sources
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ SOURCES = \
ReflowTests.cpp \
TextColorTests.cpp \
TextAttributeTests.cpp \
UTextAdapterTests.cpp \
DefaultResource.rc \

TARGETLIBS = \
Expand Down
12 changes: 12 additions & 0 deletions src/inc/til/point.h
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,18 @@ namespace til // Terminal Implementation Library. Also: "Today I Learned"
{
til::point start;
til::point end;

constexpr bool operator==(const point_span& rhs) const noexcept
{
// `__builtin_memcmp` isn't an official standard, but it's the
// only way at the time of writing to get a constexpr `memcmp`.
return __builtin_memcmp(this, &rhs, sizeof(rhs)) == 0;
}

constexpr bool operator!=(const point_span& rhs) const noexcept
{
return __builtin_memcmp(this, &rhs, sizeof(rhs)) != 0;
}
};
}

Expand Down

0 comments on commit 043d5cd

Please sign in to comment.