From ae7087ca0b91e577f3bb127eee918cde6b81943d Mon Sep 17 00:00:00 2001 From: "Dustin L. Howett" Date: Fri, 15 May 2020 21:51:54 -0700 Subject: [PATCH 1/3] TEMP REVERT EMJ FIXES --- .../actions/spell-check/whitelist/whitelist.txt | 1 - src/types/CodepointWidthDetector.cpp | 15 ++++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/actions/spell-check/whitelist/whitelist.txt b/.github/actions/spell-check/whitelist/whitelist.txt index 5b4e81fc8f4..330b8b24b93 100644 --- a/.github/actions/spell-check/whitelist/whitelist.txt +++ b/.github/actions/spell-check/whitelist/whitelist.txt @@ -2632,7 +2632,6 @@ wfdopen WFill wfopen wfstream -WGL WHelper whitelisting WIDTHSCROLL diff --git a/src/types/CodepointWidthDetector.cpp b/src/types/CodepointWidthDetector.cpp index 223fac93c4d..ffb30b57412 100644 --- a/src/types/CodepointWidthDetector.cpp +++ b/src/types/CodepointWidthDetector.cpp @@ -38,10 +38,6 @@ namespace // 0x2666 0x2710, // 0x270E 0x2765 0x1f000 - 0x1f02b except 0x1f004 0x1f594 // - // GH #5822 - Removed glyphs that appear inside of CP437 (https://en.wikipedia.org/wiki/Code_page_437) - // and WGL4 (https://en.wikipedia.org/wiki/Windows_Glyph_List_4) since they've been narrow since the - // beginning of time and changing it to wide would only cause destruction. - // // *** Codepoint ranges marked with "OVR" have their given width from EastAsianWidth.txt overridden. 
UnicodeRange{ 0xa1, 0xa1, CodepointWidth::Ambiguous }, UnicodeRange{ 0xa4, 0xa4, CodepointWidth::Ambiguous }, @@ -183,6 +179,7 @@ namespace UnicodeRange{ 0x2592, 0x2595, CodepointWidth::Ambiguous }, UnicodeRange{ 0x25a0, 0x25a1, CodepointWidth::Ambiguous }, UnicodeRange{ 0x25a3, 0x25a9, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x25aa, 0x25ab, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x25b2, 0x25b3, CodepointWidth::Ambiguous }, UnicodeRange{ 0x25b6, 0x25b7, CodepointWidth::Ambiguous }, UnicodeRange{ 0x25bc, 0x25bd, CodepointWidth::Ambiguous }, @@ -192,7 +189,7 @@ namespace UnicodeRange{ 0x25ce, 0x25d1, CodepointWidth::Ambiguous }, UnicodeRange{ 0x25e2, 0x25e5, CodepointWidth::Ambiguous }, UnicodeRange{ 0x25ef, 0x25ef, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x25fd, 0x25fe, CodepointWidth::Wide }, + UnicodeRange{ 0x25fb, 0x25fe, CodepointWidth::Wide }, // OVR 5fb-5fc UnicodeRange{ 0x2600, 0x2604, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x2605, 0x2606, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2609, 0x2609, CodepointWidth::Ambiguous }, @@ -207,9 +204,13 @@ namespace UnicodeRange{ 0x2626, 0x2626, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x262a, 0x262a, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x262e, 0x262f, CodepointWidth::Wide }, // OVR - UnicodeRange{ 0x2638, 0x2639, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x2638, 0x263a, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x2640, 0x2640, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x2642, 0x2642, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x2648, 0x2653, CodepointWidth::Wide }, - UnicodeRange{ 0x265f, 0x265f, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x265f, 0x2660, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x2663, 0x2663, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x2665, 0x2666, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x2668, 0x2668, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x2669, 0x266a, CodepointWidth::Ambiguous }, UnicodeRange{ 0x266c, 0x266d, 
CodepointWidth::Ambiguous }, From b2bd271f994b8f9e8233f4cf370def4780999ccc Mon Sep 17 00:00:00 2001 From: "Dustin L. Howett" Date: Fri, 15 May 2020 21:52:01 -0700 Subject: [PATCH 2/3] Revert "Make most emojis full-width (#5795)" This reverts commit 7ae34336da0be8428738df5e13dfc4c5f1df6f75. --- .../spell-check/whitelist/whitelist.txt | 1 - src/types/CodepointWidthDetector.cpp | 184 ++++++------------ src/types/convert.cpp | 90 +++++++++ 3 files changed, 145 insertions(+), 130 deletions(-) diff --git a/.github/actions/spell-check/whitelist/whitelist.txt b/.github/actions/spell-check/whitelist/whitelist.txt index 330b8b24b93..58da42f6e46 100644 --- a/.github/actions/spell-check/whitelist/whitelist.txt +++ b/.github/actions/spell-check/whitelist/whitelist.txt @@ -1671,7 +1671,6 @@ OUTPATHROOT Outptr Ov OVERLAPPEDWINDOW -OVR OWNDC OWNERDRAWFIXED packageuwp diff --git a/src/types/CodepointWidthDetector.cpp b/src/types/CodepointWidthDetector.cpp index ffb30b57412..3d9b58bcec3 100644 --- a/src/types/CodepointWidthDetector.cpp +++ b/src/types/CodepointWidthDetector.cpp @@ -19,26 +19,9 @@ namespace return range.upperBound < searchTerm; } - static constexpr std::array s_wideAndAmbiguousTable{ + static constexpr std::array s_wideAndAmbiguousTable{ // generated from http://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt // anything not present here is presumed to be Narrow. - // - // GH #900 - Supplemented with emoji codepoints from https://www.unicode.org/Public/13.0.0/ucd/emoji/emoji-data.txt - // Emojis in 0x2010 - 0x2B59 used to be marked as Ambiguous in GetQuickCharWidth() in order to - // force a font lookup, but since we default all Ambiguous width to Narrow, those emojis always - // came out looking squished/tiny. They've been moved into this table and marked as Wide. - // - // There are a couple of codepoints that Microsoft specifically gave an emoji representation - // even if it's not specified as an emoji in the standard. 
I'll list the ones I'm aware of in this comment in case - // we decide to add them in the future: - // 0x261A-0x261C, 0x261E-0x261F - // 0x2661, - // 0x2662, - // 0x2664, - // 0x2666 0x2710, - // 0x270E 0x2765 0x1f000 - 0x1f02b except 0x1f004 0x1f594 - // - // *** Codepoint ranges marked with "OVR" have their given width from EastAsianWidth.txt overridden. UnicodeRange{ 0xa1, 0xa1, CodepointWidth::Ambiguous }, UnicodeRange{ 0xa4, 0xa4, CodepointWidth::Ambiguous }, UnicodeRange{ 0xa7, 0xa8, CodepointWidth::Ambiguous }, @@ -124,7 +107,6 @@ namespace UnicodeRange{ 0x2121, 0x2122, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2126, 0x2126, CodepointWidth::Ambiguous }, UnicodeRange{ 0x212b, 0x212b, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2139, 0x2139, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x2153, 0x2154, CodepointWidth::Ambiguous }, UnicodeRange{ 0x215b, 0x215e, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2160, 0x216b, CodepointWidth::Ambiguous }, @@ -165,21 +147,17 @@ namespace UnicodeRange{ 0x22bf, 0x22bf, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2312, 0x2312, CodepointWidth::Ambiguous }, UnicodeRange{ 0x231a, 0x231b, CodepointWidth::Wide }, - UnicodeRange{ 0x2328, 0x232a, CodepointWidth::Wide }, // OVR 328 - UnicodeRange{ 0x23cf, 0x23cf, CodepointWidth::Wide }, // OVR - UnicodeRange{ 0x23e9, 0x23ef, CodepointWidth::Wide }, // OVR 3ed-3ef - UnicodeRange{ 0x23f0, 0x23f3, CodepointWidth::Wide }, // OVR 3f1-3f2 - UnicodeRange{ 0x23f8, 0x23fa, CodepointWidth::Wide }, // OVR - UnicodeRange{ 0x2460, 0x24c1, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x24c2, 0x24c2, CodepointWidth::Wide }, // OVR - UnicodeRange{ 0x24c3, 0x24e9, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x2329, 0x232a, CodepointWidth::Wide }, + UnicodeRange{ 0x23e9, 0x23ec, CodepointWidth::Wide }, + UnicodeRange{ 0x23f0, 0x23f0, CodepointWidth::Wide }, + UnicodeRange{ 0x23f3, 0x23f3, CodepointWidth::Wide }, + UnicodeRange{ 0x2460, 0x24e9, CodepointWidth::Ambiguous }, UnicodeRange{ 
0x24eb, 0x254b, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2550, 0x2573, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2580, 0x258f, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2592, 0x2595, CodepointWidth::Ambiguous }, UnicodeRange{ 0x25a0, 0x25a1, CodepointWidth::Ambiguous }, UnicodeRange{ 0x25a3, 0x25a9, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x25aa, 0x25ab, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x25b2, 0x25b3, CodepointWidth::Ambiguous }, UnicodeRange{ 0x25b6, 0x25b7, CodepointWidth::Ambiguous }, UnicodeRange{ 0x25bc, 0x25bd, CodepointWidth::Ambiguous }, @@ -189,86 +167,56 @@ namespace UnicodeRange{ 0x25ce, 0x25d1, CodepointWidth::Ambiguous }, UnicodeRange{ 0x25e2, 0x25e5, CodepointWidth::Ambiguous }, UnicodeRange{ 0x25ef, 0x25ef, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x25fb, 0x25fe, CodepointWidth::Wide }, // OVR 5fb-5fc - UnicodeRange{ 0x2600, 0x2604, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x25fd, 0x25fe, CodepointWidth::Wide }, UnicodeRange{ 0x2605, 0x2606, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2609, 0x2609, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x260e, 0x260e, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x260e, 0x260f, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2611, 0x2611, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x2614, 0x2615, CodepointWidth::Wide }, - UnicodeRange{ 0x2618, 0x2618, CodepointWidth::Wide }, // OVR - UnicodeRange{ 0x261d, 0x261d, CodepointWidth::Wide }, // OVR - UnicodeRange{ 0x2620, 0x2620, CodepointWidth::Wide }, // OVR - UnicodeRange{ 0x2622, 0x2623, CodepointWidth::Wide }, // OVR - UnicodeRange{ 0x2626, 0x2626, CodepointWidth::Wide }, // OVR - UnicodeRange{ 0x262a, 0x262a, CodepointWidth::Wide }, // OVR - UnicodeRange{ 0x262e, 0x262f, CodepointWidth::Wide }, // OVR - UnicodeRange{ 0x2638, 0x263a, CodepointWidth::Wide }, // OVR - UnicodeRange{ 0x2640, 0x2640, CodepointWidth::Wide }, // OVR - UnicodeRange{ 0x2642, 0x2642, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x261c, 0x261c, 
CodepointWidth::Ambiguous }, + UnicodeRange{ 0x261e, 0x261e, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x2640, 0x2640, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x2642, 0x2642, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2648, 0x2653, CodepointWidth::Wide }, - UnicodeRange{ 0x265f, 0x2660, CodepointWidth::Wide }, // OVR - UnicodeRange{ 0x2663, 0x2663, CodepointWidth::Wide }, // OVR - UnicodeRange{ 0x2665, 0x2666, CodepointWidth::Wide }, // OVR - UnicodeRange{ 0x2668, 0x2668, CodepointWidth::Wide }, // OVR - UnicodeRange{ 0x2669, 0x266a, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x2660, 0x2661, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x2663, 0x2665, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x2667, 0x266a, CodepointWidth::Ambiguous }, UnicodeRange{ 0x266c, 0x266d, CodepointWidth::Ambiguous }, UnicodeRange{ 0x266f, 0x266f, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x267b, 0x267b, CodepointWidth::Wide }, // OVR - UnicodeRange{ 0x267e, 0x267f, CodepointWidth::Wide }, // OVR 67e - UnicodeRange{ 0x2692, 0x2697, CodepointWidth::Wide }, // OVR 692, 694-697 - UnicodeRange{ 0x2699, 0x2699, CodepointWidth::Wide }, // OVR - UnicodeRange{ 0x269b, 0x269c, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x267f, 0x267f, CodepointWidth::Wide }, + UnicodeRange{ 0x2693, 0x2693, CodepointWidth::Wide }, UnicodeRange{ 0x269e, 0x269f, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26a0, 0x26a1, CodepointWidth::Wide }, // OVR 6a0 - UnicodeRange{ 0x26a7, 0x26a7, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x26a1, 0x26a1, CodepointWidth::Wide }, UnicodeRange{ 0x26aa, 0x26ab, CodepointWidth::Wide }, - UnicodeRange{ 0x26b0, 0x26b1, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x26bd, 0x26be, CodepointWidth::Wide }, UnicodeRange{ 0x26bf, 0x26bf, CodepointWidth::Ambiguous }, UnicodeRange{ 0x26c4, 0x26c5, CodepointWidth::Wide }, - UnicodeRange{ 0x26c6, 0x26c7, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26c8, 0x26c8, CodepointWidth::Wide }, // OVR - UnicodeRange{ 
0x26c9, 0x26cd, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26ce, 0x26cf, CodepointWidth::Wide }, // OVR 6CF - UnicodeRange{ 0x26d0, 0x26d0, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26d1, 0x26d1, CodepointWidth::Wide }, // OVR - UnicodeRange{ 0x26d2, 0x26d2, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26d3, 0x26d4, CodepointWidth::Wide }, // OVR 6d3 + UnicodeRange{ 0x26c6, 0x26cd, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x26ce, 0x26ce, CodepointWidth::Wide }, + UnicodeRange{ 0x26cf, 0x26d3, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x26d4, 0x26d4, CodepointWidth::Wide }, UnicodeRange{ 0x26d5, 0x26e1, CodepointWidth::Ambiguous }, UnicodeRange{ 0x26e3, 0x26e3, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26e8, 0x26e8, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26e9, 0x26ea, CodepointWidth::Wide }, // OVR 6e9 - UnicodeRange{ 0x26eb, 0x26ef, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26f0, 0x26f5, CodepointWidth::Wide }, // OVR 6f0-6f1, 6f4 - UnicodeRange{ 0x26f6, 0x26f6, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26f7, 0x26fa, CodepointWidth::Wide }, // OVR 6f8-6f9 + UnicodeRange{ 0x26e8, 0x26e9, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x26ea, 0x26ea, CodepointWidth::Wide }, + UnicodeRange{ 0x26eb, 0x26f1, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x26f2, 0x26f3, CodepointWidth::Wide }, + UnicodeRange{ 0x26f4, 0x26f4, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x26f5, 0x26f5, CodepointWidth::Wide }, + UnicodeRange{ 0x26f6, 0x26f9, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x26fa, 0x26fa, CodepointWidth::Wide }, UnicodeRange{ 0x26fb, 0x26fc, CodepointWidth::Ambiguous }, UnicodeRange{ 0x26fd, 0x26fd, CodepointWidth::Wide }, UnicodeRange{ 0x26fe, 0x26ff, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2702, 0x2702, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x2705, 0x2705, CodepointWidth::Wide }, - UnicodeRange{ 0x2708, 0x270d, CodepointWidth::Wide }, // OVR 708-709, 70c-70d - UnicodeRange{ 0x270f, 0x270f, 
CodepointWidth::Wide }, // OVR - UnicodeRange{ 0x2712, 0x2712, CodepointWidth::Wide }, // OVR - UnicodeRange{ 0x2714, 0x2714, CodepointWidth::Wide }, // OVR - UnicodeRange{ 0x2716, 0x2716, CodepointWidth::Wide }, // OVR - UnicodeRange{ 0x271d, 0x271d, CodepointWidth::Wide }, // OVR - UnicodeRange{ 0x2721, 0x2721, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x270a, 0x270b, CodepointWidth::Wide }, UnicodeRange{ 0x2728, 0x2728, CodepointWidth::Wide }, - UnicodeRange{ 0x2733, 0x2734, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x273d, 0x273d, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2744, 0x2744, CodepointWidth::Wide }, // OVR - UnicodeRange{ 0x2747, 0x2747, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x274c, 0x274c, CodepointWidth::Wide }, UnicodeRange{ 0x274e, 0x274e, CodepointWidth::Wide }, UnicodeRange{ 0x2753, 0x2755, CodepointWidth::Wide }, UnicodeRange{ 0x2757, 0x2757, CodepointWidth::Wide }, - UnicodeRange{ 0x2763, 0x2764, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x2776, 0x277f, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2795, 0x2797, CodepointWidth::Wide }, - UnicodeRange{ 0x27a1, 0x27a1, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x27b0, 0x27b0, CodepointWidth::Wide }, UnicodeRange{ 0x27bf, 0x27bf, CodepointWidth::Wide }, UnicodeRange{ 0x2b1b, 0x2b1c, CodepointWidth::Wide }, @@ -315,67 +263,45 @@ namespace UnicodeRange{ 0x1f100, 0x1f10a, CodepointWidth::Ambiguous }, UnicodeRange{ 0x1f110, 0x1f12d, CodepointWidth::Ambiguous }, UnicodeRange{ 0x1f130, 0x1f169, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x1f170, 0x1f171, CodepointWidth::Wide }, // OVR - UnicodeRange{ 0x1f172, 0x1f17d, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x1f17e, 0x1f17f, CodepointWidth::Wide }, // OVR 17f - UnicodeRange{ 0x1f180, 0x1f18d, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x1f170, 0x1f18d, CodepointWidth::Ambiguous }, UnicodeRange{ 0x1f18e, 0x1f18e, CodepointWidth::Wide }, UnicodeRange{ 0x1f18f, 0x1f190, CodepointWidth::Ambiguous }, UnicodeRange{ 0x1f191, 
0x1f19a, CodepointWidth::Wide }, UnicodeRange{ 0x1f19b, 0x1f1ac, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x1f1e6, 0x1f1ff, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x1f200, 0x1f202, CodepointWidth::Wide }, UnicodeRange{ 0x1f210, 0x1f23b, CodepointWidth::Wide }, UnicodeRange{ 0x1f240, 0x1f248, CodepointWidth::Wide }, UnicodeRange{ 0x1f250, 0x1f251, CodepointWidth::Wide }, UnicodeRange{ 0x1f260, 0x1f265, CodepointWidth::Wide }, - UnicodeRange{ 0x1f300, 0x1f321, CodepointWidth::Wide }, // OVR 321 - UnicodeRange{ 0x1f324, 0x1f393, CodepointWidth::Wide }, // OVR 324-32c, 336, 37d - UnicodeRange{ 0x1f396, 0x1f397, CodepointWidth::Wide }, // OVR - UnicodeRange{ 0x1f399, 0x1f39b, CodepointWidth::Wide }, // OVR - UnicodeRange{ 0x1f39e, 0x1f39f, CodepointWidth::Wide }, // OVR - UnicodeRange{ 0x1f3a0, 0x1f3f0, CodepointWidth::Wide }, // OVR 3cb-3ce, 3d4-3df - UnicodeRange{ 0x1f3f3, 0x1f3f5, CodepointWidth::Wide }, // OVR 3f3, 3f5 - UnicodeRange{ 0x1f3f7, 0x1f4fd, CodepointWidth::Wide }, // OVR 3f7, 43f, 4fd + UnicodeRange{ 0x1f300, 0x1f320, CodepointWidth::Wide }, + UnicodeRange{ 0x1f32d, 0x1f335, CodepointWidth::Wide }, + UnicodeRange{ 0x1f337, 0x1f37c, CodepointWidth::Wide }, + UnicodeRange{ 0x1f37e, 0x1f393, CodepointWidth::Wide }, + UnicodeRange{ 0x1f3a0, 0x1f3ca, CodepointWidth::Wide }, + UnicodeRange{ 0x1f3cf, 0x1f3d3, CodepointWidth::Wide }, + UnicodeRange{ 0x1f3e0, 0x1f3f0, CodepointWidth::Wide }, + UnicodeRange{ 0x1f3f4, 0x1f3f4, CodepointWidth::Wide }, + UnicodeRange{ 0x1f3f8, 0x1f43e, CodepointWidth::Wide }, + UnicodeRange{ 0x1f440, 0x1f440, CodepointWidth::Wide }, + UnicodeRange{ 0x1f442, 0x1f4fc, CodepointWidth::Wide }, UnicodeRange{ 0x1f4ff, 0x1f53d, CodepointWidth::Wide }, - UnicodeRange{ 0x1f549, 0x1f54e, CodepointWidth::Wide }, // OVR 549-54a + UnicodeRange{ 0x1f54b, 0x1f54e, CodepointWidth::Wide }, UnicodeRange{ 0x1f550, 0x1f567, CodepointWidth::Wide }, - UnicodeRange{ 0x1f56f, 0x1f570, CodepointWidth::Wide }, // OVR - UnicodeRange{ 0x1f573, 
0x1f57a, CodepointWidth::Wide }, // OVR 573-579 - UnicodeRange{ 0x1f587, 0x1f587, CodepointWidth::Wide }, // OVR - UnicodeRange{ 0x1f58a, 0x1f58d, CodepointWidth::Wide }, // OVR - UnicodeRange{ 0x1f590, 0x1f590, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x1f57a, 0x1f57a, CodepointWidth::Wide }, UnicodeRange{ 0x1f595, 0x1f596, CodepointWidth::Wide }, - UnicodeRange{ 0x1f5a4, 0x1f5a5, CodepointWidth::Wide }, // OVR 5a5 - UnicodeRange{ 0x1f5a8, 0x1f5a8, CodepointWidth::Wide }, // OVR - UnicodeRange{ 0x1f5b1, 0x1f5b2, CodepointWidth::Wide }, // OVR - UnicodeRange{ 0x1f5bc, 0x1f5bc, CodepointWidth::Wide }, // OVR - UnicodeRange{ 0x1f5c2, 0x1f5c4, CodepointWidth::Wide }, // OVR - UnicodeRange{ 0x1f5d1, 0x1f5d3, CodepointWidth::Wide }, // OVR - UnicodeRange{ 0x1f5dc, 0x1f5de, CodepointWidth::Wide }, // OVR - UnicodeRange{ 0x1f5e1, 0x1f5e1, CodepointWidth::Wide }, // OVR - UnicodeRange{ 0x1f5e3, 0x1f5e3, CodepointWidth::Wide }, // OVR - UnicodeRange{ 0x1f5e8, 0x1f5e8, CodepointWidth::Wide }, // OVR - UnicodeRange{ 0x1f5ef, 0x1f5ef, CodepointWidth::Wide }, // OVR - UnicodeRange{ 0x1f5f3, 0x1f5f3, CodepointWidth::Wide }, // OVR - UnicodeRange{ 0x1f5fa, 0x1f64f, CodepointWidth::Wide }, // OVR 5fa + UnicodeRange{ 0x1f5a4, 0x1f5a4, CodepointWidth::Wide }, + UnicodeRange{ 0x1f5fb, 0x1f64f, CodepointWidth::Wide }, UnicodeRange{ 0x1f680, 0x1f6c5, CodepointWidth::Wide }, - UnicodeRange{ 0x1f6cb, 0x1f6d2, CodepointWidth::Wide }, // OVR 6cb, 6cd-6cf - UnicodeRange{ 0x1f6d5, 0x1f6d7, CodepointWidth::Wide }, - UnicodeRange{ 0x1f6e0, 0x1f6e5, CodepointWidth::Wide }, // OVR - UnicodeRange{ 0x1f6e9, 0x1f6e9, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x1f6cc, 0x1f6cc, CodepointWidth::Wide }, + UnicodeRange{ 0x1f6d0, 0x1f6d2, CodepointWidth::Wide }, UnicodeRange{ 0x1f6eb, 0x1f6ec, CodepointWidth::Wide }, - UnicodeRange{ 0x1f6f0, 0x1f6f0, CodepointWidth::Wide }, // OVR - UnicodeRange{ 0x1f6f3, 0x1f6fc, CodepointWidth::Wide }, // OVR 6f3 - UnicodeRange{ 0x1f7e0, 0x1f7eb, 
CodepointWidth::Wide }, - UnicodeRange{ 0x1f90c, 0x1f9ff, CodepointWidth::Wide }, // OVR 93b, 946 - UnicodeRange{ 0x1fa70, 0x1fa74, CodepointWidth::Wide }, - UnicodeRange{ 0x1fa78, 0x1fa7a, CodepointWidth::Wide }, - UnicodeRange{ 0x1fa80, 0x1fa86, CodepointWidth::Wide }, - UnicodeRange{ 0x1fa90, 0x1faa8, CodepointWidth::Wide }, - UnicodeRange{ 0x1fab0, 0x1fab6, CodepointWidth::Wide }, - UnicodeRange{ 0x1fac0, 0x1fac2, CodepointWidth::Wide }, - UnicodeRange{ 0x1fad0, 0x1fad6, CodepointWidth::Wide }, + UnicodeRange{ 0x1f6f4, 0x1f6f8, CodepointWidth::Wide }, + UnicodeRange{ 0x1f910, 0x1f93e, CodepointWidth::Wide }, + UnicodeRange{ 0x1f940, 0x1f94c, CodepointWidth::Wide }, + UnicodeRange{ 0x1f950, 0x1f96b, CodepointWidth::Wide }, + UnicodeRange{ 0x1f980, 0x1f997, CodepointWidth::Wide }, + UnicodeRange{ 0x1f9c0, 0x1f9c0, CodepointWidth::Wide }, + UnicodeRange{ 0x1f9d0, 0x1f9e6, CodepointWidth::Wide }, UnicodeRange{ 0x20000, 0x2fffd, CodepointWidth::Wide }, UnicodeRange{ 0x30000, 0x3fffd, CodepointWidth::Wide }, UnicodeRange{ 0xe0100, 0xe01ef, CodepointWidth::Ambiguous }, diff --git a/src/types/convert.cpp b/src/types/convert.cpp index f33ab36201d..f4608a2e35a 100644 --- a/src/types/convert.cpp +++ b/src/types/convert.cpp @@ -384,6 +384,96 @@ CodepointWidth GetQuickCharWidth(const wchar_t wch) noexcept { return CodepointWidth::Narrow; } + // 0x2010 - 0x2B59 varies between narrow, ambiguous, and wide by character and font (Unicode 9.0) + // However, there are a bunch of retroactive-emoji in this range. Things that weren't emoji and then they became + // "emoji" later. As a result, they jumped from a fixed narrow definition to a now ambiguous definition. + // There are others in this range already defined as wide or ambiguous, but we're just going to + // implicitly say they're all ambiguous here to force a font lookup. 
+ // I picked the ones that looked like color double-wide emoji in my browser that weren't already + // covered easily by the half-width/full-width table (see CodepointWidthDetector.cpp) + // See https://unicode.org/Public/emoji/12.0/emoji-data.txt + else if ((0x2194 <= wch && wch <= 0x2199) || + (0x21A9 <= wch && wch <= 0x21AA) || + (0x231A <= wch && wch <= 0x231B) || + 0x2328 == wch || + 0x23CF == wch || + (0x23E9 <= wch && wch <= 0x23F3) || + (0x23F8 <= wch && wch <= 0x23FA) || + 0x24C2 == wch || + (0x25AA <= wch && wch <= 0x25AB) || + 0x25B6 == wch || + 0x25C0 == wch || + (0x25FB <= wch && wch <= 0x25FE) || + (0x2600 <= wch && wch <= 0x2604) || + 0x260E == wch || + 0x2611 == wch || + (0x2614 <= wch && wch <= 0x2615) || + 0x2618 == wch || + 0x261D == wch || + 0x2620 == wch || + (0x2622 <= wch && wch <= 0x2623) || + 0x2626 == wch || + 0x262A == wch || + (0x262E <= wch && wch <= 0x262F) || + (0x2638 <= wch && wch <= 0x263A) || + 0x2640 == wch || + 0x2642 == wch || + (0x2648 <= wch && wch <= 0x2653) || + (0x265F <= wch && wch <= 0x2660) || + 0x2663 == wch || + (0x2665 <= wch && wch <= 0x2666) || + 0x2668 == wch || + 0x267B == wch || + (0x267E <= wch && wch <= 0x267F) || + (0x2692 <= wch && wch <= 0x2697) || + 0x2699 == wch || + (0x269B <= wch && wch <= 0x269C) || + (0x26A0 <= wch && wch <= 0x26A1) || + (0x26AA <= wch && wch <= 0x26AB) || + (0x26B0 <= wch && wch <= 0x26B1) || + (0x26BD <= wch && wch <= 0x26BE) || + (0x26C4 <= wch && wch <= 0x26C5) || + 0x26C8 == wch || + 0x26CE == wch || + 0x26CF == wch || + 0x26D1 == wch || + (0x26D3 <= wch && wch <= 0x26D4) || + (0x26E9 <= wch && wch <= 0x26EA) || + (0x26F0 <= wch && wch <= 0x26F5) || + (0x26F7 <= wch && wch <= 0x26FA) || + 0x26FD == wch || + 0x2702 == wch || + 0x2705 == wch || + (0x2708 <= wch && wch <= 0x2709) || + (0x270A <= wch && wch <= 0x270B) || + (0x270C <= wch && wch <= 0x270D) || + 0x270F == wch || + 0x2712 == wch || + 0x2714 == wch || + 0x2716 == wch || + 0x271D == wch || + 0x2721 == wch || + 0x2728 == 
wch || + (0x2733 <= wch && wch <= 0x2734) || + 0x2744 == wch || + 0x2747 == wch || + 0x274C == wch || + 0x274E == wch || + (0x2753 <= wch && wch <= 0x2755) || + 0x2757 == wch || + (0x2763 <= wch && wch <= 0x2764) || + (0x2795 <= wch && wch <= 0x2797) || + 0x27A1 == wch || + 0x27B0 == wch || + 0x27BF == wch || + (0x2934 <= wch && wch <= 0x2935) || + (0x2B05 <= wch && wch <= 0x2B07) || + (0x2B1B <= wch && wch <= 0x2B1C) || + 0x2B50 == wch || + 0x2B55 == wch) + { + return CodepointWidth::Ambiguous; + } else if (0x2B5A <= wch && wch <= 0x2E44) { // From Unicode 9.0, this range is narrow (assorted languages) From b11ef4d1965978826118f96697bcd0ff56e11225 Mon Sep 17 00:00:00 2001 From: "Dustin L. Howett" Date: Fri, 15 May 2020 23:58:55 -0700 Subject: [PATCH 3/3] Partially regenerate codepoint widths from Emoji 13.0 This removes all glyphs from the emoji list that do not default to "emoji presentation" (EPres). It removes all local overrides, but retains the comments about the emoji we left out that are Microsoft-specific. This brings us fully in line with the most popular Terminals on OS X, except that we squash our emoji down to fit in one cell and they let them hang over the edges and damage other characters. Oh well. Refs #900, #5914. 
--- .../spell-check/whitelist/whitelist.txt | 3 + src/types/CodepointWidthDetector.cpp | 175 +++++++++++------- src/types/convert.cpp | 90 --------- 3 files changed, 110 insertions(+), 158 deletions(-) diff --git a/.github/actions/spell-check/whitelist/whitelist.txt b/.github/actions/spell-check/whitelist/whitelist.txt index 58da42f6e46..a9a0c9324b2 100644 --- a/.github/actions/spell-check/whitelist/whitelist.txt +++ b/.github/actions/spell-check/whitelist/whitelist.txt @@ -710,6 +710,7 @@ Elems elif elseif emacs +Emoji Emojis emptybox enabledelayedexpansion @@ -729,6 +730,7 @@ ENUMLOGFONTEX enumranges envvar eol +EPres ERASEBKGND errno errorlevel @@ -2432,6 +2434,7 @@ uap uapadmin ubuntu ucd +UCD uch UCHAR ucs diff --git a/src/types/CodepointWidthDetector.cpp b/src/types/CodepointWidthDetector.cpp index 3d9b58bcec3..9eb637d00bf 100644 --- a/src/types/CodepointWidthDetector.cpp +++ b/src/types/CodepointWidthDetector.cpp @@ -19,9 +19,39 @@ namespace return range.upperBound < searchTerm; } - static constexpr std::array s_wideAndAmbiguousTable{ + static constexpr std::array s_wideAndAmbiguousTable{ // generated from http://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt // anything not present here is presumed to be Narrow. + // + // GH #900 - Supplemented with emoji codepoints from https://www.unicode.org/Public/13.0.0/ucd/emoji/emoji-data.txt + // Emojis in 0x2010 - 0x2B59 used to be marked as Ambiguous in GetQuickCharWidth() in order to + // force a font lookup, but since we default all Ambiguous width to Narrow, those emojis always + // came out looking squished/tiny. They've been moved into this table and marked as Wide. 
+ // + // === UCD Definitions === + // EA - EastAsianWidth + // Emoji - Emoji + // EPres - Emoji Presentation + // ======================= + // + // This table has been partially regenerated from the Unicode Character Database as of 13.0, with + // the following rules: + // Codepoints whose EA is "W", "F" are Wide + // Codepoints whose EA is "A" are Ambiguous + // Codepoints where Emoji=Y and EPres=Y are Emoji, therefore Wide + // - + // Codepoints where Emoji=Y but EPres=*N* are only Emoji when followed + // by U+FE0F variation selector 16. + // + // There are a couple of codepoints that Microsoft specifically gave an emoji representation + // even if it's not specified as an emoji in the standard. I'll list the ones I'm aware of in this comment in case + // we decide to add them in the future: + // 0x261A-0x261C, 0x261E-0x261F + // 0x2661, + // 0x2662, + // 0x2664, + // 0x2666 0x2710, + // 0x270E 0x2765 0x1f000 - 0x1f02b except 0x1f004 0x1f594 UnicodeRange{ 0xa1, 0xa1, CodepointWidth::Ambiguous }, UnicodeRange{ 0xa4, 0xa4, CodepointWidth::Ambiguous }, UnicodeRange{ 0xa7, 0xa8, CodepointWidth::Ambiguous }, @@ -146,11 +176,11 @@ namespace UnicodeRange{ 0x22a5, 0x22a5, CodepointWidth::Ambiguous }, UnicodeRange{ 0x22bf, 0x22bf, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2312, 0x2312, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x231a, 0x231b, CodepointWidth::Wide }, + UnicodeRange{ 0x231a, 0x231b, CodepointWidth::Wide }, // Emoji=Y EPres=Y UnicodeRange{ 0x2329, 0x232a, CodepointWidth::Wide }, - UnicodeRange{ 0x23e9, 0x23ec, CodepointWidth::Wide }, - UnicodeRange{ 0x23f0, 0x23f0, CodepointWidth::Wide }, - UnicodeRange{ 0x23f3, 0x23f3, CodepointWidth::Wide }, + UnicodeRange{ 0x23e9, 0x23ec, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x23f0, 0x23f0, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x23f3, 0x23f3, CodepointWidth::Wide }, // Emoji=Y EPres=Y UnicodeRange{ 0x2460, 0x24e9, CodepointWidth::Ambiguous }, UnicodeRange{ 0x24eb, 
0x254b, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2550, 0x2573, CodepointWidth::Ambiguous }, @@ -167,61 +197,61 @@ namespace UnicodeRange{ 0x25ce, 0x25d1, CodepointWidth::Ambiguous }, UnicodeRange{ 0x25e2, 0x25e5, CodepointWidth::Ambiguous }, UnicodeRange{ 0x25ef, 0x25ef, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x25fd, 0x25fe, CodepointWidth::Wide }, + UnicodeRange{ 0x25fd, 0x25fe, CodepointWidth::Wide }, // Emoji=Y EPres=Y UnicodeRange{ 0x2605, 0x2606, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2609, 0x2609, CodepointWidth::Ambiguous }, UnicodeRange{ 0x260e, 0x260f, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2614, 0x2615, CodepointWidth::Wide }, + UnicodeRange{ 0x2614, 0x2615, CodepointWidth::Wide }, // Emoji=Y EPres=Y UnicodeRange{ 0x261c, 0x261c, CodepointWidth::Ambiguous }, UnicodeRange{ 0x261e, 0x261e, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2640, 0x2640, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2642, 0x2642, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2648, 0x2653, CodepointWidth::Wide }, + UnicodeRange{ 0x2648, 0x2653, CodepointWidth::Wide }, // Emoji=Y EPres=Y UnicodeRange{ 0x2660, 0x2661, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2663, 0x2665, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2667, 0x266a, CodepointWidth::Ambiguous }, UnicodeRange{ 0x266c, 0x266d, CodepointWidth::Ambiguous }, UnicodeRange{ 0x266f, 0x266f, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x267f, 0x267f, CodepointWidth::Wide }, - UnicodeRange{ 0x2693, 0x2693, CodepointWidth::Wide }, + UnicodeRange{ 0x267f, 0x267f, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x2693, 0x2693, CodepointWidth::Wide }, // Emoji=Y EPres=Y UnicodeRange{ 0x269e, 0x269f, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26a1, 0x26a1, CodepointWidth::Wide }, - UnicodeRange{ 0x26aa, 0x26ab, CodepointWidth::Wide }, - UnicodeRange{ 0x26bd, 0x26be, CodepointWidth::Wide }, + UnicodeRange{ 0x26a1, 0x26a1, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x26aa, 
0x26ab, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x26bd, 0x26be, CodepointWidth::Wide }, // Emoji=Y EPres=Y UnicodeRange{ 0x26bf, 0x26bf, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26c4, 0x26c5, CodepointWidth::Wide }, + UnicodeRange{ 0x26c4, 0x26c5, CodepointWidth::Wide }, // Emoji=Y EPres=Y UnicodeRange{ 0x26c6, 0x26cd, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26ce, 0x26ce, CodepointWidth::Wide }, + UnicodeRange{ 0x26ce, 0x26ce, CodepointWidth::Wide }, // Emoji=Y EPres=Y UnicodeRange{ 0x26cf, 0x26d3, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26d4, 0x26d4, CodepointWidth::Wide }, + UnicodeRange{ 0x26d4, 0x26d4, CodepointWidth::Wide }, // Emoji=Y EPres=Y UnicodeRange{ 0x26d5, 0x26e1, CodepointWidth::Ambiguous }, UnicodeRange{ 0x26e3, 0x26e3, CodepointWidth::Ambiguous }, UnicodeRange{ 0x26e8, 0x26e9, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26ea, 0x26ea, CodepointWidth::Wide }, + UnicodeRange{ 0x26ea, 0x26ea, CodepointWidth::Wide }, // Emoji=Y EPres=Y UnicodeRange{ 0x26eb, 0x26f1, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26f2, 0x26f3, CodepointWidth::Wide }, + UnicodeRange{ 0x26f2, 0x26f3, CodepointWidth::Wide }, // Emoji=Y EPres=Y UnicodeRange{ 0x26f4, 0x26f4, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26f5, 0x26f5, CodepointWidth::Wide }, + UnicodeRange{ 0x26f5, 0x26f5, CodepointWidth::Wide }, // Emoji=Y EPres=Y UnicodeRange{ 0x26f6, 0x26f9, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26fa, 0x26fa, CodepointWidth::Wide }, + UnicodeRange{ 0x26fa, 0x26fa, CodepointWidth::Wide }, // Emoji=Y EPres=Y UnicodeRange{ 0x26fb, 0x26fc, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26fd, 0x26fd, CodepointWidth::Wide }, + UnicodeRange{ 0x26fd, 0x26fd, CodepointWidth::Wide }, // Emoji=Y EPres=Y UnicodeRange{ 0x26fe, 0x26ff, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2705, 0x2705, CodepointWidth::Wide }, - UnicodeRange{ 0x270a, 0x270b, CodepointWidth::Wide }, - UnicodeRange{ 0x2728, 0x2728, CodepointWidth::Wide }, + 
UnicodeRange{ 0x2705, 0x2705, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x270a, 0x270b, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x2728, 0x2728, CodepointWidth::Wide }, // Emoji=Y EPres=Y UnicodeRange{ 0x273d, 0x273d, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x274c, 0x274c, CodepointWidth::Wide }, - UnicodeRange{ 0x274e, 0x274e, CodepointWidth::Wide }, - UnicodeRange{ 0x2753, 0x2755, CodepointWidth::Wide }, - UnicodeRange{ 0x2757, 0x2757, CodepointWidth::Wide }, + UnicodeRange{ 0x274c, 0x274c, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x274e, 0x274e, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x2753, 0x2755, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x2757, 0x2757, CodepointWidth::Wide }, // Emoji=Y EPres=Y UnicodeRange{ 0x2776, 0x277f, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2795, 0x2797, CodepointWidth::Wide }, - UnicodeRange{ 0x27b0, 0x27b0, CodepointWidth::Wide }, - UnicodeRange{ 0x27bf, 0x27bf, CodepointWidth::Wide }, - UnicodeRange{ 0x2b1b, 0x2b1c, CodepointWidth::Wide }, - UnicodeRange{ 0x2b50, 0x2b50, CodepointWidth::Wide }, - UnicodeRange{ 0x2b55, 0x2b55, CodepointWidth::Wide }, + UnicodeRange{ 0x2795, 0x2797, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x27b0, 0x27b0, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x27bf, 0x27bf, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x2b1b, 0x2b1c, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x2b50, 0x2b50, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x2b55, 0x2b55, CodepointWidth::Wide }, // Emoji=Y EPres=Y UnicodeRange{ 0x2b56, 0x2b59, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2e80, 0x2e99, CodepointWidth::Wide }, UnicodeRange{ 0x2e9b, 0x2ef3, CodepointWidth::Wide }, @@ -258,50 +288,59 @@ namespace UnicodeRange{ 0x18800, 0x18af2, CodepointWidth::Wide }, UnicodeRange{ 0x1b000, 0x1b11e, CodepointWidth::Wide }, UnicodeRange{ 0x1b170, 0x1b2fb, 
CodepointWidth::Wide }, - UnicodeRange{ 0x1f004, 0x1f004, CodepointWidth::Wide }, - UnicodeRange{ 0x1f0cf, 0x1f0cf, CodepointWidth::Wide }, + UnicodeRange{ 0x1f004, 0x1f004, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x1f0cf, 0x1f0cf, CodepointWidth::Wide }, // Emoji=Y EPres=Y UnicodeRange{ 0x1f100, 0x1f10a, CodepointWidth::Ambiguous }, UnicodeRange{ 0x1f110, 0x1f12d, CodepointWidth::Ambiguous }, UnicodeRange{ 0x1f130, 0x1f169, CodepointWidth::Ambiguous }, UnicodeRange{ 0x1f170, 0x1f18d, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x1f18e, 0x1f18e, CodepointWidth::Wide }, + UnicodeRange{ 0x1f18e, 0x1f18e, CodepointWidth::Wide }, // Emoji=Y EPres=Y UnicodeRange{ 0x1f18f, 0x1f190, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x1f191, 0x1f19a, CodepointWidth::Wide }, + UnicodeRange{ 0x1f191, 0x1f19a, CodepointWidth::Wide }, // Emoji=Y EPres=Y UnicodeRange{ 0x1f19b, 0x1f1ac, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x1f1e6, 0x1f1ff, CodepointWidth::Wide }, // Emoji=Y EPres=Y UnicodeRange{ 0x1f200, 0x1f202, CodepointWidth::Wide }, UnicodeRange{ 0x1f210, 0x1f23b, CodepointWidth::Wide }, UnicodeRange{ 0x1f240, 0x1f248, CodepointWidth::Wide }, - UnicodeRange{ 0x1f250, 0x1f251, CodepointWidth::Wide }, + UnicodeRange{ 0x1f250, 0x1f251, CodepointWidth::Wide }, // Emoji=Y EPres=Y UnicodeRange{ 0x1f260, 0x1f265, CodepointWidth::Wide }, - UnicodeRange{ 0x1f300, 0x1f320, CodepointWidth::Wide }, - UnicodeRange{ 0x1f32d, 0x1f335, CodepointWidth::Wide }, - UnicodeRange{ 0x1f337, 0x1f37c, CodepointWidth::Wide }, - UnicodeRange{ 0x1f37e, 0x1f393, CodepointWidth::Wide }, - UnicodeRange{ 0x1f3a0, 0x1f3ca, CodepointWidth::Wide }, - UnicodeRange{ 0x1f3cf, 0x1f3d3, CodepointWidth::Wide }, - UnicodeRange{ 0x1f3e0, 0x1f3f0, CodepointWidth::Wide }, - UnicodeRange{ 0x1f3f4, 0x1f3f4, CodepointWidth::Wide }, - UnicodeRange{ 0x1f3f8, 0x1f43e, CodepointWidth::Wide }, - UnicodeRange{ 0x1f440, 0x1f440, CodepointWidth::Wide }, - UnicodeRange{ 0x1f442, 0x1f4fc, 
CodepointWidth::Wide }, - UnicodeRange{ 0x1f4ff, 0x1f53d, CodepointWidth::Wide }, - UnicodeRange{ 0x1f54b, 0x1f54e, CodepointWidth::Wide }, - UnicodeRange{ 0x1f550, 0x1f567, CodepointWidth::Wide }, - UnicodeRange{ 0x1f57a, 0x1f57a, CodepointWidth::Wide }, - UnicodeRange{ 0x1f595, 0x1f596, CodepointWidth::Wide }, - UnicodeRange{ 0x1f5a4, 0x1f5a4, CodepointWidth::Wide }, - UnicodeRange{ 0x1f5fb, 0x1f64f, CodepointWidth::Wide }, - UnicodeRange{ 0x1f680, 0x1f6c5, CodepointWidth::Wide }, - UnicodeRange{ 0x1f6cc, 0x1f6cc, CodepointWidth::Wide }, - UnicodeRange{ 0x1f6d0, 0x1f6d2, CodepointWidth::Wide }, - UnicodeRange{ 0x1f6eb, 0x1f6ec, CodepointWidth::Wide }, - UnicodeRange{ 0x1f6f4, 0x1f6f8, CodepointWidth::Wide }, - UnicodeRange{ 0x1f910, 0x1f93e, CodepointWidth::Wide }, - UnicodeRange{ 0x1f940, 0x1f94c, CodepointWidth::Wide }, - UnicodeRange{ 0x1f950, 0x1f96b, CodepointWidth::Wide }, - UnicodeRange{ 0x1f980, 0x1f997, CodepointWidth::Wide }, - UnicodeRange{ 0x1f9c0, 0x1f9c0, CodepointWidth::Wide }, - UnicodeRange{ 0x1f9d0, 0x1f9e6, CodepointWidth::Wide }, + UnicodeRange{ 0x1f300, 0x1f320, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x1f32d, 0x1f335, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x1f337, 0x1f37c, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x1f37e, 0x1f393, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x1f3a0, 0x1f3ca, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x1f3cf, 0x1f3d3, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x1f3e0, 0x1f3f0, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x1f3f4, 0x1f3f4, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x1f3f8, 0x1f43e, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x1f440, 0x1f440, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x1f442, 0x1f4fc, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x1f4ff, 0x1f53d, CodepointWidth::Wide }, // Emoji=Y EPres=Y + 
UnicodeRange{ 0x1f54b, 0x1f54e, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x1f550, 0x1f567, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x1f57a, 0x1f57a, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x1f595, 0x1f596, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x1f5a4, 0x1f5a4, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x1f5fb, 0x1f64f, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x1f680, 0x1f6c5, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x1f6cc, 0x1f6cc, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x1f6d0, 0x1f6d2, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x1f6d5, 0x1f6d7, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x1f6eb, 0x1f6ec, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x1f6f4, 0x1f6fc, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x1f7e0, 0x1f7eb, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x1f90c, 0x1f93a, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x1f93c, 0x1f945, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x1f947, 0x1f978, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x1f97a, 0x1f9cb, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x1f9cd, 0x1f9ff, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x1fa70, 0x1fa74, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x1fa78, 0x1fa7a, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x1fa80, 0x1fa86, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x1fa90, 0x1faa8, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x1fab0, 0x1fab6, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x1fac0, 0x1fac2, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x1fad0, 0x1fad6, CodepointWidth::Wide }, // Emoji=Y EPres=Y UnicodeRange{ 0x20000, 0x2fffd, CodepointWidth::Wide }, UnicodeRange{ 
0x30000, 0x3fffd, CodepointWidth::Wide }, UnicodeRange{ 0xe0100, 0xe01ef, CodepointWidth::Ambiguous }, diff --git a/src/types/convert.cpp b/src/types/convert.cpp index f4608a2e35a..f33ab36201d 100644 --- a/src/types/convert.cpp +++ b/src/types/convert.cpp @@ -384,96 +384,6 @@ CodepointWidth GetQuickCharWidth(const wchar_t wch) noexcept { return CodepointWidth::Narrow; } - // 0x2010 - 0x2B59 varies between narrow, ambiguous, and wide by character and font (Unicode 9.0) - // However, there are a bunch of retroactive-emoji in this range. Things that weren't emoji and then they became - // "emoji" later. As a result, they jumped from a fixed narrow definition to a now ambiguous definition. - // There are others in this range already defined as wide or ambiguous, but we're just going to - // implicitly say they're all ambiguous here to force a font lookup. - // I picked the ones that looked like color double-wide emoji in my browser that weren't already - // covered easily by the half-width/full-width table (see CodepointWidthDetector.cpp) - // See https://unicode.org/Public/emoji/12.0/emoji-data.txt - else if ((0x2194 <= wch && wch <= 0x2199) || - (0x21A9 <= wch && wch <= 0x21AA) || - (0x231A <= wch && wch <= 0x231B) || - 0x2328 == wch || - 0x23CF == wch || - (0x23E9 <= wch && wch <= 0x23F3) || - (0x23F8 <= wch && wch <= 0x23FA) || - 0x24C2 == wch || - (0x25AA <= wch && wch <= 0x25AB) || - 0x25B6 == wch || - 0x25C0 == wch || - (0x25FB <= wch && wch <= 0x25FE) || - (0x2600 <= wch && wch <= 0x2604) || - 0x260E == wch || - 0x2611 == wch || - (0x2614 <= wch && wch <= 0x2615) || - 0x2618 == wch || - 0x261D == wch || - 0x2620 == wch || - (0x2622 <= wch && wch <= 0x2623) || - 0x2626 == wch || - 0x262A == wch || - (0x262E <= wch && wch <= 0x262F) || - (0x2638 <= wch && wch <= 0x263A) || - 0x2640 == wch || - 0x2642 == wch || - (0x2648 <= wch && wch <= 0x2653) || - (0x265F <= wch && wch <= 0x2660) || - 0x2663 == wch || - (0x2665 <= wch && wch <= 0x2666) || - 0x2668 == wch || - 
0x267B == wch || - (0x267E <= wch && wch <= 0x267F) || - (0x2692 <= wch && wch <= 0x2697) || - 0x2699 == wch || - (0x269B <= wch && wch <= 0x269C) || - (0x26A0 <= wch && wch <= 0x26A1) || - (0x26AA <= wch && wch <= 0x26AB) || - (0x26B0 <= wch && wch <= 0x26B1) || - (0x26BD <= wch && wch <= 0x26BE) || - (0x26C4 <= wch && wch <= 0x26C5) || - 0x26C8 == wch || - 0x26CE == wch || - 0x26CF == wch || - 0x26D1 == wch || - (0x26D3 <= wch && wch <= 0x26D4) || - (0x26E9 <= wch && wch <= 0x26EA) || - (0x26F0 <= wch && wch <= 0x26F5) || - (0x26F7 <= wch && wch <= 0x26FA) || - 0x26FD == wch || - 0x2702 == wch || - 0x2705 == wch || - (0x2708 <= wch && wch <= 0x2709) || - (0x270A <= wch && wch <= 0x270B) || - (0x270C <= wch && wch <= 0x270D) || - 0x270F == wch || - 0x2712 == wch || - 0x2714 == wch || - 0x2716 == wch || - 0x271D == wch || - 0x2721 == wch || - 0x2728 == wch || - (0x2733 <= wch && wch <= 0x2734) || - 0x2744 == wch || - 0x2747 == wch || - 0x274C == wch || - 0x274E == wch || - (0x2753 <= wch && wch <= 0x2755) || - 0x2757 == wch || - (0x2763 <= wch && wch <= 0x2764) || - (0x2795 <= wch && wch <= 0x2797) || - 0x27A1 == wch || - 0x27B0 == wch || - 0x27BF == wch || - (0x2934 <= wch && wch <= 0x2935) || - (0x2B05 <= wch && wch <= 0x2B07) || - (0x2B1B <= wch && wch <= 0x2B1C) || - 0x2B50 == wch || - 0x2B55 == wch) - { - return CodepointWidth::Ambiguous; - } else if (0x2B5A <= wch && wch <= 0x2E44) { // From Unicode 9.0, this range is narrow (assorted languages)