diff --git a/README.rst b/README.rst index ddec997..066c111 100644 --- a/README.rst +++ b/README.rst @@ -38,9 +38,9 @@ than the most current Unicode Standard release files, which this project aims to track. The most current release of this API is based from Unicode Standard release -*7.0.0*, dated *2014-02-28, 23:15:00 GMT [KW, LI]* for table generated by -file ``EastAsianWidth-7.0.0.txt`` and *2014-02-07, 18:42:08 GMT [MD]* for -``DerivedCombiningClass-7.0.0.txt``. +*8.0.0*, dated *2015-02-10, 21:00:00 GMT [KW, LI]* for table generated by +file ``EastAsianWidth-8.0.0.txt`` and *2015-02-13, 13:47:11 GMT [MD]* for +``DerivedGeneralCategory-8.0.0.txt``. Installation ------------ @@ -140,12 +140,14 @@ Updating Tables The command ``python setup.py update`` will fetch the following resources: - http://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt -- http://www.unicode.org/Public/UNIDATA/extracted/DerivedCombiningClass.txt +- http://www.unicode.org/Public/UNIDATA/extracted/DerivedGeneralCategory.txt -And generate the table files `wcwidth/table_wide.py`_ and `wcwidth/table_comb.py`_. +And generate the table files `wcwidth/table_wide.py`_, +`wcwidth/table_comb.py`_, and `wcwidth/table_zero.py`_. .. _`wcwidth/table_wide.py`: https://github.com/jquast/wcwidth/tree/master/wcwidth/table_wide.py .. _`wcwidth/table_comb.py`: https://github.com/jquast/wcwidth/tree/master/wcwidth/table_comb.py +.. _`wcwidth/table_zero.py`: https://github.com/jquast/wcwidth/tree/master/wcwidth/table_zero.py wcwidth.c --------- diff --git a/bin/wcwidth-browser.py b/bin/wcwidth-browser.py index e7f121c..b137f67 100755 --- a/bin/wcwidth-browser.py +++ b/bin/wcwidth-browser.py @@ -37,7 +37,7 @@ import signal # local -from wcwidth import wcwidth, table_comb +from wcwidth.wcwidth import _bisearch, wcwidth, COMBINING # 3rd-party from blessed import Terminal @@ -116,6 +116,7 @@ def __init__(self, width=2): self.characters = (unichr(idx) for idx in xrange(LIMIT_UCS) if wcwidth(unichr(idx)) == width + and not _bisearch(idx, COMBINING) ) def __iter__(self): @@ -152,7 +153,7 @@ def __init__(self, width=1): """ self.characters = [] letters_o = (u'o' * width) - for boundaries in table_comb.NONZERO_COMBINING: + for boundaries in COMBINING: for val in [_val for _val in range(boundaries[0], boundaries[1] + 1) if _val <= LIMIT_UCS]: diff --git a/bin/wcwidth-combining-comparator.py b/bin/wcwidth-combining-comparator.py index 000a017..c389c38 100755 --- a/bin/wcwidth-combining-comparator.py +++ b/bin/wcwidth-combining-comparator.py @@ -18,7 +18,7 @@ import sys # local imports -from wcwidth.wcwidth import _bisearch, NONZERO_COMBINING +from wcwidth.wcwidth import _bisearch, COMBINING def report_comb_msg(ucs, comb_py, comb_wc): @@ -66,8 +66,8 @@ def report_comb_msg(ucs, comb_py, comb_wc): def _is_equal_combining(ucs): - comb_py = bool(unicodedata.combining(ucs)) - comb_wc = bool(_bisearch(ord(ucs), NONZERO_COMBINING)) + comb_py = bool(unicodedata.category(ucs) in ['Mc', 'Me', 'Mn']) + comb_wc = bool(_bisearch(ord(ucs), COMBINING)) assert comb_py == comb_wc, report_comb_msg(ucs, comb_py, comb_wc) diff --git a/setup.py b/setup.py index fc573dc..373e723 100755 --- a/setup.py +++ b/setup.py @@ -47,9 +47,10 @@ class SetupUpdate(setuptools.Command): EAW_OUT = os.path.join(HERE, 'wcwidth', 'table_wide.py') UCD_URL = ('http://www.unicode.org/Public/UNIDATA/extracted/' - 'DerivedCombiningClass.txt') - UCD_IN = os.path.join(HERE, 'data', 'DerivedCombiningClass.txt') + 'DerivedGeneralCategory.txt') + UCD_IN = os.path.join(HERE, 'data', 'DerivedGeneralCategory.txt') CMB_OUT = os.path.join(HERE, 'wcwidth', 'table_comb.py') + ZERO_OUT = os.path.join(HERE, 'wcwidth', 'table_zero.py') def initialize_options(self): """Override builtin method: no options are available.""" @@ -60,10 +61,11 @@ def finalize_options(self): pass def run(self): - """Execute command: update east-asian and combining tables.""" + """Execute command: update east-asian, combining and zero width tables.""" assert os.getenv('VIRTUAL_ENV'), 'You should be in a virtualenv' self.do_east_asian_width() self.do_combining() + self.do_zero_width() def do_east_asian_width(self): """Fetch and update east-asian tables.""" @@ -75,9 +77,16 @@ def do_east_asian_width(self): def do_combining(self): """Fetch and update combining tables.""" self._do_retrieve(self.UCD_URL, self.UCD_IN) - (version, date, values) = self._do_combining_parse(self.UCD_IN) + (version, date, values) = self._do_category_parse(self.UCD_IN, ('Mc', 'Me', 'Mn',)) table = self._make_table(values) - self._do_write(self.CMB_OUT, 'NONZERO_COMBINING', version, date, table) + self._do_write(self.CMB_OUT, 'COMBINING', version, date, table) + + def do_zero_width(self): + """Fetch and update zero width tables.""" + self._do_retrieve(self.UCD_URL, self.UCD_IN) + (version, date, values) = self._do_category_parse(self.UCD_IN, ('Me', 'Mn',)) + table = self._make_table(values) + self._do_write(self.ZERO_OUT, 'ZERO_WIDTH', version, date, table) @staticmethod def _make_table(values): @@ -143,8 +152,8 @@ def _do_east_asian_width_parse(fname, return version, date, sorted(values) @staticmethod - def _do_combining_parse(fname, exclude_values=(0,)): - """Parse unicode combining tables.""" + def _do_category_parse(fname, categories): + """Parse unicode category tables.""" version, date, values = None, None, [] print("parsing {} ..".format(fname)) for line in open(fname, 'rb'): @@ -159,8 +168,8 @@ def _do_combining_parse(fname, exclude_values=(0,)): continue addrs, details = uline.split(';', 1) addrs, details = addrs.rstrip(), details.lstrip() - if not any(details.startswith('{} #'.format(value)) - for value in exclude_values): + if any(details.startswith('{} #'.format(value)) + for value in categories): start, stop = addrs, addrs if '..' in addrs: start, stop = addrs.split('..') diff --git a/wcwidth/table_comb.py b/wcwidth/table_comb.py index 95eecb7..4203ff2 100644 --- a/wcwidth/table_comb.py +++ b/wcwidth/table_comb.py @@ -1,11 +1,10 @@ -"""Nonzero_Combining table. Created by setup.py.""" -# Generated: 2014-11-20T06:55:26.612062 -# Source: DerivedCombiningClass-7.0.0.txt -# Date: 2014-02-07, 18:42:08 GMT [MD] -NONZERO_COMBINING = ( - (0x0300, 0x034e,), # Combining Grave Accent ..Combining Upwards Arrow - (0x0350, 0x036f,), # Combining Right Arrowhea..Combining Latin Small Le - (0x0483, 0x0487,), # Combining Cyrillic Titlo..Combining Cyrillic Pokry +"""Combining table. Created by setup.py.""" +# Generated: 2015-09-02T01:27:58.918349 +# Source: DerivedGeneralCategory-8.0.0.txt +# Date: 2015-02-13, 13:47:11 GMT [MD] +COMBINING = ( + (0x0300, 0x036f,), # Combining Grave Accent ..Combining Latin Small Le + (0x0483, 0x0489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli (0x0591, 0x05bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg (0x05bf, 0x05bf,), # Hebrew Point Rafe ..Hebrew Point Rafe (0x05c1, 0x05c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot @@ -20,137 +19,224 @@ (0x06ea, 0x06ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem (0x0711, 0x0711,), # Syriac Letter Superscrip..Syriac Letter Superscrip (0x0730, 0x074a,), # Syriac Pthaha Above ..Syriac Barrekh + (0x07a6, 0x07b0,), # Thaana Abafili ..Thaana Sukun (0x07eb, 0x07f3,), # Nko Combining Short High..Nko Combining Double Dot (0x0816, 0x0819,), # Samaritan Mark In ..Samaritan Mark Dagesh (0x081b, 0x0823,), # Samaritan Mark Epentheti..Samaritan Vowel Sign A (0x0825, 0x0827,), # Samaritan Vowel Sign Sho..Samaritan Vowel Sign U (0x0829, 0x082d,), # Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa (0x0859, 0x085b,), # Mandaic Affrication Mark..Mandaic Gemination Mark - (0x08e4, 0x08ff,), # Arabic Curly Fatha .. - (0x093c, 0x093c,), # Devanagari Sign Nukta ..Devanagari Sign Nukta - (0x094d, 0x094d,), # Devanagari Sign Virama ..Devanagari Sign Virama - (0x0951, 0x0954,), # Devanagari Stress Sign U..Devanagari Acute Accent + (0x08e3, 0x0903,), # (nil) ..Devanagari Sign Visarga + (0x093a, 0x093c,), # Devanagari Vowel Sign Oe..Devanagari Sign Nukta + (0x093e, 0x094f,), # Devanagari Vowel Sign Aa..Devanagari Vowel Sign Aw + (0x0951, 0x0957,), # Devanagari Stress Sign U..Devanagari Vowel Sign Uu + (0x0962, 0x0963,), # Devanagari Vowel Sign Vo..Devanagari Vowel Sign Vo + (0x0981, 0x0983,), # Bengali Sign Candrabindu..Bengali Sign Visarga (0x09bc, 0x09bc,), # Bengali Sign Nukta ..Bengali Sign Nukta - (0x09cd, 0x09cd,), # Bengali Sign Virama ..Bengali Sign Virama + (0x09be, 0x09c4,), # Bengali Vowel Sign Aa ..Bengali Vowel Sign Vocal + (0x09c7, 0x09c8,), # Bengali Vowel Sign E ..Bengali Vowel Sign Ai + (0x09cb, 0x09cd,), # Bengali Vowel Sign O ..Bengali Sign Virama + (0x09d7, 0x09d7,), # Bengali Au Length Mark ..Bengali Au Length Mark + (0x09e2, 0x09e3,), # Bengali Vowel Sign Vocal..Bengali Vowel Sign Vocal + (0x0a01, 0x0a03,), # Gurmukhi Sign Adak Bindi..Gurmukhi Sign Visarga (0x0a3c, 0x0a3c,), # Gurmukhi Sign Nukta ..Gurmukhi Sign Nukta - (0x0a4d, 0x0a4d,), # Gurmukhi Sign Virama ..Gurmukhi Sign Virama + (0x0a3e, 0x0a42,), # Gurmukhi Vowel Sign Aa ..Gurmukhi Vowel Sign Uu + (0x0a47, 0x0a48,), # Gurmukhi Vowel Sign Ee ..Gurmukhi Vowel Sign Ai + (0x0a4b, 0x0a4d,), # Gurmukhi Vowel Sign Oo ..Gurmukhi Sign Virama + (0x0a51, 0x0a51,), # Gurmukhi Sign Udaat ..Gurmukhi Sign Udaat + (0x0a70, 0x0a71,), # Gurmukhi Tippi ..Gurmukhi Addak + (0x0a75, 0x0a75,), # Gurmukhi Sign Yakash ..Gurmukhi Sign Yakash + (0x0a81, 0x0a83,), # Gujarati Sign Candrabind..Gujarati Sign Visarga (0x0abc, 0x0abc,), # Gujarati Sign Nukta ..Gujarati Sign Nukta - (0x0acd, 0x0acd,), # Gujarati Sign Virama ..Gujarati Sign Virama + (0x0abe, 0x0ac5,), # Gujarati Vowel Sign Aa ..Gujarati Vowel Sign Cand + (0x0ac7, 0x0ac9,), # Gujarati Vowel Sign E ..Gujarati Vowel Sign Cand + (0x0acb, 0x0acd,), # Gujarati Vowel Sign O ..Gujarati Sign Virama + (0x0ae2, 0x0ae3,), # Gujarati Vowel Sign Voca..Gujarati Vowel Sign Voca + (0x0b01, 0x0b03,), # Oriya Sign Candrabindu ..Oriya Sign Visarga (0x0b3c, 0x0b3c,), # Oriya Sign Nukta ..Oriya Sign Nukta - (0x0b4d, 0x0b4d,), # Oriya Sign Virama ..Oriya Sign Virama - (0x0bcd, 0x0bcd,), # Tamil Sign Virama ..Tamil Sign Virama - (0x0c4d, 0x0c4d,), # Telugu Sign Virama ..Telugu Sign Virama + (0x0b3e, 0x0b44,), # Oriya Vowel Sign Aa ..Oriya Vowel Sign Vocalic + (0x0b47, 0x0b48,), # Oriya Vowel Sign E ..Oriya Vowel Sign Ai + (0x0b4b, 0x0b4d,), # Oriya Vowel Sign O ..Oriya Sign Virama + (0x0b56, 0x0b57,), # Oriya Ai Length Mark ..Oriya Au Length Mark + (0x0b62, 0x0b63,), # Oriya Vowel Sign Vocalic..Oriya Vowel Sign Vocalic + (0x0b82, 0x0b82,), # Tamil Sign Anusvara ..Tamil Sign Anusvara + (0x0bbe, 0x0bc2,), # Tamil Vowel Sign Aa ..Tamil Vowel Sign Uu + (0x0bc6, 0x0bc8,), # Tamil Vowel Sign E ..Tamil Vowel Sign Ai + (0x0bca, 0x0bcd,), # Tamil Vowel Sign O ..Tamil Sign Virama + (0x0bd7, 0x0bd7,), # Tamil Au Length Mark ..Tamil Au Length Mark + (0x0c00, 0x0c03,), # (nil) ..Telugu Sign Visarga + (0x0c3e, 0x0c44,), # Telugu Vowel Sign Aa ..Telugu Vowel Sign Vocali + (0x0c46, 0x0c48,), # Telugu Vowel Sign E ..Telugu Vowel Sign Ai + (0x0c4a, 0x0c4d,), # Telugu Vowel Sign O ..Telugu Sign Virama (0x0c55, 0x0c56,), # Telugu Length Mark ..Telugu Ai Length Mark + (0x0c62, 0x0c63,), # Telugu Vowel Sign Vocali..Telugu Vowel Sign Vocali + (0x0c81, 0x0c83,), # (nil) ..Kannada Sign Visarga (0x0cbc, 0x0cbc,), # Kannada Sign Nukta ..Kannada Sign Nukta - (0x0ccd, 0x0ccd,), # Kannada Sign Virama ..Kannada Sign Virama - (0x0d4d, 0x0d4d,), # Malayalam Sign Virama ..Malayalam Sign Virama + (0x0cbe, 0x0cc4,), # Kannada Vowel Sign Aa ..Kannada Vowel Sign Vocal + (0x0cc6, 0x0cc8,), # Kannada Vowel Sign E ..Kannada Vowel Sign Ai + (0x0cca, 0x0ccd,), # Kannada Vowel Sign O ..Kannada Sign Virama + (0x0cd5, 0x0cd6,), # Kannada Length Mark ..Kannada Ai Length Mark + (0x0ce2, 0x0ce3,), # Kannada Vowel Sign Vocal..Kannada Vowel Sign Vocal + (0x0d01, 0x0d03,), # (nil) ..Malayalam Sign Visarga + (0x0d3e, 0x0d44,), # Malayalam Vowel Sign Aa ..Malayalam Vowel Sign Voc + (0x0d46, 0x0d48,), # Malayalam Vowel Sign E ..Malayalam Vowel Sign Ai + (0x0d4a, 0x0d4d,), # Malayalam Vowel Sign O ..Malayalam Sign Virama + (0x0d57, 0x0d57,), # Malayalam Au Length Mark..Malayalam Au Length Mark + (0x0d62, 0x0d63,), # Malayalam Vowel Sign Voc..Malayalam Vowel Sign Voc + (0x0d82, 0x0d83,), # Sinhala Sign Anusvaraya ..Sinhala Sign Visargaya (0x0dca, 0x0dca,), # Sinhala Sign Al-lakuna ..Sinhala Sign Al-lakuna - (0x0e38, 0x0e3a,), # Thai Character Sara U ..Thai Character Phinthu - (0x0e48, 0x0e4b,), # Thai Character Mai Ek ..Thai Character Mai Chatt - (0x0eb8, 0x0eb9,), # Lao Vowel Sign U ..Lao Vowel Sign Uu - (0x0ec8, 0x0ecb,), # Lao Tone Mai Ek ..Lao Tone Mai Catawa + (0x0dcf, 0x0dd4,), # Sinhala Vowel Sign Aela-..Sinhala Vowel Sign Ketti + (0x0dd6, 0x0dd6,), # Sinhala Vowel Sign Diga ..Sinhala Vowel Sign Diga + (0x0dd8, 0x0ddf,), # Sinhala Vowel Sign Gaett..Sinhala Vowel Sign Gayan + (0x0df2, 0x0df3,), # Sinhala Vowel Sign Diga ..Sinhala Vowel Sign Diga + (0x0e31, 0x0e31,), # Thai Character Mai Han-a..Thai Character Mai Han-a + (0x0e34, 0x0e3a,), # Thai Character Sara I ..Thai Character Phinthu + (0x0e47, 0x0e4e,), # Thai Character Maitaikhu..Thai Character Yamakkan + (0x0eb1, 0x0eb1,), # Lao Vowel Sign Mai Kan ..Lao Vowel Sign Mai Kan + (0x0eb4, 0x0eb9,), # Lao Vowel Sign I ..Lao Vowel Sign Uu + (0x0ebb, 0x0ebc,), # Lao Vowel Sign Mai Kon ..Lao Semivowel Sign Lo + (0x0ec8, 0x0ecd,), # Lao Tone Mai Ek ..Lao Niggahita (0x0f18, 0x0f19,), # Tibetan Astrological Sig..Tibetan Astrological Sig (0x0f35, 0x0f35,), # Tibetan Mark Ngas Bzung ..Tibetan Mark Ngas Bzung (0x0f37, 0x0f37,), # Tibetan Mark Ngas Bzung ..Tibetan Mark Ngas Bzung (0x0f39, 0x0f39,), # Tibetan Mark Tsa -phru ..Tibetan Mark Tsa -phru - (0x0f71, 0x0f72,), # Tibetan Vowel Sign Aa ..Tibetan Vowel Sign I - (0x0f74, 0x0f74,), # Tibetan Vowel Sign U ..Tibetan Vowel Sign U - (0x0f7a, 0x0f7d,), # Tibetan Vowel Sign E ..Tibetan Vowel Sign Oo - (0x0f80, 0x0f80,), # Tibetan Vowel Sign Rever..Tibetan Vowel Sign Rever - (0x0f82, 0x0f84,), # Tibetan Sign Nyi Zla Naa..Tibetan Mark Halanta + (0x0f3e, 0x0f3f,), # Tibetan Sign Yar Tshes ..Tibetan Sign Mar Tshes + (0x0f71, 0x0f84,), # Tibetan Vowel Sign Aa ..Tibetan Mark Halanta (0x0f86, 0x0f87,), # Tibetan Sign Lci Rtags ..Tibetan Sign Yang Rtags + (0x0f8d, 0x0f97,), # Tibetan Subjoined Sign L..Tibetan Subjoined Letter + (0x0f99, 0x0fbc,), # Tibetan Subjoined Letter..Tibetan Subjoined Letter (0x0fc6, 0x0fc6,), # Tibetan Symbol Padma Gda..Tibetan Symbol Padma Gda - (0x1037, 0x1037,), # Myanmar Sign Dot Below ..Myanmar Sign Dot Below - (0x1039, 0x103a,), # Myanmar Sign Virama ..Myanmar Sign Asat - (0x108d, 0x108d,), # Myanmar Sign Shan Counci..Myanmar Sign Shan Counci + (0x102b, 0x103e,), # Myanmar Vowel Sign Tall ..Myanmar Consonant Sign M + (0x1056, 0x1059,), # Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal + (0x105e, 0x1060,), # Myanmar Consonant Sign M..Myanmar Consonant Sign M + (0x1062, 0x1064,), # Myanmar Vowel Sign Sgaw ..Myanmar Tone Mark Sgaw K + (0x1067, 0x106d,), # Myanmar Vowel Sign Weste..Myanmar Sign Western Pwo + (0x1071, 0x1074,), # Myanmar Vowel Sign Geba ..Myanmar Vowel Sign Kayah + (0x1082, 0x108d,), # Myanmar Consonant Sign S..Myanmar Sign Shan Counci + (0x108f, 0x108f,), # Myanmar Sign Rumai Palau..Myanmar Sign Rumai Palau + (0x109a, 0x109d,), # Myanmar Sign Khamti Tone..Myanmar Vowel Sign Aiton (0x135d, 0x135f,), # Ethiopic Combining Gemin..Ethiopic Combining Gemin - (0x1714, 0x1714,), # Tagalog Sign Virama ..Tagalog Sign Virama - (0x1734, 0x1734,), # Hanunoo Sign Pamudpod ..Hanunoo Sign Pamudpod - (0x17d2, 0x17d2,), # Khmer Sign Coeng ..Khmer Sign Coeng + (0x1712, 0x1714,), # Tagalog Vowel Sign I ..Tagalog Sign Virama + (0x1732, 0x1734,), # Hanunoo Vowel Sign I ..Hanunoo Sign Pamudpod + (0x1752, 0x1753,), # Buhid Vowel Sign I ..Buhid Vowel Sign U + (0x1772, 0x1773,), # Tagbanwa Vowel Sign I ..Tagbanwa Vowel Sign U + (0x17b4, 0x17d3,), # Khmer Vowel Inherent Aq ..Khmer Sign Bathamasat (0x17dd, 0x17dd,), # Khmer Sign Atthacan ..Khmer Sign Atthacan + (0x180b, 0x180d,), # Mongolian Free Variation..Mongolian Free Variation (0x18a9, 0x18a9,), # Mongolian Letter Ali Gal..Mongolian Letter Ali Gal - (0x1939, 0x193b,), # Limbu Sign Mukphreng ..Limbu Sign Sa-i - (0x1a17, 0x1a18,), # Buginese Vowel Sign I ..Buginese Vowel Sign U - (0x1a60, 0x1a60,), # Tai Tham Sign Sakot ..Tai Tham Sign Sakot - (0x1a75, 0x1a7c,), # Tai Tham Sign Tone-1 ..Tai Tham Sign Khuen-lue + (0x1920, 0x192b,), # Limbu Vowel Sign A ..Limbu Subjoined Letter W + (0x1930, 0x193b,), # Limbu Small Letter Ka ..Limbu Sign Sa-i + (0x1a17, 0x1a1b,), # Buginese Vowel Sign I ..Buginese Vowel Sign Ae + (0x1a55, 0x1a5e,), # Tai Tham Consonant Sign ..Tai Tham Consonant Sign + (0x1a60, 0x1a7c,), # Tai Tham Sign Sakot ..Tai Tham Sign Khuen-lue (0x1a7f, 0x1a7f,), # Tai Tham Combining Crypt..Tai Tham Combining Crypt - (0x1ab0, 0x1abd,), # (nil) .. - (0x1b34, 0x1b34,), # Balinese Sign Rerekan ..Balinese Sign Rerekan - (0x1b44, 0x1b44,), # Balinese Adeg Adeg ..Balinese Adeg Adeg + (0x1ab0, 0x1abe,), # (nil) .. + (0x1b00, 0x1b04,), # Balinese Sign Ulu Ricem ..Balinese Sign Bisah + (0x1b34, 0x1b44,), # Balinese Sign Rerekan ..Balinese Adeg Adeg (0x1b6b, 0x1b73,), # Balinese Musical Symbol ..Balinese Musical Symbol - (0x1baa, 0x1bab,), # Sundanese Sign Pamaaeh ..Sundanese Sign Virama - (0x1be6, 0x1be6,), # Batak Sign Tompi ..Batak Sign Tompi - (0x1bf2, 0x1bf3,), # Batak Pangolat ..Batak Panongonan - (0x1c37, 0x1c37,), # Lepcha Sign Nukta ..Lepcha Sign Nukta + (0x1b80, 0x1b82,), # Sundanese Sign Panyecek ..Sundanese Sign Pangwisad + (0x1ba1, 0x1bad,), # Sundanese Consonant Sign..Sundanese Consonant Sign + (0x1be6, 0x1bf3,), # Batak Sign Tompi ..Batak Panongonan + (0x1c24, 0x1c37,), # Lepcha Subjoined Letter ..Lepcha Sign Nukta (0x1cd0, 0x1cd2,), # Vedic Tone Karshana ..Vedic Tone Prenkha - (0x1cd4, 0x1ce0,), # Vedic Sign Yajurvedic Mi..Vedic Tone Rigvedic Kash - (0x1ce2, 0x1ce8,), # Vedic Sign Visarga Svari..Vedic Sign Visarga Anuda + (0x1cd4, 0x1ce8,), # Vedic Sign Yajurvedic Mi..Vedic Sign Visarga Anuda (0x1ced, 0x1ced,), # Vedic Sign Tiryak ..Vedic Sign Tiryak - (0x1cf4, 0x1cf4,), # Vedic Tone Candra Above ..Vedic Tone Candra Above + (0x1cf2, 0x1cf4,), # Vedic Sign Ardhavisarga ..Vedic Tone Candra Above (0x1cf8, 0x1cf9,), # (nil) .. (0x1dc0, 0x1df5,), # Combining Dotted Grave A.. (0x1dfc, 0x1dff,), # Combining Double Inverte..Combining Right Arrowhea - (0x20d0, 0x20dc,), # Combining Left Harpoon A..Combining Four Dots Abov - (0x20e1, 0x20e1,), # Combining Left Right Arr..Combining Left Right Arr - (0x20e5, 0x20f0,), # Combining Reverse Solidu..Combining Asterisk Above + (0x20d0, 0x20f0,), # Combining Left Harpoon A..Combining Asterisk Above (0x2cef, 0x2cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu (0x2d7f, 0x2d7f,), # Tifinagh Consonant Joine..Tifinagh Consonant Joine (0x2de0, 0x2dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette (0x302a, 0x302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M (0x3099, 0x309a,), # Combining Katakana-hirag..Combining Katakana-hirag - (0xa66f, 0xa66f,), # Combining Cyrillic Vzmet..Combining Cyrillic Vzmet + (0xa66f, 0xa672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous (0xa674, 0xa67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer - (0xa69f, 0xa69f,), # Combining Cyrillic Lette..Combining Cyrillic Lette + (0xa69e, 0xa69f,), # (nil) ..Combining Cyrillic Lette (0xa6f0, 0xa6f1,), # Bamum Combining Mark Koq..Bamum Combining Mark Tuk + (0xa802, 0xa802,), # Syloti Nagri Sign Dvisva..Syloti Nagri Sign Dvisva (0xa806, 0xa806,), # Syloti Nagri Sign Hasant..Syloti Nagri Sign Hasant - (0xa8c4, 0xa8c4,), # Saurashtra Sign Virama ..Saurashtra Sign Virama + (0xa80b, 0xa80b,), # Syloti Nagri Sign Anusva..Syloti Nagri Sign Anusva + (0xa823, 0xa827,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign + (0xa880, 0xa881,), # Saurashtra Sign Anusvara..Saurashtra Sign Visarga + (0xa8b4, 0xa8c4,), # Saurashtra Consonant Sig..Saurashtra Sign Virama (0xa8e0, 0xa8f1,), # Combining Devanagari Dig..Combining Devanagari Sig - (0xa92b, 0xa92d,), # Kayah Li Tone Plophu ..Kayah Li Tone Calya Plop - (0xa953, 0xa953,), # Rejang Virama ..Rejang Virama - (0xa9b3, 0xa9b3,), # Javanese Sign Cecak Telu..Javanese Sign Cecak Telu - (0xa9c0, 0xa9c0,), # Javanese Pangkon ..Javanese Pangkon + (0xa926, 0xa92d,), # Kayah Li Vowel Ue ..Kayah Li Tone Calya Plop + (0xa947, 0xa953,), # Rejang Vowel Sign I ..Rejang Virama + (0xa980, 0xa983,), # Javanese Sign Panyangga ..Javanese Sign Wignyan + (0xa9b3, 0xa9c0,), # Javanese Sign Cecak Telu..Javanese Pangkon + (0xa9e5, 0xa9e5,), # (nil) .. + (0xaa29, 0xaa36,), # Cham Vowel Sign Aa ..Cham Consonant Sign Wa + (0xaa43, 0xaa43,), # Cham Consonant Sign Fina..Cham Consonant Sign Fina + (0xaa4c, 0xaa4d,), # Cham Consonant Sign Fina..Cham Consonant Sign Fina + (0xaa7b, 0xaa7d,), # Myanmar Sign Pao Karen T.. (0xaab0, 0xaab0,), # Tai Viet Mai Kang ..Tai Viet Mai Kang (0xaab2, 0xaab4,), # Tai Viet Vowel I ..Tai Viet Vowel U (0xaab7, 0xaab8,), # Tai Viet Mai Khit ..Tai Viet Vowel Ia (0xaabe, 0xaabf,), # Tai Viet Vowel Am ..Tai Viet Tone Mai Ek (0xaac1, 0xaac1,), # Tai Viet Tone Mai Tho ..Tai Viet Tone Mai Tho - (0xaaf6, 0xaaf6,), # Meetei Mayek Virama ..Meetei Mayek Virama - (0xabed, 0xabed,), # Meetei Mayek Apun Iyek ..Meetei Mayek Apun Iyek + (0xaaeb, 0xaaef,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + (0xaaf5, 0xaaf6,), # Meetei Mayek Vowel Sign ..Meetei Mayek Virama + (0xabe3, 0xabea,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + (0xabec, 0xabed,), # Meetei Mayek Lum Iyek ..Meetei Mayek Apun Iyek (0xfb1e, 0xfb1e,), # Hebrew Point Judeo-spani..Hebrew Point Judeo-spani - (0xfe20, 0xfe2d,), # Combining Ligature Left .. + (0xfe00, 0xfe0f,), # Variation Selector-1 ..Variation Selector-16 + (0xfe20, 0xfe2f,), # Combining Ligature Left .. (0x101fd, 0x101fd,), # Phaistos Disc Sign Combi..Phaistos Disc Sign Combi (0x102e0, 0x102e0,), # (nil) .. (0x10376, 0x1037a,), # (nil) .. - (0x10a0d, 0x10a0d,), # Kharoshthi Sign Double R..Kharoshthi Sign Double R - (0x10a0f, 0x10a0f,), # Kharoshthi Sign Visarga ..Kharoshthi Sign Visarga + (0x10a01, 0x10a03,), # Kharoshthi Vowel Sign I ..Kharoshthi Vowel Sign Vo + (0x10a05, 0x10a06,), # Kharoshthi Vowel Sign E ..Kharoshthi Vowel Sign O + (0x10a0c, 0x10a0f,), # Kharoshthi Vowel Length ..Kharoshthi Sign Visarga (0x10a38, 0x10a3a,), # Kharoshthi Sign Bar Abov..Kharoshthi Sign Dot Belo (0x10a3f, 0x10a3f,), # Kharoshthi Virama ..Kharoshthi Virama (0x10ae5, 0x10ae6,), # (nil) .. - (0x11046, 0x11046,), # Brahmi Virama ..Brahmi Virama - (0x1107f, 0x1107f,), # (nil) .. - (0x110b9, 0x110ba,), # Kaithi Sign Virama ..Kaithi Sign Nukta + (0x11000, 0x11002,), # Brahmi Sign Candrabindu ..Brahmi Sign Visarga + (0x11038, 0x11046,), # Brahmi Vowel Sign Aa ..Brahmi Virama + (0x1107f, 0x11082,), # (nil) ..Kaithi Sign Visarga + (0x110b0, 0x110ba,), # Kaithi Vowel Sign Aa ..Kaithi Sign Nukta (0x11100, 0x11102,), # Chakma Sign Candrabindu ..Chakma Sign Visarga - (0x11133, 0x11134,), # Chakma Virama ..Chakma Maayyaa + (0x11127, 0x11134,), # Chakma Vowel Sign A ..Chakma Maayyaa (0x11173, 0x11173,), # (nil) .. - (0x111c0, 0x111c0,), # Sharada Sign Virama ..Sharada Sign Virama - (0x11235, 0x11236,), # (nil) .. - (0x112e9, 0x112ea,), # (nil) .. + (0x11180, 0x11182,), # Sharada Sign Candrabindu..Sharada Sign Visarga + (0x111b3, 0x111c0,), # Sharada Vowel Sign Aa ..Sharada Sign Virama + (0x111ca, 0x111cc,), # (nil) .. + (0x1122c, 0x11237,), # (nil) .. + (0x112df, 0x112ea,), # (nil) .. + (0x11300, 0x11303,), # (nil) .. (0x1133c, 0x1133c,), # (nil) .. - (0x1134d, 0x1134d,), # (nil) .. + (0x1133e, 0x11344,), # (nil) .. + (0x11347, 0x11348,), # (nil) .. + (0x1134b, 0x1134d,), # (nil) .. + (0x11357, 0x11357,), # (nil) .. + (0x11362, 0x11363,), # (nil) .. (0x11366, 0x1136c,), # (nil) .. (0x11370, 0x11374,), # (nil) .. - (0x114c2, 0x114c3,), # (nil) .. - (0x115bf, 0x115c0,), # (nil) .. - (0x1163f, 0x1163f,), # (nil) .. - (0x116b6, 0x116b7,), # Takri Sign Virama ..Takri Sign Nukta + (0x114b0, 0x114c3,), # (nil) .. + (0x115af, 0x115b5,), # (nil) .. + (0x115b8, 0x115c0,), # (nil) .. + (0x115dc, 0x115dd,), # (nil) .. + (0x11630, 0x11640,), # (nil) .. + (0x116ab, 0x116b7,), # Takri Sign Anusvara ..Takri Sign Nukta + (0x1171d, 0x1172b,), # (nil) .. (0x16af0, 0x16af4,), # (nil) .. (0x16b30, 0x16b36,), # (nil) .. - (0x1bc9e, 0x1bc9e,), # (nil) .. + (0x16f51, 0x16f7e,), # Miao Sign Aspiration ..Miao Vowel Sign Ng + (0x16f8f, 0x16f92,), # Miao Tone Right ..Miao Tone Below + (0x1bc9d, 0x1bc9e,), # (nil) .. (0x1d165, 0x1d169,), # Musical Symbol Combining..Musical Symbol Combining (0x1d16d, 0x1d172,), # Musical Symbol Combining..Musical Symbol Combining (0x1d17b, 0x1d182,), # Musical Symbol Combining..Musical Symbol Combining (0x1d185, 0x1d18b,), # Musical Symbol Combining..Musical Symbol Combining (0x1d1aa, 0x1d1ad,), # Musical Symbol Combining..Musical Symbol Combining (0x1d242, 0x1d244,), # Combining Greek Musical ..Combining Greek Musical + (0x1da00, 0x1da36,), # (nil) .. + (0x1da3b, 0x1da6c,), # (nil) .. + (0x1da75, 0x1da75,), # (nil) .. + (0x1da84, 0x1da84,), # (nil) .. + (0x1da9b, 0x1da9f,), # (nil) .. + (0x1daa1, 0x1daaf,), # (nil) .. (0x1e8d0, 0x1e8d6,), # (nil) .. + (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256 ) diff --git a/wcwidth/table_wide.py b/wcwidth/table_wide.py index 8e634f9..ff0c0ac 100644 --- a/wcwidth/table_wide.py +++ b/wcwidth/table_wide.py @@ -1,7 +1,7 @@ """Wide_Eastasian table. Created by setup.py.""" -# Generated: 2014-11-20T06:55:26.602851 -# Source: EastAsianWidth-7.0.0.txt -# Date: 2014-02-28, 23:15:00 GMT [KW, LI] +# Generated: 2015-09-02T01:27:58.907716 +# Source: EastAsianWidth-8.0.0.txt +# Date: 2015-02-10, 21:00:00 GMT [KW, LI] WIDE_EASTASIAN = ( (0x1100, 0x115f,), # Hangul Choseong Kiyeok ..Hangul Choseong Filler (0x2329, 0x232a,), # Left-pointing Angle Brac..Right-pointing Angle Bra diff --git a/wcwidth/table_zero.py b/wcwidth/table_zero.py new file mode 100644 index 0000000..42f9b57 --- /dev/null +++ b/wcwidth/table_zero.py @@ -0,0 +1,270 @@ +"""Zero_Width table. Created by setup.py.""" +# Generated: 2015-09-02T01:27:58.927544 +# Source: DerivedGeneralCategory-8.0.0.txt +# Date: 2015-02-13, 13:47:11 GMT [MD] +ZERO_WIDTH = ( + (0x0300, 0x036f,), # Combining Grave Accent ..Combining Latin Small Le + (0x0483, 0x0489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli + (0x0591, 0x05bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg + (0x05bf, 0x05bf,), # Hebrew Point Rafe ..Hebrew Point Rafe + (0x05c1, 0x05c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot + (0x05c4, 0x05c5,), # Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot + (0x05c7, 0x05c7,), # Hebrew Point Qamats Qata..Hebrew Point Qamats Qata + (0x0610, 0x061a,), # Arabic Sign Sallallahou ..Arabic Small Kasra + (0x064b, 0x065f,), # Arabic Fathatan ..Arabic Wavy Hamza Below + (0x0670, 0x0670,), # Arabic Letter Superscrip..Arabic Letter Superscrip + (0x06d6, 0x06dc,), # Arabic Small High Ligatu..Arabic Small High Seen + (0x06df, 0x06e4,), # Arabic Small High Rounde..Arabic Small High Madda + (0x06e7, 0x06e8,), # Arabic Small High Yeh ..Arabic Small High Noon + (0x06ea, 0x06ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem + (0x0711, 0x0711,), # Syriac Letter Superscrip..Syriac Letter Superscrip + (0x0730, 0x074a,), # Syriac Pthaha Above ..Syriac Barrekh + (0x07a6, 0x07b0,), # Thaana Abafili ..Thaana Sukun + (0x07eb, 0x07f3,), # Nko Combining Short High..Nko Combining Double Dot + (0x0816, 0x0819,), # Samaritan Mark In ..Samaritan Mark Dagesh + (0x081b, 0x0823,), # Samaritan Mark Epentheti..Samaritan Vowel Sign A + (0x0825, 0x0827,), # Samaritan Vowel Sign Sho..Samaritan Vowel Sign U + (0x0829, 0x082d,), # Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa + (0x0859, 0x085b,), # Mandaic Affrication Mark..Mandaic Gemination Mark + (0x08e3, 0x0902,), # (nil) ..Devanagari Sign Anusvara + (0x093a, 0x093a,), # Devanagari Vowel Sign Oe..Devanagari Vowel Sign Oe + (0x093c, 0x093c,), # Devanagari Sign Nukta ..Devanagari Sign Nukta + (0x0941, 0x0948,), # Devanagari Vowel Sign U ..Devanagari Vowel Sign Ai + (0x094d, 0x094d,), # Devanagari Sign Virama ..Devanagari Sign Virama + (0x0951, 0x0957,), # Devanagari Stress Sign U..Devanagari Vowel Sign Uu + (0x0962, 0x0963,), # Devanagari Vowel Sign Vo..Devanagari Vowel Sign Vo + (0x0981, 0x0981,), # Bengali Sign Candrabindu..Bengali Sign Candrabindu + (0x09bc, 0x09bc,), # Bengali Sign Nukta ..Bengali Sign Nukta + (0x09c1, 0x09c4,), # Bengali Vowel Sign U ..Bengali Vowel Sign Vocal + (0x09cd, 0x09cd,), # Bengali Sign Virama ..Bengali Sign Virama + (0x09e2, 0x09e3,), # Bengali Vowel Sign Vocal..Bengali Vowel Sign Vocal + (0x0a01, 0x0a02,), # Gurmukhi Sign Adak Bindi..Gurmukhi Sign Bindi + (0x0a3c, 0x0a3c,), # Gurmukhi Sign Nukta ..Gurmukhi Sign Nukta + (0x0a41, 0x0a42,), # Gurmukhi Vowel Sign U ..Gurmukhi Vowel Sign Uu + (0x0a47, 0x0a48,), # Gurmukhi Vowel Sign Ee ..Gurmukhi Vowel Sign Ai + (0x0a4b, 0x0a4d,), # Gurmukhi Vowel Sign Oo ..Gurmukhi Sign Virama + (0x0a51, 0x0a51,), # Gurmukhi Sign Udaat ..Gurmukhi Sign Udaat + (0x0a70, 0x0a71,), # Gurmukhi Tippi ..Gurmukhi Addak + (0x0a75, 0x0a75,), # Gurmukhi Sign Yakash ..Gurmukhi Sign Yakash + (0x0a81, 0x0a82,), # Gujarati Sign Candrabind..Gujarati Sign Anusvara + (0x0abc, 0x0abc,), # Gujarati Sign Nukta ..Gujarati Sign Nukta + (0x0ac1, 0x0ac5,), # Gujarati Vowel Sign U ..Gujarati Vowel Sign Cand + (0x0ac7, 0x0ac8,), # Gujarati Vowel Sign E ..Gujarati Vowel Sign Ai + (0x0acd, 0x0acd,), # Gujarati Sign Virama ..Gujarati Sign Virama + (0x0ae2, 0x0ae3,), # Gujarati Vowel Sign Voca..Gujarati Vowel Sign Voca + (0x0b01, 0x0b01,), # Oriya Sign Candrabindu ..Oriya Sign Candrabindu + (0x0b3c, 0x0b3c,), # Oriya Sign Nukta ..Oriya Sign Nukta + (0x0b3f, 0x0b3f,), # Oriya Vowel Sign I ..Oriya Vowel Sign I + (0x0b41, 0x0b44,), # Oriya Vowel Sign U ..Oriya Vowel Sign Vocalic + (0x0b4d, 0x0b4d,), # Oriya Sign Virama ..Oriya Sign Virama + (0x0b56, 0x0b56,), # Oriya Ai Length Mark ..Oriya Ai Length Mark + (0x0b62, 0x0b63,), # Oriya Vowel Sign Vocalic..Oriya Vowel Sign Vocalic + (0x0b82, 0x0b82,), # Tamil Sign Anusvara ..Tamil Sign Anusvara + (0x0bc0, 0x0bc0,), # Tamil Vowel Sign Ii ..Tamil Vowel Sign Ii + (0x0bcd, 0x0bcd,), # Tamil Sign Virama ..Tamil Sign Virama + (0x0c00, 0x0c00,), # (nil) .. + (0x0c3e, 0x0c40,), # Telugu Vowel Sign Aa ..Telugu Vowel Sign Ii + (0x0c46, 0x0c48,), # Telugu Vowel Sign E ..Telugu Vowel Sign Ai + (0x0c4a, 0x0c4d,), # Telugu Vowel Sign O ..Telugu Sign Virama + (0x0c55, 0x0c56,), # Telugu Length Mark ..Telugu Ai Length Mark + (0x0c62, 0x0c63,), # Telugu Vowel Sign Vocali..Telugu Vowel Sign Vocali + (0x0c81, 0x0c81,), # (nil) .. + (0x0cbc, 0x0cbc,), # Kannada Sign Nukta ..Kannada Sign Nukta + (0x0cbf, 0x0cbf,), # Kannada Vowel Sign I ..Kannada Vowel Sign I + (0x0cc6, 0x0cc6,), # Kannada Vowel Sign E ..Kannada Vowel Sign E + (0x0ccc, 0x0ccd,), # Kannada Vowel Sign Au ..Kannada Sign Virama + (0x0ce2, 0x0ce3,), # Kannada Vowel Sign Vocal..Kannada Vowel Sign Vocal + (0x0d01, 0x0d01,), # (nil) .. + (0x0d41, 0x0d44,), # Malayalam Vowel Sign U ..Malayalam Vowel Sign Voc + (0x0d4d, 0x0d4d,), # Malayalam Sign Virama ..Malayalam Sign Virama + (0x0d62, 0x0d63,), # Malayalam Vowel Sign Voc..Malayalam Vowel Sign Voc + (0x0dca, 0x0dca,), # Sinhala Sign Al-lakuna ..Sinhala Sign Al-lakuna + (0x0dd2, 0x0dd4,), # Sinhala Vowel Sign Ketti..Sinhala Vowel Sign Ketti + (0x0dd6, 0x0dd6,), # Sinhala Vowel Sign Diga ..Sinhala Vowel Sign Diga + (0x0e31, 0x0e31,), # Thai Character Mai Han-a..Thai Character Mai Han-a + (0x0e34, 0x0e3a,), # Thai Character Sara I ..Thai Character Phinthu + (0x0e47, 0x0e4e,), # Thai Character Maitaikhu..Thai Character Yamakkan + (0x0eb1, 0x0eb1,), # Lao Vowel Sign Mai Kan ..Lao Vowel Sign Mai Kan + (0x0eb4, 0x0eb9,), # Lao Vowel Sign I ..Lao Vowel Sign Uu + (0x0ebb, 0x0ebc,), # Lao Vowel Sign Mai Kon ..Lao Semivowel Sign Lo + (0x0ec8, 0x0ecd,), # Lao Tone Mai Ek ..Lao Niggahita + (0x0f18, 0x0f19,), # Tibetan Astrological Sig..Tibetan Astrological Sig + (0x0f35, 0x0f35,), # Tibetan Mark Ngas Bzung ..Tibetan Mark Ngas Bzung + (0x0f37, 0x0f37,), # Tibetan Mark Ngas Bzung ..Tibetan Mark Ngas Bzung + (0x0f39, 0x0f39,), # Tibetan Mark Tsa -phru ..Tibetan Mark Tsa -phru + (0x0f71, 0x0f7e,), # Tibetan Vowel Sign Aa ..Tibetan Sign Rjes Su Nga + (0x0f80, 0x0f84,), # Tibetan Vowel Sign Rever..Tibetan Mark Halanta + (0x0f86, 0x0f87,), # Tibetan Sign Lci Rtags ..Tibetan Sign Yang Rtags + (0x0f8d, 0x0f97,), # Tibetan Subjoined Sign L..Tibetan Subjoined Letter + (0x0f99, 0x0fbc,), # Tibetan Subjoined Letter..Tibetan Subjoined Letter + (0x0fc6, 0x0fc6,), # Tibetan Symbol Padma Gda..Tibetan Symbol Padma Gda + (0x102d, 0x1030,), # Myanmar Vowel Sign I ..Myanmar Vowel Sign Uu + (0x1032, 0x1037,), # Myanmar Vowel Sign Ai ..Myanmar Sign Dot Below + (0x1039, 0x103a,), # Myanmar Sign Virama ..Myanmar Sign Asat + (0x103d, 0x103e,), # Myanmar Consonant Sign M..Myanmar Consonant Sign M + (0x1058, 0x1059,), # Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal + (0x105e, 0x1060,), # Myanmar Consonant Sign M..Myanmar Consonant Sign M + (0x1071, 0x1074,), # Myanmar Vowel Sign Geba ..Myanmar Vowel Sign Kayah + (0x1082, 0x1082,), # Myanmar Consonant Sign S..Myanmar Consonant Sign S + (0x1085, 0x1086,), # Myanmar Vowel Sign Shan ..Myanmar Vowel Sign Shan + (0x108d, 0x108d,), # Myanmar Sign Shan Counci..Myanmar Sign Shan Counci + (0x109d, 0x109d,), # Myanmar Vowel Sign Aiton..Myanmar Vowel Sign Aiton + (0x135d, 0x135f,), # Ethiopic Combining Gemin..Ethiopic Combining Gemin + (0x1712, 0x1714,), # Tagalog Vowel Sign I ..Tagalog Sign Virama + (0x1732, 0x1734,), # Hanunoo Vowel Sign I ..Hanunoo Sign Pamudpod + (0x1752, 0x1753,), # Buhid Vowel Sign I ..Buhid Vowel Sign U + (0x1772, 0x1773,), # Tagbanwa Vowel Sign I ..Tagbanwa Vowel Sign U + (0x17b4, 0x17b5,), # Khmer Vowel Inherent Aq ..Khmer Vowel Inherent Aa + (0x17b7, 0x17bd,), # Khmer Vowel Sign I ..Khmer Vowel Sign Ua + (0x17c6, 0x17c6,), # Khmer Sign Nikahit ..Khmer Sign Nikahit + (0x17c9, 0x17d3,), # Khmer Sign Muusikatoan ..Khmer Sign Bathamasat + (0x17dd, 0x17dd,), # Khmer Sign Atthacan ..Khmer Sign Atthacan + (0x180b, 0x180d,), # Mongolian Free Variation..Mongolian Free Variation + (0x18a9, 0x18a9,), # Mongolian Letter Ali Gal..Mongolian Letter Ali Gal + (0x1920, 0x1922,), # Limbu Vowel Sign A ..Limbu Vowel Sign U + (0x1927, 0x1928,), # Limbu Vowel Sign E ..Limbu Vowel Sign O + (0x1932, 0x1932,), # Limbu Small Letter Anusv..Limbu Small Letter Anusv + (0x1939, 0x193b,), # Limbu Sign Mukphreng ..Limbu Sign Sa-i + (0x1a17, 0x1a18,), # Buginese Vowel Sign I ..Buginese Vowel Sign U + (0x1a1b, 0x1a1b,), # Buginese Vowel Sign Ae ..Buginese Vowel Sign Ae + (0x1a56, 0x1a56,), # Tai Tham Consonant Sign ..Tai Tham Consonant Sign + (0x1a58, 0x1a5e,), # Tai Tham Sign Mai Kang L..Tai Tham Consonant Sign + (0x1a60, 0x1a60,), # Tai Tham Sign Sakot ..Tai Tham Sign Sakot + (0x1a62, 0x1a62,), # Tai Tham Vowel Sign Mai ..Tai Tham Vowel Sign Mai + (0x1a65, 0x1a6c,), # Tai Tham Vowel Sign I ..Tai Tham Vowel Sign Oa B + (0x1a73, 0x1a7c,), # Tai Tham Vowel Sign Oa A..Tai Tham Sign Khuen-lue + (0x1a7f, 0x1a7f,), # Tai Tham Combining Crypt..Tai Tham Combining Crypt + (0x1ab0, 0x1abe,), # (nil) .. + (0x1b00, 0x1b03,), # Balinese Sign Ulu Ricem ..Balinese Sign Surang + (0x1b34, 0x1b34,), # Balinese Sign Rerekan ..Balinese Sign Rerekan + (0x1b36, 0x1b3a,), # Balinese Vowel Sign Ulu ..Balinese Vowel Sign Ra R + (0x1b3c, 0x1b3c,), # Balinese Vowel Sign La L..Balinese Vowel Sign La L + (0x1b42, 0x1b42,), # Balinese Vowel Sign Pepe..Balinese Vowel Sign Pepe + (0x1b6b, 0x1b73,), # Balinese Musical Symbol ..Balinese Musical Symbol + (0x1b80, 0x1b81,), # Sundanese Sign Panyecek ..Sundanese Sign Panglayar + (0x1ba2, 0x1ba5,), # Sundanese Consonant Sign..Sundanese Vowel Sign Pan + (0x1ba8, 0x1ba9,), # Sundanese Vowel Sign Pam..Sundanese Vowel Sign Pan + (0x1bab, 0x1bad,), # Sundanese Sign Virama ..Sundanese Consonant Sign + (0x1be6, 0x1be6,), # Batak Sign Tompi ..Batak Sign Tompi + (0x1be8, 0x1be9,), # Batak Vowel Sign Pakpak ..Batak Vowel Sign Ee + (0x1bed, 0x1bed,), # Batak Vowel Sign Karo O ..Batak Vowel Sign Karo O + (0x1bef, 0x1bf1,), # Batak Vowel Sign U For S..Batak Consonant Sign H + (0x1c2c, 0x1c33,), # Lepcha Vowel Sign E ..Lepcha Consonant Sign T + (0x1c36, 0x1c37,), # Lepcha Sign Ran ..Lepcha Sign Nukta + (0x1cd0, 0x1cd2,), # Vedic Tone Karshana ..Vedic Tone Prenkha + (0x1cd4, 0x1ce0,), # Vedic Sign Yajurvedic Mi..Vedic Tone Rigvedic Kash + (0x1ce2, 0x1ce8,), # Vedic Sign Visarga Svari..Vedic Sign Visarga Anuda + (0x1ced, 0x1ced,), # Vedic Sign Tiryak ..Vedic Sign Tiryak + (0x1cf4, 0x1cf4,), # Vedic Tone Candra Above ..Vedic Tone Candra Above + (0x1cf8, 0x1cf9,), # (nil) .. + (0x1dc0, 0x1df5,), # Combining Dotted Grave A.. + (0x1dfc, 0x1dff,), # Combining Double Inverte..Combining Right Arrowhea + (0x20d0, 0x20f0,), # Combining Left Harpoon A..Combining Asterisk Above + (0x2cef, 0x2cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu + (0x2d7f, 0x2d7f,), # Tifinagh Consonant Joine..Tifinagh Consonant Joine + (0x2de0, 0x2dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette + (0x302a, 0x302d,), # Ideographic Level Tone M..Ideographic Entering Ton + (0x3099, 0x309a,), # Combining Katakana-hirag..Combining Katakana-hirag + (0xa66f, 0xa672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous + (0xa674, 0xa67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer + (0xa69e, 0xa69f,), # (nil) ..Combining Cyrillic Lette + (0xa6f0, 0xa6f1,), # Bamum Combining Mark Koq..Bamum Combining Mark Tuk + (0xa802, 0xa802,), # Syloti Nagri Sign Dvisva..Syloti Nagri Sign Dvisva + (0xa806, 0xa806,), # Syloti Nagri Sign Hasant..Syloti Nagri Sign Hasant + (0xa80b, 0xa80b,), # Syloti Nagri Sign Anusva..Syloti Nagri Sign Anusva + (0xa825, 0xa826,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign + (0xa8c4, 0xa8c4,), # Saurashtra Sign Virama ..Saurashtra Sign Virama + (0xa8e0, 0xa8f1,), # Combining Devanagari Dig..Combining Devanagari Sig + (0xa926, 0xa92d,), # Kayah Li Vowel Ue ..Kayah Li Tone Calya Plop + (0xa947, 0xa951,), # Rejang Vowel Sign I ..Rejang Consonant Sign R + (0xa980, 0xa982,), # Javanese Sign Panyangga ..Javanese Sign Layar + (0xa9b3, 0xa9b3,), # Javanese Sign Cecak Telu..Javanese Sign Cecak Telu + (0xa9b6, 0xa9b9,), # Javanese Vowel Sign Wulu..Javanese Vowel Sign Suku + (0xa9bc, 0xa9bc,), # Javanese Vowel Sign Pepe..Javanese Vowel Sign Pepe + (0xa9e5, 0xa9e5,), # (nil) .. + (0xaa29, 0xaa2e,), # Cham Vowel Sign Aa ..Cham Vowel Sign Oe + (0xaa31, 0xaa32,), # Cham Vowel Sign Au ..Cham Vowel Sign Ue + (0xaa35, 0xaa36,), # Cham Consonant Sign La ..Cham Consonant Sign Wa + (0xaa43, 0xaa43,), # Cham Consonant Sign Fina..Cham Consonant Sign Fina + (0xaa4c, 0xaa4c,), # Cham Consonant Sign Fina..Cham Consonant Sign Fina + (0xaa7c, 0xaa7c,), # (nil) .. + (0xaab0, 0xaab0,), # Tai Viet Mai Kang ..Tai Viet Mai Kang + (0xaab2, 0xaab4,), # Tai Viet Vowel I ..Tai Viet Vowel U + (0xaab7, 0xaab8,), # Tai Viet Mai Khit ..Tai Viet Vowel Ia + (0xaabe, 0xaabf,), # Tai Viet Vowel Am ..Tai Viet Tone Mai Ek + (0xaac1, 0xaac1,), # Tai Viet Tone Mai Tho ..Tai Viet Tone Mai Tho + (0xaaec, 0xaaed,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + (0xaaf6, 0xaaf6,), # Meetei Mayek Virama ..Meetei Mayek Virama + (0xabe5, 0xabe5,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + (0xabe8, 0xabe8,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + (0xabed, 0xabed,), # Meetei Mayek Apun Iyek ..Meetei Mayek Apun Iyek + (0xfb1e, 0xfb1e,), # Hebrew Point Judeo-spani..Hebrew Point Judeo-spani + (0xfe00, 0xfe0f,), # Variation Selector-1 ..Variation Selector-16 + (0xfe20, 0xfe2f,), # Combining Ligature Left .. + (0x101fd, 0x101fd,), # Phaistos Disc Sign Combi..Phaistos Disc Sign Combi + (0x102e0, 0x102e0,), # (nil) .. + (0x10376, 0x1037a,), # (nil) .. + (0x10a01, 0x10a03,), # Kharoshthi Vowel Sign I ..Kharoshthi Vowel Sign Vo + (0x10a05, 0x10a06,), # Kharoshthi Vowel Sign E ..Kharoshthi Vowel Sign O + (0x10a0c, 0x10a0f,), # Kharoshthi Vowel Length ..Kharoshthi Sign Visarga + (0x10a38, 0x10a3a,), # Kharoshthi Sign Bar Abov..Kharoshthi Sign Dot Belo + (0x10a3f, 0x10a3f,), # Kharoshthi Virama ..Kharoshthi Virama + (0x10ae5, 0x10ae6,), # (nil) .. + (0x11001, 0x11001,), # Brahmi Sign Anusvara ..Brahmi Sign Anusvara + (0x11038, 0x11046,), # Brahmi Vowel Sign Aa ..Brahmi Virama + (0x1107f, 0x11081,), # (nil) ..Kaithi Sign Anusvara + (0x110b3, 0x110b6,), # Kaithi Vowel Sign U ..Kaithi Vowel Sign Ai + (0x110b9, 0x110ba,), # Kaithi Sign Virama ..Kaithi Sign Nukta + (0x11100, 0x11102,), # Chakma Sign Candrabindu ..Chakma Sign Visarga + (0x11127, 0x1112b,), # Chakma Vowel Sign A ..Chakma Vowel Sign Uu + (0x1112d, 0x11134,), # Chakma Vowel Sign Ai ..Chakma Maayyaa + (0x11173, 0x11173,), # (nil) .. + (0x11180, 0x11181,), # Sharada Sign Candrabindu..Sharada Sign Anusvara + (0x111b6, 0x111be,), # Sharada Vowel Sign U ..Sharada Vowel Sign O + (0x111ca, 0x111cc,), # (nil) .. + (0x1122f, 0x11231,), # (nil) .. + (0x11234, 0x11234,), # (nil) .. + (0x11236, 0x11237,), # (nil) .. + (0x112df, 0x112df,), # (nil) .. + (0x112e3, 0x112ea,), # (nil) .. + (0x11300, 0x11301,), # (nil) .. + (0x1133c, 0x1133c,), # (nil) .. + (0x11340, 0x11340,), # (nil) .. + (0x11366, 0x1136c,), # (nil) .. + (0x11370, 0x11374,), # (nil) .. + (0x114b3, 0x114b8,), # (nil) .. + (0x114ba, 0x114ba,), # (nil) .. + (0x114bf, 0x114c0,), # (nil) .. + (0x114c2, 0x114c3,), # (nil) .. + (0x115b2, 0x115b5,), # (nil) .. + (0x115bc, 0x115bd,), # (nil) .. + (0x115bf, 0x115c0,), # (nil) .. + (0x115dc, 0x115dd,), # (nil) .. + (0x11633, 0x1163a,), # (nil) .. + (0x1163d, 0x1163d,), # (nil) .. + (0x1163f, 0x11640,), # (nil) .. + (0x116ab, 0x116ab,), # Takri Sign Anusvara ..Takri Sign Anusvara + (0x116ad, 0x116ad,), # Takri Vowel Sign Aa ..Takri Vowel Sign Aa + (0x116b0, 0x116b5,), # Takri Vowel Sign U ..Takri Vowel Sign Au + (0x116b7, 0x116b7,), # Takri Sign Nukta ..Takri Sign Nukta + (0x1171d, 0x1171f,), # (nil) .. + (0x11722, 0x11725,), # (nil) .. + (0x11727, 0x1172b,), # (nil) .. + (0x16af0, 0x16af4,), # (nil) .. + (0x16b30, 0x16b36,), # (nil) .. + (0x16f8f, 0x16f92,), # Miao Tone Right ..Miao Tone Below + (0x1bc9d, 0x1bc9e,), # (nil) .. + (0x1d167, 0x1d169,), # Musical Symbol Combining..Musical Symbol Combining + (0x1d17b, 0x1d182,), # Musical Symbol Combining..Musical Symbol Combining + (0x1d185, 0x1d18b,), # Musical Symbol Combining..Musical Symbol Combining + (0x1d1aa, 0x1d1ad,), # Musical Symbol Combining..Musical Symbol Combining + (0x1d242, 0x1d244,), # Combining Greek Musical ..Combining Greek Musical + (0x1da00, 0x1da36,), # (nil) .. + (0x1da3b, 0x1da6c,), # (nil) .. + (0x1da75, 0x1da75,), # (nil) .. + (0x1da84, 0x1da84,), # (nil) .. + (0x1da9b, 0x1da9f,), # (nil) .. + (0x1daa1, 0x1daaf,), # (nil) .. + (0x1e8d0, 0x1e8d6,), # (nil) .. + (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256 +) diff --git a/wcwidth/wcwidth.py b/wcwidth/wcwidth.py index 3befaa9..73145bc 100644 --- a/wcwidth/wcwidth.py +++ b/wcwidth/wcwidth.py @@ -71,7 +71,8 @@ from __future__ import division from .table_wide import WIDE_EASTASIAN -from .table_comb import NONZERO_COMBINING +from .table_comb import COMBINING +from .table_zero import ZERO_WIDTH def _bisearch(ucs, table): @@ -121,8 +122,7 @@ def wcwidth(wc): The following have a column width of 0: - Non-spacing and enclosing combining characters (general - category code Mn or Me in the Unicode database). Generally, - having a non-zero value returned by ``unicodedata.combining()``. + category code Mn or Me in the Unicode database). - NULL (U+0000, 0). @@ -174,8 +174,8 @@ def wcwidth(wc): if ucs < 32 or 0x07F <= ucs < 0x0A0: return -1 - # combining characters have zero width - if _bisearch(ucs, NONZERO_COMBINING): + # combining characters with zero width + if _bisearch(ucs, ZERO_WIDTH): return 0 return 1 + _bisearch(ucs, WIDE_EASTASIAN)