Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve handling of combining characters #11

Merged
merged 2 commits into from
Sep 14, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,9 @@ than the most current Unicode Standard release files, which this project
aims to track.

The most current release of this API is based on Unicode Standard release
*7.0.0*, dated *2014-02-28, 23:15:00 GMT [KW, LI]* for table generated by
file ``EastAsianWidth-7.0.0.txt`` and *2014-02-07, 18:42:08 GMT [MD]* for
``DerivedCombiningClass-7.0.0.txt``.
*8.0.0*, dated *2015-02-10, 21:00:00 GMT [KW, LI]* for table generated by
file ``EastAsianWidth-8.0.0.txt`` and *2015-02-13, 13:47:11 GMT [MD]* for
``DerivedGeneralCategory-8.0.0.txt``.

Installation
------------
Expand Down Expand Up @@ -140,12 +140,14 @@ Updating Tables
The command ``python setup.py update`` will fetch the following resources:

- http://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt
- http://www.unicode.org/Public/UNIDATA/extracted/DerivedCombiningClass.txt
- http://www.unicode.org/Public/UNIDATA/extracted/DerivedGeneralCategory.txt

It then generates the table files `wcwidth/table_wide.py`_ and `wcwidth/table_comb.py`_.
It then generates the table files `wcwidth/table_wide.py`_,
`wcwidth/table_comb.py`_, and `wcwidth/table_zero.py`_.

.. _`wcwidth/table_wide.py`: https://github.com/jquast/wcwidth/tree/master/wcwidth/table_wide.py
.. _`wcwidth/table_comb.py`: https://github.com/jquast/wcwidth/tree/master/wcwidth/table_comb.py
.. _`wcwidth/table_zero.py`: https://github.com/jquast/wcwidth/tree/master/wcwidth/table_zero.py

wcwidth.c
---------
Expand Down
10 changes: 5 additions & 5 deletions bin/wcwidth-browser.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
import signal

# local
from wcwidth import wcwidth, table_comb
from wcwidth.wcwidth import _bisearch, wcwidth, COMBINING

# 3rd-party
from blessed import Terminal
Expand Down Expand Up @@ -116,6 +116,7 @@ def __init__(self, width=2):
self.characters = (unichr(idx)
for idx in xrange(LIMIT_UCS)
if wcwidth(unichr(idx)) == width
and not _bisearch(idx, COMBINING)
)

def __iter__(self):
Expand Down Expand Up @@ -152,13 +153,13 @@ def __init__(self, width=1):
"""
self.characters = []
letters_o = (u'o' * width)
for boundaries in table_comb.NONZERO_COMBINING:
for boundaries in COMBINING:
for val in [_val for _val in
range(boundaries[0], boundaries[1] + 1)
if _val <= LIMIT_UCS]:
self.characters.append(letters_o[:1] +
unichr(val) +
letters_o[1:])
letters_o[wcwidth(unichr(val))+1:])
self.characters.reverse()

def __iter__(self):
Expand Down Expand Up @@ -647,8 +648,7 @@ def text_entry(self, ucs, name):
delimiter = style.attr_minor(style.delimiter)
if len(ucs) != 1:
# determine display of combining characters
val = ord(next((_ucs for _ucs in ucs
if wcwidth(_ucs) == -1)))
val = ord(ucs[1])
# a combining character displayed of any fg color
# will reset the foreground character of the cell
# combined with (iTerm2, OSX).
Expand Down
6 changes: 3 additions & 3 deletions bin/wcwidth-combining-comparator.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import sys

# local imports
from wcwidth.wcwidth import _bisearch, NONZERO_COMBINING
from wcwidth.wcwidth import _bisearch, COMBINING


def report_comb_msg(ucs, comb_py, comb_wc):
Expand Down Expand Up @@ -66,8 +66,8 @@ def report_comb_msg(ucs, comb_py, comb_wc):


def _is_equal_combining(ucs):
comb_py = bool(unicodedata.combining(ucs))
comb_wc = bool(_bisearch(ord(ucs), NONZERO_COMBINING))
comb_py = bool(unicodedata.category(ucs) in ['Mc', 'Me', 'Mn'])
comb_wc = bool(_bisearch(ord(ucs), COMBINING))
assert comb_py == comb_wc, report_comb_msg(ucs, comb_py, comb_wc)


Expand Down
27 changes: 18 additions & 9 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,10 @@ class SetupUpdate(setuptools.Command):
EAW_OUT = os.path.join(HERE, 'wcwidth', 'table_wide.py')

UCD_URL = ('http://www.unicode.org/Public/UNIDATA/extracted/'
'DerivedCombiningClass.txt')
UCD_IN = os.path.join(HERE, 'data', 'DerivedCombiningClass.txt')
'DerivedGeneralCategory.txt')
UCD_IN = os.path.join(HERE, 'data', 'DerivedGeneralCategory.txt')
CMB_OUT = os.path.join(HERE, 'wcwidth', 'table_comb.py')
ZERO_OUT = os.path.join(HERE, 'wcwidth', 'table_zero.py')

def initialize_options(self):
"""Override builtin method: no options are available."""
Expand All @@ -60,10 +61,11 @@ def finalize_options(self):
pass

def run(self):
"""Execute command: update east-asian and combining tables."""
"""Execute command: update east-asian, combining and zero width tables."""
assert os.getenv('VIRTUAL_ENV'), 'You should be in a virtualenv'
self.do_east_asian_width()
self.do_combining()
self.do_zero_width()

def do_east_asian_width(self):
"""Fetch and update east-asian tables."""
Expand All @@ -75,9 +77,16 @@ def do_east_asian_width(self):
def do_combining(self):
"""Fetch and update combining tables."""
self._do_retrieve(self.UCD_URL, self.UCD_IN)
(version, date, values) = self._do_combining_parse(self.UCD_IN)
(version, date, values) = self._do_category_parse(self.UCD_IN, ('Mc', 'Me', 'Mn',))
table = self._make_table(values)
self._do_write(self.CMB_OUT, 'NONZERO_COMBINING', version, date, table)
self._do_write(self.CMB_OUT, 'COMBINING', version, date, table)

def do_zero_width(self):
"""Fetch and update zero width tables."""
self._do_retrieve(self.UCD_URL, self.UCD_IN)
(version, date, values) = self._do_category_parse(self.UCD_IN, ('Me', 'Mn',))
table = self._make_table(values)
self._do_write(self.ZERO_OUT, 'ZERO_WIDTH', version, date, table)

@staticmethod
def _make_table(values):
Expand Down Expand Up @@ -143,8 +152,8 @@ def _do_east_asian_width_parse(fname,
return version, date, sorted(values)

@staticmethod
def _do_combining_parse(fname, exclude_values=(0,)):
"""Parse unicode combining tables."""
def _do_category_parse(fname, categories):
"""Parse unicode category tables."""
version, date, values = None, None, []
print("parsing {} ..".format(fname))
for line in open(fname, 'rb'):
Expand All @@ -159,8 +168,8 @@ def _do_combining_parse(fname, exclude_values=(0,)):
continue
addrs, details = uline.split(';', 1)
addrs, details = addrs.rstrip(), details.lstrip()
if not any(details.startswith('{} #'.format(value))
for value in exclude_values):
if any(details.startswith('{} #'.format(value))
for value in categories):
start, stop = addrs, addrs
if '..' in addrs:
start, stop = addrs.split('..')
Expand Down
Loading