Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Drop support for legacy Python 2 #301

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
language: python
python:
- "2.7"
- "3.4"
- "3.5"
- "3.6"
install:
- pip install six
- pip install pycryptodome
- pip install chardet
- pip install sortedcontainers
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
PDFMiner.six
============

PDFMiner.six is a fork of PDFMiner using six for Python 2+3 compatibility
PDFMiner.six is a fork of PDFMiner

[![Build Status](https://travis-ci.org/pdfminer/pdfminer.six.svg?branch=master)](https://travis-ci.org/pdfminer/pdfminer.six) [![PyPI version](https://img.shields.io/pypi/v/pdfminer.six.svg)](https://pypi.python.org/pypi/pdfminer.six/)

Expand Down Expand Up @@ -35,7 +35,7 @@ Features
How to Install
--------------

* Install Python 2.7 or newer. (Python 3.x is supported in pdfminer.six)
* Install Python 3.4 or newer
* Install

`pip install pdfminer.six`
Expand Down
4 changes: 2 additions & 2 deletions docs/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ <h2><a name="intro">What's It?</a></h2>
<p>
<h3>Features</h3>
<ul>
<li> Written entirely in Python. (for version 2.6 or newer)
<li> Written entirely in Python (for version 3.4 or newer).
<li> Parse, analyze, and convert PDF documents.
<li> PDF-1.7 specification support. (well, almost)
<li> CJK languages and vertical writing scripts support.
Expand Down Expand Up @@ -102,7 +102,7 @@ <h3><a name="wheretoask">Where to Ask</a></h3>

<h2><a name="install">How to Install</a></h2>
<ol>
<li> Install <a href="http://www.python.org/download/">Python</a> 2.6 or newer.
<li> Install <a href="http://www.python.org/download/">Python</a> 3.4 or newer.
<li> Download the <a href="#source">PDFMiner source</a>.
<li> Unpack it.
<li> Run <code>setup.py</code> to install:<br>
Expand Down
3 changes: 1 addition & 2 deletions pdfminer/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# -*- coding: utf-8 -*-
"""
Fork of PDFMiner using six for Python 2+3 compatibility
Fork of PDFMiner

PDFMiner is a tool for extracting information from PDF documents.
Unlike other PDF-related tools, it focuses entirely on getting and analyzing
Expand Down
11 changes: 4 additions & 7 deletions pdfminer/arcfour.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,19 @@


""" Python implementation of Arcfour encryption algorithm.
See https://en.wikipedia.org/wiki/RC4
This code is in the public domain.

"""

import six # Python 2+3 compatibility
## Arcfour
##
class Arcfour(object):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I understand that removing the explicit inheritance from object is possible, since this is the default in Python 3. However, I prefer that the inheritance stays explicit because it's more consistent.

class Arcfour:

def __init__(self, key):
s = [i for i in range(256)] #because Py3 range is not indexable
j = 0
klen = len(key)
for i in range(256):
j = (j + s[i] + six.indexbytes(key,i % klen)) % 256
j = (j + s[i] + key[i % klen]) % 256
(s[i], s[j]) = (s[j], s[i])
self.s = s
(self.i, self.j) = (0, 0)
Expand All @@ -26,12 +23,12 @@ def process(self, data):
(i, j) = (self.i, self.j)
s = self.s
r = b''
for c in six.iterbytes(data):
for c in iter(data):
i = (i+1) % 256
j = (j+s[i]) % 256
(s[i], s[j]) = (s[j], s[i])
k = s[(s[i]+s[j]) % 256]
r += six.int2byte(c ^ k)
r += bytes((c ^ k,))
(self.i, self.j) = (i, j)
return r

Expand Down
14 changes: 5 additions & 9 deletions pdfminer/ascii85.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@


""" Python implementation of ASCII85/ASCIIHex decoder (Adobe version).

This code is in the public domain.
Expand All @@ -9,8 +7,6 @@
import re
import struct

import six #Python 2+3 compatibility


# ascii85decode(data)
def ascii85decode(data):
Expand All @@ -26,8 +22,8 @@ def ascii85decode(data):
"""
n = b = 0
out = b''
for i in six.iterbytes(data):
c=six.int2byte(i)
for i in iter(data):
c=bytes((i,))
if b'!' <= c and c <= b'u':
n += 1
b = b*85+(ord(c)-33)
Expand All @@ -46,8 +42,8 @@ def ascii85decode(data):
return out

# asciihexdecode(data)
hex_re = re.compile(b'([a-f\d]{2})', re.IGNORECASE)
trail_re = re.compile(b'^(?:[a-f\d]{2}|\s)*([a-f\d])[\s>]*$', re.IGNORECASE)
hex_re = re.compile(br'([a-f\d]{2})', re.IGNORECASE)
trail_re = re.compile(br'^(?:[a-f\d]{2}|\s)*([a-f\d])[\s>]*$', re.IGNORECASE)


def asciihexdecode(data):
Expand All @@ -62,7 +58,7 @@ def asciihexdecode(data):
"""
def decode(x):
i=int(x,16)
return six.int2byte(i)
return bytes((i,))

out=b''
for x in hex_re.findall(data):
Expand Down
15 changes: 3 additions & 12 deletions pdfminer/ccitt.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

# CCITT Fax decoder
#
# Bugs: uncompressed mode untested.
Expand All @@ -13,21 +12,13 @@
import sys
import array

import six #Python 2+3 compatibility

if six.PY3:
def get_bytes(data):
for byte in data:
yield byte
else:
def get_bytes(data):
for char in data:
yield ord(char)
def get_bytes(data):
yield from data


## BitParser
##
class BitParser(object):
class BitParser:

def __init__(self):
self._pos = 0
Expand Down
17 changes: 5 additions & 12 deletions pdfminer/cmapdb.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@


""" Adobe character mapping (CMap) support.

CMaps provide the mapping between character codes and Unicode
Expand All @@ -15,10 +13,7 @@
import os
import os.path
import gzip
try:
import cPickle as pickle
except ImportError:
import pickle as pickle
import pickle as pickle
import struct
import logging
from .psparser import PSStackParser
Expand All @@ -31,8 +26,6 @@
from .utils import choplist
from .utils import nunpack

import six #Python 2+3 compatibility

log = logging.getLogger(__name__)


Expand All @@ -42,7 +35,7 @@ class CMapError(Exception):

## CMapBase
##
class CMapBase(object):
class CMapBase:

debug = 0

Expand Down Expand Up @@ -96,7 +89,7 @@ def copy(dst, src):
def decode(self, code):
log.debug('decode: %r, %r', self, code)
d = self.code2cid
for i in six.iterbytes(code):
for i in iter(code):
if i in d:
d = d[i]
if isinstance(d, int):
Expand Down Expand Up @@ -186,7 +179,7 @@ def add_cid2unichr(self, cid, code):
# Interpret as UTF-16BE.
self.cid2unichr[cid] = code.decode('UTF-16BE', 'ignore')
elif isinstance(code, int):
self.cid2unichr[cid] = six.unichr(code)
self.cid2unichr[cid] = chr(code)
else:
raise TypeError(code)
return
Expand Down Expand Up @@ -220,7 +213,7 @@ def __init__(self, name, module, vertical):

## CMapDB
##
class CMapDB(object):
class CMapDB:

_cmap_cache = {}
_umap_cache = {}
Expand Down
18 changes: 7 additions & 11 deletions pdfminer/converter.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@

# -*- coding: utf-8 -*-
import logging
import re
from .pdfdevice import PDFTextDevice
Expand All @@ -23,8 +21,6 @@
from .utils import bbox2str
from . import utils

import six # Python 2+3 compatibility

log = logging.getLogger(__name__)


Expand Down Expand Up @@ -115,7 +111,7 @@ def paint_path(self, gstate, stroke, fill, evenodd, path):
def render_char(self, matrix, font, fontsize, scaling, rise, cid, ncs, graphicstate):
try:
text = font.to_unichr(cid)
assert isinstance(text, six.text_type), str(type(text))
assert isinstance(text, str), str(type(text))
except PDFUnicodeNotDefined:
text = self.handle_undefined_char(font, cid)
textwidth = font.char_width(cid)
Expand Down Expand Up @@ -170,7 +166,7 @@ def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1, laparams=None):
self.outfp_binary = False
else:
try:
self.outfp.write(u"é")
self.outfp.write("é")
self.outfp_binary = False
except TypeError:
self.outfp_binary = True
Expand All @@ -190,7 +186,7 @@ def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1, laparams=None,

def write_text(self, text):
text = utils.compatible_encode_method(text, self.codec, 'ignore')
if six.PY3 and self.outfp_binary:
if self.outfp_binary:
text = text.encode()
self.outfp.write(text)
return
Expand Down Expand Up @@ -285,7 +281,7 @@ def write_header(self):

def write_footer(self):
self.write('<div style="position:absolute; top:0px;">Page: %s</div>\n' %
', '.join('<a href="#%s">%s</a>' % (i, i) for i in range(1, self.pageno)))
', '.join('<a href="#{}">{}</a>'.format(i, i) for i in range(1, self.pageno)))
self.write('</body></html>\n')
return

Expand Down Expand Up @@ -373,7 +369,7 @@ def render(item):
if self.showpageno:
self.write('<div style="position:absolute; top:%dpx;">' %
((self._yoffset-item.y1)*self.scale))
self.write('<a name="%s">Page %s</a></div>\n' % (item.pageid, item.pageid))
self.write('<a name="{}">Page {}</a></div>\n'.format(item.pageid, item.pageid))
for child in item:
render(child)
if item.groups is not None:
Expand Down Expand Up @@ -432,7 +428,7 @@ def close(self):
##
class XMLConverter(PDFConverter):

CONTROL = re.compile(u'[\x00-\x08\x0b-\x0c\x0e-\x1f]')
CONTROL = re.compile('[\x00-\x08\x0b-\x0c\x0e-\x1f]')

def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1,
laparams=None, imagewriter=None, stripcontrol=False):
Expand Down Expand Up @@ -462,7 +458,7 @@ def write_footer(self):

def write_text(self, text):
if self.stripcontrol:
text = self.CONTROL.sub(u'', text)
text = self.CONTROL.sub('', text)
self.write(enc(text, None))
return

Expand Down
7 changes: 2 additions & 5 deletions pdfminer/encodingdb.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@

import re
from .psparser import PSLiteral
from .glyphlist import glyphname2unicode
from .latin_enc import ENCODING

import six # Python 2+3 compatibility

STRIP_NAME = re.compile(r'[0-9A-Fa-f]+')


Expand All @@ -18,12 +15,12 @@ def name2unicode(name):
m = STRIP_NAME.search(name)
if not m:
raise KeyError(name)
return six.unichr(int(m.group(0), base=16))
return chr(int(m.group(0), base=16))


## EncodingDB
##
class EncodingDB(object):
class EncodingDB:

std2unicode = {}
mac2unicode = {}
Expand Down
Loading