pdfminer · hugovk · Nov 13, 2018 · Oct 17, 2019 · Oct 17, 2019 · Oct 17, 2019
diff --git a/.travis.yml b/.travis.yml
@@ -1,11 +1,9 @@
 language: python
 python:
-  - "2.7"
   - "3.4"
   - "3.5"
   - "3.6"
 install:
-  - pip install six
   - pip install pycryptodome
   - pip install chardet
   - pip install sortedcontainers

diff --git a/README.md b/README.md
@@ -1,7 +1,7 @@
 PDFMiner.six
 ============
 
-PDFMiner.six is a fork of PDFMiner using six for Python 2+3 compatibility
+PDFMiner.six is a fork of PDFMiner for Python 3
 
 [![Build Status](https://travis-ci.org/pdfminer/pdfminer.six.svg?branch=master)](https://travis-ci.org/pdfminer/pdfminer.six) [![PyPI version](https://img.shields.io/pypi/v/pdfminer.six.svg)](https://pypi.python.org/pypi/pdfminer.six/)
 
@@ -35,7 +35,7 @@ Features
 How to Install
 --------------
 
- * Install Python 2.7 or newer. (Python 3.x is supported in pdfminer.six)
+ * Install Python 3.4 or newer
  * Install
 
     `pip install pdfminer.six`

diff --git a/docs/index.html b/docs/index.html
@@ -58,7 +58,7 @@ <h2><a name="intro">What's It?</a></h2>
 <p>
 <h3>Features</h3>
 <ul>
-<li> Written entirely in Python. (for version 2.6 or newer)
+<li> Written entirely in Python (for version 3.4 or newer).
 <li> Parse, analyze, and convert PDF documents.
 <li> PDF-1.7 specification support. (well, almost)
 <li> CJK languages and vertical writing scripts support.
@@ -102,7 +102,7 @@ <h3><a name="wheretoask">Where to Ask</a></h3>
 
 <h2><a name="install">How to Install</a></h2>
 <ol>
-<li> Install <a href="http://www.python.org/download/">Python</a> 2.6 or newer.
+<li> Install <a href="http://www.python.org/download/">Python</a> 3.4 or newer.
 <li> Download the <a href="#source">PDFMiner source</a>.
 <li> Unpack it.
 <li> Run <code>setup.py</code> to install:<br>

diff --git a/pdfminer/__init__.py b/pdfminer/__init__.py
@@ -1,6 +1,5 @@
-# -*- coding: utf-8 -*-
 """
-Fork of PDFMiner using six for Python 2+3 compatibility
+Fork of PDFMiner
 
 PDFMiner is a tool for extracting information from PDF documents.
 Unlike other PDF-related tools, it focuses entirely on getting and analyzing

diff --git a/pdfminer/arcfour.py b/pdfminer/arcfour.py
@@ -1,22 +1,19 @@
-
-
 """ Python implementation of Arcfour encryption algorithm.
 See https://en.wikipedia.org/wiki/RC4
 This code is in the public domain.
 
 """
 
-import six # Python 2+3 compatibility
 ##  Arcfour
 ##
-class Arcfour(object):
+class Arcfour:
 
     def __init__(self, key):
         s = [i for i in range(256)] #because Py3 range is not indexable
         j = 0
         klen = len(key)
         for i in range(256):
-            j = (j + s[i] + six.indexbytes(key,i % klen)) % 256
+            j = (j + s[i] + key[i % klen]) % 256
             (s[i], s[j]) = (s[j], s[i])
         self.s = s
         (self.i, self.j) = (0, 0)
@@ -26,12 +23,12 @@ def process(self, data):
         (i, j) = (self.i, self.j)
         s = self.s
         r = b''
-        for c in six.iterbytes(data):
+        for c in iter(data):
             i = (i+1) % 256
             j = (j+s[i]) % 256
             (s[i], s[j]) = (s[j], s[i])
             k = s[(s[i]+s[j]) % 256]
-            r += six.int2byte(c ^ k)
+            r += bytes((c ^ k,))
         (self.i, self.j) = (i, j)
         return r
 

diff --git a/pdfminer/ascii85.py b/pdfminer/ascii85.py
@@ -1,5 +1,3 @@
-
-
 """ Python implementation of ASCII85/ASCIIHex decoder (Adobe version).
 
 This code is in the public domain.
@@ -9,8 +7,6 @@
 import re
 import struct
 
-import six #Python 2+3 compatibility
-
 
 # ascii85decode(data)
 def ascii85decode(data):
@@ -26,8 +22,8 @@ def ascii85decode(data):
     """
     n = b = 0
     out = b''
-    for i in six.iterbytes(data):
-        c=six.int2byte(i)
+    for i in iter(data):
+        c=bytes((i,))
         if b'!' <= c and c <= b'u':
             n += 1
             b = b*85+(ord(c)-33)
@@ -46,8 +42,8 @@ def ascii85decode(data):
     return out
 
 # asciihexdecode(data)
-hex_re = re.compile(b'([a-f\d]{2})', re.IGNORECASE)
-trail_re = re.compile(b'^(?:[a-f\d]{2}|\s)*([a-f\d])[\s>]*$', re.IGNORECASE)
+hex_re = re.compile(br'([a-f\d]{2})', re.IGNORECASE)
+trail_re = re.compile(br'^(?:[a-f\d]{2}|\s)*([a-f\d])[\s>]*$', re.IGNORECASE)
 
 
 def asciihexdecode(data):
@@ -62,7 +58,7 @@ def asciihexdecode(data):
     """
     def decode(x):
         i=int(x,16)
-        return six.int2byte(i)
+        return bytes((i,))
 
     out=b''
     for x in hex_re.findall(data):

diff --git a/pdfminer/ccitt.py b/pdfminer/ccitt.py
@@ -1,4 +1,3 @@
-
 # CCITT Fax decoder
 #
 # Bugs: uncompressed mode untested.
@@ -13,21 +12,13 @@
 import sys
 import array
 
-import six  #Python 2+3 compatibility
-
-if six.PY3:
-    def get_bytes(data):
-        for byte in data:
-            yield byte
-else:
-    def get_bytes(data):
-        for char in data:
-            yield ord(char)
+def get_bytes(data):  # generator: yields the items of `data` one at a time
+    yield from data  # iterating a bytes object on Python 3 already gives ints, so no ord() is needed
 
 
 ##  BitParser
 ##
-class BitParser(object):
+class BitParser:
 
     def __init__(self):
         self._pos = 0

diff --git a/pdfminer/cmapdb.py b/pdfminer/cmapdb.py
@@ -1,5 +1,3 @@
-
-
 """ Adobe character mapping (CMap) support.
 
 CMaps provide the mapping between character codes and Unicode
@@ -15,10 +13,7 @@
 import os
 import os.path
 import gzip
-try:
-    import cPickle as pickle
-except ImportError:
-    import pickle as pickle
+import pickle as pickle
 import struct
 import logging
 from .psparser import PSStackParser
@@ -31,8 +26,6 @@
 from .utils import choplist
 from .utils import nunpack
 
-import six  #Python 2+3 compatibility
-
 log = logging.getLogger(__name__)
 
 
@@ -42,7 +35,7 @@ class CMapError(Exception):
 
 ##  CMapBase
 ##
-class CMapBase(object):
+class CMapBase:
 
     debug = 0
 
@@ -96,7 +89,7 @@ def copy(dst, src):
     def decode(self, code):
         log.debug('decode: %r, %r', self, code)
         d = self.code2cid
-        for i in six.iterbytes(code):
+        for i in iter(code):
             if i in d:
                 d = d[i]
                 if isinstance(d, int):
@@ -186,7 +179,7 @@ def add_cid2unichr(self, cid, code):
             # Interpret as UTF-16BE.
             self.cid2unichr[cid] = code.decode('UTF-16BE', 'ignore')
         elif isinstance(code, int):
-            self.cid2unichr[cid] = six.unichr(code)
+            self.cid2unichr[cid] = chr(code)
         else:
             raise TypeError(code)
         return
@@ -220,7 +213,7 @@ def __init__(self, name, module, vertical):
 
 ##  CMapDB
 ##
-class CMapDB(object):
+class CMapDB:
 
     _cmap_cache = {}
     _umap_cache = {}

diff --git a/pdfminer/converter.py b/pdfminer/converter.py
@@ -1,5 +1,3 @@
-
-# -*- coding: utf-8 -*-
 import logging
 import re
 from .pdfdevice import PDFTextDevice
@@ -23,8 +21,6 @@
 from .utils import bbox2str
 from . import utils
 
-import six  # Python 2+3 compatibility
-
 log = logging.getLogger(__name__)
 
 
@@ -115,7 +111,7 @@ def paint_path(self, gstate, stroke, fill, evenodd, path):
     def render_char(self, matrix, font, fontsize, scaling, rise, cid, ncs, graphicstate):
         try:
             text = font.to_unichr(cid)
-            assert isinstance(text, six.text_type), str(type(text))
+            assert isinstance(text, str), str(type(text))
         except PDFUnicodeNotDefined:
             text = self.handle_undefined_char(font, cid)
         textwidth = font.char_width(cid)
@@ -170,7 +166,7 @@ def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1, laparams=None):
                 self.outfp_binary = False
             else:
                 try:
-                    self.outfp.write(u"é")
+                    self.outfp.write("é")
                     self.outfp_binary = False
                 except TypeError:
                     self.outfp_binary = True
@@ -190,7 +186,7 @@ def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1, laparams=None,
 
     def write_text(self, text):
         text = utils.compatible_encode_method(text, self.codec, 'ignore')
-        if six.PY3 and self.outfp_binary:
+        if self.outfp_binary:
             text = text.encode()
         self.outfp.write(text)
         return
@@ -285,7 +281,7 @@ def write_header(self):
 
     def write_footer(self):
         self.write('<div style="position:absolute; top:0px;">Page: %s</div>\n' %
-                   ', '.join('<a href="#%s">%s</a>' % (i, i) for i in range(1, self.pageno)))
+                   ', '.join('<a href="#{}">{}</a>'.format(i, i) for i in range(1, self.pageno)))
         self.write('</body></html>\n')
         return
 
@@ -373,7 +369,7 @@ def render(item):
                 if self.showpageno:
                     self.write('<div style="position:absolute; top:%dpx;">' %
                                ((self._yoffset-item.y1)*self.scale))
-                    self.write('<a name="%s">Page %s</a></div>\n' % (item.pageid, item.pageid))
+                    self.write('<a name="{}">Page {}</a></div>\n'.format(item.pageid, item.pageid))
                 for child in item:
                     render(child)
                 if item.groups is not None:
@@ -432,7 +428,7 @@ def close(self):
 ##
 class XMLConverter(PDFConverter):
 
-    CONTROL = re.compile(u'[\x00-\x08\x0b-\x0c\x0e-\x1f]')
+    CONTROL = re.compile('[\x00-\x08\x0b-\x0c\x0e-\x1f]')
 
     def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1,
                  laparams=None, imagewriter=None, stripcontrol=False):
@@ -462,7 +458,7 @@ def write_footer(self):
 
     def write_text(self, text):
         if self.stripcontrol:
-            text = self.CONTROL.sub(u'', text)
+            text = self.CONTROL.sub('', text)
         self.write(enc(text, None))
         return
 

diff --git a/pdfminer/encodingdb.py b/pdfminer/encodingdb.py
@@ -1,11 +1,8 @@
-
 import re
 from .psparser import PSLiteral
 from .glyphlist import glyphname2unicode
 from .latin_enc import ENCODING
 
-import six # Python 2+3 compatibility
-
 STRIP_NAME = re.compile(r'[0-9A-Fa-f]+')
 
 
@@ -18,12 +15,12 @@ def name2unicode(name):
     m = STRIP_NAME.search(name)
     if not m:
         raise KeyError(name)
-    return six.unichr(int(m.group(0), base=16))
+    return chr(int(m.group(0), base=16))
 
 
 ##  EncodingDB
 ##
-class EncodingDB(object):
+class EncodingDB:
 
     std2unicode = {}
     mac2unicode = {}