pylint-dev · PCManticore · Nov 26, 2018 · Nov 26, 2018
diff --git a/pylint/checkers/strings.py b/pylint/checkers/strings.py
@@ -592,12 +592,20 @@ def process_module(self, module):
         self._unicode_literals = "unicode_literals" in module.future_imports
 
     def process_tokens(self, tokens):
-        for i, (tok_type, token, start, _, _) in enumerate(tokens):
-            if tok_type == tokenize.STRING:
+        encoding = "ascii"
+        for i, (tok_type, token, start, _, line) in enumerate(tokens):
+            if tok_type == tokenize.ENCODING:
+                # this is always the first token processed
+                encoding = token
+            elif tok_type == tokenize.STRING:
                 # 'token' is the whole un-parsed token; we can look at the start
                 # of it to see whether it's a raw or unicode string etc.
                 self.process_string_token(token, start[0])
                 next_token = tokens[i + 1] if i + 1 < len(tokens) else None
+                if encoding != "ascii":
+                    # We convert `tokenize` character count into a byte count,
+                    # to match with astroid `.col_offset`
+                    start = (start[0], len(line[: start[1]].encode(encoding)))
                 self.string_tokens[start] = (str_eval(token), next_token)
 
     @check_messages(*(MSGS.keys()))
@@ -618,6 +626,10 @@ def check_for_concatenated_strings(self, iterable_node, iterable_type):
                 if elt.col_offset < 0:
                     # This can happen in case of escaped newlines
                     continue
+                if (elt.lineno, elt.col_offset) not in self.string_tokens:
+                    # This may happen with Latin1 encoding
+                    # cf. https://github.com/PyCQA/pylint/issues/2610
+                    continue
                 matching_token, next_token = self.string_tokens[
                     (elt.lineno, elt.col_offset)
                 ]

diff --git a/pylint/test/functional/implicit_str_concat_in_sequence_latin1.py b/pylint/test/functional/implicit_str_concat_in_sequence_latin1.py
@@ -0,0 +1,4 @@
+# coding: latin_1
+#pylint: disable=bad-continuation,invalid-name,missing-docstring
+
+TOTO = ('Café', 'Café', 'Café')
diff --git a/pylint/test/functional/implicit_str_concat_in_sequence_latin1.txt b/pylint/test/functional/implicit_str_concat_in_sequence_latin1.txt
diff --git a/pylint/test/functional/implicit_str_concat_in_sequence_utf8.py b/pylint/test/functional/implicit_str_concat_in_sequence_utf8.py
@@ -0,0 +1,3 @@
+#pylint: disable=bad-continuation,invalid-name,missing-docstring
+
+TOTO = ('Café', 'Café', 'Café')
diff --git a/pylint/test/functional/implicit_str_concat_in_sequence_utf8.txt b/pylint/test/functional/implicit_str_concat_in_sequence_utf8.txt
diff --git a/pylint/test/test_functional.py b/pylint/test/test_functional.py
@@ -279,8 +279,9 @@ def _open_expected_file(self):
     def _open_source_file(self):
         if self._test_file.base == "invalid_encoded_data":
             return open(self._test_file.source)
-        else:
-            return io.open(self._test_file.source, encoding="utf8")
+        if "latin1" in self._test_file.base:
+            return io.open(self._test_file.source, encoding="latin1")
+        return io.open(self._test_file.source, encoding="utf8")
 
     def _get_expected(self):
         with self._open_source_file() as fobj:
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		#pylint: disable=bad-continuation,invalid-name,missing-docstring

		TOTO = ('Café', 'Café', 'Café')