diff --git a/openformats/formats/android_unescaped.py b/openformats/formats/android_unescaped.py index 58561c1b..c95a1210 100644 --- a/openformats/formats/android_unescaped.py +++ b/openformats/formats/android_unescaped.py @@ -4,7 +4,6 @@ from openformats.formats.android import AndroidHandler from ..utils.xml import NewDumbXml as DumbXml - class AndroidUnescapedHandler(AndroidHandler): def _create_string(self, name, text, comment, product, child, pluralized=False): """Creates a string and returns it. If empty string it returns None. @@ -96,17 +95,19 @@ def escape(string): string ) except Exception as _: - return AndroidHandler.escape(string) + # Exception handling: If an error occurs during tag protection, + # escape all special characters. One case of these errors is the + # presence of '<' symbols without corresponding closing tags, causing + # parsing errors. + string = AndroidHandler.escape(string) + string = AndroidUnescapedHandler.escape_special_characters(string) + string = ( + string.replace("<", "<") + ) + return string string = AndroidHandler.escape(string) - string = ( - string.replace("&", "&") - .replace(">", ">") - .replace("\n", "\\n") - .replace("\t", "\\t") - .replace("@", "\\@") - .replace("?", "\\?") - ) + string = AndroidUnescapedHandler.escape_special_characters(string) return AndroidUnescapedHandler._unprotect_inline_tags(string, protected_tags) @staticmethod @@ -121,3 +122,25 @@ def unescape(string): .replace("<", "<") .replace("&", "&") ) + + @staticmethod + def escape_special_characters(string): + """ + Escapes special characters in the given string. + + Note: + - The '<' character is not escaped intentionally to avoid interfering + with inline tags that need to be protected and unprotected separately. + + :param string: The input string that needs special characters escaped. + + :returns: str: The input string with special characters escaped. + """ + return ( + string.replace("&", "&") + .replace(">", ">") + .replace("\n", "\\n") + .replace("\t", "\\t") + .replace("@", "\\@") + .replace("?", "\\?") + ) diff --git a/openformats/tests/formats/android/test_android_unescaped.py b/openformats/tests/formats/android/test_android_unescaped.py index 3a773b34..746567f2 100644 --- a/openformats/tests/formats/android/test_android_unescaped.py +++ b/openformats/tests/formats/android/test_android_unescaped.py @@ -53,6 +53,31 @@ def test_escape(self): raw, ) + def test_escape_lt_character(self): + rich = '< 20 units' + raw = '< 20 units' + + self.assertEqual( + AndroidUnescapedHandler.escape(rich), + raw, + ) + + rich = '< 20 & > 50 units' + raw = '< 20 & > 50 units' + + self.assertEqual( + AndroidUnescapedHandler.escape(rich), + raw, + ) + + rich = '< 20 & > 50 unitstest' + raw = '< 20 & > 50 units<xliff:g>test</xliff:g>' + + self.assertEqual( + AndroidUnescapedHandler.escape(rich), + raw, + ) + def test_unescape(self): rich = "&<>'\n\t@?" + '"' raw = "&<>\\'\\n\\t\\@\\?" + '\\"'