diff --git a/pdf_helper/README.rst b/pdf_helper/README.rst new file mode 100644 index 0000000000..879226ae57 --- /dev/null +++ b/pdf_helper/README.rst @@ -0,0 +1,98 @@ +========== +PDF Helper +========== + +.. !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + !! This file is generated by oca-gen-addon-readme !! + !! changes will be overwritten. !! + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + +.. |badge1| image:: https://img.shields.io/badge/maturity-Beta-yellow.png + :target: https://odoo-community.org/page/development-status + :alt: Beta +.. |badge2| image:: https://img.shields.io/badge/licence-LGPL--3-blue.png + :target: http://www.gnu.org/licenses/lgpl-3.0-standalone.html + :alt: License: LGPL-3 +.. |badge3| image:: https://img.shields.io/badge/github-OCA%2Fedi-lightgray.png?logo=github + :target: https://github.com/OCA/edi/tree/14.0/pdf_helper + :alt: OCA/edi +.. |badge4| image:: https://img.shields.io/badge/weblate-Translate%20me-F47D42.png + :target: https://translation.odoo-community.org/projects/edi-14-0/edi-14-0-pdf_helper + :alt: Translate me on Weblate +.. |badge5| image:: https://img.shields.io/badge/runbot-Try%20me-875A7B.png + :target: https://runbot.odoo-community.org/runbot/226/14.0 + :alt: Try me on Runbot + +|badge1| |badge2| |badge3| |badge4| |badge5| + +Technical module to share PDF utils. + +**Table of contents** + +.. contents:: + :local: + +Usage +===== + +Inside Odoo env:: + + res = env["pdf.helper"].pdf_get_xml_files(pdf_filecontent) + +Outside Odoo env:: + + from odoo.addons.pdf_helper.utils import PDFParser + [...] + res = PDFParser(pdf_filecontent).get_xml_files() + +Bug Tracker +=========== + +Bugs are tracked on `GitHub Issues `_. +In case of trouble, please check there if your issue has already been reported. +If you spotted it first, help us smashing it by providing a detailed and welcomed +`feedback `_. + +Do not contact contributors directly about support or help with technical issues. 
+ +Credits +======= + +Authors +~~~~~~~ + +* Camptocamp + +Contributors +~~~~~~~~~~~~ + +* Simone Orsi +* Alexis de Lattre + +Maintainers +~~~~~~~~~~~ + +This module is maintained by the OCA. + +.. image:: https://odoo-community.org/logo.png + :alt: Odoo Community Association + :target: https://odoo-community.org + +OCA, or the Odoo Community Association, is a nonprofit organization whose +mission is to support the collaborative development of Odoo features and +promote its widespread use. + +.. |maintainer-simahawk| image:: https://github.com/simahawk.png?size=40px + :target: https://github.com/simahawk + :alt: simahawk +.. |maintainer-alexis-via| image:: https://github.com/alexis-via.png?size=40px + :target: https://github.com/alexis-via + :alt: alexis-via + +Current `maintainers `__: + +|maintainer-simahawk| |maintainer-alexis-via| + +This module is part of the `OCA/edi `_ project on GitHub. + +You are welcome to contribute. To learn how please visit https://odoo-community.org/page/Contribute. diff --git a/pdf_helper/__init__.py b/pdf_helper/__init__.py new file mode 100644 index 0000000000..0650744f6b --- /dev/null +++ b/pdf_helper/__init__.py @@ -0,0 +1 @@ +from . import models diff --git a/pdf_helper/__manifest__.py b/pdf_helper/__manifest__.py new file mode 100644 index 0000000000..2f0548ea38 --- /dev/null +++ b/pdf_helper/__manifest__.py @@ -0,0 +1,18 @@ +# Copyright 2022 Camptocamp SA +# @author: Simone Orsi +# License LGPL-3.0 or later (http://www.gnu.org/licenses/lgpl). + +# TODO: move it to a simple python package under OCA umbrella? 
+{
+    "name": "PDF Helper",
+    "version": "15.0.1.0.0",
+    "category": "Tools",
+    "license": "LGPL-3",
+    "summary": "Provides helpers to work w/ PDFs",
+    "author": "Camptocamp, Odoo Community Association (OCA)",
+    "maintainers": ["simahawk", "alexis-via"],
+    "website": "https://github.com/OCA/edi",
+    "depends": [
+        "base",
+    ],
+}
diff --git a/pdf_helper/i18n/pdf_helper.pot b/pdf_helper/i18n/pdf_helper.pot
new file mode 100644
index 0000000000..97d0b6280f
--- /dev/null
+++ b/pdf_helper/i18n/pdf_helper.pot
@@ -0,0 +1,34 @@
+# Translation of Odoo Server.
+# This file contains the translation of the following modules:
+# * pdf_helper
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: Odoo Server 14.0\n"
+"Report-Msgid-Bugs-To: \n"
+"Last-Translator: \n"
+"Language-Team: \n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: \n"
+"Plural-Forms: \n"
+
+#. module: pdf_helper
+#: model:ir.model.fields,field_description:pdf_helper.field_pdf_helper__display_name
+msgid "Display Name"
+msgstr ""
+
+#. module: pdf_helper
+#: model:ir.model.fields,field_description:pdf_helper.field_pdf_helper__id
+msgid "ID"
+msgstr ""
+
+#. module: pdf_helper
+#: model:ir.model.fields,field_description:pdf_helper.field_pdf_helper____last_update
+msgid "Last Modified on"
+msgstr ""
+
+#. module: pdf_helper
+#: model:ir.model,name:pdf_helper.model_pdf_helper
+msgid "PDF Helper"
+msgstr ""
diff --git a/pdf_helper/models/__init__.py b/pdf_helper/models/__init__.py
new file mode 100644
index 0000000000..d533863bba
--- /dev/null
+++ b/pdf_helper/models/__init__.py
@@ -0,0 +1 @@
+from . import helper
diff --git a/pdf_helper/models/helper.py b/pdf_helper/models/helper.py
new file mode 100644
index 0000000000..52b30355b4
--- /dev/null
+++ b/pdf_helper/models/helper.py
@@ -0,0 +1,46 @@
+# Copyright 2022 Camptocamp SA
+# @author: Simone Orsi
+# License LGPL-3.0 or later (http://www.gnu.org/licenses/lgpl).
+import logging
+
+from PyPDF2.utils import PdfReadError
+
+from odoo import models
+
+from ..utils import PDFParser
+
+_logger = logging.getLogger(__name__)
+
+
+class PDFHelper(models.AbstractModel):
+    """Expose `PDFParser` features as an Odoo abstract model."""
+
+    _name = "pdf.helper"
+    _description = "PDF Helper"
+
+    # Parser class instantiated by `pdf_get_xml_files`.
+    _PDF_PARSER_KLASS = PDFParser
+
+    def pdf_get_xml_files(self, pdf_file):
+        """Extract the XML files embedded in a PDF.
+
+        Parsing errors listed by
+        `_pdf_get_xml_files_swallable_exceptions` are logged and
+        swallowed, to reflect what was done in
+        base_business_document_import till now.
+
+        :param pdf_file: PDF content handed over to the parser class
+        :returns: what the parser's `get_xml_files` returns,
+            or an empty dict if parsing failed.
+        """
+        parser = self._PDF_PARSER_KLASS(pdf_file)
+        try:
+            xml_files = parser.get_xml_files()
+        except self._pdf_get_xml_files_swallable_exceptions() as err:
+            _logger.error("PDF file parsing failed: %s", str(err))
+            xml_files = {}
+        return xml_files
+
+    def _pdf_get_xml_files_swallable_exceptions(self):
+        """Return the exception classes that must not break the parsing."""
+        return (KeyError, PdfReadError)
diff --git a/pdf_helper/readme/CONTRIBUTORS.rst b/pdf_helper/readme/CONTRIBUTORS.rst
new file mode 100644
index 0000000000..fe493ea973
--- /dev/null
+++ b/pdf_helper/readme/CONTRIBUTORS.rst
@@ -0,0 +1,2 @@
+* Simone Orsi
+* Alexis de Lattre
diff --git a/pdf_helper/readme/DESCRIPTION.rst b/pdf_helper/readme/DESCRIPTION.rst
new file mode 100644
index 0000000000..ebfe3fa8d6
--- /dev/null
+++ b/pdf_helper/readme/DESCRIPTION.rst
@@ -0,0 +1 @@
+Technical module to share PDF utils.
diff --git a/pdf_helper/readme/USAGE.rst b/pdf_helper/readme/USAGE.rst
new file mode 100644
index 0000000000..4cee3e9dbb
--- /dev/null
+++ b/pdf_helper/readme/USAGE.rst
@@ -0,0 +1,9 @@
+Inside Odoo env::
+
+    res = env["pdf.helper"].pdf_get_xml_files(pdf_filecontent)
+
+Outside Odoo env::
+
+    from odoo.addons.pdf_helper.utils import PDFParser
+    [...]
+ res = PDFParser(pdf_filecontent).get_xml_files() diff --git a/pdf_helper/static/description/icon.png b/pdf_helper/static/description/icon.png new file mode 100644 index 0000000000..3a0328b516 Binary files /dev/null and b/pdf_helper/static/description/icon.png differ diff --git a/pdf_helper/static/description/index.html b/pdf_helper/static/description/index.html new file mode 100644 index 0000000000..c8c8d94753 --- /dev/null +++ b/pdf_helper/static/description/index.html @@ -0,0 +1,436 @@ + + + + + + +PDF Helper + + + +
+

PDF Helper

+ + +

Beta License: LGPL-3 OCA/edi Translate me on Weblate Try me on Runbot

+

Technical module to share PDF utils.

+

Table of contents

+ +
+

Usage

+

Inside Odoo env:

+
+res = env["pdf.helper"].pdf_get_xml_files(pdf_filecontent)
+
+

Outside Odoo env:

+
+from odoo.addons.pdf_helper.utils import PDFParser
+[...]
+res = PDFParser(pdf_filecontent).get_xml_files()
+
+
+
+

Bug Tracker

+

Bugs are tracked on GitHub Issues. +In case of trouble, please check there if your issue has already been reported. +If you spotted it first, help us smash it by providing a detailed and welcomed +feedback.

+

Do not contact contributors directly about support or help with technical issues.

+
+
+

Credits

+
+

Authors

+
    +
  • Camptocamp
  • +
+
+
+

Contributors

+ +
+
+

Maintainers

+

This module is maintained by the OCA.

+Odoo Community Association +

OCA, or the Odoo Community Association, is a nonprofit organization whose +mission is to support the collaborative development of Odoo features and +promote its widespread use.

+

Current maintainers:

+

simahawk alexis-via

+

This module is part of the OCA/edi project on GitHub.

+

You are welcome to contribute. To learn how please visit https://odoo-community.org/page/Contribute.

+
+
+
+ + diff --git a/pdf_helper/tests/__init__.py b/pdf_helper/tests/__init__.py new file mode 100644 index 0000000000..018dd86602 --- /dev/null +++ b/pdf_helper/tests/__init__.py @@ -0,0 +1 @@ +from . import test_helper diff --git a/pdf_helper/tests/fixtures/pdf_with_xml_test.pdf b/pdf_helper/tests/fixtures/pdf_with_xml_test.pdf new file mode 100644 index 0000000000..72dcaf7c7c Binary files /dev/null and b/pdf_helper/tests/fixtures/pdf_with_xml_test.pdf differ diff --git a/pdf_helper/tests/test_helper.py b/pdf_helper/tests/test_helper.py new file mode 100644 index 0000000000..d0711a0b34 --- /dev/null +++ b/pdf_helper/tests/test_helper.py @@ -0,0 +1,46 @@ +# Copyright 2022 Camptocamp SA +# @author: Simone Orsi +# License LGPL-3.0 or later (http://www.gnu.org/licenses/lgpl). +import os + +from lxml import etree + +from odoo.tests.common import TransactionCase, TreeCase + +from odoo.addons.pdf_helper.utils import PDFParser + + +def read_test_file(filename, mode="r"): + path = os.path.join(os.path.dirname(__file__), "fixtures", filename) + with open(path, mode) as thefile: + return thefile.read() + + +# NOTE: this class could use a bare `unittest.TestCase` as base +# but w/out TreeCase Odoo won't load these tests. 
+class TestPDFHelperUtils(TreeCase):
+    def test_parse_xml(self):
+        pdf_content = read_test_file("pdf_with_xml_test.pdf", mode="rb")
+        res = PDFParser(pdf_content).get_xml_files()
+        fname, xml_root = tuple(res.items())[0]
+        self.assertEqual(fname, "factur-x.xml")
+        self.assertTrue(isinstance(xml_root, etree._Element))
+
+
+class TestPDFHelper(TransactionCase):
+    def test_parse_xml(self):
+        pdf_content = read_test_file("pdf_with_xml_test.pdf", mode="rb")
+        res = self.env["pdf.helper"].pdf_get_xml_files(pdf_content)
+        fname, xml_root = tuple(res.items())[0]
+        self.assertEqual(fname, "factur-x.xml")
+        self.assertTrue(isinstance(xml_root, etree._Element))
+
+    def test_parse_xml_fail(self):
+        with self.assertLogs(
+            "odoo.addons.pdf_helper.models.helper", level="ERROR"
+        ) as log_catcher:
+            self.env["pdf.helper"].pdf_get_xml_files(b"")
+            self.assertIn(
+                "PDF file parsing failed: Cannot read an empty file",
+                log_catcher.output[0],
+            )
diff --git a/pdf_helper/utils.py b/pdf_helper/utils.py
new file mode 100644
index 0000000000..eceb939b5d
--- /dev/null
+++ b/pdf_helper/utils.py
@@ -0,0 +1,93 @@
+# Copyright 2015-2021 Akretion France
+# @author: Alexis de Lattre
+# Copyright 2022 Camptocamp SA
+# @author: Simone Orsi
+# License LGPL-3.0 or later (http://www.gnu.org/licenses/lgpl).
+
+import logging
+import mimetypes
+from io import BytesIO
+
+from lxml import etree
+
+_logger = logging.getLogger(__name__)
+
+try:
+    import PyPDF2
+except ImportError:
+    _logger.debug("Cannot import PyPDF2")
+
+
+class PDFParser:
+    """Extract the XML files embedded in a PDF document.
+
+    :param pdf_file: binary PDF file content
+    """
+
+    def __init__(self, pdf_file):
+        self.pdf_file = pdf_file
+
+    def get_xml_files(self):
+        """Parse the PDF file to extract its embedded XML content.
+
+        Embedded files that cannot be parsed as XML, or whose root
+        element has no children, are skipped.
+
+        :returns: a dict like {$filename: $parsed_xml_file_obj}.
+        """
+        res = {}
+        with BytesIO(self.pdf_file) as fd:
+            xmlfiles = self._extract_xml_files(fd)
+            for filename, xml_obj in xmlfiles.items():
+                root = self._extract_xml_root(xml_obj)
+                # `len(root)` counts child elements: skip unparsable
+                # or childless documents.
+                if root is None or not len(root):
+                    continue
+                res[filename] = root
+        if res:
+            _logger.debug("Valid XML files found in PDF: %s", list(res.keys()))
+        return res
+
+    def _extract_xml_files(self, fd):
+        """Collect the XML attachments listed in the PDF name tree.
+
+        :param fd: file-like object holding the PDF content
+        :returns: a dict like {$filename: $pdf_obj}
+        :raises KeyError: if the expected PDF entries are missing
+        """
+        pdf = PyPDF2.PdfFileReader(fd)
+        _logger.debug("pdf.trailer=%s", pdf.trailer)
+        pdf_root = pdf.trailer["/Root"]
+        _logger.debug("pdf_root=%s", pdf_root)
+        # TODO add support for /Kids
+        embeddedfiles = pdf_root["/Names"]["/EmbeddedFiles"]["/Names"]
+        # `/Names` is a flat array alternating file names and file
+        # objects: [name1, obj1, name2, obj2, ...]. Walk it pairwise so
+        # that only names are matched against mimetypes and each name is
+        # mapped to its own object.
+        xmlfiles = {}  # key = filename, value = PDF obj
+        for filename, pdf_obj in zip(embeddedfiles[::2], embeddedfiles[1::2]):
+            mime_type = mimetypes.guess_type(filename)[0]
+            if mime_type in ("application/xml", "text/xml"):
+                xmlfiles[filename] = pdf_obj
+        _logger.debug("xmlfiles=%s", xmlfiles)
+        return xmlfiles
+
+    def _extract_xml_root(self, xml_obj):
+        """Parse an embedded file object into an XML root element.
+
+        :param xml_obj: PDF object as returned by `_extract_xml_files`
+        :returns: the parsed root element, or None if extraction or
+            parsing failed (failures are only logged at debug level)
+        """
+        xml_root = None
+        try:
+            xml_file_dict = xml_obj.getObject()
+            _logger.debug("xml_file_dict=%s", xml_file_dict)
+            xml_string = xml_file_dict["/EF"]["/F"].getData()
+            xml_root = etree.fromstring(xml_string)
+        except Exception as err:
+            # TODO: can't we catch specific exceptions?
+            _logger.debug("_pdf_extract_xml_root failed: %s", str(err))
+        return xml_root
diff --git a/setup/pdf_helper/odoo/addons/pdf_helper b/setup/pdf_helper/odoo/addons/pdf_helper
new file mode 120000
index 0000000000..e24cd2a985
--- /dev/null
+++ b/setup/pdf_helper/odoo/addons/pdf_helper
@@ -0,0 +1 @@
+../../../../pdf_helper
\ No newline at end of file
diff --git a/setup/pdf_helper/setup.py b/setup/pdf_helper/setup.py
new file mode 100644
index 0000000000..28c57bb640
--- /dev/null
+++ b/setup/pdf_helper/setup.py
@@ -0,0 +1,6 @@
+import setuptools
+
+setuptools.setup(
+    setup_requires=['setuptools-odoo'],
+    odoo_addon=True,
+)