test: add more tests (#177)

* test: adding tests * remove branch restriction on pr tests * fix color * fix aicsimageio test * working on canonicalization and exclude_unset * minimize roundtrip test code * add report * add more tests * add more tests * fix py38 types
tlambert03 · Jul 3, 2023 · 65e4b77 · 65e4b77
1 parent 0fe37d6
commit 65e4b77
Show file tree

Hide file tree

Showing 14 changed files with 458 additions and 171 deletions.
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -7,8 +7,6 @@ on:
     tags:
       - "v*" # Push events to matching v*, i.e. v1.0, v20.15.10
   pull_request:
-    branches:
-      - "main"
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
@@ -50,6 +48,12 @@ jobs:
       - name: Test
         run: pytest --cov --cov-report=xml
 
+      - name: retest without lxml or xmlschema
+        if: matrix.platform == 'ubuntu-latest'
+        run: |
+          pip uninstall -y lxml xmlschema
+          pytest --cov --cov-report=xml --cov-append
+
       - uses: codecov/codecov-action@v2
 
   deploy:

diff --git a/.github/workflows/test_dependents.yml b/.github/workflows/test_dependents.yml
@@ -5,8 +5,6 @@ on:
     branches:
       - "main"
   pull_request:
-    branches:
-      - "main"
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
@@ -53,8 +51,9 @@ jobs:
 
       - name: Run Tests
         run: |
-          pytest --color=yes -k "not test_known_errors_without_cleaning" \
+          pytest --color=yes -k "not test_known_errors_without_cleaning and not bad" \
             aicsimageio/tests/readers/test_ome_tiff_reader.py \
+            aicsimageio/tests/writers/test_ome_tiff_writer.py \
             aicsimageio/tests/readers/extra_readers/test_bioformats_reader.py \
             aicsimageio/tests/readers/extra_readers/test_ome_zarr_reader.py
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -48,7 +48,7 @@ docs = [
   "sphinx-rtd-theme==1.1.1",
   "ipython",
 ]
-test = ["pytest", "pytest-cov", "xmlschema", "distributed"]
+test = ["pytest", "pytest-cov", "xmlschema"]
 
 # https://hatch.pypa.io/latest/plugins/build-hook/custom/
 [tool.hatch.build.targets.wheel.hooks.custom]

diff --git a/src/ome_autogen/_transformer.py b/src/ome_autogen/_transformer.py
@@ -11,11 +11,9 @@
     from xsdata.codegen.models import Class
 
 
-UNWANTED_HANDLERS = (
-    # we don't need RenameDuplicateAttributes because we inject
-    # proper enum names in our _generator.py
-    (RenameDuplicateAttributes, None),
-)
+# we don't need RenameDuplicateAttributes because we inject
+# proper enum names in our _generator.py
+UNWANTED_HANDLERS = [(RenameDuplicateAttributes, None)]
 
 
 class OMETransformer(SchemaTransformer):

diff --git a/src/ome_autogen/main.py b/src/ome_autogen/main.py
@@ -28,7 +28,6 @@ def build_model(
     output_dir: Path | str = SRC_PATH,
     schema_file: Path | str = SCHEMA_FILE,
     target_package: str = OUTPUT_PACKAGE,
-    line_length: int = 88,
     ruff_ignore: list[str] = RUFF_IGNORE,
     do_formatting: bool = True,
     do_mypy: bool = DO_MYPY,
@@ -40,33 +39,40 @@ def build_model(
     _print_gray(f"Processing {getattr(schema_file ,'name', schema_file)}...")
     transformer.process_sources([Path(schema_file).resolve().as_uri()])
 
-    package_dir = Path(output_dir) / OUTPUT_PACKAGE.replace(".", "/")
+    package_dir = str(Path(output_dir) / OUTPUT_PACKAGE.replace(".", "/"))
     rmtree(package_dir, ignore_errors=True)
     with _util.cd(output_dir):  # xsdata doesn't support output path
         _print_gray("Writing Files...")
         transformer.process_classes()
 
     if do_formatting:
-        _print_gray("Running black and ruff ...")
+        _fix_formatting(package_dir, ruff_ignore)
 
-        black = ["black", str(package_dir), "-q", f"--line-length={line_length}"]
-        subprocess.check_call(black)  # noqa S
+    if do_mypy:
+        _check_mypy(package_dir)
 
-        ruff = ["ruff", "-q", "--fix", str(package_dir)]
-        ruff.extend(f"--ignore={ignore}" for ignore in ruff_ignore)
-        subprocess.check_call(ruff)  # noqa S
+    _print_green(f"OME python model created at {OUTPUT_PACKAGE}")
 
-    if do_mypy:
-        _print_gray("Running mypy ...")
 
-        mypy = ["mypy", str(package_dir), "--strict"]
+def _fix_formatting(package_dir: str, ruff_ignore: list[str] = RUFF_IGNORE) -> None:
+    _print_gray("Running black and ruff ...")
 
-        try:
-            subprocess.check_output(mypy, stderr=subprocess.STDOUT)  # noqa S
-        except subprocess.CalledProcessError as e:
-            raise RuntimeError(f"mypy errors:\n\n{e.output.decode()}") from e
+    black = ["black", package_dir, "-q", "--line-length=88"]
+    subprocess.check_call(black)  # noqa S
 
-    _print_green(f"OME python model created at {OUTPUT_PACKAGE}")
+    ruff = ["ruff", "-q", "--fix", package_dir]
+    ruff.extend(f"--ignore={ignore}" for ignore in ruff_ignore)
+    subprocess.check_call(ruff)  # noqa S
+
+
+def _check_mypy(package_dir: str) -> None:
+    _print_gray("Running mypy ...")
+
+    mypy = ["mypy", package_dir, "--strict"]
+    try:
+        subprocess.check_output(mypy, stderr=subprocess.STDOUT)  # noqa S
+    except subprocess.CalledProcessError as e:
+        raise RuntimeError(f"mypy errors:\n\n{e.output.decode()}") from e
 
 
 def _print_gray(text: str) -> None:

diff --git a/src/ome_types/_conversion.py b/src/ome_types/_conversion.py
@@ -1,23 +1,36 @@
 from __future__ import annotations
 
+import importlib
+import operator
 import os
+import warnings
 from dataclasses import is_dataclass
 from pathlib import Path
 from struct import Struct
 from typing import TYPE_CHECKING, Any, cast
-from xml.etree import ElementTree as ET
+
+from ome_types.validation import validate_xml
+
+try:
+    from lxml import etree as ET
+except ImportError:  # pragma: no cover
+    from xml.etree import ElementTree as ET
 
 from xsdata.formats.dataclass.parsers.config import ParserConfig
-from xsdata.formats.dataclass.serializers.config import SerializerConfig
 
-from xsdata_pydantic_basemodel.bindings import XmlParser, XmlSerializer
+from xsdata_pydantic_basemodel.bindings import (
+    SerializerConfig,
+    XmlParser,
+    XmlSerializer,
+)
 
 if TYPE_CHECKING:
     import io
     from typing import TypedDict
 
     from xsdata.formats.dataclass.parsers.mixins import XmlHandler
 
+    from ome_types._mixins._base_type import OMEType
     from ome_types.model import OME
     from xsdata_pydantic_basemodel.bindings import XmlContext
 
@@ -27,26 +40,42 @@ class ParserKwargs(TypedDict, total=False):
         handler: type[XmlHandler]
 
 
+__all__ = ["from_xml", "to_xml", "to_dict", "from_tiff", "tiff2xml"]
+
 OME_2016_06_URI = "http://www.openmicroscopy.org/Schemas/OME/2016-06"
-OME_2016_06_NS = f"{{{OME_2016_06_URI}}}OME"
+MODULES = {
+    OME_2016_06_URI: "ome_types._autogenerated.ome_2016_06",
+}
 
 
-def _get_ome(xml: str | bytes) -> type[OME]:
+def _get_ome_type(xml: str | bytes) -> type[OMEType]:
+    """Resolve a python model class for the root element of an OME XML document."""
     if isinstance(xml, str) and not xml.startswith("<"):
         root = ET.parse(xml).getroot()  # noqa: S314
     else:
+        if not isinstance(xml, bytes):
+            xml = xml.encode("utf-8")
         root = ET.fromstring(xml)  # noqa: S314
 
-    if root.tag == OME_2016_06_NS:
-        from ome_types.model import OME
+    *ns, localname = root.tag[1:].split("}", 1)
+    ns = next(iter(ns), None)
 
-        return OME
-    raise ValueError(f"Unsupported OME schema tag {root.tag}")
+    if not ns or ns not in MODULES:
+        raise ValueError(f"Unsupported OME schema tag {root.tag!r} in namespace {ns!r}")
+
+    mod = importlib.import_module(MODULES[ns])
+    try:
+        return getattr(mod, localname)
+    except AttributeError as e:
+        raise ValueError(
+            f"Could not find a class for {localname!r} in {mod.__name__}"
+        ) from e
 
 
 def to_dict(source: OME | Path | str | bytes) -> dict[str, Any]:
     if is_dataclass(source):
         raise NotImplementedError("dataclass -> dict is not supported yet")
+
     return from_xml(  # type: ignore[return-value]
         cast("Path | str | bytes", source),
         # the class_factory is what prevents class instantiation,
@@ -55,57 +84,87 @@ def to_dict(source: OME | Path | str | bytes) -> dict[str, Any]:
     )
 
 
-def _class_factory(cls: type, kwargs: Any) -> Any:
-    kwargs.setdefault("validation", "strict")
-    return cls(**kwargs)
-
-
 def from_xml(
     xml: Path | str | bytes,
     *,
-    validate: bool | None = None,  # TODO implement
-    parser: Any = None,  # TODO deprecate
+    validate: bool | None = None,
+    parser: Any = None,
     parser_kwargs: ParserKwargs | None = None,
 ) -> OME:
-    # if validate:
-    # raise NotImplementedError("validate=True is not supported yet")
+    if parser is not None:
+        warnings.warn(
+            "As of version 0.4.0, the parser argument is ignored. "
+            "lxml will be used if available in the environment, but you can "
+            "drop this keyword argument.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+
+    if validate:
+        validate_xml(xml)
 
     if isinstance(xml, Path):
         xml = str(xml)
 
-    OME_type = _get_ome(xml)
-    parser_kwargs = {"config": ParserConfig(class_factory=_class_factory)}
-    _parser = XmlParser(**(parser_kwargs or {}))
+    # this cast is a lie... but it's by far the most common type that will
+    # come out of this function, and will be more useful to most users.
+    # For those who pass in an xml document that isn't just a root <OME> tag,
+    # they can cast the result to the correct type themselves.
+    OME_type = cast("type[OME]", _get_ome_type(xml))
+
+    parser_ = XmlParser(**(parser_kwargs or {}))
     if isinstance(xml, bytes):
-        return _parser.from_bytes(xml, OME_type)
+        return parser_.from_bytes(xml, OME_type)
     if os.path.isfile(xml):
-        return _parser.parse(xml, OME_type)
-    return _parser.from_string(xml, OME_type)
+        return parser_.parse(xml, OME_type)
+    return parser_.from_string(xml, OME_type)
 
 
 def to_xml(
     ome: OME,
-    ignore_defaults: bool = True,
+    *,
+    # exclude_defaults takes precedence over exclude_unset
+    # if a value equals the default, it will be excluded
+    exclude_defaults: bool = False,
+    # exclude_unset will exclude any value that is not explicitly set
+    # but will INCLUDE values that are set to their default
+    exclude_unset: bool = True,
     indent: int = 2,
+    include_namespace: bool | None = None,
     include_schema_location: bool = True,
+    canonicalize: bool = False,
+    validate: bool = False,
 ) -> str:
     config = SerializerConfig(
-        pretty_print=indent > 0,
+        pretty_print=(indent > 0) and not canonicalize,  # canonicalize does it for us
         pretty_print_indent=" " * indent,
-        ignore_default_attributes=ignore_defaults,
+        xml_declaration=False,
+        ignore_default_attributes=exclude_defaults,
+        ignore_unset_attributes=exclude_unset,
+        attribute_sort_key=operator.attrgetter("name") if canonicalize else None,
     )
     if include_schema_location:
         config.schema_location = f"{OME_2016_06_URI} {OME_2016_06_URI}/ome.xsd"
 
     serializer = XmlSerializer(config=config)
-    xml = serializer.render(ome, ns_map={None: OME_2016_06_URI})
-    # HACK: xsdata is always including <StructuredAnnotations/> because...
-    # 1. we override the default for OME.structured_annotations so that
-    #    it's always a present (if empty) list.  That was the v1 behavior
-    #    and it allows ome.structured_annotations.append(...) to always work.
-    # 2. xsdata thinks it's not nillable, and therefore always includes it
-    # ... we might be able to do it better, but this fixes it for now.
-    return xml.replace("<StructuredAnnotations/>", "")
+    if include_namespace is None:
+        include_namespace = canonicalize
+
+    ns_map = {"ome" if include_namespace else None: OME_2016_06_URI}
+    xml = serializer.render(ome, ns_map=ns_map)
+
+    if canonicalize:
+        xml = _canonicalize(xml, indent=" " * indent)
+    if validate:
+        validate_xml(xml)
+    return xml
+
+
+def _canonicalize(xml: str, indent: str) -> str:
+    from xml.dom import minidom
+
+    xml_out = ET.canonicalize(xml, strip_text=True)
+    return minidom.parseString(xml_out).toprettyxml(indent=indent)  # noqa: S318
 
 
 def from_tiff(
@@ -131,6 +190,7 @@ def _unpack(fh: io.BufferedReader, strct: Struct) -> int:
 
 
 def tiff2xml(path: Path | str) -> bytes:
+    """Extract the OME-XML from a TIFF file."""
     with Path(path).open(mode="rb") as fh:
         head = fh.read(4)
         if head not in TIFF_TYPES:

diff --git a/src/ome_types/_mixins/_ome.py b/src/ome_types/_mixins/_ome.py
@@ -40,10 +40,10 @@ def from_tiff(cls, path: Path | str) -> OME:
 
         return from_tiff(path)
 
-    def to_xml(self) -> str:
+    def to_xml(self, **kwargs: Any) -> str:
         from ome_types._conversion import to_xml
 
-        return to_xml(cast("OME", self))
+        return to_xml(cast("OME", self), **kwargs)
 
 
 def collect_ids(value: Any) -> dict[str, OMEType]:

diff --git a/src/ome_types/model/_color.py b/src/ome_types/model/_color.py
@@ -12,7 +12,7 @@
 
 class Color(color.Color):
     def __init__(self, val: ColorType = -1) -> None:
-        with suppress(ValueError):
+        with suppress(ValueError, TypeError):
             val = self._int2tuple(int(val))  # type: ignore
         super().__init__(val)  # type: ignore [arg-type]