Skip to content

Commit

Permalink
Clean everything else
Browse files Browse the repository at this point in the history
  • Loading branch information
cthoyt committed Feb 16, 2025
1 parent 62892ce commit 7b8c9a9
Show file tree
Hide file tree
Showing 8 changed files with 210 additions and 75 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[build-system]
requires = ["uv>=0.5.13,<0.6.0"]
requires = ["uv>=0.5.13,<0.7.0"]
# The uv backend entered preview mode in https://github.com/astral-sh/uv/pull/8886/files
# with the 0.5.0 release. See also https://github.com/astral-sh/uv/issues/3957 for tracking.
build-backend = "uv"
Expand Down
60 changes: 45 additions & 15 deletions src/pyobo/identifier_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import bioontologies.relations
import bioontologies.upgrade
import bioregistry
import click
from curies import ReferenceTuple
from pydantic import ValidationError
from typing_extensions import Doc
Expand Down Expand Up @@ -41,7 +42,7 @@ class BlacklistedError(ValueError):
class ParseError(BaseException):
"""Raised on a missing prefix."""

text: ClassVar[str]
message: ClassVar[str]

def __init__(
self,
Expand All @@ -50,31 +51,38 @@ def __init__(
context: str | None,
ontology_prefix: str | None = None,
node: Reference | None = None,
predicate: Reference | None = None,
line: Line = None,
) -> None:
"""Initialize the error."""
self.curie = curie
self.context = context
self.ontology_prefix = ontology_prefix
self.node = node
self.predicate = predicate
self.line = line

def __str__(self) -> str:
s = ""
if self.node:
s += f"[{self.node.curie}] "
if self.predicate:
s += f"[{self.node.curie} - {self.predicate.curie}] "
else:
s += f"[{self.node.curie}] "
elif self.ontology_prefix:
s += f"[{self.ontology_prefix}] "
s += f"`{self.curie}` {self.text}"
if self.line:
s += f" in: {self.line}"
s += f"{self.message} {click.style(self.curie, fg='cyan')}"
if self.context:
s += f" in {self.context}"
if self.line and self.line != self.curie:
s += f" in {click.style(self.line, fg='yellow')}"
return s


class ParseValidationError(ParseError):
"""Raised on a validation error."""

text = "failed Pydantic validation"
message = "failed Pydantic validation"

def __init__(self, *args, exc: ValidationError, **kwargs) -> None:
"""Initialize the error."""
Expand All @@ -85,38 +93,40 @@ def __init__(self, *args, exc: ValidationError, **kwargs) -> None:
class UnregisteredPrefixError(ParseError):
"""Raised on a missing prefix."""

text = "contains unhandled prefix"
message = "unregistered prefix in"


class UnparsableIRIError(ParseError):
"""Raised on a an unparsable IRI."""

text = "could not be IRI parsed"
message = "couldn't parse IRI"


class EmptyStringError(ParseError):
"""Raised on a an empty string."""

text = "is empty"
message = "is empty"


class NotCURIEError(ParseError):
"""Raised on a text that can't be parsed as a CURIE."""

text = "Not a CURIE"
message = "not a CURIE"


class DefaultCoercionError(ParseError):
"""Raised on a text that can't be coerced into a default reference."""

text = "can't be coerced into a default reference"
message = "can't be coerced into a default reference"


def _is_uri(s: str) -> bool:
return s.startswith("http:") or s.startswith("https:")


def _preclean_uri(s: str) -> str:
s = s.strip().removeprefix(r"url\:").removeprefix(r"uri\:")
s = s.strip().removeprefix(r"URL\:").removeprefix(r"URI\:")
s = s.strip().removeprefix("url:").removeprefix("uri:")
s = s.removeprefix("URL:").removeprefix("URI:")
s = s.removeprefix("WWW:").removeprefix("www:").lstrip()
Expand All @@ -130,6 +140,7 @@ def _parse_str_or_curie_or_uri_helper(
*,
ontology_prefix: str | None = None,
node: Reference | None = None,
predicate: Reference | None = None,
upgrade: bool = True,
line: str | None = None,
name: str | None = None,
Expand All @@ -148,10 +159,11 @@ def _parse_str_or_curie_or_uri_helper(
"""
str_or_curie_or_uri = _preclean_uri(str_or_curie_or_uri)
if not str_or_curie_or_uri:
raise EmptyStringError(
return EmptyStringError(
str_or_curie_or_uri,
ontology_prefix=ontology_prefix,
node=node,
predicate=predicate,
line=line,
context=context,
)
Expand All @@ -177,23 +189,39 @@ def _parse_str_or_curie_or_uri_helper(

if _is_uri(str_or_curie_or_uri):
prefix, identifier = bioregistry.parse_iri(str_or_curie_or_uri)
if prefix and identifier:
return Reference(prefix=prefix, identifier=identifier)
else:
if not prefix or not identifier:
return UnparsableIRIError(
str_or_curie_or_uri,
ontology_prefix=ontology_prefix,
node=node,
predicate=predicate,
line=line,
context=context,
)
try:
rv = Reference.model_validate(
{"prefix": prefix, "identifier": identifier, "name": name}
)
except ValidationError as exc:
return ParseValidationError(
str_or_curie_or_uri,
ontology_prefix=ontology_prefix,
node=node,
predicate=predicate,
line=line,
context=context,
exc=exc,
)
else:
return rv

prefix, delimiter, identifier = str_or_curie_or_uri.partition(":")
if not delimiter:
return NotCURIEError(
str_or_curie_or_uri,
ontology_prefix=ontology_prefix,
node=node,
predicate=predicate,
line=line,
context=context,
)
Expand All @@ -204,6 +232,7 @@ def _parse_str_or_curie_or_uri_helper(
str_or_curie_or_uri,
ontology_prefix=ontology_prefix,
node=node,
predicate=predicate,
line=line,
context=context,
)
Expand All @@ -218,6 +247,7 @@ def _parse_str_or_curie_or_uri_helper(
str_or_curie_or_uri,
ontology_prefix=ontology_prefix,
node=node,
predicate=predicate,
line=line,
exc=exc,
context=context,
Expand Down
Loading

0 comments on commit 7b8c9a9

Please sign in to comment.