Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: Use a custom autoref HTML tag #48

Merged
merged 2 commits into from
May 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/mkdocs_autorefs/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ class AutorefsPlugin(BasePlugin):

scan_toc: bool = True
current_page: str | None = None
legacy_refs: bool = True

def __init__(self) -> None:
"""Initialize the object."""
Expand Down Expand Up @@ -211,7 +212,7 @@ def on_post_page(self, output: str, page: Page, **kwargs: Any) -> str: # noqa:
log.debug(f"Fixing references in page {page.file.src_path}")

url_mapper = functools.partial(self.get_item_url, from_url=page.url, fallback=self.get_fallback_anchor)
fixed_output, unmapped = fix_refs(output, url_mapper)
fixed_output, unmapped = fix_refs(output, url_mapper, _legacy_refs=self.legacy_refs)

if unmapped and log.isEnabledFor(logging.WARNING):
for ref in unmapped:
Expand Down
97 changes: 91 additions & 6 deletions src/mkdocs_autorefs/references.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import re
import warnings
from html import escape, unescape
from html.parser import HTMLParser
from typing import TYPE_CHECKING, Any, Callable, ClassVar, Match
from urllib.parse import urlsplit
from xml.etree.ElementTree import Element
Expand Down Expand Up @@ -44,7 +45,12 @@ def __getattr__(name: str) -> Any:
rf"(?: class=(?P<class>{_ATTR_VALUE}))?(?P<attrs> [^<>]+)?>(?P<title>.*?)</span>",
flags=re.DOTALL,
)
"""A regular expression to match mkdocs-autorefs' special reference markers
"""Deprecated. Use [`AUTOREF_RE`][mkdocs_autorefs.references.AUTOREF_RE] instead."""

# Named groups: `attrs` captures the raw attribute text of the opening `<autoref ...>` tag,
# `title` captures everything between the opening and closing tags.
# Both groups are non-greedy; `re.DOTALL` lets `.` match newlines so a tag may span several lines.
AUTOREF_RE = re.compile(r"<autoref (?P<attrs>.*?)>(?P<title>.*?)</autoref>", flags=re.DOTALL)
"""The autoref HTML tag regular expression.

A regular expression to match mkdocs-autorefs' special reference markers
in the [`on_post_page` hook][mkdocs_autorefs.plugin.AutorefsPlugin.on_post_page].
"""

Expand Down Expand Up @@ -135,8 +141,8 @@ def _make_tag(self, identifier: str, text: str) -> Element:
Returns:
A new element.
"""
el = Element("span")
el.set("data-autorefs-identifier", identifier)
el = Element("autoref")
el.set("identifier", identifier)
el.text = text
return el

Expand Down Expand Up @@ -167,7 +173,7 @@ def relative_url(url_a: str, url_b: str) -> str:
return f"{relative}#{anchor}"


def fix_ref(url_mapper: Callable[[str], str], unmapped: list[str]) -> Callable:
def _legacy_fix_ref(url_mapper: Callable[[str], str], unmapped: list[str]) -> Callable:
"""Return a `repl` function for [`re.sub`](https://docs.python.org/3/library/re.html#re.sub).

In our context, we match Markdown references and replace them with HTML links.
Expand Down Expand Up @@ -216,7 +222,84 @@ def inner(match: Match) -> str:
return inner


def fix_refs(html: str, url_mapper: Callable[[str], str]) -> tuple[str, list[str]]:
class _AutorefsAttrs(dict):
    """Dictionary of the attributes found on an `<autoref>` tag.

    Keys are attribute names; values are the attribute values, or `None`
    for attributes written without a value (e.g. `optional`).
    """

    # Attributes excluded from `remaining`; everything else is passed through to the generated tag.
    _handled_attrs: ClassVar[set[str]] = {"identifier", "optional", "hover", "class"}

    @property
    def remaining(self) -> str:
        """Serialize the non-handled attributes as an HTML attribute string.

        Value-less attributes are emitted as bare names, the others as
        `name="value"`. Values are HTML-escaped so that a quote, angle bracket
        or ampersand inside a value cannot break the surrounding markup.

        Returns:
            The space-separated attributes, or an empty string when there are none.
        """
        return " ".join(
            name if value is None else f'{name}="{escape(value)}"'
            for name, value in self.items()
            if name not in self._handled_attrs
        )


class _HTMLAttrsParser(HTMLParser):
    """Small HTML parser that collects the attributes of the start tags it is fed."""

    def __init__(self) -> None:
        super().__init__()
        # Attributes gathered by `handle_starttag` during the current `parse` call.
        self.attrs: dict[str, str | None] = {}

    def parse(self, html: str) -> _AutorefsAttrs:
        """Parse an HTML start tag and return its attributes.

        Arguments:
            html: The HTML text to parse, typically a single start tag.

        Returns:
            A copy of the collected attributes, wrapped in `_AutorefsAttrs`.
        """
        # The instance is reused across calls: reset it first so that unprocessed
        # input buffered by a previous call (e.g. a malformed or incomplete tag)
        # cannot be prepended to, and corrupt, the text parsed now.
        self.reset()
        self.attrs.clear()
        self.feed(html)
        return _AutorefsAttrs(self.attrs)

    def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:  # noqa: ARG002
        """Record the attributes of a start tag (the tag name itself is ignored)."""
        self.attrs.update(attrs)


# Single module-level parser instance, shared by every call that needs to parse tag attributes.
_html_attrs_parser = _HTMLAttrsParser()


def fix_ref(url_mapper: Callable[[str], str], unmapped: list[str]) -> Callable:
    """Return a `repl` function for [`re.sub`](https://docs.python.org/3/library/re.html#re.sub).

    In our context, we match `<autoref>` HTML tags (a match exposing the `attrs`
    and `title` groups) and replace them with HTML links.

    When the matched reference's identifier was not mapped to an URL and the reference
    is not marked `optional`, we append the identifier to the outer `unmapped` list.
    It generally means the user is trying to cross-reference an object that was not collected
    and rendered, making it impossible to link to it. The caller can then issue a warning.

    Arguments:
        url_mapper: A callable that gets an object's site URL by its identifier,
            such as [mkdocs_autorefs.plugin.AutorefsPlugin.get_item_url][].
            It is expected to raise `KeyError` for unknown identifiers.
        unmapped: A list to store unmapped identifiers.

    Returns:
        The actual function accepting a [`Match` object](https://docs.python.org/3/library/re.html#match-objects)
        and returning the replacement strings.
    """

    def inner(match: Match) -> str:
        title = match["title"]
        # Wrap the raw attribute text in a dummy tag so the HTML parser can split it into name/value pairs.
        attrs = _html_attrs_parser.parse(f"<a {match['attrs']}>")
        # A tag without an `identifier` attribute raises `KeyError` here (outside the `try` below).
        identifier: str = attrs["identifier"]
        optional = "optional" in attrs
        hover = "hover" in attrs
        # The identifier is written back inside a double-quoted `title` attribute:
        # escape it so that a quote, angle bracket or ampersand cannot break the markup.
        title_attr = escape(identifier)

        try:
            url = url_mapper(unescape(identifier))
        except KeyError:
            if optional:
                # Optional references are never reported: keep the title, with a tooltip if requested.
                if hover:
                    return f'<span title="{title_attr}">{title}</span>'
                return title
            unmapped.append(identifier)
            # Fall back to Markdown reference syntax so the unresolved reference stays visible.
            if title == identifier:
                return f"[{identifier}][]"
            return f"[{title}][{identifier}]"

        parsed = urlsplit(url)
        # A URL with a scheme or a network location points outside of the site.
        external = parsed.scheme or parsed.netloc
        classes = (attrs.get("class") or "").strip().split()
        classes = ["autorefs", "autorefs-external" if external else "autorefs-internal", *classes]
        class_attr = " ".join(classes)
        # Attributes not consumed above are carried over to the generated link.
        if remaining := attrs.remaining:
            remaining = f" {remaining}"
        if optional and hover:
            return f'<a class="{class_attr}" title="{title_attr}" href="{escape(url)}"{remaining}>{title}</a>'
        return f'<a class="{class_attr}" href="{escape(url)}"{remaining}>{title}</a>'

    return inner


def fix_refs(html: str, url_mapper: Callable[[str], str], *, _legacy_refs: bool = True) -> tuple[str, list[str]]:
"""Fix all references in the given HTML text.

Arguments:
Expand All @@ -228,7 +311,9 @@ def fix_refs(html: str, url_mapper: Callable[[str], str]) -> tuple[str, list[str
The fixed HTML.
"""
unmapped: list[str] = []
html = AUTO_REF_RE.sub(fix_ref(url_mapper, unmapped), html)
html = AUTOREF_RE.sub(fix_ref(url_mapper, unmapped), html)
if _legacy_refs:
html = AUTO_REF_RE.sub(_legacy_fix_ref(url_mapper, unmapped), html)
return html, unmapped


Expand Down
57 changes: 52 additions & 5 deletions tests/test_references.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ def test_ignore_reference_with_special_char() -> None:
)


def test_custom_required_reference() -> None:
def test_legacy_custom_required_reference() -> None:
"""Check that external HTML-based references are expanded or reported missing."""
url_map = {"ok": "ok.html#ok"}
source = "<span data-autorefs-identifier=bar>foo</span> <span data-autorefs-identifier=ok>ok</span>"
Expand All @@ -221,7 +221,16 @@ def test_custom_required_reference() -> None:
assert unmapped == ["bar"]


def test_custom_optional_reference() -> None:
def test_custom_required_reference() -> None:
    """Required `<autoref>` tags are turned into links when known, reported when unknown."""
    known_urls = {"ok": "ok.html#ok"}
    html = "<autoref identifier=bar>foo</autoref> <autoref identifier=ok>ok</autoref>"
    fixed_html, missing = fix_refs(html, known_urls.__getitem__)
    # `bar` has no URL: it is rendered back as a Markdown reference and listed as unmapped.
    expected_html = '[foo][bar] <a class="autorefs autorefs-internal" href="ok.html#ok">ok</a>'
    assert fixed_html == expected_html
    assert missing == ["bar"]


def test_legacy_custom_optional_reference() -> None:
"""Check that optional HTML-based references are expanded and never reported missing."""
url_map = {"ok": "ok.html#ok"}
source = '<span data-autorefs-optional="bar">foo</span> <span data-autorefs-optional=ok>ok</span>'
Expand All @@ -230,7 +239,16 @@ def test_custom_optional_reference() -> None:
assert unmapped == []


def test_custom_optional_hover_reference() -> None:
def test_custom_optional_reference() -> None:
    """Optional `<autoref>` tags are linked when known and silently left as text otherwise."""
    known_urls = {"ok": "ok.html#ok"}
    html = '<autoref optional identifier="bar">foo</autoref> <autoref identifier=ok optional>ok</autoref>'
    fixed_html, missing = fix_refs(html, known_urls.__getitem__)
    # The unknown optional reference collapses to its bare title and is not reported.
    expected_html = 'foo <a class="autorefs autorefs-internal" href="ok.html#ok">ok</a>'
    assert fixed_html == expected_html
    assert missing == []


def test_legacy_custom_optional_hover_reference() -> None:
"""Check that optional-hover HTML-based references are expanded and never reported missing."""
url_map = {"ok": "ok.html#ok"}
source = '<span data-autorefs-optional-hover="bar">foo</span> <span data-autorefs-optional-hover=ok>ok</span>'
Expand All @@ -242,7 +260,19 @@ def test_custom_optional_hover_reference() -> None:
assert unmapped == []


def test_external_references() -> None:
def test_custom_optional_hover_reference() -> None:
    """Optional-hover `<autoref>` tags get a `title` tooltip and are never reported missing."""
    known_urls = {"ok": "ok.html#ok"}
    html = '<autoref optional hover identifier="bar">foo</autoref> <autoref optional identifier=ok hover>ok</autoref>'
    fixed_html, missing = fix_refs(html, known_urls.__getitem__)
    # Unknown identifier -> `<span>` with tooltip; known identifier -> link with tooltip.
    expected_html = (
        '<span title="bar">foo</span> <a class="autorefs autorefs-internal" title="ok" href="ok.html#ok">ok</a>'
    )
    assert fixed_html == expected_html
    assert missing == []


def test_legacy_external_references() -> None:
"""Check that external references are marked as such."""
url_map = {"example": "https://example.com"}
source = '<span data-autorefs-optional="example">example</span>'
Expand All @@ -251,6 +281,15 @@ def test_external_references() -> None:
assert unmapped == []


def test_external_references() -> None:
    """References resolving to an absolute URL receive the `autorefs-external` class."""
    known_urls = {"example": "https://example.com"}
    html = '<autoref optional identifier="example">example</autoref>'
    fixed_html, missing = fix_refs(html, known_urls.__getitem__)
    expected_html = '<a class="autorefs autorefs-external" href="https://example.com">example</a>'
    assert fixed_html == expected_html
    assert missing == []


def test_register_markdown_anchors() -> None:
"""Check that Markdown anchors are registered when enabled."""
plugin = AutorefsPlugin()
Expand Down Expand Up @@ -333,9 +372,17 @@ def test_register_markdown_anchors_with_admonition() -> None:
}


def test_keep_data_attributes() -> None:
def test_legacy_keep_data_attributes() -> None:
"""Keep HTML data attributes from autorefs spans."""
url_map = {"example": "https://e.com"}
source = '<span data-autorefs-optional="example" class="hi ho" data-foo data-bar="0">e</span>'
output, _ = fix_refs(source, url_map.__getitem__)
assert output == '<a class="autorefs autorefs-external hi ho" href="https://e.com" data-foo data-bar="0">e</a>'


def test_keep_data_attributes() -> None:
    """Extra classes and data attributes on an `<autoref>` tag are carried over to the link."""
    known_urls = {"example": "https://e.com"}
    html = '<autoref optional identifier="example" class="hi ho" data-foo data-bar="0">e</autoref>'
    fixed_html, _unmapped = fix_refs(html, known_urls.__getitem__)
    expected_html = '<a class="autorefs autorefs-external hi ho" href="https://e.com" data-foo data-bar="0">e</a>'
    assert fixed_html == expected_html
Loading