From fad8df3c7dbe661ac00632af5e7ced1041b0fbd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E0=A6=B8=E0=A7=8C=E0=A6=AE=E0=A7=8D=E0=A6=AF=E0=A6=A6?= =?UTF-8?q?=E0=A7=80=E0=A6=AA=20=E0=A6=98=E0=A7=8B=E0=A6=B7?= <72045785+soumyaDghosh@users.noreply.github.com> Date: Sat, 6 Jul 2024 00:46:47 +0530 Subject: [PATCH] fix: validate appstream metadata links and prefer links from project metadata (#4888) - Validate that update_contact, donation, vcs-browser, bugtracker, and homepage fields adopted from an appstream metadata file are valid URLs or email addresses. - Contact, donation, source-code, issues, and website fields in a snapcraft.yaml take priority over appstream metadata --- docs/requirements.txt | 1 + requirements-devel.txt | 1 + requirements.txt | 1 + snapcraft/meta/appstream.py | 21 +++++++- snapcraft/parts/update_metadata.py | 34 +++++++----- tests/unit/meta/test_appstream.py | 69 +++++++++++++++++++++--- tests/unit/parts/test_update_metadata.py | 58 ++++++++++++++++---- 7 files changed, 154 insertions(+), 31 deletions(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index d591c34a63..ebda86fc35 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -121,6 +121,7 @@ typing_extensions==4.11.0 uc-micro-py==1.0.3 urllib3==1.26.19 uvicorn==0.29.0 +validators==0.28.3 wadllib==1.3.6 watchfiles==0.21.0 wcmatch==8.5.1 diff --git a/requirements-devel.txt b/requirements-devel.txt index d42d2069c1..c20066a4bf 100644 --- a/requirements-devel.txt +++ b/requirements-devel.txt @@ -114,6 +114,7 @@ types-toml==0.10.8.20240310 types-urllib3==1.26.25.14 typing_extensions==4.9.0 urllib3==1.26.19 +validators==0.28.3 venusian==3.1.0 virtualenv==20.25.1 wadllib==1.3.6 diff --git a/requirements.txt b/requirements.txt index af830482b7..c75faaeb21 100644 --- a/requirements.txt +++ b/requirements.txt @@ -68,6 +68,7 @@ types-Deprecated==1.2.9.20240311 types-PyYAML==6.0.12.20240311 typing_extensions==4.9.0 urllib3==1.26.19 +validators==0.28.3 wadllib==1.3.6 wrapt==1.16.0 ws4py==0.5.1 diff --git a/snapcraft/meta/appstream.py b/snapcraft/meta/appstream.py index 60d500acb6..abd91caf2e 100644 --- a/snapcraft/meta/appstream.py +++ b/snapcraft/meta/appstream.py @@ -23,6 +23,8 @@ from typing import List, Optional, cast import lxml.etree +import validators +from craft_cli import emit from xdg.DesktopEntry import DesktopEntry from snapcraft import errors @@ -102,7 +104,15 @@ def extract(relpath: str, *, workdir: str) -> Optional[ExtractedMetadata]: version = _get_latest_release_from_nodes(dom.findall("releases/release")) project_license = _get_value_from_xml_element(dom, "project_license") update_contact = _get_value_from_xml_element(dom, "update_contact") - contact = [update_contact] if update_contact else None + contact = None + if update_contact: + if validators.url(update_contact) or validators.email(update_contact): + contact = [update_contact] + else: + emit.progress( + f"Ignoring invalid url {update_contact!r} in update_contact from appstream metadata.", + permanent=True, + ) issues = _get_urls_from_xml_element(dom.findall("url"), "bugtracker") donation = _get_urls_from_xml_element(dom.findall("url"), "donation") @@ -184,7 +194,14 @@ def _get_urls_from_xml_element(nodes, url_type) -> Optional[List[str]]: and node.attrib["type"] == url_type and node.text.strip() not in urls ): - urls.append(node.text.strip()) + link = node.text.strip() + if validators.url(link) or validators.email(link): + urls.append(link) + else: + emit.progress( + f"Ignoring invalid url or email {link!r} in {url_type!r} from appstream metadata.", + permanent=True, + ) if urls: return urls return None diff --git a/snapcraft/parts/update_metadata.py b/snapcraft/parts/update_metadata.py index 0c04133e89..d2ae9d9e71 100644 --- a/snapcraft/parts/update_metadata.py +++ b/snapcraft/parts/update_metadata.py @@ -17,7 +17,7 @@ """External metadata helpers.""" from pathlib import Path -from typing import Dict, Final, List, cast +from typing import Dict, Final, List, OrderedDict, cast import pydantic from craft_application.models import ProjectTitle, SummaryStr, UniqueStrList, VersionStr @@ -115,18 +115,26 @@ def _update_project_links( :param project: The Project model to update. :param metadata_list: A list of parsed information from metadata files. """ - for metadata in metadata_list: - fields = ["contact", "donation", "source_code", "issues", "website"] - for field in fields: - metadata_field = getattr(metadata, field) - project_field = getattr(project, field) - if metadata_field and project_field and project_field != metadata_field: - project_list = list(project_field) - project_list.extend(set(metadata_field) - set(project_list)) - setattr(project, field, cast(UniqueStrList, project_list)) - - if not getattr(project, field): - setattr(project, field, cast(UniqueStrList, metadata_field)) + fields = ["contact", "donation", "source_code", "issues", "website"] + for field in fields: + project_field = getattr(project, field) + + # only update the project if the project has not defined the field + if not project_field: + + # values for a field from all metadata files + metadata_values: list[str] = list() + + # iterate through all metadata and create a set of values for the field + for metadata in metadata_list: + if metadata_field := getattr(metadata, field): + metadata_values = list( + OrderedDict.fromkeys(metadata_values + metadata_field) + ) + + # update project with all new values from the metadata + if metadata_values: + setattr(project, field, cast(UniqueStrList, metadata_values)) def _update_project_variables(project: Project, project_vars: Dict[str, str]): diff --git a/tests/unit/meta/test_appstream.py b/tests/unit/meta/test_appstream.py index 6bc64bd42b..a7d6bcfc17 100644 --- a/tests/unit/meta/test_appstream.py +++ b/tests/unit/meta/test_appstream.py @@ -50,22 +50,40 @@ class TestAppstreamData: ("id", {}, "common_id", "test-id", "test-id"), ("name", {}, "title", "test-title", "test-title"), ("project_license", {}, "license", "test-license", "test-license"), - ("update_contact", {}, "contact", "test-contact", ["test-contact"]), - ("url", {"type": "homepage"}, "website", "test-website", ["test-website"]), - ("url", {"type": "bugtracker"}, "issues", "test-issues", ["test-issues"]), + ( + "update_contact", + {}, + "contact", + "test-contact@test.com", + ["test-contact@test.com"], + ), + ( + "url", + {"type": "homepage"}, + "website", + "http://test-website.test", + ["http://test-website.test"], + ), + ( + "url", + {"type": "bugtracker"}, + "issues", + "http://test-issues.test", + ["http://test-issues.test"], + ), ( "url", {"type": "donation"}, "donation", - "test-donation", - ["test-donation"], + "http://test-donation.test", + ["http://test-donation.test"], ), ( "url", {"type": "vcs-browser"}, "source_code", - "test-source", - ["test-source"], + "test@test.com", + ["test@test.com"], ), ], ) @@ -678,6 +696,7 @@ def test_appstream_links(self):