Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add fdroid packages #10

Merged
merged 1 commit into from
Dec 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
214 changes: 214 additions & 0 deletions minecode/src/discovery/mappers/fdroid.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# purldb is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/purldb for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

import json
import logging

from packagedcode.models import PackageData

from discovery import map_router
from discovery.mappers import Mapper
from packageurl import PackageURL
from packagedcode.models import party_person
from packagedcode.models import Party

# Developer switch: set to True to get debug-level tracing of this module on
# stdout.
TRACE = False

logger = logging.getLogger(__name__)

if TRACE:
    # sys is only needed when tracing is enabled, hence the local import
    import sys

    logger.setLevel(logging.DEBUG)
    logging.basicConfig(stream=sys.stdout)


@map_router.route('pkg:fdroid/.+')
class FdroidPackageMapper(Mapper):

    def get_packages(self, uri, resource_uri):
        """
        Yield the packages built from the index data stored on ``resource_uri``
        for all the versions of an F-Droid package (which F-Droid calls an
        "application"). Yield nothing when ``resource_uri`` has no data.
        """
        if not resource_uri.data:
            return

        index_data = json.loads(resource_uri.data)
        yield from build_packages(data=index_data, purl=resource_uri.package_url)


def build_packages(purl, data):
    """
    Yield PackageData built from ``data`` corresponding to a single package name
    and many package versions for a given ``purl`` string.

    ``data`` must have a "metadata" mapping and a "versions" mapping. Two
    PackageData are yielded for each version: first one for the source archive
    built from the version "src" entry, then one for the apk built from the
    version "file" entry. The apk package references the purl of its source
    archive in ``source_packages``. The package version is the manifest
    "versionCode" converted to a string.

    A KeyError is raised while iterating if "metadata", "versions", or one of
    the per-version "manifest", "src" or "file" entries (or their "name",
    "sha256" and "size" keys) is missing.
    """
    metadata = data["metadata"]

    # we map categories to keyword
    # "categories": ["Time"],
    keywords = metadata.get('categories', [])

    # "issueTracker": "https://github.com/jdmonin/anstop/issues",
    bug_tracking_url = metadata.get('issueTracker')

    # "license": "GPL-2.0-only",
    # this is supposed to be an SPDX expression
    declared_license = metadata.get('license')

    # "sourceCode": "https://github.com/jdmonin/anstop",
    vcs_url = metadata.get('sourceCode')

    # "webSite": "https://sourceforge.net/projects/androidspeedo",
    homepage_url = metadata.get('webSite')

    description = build_description(metadata, language="en-US")

    parties = []
    # "authorEmail": "[email protected]",
    # "authorName": "Jigsaw",
    # "authorWebSite": "https://jigsaw.google.com/",
    author_name = metadata.get('authorName')
    author_email = metadata.get('authorEmail')
    author_url = metadata.get('authorWebSite')
    # only report an author when at least one of its attributes is known
    if any([author_name, author_email, author_url]):
        parties.append(Party(
            type=party_person,
            name=author_name,
            role="author",
            email=author_email,
            url=author_url,
        ))

    # TODO: add these
    # release_date
    # code_view_url
    # copyright
    #
    # and changelog, sourceCode, donate, translation, antiFeatures

    base_purl = PackageURL.from_string(purl)

    # attributes shared by the source and apk packages of every version
    shared_data = dict(
        datasource_id='fdroid-index',
        type=base_purl.type,
        name=base_purl.name,
        keywords=keywords,
        bug_tracking_url=bug_tracking_url,
        declared_license=declared_license,
        vcs_url=vcs_url,
        homepage_url=homepage_url,
        repository_homepage_url=f'https://f-droid.org/en/packages/{base_purl.name}',
        description=description,
        parties=parties,
    )

    # "versions": {
    #   "78ec7805f5a49b156fbd5f6af174c1cd8ae9900c9c7af2b2df021aca8cd5eae9": {
    #     "added": 1344556800000,
    #     "file": {
    #       "name": "/An.stop_10.apk", ....
    # The keys of this mapping are not used: only the version data matters.
    for version_data in data['versions'].values():
        # TODO: collect versionName
        version_code = str(version_data['manifest']['versionCode'])
        logger.debug('build_packages: base_purl: %s version: %s', base_purl, version_code)
        logger.debug('build_packages: data: %s', version_data)

        # TODO: add release_date from "added": 1655164800000,

        # these must exist since F-Droid builds from sources
        src_data = version_data['src']
        src_download_url = _get_repo_download_url(src_data['name'])

        source_package_data = PackageData(
            version=version_code,
            download_url=src_download_url,
            repository_download_url=src_download_url,
            sha256=src_data['sha256'],
            size=src_data['size'],
            **shared_data,
        )
        yield source_package_data

        source_purl = PackageURL(
            type=source_package_data.type,
            name=source_package_data.name,
            version=source_package_data.version,
            qualifiers=dict(download_url=src_download_url),
        )

        # these must exist or there is no F-Droid package
        apk_data = version_data['file']
        # The apk file name gets the same leading slash treatment as the
        # source archive file name so both URLs have a consistent form.
        apk_download_url = _get_repo_download_url(apk_data['name'])

        yield PackageData(
            version=version_code,
            download_url=apk_download_url,
            repository_download_url=apk_download_url,
            sha256=apk_data['sha256'],
            size=apk_data['size'],
            source_packages=[source_purl.to_string()],
            **shared_data,
        )


def _get_repo_download_url(filename):
    """
    Return the F-Droid repository download URL for an index file ``filename``.
    Index file names may come with a leading slash (as in "/An.stop_10.apk"):
    slashes at both ends are stripped to avoid a double slash in the URL.
    """
    return f'https://f-droid.org/repo/{filename.strip("/")}'


def build_description(metadata, language='en-US'):
    r"""
    Return a description string for ``language`` assembled from the "name",
    "summary" and "description" entries of ``metadata``, joined with one part
    per line. A part that is empty, or that is repeated at the start of the
    part that follows it, is left out.

    For example::

    >>> metadata = {
    ...   "name": {"en-US": "Anstop"},
    ...   "summary": {"en-US": "A simple stopwatch"},
    ...   "description": {"en-US": "A really simple stopwatch"}
    ... }
    >>> build_description(metadata)
    'Anstop\nA simple stopwatch\nA really simple stopwatch'

    >>> metadata = {
    ...   "name": {"en-US": "Anstop"},
    ...   "summary": {"en-US": "Anstop A simple stopwatch"},
    ...   "description": {"en-US": "Anstop A simple stopwatch, nice and sweet."}
    ... }
    >>> build_description(metadata)
    'Anstop A simple stopwatch, nice and sweet.'

    >>> metadata = {
    ...   "name": {"en-US": "Anstop"},
    ...   "summary": {"dutch": "Anstop A simple stopwatch"},
    ...   "description": {"dutch": "Anstop A simple stopwatch, nice and sweet."}
    ... }
    >>> build_description(metadata)
    'Anstop'
    """
    def localized(field):
        # each field is a mapping of language code to text, and may be absent
        return (metadata.get(field) or {}).get(language)

    name = localized('name')
    summary = localized('summary')
    description = localized('description')

    # Pair each part with the part that follows it: a part is redundant when
    # the following part starts with it. The description is always kept.
    parts_and_followers = (
        (name, summary),
        (summary, description),
        (description, None),
    )
    kept = [
        part
        for part, follower in parts_and_followers
        if part and not (follower and follower.startswith(part))
    ]
    return '\n'.join(kept)

93 changes: 93 additions & 0 deletions minecode/src/discovery/visitors/fdroid.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# purldb is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/purldb for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

import codecs
import json
import xmlrpc

from packageurl import PackageURL

from discovery import seed
from discovery import visit_router
from discovery.utils import get_temp_file
from discovery.visitors import HttpJsonVisitor
from discovery.visitors import URI
from discovery.visitors import Visitor
from discovery.visitors import NonPersistentHttpVisitor

"""
Visitors for F-Droid package repositories.

NOTE: the license of F-Droid package data needs to be clarified.
See https://gitlab.com/fdroid/fdroiddata/-/issues/2826 for details

F-Droid packages come with a main JSON index and possible increment/diffs.
- https://f-droid.org/repo/index-v2.json

- this is a legacy XML index https://f-droid.org/repo/index.xml

- This top level file lists index and diffs https://f-droid.org/repo/entry.json

- This is a diff example: https://f-droid.org/repo/diff/1666980277000.json

- Each apk is available from a URL using this form:

https://f-droid.org/repo/app.seeneva.reader_3.apk
https://f-droid.org/repo/{application_id}_{version_code}.apk

The {application_id}_{version_code}.apk "file name" for each tarball and
apk file name is listed in the index.
"""


class FdroidSeed(seed.Seeder):
    """
    Seeder for the F-Droid package repository.
    """

    def get_seeds(self):
        """
        Yield the URL of the F-Droid JSON index.
        """
        index_urls = ('https://f-droid.org/repo/index-v2.json',)
        yield from index_urls


def build_purl(package_id, version_code, filename):
    """
    Return a PackageURL of type "fdroid" for an F-Droid package using
    ``package_id`` as the name, ``version_code`` as the version and
    ``filename`` as a "filename" qualifier.
    """
    purl_fields = {
        'type': 'fdroid',
        'name': package_id,
        'version': version_code,
        'qualifiers': {'filename': filename},
    }
    return PackageURL(**purl_fields)


@visit_router.route('https://f-droid.org/repo/index-v2.json')
class FdroidIndexVisitor(NonPersistentHttpVisitor):
    """
    Collect package metadata URIs from the F-Droid index for each package.
    We treat each apk and corresponding source tarball as a different package.
    """

    def get_uris(self, content):
        """
        Yield a URI for each F-Droid package found in the index.

        ``content`` is used as the location of a file holding the fetched JSON
        index: it must contain a top-level "packages" mapping of package id to
        package data. Each yielded URI uses the package purl as its ``uri`` and
        ``package_url``, and carries the package data serialized as compact
        JSON in ``data``.
        """
        # JSON text is UTF-8 and the index holds non-ASCII text: do not rely
        # on the platform default encoding to decode it.
        with open(content, encoding='utf-8') as index_file:
            index = json.load(index_file)

        for package_id, package_data in index['packages'].items():
            purl = PackageURL(type='fdroid', name=package_id).to_string()
            yield URI(
                uri=purl,
                package_url=purl,
                source_uri=self.uri,
                data=json.dumps(package_data, separators=(',', ':'), ensure_ascii=False),
                # note: visited is True since there is nothing more to visit
                visited=True,
            )
55 changes: 55 additions & 0 deletions minecode/tests/discovery/test_fdroid.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# purldb is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/purldb for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

import json
import os

from mock import patch

from discovery.utils_test import mocked_requests_get
from discovery.utils_test import JsonBasedTesting

from discovery.mappers import fdroid as fdroid_mapper
from discovery.visitors import fdroid as fdroid_visitor
from discovery.visitors import URI


class TestFdroidVisitor(JsonBasedTesting):
    test_data_dir = os.path.join(os.path.dirname(__file__), 'testfiles')

    def test_FdroidPackageRepoVisitor(self):
        index_uri = 'https://f-droid.org/repo/index-v2.json'
        index_loc = self.get_test_loc('fdroid/index-v2.json')

        # serve the local test index in place of a real HTTP fetch
        with patch('requests.get') as mock_http_get:
            mock_http_get.return_value = mocked_requests_get(index_uri, index_loc)
            uris, data, _errors = fdroid_visitor.FdroidIndexVisitor(index_uri)

        # a non-persistent visitor must not return any data
        assert not data

        expected_uris_loc = self.get_test_loc('fdroid/index-v2-expected-visit.json')
        self.check_expected_uris(uris, expected_uris_loc, regen=False)


class TestFdroidMapper(JsonBasedTesting):
    test_data_dir = os.path.join(os.path.dirname(__file__), 'testfiles')

    def test_build_packages(self):
        visited_loc = self.get_test_loc('fdroid/index-v2-visited.json')
        with open(visited_loc) as visited_file:
            raw_uris = json.load(visited_file)

        # rebuild the visited URIs, then extract the purl and index data of each
        visited_uris = [URI(**raw_uri) for raw_uri in raw_uris]
        purls_and_data = [
            (visited.package_url, json.loads(visited.data))
            for visited in visited_uris
        ]

        # map every visited package and flatten the results as plain mappings
        mapped_packages = [
            package.to_dict()
            for purl, data in purls_and_data
            for package in fdroid_mapper.build_packages(purl, data)
        ]

        expected_loc = self.get_test_loc('fdroid/index-v2-visited-expected-mapped.json')
        self.check_expected_results(mapped_packages, expected_loc, regen=False)
Loading