Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add option to get enhanced package data in API #157

Merged
merged 6 commits into from
Aug 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 19 additions & 13 deletions packagedb/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -503,7 +503,7 @@ def filter_by_checksums(self, request, *args, **kwargs):
data = dict(request.data)
unsupported_fields = []
for field, value in data.items():
if field not in ('md5', 'sha1', 'sha256', 'sha512'):
if field not in ('md5', 'sha1', 'sha256', 'sha512', 'enhance_package_data'):
unsupported_fields.append(field)

if unsupported_fields:
Expand All @@ -513,6 +513,7 @@ def filter_by_checksums(self, request, *args, **kwargs):
}
return Response(response_data)

enhance_package_data = data.pop('enhance_package_data', False)
q = Q()
for field, value in data.items():
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it would be better to pop enhance_package_data from data before iterating through it, then use enhance_package_data afterwards.

Suggested change
for field, value in data.items():
enhance_package_data = data.pop('enhance_package_data', False)
for field, value in data.items():

# We create this intermediate dictionary so we can modify the field
Expand All @@ -522,8 +523,12 @@ def filter_by_checksums(self, request, *args, **kwargs):

qs = Package.objects.filter(q)
paginated_qs = self.paginate_queryset(qs)
serializer = PackageAPISerializer(paginated_qs, many=True, context={'request': request})
return self.get_paginated_response(serializer.data)
if enhance_package_data:
serialized_package_data = [get_enhanced_package(package=package) for package in paginated_qs]
else:
serializer = PackageAPISerializer(paginated_qs, many=True, context={'request': request})
serialized_package_data = serializer.data
return self.get_paginated_response(serialized_package_data)


UPDATEABLE_FIELDS = [
Expand Down Expand Up @@ -584,8 +589,16 @@ def get_enhanced_package(package):
other packages in the same package_set.
"""
package_content = package.package_content
if package_content == PackageContentType.SOURCE_REPO:
in_package_sets = package.package_sets.count() > 0
if (
not in_package_sets
or not package_content
or package_content == PackageContentType.SOURCE_REPO
):
# Return unenhanced package data for packages that are not in a package
# set, have no package content set, or are source repo packages.
# Source repo packages can't really be enhanced much further, datawise,
# and we can't enhance a package that is not in a package set.
return package.to_dict()
if package_content in [PackageContentType.BINARY, PackageContentType.SOURCE_ARCHIVE]:
# Binary packages can only be part of one set
Expand All @@ -609,16 +622,9 @@ def _get_enhanced_package(package, packages):
Return a mapping of package data based on `package` and Packages in
`packages`.
"""
mixing = False
package_data = {}
package_data = package.to_dict()
for peer in packages:
if peer == package:
mixing = True
package_data = package.to_dict()
continue
if not mixing:
continue
if peer.package_content == package.package_content:
if peer.package_content >= package.package_content:
# We do not want to mix data with peers of the same or greater package content
continue
enhanced = False
Expand Down
6 changes: 0 additions & 6 deletions packagedb/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1061,11 +1061,5 @@ def add_to_package_set(self, package):
def get_package_set_members(self):
"""Return related Packages"""
return self.packages.order_by(
'type',
'namespace',
'name',
'version',
'qualifiers',
'subpath',
'package_content',
)
93 changes: 89 additions & 4 deletions packagedb/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,14 +265,92 @@ def setUp(self):
self.package3 = Package.objects.create(**self.package_data3)
self.package3.refresh_from_db()

self.package_data4= {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you should add two new maven packages, one of them a binary package, the other a source_archive package. These packages should have the same purl fields (type, namespace, name, version) with the qualifiers being different. On the binary package, leave out license information, and on the source archive package, have license information. We want to see the enhanced package data returned for the binary package in the test results when we enable the option.

'type': 'jar',
'namespace': 'sample',
'name': 'Baz',
'version': '90.123',
'qualifiers': '',
'subpath': '',
'download_url': 'http://anothersample.com',
'filename': 'Baz.zip',
'sha1': 'testsha1-4',
'md5': 'testmd5-3',
'size': 100,
'package_content': PackageContentType.BINARY,
}
self.package4 = Package.objects.create(**self.package_data4)
self.package4.refresh_from_db()

self.package_data5= {
'type': 'maven',
'namespace': 'foot',
'name': 'baz',
'version': '90.123',
'qualifiers': 'classifier=source',
'subpath': '',
'download_url': 'http://test-maven.com',
'filename': 'Baz.zip',
'sha1': 'testsha1-5',
'md5': 'testmd5-11',
'size': 100,
'package_content': PackageContentType.SOURCE_ARCHIVE,
'declared_license_expression': 'MIT',
}

self.package5 = Package.objects.create(**self.package_data5)
self.package5.refresh_from_db()

self.package_data6= {
'type': 'maven',
'namespace': 'fooo',
'name': 'baz',
'version': '90.123',
'qualifiers': '',
'subpath': '',
'download_url': 'http://test-maven-11.com',
'filename': 'Baz.zip',
'sha1': 'testsha1-6',
'md5': 'testmd5-11',
'size': 100,
'package_content': PackageContentType.BINARY,
}

self.package6 = Package.objects.create(**self.package_data6)
self.package6.refresh_from_db()

self.package_data7= {
'type': 'github',
'namespace': 'glue',
'name': 'cat',
'version': '90.123',
'qualifiers': '',
'subpath': '',
'download_url': 'http://test-maven-111.com',
'filename': 'Baz.zip',
'sha1': 'testsha1-7',
'md5': 'testmd5-11',
'size': 100,
'copyright': 'BACC',
'package_content': PackageContentType.SOURCE_REPO,
}

self.package7 = Package.objects.create(**self.package_data7)
self.package7.refresh_from_db()

self.packageset_1 = PackageSet.objects.create()
self.packageset_1.packages.add(self.package6)
self.packageset_1.packages.add(self.package5)
self.packageset_1.packages.add(self.package7)

self.test_url = 'http://testserver/api/packages/{}/'

self.client = APIClient()

def test_package_api_list_endpoint(self):
response = self.client.get('/api/packages/')
self.assertEqual(response.status_code, status.HTTP_200_OK)
self.assertEqual(3, response.data.get('count'))
self.assertEqual(7, response.data.get('count'))

def test_package_api_list_endpoint_filter(self):
for key, value in self.package_data.items():
Expand Down Expand Up @@ -448,14 +526,21 @@ def test_package_api_filter_by_checksums(self):
'testsha1',
'testsha1-2',
'testsha1-3',
'testsha1-4',
'testsha1-6',
]
data = {
'sha1': sha1s
'sha1': sha1s,
}
response = self.client.post('/api/packages/filter_by_checksums/', data=data)
self.assertEqual(3, response.data['count'])
self.assertEqual(5, response.data['count'])
expected = self.get_test_loc('api/package-filter_by_checksums-expected.json')
self.check_expected_results(response.data['results'], expected, fields_to_remove=["url", "uuid", "resources"], regen=False)
self.check_expected_results(response.data['results'], expected, fields_to_remove=["url", "uuid", "resources", "package_sets",], regen=False)
data["enhance_package_data"] = True
enhanced_response = self.client.post('/api/packages/filter_by_checksums/', data=data)
self.assertEqual(5, len(enhanced_response.data['results']))
expected = self.get_test_loc('api/package-filter_by_checksums-enhanced-package-data-expected.json')
self.check_expected_results(enhanced_response.data['results'], expected, fields_to_remove=["url", "uuid", "resources", "package_sets",], regen=False)


class PackageApiReindexingTestCase(JsonBasedTesting, TestCase):
Expand Down
Loading