From aee9ff2a079e4b1f238696eebd670597ee9e26b1 Mon Sep 17 00:00:00 2001 From: Manasa Venkatakrishnan Date: Mon, 1 May 2023 15:51:03 -0700 Subject: [PATCH 1/3] Changing use of timestamp keyword in query --- data-workflows/activity/github_activity_model.py | 8 ++++---- data-workflows/activity/snowflake_adapter.py | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/data-workflows/activity/github_activity_model.py b/data-workflows/activity/github_activity_model.py index 4661070d1..c5a8d489a 100644 --- a/data-workflows/activity/github_activity_model.py +++ b/data-workflows/activity/github_activity_model.py @@ -9,7 +9,7 @@ from pynamodb.attributes import UnicodeAttribute, NumberAttribute from utils.utils import get_current_timestamp, date_to_utc_timestamp_in_millis, datetime_to_utc_timestamp_in_millis -from plugin.helpers import _get_cache, _get_repo_to_plugin_dict +from plugin.helpers import _get_repo_to_plugin_dict LOGGER = logging.getLogger() @@ -27,11 +27,11 @@ def __new__(cls, timestamp_formatter, type_identifier_formatter, query_projectio return github_activity_type LATEST = (datetime_to_utc_timestamp_in_millis, 'LATEST:{0}', - 'repo AS name, to_timestamp(max(commit_author_date)) as latest_commit', 'name') + 'repo AS name, to_timestamp(max(commit_author_date)) AS latest_commit', 'name') MONTH = (date_to_utc_timestamp_in_millis, 'MONTH:{1:%Y%m}:{0}', - 'repo AS name, date_trunc("month", to_date(commit_author_date)) as month, count(*) as commit_count', + 'repo AS name, date_trunc("month", to_date(commit_author_date)) AS month, count(*) AS commit_count', 'name, month') - TOTAL = (lambda timestamp: None, 'TOTAL:{0}', 'repo AS name, count(*) as commit_count', 'name') + TOTAL = (lambda timestamp: None, 'TOTAL:{0}', 'repo AS name, count(*) AS commit_count', 'name') def format_to_timestamp(self, timestamp: datetime) -> Union[int, None]: return self.timestamp_formatter(timestamp) diff --git a/data-workflows/activity/snowflake_adapter.py b/data-workflows/activity/snowflake_adapter.py index c6bf7293d..dd8fdd710 100644 --- a/data-workflows/activity/snowflake_adapter.py +++ b/data-workflows/activity/snowflake_adapter.py @@ -38,7 +38,7 @@ def get_plugins_install_count_since_timestamp(plugins_by_earliest_ts: dict[str, query = f""" SELECT LOWER(file_project) AS name, - {install_activity_type.get_query_timestamp_projection()} AS timestamp, + {install_activity_type.get_query_timestamp_projection()} AS ts, COUNT(*) AS count FROM imaging.pypi.labeled_downloads @@ -46,8 +46,8 @@ def get_plugins_install_count_since_timestamp(plugins_by_earliest_ts: dict[str, download_type = 'pip' AND project_type = 'plugin' AND ({_generate_subquery_by_type(plugins_by_earliest_ts, install_activity_type)}) - GROUP BY name, timestamp - ORDER BY name, timestamp + GROUP BY name, ts + ORDER BY name, ts """ LOGGER.info(f'Fetching data for granularity={install_activity_type.name}') return _mapped_query_results(query, 'PYPI', {}, _cursor_to_plugin_activity_mapper) From 3f9f3135f8f1c0fb2c4f8907d6d9682c6edb5381 Mon Sep 17 00:00:00 2001 From: Manasa Venkatakrishnan Date: Mon, 1 May 2023 16:39:14 -0700 Subject: [PATCH 2/3] Fixing unit tests --- data-workflows/activity/tests/test_snowflake_adapter.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/data-workflows/activity/tests/test_snowflake_adapter.py b/data-workflows/activity/tests/test_snowflake_adapter.py index 0e77ba906..9bfcd9a88 100644 --- a/data-workflows/activity/tests/test_snowflake_adapter.py +++ b/data-workflows/activity/tests/test_snowflake_adapter.py @@ -42,7 +42,7 @@ def get_plugins_install_count_since_timestamp_query(projection, subquery): return f""" SELECT LOWER(file_project) AS name, - {projection} AS timestamp, + {projection} AS ts, COUNT(*) AS count FROM imaging.pypi.labeled_downloads @@ -50,8 +50,8 @@ def get_plugins_install_count_since_timestamp_query(projection, subquery): download_type = 'pip' AND project_type = 'plugin' AND ({subquery}) - GROUP BY name, timestamp - ORDER BY name, timestamp + GROUP BY name, ts + ORDER BY name, ts """ From 872b22c6ba6d8fc21bf6909ce4c7877c88328071 Mon Sep 17 00:00:00 2001 From: Manasa Venkatakrishnan Date: Mon, 1 May 2023 16:46:40 -0700 Subject: [PATCH 3/3] Consistency funsies --- data-workflows/activity/github_activity_model.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/data-workflows/activity/github_activity_model.py b/data-workflows/activity/github_activity_model.py index c5a8d489a..d23a58664 100644 --- a/data-workflows/activity/github_activity_model.py +++ b/data-workflows/activity/github_activity_model.py @@ -27,11 +27,11 @@ def __new__(cls, timestamp_formatter, type_identifier_formatter, query_projectio return github_activity_type LATEST = (datetime_to_utc_timestamp_in_millis, 'LATEST:{0}', - 'repo AS name, to_timestamp(max(commit_author_date)) AS latest_commit', 'name') + 'repo AS name, TO_TIMESTAMP(MAX(commit_author_date)) AS latest_commit', 'name') MONTH = (date_to_utc_timestamp_in_millis, 'MONTH:{1:%Y%m}:{0}', - 'repo AS name, date_trunc("month", to_date(commit_author_date)) AS month, count(*) AS commit_count', + 'repo AS name, DATE_TRUNC("month", TO_DATE(commit_author_date)) AS month, COUNT(*) AS commit_count', 'name, month') - TOTAL = (lambda timestamp: None, 'TOTAL:{0}', 'repo AS name, count(*) AS commit_count', 'name') + TOTAL = (lambda timestamp: None, 'TOTAL:{0}', 'repo AS name, COUNT(*) AS commit_count', 'name') def format_to_timestamp(self, timestamp: datetime) -> Union[int, None]: return self.timestamp_formatter(timestamp) @@ -43,7 +43,7 @@ def _create_subquery(self, plugins_by_earliest_ts: dict[str, datetime]) -> str: if self is GitHubActivityType.MONTH: return " OR ".join( [ - f"repo = '{name}' AND to_timestamp(commit_author_date) >= " + f"repo = '{name}' AND TO_TIMESTAMP(commit_author_date) >= " f"{TIMESTAMP_FORMAT.format(ts.replace(day=1))}" for name, ts in plugins_by_earliest_ts.items() ]