Skip to content

Commit

Permalink
Fixing use of the `timestamp` keyword as a column alias in the Snowflake query (#1014)
Browse files Browse the repository at this point in the history
* Changing the use of the `timestamp` keyword in the Snowflake query

* Updating unit tests

* Consistency cleanup: uppercase SQL keywords and function names in the query strings
  • Loading branch information
manasaV3 authored May 1, 2023
1 parent 7c27f26 commit 5585a29
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 11 deletions.
10 changes: 5 additions & 5 deletions data-workflows/activity/github_activity_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from pynamodb.attributes import UnicodeAttribute, NumberAttribute

from utils.utils import get_current_timestamp, date_to_utc_timestamp_in_millis, datetime_to_utc_timestamp_in_millis
from plugin.helpers import _get_cache, _get_repo_to_plugin_dict
from plugin.helpers import _get_repo_to_plugin_dict


LOGGER = logging.getLogger()
Expand All @@ -27,11 +27,11 @@ def __new__(cls, timestamp_formatter, type_identifier_formatter, query_projectio
return github_activity_type

LATEST = (datetime_to_utc_timestamp_in_millis, 'LATEST:{0}',
'repo AS name, to_timestamp(max(commit_author_date)) as latest_commit', 'name')
'repo AS name, TO_TIMESTAMP(MAX(commit_author_date)) AS latest_commit', 'name')
MONTH = (date_to_utc_timestamp_in_millis, 'MONTH:{1:%Y%m}:{0}',
'repo AS name, date_trunc("month", to_date(commit_author_date)) as month, count(*) as commit_count',
'repo AS name, DATE_TRUNC("month", TO_DATE(commit_author_date)) AS month, COUNT(*) AS commit_count',
'name, month')
TOTAL = (lambda timestamp: None, 'TOTAL:{0}', 'repo AS name, count(*) as commit_count', 'name')
TOTAL = (lambda timestamp: None, 'TOTAL:{0}', 'repo AS name, COUNT(*) AS commit_count', 'name')

def format_to_timestamp(self, timestamp: datetime) -> Union[int, None]:
return self.timestamp_formatter(timestamp)
Expand All @@ -43,7 +43,7 @@ def _create_subquery(self, plugins_by_earliest_ts: dict[str, datetime]) -> str:
if self is GitHubActivityType.MONTH:
return " OR ".join(
[
f"repo = '{name}' AND to_timestamp(commit_author_date) >= "
f"repo = '{name}' AND TO_TIMESTAMP(commit_author_date) >= "
f"{TIMESTAMP_FORMAT.format(ts.replace(day=1))}"
for name, ts in plugins_by_earliest_ts.items()
]
Expand Down
6 changes: 3 additions & 3 deletions data-workflows/activity/snowflake_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,16 +38,16 @@ def get_plugins_install_count_since_timestamp(plugins_by_earliest_ts: dict[str,
query = f"""
SELECT
LOWER(file_project) AS name,
{install_activity_type.get_query_timestamp_projection()} AS timestamp,
{install_activity_type.get_query_timestamp_projection()} AS ts,
COUNT(*) AS count
FROM
imaging.pypi.labeled_downloads
WHERE
download_type = 'pip'
AND project_type = 'plugin'
AND ({_generate_subquery_by_type(plugins_by_earliest_ts, install_activity_type)})
GROUP BY name, timestamp
ORDER BY name, timestamp
GROUP BY name, ts
ORDER BY name, ts
"""
LOGGER.info(f'Fetching data for granularity={install_activity_type.name}')
return _mapped_query_results(query, 'PYPI', {}, _cursor_to_plugin_activity_mapper)
Expand Down
6 changes: 3 additions & 3 deletions data-workflows/activity/tests/test_snowflake_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,16 +42,16 @@ def get_plugins_install_count_since_timestamp_query(projection, subquery):
return f"""
SELECT
LOWER(file_project) AS name,
{projection} AS timestamp,
{projection} AS ts,
COUNT(*) AS count
FROM
imaging.pypi.labeled_downloads
WHERE
download_type = 'pip'
AND project_type = 'plugin'
AND ({subquery})
GROUP BY name, timestamp
ORDER BY name, timestamp
GROUP BY name, ts
ORDER BY name, ts
"""


Expand Down

0 comments on commit 5585a29

Please sign in to comment.