Skip to content

Commit

Permalink
Add Capabilities queries (#2322)
Browse files Browse the repository at this point in the history
* Add queries

* Remove incorrect import

* Try to make the linter happy

* Lint

* Combine queries

* Add file

* Update sql/2021/capabilities/fugu.sql

Co-authored-by: Barry Pollard <[email protected]>

* Update sql/2021/capabilities/fugu.sql

Co-authored-by: Barry Pollard <[email protected]>

* Add top query

* Add #standardSQL

Co-authored-by: Barry Pollard <[email protected]>
Co-authored-by: Barry <[email protected]>
  • Loading branch information
3 people authored Aug 30, 2021
1 parent 65f171a commit 77556e7
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 0 deletions.
39 changes: 39 additions & 0 deletions sql/2021/capabilities/fugu.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#standardSQL
# Returns the top-level keys of the JSON document passed in `data`.
# The queries in this file pass the page's `_fugu-apis` payload entry, so each
# returned key is treated as one detected Fugu API name.
#
# data:    JSON-encoded string; may be NULL when the entry is absent.
# returns: the keys of the parsed value, or an empty array when `data` is NULL,
#          is not valid JSON, or does not parse to an object/array.
CREATE TEMP FUNCTION getFuguAPIs(data STRING)
RETURNS ARRAY<STRING>
LANGUAGE js AS '''
try {
  const $ = JSON.parse(data);
  // JSON.parse(null) yields null, and Object.keys(null) throws a TypeError,
  // which would abort the whole query; treat non-objects as "no APIs".
  if ($ === null || typeof $ !== 'object') {
    return [];
  }
  return Object.keys($);
} catch (e) {
  // Malformed JSON for a single page should not fail the entire query.
  return [];
}
''';

# Per client and Fugu API: number of distinct pages using the API, the total
# number of page rows for that client, the resulting fraction, and up to 50
# sample URLs. APIs seen on fewer than 10 pages are dropped.
WITH totals AS (
  # One row per table suffix with the number of rows in that pages table.
  # NOTE(review): suffix is presumably desktop/mobile - confirm against dataset.
  SELECT
    _TABLE_SUFFIX AS client,
    COUNT(0) AS total
  FROM
    `httparchive.pages.2021_07_01_*`
  GROUP BY
    client
),

page_apis AS (
  # One row per (page, API key) for pages whose `_fugu-apis` entry is not "[]".
  SELECT
    _TABLE_SUFFIX AS client,
    url,
    fuguAPI
  FROM
    `httparchive.pages.2021_07_01_*`
  CROSS JOIN
    UNNEST(getFuguAPIs(JSON_QUERY(payload, '$."_fugu-apis"'))) AS fuguAPI
  WHERE
    JSON_QUERY(payload, '$."_fugu-apis"') != "[]"
)

SELECT
  client,
  fuguAPI,
  COUNT(DISTINCT url) AS pages,
  total,
  # Fraction in the range 0-1, not a percentage.
  COUNT(DISTINCT url) / total AS pct,
  ARRAY_TO_STRING(ARRAY_AGG(DISTINCT url LIMIT 50), ' ') AS sample_urls
FROM
  page_apis
INNER JOIN
  totals
USING
  (client)
GROUP BY
  fuguAPI,
  client,
  total
HAVING
  COUNT(DISTINCT url) >= 10
ORDER BY
  pct DESC,
  client;
27 changes: 27 additions & 0 deletions sql/2021/capabilities/top.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#standardSQL
# Returns the top-level keys of the JSON document passed in `data`.
# The query in this file passes the page's `_fugu-apis` payload entry, so each
# returned key is treated as one detected Fugu API name.
#
# data:    JSON-encoded string; may be NULL when the entry is absent.
# returns: the keys of the parsed value, or an empty array when `data` is NULL,
#          is not valid JSON, or does not parse to an object/array.
CREATE TEMP FUNCTION getFuguAPIs(data STRING)
RETURNS ARRAY<STRING>
LANGUAGE js AS '''
try {
  const $ = JSON.parse(data);
  // JSON.parse(null) yields null, and Object.keys(null) throws a TypeError,
  // which would abort the whole query; treat non-objects as "no APIs".
  if ($ === null || typeof $ !== 'object') {
    return [];
  }
  return Object.keys($);
} catch (e) {
  // Malformed JSON for a single page should not fail the entire query.
  return [];
}
''';

# The 100 (client, url) pairs with the most distinct Fugu API keys, ordered by
# that count descending, then by url and client.
WITH page_apis AS (
  # One row per (page, API key) for pages whose `_fugu-apis` entry is not "[]".
  SELECT
    _TABLE_SUFFIX AS client,
    url,
    fuguAPI
  FROM
    `httparchive.pages.2021_07_01_*`
  CROSS JOIN
    UNNEST(getFuguAPIs(JSON_QUERY(payload, '$."_fugu-apis"'))) AS fuguAPI
  WHERE
    JSON_QUERY(payload, '$."_fugu-apis"') != "[]"
)

SELECT
  client,
  url,
  COUNT(DISTINCT fuguAPI) AS fuguAPIs
FROM
  page_apis
GROUP BY
  client,
  url
HAVING
  COUNT(DISTINCT fuguAPI) >= 1
ORDER BY
  fuguAPIs DESC,
  url,
  client
LIMIT 100;

0 comments on commit 77556e7

Please sign in to comment.