-
-
Notifications
You must be signed in to change notification settings - Fork 185
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Media 2021 queries (#2144) * Test query for 'encoding -> format use' * Percent of pages with picture elements, and distribution of number of picture elements per page * Simplify query, based on advice from Kevin Farrugia * Basic script for counting no of images that use lazy loading * Replaced JSON_EXTRACT_SCALAR with JSON_VALUE as the former is deprecated * Adding the query to pull in details about the usage of , , and responsive dimension specification based on new custom metrics * Copying and updating last year's media queries * Adding a file from Performance chapter and tweaking it to report image dimension by industry vertical * Updated the alt query to also count images that have attribute * Removing reference to "decode=lazy" from alt tag SQL; creating a new query for decode usage * removing a condition that is not valid for this query * correcting name of a output query field to indicate % * Add extra totals columns, query July data * Fix for loop; 'lazy' → 'async' * Add average image query * Deal with nulls and INT64 limits * Adding a query to find the usage of image cdns * Adding a SQL to count cross domain image requests * Bits per pixel, by format * A few fixes to BPP/format query * Align on distribution percentiles * Copying Colin's query for top media queries from 2019 SQL code base * Changed the description of the query * Corrected the looping construct to make it more readable * Two bytes and dimensions queries to rule them all * Img elements with one and zero pixel resources ...excluded from other analysis * Top aspect ratios query * Remove unnecessary queries * Portrait / aspect ratio / square query * Comment out smallImageCount and bigImageCount constraint ...which was making it return no results? * Most common sizes values * Sizes implicit vs explicit, and parse errors * .sql * linting * lint * lint * lint * remaining queries * lint * 1x1 and 0x0 Co-authored-by: Eric Portis <[email protected]> Co-authored-by: Akshay Ranganath <[email protected]>
- Loading branch information
1 parent
58c269d
commit 5de0fe6
Showing
40 changed files
with
1,716 additions
and
32 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,3 +8,4 @@ | |
Analysts: if helpful, you can use this README to give additional info about the queries. | ||
--> | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,170 @@ | ||
-- Parses the JSON array string of per-image records and returns one struct
-- per record: the image URL, its approximate resource dimensions, byte size,
-- bits per pixel, a 1x1 flag, a data-URL flag and a normalized format label.
--
-- Returns an empty array (so UNNEST yields no rows) when the input is NULL,
-- is not valid JSON, is not a JSON array, or contains a record that cannot be
-- read. A single bad payload therefore no longer aborts the whole query.
CREATE TEMPORARY FUNCTION getSrcsetInfo(responsiveImagesJsonString STRING)
RETURNS ARRAY<STRUCT<imgURL STRING, approximateResourceWidth INT64, approximateResourceHeight INT64, byteSize INT64, bitsPerPixel NUMERIC, isPixel BOOL, isDataURL BOOL, resourceFormat STRING>>
LANGUAGE js AS '''
  // Derives a short format label (e.g. jpg, png, webp) for an image.
  // Sources are tried in this order: the MIME type embedded in a data URL,
  // the reported content type, then the file extension of the URL.
  // Anything that is not in subtypeMap is reported as the string unknown.
  function pithyType( { contentType, url } ) {
    const subtypeMap = {
      'svg+xml': 'svg',
      'svgz': 'svg',
      'jpeg': 'jpg',
      'jfif': 'jpg',
      'x-png': 'png',
      'vnd.microsoft.icon': 'ico',
      'x-icon': 'ico',
      'jxr': 'jxr',
      'vnd.ms-photo': 'jxr',
      'hdp': 'jxr',
      'wdp': 'jxr',
      'jpf': 'jp2',
      'jpx': 'jp2',
      'jpm': 'jp2',
      'mj2': 'jp2',
      'x-jp2-container': 'jp2',
      'x-jp2-codestream': 'jp2',
      'x-jpeg2000-image': 'jp2',
      'heic': 'heif',
      'x-ms-bmp': 'bmp',
      'x-pict': 'pict',
      'tif': 'tiff',
      'x-tif': 'tiff',
      'x-tiff': 'tiff',
      'vnd.mozilla.apng': 'apng',
      // identities
      'apng': 'apng',
      'jpg': 'jpg',
      'jp2': 'jp2',
      'png': 'png',
      'gif': 'gif',
      'ico': 'ico',
      'webp': 'webp',
      'avif': 'avif',
      'tiff': 'tiff',
      'flif': 'flif',
      'heif': 'heif',
      'jxl': 'jxl',
      'avif-sequence': 'avif-sequence', // keep separate from single frames...
      'heic-sequence': 'heic-sequence',
      'bmp': 'bmp',
      'pict': 'pict'
    };

    // Maps a raw subtype or extension onto its canonical label.
    function normalizeSubtype( subtype ) {
      if ( subtypeMap[ subtype ] ) {
        return subtypeMap[ subtype ];
      }
      return 'unknown'; // switch between:
                        // `subtype`
                        //   to see everything, check if there's anything else worth capturing
                        // `'unknown'`
                        //   to make results manageable
    }

    // if it's a data url, take the mime type from there, done.
    if ( url &&
         typeof url === "string" ) {
      const match = url.toLowerCase().match( /^data:image\\/([\\w\\-\\.\\+]+)/ );
      if ( match && match[ 1 ] ) {
        return normalizeSubtype( match[ 1 ] );
      }
    }

    // if we get a content-type header, use it!
    if ( contentType &&
         typeof contentType === "string" ) {
      const match = contentType.toLowerCase().match( /image\\/([\\w\\-\\.\\+]+)/ );
      if ( match && match[ 1 ] ) {
        return normalizeSubtype( match[ 1 ] );
      }
    }

    // otherwise fall back to extension in the URL
    if ( url &&
         typeof url === "string" ) {
      const splitOnSlashes = url.split("/");
      if ( splitOnSlashes.length > 1 ) {
        const afterLastSlash = splitOnSlashes[ splitOnSlashes.length - 1 ],
              splitOnDots = afterLastSlash.split(".");
        if ( splitOnDots.length > 1 ) {
          return normalizeSubtype(
            splitOnDots[ splitOnDots.length - 1 ]
              .toLowerCase()
              .replace( /^(\\w+)[\\?\\&\\#].*/, '$1' ) // strip query params
          );
        }
      }
    }

    // otherwise throw up our hands
    return 'unknown';
  }

  try {
    const parsed = JSON.parse( responsiveImagesJsonString );
    if ( !Array.isArray( parsed ) ) {
      // NULL input parses to null; objects and scalars are not image lists.
      return [];
    }
    const dataRegEx = new RegExp('^data');
    return parsed.map( d => ({
      imgURL: d.url,
      // Missing dimensions and sizes default to 0; fractional values are floored to fit INT64.
      approximateResourceWidth: Math.floor( d.approximateResourceWidth || 0 ),
      approximateResourceHeight: Math.floor( d.approximateResourceHeight || 0 ),
      byteSize: Math.floor( d.byteSize || 0 ),
      bitsPerPixel: parseFloat( d.bitsPerPixel || 0 ),
      isPixel: d.approximateResourceWidth == 1 && d.approximateResourceHeight == 1,
      isDataURL: dataRegEx.test(d.url),
      resourceFormat: pithyType({ contentType: d.mimeType, url: d.url })
    }) );
  } catch ( e ) {
    // Malformed JSON or an unreadable record: contribute no rows instead of failing the query.
    return [];
  }
''';
|
||
-- Distribution (selected percentiles) of image resource width, height,
-- aspect ratio, megapixels, byte size and bits per pixel, per client.

-- One row per image record returned by getSrcsetInfo for every page.
WITH imgs AS (
  SELECT
    _TABLE_SUFFIX AS client,
    url AS pageURL,
    imgURL,
    approximateResourceWidth,
    approximateResourceHeight,
    byteSize,
    bitsPerPixel,
    isPixel,
    isDataURL,
    (approximateResourceWidth * approximateResourceHeight) / 1000000 AS megapixels,
    -- getSrcsetInfo defaults a missing height to 0, so plain division could
    -- raise a division-by-zero error here; SAFE_DIVIDE returns NULL instead.
    SAFE_DIVIDE(approximateResourceWidth, approximateResourceHeight) AS aspectRatio,
    resourceFormat
  FROM
    `httparchive.pages.2021_07_01_*`,
    UNNEST(getSrcsetInfo(JSON_QUERY(JSON_VALUE(payload, '$._responsive_images'), '$.responsive-images')))
),

-- APPROX_QUANTILES(x, 1000) returns 1001 boundaries, i.e. 0.1% steps.
percentiles AS (
  SELECT
    client,
    APPROX_QUANTILES(approximateResourceWidth, 1000) AS resourceWidthPercentiles,
    APPROX_QUANTILES(approximateResourceHeight, 1000) AS resourceHeightPercentiles,
    APPROX_QUANTILES(aspectRatio, 1000) AS aspectRatioPercentiles,
    APPROX_QUANTILES(megapixels, 1000) AS megapixelsPercentiles,
    APPROX_QUANTILES(byteSize, 1000) AS byteSizePercentiles,
    APPROX_QUANTILES(bitsPerPixel, 1000) AS bitsPerPixelPercentiles,
    COUNT(0) AS imgCount
  FROM
    imgs
  WHERE
    -- Only resources larger than one pixel in both dimensions are measured.
    approximateResourceWidth > 1 AND
    approximateResourceHeight > 1
  GROUP BY
    client
)

SELECT
  percentile,
  client,
  imgCount,
  -- With 1000 quantile buckets, percentile p sits at offset p * 10.
  resourceWidthPercentiles[OFFSET(percentile * 10)] AS resourceWidth,
  resourceHeightPercentiles[OFFSET(percentile * 10)] AS resourceHeight,
  aspectRatioPercentiles[OFFSET(percentile * 10)] AS aspectRatio,
  megapixelsPercentiles[OFFSET(percentile * 10)] AS megapixels,
  byteSizePercentiles[OFFSET(percentile * 10)] AS byteSize,
  bitsPerPixelPercentiles[OFFSET(percentile * 10)] AS bitsPerPixel
FROM
  percentiles,
  UNNEST([0, 10, 25, 50, 75, 90, 100]) AS percentile
ORDER BY
  imgCount DESC,
  client, -- tie-breaker keeps each client's rows together and the output deterministic
  percentile
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,173 @@ | ||
-- Parses the JSON array string of per-image records and returns one struct
-- per record: the image URL, its approximate resource dimensions, byte size,
-- bits per pixel, a 1x1 flag, a data-URL flag and a normalized format label.
--
-- Returns an empty array (so UNNEST yields no rows) when the input is NULL,
-- is not valid JSON, is not a JSON array, or contains a record that cannot be
-- read. A single bad payload therefore no longer aborts the whole query.
CREATE TEMPORARY FUNCTION getSrcsetInfo(responsiveImagesJsonString STRING)
RETURNS ARRAY<STRUCT<imgURL STRING, approximateResourceWidth INT64, approximateResourceHeight INT64, byteSize INT64, bitsPerPixel NUMERIC, isPixel BOOL, isDataURL BOOL, resourceFormat STRING>>
LANGUAGE js AS '''
  // Derives a short format label (e.g. jpg, png, webp) for an image.
  // Sources are tried in this order: the MIME type embedded in a data URL,
  // the reported content type, then the file extension of the URL.
  // Anything that is not in subtypeMap is reported as the string unknown.
  function pithyType( { contentType, url } ) {
    const subtypeMap = {
      'svg+xml': 'svg',
      'svgz': 'svg',
      'jpeg': 'jpg',
      'jfif': 'jpg',
      'x-png': 'png',
      'vnd.microsoft.icon': 'ico',
      'x-icon': 'ico',
      'jxr': 'jxr',
      'vnd.ms-photo': 'jxr',
      'hdp': 'jxr',
      'wdp': 'jxr',
      'jpf': 'jp2',
      'jpx': 'jp2',
      'jpm': 'jp2',
      'mj2': 'jp2',
      'x-jp2-container': 'jp2',
      'x-jp2-codestream': 'jp2',
      'x-jpeg2000-image': 'jp2',
      'heic': 'heif',
      'x-ms-bmp': 'bmp',
      'x-pict': 'pict',
      'tif': 'tiff',
      'x-tif': 'tiff',
      'x-tiff': 'tiff',
      'vnd.mozilla.apng': 'apng',
      // identities
      'apng': 'apng',
      'jpg': 'jpg',
      'jp2': 'jp2',
      'png': 'png',
      'gif': 'gif',
      'ico': 'ico',
      'webp': 'webp',
      'avif': 'avif',
      'tiff': 'tiff',
      'flif': 'flif',
      'heif': 'heif',
      'jxl': 'jxl',
      'avif-sequence': 'avif-sequence', // keep separate from single frames...
      'heic-sequence': 'heic-sequence',
      'bmp': 'bmp',
      'pict': 'pict'
    };

    // Maps a raw subtype or extension onto its canonical label.
    function normalizeSubtype( subtype ) {
      if ( subtypeMap[ subtype ] ) {
        return subtypeMap[ subtype ];
      }
      return 'unknown'; // switch between:
                        // `subtype`
                        //   to see everything, check if there's anything else worth capturing
                        // `'unknown'`
                        //   to make results manageable
    }

    // if it's a data url, take the mime type from there, done.
    if ( url &&
         typeof url === "string" ) {
      const match = url.toLowerCase().match( /^data:image\\/([\\w\\-\\.\\+]+)/ );
      if ( match && match[ 1 ] ) {
        return normalizeSubtype( match[ 1 ] );
      }
    }

    // if we get a content-type header, use it!
    if ( contentType &&
         typeof contentType === "string" ) {
      const match = contentType.toLowerCase().match( /image\\/([\\w\\-\\.\\+]+)/ );
      if ( match && match[ 1 ] ) {
        return normalizeSubtype( match[ 1 ] );
      }
    }

    // otherwise fall back to extension in the URL
    if ( url &&
         typeof url === "string" ) {
      const splitOnSlashes = url.split("/");
      if ( splitOnSlashes.length > 1 ) {
        const afterLastSlash = splitOnSlashes[ splitOnSlashes.length - 1 ],
              splitOnDots = afterLastSlash.split(".");
        if ( splitOnDots.length > 1 ) {
          return normalizeSubtype(
            splitOnDots[ splitOnDots.length - 1 ]
              .toLowerCase()
              .replace( /^(\\w+)[\\?\\&\\#].*/, '$1' ) // strip query params
          );
        }
      }
    }

    // otherwise throw up our hands
    return 'unknown';
  }

  try {
    const parsed = JSON.parse( responsiveImagesJsonString );
    if ( !Array.isArray( parsed ) ) {
      // NULL input parses to null; objects and scalars are not image lists.
      return [];
    }
    const dataRegEx = new RegExp('^data');
    return parsed.map( d => ({
      imgURL: d.url,
      // Missing dimensions and sizes default to 0; fractional values are floored to fit INT64.
      approximateResourceWidth: Math.floor( d.approximateResourceWidth || 0 ),
      approximateResourceHeight: Math.floor( d.approximateResourceHeight || 0 ),
      byteSize: Math.floor( d.byteSize || 0 ),
      bitsPerPixel: parseFloat( d.bitsPerPixel || 0 ),
      isPixel: d.approximateResourceWidth == 1 && d.approximateResourceHeight == 1,
      isDataURL: dataRegEx.test(d.url),
      resourceFormat: pithyType({ contentType: d.mimeType, url: d.url })
    }) );
  } catch ( e ) {
    // Malformed JSON or an unreadable record: contribute no rows instead of failing the query.
    return [];
  }
''';
|
||
-- Distribution (selected percentiles) of image resource width, height,
-- aspect ratio, megapixels, byte size and bits per pixel, per client and
-- per normalized image format.

-- One row per image record returned by getSrcsetInfo for every page.
WITH imgs AS (
  SELECT
    _TABLE_SUFFIX AS client,
    url AS pageURL,
    imgURL,
    approximateResourceWidth,
    approximateResourceHeight,
    byteSize,
    bitsPerPixel,
    isPixel,
    isDataURL,
    (approximateResourceWidth * approximateResourceHeight) / 1000000 AS megapixels,
    -- getSrcsetInfo defaults a missing height to 0, so plain division could
    -- raise a division-by-zero error here; SAFE_DIVIDE returns NULL instead.
    SAFE_DIVIDE(approximateResourceWidth, approximateResourceHeight) AS aspectRatio,
    resourceFormat
  FROM
    `httparchive.pages.2021_07_01_*`,
    UNNEST(getSrcsetInfo(JSON_QUERY(JSON_VALUE(payload, '$._responsive_images'), '$.responsive-images')))
),

-- APPROX_QUANTILES(x, 1000) returns 1001 boundaries, i.e. 0.1% steps.
percentiles AS (
  SELECT
    client,
    resourceFormat,
    APPROX_QUANTILES(approximateResourceWidth, 1000) AS resourceWidthPercentiles,
    APPROX_QUANTILES(approximateResourceHeight, 1000) AS resourceHeightPercentiles,
    APPROX_QUANTILES(aspectRatio, 1000) AS aspectRatioPercentiles,
    APPROX_QUANTILES(megapixels, 1000) AS megapixelsPercentiles,
    APPROX_QUANTILES(byteSize, 1000) AS byteSizePercentiles,
    APPROX_QUANTILES(bitsPerPixel, 1000) AS bitsPerPixelPercentiles,
    COUNT(0) AS imgCount
  FROM
    imgs
  WHERE
    -- Only resources larger than one pixel in both dimensions are measured.
    approximateResourceWidth > 1 AND
    approximateResourceHeight > 1
  GROUP BY
    client,
    resourceFormat
)

SELECT
  percentile,
  client,
  resourceFormat,
  imgCount,
  -- With 1000 quantile buckets, percentile p sits at offset p * 10.
  resourceWidthPercentiles[OFFSET(percentile * 10)] AS resourceWidth,
  resourceHeightPercentiles[OFFSET(percentile * 10)] AS resourceHeight,
  aspectRatioPercentiles[OFFSET(percentile * 10)] AS aspectRatio,
  megapixelsPercentiles[OFFSET(percentile * 10)] AS megapixels,
  byteSizePercentiles[OFFSET(percentile * 10)] AS byteSize,
  bitsPerPixelPercentiles[OFFSET(percentile * 10)] AS bitsPerPixel
FROM
  percentiles,
  UNNEST([0, 10, 25, 50, 75, 90, 100]) AS percentile
ORDER BY
  imgCount DESC,
  -- Tie-breakers: groups with equal imgCount would otherwise interleave their percentile rows.
  client,
  resourceFormat,
  percentile
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
-- Parses the JSON array string of per-image records and returns one struct
-- per record with two flags:
--   isPixel   - TRUE when both approximate resource dimensions equal 0
--   isDataURL - TRUE when the record URL starts with "data"
-- The remaining declared struct fields (imgURL, approximateResourceWidth,
-- approximateResourceHeight, byteSize) are not populated by the function body.
--
-- Returns an empty array (so UNNEST yields no rows) when the input is NULL,
-- is not valid JSON, is not a JSON array, or contains a record that cannot be
-- read. A single bad payload therefore no longer aborts the whole query.
CREATE TEMPORARY FUNCTION getPixelInfo(responsiveImagesJsonString STRING)
RETURNS ARRAY<STRUCT<imgURL STRING, approximateResourceWidth INT64, approximateResourceHeight INT64, byteSize INT64, isPixel BOOL, isDataURL BOOL>>
LANGUAGE js AS '''
  try {
    const parsed = JSON.parse(responsiveImagesJsonString);
    if (!Array.isArray(parsed)) {
      // NULL input parses to null; objects and scalars are not image lists.
      return [];
    }
    const dataRegEx = new RegExp('^data');
    return parsed.map(d => ({
      isPixel: d.approximateResourceWidth == 0 && d.approximateResourceHeight == 0,
      isDataURL: dataRegEx.test(d.url)
    }));
  } catch (e) {
    // Malformed JSON or an unreadable record: contribute no rows instead of failing the query.
    return [];
  }
''';
|
||
-- Share of image records that getPixelInfo flags as isPixel, and the share
-- that are additionally data URLs, per client.

-- One row per image record, carrying only the two flags.
WITH image_flags AS (
  SELECT
    _TABLE_SUFFIX AS client,
    isPixel,
    isDataURL
  FROM
    `httparchive.pages.2021_07_01_*`
  CROSS JOIN
    UNNEST(getPixelInfo(JSON_QUERY(JSON_VALUE(payload, '$._responsive_images'), '$.responsive-images')))
),

-- Per-client totals for all records and for each flagged subset.
tallies AS (
  SELECT
    client,
    COUNT(*) AS total_imgs,
    COUNTIF(isPixel) AS zero_pixel_imgs,
    COUNTIF(isDataURL AND isPixel) AS zero_pixel_data_urls
  FROM
    image_flags
  GROUP BY
    client
)

SELECT
  client,
  total_imgs,
  zero_pixel_imgs,
  zero_pixel_data_urls,
  -- Both ratios use the total number of image records as the denominator.
  SAFE_DIVIDE(zero_pixel_imgs, total_imgs) AS pct_zero_pixel_imgs,
  SAFE_DIVIDE(zero_pixel_data_urls, total_imgs) AS pct_zero_pixel_data_urls
FROM
  tallies
Oops, something went wrong.