-- Image format usage (sql/2021/media/media_formats.sql).
-- For each corrected image format, reports the number of matching requests,
-- the number of distinct hosts serving them, and the number of distinct pages
-- that requested them.

-- Returns the format to report for a request:
--   * 'avif' when mimeType is exactly 'image/avif';
--   * 'webp' when mimeType is exactly 'image/webp' or format is exactly 'webp';
--   * otherwise the incoming format unchanged (a NULL format stays NULL).
-- Implemented as a SQL UDF instead of a JavaScript UDF: the logic is a plain
-- string comparison, and a NULL argument never satisfies a WHEN branch, so a
-- NULL input falls through to ELSE just as it did in the JavaScript version.
CREATE TEMPORARY FUNCTION fixFormat(format STRING, mimeType STRING)
RETURNS STRING
AS (
  CASE
    WHEN mimeType = 'image/avif' THEN 'avif'
    WHEN mimeType = 'image/webp' OR format = 'webp' THEN 'webp'
    ELSE format
  END
);

WITH image_requests AS (
  -- Requests typed as "image" whose respSize is greater than zero, tagged with
  -- the corrected format. Only the columns the aggregation needs are kept.
  -- NOTE(review): the wildcard reads every table whose name starts with
  -- `summary_requests_`; if that spans more than one crawl/client, the counts
  -- below are combined across them -- confirm that is intended, or split the
  -- results by _TABLE_SUFFIX.
  SELECT
    url,
    pageid,
    fixFormat(format, mimeType) AS trueFormat
  FROM
    `httparchive.sample_data.summary_requests_*`
  WHERE
    type = 'image' AND
    respSize > 0
)

SELECT
  trueFormat,
  COUNT(*) AS freq,
  COUNT(DISTINCT NET.HOST(url)) AS Hosts,
  COUNT(DISTINCT pageid) AS Pages
FROM
  image_requests
GROUP BY
  trueFormat
ORDER BY
  freq DESC,
  -- Tiebreaker so formats with equal request counts come back in a stable order.
  trueFormat