Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PWA Queries for 2024 #3748

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 61 additions & 0 deletions sql/2024/pwa/assetlink_usage.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#standardSQL
# assetlink usage

SELECT
'PWA sites' AS type,
_TABLE_SUFFIX AS client,
COUNT(0) AS freq,
total,
COUNT(0) / total AS pct
FROM
`httparchive.pages.2024_06_01_*`
JOIN
(
SELECT
_TABLE_SUFFIX,
COUNT(0) AS total
FROM
`httparchive.pages.2024_06_01_*`
WHERE
JSON_EXTRACT(payload, '$._pwa.manifests') != '[]' AND JSON_EXTRACT(payload, '$._pwa.manifests') != '{}' AND
JSON_EXTRACT(payload, '$._pwa.serviceWorkerHeuristic') = 'true'
GROUP BY
_TABLE_SUFFIX
)
USING (_TABLE_SUFFIX)
WHERE
JSON_EXTRACT(payload, '$._pwa.serviceWorkerHeuristic') = 'true' AND
JSON_EXTRACT(payload, '$._pwa.manifests') != '[]' AND JSON_EXTRACT(payload, '$._pwa.manifests') != '{}' AND
JSON_EXTRACT_SCALAR(JSON_VALUE(payload, '$._well-known'), "$['/.well-known/assetlinks.json'].found") = 'true'
GROUP BY
client,
total
UNION ALL
SELECT
'All sites' AS type,
_TABLE_SUFFIX AS client,
COUNT(0) AS freq,
total,
COUNT(0) / total AS pct
FROM
`httparchive.pages.2024_06_01_*`
JOIN
(
SELECT
_TABLE_SUFFIX,
COUNT(0) AS total
FROM
`httparchive.pages.2024_06_01_*`
GROUP BY
_TABLE_SUFFIX
)
USING (_TABLE_SUFFIX)
WHERE
JSON_EXTRACT_SCALAR(JSON_VALUE(payload, '$._well-known'), "$['/.well-known/assetlinks.json'].found") = 'true'
GROUP BY
client,
total
ORDER BY
type DESC,
freq / total DESC,
client
39 changes: 39 additions & 0 deletions sql/2024/pwa/fugu.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#standardSQL
CREATE TEMP FUNCTION getFuguAPIs(data STRING)
RETURNS ARRAY<STRING>
LANGUAGE js AS '''
const $ = JSON.parse(data);
return Object.keys($);
''';

SELECT
_TABLE_SUFFIX AS client,
fuguAPI,
COUNT(DISTINCT url) AS pages,
total,
COUNT(DISTINCT url) / total AS pct,
ARRAY_TO_STRING(ARRAY_AGG(DISTINCT url LIMIT 50), ' ') AS sample_urls
FROM
`httparchive.pages.2024_06_01_*`
JOIN (
SELECT
_TABLE_SUFFIX,
COUNT(0) AS total
FROM
`httparchive.pages.2024_06_01_*`
GROUP BY
_TABLE_SUFFIX)
USING
(_TABLE_SUFFIX),
UNNEST(getFuguAPIs(JSON_QUERY(payload, '$."_fugu-apis"'))) AS fuguAPI
WHERE
JSON_QUERY(payload, '$."_fugu-apis"') != '[]'
GROUP BY
fuguAPI,
client,
total
HAVING
COUNT(DISTINCT url) >= 10
ORDER BY
pct DESC,
client;
69 changes: 69 additions & 0 deletions sql/2024/pwa/install_events.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#standardSQL
# SW install events

CREATE TEMPORARY FUNCTION getInstallEvents(payload STRING)
RETURNS ARRAY<STRING> LANGUAGE js AS '''
try {
var payloadJSON = JSON.parse(payload);

/* YouTube iFrames account for a lot of these, so we exclude them */
/* Cannot use filter as it is a complex object and not a straight array */
function filterYouTube(info) {

var objectKeys = Object.keys(info);
objectKeys = objectKeys.trim().split(',');
for(var i = 0; i < objectKeys.length; i++) {
if(objectKeys[i].toLowerCase().includes('youtube')) {
delete info[objectKeys[i]];
}
}
return info;
}

var windowEventListenersInfo = Object.values(filterYouTube(payloadJSON.windowEventListenersInfo)).flat();
var windowPropertiesInfo = Object.values(filterYouTube(payloadJSON.windowPropertiesInfo)).flat()

return [...new Set([...windowEventListenersInfo ,...windowPropertiesInfo])];
} catch (e) {
return [];
}
''';

SELECT
_TABLE_SUFFIX AS client,
install_event,
COUNT(DISTINCT url) AS freq,
total,
COUNT(DISTINCT url) / total AS pct
FROM
`httparchive.pages.2024_06_01_*`,
UNNEST(getInstallEvents(JSON_EXTRACT(payload, '$._pwa'))) AS install_event
JOIN
(
SELECT
_TABLE_SUFFIX,
COUNT(0) AS total
FROM
`httparchive.pages.2024_06_01_*`
WHERE
-- This condition filters out tests that might have broken when running the 'pwa' metric
-- as even pages without any pwa capabilities will have a _pwa object with empty fields
JSON_EXTRACT(payload, '$._pwa') != '[]'
GROUP BY
_TABLE_SUFFIX
)
USING (_TABLE_SUFFIX)
WHERE
(
JSON_EXTRACT(payload, '$._pwa.windowEventListenersInfo') != '[]' OR
JSON_EXTRACT(payload, '$._pwa.windowPropertiesInfo') != '[]'
) AND
install_event != '' AND
install_event != '[]'
GROUP BY
client,
total,
install_event
ORDER BY
freq / total DESC,
client
73 changes: 73 additions & 0 deletions sql/2024/pwa/lighthouse_pwa_audits.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
#standardSQL
# Get summary of all lighthouse PWA audits, for both PWA pages and all pages
# Note scores, weightings, groups and descriptions may be off in mixed months when new versions of Lighthouse roles out

CREATE TEMPORARY FUNCTION getAudits(auditRefs STRING, audits STRING)
RETURNS ARRAY<STRUCT<id STRING, weight INT64, audit_group STRING, title STRING, description STRING, score INT64>> LANGUAGE js AS '''
var auditrefs = JSON.parse(auditRefs);
var audits = JSON.parse(audits);
var results = [];
for (auditref of auditrefs) {
results.push({
id: auditref.id,
weight: auditref.weight,
audit_group: auditref.group,
description: audits[auditref.id].description,
score: audits[auditref.id].score
});
}
return results;
''';

SELECT
_TABLE_SUFFIX AS client,
'PWA Sites' AS type,
audits.id AS id,
COUNTIF(audits.score > 0) AS num_pages,
COUNT(0) AS total,
COUNTIF(audits.score IS NOT NULL) AS total_applicable,
SAFE_DIVIDE(COUNTIF(audits.score > 0), COUNTIF(audits.score IS NOT NULL)) AS pct,
APPROX_QUANTILES(audits.weight, 100)[OFFSET(50)] AS median_weight,
MAX(audits.audit_group) AS audit_group,
MAX(audits.description) AS description
FROM
`httparchive.lighthouse.2024_06_01_*`,
UNNEST(getAudits(JSON_EXTRACT(report, '$.categories.pwa.auditRefs'), JSON_EXTRACT(report, '$.audits'))) AS audits
JOIN
(
SELECT
_TABLE_SUFFIX,
url
FROM
`httparchive.pages.2024_06_01_*`
WHERE
JSON_EXTRACT(payload, '$._pwa.serviceWorkerHeuristic') = 'true' AND
JSON_EXTRACT(payload, '$._pwa.manifests') != '[]' AND JSON_EXTRACT(payload, '$._pwa.manifests') != '{}'
)
USING (_TABLE_SUFFIX, url)
GROUP BY
client,
audits.id
UNION ALL
SELECT
_TABLE_SUFFIX AS client,
'ALL Sites' AS type,
audits.id AS id,
COUNTIF(audits.score > 0) AS num_pages,
COUNT(0) AS total,
COUNTIF(audits.score IS NOT NULL) AS total_applicable,
SAFE_DIVIDE(COUNTIF(audits.score > 0), COUNTIF(audits.score IS NOT NULL)) AS pct,
APPROX_QUANTILES(audits.weight, 100)[OFFSET(50)] AS median_weight,
MAX(audits.audit_group) AS audit_group,
MAX(audits.description) AS description
FROM
`httparchive.lighthouse.2024_06_01_*`,
UNNEST(getAudits(JSON_EXTRACT(report, '$.categories.pwa.auditRefs'), JSON_EXTRACT(report, '$.audits'))) AS audits
GROUP BY
client,
audits.id
ORDER BY
client,
type DESC,
median_weight DESC,
id
55 changes: 55 additions & 0 deletions sql/2024/pwa/lighthouse_pwa_score.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#standardSQL
# Percentiles of lighthouse pwa score
SELECT
client,
'2024_06_01' AS date,
'PWA Sites' AS type,
percentile,
APPROX_QUANTILES(score, 1000)[OFFSET(percentile * 10)] * 100 AS score
FROM (
SELECT
_TABLE_SUFFIX AS client,
url,
CAST(JSON_EXTRACT(report, '$.categories.pwa.score') AS NUMERIC) AS score
FROM
`httparchive.lighthouse.2024_06_01_*`)
JOIN
(
SELECT
_TABLE_SUFFIX AS client,
url
FROM
`httparchive.pages.2024_06_01_*`
WHERE
JSON_EXTRACT(payload, '$._pwa.serviceWorkerHeuristic') = 'true' AND
JSON_EXTRACT(payload, '$._pwa.manifests') != '[]' AND JSON_EXTRACT(payload, '$._pwa.manifests') != '{}'
)
USING (client, url),
UNNEST([10, 25, 50, 75, 90]) AS percentile
GROUP BY
client,
date,
percentile
UNION ALL
SELECT
client,
'2024_06_01' AS date,
'All Sites' AS type,
percentile,
APPROX_QUANTILES(score, 1000)[OFFSET(percentile * 10)] * 100 AS score
FROM (
SELECT
_TABLE_SUFFIX AS client,
CAST(JSON_EXTRACT(report, '$.categories.pwa.score') AS NUMERIC) AS score
FROM
`httparchive.lighthouse.2024_06_01_*`),
UNNEST([10, 25, 50, 75, 90]) AS percentile
GROUP BY
client,
date,
percentile
ORDER BY
client,
date,
type,
percentile
21 changes: 21 additions & 0 deletions sql/2024/pwa/manifests_and_service_workers.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#standardSQL
# Counting Manifests and Service Workers
SELECT
client,
SAFE_DIVIDE(SUM(ServiceWorker), SUM(COUNT(0)) OVER (PARTITION BY client)) AS ServiceWorkers,
SAFE_DIVIDE(SUM(manifests), SUM(COUNT(0)) OVER (PARTITION BY client)) AS Manifests,
SAFE_DIVIDE(COUNTIF(ServiceWorker > 0 OR manifests > 0), SUM(COUNT(0)) OVER (PARTITION BY client)) AS Either,
SAFE_DIVIDE(COUNTIF(ServiceWorker > 0 AND manifests > 0), SUM(COUNT(0)) OVER (PARTITION BY client)) AS Both,
SUM(COUNT(0)) OVER (PARTITION BY client) AS total
FROM (
SELECT
_TABLE_SUFFIX AS client,
IF(JSON_EXTRACT(payload, '$._pwa.serviceWorkerHeuristic') = 'true', 1, 0) AS ServiceWorker,
IF(JSON_EXTRACT(payload, '$._pwa.manifests') != '[]' AND JSON_EXTRACT(payload, '$._pwa.manifests') != '{}' AND JSON_EXTRACT(payload, '$._pwa.manifests') != '{}', 1, 0) AS manifests
FROM
`httparchive.pages.2024_06_01_*`
)
GROUP BY
client
ORDER BY
client
28 changes: 28 additions & 0 deletions sql/2024/pwa/manifests_description.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#standardSQL
# % manifests with description for service worker pages and all pages

CREATE TEMP FUNCTION hasDescription(manifest STRING)
RETURNS BOOLEAN LANGUAGE js AS '''
try {
var $ = Object.values(JSON.parse(manifest))[0];
return $.hasOwnProperty('description') && $.description != '';
} catch (e) {
return null;
}
''';

SELECT
'PWA Pages' AS type,
_TABLE_SUFFIX AS client,
hasDescription(JSON_EXTRACT(payload, '$._pwa.manifests')) AS hasDescription,
COUNT(0) AS freq,
SUM(COUNT(0)) OVER (PARTITION BY _TABLE_SUFFIX) AS total,
COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY _TABLE_SUFFIX) AS pct
FROM
`httparchive.pages.2024_06_01_*`
WHERE
JSON_EXTRACT(payload, '$._pwa.manifests') != '[]' AND JSON_EXTRACT(payload, '$._pwa.manifests') != '{}' AND
JSON_EXTRACT(payload, '$._pwa.serviceWorkerHeuristic') = 'true'
GROUP BY
client,
hasDescription
28 changes: 28 additions & 0 deletions sql/2024/pwa/manifests_file_handlers.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#standardSQL
# % manifests with file handlers for service worker pages and all pages

CREATE TEMP FUNCTION hasFileHandlers(manifest STRING)
RETURNS BOOLEAN LANGUAGE js AS '''
try {
var $ = Object.values(JSON.parse(manifest))[0];
return $.hasOwnProperty('file_handlers') && $.file_handlers.length > 0;
} catch (e) {
return null;
}
''';

SELECT
'PWA Pages' AS type,
_TABLE_SUFFIX AS client,
hasFileHandlers(JSON_EXTRACT(payload, '$._pwa.manifests')) AS hasFileHandlers,
COUNT(0) AS freq,
SUM(COUNT(0)) OVER (PARTITION BY _TABLE_SUFFIX) AS total,
COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY _TABLE_SUFFIX) AS pct
FROM
`httparchive.pages.2024_06_01_*`
WHERE
JSON_EXTRACT(payload, '$._pwa.manifests') != '[]' AND JSON_EXTRACT(payload, '$._pwa.manifests') != '{}' AND
JSON_EXTRACT(payload, '$._pwa.serviceWorkerHeuristic') = 'true'
GROUP BY
client,
hasFileHandlers
Loading