I want to know who is not using Javascript trackers (such as google-analytics, piwik …). I am not sure if this is the right way to query it:
SELECT
pages.pageid,
url,
cnt,
libs,
pages.rank rank
FROM
[httparchive:runs.2013_06_01_pages] AS pages
JOIN (
SELECT
pageid,
COUNT(DISTINCT(type)) cnt,
GROUP_CONCAT(type) libs
FROM (
SELECT
REGEXP_EXTRACT(url, r'(piwik|google-analytics|getclicky|xiti|googletagmanager|criteo|webtrends|s_code|ntpagetag|tagcommander).*\.js') type,
pageid
FROM
[httparchive:runs.2013_06_01_requests]
WHERE
REGEXP_MATCH(url, r'piwik|google-analytics|getclicky|xiti|googletagmanager|criteo|webtrends|s_code|ntpagetag|tagcommander.*\.js')
GROUP BY
pageid,
type )
GROUP BY
pageid
HAVING cnt = 0) AS lib
ON
lib.pageid = pages.pageid
WHERE
rank IS NOT NULL
AND TLD(url) = ".fr"
ORDER BY
rank