SELECT REGEXP_EXTRACT(url, r'(ga|piwik|webtrends|s_code|ntpagetag)') type,
COUNT(DISTINCT(pageid)) tagcount
FROM [httparchive:runs.2013_05_15_requests]
WHERE REGEXP_MATCH(url, r'ga|piwik|webtrends|s_code|ntpagetag')
GROUP BY type
ORDER BY tagcount desc;
There are probably more well-known Web Analytics providers but this covers Google, Piwik, WebTrends, Omniture and Unica (I think!) so that’s most of the biggies!
@TheOpsMgr I think the “ga” query is a bit too relaxed. Here’s my run at it:
SELECT type, tagcount, ROUND(ratio*10000)/100 as percent FROM (
SELECT
COUNT(DISTINCT(pageid)) tagcount,
RATIO_TO_REPORT(tagcount) OVER() AS ratio,
CASE
WHEN REGEXP_MATCH(url, "http[s]*://[www.|ssl.]*google-analytics.com/ga.js") THEN "Google Analytics"
WHEN url CONTAINS "piwik" then "Piwik"
WHEN url CONTAINS "webtrends" then "WebTrends"
WHEN url CONTAINS "s_code" then "Omniture"
WHEN url CONTAINS "ntpagetag" THEN "Unica"
END as type
FROM [httparchive:runs.2013_05_15_requests]
GROUP BY type
HAVING type IS NOT NULL
)
What I’m seeing for 2013-05-15 run:
I picked out a more substantial portion of the analytics services I see every day:
SELECT type, tagcount, ROUND(ratio*10000)/100 as percent FROM (
SELECT
COUNT(DISTINCT(pageid)) tagcount,
RATIO_TO_REPORT(tagcount) OVER() AS ratio,
CASE
WHEN url CONTAINS "google-analytics.com" THEN "Google Analytics"
WHEN url CONTAINS "piwik" then "Piwik"
WHEN url CONTAINS "webtrends" then "WebTrends"
WHEN url CONTAINS "s_code" then "Omniture"
WHEN url CONTAINS "ntpagetag" THEN "Unica"
WHEN url CONTAINS "i.kissinsights" then "KISS"
WHEN url CONTAINS "chartbeat.com" then "chartbeat"
WHEN url CONTAINS "insights.gravity.com" then "gravity"
WHEN url CONTAINS "cdn.optimizely.com" then "optimizely"
WHEN url CONTAINS "quantserve.com" then "Quantcast"
WHEN url CONTAINS "owa.tracker" then "OpenWebAnalytics"
WHEN url CONTAINS "static.getclicky.com" then "Clicky"
WHEN url CONTAINS "cdn.mxpnl.com" then "Mixpanel"
WHEN url CONTAINS ".addthis.com" then "AddThis"
WHEN url CONTAINS "scorecardresearch.com" then "ComScore"
WHEN url CONTAINS "sitestat.com" then "ComScore"
END as type
FROM [httparchive:runs.latest_requests]
GROUP BY type
HAVING type IS NOT NULL
)
Though some of the selectors may need some work.
1 Like