-- Counts bodies per primary language subtag declared via the lang attribute
-- of the <html> start tag.
--
-- Output: one row per distinct subtag (`lang`, lower-cased, letters a-z only)
-- with the number of matching bodies (`num`), most frequent first; ties are
-- broken alphabetically so the ordering is reproducible.
--
-- Dialect: BigQuery legacy SQL (bracketed table reference, CONTAINS operator).
--
-- NOTE(review): any result figures recorded alongside this query were produced
-- with a narrower ' lang=' pre-filter and a != "null" string comparison;
-- re-run the query before quoting them.
SELECT
  COUNT(*) AS num,
  lang
FROM (
  SELECT
    -- Captures the leading run of letters of the attribute value, so "en-US"
    -- yields "en". The value may be unquoted or quoted, whitespace around "="
    -- is tolerated, and [^>]+ keeps the match inside the <html ...> tag.
    -- Evaluates to NULL when the body has no such attribute.
    REGEXP_EXTRACT(LOWER(body), r'<html\s(?:[^>]+\s)?lang\s*=\s*["\']?([a-z]+)') AS lang
  FROM [httparchive:har.2016_05_01_chrome_requests_bodies]
  -- Plain substring pre-filter. It must stay a superset of what the regex can
  -- match: requiring the literal ' lang=' would drop tags that separate the
  -- attribute with a tab/newline or put whitespace before "=".
  WHERE LOWER(body) CONTAINS '<html'
)
-- Drop bodies where the regex found nothing. An explicit NULL test is used
-- instead of comparing with the string "null", which would also discard pages
-- whose attribute value is literally "null".
WHERE lang IS NOT NULL
GROUP BY lang
ORDER BY num DESC, lang
-- Result: 255 unique primary language subtags were found across 681,651 documents whose
-- <html lang> has a non-empty value, out of a total of 17,551,160 documents. In other words,
-- only 3.88% of documents had <html lang> at all, which is consistent with the data at
-- https://www.chromestatus.com/metrics/feature/timeline/popularity/588.