-- Counts documents per <html lang> attribute value found in the bodies stored
-- in [httparchive:har.2016_05_01_chrome_requests_bodies] (BigQuery legacy SQL),
-- listing the most frequent values first.
SELECT
  COUNT(*) AS num,
  lang
FROM (
  -- Pull the lang value out of the opening <html> tag of the lowercased body.
  -- The value may be quoted or unquoted and is limited to [a-z0-9-]+, so a
  -- captured value is never empty; bodies without a match produce NULL.
  SELECT
    REGEXP_EXTRACT(LOWER(body), r'<html\s(?:[^>]+\s)?lang\s*=\s*["\']?([a-z0-9-]+)') AS lang
  FROM [httparchive:har.2016_05_01_chrome_requests_bodies]
  -- Cheap pre-filter ahead of the regex.
  -- NOTE(review): this requires a literal space directly before `lang=`, which
  -- is stricter than the regex (it also accepts other whitespace before `lang`
  -- and whitespace around `=`); bodies using those forms are skipped here.
  WHERE LOWER(body) CONTAINS ' lang='
)
-- Keep only bodies where a lang value was actually extracted. This is a real
-- NULL test: comparing against the string "null" would also discard documents
-- whose attribute is literally lang="null".
WHERE lang IS NOT NULL
GROUP BY lang
ORDER BY num DESC
-- 643 unique lang values were found among 681,664 documents having <html lang>
-- with a non-empty value, out of a total of 17,551,160 documents. In other
-- words, only 3.88% of documents had <html lang> at all, which is consistent
-- with the data at https://www.chromestatus.com/metrics/feature/timeline/popularity/588.