mirror of
https://gh.wpcy.net/https://github.com/WeblateOrg/language-data.git
synced 2026-04-21 09:42:22 +08:00
76 lines
2.4 KiB
Python
Executable file
76 lines
2.4 KiB
Python
Executable file
#! /usr/bin/env python3
|
|
|
|
# Copyright © Michal Čihař <michal@weblate.org>
|
|
#
|
|
# SPDX-License-Identifier: MIT
|
|
|
|
import csv
|
|
import json
|
|
from collections import defaultdict
|
|
from collections.abc import Generator
|
|
from pathlib import Path
|
|
|
|
# Renames applied to CLDR language codes before their speakers are counted;
# codes without an entry here are used unchanged.
MAPPING = {"zh": "zh_Hans", "pa_Arab": "pa"}

# Region codes (keys of the territory containment data) that receive their own
# aggregated totals.
# NOTE(review): presumably UN M.49 areas (150 = Europe, 419 = Latin America and
# the Caribbean) — confirm against CLDR.
REGIONS: list[str] = ["150", "419"]

# Only these languages get the extra "<language>_<region>" totals.
REGION_LANGUAGES: set[str] = {"en", "es"}
|
|
|
|
# Load the CLDR territory containment table: a mapping from a region code to a
# dict whose "_contains" list names the codes directly inside that region.
# JSON is UTF-8 by specification, so decode it explicitly instead of relying on
# the locale's default encoding.
with open(
    "modules/cldr-json/cldr-json/cldr-core/supplemental/territoryContainment.json",
    encoding="utf-8",
) as handle:
    CONTAINMENT = json.load(handle)["supplemental"]["territoryContainment"]
|
|
|
|
|
|
def get_region_countries(text: str) -> Generator[str]:
    """Yield every non-numeric code found inside the region ``text``.

    Members of ``CONTAINMENT[text]["_contains"]`` that consist solely of
    digits are treated as nested regions and expanded recursively, in place;
    every other member is yielded as is.

    Raises:
        KeyError: If ``text`` or a nested numeric code has no entry in
            ``CONTAINMENT`` (raised lazily, while iterating).
    """
    for member in CONTAINMENT[text]["_contains"]:
        if not member.isdigit():
            yield member
            continue
        # All-digit codes denote sub-regions: descend into them.
        yield from get_region_countries(member)
|
|
|
|
|
|
# Reverse lookup from a country code to the entry of REGIONS that contains it.
# Regions are processed in REGIONS order, so if a country is reachable from
# more than one region the later region wins.
REGION_COUNTRIES: dict[str, str] = {}
for region in REGIONS:
    REGION_COUNTRIES.update(dict.fromkeys(get_region_countries(region), region))
|
|
|
|
|
|
def load_fallback_populations() -> dict[str, int]:
    """Read fallback population figures from ``population-fallback.csv``.

    The file is looked up relative to the current working directory and is
    expected to have a header row with ``code`` and ``population`` columns.

    Returns:
        Mapping of code to population, or an empty dict when the file does
        not exist.

    Raises:
        KeyError: If the header lacks a ``code`` or ``population`` column.
        ValueError: If a population value is not an integer literal.
    """
    fallback_file = Path("population-fallback.csv")
    if not fallback_file.exists():
        return {}
    # The csv module asks for files opened with newline="" so that it can do
    # its own line-ending handling; the explicit encoding keeps the result
    # independent of the locale.
    with fallback_file.open(encoding="utf-8", newline="") as handle:
        return {
            row["code"]: int(row["population"]) for row in csv.DictReader(handle)
        }
|
|
|
|
|
|
# Read the CLDR territory information. JSON is UTF-8 by specification, so the
# encoding is given explicitly, and the file is closed as soon as it is parsed.
with open(
    "modules/cldr-json/cldr-json/cldr-core/supplemental/territoryInfo.json",
    encoding="utf-8",
) as handle:
    territory_info = json.load(handle)["supplemental"]["territoryInfo"]

# Estimated number of speakers, keyed by "<language>", "<language>_<territory>"
# and, for REGION_LANGUAGES only, "<language>_<region>".
languages: dict[str, float] = defaultdict(float)
for code, territory in territory_info.items():
    population = int(territory["_population"])
    if "languagePopulation" not in territory:
        print(f"Skipping {code}: {territory}")
        continue
    for language_cldr, data in territory["languagePopulation"].items():
        language = MAPPING.get(language_cldr, language_cldr)
        # "_populationPercent" is a percentage of the territory's population.
        speakers = population * float(data["_populationPercent"]) / 100
        languages[language] += speakers
        languages[f"{language}_{code}"] += speakers
        if code in REGION_COUNTRIES and language in REGION_LANGUAGES:
            languages[f"{language}_{REGION_COUNTRIES[code]}"] += speakers
|
|
|
|
# Use the fallback figure only for codes whose computed total is missing or
# truncates to zero; any non-zero computed total is kept.
for fallback_code, fallback_population in load_fallback_populations().items():
    if int(languages.get(fallback_code, 0)) != 0:
        continue
    languages[fallback_code] = fallback_population
|
|
|
|
# Write the totals as "code,population" rows sorted by code, truncating each
# total to an integer. The encoding is explicit so the generated file does not
# depend on the locale of the machine running the script.
with open("population.csv", "w", encoding="utf-8") as handle:
    handle.write("code,population\n")
    handle.writelines(
        f"{code},{int(languages[code])}\n" for code in sorted(languages)
    )
|