language-data/scripts/export-languages-po.py
renovate[bot] 8f6cfc4953
chore(deps): update pre-commit hook astral-sh/ruff-pre-commit to v0.12.0 (#1895)
* chore(deps): update pre-commit hook astral-sh/ruff-pre-commit to v0.12.0

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2025-06-17 16:18:25 +00:00

81 lines
2 KiB
Python
Executable file

#! /usr/bin/env python3
# Copyright © Michal Čihař <michal@weblate.org>
#
# SPDX-License-Identifier: MIT
"""Export CLDR language names into po file."""
import glob
import json
import os
import re
import sys
BASE = "modules/cldr-json/cldr-json/cldr-localenames-full/main/en/languages.json"
HEADER = """# CLDR language names for {0}
# Automatically generated using tools available at
# https://github.com/WeblateOrg/language-data
msgid ""
msgstr ""
"Project-Id-Version: Weblate\\n"
"Language: {0}\\n"
"MIME-Version: 1.0\\n"
"Content-Type: text/plain; charset=UTF-8\\n"
"Content-Transfer-Encoding: 8bit\\n"
"X-Generator: Weblate\\n"
"""
ROW = """
msgid "{}"
msgstr "{}"
"""
def extract_names(data):
    """Return ``(locale_code, language_names)`` from a parsed CLDR languages file.

    The first key found under ``data["main"]`` is taken as the locale code, and
    the mapping stored at ``localeDisplayNames -> languages`` for that locale
    is returned alongside it.
    """
    main = data["main"]
    locale = next(iter(main))
    return locale, main[locale]["localeDisplayNames"]["languages"]
# Parse language names
# CLDR JSON files are UTF-8; pass the encoding explicitly so decoding does not
# depend on the platform's default locale encoding.
with open(BASE, encoding="utf-8") as handle:
    data = json.load(handle)
# English names keyed by language code; they become the msgid values.
names = extract_names(data)[1]
# Process all files or only subset?
# An optional first command-line argument is used as the locale glob;
# without it every locale is processed.
if len(sys.argv) > 1:
    match = sys.argv[1]
else:
    match = "*"

# Remove current files, we overwrite them anyway and this ensures
# that no stale files are there
stale_pattern = f"languages-po/{match}.po"
for stale in glob.glob(stale_pattern):
    os.remove(stale)

# Glob pattern selecting the CLDR language-name files to convert.
path = f"modules/cldr-json/cldr-json/cldr-localenames-full/main/{match}/languages.json"
# Process translations
for lang in glob.glob(path):
    # The English file only supplies the msgid values; it is not a translation.
    if lang == BASE:
        continue
    # CLDR JSON is UTF-8; do not rely on the locale's default encoding.
    with open(lang, encoding="utf-8") as handle:
        data = json.load(handle)
    # Maps the English name (msgid) to the localized name (msgstr).
    result = {}
    language_code, language_names = extract_names(data)
    for code, name in language_names.items():
        # Skip names identical to their code and codes missing from English.
        if code == name or code not in names:
            continue
        # Skip names in which a component of the code (the part before any
        # "-alt" suffix, split on "-") still appears as a word.
        # NOTE(review): presumably these are untranslated fallbacks - confirm.
        name_tokens = set(re.findall(r"\w+", name))
        code_tokens = set(code.split("-alt")[0].split("-"))
        if name_tokens & code_tokens:
            continue
        result[names[code]] = name
    # HEADER declares charset=UTF-8, so the file has to be written as UTF-8
    # regardless of the platform's default encoding.
    with open(f"languages-po/{language_code}.po", "w", encoding="utf-8") as handle:
        handle.write(HEADER.format(language_code))
        handle.writelines(
            ROW.format(msgid, msgstr) for msgid, msgstr in sorted(result.items())
        )