mirror of
https://gh.wpcy.net/https://github.com/WeblateOrg/weblate.git
synced 2026-05-06 15:34:01 +08:00
246 lines
6.8 KiB
Python
Executable file
246 lines
6.8 KiB
Python
Executable file
#!/usr/bin/env python3
|
||
#
|
||
# Copyright © 2012 - 2020 Michal Čihař <michal@cihar.com>
|
||
#
|
||
# This file is part of Weblate <https://weblate.org/>
|
||
#
|
||
# This program is free software: you can redistribute it and/or modify
|
||
# it under the terms of the GNU General Public License as published by
|
||
# the Free Software Foundation, either version 3 of the License, or
|
||
# (at your option) any later version.
|
||
#
|
||
# This program is distributed in the hope that it will be useful,
|
||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
# GNU General Public License for more details.
|
||
#
|
||
# You should have received a copy of the GNU General Public License
|
||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||
#
|
||
"""
|
||
Helper script to generate Python code from language-data repository.
|
||
|
||
See https://github.com/WeblateOrg/language-data
|
||
"""
|
||
|
||
import csv
|
||
import json
|
||
import re
|
||
import subprocess
|
||
from textwrap import TextWrapper
|
||
|
||
# Matches a run of one or more "separators". A separator is either one of a
# fixed list of HTML entities (tried first, so "&nbsp;" is consumed as a
# whole) or a single character from the punctuation / whitespace / digit
# class below.
_ENTITY_SEPARATOR = r"\&(?:nbsp|rsaquo|lt|gt|amp|ldquo|rdquo|times|quot);"
_CHAR_SEPARATOR = r'[() ,.^`"\'\\/_<>!?;:|{}*^@%#&~=+\r\n✓—‑…\[\]0-9-]'
SPLIT_RE = re.compile("(?:" + _ENTITY_SEPARATOR + "|" + _CHAR_SEPARATOR + ")+")
|
||
|
||
# Text written at the very top of every generated module: the license
# banner, a module docstring pointing at the language-data repository and a
# pylint directive, followed by two blank lines.
# The literal must hold only that text; stray non-comment lines inside it
# would be copied verbatim into each generated file.
HEADER = '''#
# Copyright © 2012 - 2020 Michal Čihař <michal@cihar.com>
#
# This file is part of Weblate <https://weblate.org/>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#

"""Language data definitions.

This is an automatically generated file, see scripts/generate-language-data

Do not edit, please adjust language definitions in following repository:
https://github.com/WeblateOrg/language-data
"""
# pylint: disable=line-too-long,too-many-lines


'''
|
||
|
||
# Source text for one tuple entry in a generated tuple of definitions.
# Placeholders: {0} is emitted quoted and repeated in the translator comment,
# {1} is emitted quoted inside a ``_()`` call, {2} is emitted verbatim
# (unquoted) and {3} is emitted quoted.
# The indentation is cosmetic: the entry sits inside parentheses and the
# generated files are reformatted with black afterwards.
TEMPLATE = """    (
        '{0}',
        # Translators: Language name, ISO code: {0}
        _('{1}'),
        {2},
        '{3}'
    ),
"""
|
||
|
||
# Wraps text to 79 columns and prefixes every output line (first and
# continuation lines alike) with the same four-space indent.
_WRAP_INDENT = " " * 4
WRAPPER = TextWrapper(
    width=79,
    initial_indent=_WRAP_INDENT,
    subsequent_indent=_WRAP_INDENT,
)
|
||
|
||
def _read_rows(path):
    """Return every row of the semicolon-delimited CSV file at ``path``.

    The file is opened explicitly as UTF-8 so the result does not depend on
    the locale of the machine running the script (the data files are assumed
    to be UTF-8), and with ``newline=""`` as the ``csv`` module documentation
    requires for files handed to ``csv.reader``.

    Args:
        path: Location of the CSV file, relative to the working directory.

    Returns:
        A list of rows, each row being a list of string fields.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(path, "r", encoding="utf-8", newline="") as csvfile:
        return list(csv.reader(csvfile, delimiter=";"))


# Read languages
LANGUAGES = _read_rows("scripts/language-data/languages.csv")

# Read aliases; rows whose first field is exactly "#" are dropped, and blank
# rows are skipped instead of raising IndexError on ``alias[0]``
ALIASES = [
    alias
    for alias in _read_rows("scripts/language-data/aliases.csv")
    if alias and alias[0] != "#"
]

# Read extra plurals
EXTRAPLURALS = _read_rows("scripts/language-data/extraplurals.csv")

# Read default countries
DEFAULT_COUNTRIES = _read_rows("scripts/language-data/default_countries.csv")
|
||
|
||
# Write language definitions
# HEADER contains non-ASCII characters, so the output encoding is pinned to
# UTF-8 rather than left to the locale default.
with open("weblate/langdata/languages.py", "w", encoding="utf-8") as output:
    output.write(HEADER)
    output.write("from django.utils.translation import gettext_noop as _\n\n")
    output.write("# Language definitions\n")
    output.write("LANGUAGES = (\n")
    for row in LANGUAGES:
        # Single quotes in the second field are escaped because TEMPLATE
        # places it inside a single-quoted literal
        output.write(
            TEMPLATE.format(row[0], row[1].replace("'", "\\'"), row[2], row[3])
        )
    output.write(")\n")
|
||
# Write additional plural definitions
# HEADER contains non-ASCII characters, so the output encoding is pinned to
# UTF-8 rather than left to the locale default.
with open("weblate/langdata/plurals.py", "w", encoding="utf-8") as output:
    output.write(HEADER)
    output.write("from django.utils.translation import gettext_noop as _\n\n")
    output.write("# Additional plural rules definitions\n")
    output.write("EXTRAPLURALS = (\n")
    for row in EXTRAPLURALS:
        # Single quotes in the second field are escaped because TEMPLATE
        # places it inside a single-quoted literal
        output.write(
            TEMPLATE.format(row[0], row[1].replace("'", "\\'"), row[2], row[3])
        )
    output.write(")\n")
|
||
# Write language aliases as a dict literal, one "'key': 'value'," line per row
# HEADER contains non-ASCII characters, so the output encoding is pinned to
# UTF-8 rather than left to the locale default.
with open("weblate/langdata/aliases.py", "w", encoding="utf-8") as output:
    output.write(HEADER)
    output.write("# Language aliases\n")
    output.write("ALIASES = {\n")
    for row in ALIASES:
        # The first two fields of the row fill the two placeholders
        output.write(" '{}': '{}',\n".format(*row))
    output.write("}\n")
|
||
# Write the DEFAULT_LANGS tuple, one quoted entry per row
# HEADER contains non-ASCII characters, so the output encoding is pinned to
# UTF-8 rather than left to the locale default.
with open("weblate/langdata/countries.py", "w", encoding="utf-8") as output:
    output.write(HEADER)
    output.write("# List of defaul languages, omitting country code should be okay\n")
    output.write("DEFAULT_LANGS = (\n")
    for row in DEFAULT_COUNTRIES:
        # Only the first field of the row is used
        output.write(" '{}',\n".format(*row))
    output.write(")\n")
|
||
|
||
# Generate same check blacklist
words = set()


def add_word(word):
    """Lowercase ``word``, split it with SPLIT_RE and add the parts to ``words``.

    Every piece returned by the split is added, including empty strings
    produced when ``word`` starts or ends with a separator.
    """
    pieces = SPLIT_RE.split(word.lower())
    words.update(pieces)
|
||
|
||
|
||
def process_iso(name):
    """Feed the names found in one iso-codes JSON data file to ``add_word``.

    Args:
        name: Identifier such as ``"639-2"``. It selects the file
            ``scripts/iso-codes/data/iso_<name>.json`` and is also the
            top-level key under which the JSON document stores its entries.

    Raises:
        OSError: If the data file cannot be opened.
        KeyError: If the document has no ``name`` key or an entry has no
            ``"name"`` field.
    """
    path = "scripts/iso-codes/data/iso_{}.json".format(name)
    # Decode explicitly as UTF-8 instead of the locale default; the data is
    # assumed to contain non-ASCII names
    with open(path, "r", encoding="utf-8") as handle:
        entries = json.load(handle)[name]
    for item in entries:
        add_word(item["name"])
        # "common_name" is optional and only present on some entries
        if "common_name" in item:
            add_word(item["common_name"])
|
||
|
||
|
||
# Our languages data: the second field of each row is split into words
for row in LANGUAGES:
    add_word(row[1])

# iso-codes data files, processed in this order
for iso_name in (
    "639-2",
    "639-3",
    "639-5",
    "15924",
    "3166-1",
    "3166-2",
    "3166-3",
    "4217",
):
    process_iso(iso_name)
|
||
|
||
# Words removed from the collected set so that they do not end up in the
# generated list
_EXCLUDED_WORDS = (
    "administered",
    "administrative",
    "air",
    "and",
    "are",
    "association",
    "autonomous",
    "auxiliary",
    "based",
    "bassin",
    "bath",
    "bay",
    "big",
    "canal",
    "canton",
    "country",
    "county",
    "early",
    "east",
    "eastern",
    "family",
    "language",
    "languages",
    "long",
    "metropolitan",
    "miscellaneous",
    "neutral",
    "new",
    "north",
    "northeast",
    "northeastern",
    "northern",
    "northwest",
    "northwestern",
    "region",
    "see",
    "small",
    "south",
    "southeast",
    "southeastern",
    "southern",
    "southwest",
    "southwestern",
    "state",
    "states",
    "testing",
    "transactions",
    "trust",
    "use",
    "west",
    "western",
)
words.difference_update(_EXCLUDED_WORDS)
|
||
|
||
# Write same check blacklist
# HEADER contains non-ASCII characters, so the output encoding is pinned to
# UTF-8 rather than left to the locale default.
with open("weblate/checks/languages.py", "w", encoding="utf-8") as output:
    output.write(HEADER)
    output.write("# Language names to ignore in same check\n")
    output.write("LANGUAGES = {\n")
    # Words of one or two characters (and the empty string) are left out;
    # the rest are emitted sorted, single-quoted and comma separated
    content = ", ".join(
        "'{}'".format(word.replace("'", "\\'"))
        for word in sorted(words)
        if len(word) > 2
    )
    output.write("\n".join(WRAPPER.wrap(content)))
    output.write("\n}\n")
|
||
|
||
# Apply coding style to the generated files
# check=True turns a non-zero exit status from black into
# subprocess.CalledProcessError instead of silently ignoring it.
subprocess.run(
    [
        "black",
        "weblate/langdata/countries.py",
        "weblate/langdata/aliases.py",
        "weblate/langdata/plurals.py",
        "weblate/langdata/languages.py",
        "weblate/checks/languages.py",
    ],
    check=True,
)
|