mirror of
https://gh.llkk.cc/https://github.com/WeblateOrg/language-data.git
synced 2025-10-03 15:01:09 +08:00
feat: automate text direction extracting from CLDR
This fills in the data for all CLDR languages, making issues like #1477 less likely to happen.
This commit is contained in:
parent
826f086770
commit
af29b70621
5 changed files with 146 additions and 4 deletions
5
Makefile
5
Makefile
|
@ -4,7 +4,7 @@
|
|||
|
||||
all: weblate_language_data/languages.py weblate_language_data/plural_tags.py PLURALS_DIFF.md $(wildcard weblate_language_data/locale/*/LC_MESSAGES/django.po) $(filter-out $(patsubst modules/cldr-json/cldr-json/cldr-localenames-full/main/%/languages.json,languages-po/%.po,$(wildcard modules/cldr-json/cldr-json/cldr-localenames-full/main/*/languages.json)),languages-po/en.po)
|
||||
|
||||
weblate_language_data/languages.py: languages.csv aliases.csv cldr.csv extraplurals.csv default_countries.csv population.csv qt.csv $(wildcard modules/iso-codes/data/iso_*.json) scripts/generate-language-data
|
||||
weblate_language_data/languages.py: languages.csv aliases.csv cldr.csv extraplurals.csv default_countries.csv population.csv qt.csv rtl.csv $(wildcard modules/iso-codes/data/iso_*.json) scripts/generate-language-data
|
||||
./scripts/generate-language-data
|
||||
|
||||
PLURALS_DIFF.md: languages.csv cldr.csv gettext.csv l10n-guide.csv translate.csv scripts/list-diff
|
||||
|
@ -14,6 +14,9 @@ PLURALS_DIFF.md: languages.csv cldr.csv gettext.csv l10n-guide.csv translate.csv
|
|||
cldr.csv: modules/cldr-json/cldr-json/cldr-core/supplemental/plurals.json modules/cldr-json/cldr-json/cldr-localenames-full/main/en/languages.json scripts/export-cldr
|
||||
./scripts/export-cldr
|
||||
|
||||
rtl.csv: modules/cldr-json/cldr-json/cldr-misc-full/main/*/layout.json scripts/export-cldr-orientation languages.csv
|
||||
./scripts/export-cldr-orientation
|
||||
|
||||
qt.csv: modules/qttools/src/linguist/shared/numerus.cpp scripts/export-qt languages.csv
|
||||
./scripts/export-qt
|
||||
|
||||
|
|
53
rtl.csv
Normal file
53
rtl.csv
Normal file
|
@ -0,0 +1,53 @@
|
|||
code
|
||||
ae
|
||||
aii
|
||||
ajp
|
||||
apc
|
||||
ar
|
||||
ar_BH
|
||||
ar_DZ
|
||||
ar_EG
|
||||
ar_KW
|
||||
ar_LY
|
||||
ar_MA
|
||||
ar_SA
|
||||
ar_YE
|
||||
ara
|
||||
arc
|
||||
ave
|
||||
bal
|
||||
bgn
|
||||
bqi
|
||||
ckb
|
||||
ckb_IR
|
||||
dv
|
||||
egy
|
||||
fa
|
||||
fa_AF
|
||||
fas
|
||||
ha
|
||||
he
|
||||
heb
|
||||
khw
|
||||
ks
|
||||
lrc
|
||||
luz
|
||||
ms_Arab
|
||||
mzn
|
||||
nqo
|
||||
pal
|
||||
per
|
||||
phn
|
||||
ps
|
||||
rhg
|
||||
sam
|
||||
sd
|
||||
sdh
|
||||
skr
|
||||
syc
|
||||
syr
|
||||
ug
|
||||
ur
|
||||
ur_IN
|
||||
urd
|
||||
yi
|
|
45
scripts/export-cldr-orientation
Executable file
45
scripts/export-cldr-orientation
Executable file
|
@ -0,0 +1,45 @@
|
|||
#! /usr/bin/env python3
|
||||
|
||||
# Copyright © Michal Čihař <michal@weblate.org>
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
import csv
|
||||
|
||||
# Read the language codes defined in this repository. CLDR locales that are
# not listed here are ignored when scanning the layout files below.
with open("languages.csv", encoding="utf-8", newline="") as csvfile:
    reader = csv.reader(csvfile, delimiter=",")
    next(reader, None)  # skip the header row (tolerate an empty file)
    LANGUAGES = list(reader)
    # csv.reader yields [] for blank lines; skip them instead of crashing.
    LANGUAGE_CODES = {lang[0] for lang in LANGUAGES if lang}

# Read the current RTL list. It is used as the starting set: codes already in
# rtl.csv are always kept, and this script only ever adds codes found in CLDR.
with open("rtl.csv", encoding="utf-8", newline="") as csvfile:
    reader = csv.reader(csvfile, delimiter=",")
    next(reader, None)  # skip the header row (tolerate an empty file)
    RTLS = list(reader)
    RTL_CODES = {lang[0] for lang in RTLS if lang}

LAYOUTDIR = Path("modules/cldr-json/cldr-json/cldr-misc-full/main/")

# Each layout.json holds the text orientation of one CLDR locale under
# data["main"][<locale>]["layout"]["orientation"]["characterOrder"].
# Sorting the paths keeps the order of any warnings stable between runs.
for layout_file in sorted(LAYOUTDIR.glob("*/layout.json")):
    data = json.loads(layout_file.read_text(encoding="utf-8"))
    for key, value in data["main"].items():
        # CLDR uses "-" as the separator, languages.csv uses "_".
        code = key.replace("-", "_")
        if code not in LANGUAGE_CODES:
            continue
        character_order = value["layout"]["orientation"]["characterOrder"]
        if character_order == "right-to-left":
            RTL_CODES.add(code)
        elif character_order != "left-to-right":
            # Neither of the two expected values; report it for manual review.
            print(f"Unknown order for {code}: {character_order}")

print(RTL_CODES)

# Rewrite rtl.csv with the merged set, sorted so that diffs stay minimal.
with open("rtl.csv", "w", encoding="utf-8") as handle:
    handle.write("code\n")
    for code in sorted(RTL_CODES):
        handle.write(f"{code}\n")
|
|
@ -132,12 +132,19 @@ with open("cldr.csv") as csvfile:
|
|||
if existing != number:
|
||||
CLDRPLURALS.append((code, LANGUAGE_NAMES[code], number, equation))
|
||||
|
||||
# Read extra plurals
|
||||
# Read default countries
|
||||
with open("default_countries.csv") as csvfile:
|
||||
reader = csv.reader(csvfile, delimiter=",")
|
||||
next(reader)
|
||||
DEFAULT_COUNTRIES = list(reader)
|
||||
|
||||
# Read RTL
|
||||
with open("rtl.csv") as csvfile:
|
||||
reader = csv.reader(csvfile, delimiter=",")
|
||||
next(reader)
|
||||
RTLS = list(reader)
|
||||
RTL_CODES = {lang[0] for lang in RTLS}
|
||||
|
||||
# Write language definitions
|
||||
with open("weblate_language_data/languages.py", "w") as output:
|
||||
output.write(HEADER)
|
||||
|
@ -195,6 +202,13 @@ with open("weblate_language_data/countries.py", "w") as output:
|
|||
for row in DEFAULT_COUNTRIES:
|
||||
output.write(" '{}',\n".format(*row))
|
||||
output.write(")\n")
|
||||
with open("weblate_language_data/rtl.py", "w") as output:
|
||||
output.write(HEADER)
|
||||
output.write("# List of RTL languages\n")
|
||||
output.write("RTL_LANGS = {\n")
|
||||
for code in sorted(RTL_CODES):
|
||||
output.write(f' "{code}",\n')
|
||||
output.write("}\n")
|
||||
|
||||
# Generate same check blacklist
|
||||
words = set()
|
||||
|
@ -322,6 +336,7 @@ subprocess.run(
|
|||
"pre-commit",
|
||||
"run",
|
||||
"--files",
|
||||
"weblate_language_data/rtl.py",
|
||||
"weblate_language_data/countries.py",
|
||||
"weblate_language_data/aliases.py",
|
||||
"weblate_language_data/plurals.py",
|
||||
|
|
|
@ -2,21 +2,43 @@
|
|||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
|
||||
"""
|
||||
Language data definitions.
|
||||
|
||||
This is an automatically generated file, see scripts/generate-language-data
|
||||
|
||||
Do not edit, please adjust language definitions in following repository:
|
||||
https://github.com/WeblateOrg/language-data
|
||||
"""
|
||||
# pylint: disable=line-too-long,too-many-lines
|
||||
|
||||
# List of RTL languages
|
||||
RTL_LANGS = {
|
||||
"ae",
|
||||
"aii",
|
||||
"ajp",
|
||||
"apc",
|
||||
"ar",
|
||||
"ar_BH",
|
||||
"ar_DZ",
|
||||
"ar_EG",
|
||||
"ar_KW",
|
||||
"ar_LY",
|
||||
"ar_MA",
|
||||
"ar_SA",
|
||||
"ar_YE",
|
||||
"ara",
|
||||
"arc",
|
||||
"ae",
|
||||
"aii",
|
||||
"ave",
|
||||
"bal",
|
||||
"bgn",
|
||||
"bqi",
|
||||
"ckb",
|
||||
"ckb_IR",
|
||||
"dv",
|
||||
"egy",
|
||||
"fa",
|
||||
"fa_AF",
|
||||
"fas",
|
||||
"ha",
|
||||
"he",
|
||||
|
@ -26,18 +48,22 @@ RTL_LANGS = {
|
|||
"lrc",
|
||||
"luz",
|
||||
"ms_Arab",
|
||||
"mzn",
|
||||
"nqo",
|
||||
"pal",
|
||||
"per",
|
||||
"phn",
|
||||
"ps",
|
||||
"rhg",
|
||||
"sam",
|
||||
"sd",
|
||||
"sdh",
|
||||
"skr",
|
||||
"syc",
|
||||
"syr",
|
||||
"ug",
|
||||
"ur",
|
||||
"ur_IN",
|
||||
"urd",
|
||||
"yi",
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue