language-data/scripts/export-qt
Michal Čihař 5994f50d4e fix: remove duplicate plural definitions for Qt
The source file contains an ambiguous definition for Filipino and a double
definition for Interlingua.
2024-10-15 15:12:37 +02:00

141 lines
3.8 KiB
Python
Executable file

#! /usr/bin/env python3
# Copyright © Michal Čihař <michal@weblate.org>
#
# SPDX-License-Identifier: MIT
import os
import re
import csv
# Translation table from the language identifiers extracted from numerus.cpp
# (with the "QLocale::" prefix already removed) to the names used as lookup
# keys for languages.csv.
#
# Each value is a tuple of names: one identifier may expand to several
# entries, and an empty tuple means the identifier produces no output at all.
# Identifiers that are not listed here are looked up under their own name.
ALIASES = {
    "Chinese": (
        "Chinese (Simplified Han script)",
        "Chinese (Traditional Han script)",
    ),
    "WesternFrisian": ("Frisian",),
    # Ignore, duplicate for Interlingua
    "Interlingue": (),
    "Khmer": ("Khmer (Central)",),
    "Kirghiz": ("Kyrgyz",),
    "NorthernSotho": ("Pedi",),
    "NorwegianBokmal": ("Norwegian Bokmål",),
    "NorwegianNynorsk": ("Norwegian Nynorsk",),
    "Oriya": ("Odia",),
    "SouthernSotho": ("Sotho (Southern)",),
    "Uigur": ("Uyghur",),
    "Volapuk": ("Volapük",),
    "Divehi": ("Dhivehi",),
    "Inupiak": ("Inupiaq",),
    "NorthernSami": ("Sami (Northern)",),
    # Synthetic identifier produced by handle_language() for the
    # frenchStyleLanguages group.
    "PortugueseBrasil": ("Portuguese (Brazil)",),
    "Ganda": ("Luganda",),
}
def parse_csv(name):
    """Load a CSV file into a dictionary keyed by the second column.

    The first row is treated as a header and skipped.  Blank rows and rows
    whose first field is exactly ``#`` are ignored.

    Args:
        name: Path of the CSV file to read.

    Returns:
        A dict mapping the second field of every remaining row to the
        complete row (a list of strings).

    Raises:
        ValueError: If two rows share the same second field.
    """
    result = {}
    # newline="" is what the csv module expects from its input file, and the
    # explicit encoding keeps non-ASCII names readable whatever the locale is.
    with open(name, encoding="utf-8", newline="") as csvfile:
        reader = csv.reader(csvfile, delimiter=",")
        # Skip the header; the default avoids StopIteration on an empty file.
        next(reader, None)
        for data in reader:
            # csv.reader yields [] for blank lines, which must not be indexed.
            if not data or data[0] == "#":
                continue
            if data[1] in result:
                raise ValueError(f"Duplicate {data[1]} in {name}!")
            result[data[1]] = data
    return result
# Rows of languages.csv indexed by their second column.  The path is relative,
# so the file is looked up in the current working directory when this runs.
DEFINITIONS = parse_csv("languages.csv")
# Matches one brace-delimited table entry made of five comma-separated fields
# followed by a quoted "nplurals=N; plural=EXPR;" string.  Groups 1-5 are the
# plain fields, group 6 is the number of plurals and group 7 the plural
# expression.  Raw string literals are required here: "\s" is not a valid
# escape sequence in an ordinary Python string literal.
plural_definition = re.compile(
    r'{\s*([^,]+)\s*,\s*([^,]+)\s*,\s*([^,]+)\s*,\s*([^,]+)\s*,\s*([^,]+)\s*,'
    r'\s*"nplurals=([0-9]+); plural=([^"]+);"\s}'
)
# Filled by handle_language(): array name -> list of language identifiers
# with the "QLocale::" prefix removed.
LANGUAGES = {}
# Filled by handle_table(): languages array name -> (nplurals, plural formula).
PLURALS = {}
def handle_language(parts):
    """Record one ``static const QLocale::Language`` array in ``LANGUAGES``.

    The lines are joined, newlines removed, and the declaration prefix is cut
    off so the text starts with the array name.  The initializer between the
    first ``{`` and the following ``}`` is then reduced to bare identifiers by
    deleting ``,``, ``EOL``, ``QLocale::`` and ``Language`` and splitting on
    whitespace.

    Args:
        parts: Source lines of a single declaration; the first one starts
            with ``static const QLocale::Language``.

    Returns:
        None.  The result is stored as ``LANGUAGES[array_name]``.

    Raises:
        IndexError: If the joined text contains no ``{``.
    """
    # The array name starts right after this prefix (31 characters).
    prefix = "static const QLocale::Language "
    text = "".join(parts).replace("\n", "")[len(prefix):]
    name = text.split("[", 1)[0]

    if name == "frenchStyleLanguages":
        # Remove duplicate definition
        text = text.replace("QLocale::Filipino,", "")
        # Hack to deal with frenchStyleCountries: rename Portuguese so that
        # ALIASES resolves it to "Portuguese (Brazil)".
        text = text.replace("Portuguese", "PortugueseBrasil")

    initializer = text.split("{")[1].split("}")[0]
    # Same substrings, removed in the same order, as the original chain.
    for noise in (",", "EOL", "QLocale::", "Language"):
        initializer = initializer.replace(noise, "")
    LANGUAGES[name] = initializer.split()
def handle_table(parts):
    """Extract the plural rules of every table entry into ``PLURALS``.

    The lines are joined with newlines removed, the text up to and including
    the first ``{`` (the opening brace of the table itself) is discarded, and
    each entry matched by ``plural_definition`` is stored.

    Args:
        parts: Source lines of the table definition, from its declaration
            line up to the line containing ``};``.

    Returns:
        None.  For every entry, ``PLURALS`` maps the fourth field (the name
        of the languages array) to ``(nplurals, plural_expression)``.
    """
    # Split on the table's opening brace rather than a fixed character offset
    # so the spacing of the declaration line does not matter.
    entries = "".join(parts).replace("\n", "").partition("{")[2]
    for match in plural_definition.findall(entries):
        # "[^,]+" is greedy and also captures blanks before the comma; strip
        # them so the key matches the array names stored in LANGUAGES.
        PLURALS[match[3].strip()] = int(match[5]), match[6]
# Scan numerus.cpp line by line with a small state machine:
#   * a line starting with "static const QLocale::Language" opens a language
#     array, which is collected until a line containing ";" and then passed
#     to handle_language();
#   * a line starting with "static const NumerusTableEntry numerusTable" opens
#     the table, which is collected until a line containing "};" and then
#     passed to handle_table().
in_language = False
in_table = False
parts = []
with open(
    "modules/qttools/src/linguist/shared/numerus.cpp", encoding="utf-8"
) as handle:
    for line in handle:
        if in_language:
            # A line that is entirely a "//" comment holds no array items, and
            # a ";" inside it does not terminate the declaration.
            if line.strip().startswith("//"):
                continue
            parts.append(line)
            if ";" in line:
                handle_language(parts)
                in_language = False
        elif in_table:
            parts.append(line)
            if "};" in line:
                handle_table(parts)
                in_table = False
        elif line.startswith("static const QLocale::Language"):
            parts = [line]
            if ";" in line:
                # Declaration fits on a single line.
                handle_language(parts)
            else:
                in_language = True
        elif line.startswith("static const NumerusTableEntry numerusTable"):
            parts = [line]
            in_table = True
# Rows collected by generate() as (code, name, nplurals, formula) tuples.
output = []
# First column of every row already collected; used to reject duplicates.
processed = set()
def generate(group, name):
    """Queue one output row for language *name* with the rules of *group*.

    Args:
        group: Key into ``PLURALS`` selecting the plural rules.
        name: Key into ``DEFINITIONS`` selecting the language row.

    Returns:
        None.  A ``(code, name, nplurals, formula)`` tuple is appended to
        ``output`` and the code is remembered in ``processed``.

    Raises:
        KeyError: If *name* is not in ``DEFINITIONS`` or *group* is not in
            ``PLURALS``.
        ValueError: If a row with the same code was already queued.
    """
    row = DEFINITIONS[name]
    rule = PLURALS[group]
    code = row[0]
    if code in processed:
        raise ValueError(f"Duplicate definition for {code}")
    processed.add(code)
    output.append((code, row[1], rule[0], rule[1]))
# Expand every language array into output rows.  Identifiers listed in
# ALIASES are replaced by their aliases (an empty tuple drops the language);
# all other identifiers are looked up under their own name.
for group, languages in LANGUAGES.items():
    for language in languages:
        for aliased in ALIASES.get(language, (language,)):
            generate(group, aliased)

# Start from a fresh file.  A missing qt.csv (e.g. on the first run) is fine
# and must not abort the export.
try:
    os.unlink("qt.csv")
except FileNotFoundError:
    pass

# csv.writer quotes any field that contains a delimiter or quote character, so
# the result can always be read back with csv.reader.  newline="" together
# with lineterminator="\n" yields "\n" line endings on every platform, and the
# explicit encoding keeps non-ASCII language names intact.
with open("qt.csv", "w", encoding="utf-8", newline="") as handle:
    writer = csv.writer(handle, lineterminator="\n")
    writer.writerow(("code", "name", "nplurals", "formula"))
    writer.writerows(sorted(output))