language-data/scripts/add-iso.py

#! /usr/bin/env python3

# Copyright © Michal Čihař <michal@weblate.org>
#
# SPDX-License-Identifier: MIT

import csv
import json
import sys
from operator import itemgetter
from urllib.request import urlopen

# Maps a language code (the "alpha_2" / "alpha_3" values of the iso-codes
# entries loaded below) to its display name as produced by convert_name().
names = {}


def convert_name(name: str) -> str:
    """Rewrite a "Base, Extra" style name as "Base (Extra)".

    Only the first comma acts as the separator; both sides are stripped of
    surrounding whitespace. A name without any comma is returned unchanged.
    """
    head, comma, tail = name.partition(",")
    if not comma:
        return name
    return f"{head.strip()} ({tail.strip()})"


# URL template for the per-language plural data (JSON) hosted on
# fedorapeople.org; "{}" is replaced with the language code.
# See https://github.com/WeblateOrg/language-data/issues/103
PLURALDATA = "https://jibecfed.fedorapeople.org/partage/languages-data/{}.json"

# Fill `names` from both iso-codes tables. ISO 639-3 is read second, so its
# name replaces the ISO 639-2 one whenever a code appears in both files.
for json_path, root_key in (
    ("modules/iso-codes/data/iso_639-2.json", "639-2"),
    ("modules/iso-codes/data/iso_639-3.json", "639-3"),
):
    with open(json_path) as handle:
        for item in json.load(handle)[root_key]:
            # An entry may carry a two-letter code, a three-letter code or both.
            codes = [item[key] for key in ("alpha_2", "alpha_3") if key in item]
            if not codes:
                continue
            # Use "inverted_name" when the entry has one, otherwise "name".
            label = convert_name(item.get("inverted_name", item["name"]))
            for iso_code in codes:
                names[iso_code] = label

# Load the existing table: the first row is the header, every following row
# is kept as a list of string fields.
with open("languages.csv", newline="") as csv_file:
    rows = csv.reader(csv_file, delimiter=",")
    header = next(rows)
    lines = [*rows]

# Append one row per language code given on the command line. Each row is
# [code, name, number of plurals, plural formula]; the plural data is fetched
# from PLURALDATA and falls back to the defaults below when it is unusable.
for code in sys.argv[1:]:
    # Stop with a readable message (exit status 1) before doing any network
    # request when the code is missing from `names`, instead of failing with
    # a bare KeyError once the row is being built.
    if code not in names:
        sys.exit(f"Unknown language code: {code}")

    plurals = 2
    formula = "n != 1"
    try:
        with urlopen(PLURALDATA.format(code)) as handle:
            data = json.load(handle)
        for plural in data[code]["Plural-Forms"]:
            # Skip empty entries and entries containing the literal INTEGER
            # (presumably an unfilled template - TODO confirm).
            if not plural or "INTEGER" in plural:
                continue
            # Parsed as "nplurals=<count>; plural=<formula>;".
            parts = plural.split(";", 1)
            plurals = int(parts[0].split("=")[1].strip())
            if "plural" in parts[1]:
                formula = parts[1].split("=", 1)[1].strip().rstrip(";")
            else:
                # No "plural" keyword: keep the remainder, but clean it the
                # same way so no stray whitespace or ";" reaches the CSV.
                formula = parts[1].strip().rstrip(";")
            if formula.replace(" ", "") == "(n!=1)":
                formula = "n != 1"
            # Keep scanning while the entry only repeats the default formula;
            # the first entry with a different formula wins.
            if formula == "n != 1":
                continue
            break
    except (OSError, KeyError, ValueError, IndexError, TypeError):
        # OSError: download failed. KeyError: expected keys are missing.
        # ValueError: invalid JSON or a non-numeric plural count.
        # IndexError: entry without ";" or "=". TypeError: unexpected JSON shape.
        # Reset both values so a half-parsed entry cannot leave them inconsistent.
        plurals = 2
        formula = "n != 1"
        sys.stderr.write(f"Failed to load data for {code}, using defaults\n")
    lines.append(
        [
            code,
            # Keep only the text after the last ";" in the stored name.
            names[code].split(";")[-1].strip(),
            plurals,
            formula,
        ],
    )

# Rewrite languages.csv: the header first, then all rows ordered by their
# first column, using "\n" line endings.
with open("languages.csv", "w", newline="") as output:
    table = csv.writer(output, delimiter=",", lineterminator="\n")
    table.writerow(header)
    for row in sorted(lines, key=itemgetter(0)):
        table.writerow(row)
Add helper script to add language from ISO 639-2 Signed-off-by: Michal Čihař <michal@cihar.com> 2018-01-09 14:12:40 +01:00			`#! /usr/bin/env python3`

Use REUSE 3.0 for license data 2023-01-13 09:57:01 +01:00			`# Copyright © Michal Čihař <michal@weblate.org>`
			`#`
			`# SPDX-License-Identifier: MIT`

feat: use csv writer to generate CSV when adding This makes it produce valid CSV even in corner cases. 2025-04-01 08:31:16 +02:00			`import csv`
Add helper script to add language from ISO 639-2 Signed-off-by: Michal Čihař <michal@cihar.com> 2018-01-09 14:12:40 +01:00			`import json`
			`import sys`
feat: use csv writer to generate CSV when adding This makes it produce valid CSV even in corner cases. 2025-04-01 08:31:16 +02:00			`from operator import itemgetter`
Use data from Fedora to get plural equations See #103 2020-11-02 14:37:39 +01:00			`from urllib.request import urlopen`
Add helper script to add language from ISO 639-2 Signed-off-by: Michal Čihař <michal@cihar.com> 2018-01-09 14:12:40 +01:00
			`names = {}`

feat: use csv writer to generate CSV when adding This makes it produce valid CSV even in corner cases. 2025-04-01 08:31:16 +02:00
			`def convert_name(name: str) -> str:`
			`if "," in name:`
			`base, extra = name.split(",", 1)`
			`return f"{base.strip()} ({extra.strip()})"`
			`return name`


Use data from Fedora to get plural equations See #103 2020-11-02 14:37:39 +01:00			`# See https://github.com/WeblateOrg/language-data/issues/103`
			`PLURALDATA = "https://jibecfed.fedorapeople.org/partage/languages-data/{}.json"`

Use Python 3 features 2020-11-20 13:43:47 +01:00			`with open("modules/iso-codes/data/iso_639-2.json") as handle:`
Apply black code formatter Signed-off-by: Michal Čihař <michal@cihar.com> 2019-04-17 10:56:26 +02:00			`for item in json.load(handle)["639-2"]:`
			`for value in ("alpha_2", "alpha_3"):`
Add helper script to add language from ISO 639-2 Signed-off-by: Michal Čihař <michal@cihar.com> 2018-01-09 14:12:40 +01:00			`if value in item:`
feat: use csv writer to generate CSV when adding This makes it produce valid CSV even in corner cases. 2025-04-01 08:31:16 +02:00			`names[item[value]] = convert_name(`
			`item.get("inverted_name", item["name"])`
			`)`
Add helper script to add language from ISO 639-2 Signed-off-by: Michal Čihař <michal@cihar.com> 2018-01-09 14:12:40 +01:00
Use Python 3 features 2020-11-20 13:43:47 +01:00			`with open("modules/iso-codes/data/iso_639-3.json") as handle:`
Apply black code formatter Signed-off-by: Michal Čihař <michal@cihar.com> 2019-04-17 10:56:26 +02:00			`for item in json.load(handle)["639-3"]:`
			`for value in ("alpha_2", "alpha_3"):`
Support ISO 639-3 codes as well Signed-off-by: Michal Čihař <michal@cihar.com> 2018-04-09 09:36:30 +02:00			`if value in item:`
feat: use csv writer to generate CSV when adding This makes it produce valid CSV even in corner cases. 2025-04-01 08:31:16 +02:00			`names[item[value]] = convert_name(`
			`item.get("inverted_name", item["name"])`
			`)`
Support ISO 639-3 codes as well Signed-off-by: Michal Čihař <michal@cihar.com> 2018-04-09 09:36:30 +02:00
feat: use csv writer to generate CSV when adding This makes it produce valid CSV even in corner cases. 2025-04-01 08:31:16 +02:00			`with open("languages.csv", newline="") as handle:`
			`reader = csv.reader(handle, delimiter=",")`
			`header = next(reader)`
			`lines = list(reader)`
Add helper script to add language from ISO 639-2 Signed-off-by: Michal Čihař <michal@cihar.com> 2018-01-09 14:12:40 +01:00
			`for code in sys.argv[1:]:`
Use data from Fedora to get plural equations See #103 2020-11-02 14:37:39 +01:00			`plurals = 2`
			`formula = "n != 1"`
			`try:`
			`with urlopen(PLURALDATA.format(code)) as handle:`
			`data = json.load(handle)`
			`for plural in data[code]["Plural-Forms"]:`
			`if not plural or "INTEGER" in plural:`
			`continue`
			`parts = plural.split(";", 1)`
			`plurals = int(parts[0].split("=")[1].strip())`
Better handle broken plural forms in downloaded JSON 2020-11-09 10:09:42 +01:00			`if "plural" in parts[1]:`
			`formula = parts[1].split("=", 1)[1].strip().rstrip(";")`
			`else:`
			`formula = parts[1]`
			`if formula.replace(" ", "") == "(n!=1)":`
Use data from Fedora to get plural equations See #103 2020-11-02 14:37:39 +01:00			`formula = "n != 1"`
			`if formula == "n != 1":`
			`continue`
			`break`
Fix missing plurals handling 2021-02-24 16:55:52 +01:00			`except (OSError, KeyError):`
Use data from Fedora to get plural equations See #103 2020-11-02 14:37:39 +01:00			`sys.stderr.write(f"Failed to load data for {code}, using defaults\n")`
Apply black code formatter Signed-off-by: Michal Čihař <michal@cihar.com> 2019-04-17 10:56:26 +02:00			`lines.append(`
feat: use csv writer to generate CSV when adding This makes it produce valid CSV even in corner cases. 2025-04-01 08:31:16 +02:00			`[`
chore: use more of ruff 2025-02-05 14:40:41 +01:00			`code,`
			`names[code].split(";")[-1].strip(),`
			`plurals,`
			`formula,`
feat: use csv writer to generate CSV when adding This makes it produce valid CSV even in corner cases. 2025-04-01 08:31:16 +02:00			`],`
Apply black code formatter Signed-off-by: Michal Čihař <michal@cihar.com> 2019-04-17 10:56:26 +02:00			`)`
Add helper script to add language from ISO 639-2 Signed-off-by: Michal Čihař <michal@cihar.com> 2018-01-09 14:12:40 +01:00
feat: use csv writer to generate CSV when adding This makes it produce valid CSV even in corner cases. 2025-04-01 08:31:16 +02:00			`with open("languages.csv", "w", newline="") as handle:`
			`writer = csv.writer(handle, delimiter=",", lineterminator="\n")`
			`writer.writerow(header)`
			`writer.writerows(sorted(lines, key=itemgetter(0)))`