weblate/scripts/generate-non-word-chars
Michal Čihař b1d0c0715d chore: Consolidate copyright headers
SPDX-FileCopyrightText was mistakenly used on some files.
2023-01-11 09:25:24 +01:00

35 lines
688 B
Python
Executable file

#!/usr/bin/env python
# Copyright © Michal Čihař <michal@weblate.org>
#
# SPDX-License-Identifier: GPL-3.0-or-later
"""
Generates list of non-word chars.
Used in weblate/checks/data.py
"""
import pprint
import sys
import unicodedata
# Unicode general categories to consider non word chars:
#   Po - punctuation, other       Ps - punctuation, open
#   Zs - separator, space         Cc - other, control
#   Sk - symbol, modifier
CATEGORIES = {"Po", "Ps", "Zs", "Cc", "Sk"}
# Excluded chars
EXCLUDES = {
    # Removed to avoid breaking regexp syntax
    "]",
    # We intentionally skip following
    "-",
    # Used in Catalan ŀ
    "·",
    # NOTE(review): an empty string never compares equal to a single
    # character, so this entry currently has no effect. It looks like a
    # character that got lost somewhere - confirm what was intended here.
    "",
}


def non_word_chars(limit: int = sys.maxunicode + 1) -> list:
    """Return the non-word characters whose code point is below *limit*.

    A character is a non-word character when its Unicode general category
    is listed in CATEGORIES and the character is not listed in EXCLUDES.

    :param limit: Exclusive upper bound of the scanned code points. The
        default scans the whole Unicode range supported by the interpreter.
    :return: One-character strings in ascending code point order.
    """
    return [
        char
        for char in map(chr, range(limit))
        if char not in EXCLUDES and unicodedata.category(char) in CATEGORIES
    ]


def main() -> None:
    """Pretty-print the complete list of non-word characters to stdout."""
    pprint.pprint(non_word_chars())


# Only generate the output when executed as a script, so that loading the
# file does not scan the whole Unicode range and print as a side effect.
if __name__ == "__main__":
    main()