weblate/scripts/generate-non-word-chars
Michal Čihař 9bed419161 Checks: Extend list of non-word chars
Some unicode categories were missing.

Fixes #5576
2021-03-04 12:16:26 +01:00

30 lines
589 B
Python
Executable file

#!/usr/bin/env python
"""
Generates list of non-word chars.
Used in weblate/checks/data.py
"""
import pprint
import sys
import unicodedata
# Unicode general categories whose members are treated as non-word characters
CATEGORIES = {"Po", "Ps", "Zs", "Cc", "Sk"}

# Characters left out of the output even when their category matches
EXCLUDES = {
    # Dropped so that the generated list does not break regexp syntax
    "]",
    # Skipped on purpose
    "-",
    # Used in Catalan ŀ
    "·",
    # NOTE(review): an empty string never equals a one-character candidate,
    # so this entry excludes nothing; possibly a character that was lost
    # along the way -- confirm against the upstream file.
    "",
}

# Walk every code point in ascending order and keep the characters that are
# not explicitly excluded and belong to one of the selected categories.
non_word_chars = []
for codepoint in range(sys.maxunicode + 1):
    char = chr(codepoint)
    if char in EXCLUDES:
        continue
    if unicodedata.category(char) in CATEGORIES:
        non_word_chars.append(char)

# Print the result as a Python list literal on standard output.
pprint.pprint(non_word_chars)