weblate/weblate/checks/placeholders.py
2024-07-29 15:00:18 +02:00

173 lines
5.7 KiB
Python

# Copyright © Michal Čihař <michal@weblate.org>
#
# SPDX-License-Identifier: GPL-3.0-or-later
from __future__ import annotations
import re
from typing import TYPE_CHECKING, Any, Literal
from django.utils.functional import SimpleLazyObject
from django.utils.html import escape, format_html, format_html_join
from django.utils.safestring import mark_safe
from django.utils.translation import gettext_lazy
from weblate.checks.base import TargetCheckParametrized
from weblate.checks.parser import multi_value_flag, single_value_flag
if TYPE_CHECKING:
from weblate.trans.models import Unit
def parse_regex(val):
    """Compile *val* when it is a string; return any other value untouched."""
    return re.compile(val) if isinstance(val, str) else val
class PlaceholderCheck(TargetCheckParametrized):
    """Report placeholders that are missing from, or extra in, a translation.

    The placeholders to look for come from the parametrized value of the
    check. Each parameter is either a plain string (matched literally) or an
    object exposing ``.pattern`` (presumably a compiled regular expression).
    """

    check_id = "placeholders"
    default_disabled = True
    name = gettext_lazy("Placeholders")
    description = gettext_lazy("Translation is missing some placeholders")

    @property
    def param_type(self):
        """Return the flag parser built by ``multi_value_flag`` with an identity converter."""
        return multi_value_flag(lambda x: x)

    def get_value(self, unit: Unit):
        """Compile every configured placeholder into one alternation regex.

        String parameters are escaped so they match literally; any other
        parameter contributes its ``.pattern`` unchanged. The regex is
        compiled with ``re.IGNORECASE`` when the unit carries the
        ``case-insensitive`` flag.
        """
        params = super().get_value(unit)
        alternatives = (
            re.escape(param) if isinstance(param, str) else param.pattern
            for param in params
        )
        flags = re.IGNORECASE if "case-insensitive" in unit.all_flags else 0
        return re.compile("|".join(alternatives), flags)

    @staticmethod
    def get_matches(value, text: str):
        """Yield the matched text for every match of regex *value* in *text*."""
        for match in value.finditer(text):
            yield match.group()

    def diff_case_sensitive(self, expected, found):
        """Return ``(missing, extra)`` using exact set difference."""
        return expected - found, found - expected

    def diff_case_insensitive(self, expected, found):
        """Return ``(missing, extra)`` comparing case-folded values.

        The returned sets contain the original spelling of each value, not
        the case-folded key used for the comparison.
        """
        expected_fold = {v.casefold(): v for v in expected}
        found_fold = {v.casefold(): v for v in found}
        expected_set = set(expected_fold)
        found_set = set(found_fold)
        return (
            {expected_fold[v] for v in expected_set - found_set},
            {found_fold[v] for v in found_set - expected_set},
        )

    def check_target_unit(  # type: ignore[override]
        self, sources: list[str], targets: list[str], unit: Unit
    ) -> Literal[False] | dict[str, Any]:
        """Delegate to the parent implementation; exists only to narrow the type."""
        # TODO: this is type annotation hack, instead the check should have a proper return type
        return super().check_target_unit(sources, targets, unit)  # type: ignore[return-value]

    def check_target_params(  # type: ignore[override]
        self, sources: list[str], targets: list[str], unit: Unit, value
    ) -> Literal[False] | dict[str, Any]:
        """Compare placeholders found in the source with those in each target.

        Expected placeholders are taken from the first source string, falling
        back to the last one when the first has none and several sources
        exist.

        Returns ``False`` when nothing is wrong, otherwise a dict with the
        keys ``"missing"`` and ``"extra"`` holding sets of placeholder strings.
        """
        expected = set(self.get_matches(value, sources[0]))
        if not expected and len(sources) > 1:
            expected = set(self.get_matches(value, sources[-1]))

        # The lambda only runs on first use of the wrapped value, so the
        # plural examples are fetched only if the loop below consults them.
        plural_examples = SimpleLazyObject(lambda: unit.translation.plural.examples)

        if "case-insensitive" in unit.all_flags:
            diff_func = self.diff_case_insensitive
        else:
            diff_func = self.diff_case_sensitive

        single_target = len(targets) == 1
        missing: set[str] = set()
        extra: set[str] = set()
        for pluralno, target in enumerate(targets):
            # Allow to skip format string in case there is single plural or in special
            # case of 0, 1 plural. It is technically wrong, but in many cases there
            # won't be 0 so don't trigger too many false positives
            if not single_target:
                # Consult the plural examples only for multi-form units so the
                # lazy object stays unevaluated when there is one target.
                plural_example = plural_examples[pluralno]
                if len(plural_example) <= 1 or plural_example == ["0", "1"]:
                    continue
            found = set(self.get_matches(value, target))
            target_missing, target_extra = diff_func(expected, found)
            missing.update(target_missing)
            extra.update(target_extra)

        if missing or extra:
            return {"missing": missing, "extra": extra}
        return False

    def check_highlight(self, source: str, unit: Unit):
        """Yield ``(start, end, text)`` for each placeholder match in *source*.

        Yields nothing when ``should_skip`` reports the unit as skipped.
        """
        if self.should_skip(unit):
            return
        regexp = self.get_value(unit)
        for match in regexp.finditer(source):
            yield (match.start(), match.end(), match.group())

    def get_description(self, check_obj):
        """Build an HTML description listing the missing and extra placeholders.

        Falls back to the parent description when re-running the check on the
        unit reports no problem.
        """
        unit = check_obj.unit
        result = self.check_target_unit(
            unit.get_source_plurals(), unit.get_target_plurals(), unit
        )
        if not result:
            return super().get_description(check_obj)

        errors = []
        if result["missing"]:
            errors.append(self.get_missing_text(result["missing"]))
        if result["extra"]:
            errors.append(self.get_extra_text(result["extra"]))

        # Each message goes through format_html_join's "{}" formatting;
        # only the <br /> separator is marked safe explicitly.
        return format_html_join(
            mark_safe("<br />"),  # noqa: S308
            "{}",
            ((error,) for error in errors),
        )
class RegexCheck(TargetCheckParametrized):
    """Report translations that do not match a configured regular expression."""

    check_id = "regex"
    default_disabled = True
    name = gettext_lazy("Regular expression")
    description = gettext_lazy("Translation does not match regular expression")

    @property
    def param_type(self):
        """Return the flag parser built by ``single_value_flag`` over ``parse_regex``."""
        return single_value_flag(parse_regex)

    def check_target_params(
        self, sources: list[str], targets: list[str], unit: Unit, value
    ):
        """Return ``True`` when at least one target has no match for *value*.

        *value* is the compiled regular expression; it is searched anywhere
        in each target string.
        """
        # search() stops at the first match; there is no need to collect
        # every match just to learn whether one exists.
        return any(value.search(target) is None for target in targets)

    def should_skip(self, unit: Unit) -> bool:
        """Skip when the parent class says so or the configured pattern is empty."""
        if super().should_skip(unit):
            return True
        return not self.get_value(unit).pattern

    def check_highlight(self, source: str, unit: Unit):
        """Yield ``(start, end, text)`` for each regex match in *source*.

        Yields nothing when ``should_skip`` reports the unit as skipped.
        """
        if self.should_skip(unit):
            return
        regex = self.get_value(unit)
        for match in regex.finditer(source):
            yield (match.start(), match.end(), match.group())

    def get_description(self, check_obj):
        """Build an HTML description that shows the expected pattern.

        Falls back to the parent description when the unit has no value
        configured for this check.
        """
        unit = check_obj.unit
        if not self.has_value(unit):
            return super().get_description(check_obj)
        regex = self.get_value(unit)
        # The message is escaped and the pattern goes through format_html's
        # "{}" formatting before being wrapped in <code>.
        return format_html(
            escape(gettext_lazy("Does not match regular expression {}.")),
            format_html("<code>{}</code>", regex.pattern),
        )