mirror of
https://gh.wpcy.net/https://github.com/discourse/discourse.git
synced 2026-06-19 02:05:37 +08:00
This PR changes Discourse AI translations from an opt-in category model to an opt-out model: instead of translating only selected `ai_translation_target_categories`, it introduces `ai_translation_excluded_categories`, updates the admin UI copy and save flow, changes topic/post/category candidate selection and detection jobs to translate all non-excluded categories by default, and adds a migration that converts existing target-category settings into the equivalent excluded-category list for existing sites. It also updates all related specs. --------- Co-authored-by: discourse-patch-triage[bot] <272280883+discourse-patch-triage[bot]@users.noreply.github.com>
204 lines
7.3 KiB
Ruby
Vendored
204 lines
7.3 KiB
Ruby
Vendored
# frozen_string_literal: true
|
|
|
|
module DiscourseAi
|
|
module Translation
|
|
class PostCandidates
|
|
# Cached entry point for translation progress across all supported locales.
#
# The payload is computed by `completion_all_locales` and stored in
# `Discourse.cache` for 30 minutes under `progress_cache_key`, so changing any
# setting that is part of that key yields a fresh computation.
#
# Per-locale totals only consider candidate posts that already have a locale;
# the aggregate counts cover all eligible posts and those with a detected locale.
#
# @return [Hash] a hash with keys :translation_progress (array), :total (integer),
#   and :posts_with_detected_locale (integer)
def self.get_completion_all_locales
  cache_key = progress_cache_key

  Discourse.cache.fetch(cache_key, expires_in: 30.minutes) do
    completion_all_locales
  end
end
|
|
|
|
# Lists (post, target locale) pairs that still lack a localization.
#
# One full locale is kept per base language (the part before the first "_");
# when several configured locales share a base, the first one wins. Every
# eligible post that already has a locale is paired with each target whose
# base language differs from the post's own, and pairs that already have a
# row in post_localizations for that base language are dropped.
#
# @param limit [#to_i] maximum number of pairs to return
# @return [Array<Array>] `[post_id, target_locale]` pairs, most recently
#   updated posts first; empty when no locales are configured
def self.needs_localization(limit:)
  configured_locales = DiscourseAi::Translation.locales
  return [] if configured_locales.blank?

  # base language => first full locale seen for that base
  full_locale_by_base =
    configured_locales.each_with_object({}) do |locale, memo|
      memo[locale.split("_").first] ||= locale
    end

  # NOTE(review): locale strings are interpolated directly into the SQL below;
  # this presumably relies on them coming from validated site settings — confirm.
  value_tuples = full_locale_by_base.map { |base, full| "('#{base}', '#{full}')" }
  target_locale_values = value_tuples.join(", ")

  # Only posts whose locale has already been detected can be compared to a target.
  base_sql = get.where.not(locale: nil).to_sql

  sql = <<~SQL
    SELECT ep.id AS post_id, target.target_locale
    FROM (#{base_sql}) ep
    JOIN (VALUES #{target_locale_values}) AS target(base_locale, target_locale)
      ON target.base_locale != split_part(ep.locale, '_', 1)
    WHERE NOT EXISTS (
      SELECT 1 FROM post_localizations pl
      WHERE pl.post_id = ep.id
        AND split_part(pl.locale, '_', 1) = target.base_locale
    )
    ORDER BY ep.updated_at DESC, target.target_locale
    LIMIT #{limit.to_i}
  SQL

  rows = DB.query(sql)
  rows.map { |row| [row.post_id, row.target_locale] }
end
|
|
|
|
private
|
|
|
|
# Relation of every post eligible for translation under the current site
# settings, including posts whose locale has not been detected yet.
#
# NOTE: this method sits below a bare `private`, but `private` does not apply
# to methods defined with `def self.`, so it remains publicly callable.
#
# @return [ActiveRecord::Relation] posts joined to their topics
def self.get
  max_length = SiteSetting.ai_translation_max_post_length
  humans_only = !SiteSetting.ai_translation_include_bot_content
  private_message = Archetype.private_message

  # Recent, non-deleted, non-empty posts that fit within the length limit.
  scope =
    Post.where(
      "posts.created_at > ?",
      SiteSetting.ai_translation_backfill_max_age_days.days.ago,
    )
  scope = scope.where(deleted_at: nil)
  scope = scope.where.not(raw: [nil, ""])
  scope = scope.where("LENGTH(posts.raw) <= ?", max_length)
  scope = scope.where("posts.user_id > 0") if humans_only
  scope = scope.joins(:topic)

  # if no categories are excluded, posts from all categories will be sent for translation
  # private categories need to be explicitly excluded
  excluded_ids = DiscourseAi::Translation.excluded_category_ids
  pm_setting = SiteSetting.ai_translation_personal_messages

  scope =
    if excluded_ids.present?
      scope.where(
        "topics.category_id NOT IN (:cats) OR topics.archetype = :pm",
        cats: excluded_ids,
        pm: private_message,
      )
    else
      scope.where(
        "topics.category_id IS NOT NULL OR topics.archetype = :pm",
        pm: private_message,
      )
    end

  # PM scope filter: "group" keeps only PMs that have allowed groups,
  # "none" (or an unset value) drops PMs entirely; any other value keeps all PMs.
  if pm_setting == "group"
    scope =
      scope.where(
        "topics.archetype != :pm OR topics.id IN (SELECT topic_id FROM topic_allowed_groups)",
        pm: private_message,
      )
  elsif pm_setting.nil? || pm_setting == "none"
    scope = scope.where.not(topics: { archetype: private_message })
  end

  # Always include posts from banner topics regardless of age or category filters
  banner_scope =
    Post
      .where(deleted_at: nil)
      .where.not(raw: [nil, ""])
      .where("LENGTH(posts.raw) <= ?", max_length)
      .joins(:topic)
      .where(topics: { archetype: Archetype.banner, deleted_at: nil })
  banner_scope = banner_scope.where("posts.user_id > 0") if humans_only

  scope.or(banner_scope)
end
|
|
|
|
# Cache key for the translation-progress payload.
#
# The key embeds every setting read here (supported locales, backfill age,
# bot-content flag, max post length, PM scope) plus the sorted excluded
# category ids, so a change to any of them produces a different key.
#
# @return [String] colon-separated cache key
def self.progress_cache_key
  excluded_ids = DiscourseAi::Translation.excluded_category_ids.sort.join(",")

  key_parts = [
    "ai-translations-progress",
    SiteSetting.content_localization_supported_locales,
    SiteSetting.ai_translation_backfill_max_age_days,
    SiteSetting.ai_translation_include_bot_content,
    SiteSetting.ai_translation_max_post_length,
    SiteSetting.ai_translation_personal_messages,
    excluded_ids,
  ]

  key_parts.join(":")
end
|
|
|
|
# Computes translation progress for every supported locale in one query.
#
# For each supported locale the query reports:
# - total: eligible posts with a detected locale whose base language differs
#   from the locale's base language (i.e. posts that need translating)
# - done: the subset of those posts that already have a post_localizations
#   row for that base language
# plus two aggregates that are identical on every row: the count of all
# eligible posts and the count of eligible posts with a detected locale.
#
# @return [Hash] a hash with keys :translation_progress (array of
#   `{ locale:, done:, total: }` sorted by completion ratio, highest first),
#   :total (integer) and :posts_with_detected_locale (integer)
def self.completion_all_locales
  empty_progress = { translation_progress: [], total: 0, posts_with_detected_locale: 0 }

  supported = SiteSetting.content_localization_supported_locales.split("|")

  # With no supported locales the VALUES list below would be empty, which is
  # invalid SQL and would raise; return the empty payload instead.
  return empty_progress if supported.empty?

  values_rows = supported.map { |loc| "('#{loc}')" }.join(", ")

  sql = <<~SQL
    WITH supported AS (
      SELECT localestr,
             split_part(localestr, '_', 1) AS base
      FROM (VALUES #{values_rows}) AS t(localestr)
    ),
    all_eligible_posts AS (
      #{get.to_sql}
    ),
    total_eligible_count AS (
      SELECT COUNT(*)::bigint AS count FROM all_eligible_posts
    ),
    eligible_posts AS (
      SELECT * FROM all_eligible_posts WHERE locale IS NOT NULL
    ),
    all_posts_count AS (
      SELECT COUNT(*)::bigint AS count FROM eligible_posts
    ),
    non_target_locale_counts AS (
      SELECT s.base,
             COUNT(*)::bigint AS count
      FROM eligible_posts p
      CROSS JOIN supported s
      WHERE split_part(p.locale, '_', 1) != s.base
      GROUP BY s.base
    ),
    done_per_base AS (
      SELECT s.base,
             COUNT(*)::bigint AS done
      FROM eligible_posts p
      JOIN supported s ON TRUE
      WHERE split_part(p.locale, '_', 1) != s.base AND EXISTS (
        SELECT 1
        FROM post_localizations pl
        WHERE pl.post_id = p.id
          AND split_part(pl.locale, '_', 1) = s.base
      )
      GROUP BY s.base
    )
    SELECT s.localestr AS locale,
           COALESCE(d.done, 0) AS done,
           COALESCE(ntl.count, 0) AS total,
           (SELECT count FROM total_eligible_count) AS total_eligible,
           (SELECT count FROM all_posts_count) AS posts_with_locale
    FROM supported s
    LEFT JOIN done_per_base d ON d.base = s.base
    LEFT JOIN non_target_locale_counts ntl ON ntl.base = s.base
  SQL

  results = DB.query(sql)

  return empty_progress if results.empty?

  # Extract aggregate counts from first row (same for all rows)
  total_eligible = results.first.total_eligible
  posts_with_locale = results.first.posts_with_locale

  # Build per-locale progress array
  translation_progress =
    results.map { |r| { locale: r.locale, done: r.done, total: r.total } }

  # Most complete locales first; locales with nothing to translate count as 0%.
  translation_progress =
    translation_progress.sort_by do |r|
      percentage = r[:total] > 0 ? r[:done].to_f / r[:total] : 0
      -percentage
    end

  {
    translation_progress: translation_progress,
    total: total_eligible,
    posts_with_detected_locale: posts_with_locale,
  }
end
|
|
end
|
|
end
|
|
end
|