discourse/plugins/discourse-ai/lib/translation/post_candidates.rb
Penar Musaraj 90baea1ea7
FEATURE: Switch from opt-in to opt-out for categories in AI translations (#40169)
This PR changes Discourse AI translations from an opt-in category model
to an opt-out model: instead of translating only selected
`ai_translation_target_categories`, it introduces
`ai_translation_excluded_categories`, updates the admin UI copy and save
flow, changes topic/post/category candidate selection and detection jobs
to translate all non-excluded categories by default, and adds a
migration that converts existing target-category settings into the
equivalent excluded-category list for existing sites.

It also updates all related specs.

---------

Co-authored-by: discourse-patch-triage[bot] <272280883+discourse-patch-triage[bot]@users.noreply.github.com>
2026-05-26 14:51:04 -04:00

204 lines
7.3 KiB
Ruby
Vendored

# frozen_string_literal: true
module DiscourseAi
module Translation
class PostCandidates
# Cached translation progress across all supported locales.
#
# For each locale this reports how many posts have been translated and how
# many need translation; the per-locale totals only count candidates that
# already have a locale. Aggregate counts for all eligible posts and for posts
# with a detected locale are included as well.
#
# The value is computed by completion_all_locales and kept in Discourse.cache
# for 30 minutes under progress_cache_key.
#
# @return [Hash] a hash with keys :translation_progress (array), :total (integer), and :posts_with_detected_locale (integer)
def self.get_completion_all_locales
  Discourse.cache.fetch(progress_cache_key, expires_in: 30.minutes) do
    completion_all_locales
  end
end
# Finds eligible posts that still lack a localization for a target locale.
#
# Only posts returned by .get that already have a locale are considered. A
# post is paired with a target locale when the post's base language (the part
# of the locale before the first "_") differs from the target's base language
# and no post_localizations row exists for that post in that base language.
#
# @param limit [#to_i] maximum number of (post, locale) pairs to return
# @return [Array<Array>] [post_id, target_locale] pairs, ordered by post
#   updated_at descending and then by target locale; empty when no target
#   locales are configured
def self.needs_localization(limit:)
  locales = DiscourseAi::Translation.locales
  return [] if locales.blank?

  # One target per base language: the first configured locale wins.
  locale_map =
    locales.each_with_object({}) { |locale, map| map[locale.split("_").first] ||= locale }

  # Locale strings come from settings; escape them instead of interpolating
  # them raw so a stray quote cannot break or alter the statement.
  target_locale_values =
    locale_map
      .map { |base, full| ActiveRecord::Base.sanitize_sql_array(["(?, ?)", base.to_s, full]) }
      .join(", ")

  base_sql = get.where.not(locale: nil).to_sql

  sql = <<~SQL
    SELECT ep.id AS post_id, target.target_locale
    FROM (#{base_sql}) ep
    JOIN (VALUES #{target_locale_values}) AS target(base_locale, target_locale)
      ON target.base_locale != split_part(ep.locale, '_', 1)
    WHERE NOT EXISTS (
      SELECT 1 FROM post_localizations pl
      WHERE pl.post_id = ep.id
        AND split_part(pl.locale, '_', 1) = target.base_locale
    )
    ORDER BY ep.updated_at DESC, target.target_locale
    LIMIT #{limit.to_i}
  SQL

  DB.query(sql).map { |row| [row.post_id, row.target_locale] }
end
private
# Relation of every post eligible for translation under the current site
# settings, including posts whose locale has not been detected yet.
#
# Regular posts must be newer than ai_translation_backfill_max_age_days, not
# deleted, have non-empty raw no longer than ai_translation_max_post_length,
# and pass the category and personal-message filters below. Posts in banner
# topics are always included regardless of age or category filters.
#
# Note: in Ruby a bare `private` does not change the visibility of methods
# defined with `def self.`, so this method remains callable from outside the
# class.
def self.get
  max_raw_length = SiteSetting.ai_translation_max_post_length
  skip_bot_content = !SiteSetting.ai_translation_include_bot_content
  pm_archetype = Archetype.private_message

  scope =
    Post
      .where("posts.created_at > ?", SiteSetting.ai_translation_backfill_max_age_days.days.ago)
      .where(deleted_at: nil)
      .where.not(raw: [nil, ""])
      .where("LENGTH(posts.raw) <= ?", max_raw_length)
  scope = scope.where("posts.user_id > 0") if skip_bot_content
  scope = scope.joins(:topic)

  # With no exclusions every category is translated; private categories have
  # to be excluded explicitly. Personal messages bypass the category check and
  # are narrowed by the PM scope further down.
  excluded_ids = DiscourseAi::Translation.excluded_category_ids
  scope =
    if excluded_ids.present?
      scope.where(
        "topics.category_id NOT IN (:cats) OR topics.archetype = :pm",
        cats: excluded_ids,
        pm: pm_archetype,
      )
    else
      scope.where("topics.category_id IS NOT NULL OR topics.archetype = :pm", pm: pm_archetype)
    end

  # PM scope: "group" keeps only PMs that have an allowed group, "none" (or an
  # unset value) drops PMs entirely, anything else adds no extra restriction.
  pm_scope = SiteSetting.ai_translation_personal_messages
  if pm_scope == "group"
    scope =
      scope.where(
        "topics.archetype != :pm OR topics.id IN (SELECT topic_id FROM topic_allowed_groups)",
        pm: pm_archetype,
      )
  elsif ["none", nil].include?(pm_scope)
    scope = scope.where.not(topics: { archetype: pm_archetype })
  end

  # Banner topics skip the age, category and PM filters but keep the content
  # checks (and require the topic itself not to be deleted).
  banner_scope =
    Post
      .where(deleted_at: nil)
      .where.not(raw: [nil, ""])
      .where("LENGTH(posts.raw) <= ?", max_raw_length)
      .joins(:topic)
      .where(topics: { archetype: Archetype.banner, deleted_at: nil })
  banner_scope = banner_scope.where("posts.user_id > 0") if skip_bot_content

  scope.or(banner_scope)
end
# Cache key for the translation progress report.
#
# Built from a fixed prefix plus every setting value read here and the sorted
# excluded category ids, joined with ":", so a change to any of them yields a
# different key.
def self.progress_cache_key
  setting_parts = [
    SiteSetting.content_localization_supported_locales,
    SiteSetting.ai_translation_backfill_max_age_days,
    SiteSetting.ai_translation_include_bot_content,
    SiteSetting.ai_translation_max_post_length,
    SiteSetting.ai_translation_personal_messages,
  ]
  excluded_part = DiscourseAi::Translation.excluded_category_ids.sort.join(",")

  ["ai-translations-progress", *setting_parts, excluded_part].join(":")
end
# Computes translation progress for every supported locale in one query.
#
# For each locale in content_localization_supported_locales:
#   * total - eligible posts with a locale whose base language differs from
#     the locale's base language
#   * done  - those of the above that have a post_localizations row in the
#     locale's base language
#
# @return [Hash] with keys
#   :translation_progress [Array<Hash>] one { locale:, done:, total: } entry
#     per supported locale, sorted by completion ratio, highest first
#   :total [Integer] count of all eligible posts (with or without a locale)
#   :posts_with_detected_locale [Integer] count of eligible posts with a locale
#   When no locales are supported, the array is empty and both counts are 0.
def self.completion_all_locales
  empty_result = { translation_progress: [], total: 0, posts_with_detected_locale: 0 }

  supported = SiteSetting.content_localization_supported_locales.split("|")
  # An empty list would render "VALUES " with no rows, which is a SQL syntax
  # error; bail out before building the query.
  return empty_result if supported.empty?

  # Locale strings come from a site setting; escape them rather than
  # interpolating them raw into the statement.
  values_rows =
    supported.map { |loc| ActiveRecord::Base.sanitize_sql_array(["(?)", loc]) }.join(", ")

  sql = <<~SQL
    WITH supported AS (
      SELECT localestr,
             split_part(localestr, '_', 1) AS base
      FROM (VALUES #{values_rows}) AS t(localestr)
    ),
    all_eligible_posts AS (
      #{get.to_sql}
    ),
    total_eligible_count AS (
      SELECT COUNT(*)::bigint AS count FROM all_eligible_posts
    ),
    eligible_posts AS (
      SELECT * FROM all_eligible_posts WHERE locale IS NOT NULL
    ),
    all_posts_count AS (
      SELECT COUNT(*)::bigint AS count FROM eligible_posts
    ),
    non_target_locale_counts AS (
      SELECT s.base,
             COUNT(*)::bigint AS count
      FROM eligible_posts p
      CROSS JOIN supported s
      WHERE split_part(p.locale, '_', 1) != s.base
      GROUP BY s.base
    ),
    done_per_base AS (
      SELECT s.base,
             COUNT(*)::bigint AS done
      FROM eligible_posts p
      JOIN supported s ON TRUE
      WHERE split_part(p.locale, '_', 1) != s.base AND EXISTS (
        SELECT 1
        FROM post_localizations pl
        WHERE pl.post_id = p.id
          AND split_part(pl.locale, '_', 1) = s.base
      )
      GROUP BY s.base
    )
    SELECT s.localestr AS locale,
           COALESCE(d.done, 0) AS done,
           COALESCE(ntl.count, 0) AS total,
           (SELECT count FROM total_eligible_count) AS total_eligible,
           (SELECT count FROM all_posts_count) AS posts_with_locale
    FROM supported s
    LEFT JOIN done_per_base d ON d.base = s.base
    LEFT JOIN non_target_locale_counts ntl ON ntl.base = s.base
  SQL

  results = DB.query(sql)
  return empty_result if results.empty?

  # The aggregate counts are repeated on every row; read them from the first.
  total_eligible = results.first.total_eligible
  posts_with_locale = results.first.posts_with_locale

  # Per-locale progress, most complete first. Locales with nothing to
  # translate count as 0% complete.
  translation_progress =
    results
      .map { |r| { locale: r.locale, done: r.done, total: r.total } }
      .sort_by do |entry|
        percentage = entry[:total] > 0 ? entry[:done].to_f / entry[:total] : 0
        -percentage
      end

  {
    translation_progress: translation_progress,
    total: total_eligible,
    posts_with_detected_locale: posts_with_locale,
  }
end
end
end
end