discourse/plugins/discourse-ai/spec/lib/translation/post_candidates_spec.rb
Penar Musaraj 90baea1ea7
FEATURE: Switch from opt-in to opt-out for categories in AI translations (#40169)
This PR changes Discourse AI translations from an opt-in category model
to an opt-out model: instead of translating only selected
`ai_translation_target_categories`, it introduces
`ai_translation_excluded_categories`, updates the admin UI copy and save
flow, changes topic/post/category candidate selection and detection jobs
to translate all non-excluded categories by default, and adds a
migration that converts existing target-category settings into the
equivalent excluded-category list for existing sites.

It also updates all related specs.

---------

Co-authored-by: discourse-patch-triage[bot] <272280883+discourse-patch-triage[bot]@users.noreply.github.com>
2026-05-26 14:51:04 -04:00

292 lines
11 KiB
Ruby
Vendored

# frozen_string_literal: true
describe DiscourseAi::Translation::PostCandidates do
# Reset the excluded-category list before every example so that no category
# is excluded unless an individual example sets the setting itself.
before do
  SiteSetting.ai_translation_excluded_categories = ""
end
describe ".get" do
  # Posts authored by the system user are not candidates by default.
  it "does not return bot posts" do
    bot_post = Fabricate(:post, user: Discourse.system_user)

    expect(described_class.get).not_to include(bot_post)
  end

  describe "SiteSetting.ai_translation_include_bot_content" do
    it "includes bot posts when enabled" do
      SiteSetting.ai_translation_include_bot_content = true
      bot_post = Fabricate(:post, user: Discourse.system_user)
      regular_post = Fabricate(:post)

      expect(described_class.get).to include(bot_post, regular_post)
    end
  end

  it "does not return posts older than ai_translation_backfill_max_age_days" do
    # One day beyond the configured backfill window.
    too_old = SiteSetting.ai_translation_backfill_max_age_days.days.ago - 1.day
    stale_post = Fabricate(:post, created_at: too_old)

    expect(described_class.get).not_to include(stale_post)
  end

  it "does not return deleted posts" do
    deleted_post = Fabricate(:post, deleted_at: Time.now)

    expect(described_class.get).not_to include(deleted_post)
  end

  it "does not return posts longer than ai_translation_max_post_length" do
    SiteSetting.ai_translation_max_post_length = 100
    short_post = Fabricate(:post, raw: "This is a short post that fits within the limit.")
    long_post = Fabricate(:post, raw: "a" * 50 + " This is a long post. " + "b" * 50)

    candidates = described_class.get

    expect(candidates).to include(short_post)
    expect(candidates).not_to include(long_post)
  end

  describe "category and PM filtering" do
    fab!(:target_category, :category)
    fab!(:non_target_category, :category)
    fab!(:group)
    fab!(:pm_post) { Fabricate(:post, topic: Fabricate(:private_message_topic)) }
    fab!(:group_pm_post) do
      Fabricate(:post, topic: Fabricate(:private_message_topic, allowed_groups: [group]))
    end
    fab!(:target_post) do
      Fabricate(:post, topic: Fabricate(:topic, category: target_category))
    end
    fab!(:non_target_post) do
      Fabricate(:post, topic: Fabricate(:topic, category: non_target_category))
    end

    # Lazily evaluated: each example assigns its SiteSetting values before
    # the first reference, so the query reflects that example's settings.
    let(:candidates) { described_class.get }

    it "includes posts from private categories by default" do
      private_category = Fabricate(:private_category, group:)
      private_post = Fabricate(:post, topic: Fabricate(:topic, category: private_category))
      SiteSetting.ai_translation_personal_messages = "none"

      expect(candidates).to include(private_post)
    end

    it "does not include posts from excluded categories" do
      SiteSetting.ai_translation_excluded_categories = non_target_category.id.to_s
      SiteSetting.ai_translation_personal_messages = "none"

      expect(candidates).to include(target_post)
      expect(candidates).not_to include(non_target_post, pm_post, group_pm_post)
    end

    it "includes group PMs but not personal PMs when pm_translation_scope is group" do
      SiteSetting.ai_translation_excluded_categories = non_target_category.id.to_s
      SiteSetting.ai_translation_personal_messages = "group"

      expect(candidates).to include(target_post, group_pm_post)
      expect(candidates).not_to include(pm_post)
    end

    it "includes all PMs when pm_translation_scope is all" do
      SiteSetting.ai_translation_excluded_categories = non_target_category.id.to_s
      SiteSetting.ai_translation_personal_messages = "all"

      expect(candidates).to include(target_post, pm_post, group_pm_post)
    end
  end
end
describe ".needs_localization" do
  fab!(:target_category, :category)

  before do
    SiteSetting.ai_translation_backfill_max_age_days = 100
    SiteSetting.content_localization_supported_locales = "en|ja|de"
    SiteSetting.ai_translation_excluded_categories = ""
    SiteSetting.ai_translation_personal_messages = "none"
  end

  # Fabricates a post with the given locale in a fresh topic of the shared
  # target category.
  def post_with_locale(locale)
    Fabricate(:post, locale: locale, topic: Fabricate(:topic, category: target_category))
  end

  # Fabricates one localization of +post+ per given locale, in order.
  def localize(post, *locales)
    locales.each { |locale| Fabricate(:post_localization, post: post, locale: locale) }
  end

  it "returns [post_id, target_locale] pairs for posts needing localization" do
    post = post_with_locale("es")

    pairs = described_class.needs_localization(limit: 10)

    expect(pairs).to include([post.id, "en"], [post.id, "ja"], [post.id, "de"])
  end

  it "excludes posts without a detected locale" do
    post_with_locale(nil)

    expect(described_class.needs_localization(limit: 10)).to be_empty
  end

  it "excludes fully translated posts" do
    post = post_with_locale("es")
    localize(post, "en", "ja", "de")

    pairs = described_class.needs_localization(limit: 10)

    expect(pairs.map(&:first)).not_to include(post.id)
  end

  it "returns only missing locale pairs for partially translated posts" do
    post = post_with_locale("es")
    localize(post, "en")

    pairs = described_class.needs_localization(limit: 10)

    expect(pairs).not_to include([post.id, "en"])
    expect(pairs).to include([post.id, "ja"], [post.id, "de"])
  end

  it "excludes posts whose locale matches all target base locales" do
    SiteSetting.content_localization_supported_locales = "en"
    post = post_with_locale("en")

    pairs = described_class.needs_localization(limit: 10)

    expect(pairs.map(&:first)).not_to include(post.id)
  end

  it "handles base-locale deduplication (ja_JP localization covers ja target)" do
    post = post_with_locale("es")
    # Regional variants stand in for the "ja" and "de" targets.
    localize(post, "en", "ja_JP", "de_DE")

    pairs = described_class.needs_localization(limit: 10)

    expect(pairs.map(&:first)).not_to include(post.id)
  end

  it "respects the limit parameter" do
    3.times { post_with_locale("es") }

    expect(described_class.needs_localization(limit: 2).size).to eq(2)
  end

  it "returns empty when no locales are configured" do
    SiteSetting.content_localization_supported_locales = ""

    expect(described_class.needs_localization(limit: 10)).to be_empty
  end
end
describe ".get_completion_all_locales" do
  fab!(:target_category, :category)

  before do
    # Clear the cache first so every example computes its result from scratch
    # (the "cache key" example below indicates results are cached).
    Discourse.cache.clear
    SiteSetting.content_localization_supported_locales = "en_GB|pt|es"
    SiteSetting.ai_translation_backfill_max_age_days = 30
    SiteSetting.ai_translation_excluded_categories = ""
    SiteSetting.ai_translation_personal_messages = "group"
  end

  it "returns empty state when no posts exist" do
    Post.delete_all

    result = described_class.get_completion_all_locales

    expect(result).to be_a(Hash)
    # One progress entry per configured locale (en_GB, pt, es).
    expect(result[:translation_progress].length).to eq(3)
    expect(result[:translation_progress]).to all(include(done: 0, total: 0))
    expect(result[:total]).to eq(0)
    expect(result[:posts_with_detected_locale]).to eq(0)
  end

  it "uses excluded categories in the cache key" do
    Post.delete_all
    excluded_category = Fabricate(:category)
    Fabricate(:post, locale: "en_GB", topic: Fabricate(:topic, category: target_category))
    Fabricate(:post, locale: "fr", topic: Fabricate(:topic, category: excluded_category))

    SiteSetting.ai_translation_excluded_categories = ""
    expect(described_class.get_completion_all_locales[:total]).to eq(2)

    # The cache is not cleared between the two calls; the total must still
    # change once the setting does.
    SiteSetting.ai_translation_excluded_categories = excluded_category.id.to_s
    expect(described_class.get_completion_all_locales[:total]).to eq(1)
  end

  it "returns progress grouped by base locale (of en_GB) and correct totals" do
    Fabricate(:post, locale: "en_GB", topic: Fabricate(:topic, category: target_category))
    post2 = Fabricate(:post, locale: "fr", topic: Fabricate(:topic, category: target_category))
    Fabricate(:post, locale: "es", topic: Fabricate(:topic, category: target_category))
    # A post without a detected locale: counted in :total only.
    Fabricate(:post, locale: nil, topic: Fabricate(:topic, category: target_category))

    # add an "en" localization to a non-en base post; the assertions below
    # expect it to count as done for the en_GB target
    PostLocalization.create!(
      post: post2,
      locale: "en",
      raw: "Translated to English",
      cooked: "<p>Translated to English</p>",
      post_version: post2.version,
      localizer_user_id: Discourse.system_user.id,
    )

    # Use the same entry point this group is named after and that the
    # sibling examples call.
    result = described_class.get_completion_all_locales

    expect(result).to be_a(Hash)
    expect(result[:translation_progress].length).to eq(3)
    expect(result[:total]).to eq(4) # all eligible posts (including one without locale)
    expect(result[:posts_with_detected_locale]).to eq(3) # only posts with locale

    progress = result[:translation_progress]
    expect(progress).to all(include(:locale, :done, :total))
    expect(progress.first[:locale]).to eq("en_GB")

    en_entry = progress.find { |r| r[:locale] == "en_GB" }
    expect(en_entry).to be_present
    # total is non-English posts (the fr and es posts)
    expect(en_entry[:done]).to eq(1)
    expect(en_entry[:total]).to eq(2)

    pt_entry = progress.find { |r| r[:locale] == "pt" }
    expect(pt_entry).to be_present
    expect(pt_entry[:done]).to eq(0)
    # none of the three posts with a detected locale is Portuguese
    expect(pt_entry[:total]).to eq(3)

    es_entry = progress.find { |r| r[:locale] == "es" }
    expect(es_entry).to be_present
    expect(es_entry[:done]).to eq(0)
    # the es post is already in the target locale, leaving en_GB and fr
    expect(es_entry[:total]).to eq(2)

    # "fr" is not a configured locale, so it gets no progress entry
    fr_entry = progress.find { |r| r[:locale] == "fr" }
    expect(fr_entry).to be_nil
  end

  it "excludes posts longer than ai_translation_max_post_length from totals" do
    SiteSetting.ai_translation_max_post_length = 100
    # Short post: within the limit, expected to be counted.
    Fabricate(
      :post,
      locale: "en_GB",
      raw: "This is a short post that fits.",
      topic: Fabricate(:topic, category: target_category),
    )
    # Long post: 122 characters of raw, over the 100 limit.
    Fabricate(
      :post,
      locale: "fr",
      raw: "a" * 50 + " This is a long post. " + "b" * 50,
      topic: Fabricate(:topic, category: target_category),
    )

    result = described_class.get_completion_all_locales

    expect(result[:total]).to eq(1)
    expect(result[:posts_with_detected_locale]).to eq(1)
  end
end
end