mirror of
https://gh.wpcy.net/https://github.com/discourse/discourse.git
synced 2026-06-19 07:43:46 +08:00
Document attachments (doc, docx, xls, xlsx, rtf, csv, md, txt) are now converted to text before being included in LLM prompts, instead of being forwarded as raw base64 payloads. PDFs remain the only format sent as a raw upload, capped at 10MB.

New converters under lib/completions:
- DocToText shells out to antiword
- DocxToText parses OOXML directly with size and depth limits
- XlsToText shells out to xls2csv
- XlsxToText parses OOXML and shared strings into CSV-style text
- RtfToText is a custom RTF tokenizer with destination/group handling

Plain text formats (csv, md, txt) are read with a 1MB byte cap and UTF-8 normalization. Extracted text is truncated to 100k characters, with a preamble noting the original filename and size.

Dialect trimming now uses token-aware truncation against a per-message budget so large extracted documents collapse cleanly under the prompt limit, rather than the previous step-based slicing of raw content.

Other changes:
- LlmModel.normalize_attachment_types is shared with UploadEncoder and collapses "markdown" to "md" so the canonical extension is consistent across model config, UI defaults, and encoder output
- ai-llm-attachment-types adds csv, xls, xlsx to the default choices
- Locale strings clarify that vision controls images and allowed_attachment_types controls documents

---------

Co-authored-by: Rafael Silva <xfalcox@gmail.com>
503 lines
14 KiB
Ruby
Vendored
503 lines
14 KiB
Ruby
Vendored
# frozen_string_literal: true
|
|
describe DiscourseAi::Automation::LlmTriage do
|
|
# Fixtures shared by the examples in this group.
fab!(:post)

# A second post in the same topic as `post`, written by a freshly fabricated user.
fab!(:reply) do
  Fabricate(:post, topic: post.topic, user: Fabricate(:user))
end

fab!(:llm_model)

fab!(:ai_agent)
|
|
|
|
# Convenience wrapper: forwards every keyword argument unchanged to
# DiscourseAi::Automation::LlmTriage.handle and returns its result.
def triage(**options)
  DiscourseAi::Automation::LlmTriage.handle(**options)
end
|
|
|
|
# Runs before each example: calls the enable_current_plugin helper (defined
# outside this file) and makes llm_model the agent's default LLM.
before do
  enable_current_plugin

  ai_agent.update!(default_llm: llm_model)
end
|
|
|
|
it "does nothing if it does not pass triage" do
  # The prepared answer ("good") does not contain the search text ("bad"),
  # so the topic is expected to stay visible.
  DiscourseAi::Completions::Llm.with_prepared_responses(["good"]) do
    rule = {
      post: post,
      triage_agent_id: ai_agent.id,
      hide_topic: true,
      search_for_text: "bad",
      automation: nil,
    }
    triage(**rule)
  end

  topic = post.topic.reload
  expect(topic.visible).to eq(true)
end
|
|
|
|
it "can hide topics on triage" do
  # The prepared answer matches the search text, so hide_topic should apply.
  DiscourseAi::Completions::Llm.with_prepared_responses(["bad"]) do
    rule = {
      post: post,
      triage_agent_id: ai_agent.id,
      hide_topic: true,
      search_for_text: "bad",
      automation: nil,
    }
    triage(**rule)
  end

  topic = post.topic.reload
  expect(topic.visible).to eq(false)
end
|
|
|
|
it "can categorize topics on triage" do
  target_category = Fabricate(:category)

  DiscourseAi::Completions::Llm.with_prepared_responses(["bad"]) do
    rule = {
      post: post,
      triage_agent_id: ai_agent.id,
      category_id: target_category.id,
      search_for_text: "bad",
      automation: nil,
    }
    triage(**rule)
  end

  # The topic must have been moved into the freshly fabricated category.
  topic = post.topic.reload
  expect(topic.category_id).to eq(target_category.id)
end
|
|
|
|
it "can reply to topics on triage" do
  replier = Fabricate(:user)

  DiscourseAi::Completions::Llm.with_prepared_responses(["bad"]) do
    rule = {
      post: post,
      triage_agent_id: ai_agent.id,
      search_for_text: "bad",
      canned_reply: "test canned reply 123",
      canned_reply_user: replier.username,
      automation: nil,
    }
    triage(**rule)
  end

  # The newest post in the topic should be the canned reply, authored by the
  # user whose username was passed as canned_reply_user.
  newest_post = post.topic.posts.order(:post_number).last

  expect(newest_post.raw).to eq("test canned reply 123")
  expect(newest_post.user.id).to eq(replier.id)
end
|
|
|
|
it "can add posts to the review queue" do
  DiscourseAi::Completions::Llm.with_prepared_responses(["bad"]) do
    rule = {
      post: post,
      triage_agent_id: ai_agent.id,
      search_for_text: "bad",
      flag_post: true,
      automation: nil,
    }
    triage(**rule)
  end

  # The most recent ReviewablePost should point at the triaged post and
  # carry the LLM answer in the reason of its first score.
  queued = ReviewablePost.last
  first_score = queued.reviewable_scores.first

  expect(queued.target_id).to eq(post.id)
  expect(queued.target_type).to eq("Post")
  expect(first_score.reason).to include("bad")
end
|
|
|
|
it "flags via tool call when the agent invokes flag_post" do
  ai_agent.update!(tools: ["FlagPost"])

  # First prepared response is a flag_post tool call; the second is plain
  # text that does not contain the search text.
  flag_params = { flag_post: true, reason: "Looks unsafe" }
  flag_call =
    DiscourseAi::Completions::ToolCall.new(
      name: "flag_post",
      parameters: flag_params,
      id: "tool_call_1",
    )

  DiscourseAi::Completions::Llm.with_prepared_responses([flag_call, "all good"]) do
    rule = {
      post: post,
      triage_agent_id: ai_agent.id,
      search_for_text: "bad",
      flag_post: true,
      automation: nil,
    }
    triage(**rule)
  end

  queued = ReviewablePost.last
  first_score = queued.reviewable_scores.first

  expect(queued.target_id).to eq(post.id)
  expect(queued.target_type).to eq("Post")
  # The reason supplied in the tool call must show up in the score.
  expect(first_score.reason).to include("Looks unsafe")
end
|
|
|
|
it "can handle spam flags" do
  DiscourseAi::Completions::Llm.with_prepared_responses(["bad"]) do
    rule = {
      post: post,
      triage_agent_id: ai_agent.id,
      search_for_text: "bad",
      flag_post: true,
      flag_type: :spam,
      automation: nil,
    }
    triage(**rule)
  end

  # A spam flag is expected to hide both the post and its topic.
  expect(post.reload).to be_hidden
  topic = post.topic.reload
  expect(topic.visible).to eq(false)
end
|
|
|
|
it "can handle spam+silence flags" do
  DiscourseAi::Completions::Llm.with_prepared_responses(["bad"]) do
    rule = {
      post: post,
      triage_agent_id: ai_agent.id,
      search_for_text: "bad",
      flag_post: true,
      flag_type: :spam_silence,
      automation: nil,
    }
    triage(**rule)
  end

  # Same outcome as a plain spam flag, plus the author ends up silenced.
  expect(post.reload).to be_hidden
  topic = post.topic.reload
  expect(topic.visible).to eq(false)
  expect(post.user.silenced?).to eq(true)
end
|
|
|
|
it "can handle flag + hide" do
  DiscourseAi::Completions::Llm.with_prepared_responses(["bad"]) do
    rule = {
      post: post,
      triage_agent_id: ai_agent.id,
      search_for_text: "bad",
      flag_post: true,
      flag_type: :review_hide,
      automation: nil,
    }
    triage(**rule)
  end

  queued = ReviewablePost.last
  first_score = queued.reviewable_scores.first

  expect(queued.target_id).to eq(post.id)
  expect(queued.target_type).to eq("Post")
  expect(first_score.reason).to include("bad")
  # :review_hide queues the post for review and also hides it.
  expect(post.reload).to be_hidden
end
|
|
|
|
it "can handle flag + delete" do
  DiscourseAi::Completions::Llm.with_prepared_responses(["bad"]) do
    rule = {
      post: post,
      triage_agent_id: ai_agent.id,
      search_for_text: "bad",
      flag_post: true,
      flag_type: :review_delete,
      automation: nil,
    }
    triage(**rule)
  end

  queued = ReviewablePost.last
  first_score = queued.reviewable_scores.first

  expect(queued.target_id).to eq(post.id)
  expect(queued.target_type).to eq("Post")
  expect(first_score.reason).to include("bad")
  # :review_delete queues the post for review and also trashes it.
  expect(post.reload.trashed?).to eq(true)
end
|
|
|
|
it "can handle flag + delete post + silence" do
  DiscourseAi::Completions::Llm.with_prepared_responses(["bad"]) do
    rule = {
      post: post,
      triage_agent_id: ai_agent.id,
      search_for_text: "bad",
      flag_post: true,
      flag_type: :review_delete_silence,
      automation: nil,
    }
    triage(**rule)
  end

  queued = ReviewablePost.last
  first_score = queued.reviewable_scores.first

  expect(queued.target_id).to eq(post.id)
  expect(queued.target_type).to eq("Post")
  expect(first_score.reason).to include("bad")
  # On top of the review entry, the post is trashed and its author silenced.
  expect(post.reload.trashed?).to eq(true)
  expect(post.user.silenced?).to eq(true)
end
|
|
|
|
it "restores deleted post when moderator approves" do
  DiscourseAi::Completions::Llm.with_prepared_responses(["bad"]) do
    rule = {
      post: post,
      triage_agent_id: ai_agent.id,
      search_for_text: "bad",
      flag_post: true,
      flag_type: :review_delete,
      automation: nil,
    }
    triage(**rule)
  end

  queued = ReviewablePost.last

  # After triage both the post and its topic are trashed; the topic has to
  # be looked up through with_deleted to be found at all.
  expect(post.reload.trashed?).to eq(true)
  trashed_topic = Topic.with_deleted.find_by(id: post.topic_id)
  expect(trashed_topic.trashed?).to eq(true)

  reviewer = Fabricate(:moderator)
  outcome = queued.perform(reviewer, :approve_and_restore)
  expect(outcome).to be_success

  # Post and topic should be restored
  expect(post.reload.trashed?).to eq(false)
  expect(post.topic.reload.trashed?).to eq(false)
end
|
|
|
|
it "sends author a PM when notify_author_pm is enabled" do
  DiscourseAi::Completions::Llm.with_prepared_responses(["bad"]) do
    rule = {
      post: post,
      triage_agent_id: ai_agent.id,
      search_for_text: "bad",
      flag_post: true,
      flag_type: :review_delete,
      automation: nil,
      notify_author_pm: true,
    }
    triage(**rule)
  end

  # The newest private-message topic must exist and include the post's author.
  private_messages = Topic.where(archetype: Archetype.private_message)
  newest_pm = private_messages.order(:id).last

  expect(newest_pm).to be_present
  expect(newest_pm.allowed_users).to include(post.user)
end
|
|
|
|
it "uses custom PM message when provided" do
  notice = "Your post is pending review."

  DiscourseAi::Completions::Llm.with_prepared_responses(["bad"]) do
    rule = {
      post: post,
      triage_agent_id: ai_agent.id,
      search_for_text: "bad",
      flag_post: true,
      flag_type: :review_delete,
      automation: nil,
      notify_author_pm: true,
      notify_author_pm_message: notice,
    }
    triage(**rule)
  end

  # Newest post that lives in any private-message topic.
  pm_topic_ids = Topic.where(archetype: Archetype.private_message).select(:id)
  newest_pm_post = Post.where("posts.topic_id IN (?)", pm_topic_ids).order(:id).last

  expect(newest_pm_post.raw).to include(notice)
end
|
|
|
|
it "does not silence the user if the flag fails" do
  # Pre-existing spam action by the system user on the same post; per the
  # example's title this is what makes the triage flag fail.
  existing_action = {
    post: post,
    user: Discourse.system_user,
    post_action_type_id: PostActionType.types[:spam],
  }
  Fabricate(:post_action, **existing_action)

  DiscourseAi::Completions::Llm.with_prepared_responses(["bad"]) do
    rule = {
      post: post,
      triage_agent_id: ai_agent.id,
      search_for_text: "bad",
      flag_post: true,
      flag_type: :spam_silence,
      automation: nil,
    }
    triage(**rule)
  end

  author = post.user.reload
  expect(author).not_to be_silenced
end
|
|
|
|
it "can handle garbled output from LLM" do
  # The answer carries different casing, punctuation and trailing text
  # around the search word.
  noisy_answer = "Bad.\n\nYo"

  DiscourseAi::Completions::Llm.with_prepared_responses([noisy_answer]) do
    rule = {
      post: post,
      triage_agent_id: ai_agent.id,
      search_for_text: "bad",
      flag_post: true,
      automation: nil,
    }
    triage(**rule)
  end

  queued = ReviewablePost.last

  expect(queued&.target).to eq(post)
end
|
|
|
|
it "treats search_for_text as case-insensitive" do
  # Lower-case answer, upper-case search text: the post must still be flagged.
  DiscourseAi::Completions::Llm.with_prepared_responses(["bad"]) do
    rule = {
      post: post,
      triage_agent_id: ai_agent.id,
      search_for_text: "BAD",
      flag_post: true,
      automation: nil,
    }
    triage(**rule)
  end

  queued = ReviewablePost.last

  expect(queued.target).to eq(post)
end
|
|
|
|
it "includes post uploads when triaging" do
  ai_agent.update!(vision_enabled: true)
  picture = Fabricate(:image_upload, posts: [post])

  DiscourseAi::Completions::Llm.with_prepared_responses(["bad"]) do
    rule = {
      post: post,
      triage_agent_id: ai_agent.id,
      search_for_text: "bad",
      flag_post: true,
      automation: nil,
    }
    triage(**rule)

    # Inspect the last recorded prompt while still inside the
    # prepared-responses block: the final content part of its final message
    # must reference the image.
    recorded_prompt = DiscourseAi::Completions::Llm.prompts.last
    final_part = recorded_prompt.messages.last[:content].last

    expect(final_part).to eq({ upload_id: picture.id })
  end
end
|
|
|
|
it "includes document uploads when triaging even if image uploads are disabled" do
  # Vision off, "txt" attachments allowed on the model, every extension
  # authorized site-wide.
  ai_agent.update!(vision_enabled: false)
  llm_model.update!(allowed_attachment_types: ["txt"])
  SiteSetting.authorized_extensions = "*"

  picture = Fabricate(:image_upload, posts: [post])
  text_file = Fabricate(:upload, original_filename: "notes.txt", extension: "txt")
  UploadReference.create!(target: post, upload: text_file)

  DiscourseAi::Completions::Llm.with_prepared_responses(["bad"]) do
    rule = {
      post: post.reload,
      triage_agent_id: ai_agent.id,
      search_for_text: "bad",
      flag_post: true,
      automation: nil,
    }
    triage(**rule)

    recorded_prompt = DiscourseAi::Completions::Llm.prompts.last
    parts = recorded_prompt.messages.last[:content]

    # The text document is referenced in the prompt, the image is not.
    expect(parts).to include({ upload_id: text_file.id })
    expect(parts).not_to include({ upload_id: picture.id })
  end
end
|
|
|
|
it "includes stop_sequences in the completion call" do
  stop_words = %w[GOOD BAD]

  DiscourseAi::Completions::Llm.with_prepared_responses(["bad"]) do |llm_spy|
    rule = {
      post: post,
      triage_agent_id: ai_agent.id,
      search_for_text: "bad",
      flag_post: true,
      automation: nil,
      stop_sequences: stop_words,
    }
    triage(**rule)

    # The object yielded by with_prepared_responses exposes the model params
    # that were used for the completion.
    sent_sequences = llm_spy.model_params[:stop_sequences]
    expect(sent_sequences).to contain_exactly(*stop_words)
  end
end
|
|
|
|
it "append rule tags instead of replacing them" do
  existing_tag = Fabricate(:tag)
  rule_tag = Fabricate(:tag)
  post.topic.update!(tags: [existing_tag])

  DiscourseAi::Completions::Llm.with_prepared_responses(["bad"]) do
    rule = {
      post: post,
      triage_agent_id: ai_agent.id,
      search_for_text: "bad",
      flag_post: true,
      tags: [rule_tag.name],
      automation: nil,
    }
    triage(**rule)
  end

  # Both the tag that was already on the topic and the rule's tag must be there.
  topic_tags = post.topic.reload.tags
  expect(topic_tags).to contain_exactly(existing_tag, rule_tag)
end
|
|
|
|
it "includes the base path in the flagged post message" do
  # Stub the base path so the link in the score reason can be recognised.
  allow(Discourse).to receive(:base_path).and_return("http://test.host")

  DiscourseAi::Completions::Llm.with_prepared_responses(["bad"]) do
    rule = {
      post: post,
      triage_agent_id: ai_agent.id,
      search_for_text: "bad",
      flag_post: true,
      automation: nil,
    }
    triage(**rule)
  end

  queued = ReviewablePost.last
  expect(queued.target_id).to eq(post.id)
  expect(queued.target_type).to eq("Post")

  link_prefix = "<a href=\"#{Discourse.base_path}/admin/plugins/automation/"
  first_score = queued.reviewable_scores.first
  expect(first_score.reason).to include(link_prefix)
end
|
|
|
|
it "escapes llm response and automation name in the flagged post message" do
  # Both the automation name and the prepared LLM answer contain HTML markup.
  automation = Fabricate(:automation, script: "llm_triage", name: %(rule"><img src=x onerror=1>))

  DiscourseAi::Completions::Llm.with_prepared_responses(["<img src=x onerror=alert(1)>"]) do
    triage(
      post: post,
      triage_agent_id: ai_agent.id,
      search_for_text: "img",
      flag_post: true,
      automation: automation,
    )
  end

  score_reason = ReviewablePost.last.reviewable_scores.first.reason

  # The entity-escaped forms must be present. This assumes standard HTML
  # escaping (< to &lt;, > to &gt;, " to &quot;); confirm against the
  # escaper used by LlmTriage.
  expect(score_reason).to include("&lt;img src=x onerror=alert(1)&gt;")
  expect(score_reason).to include("rule&quot;&gt;&lt;img src=x onerror=1&gt;")

  # The raw, unescaped markup must not appear anywhere in the reason.
  expect(score_reason).not_to include("<img src=x onerror=alert(1)>")
  expect(score_reason).not_to include(%(rule"><img src=x onerror=1>))
end
|
|
|
|
it "only sends one PM when multiple rules flag the same post" do
  # Two prepared answers: one per triage run below.
  DiscourseAi::Completions::Llm.with_prepared_responses(%w[bad bad]) do
    # First rule flags the post and sends PM
    first_rule = {
      post: post,
      triage_agent_id: ai_agent.id,
      search_for_text: "bad",
      flag_post: true,
      flag_type: :review,
      automation: nil,
      notify_author_pm: true,
    }
    triage(**first_rule)

    # Second rule runs against the reloaded post with the same settings.
    second_rule = {
      post: post.reload,
      triage_agent_id: ai_agent.id,
      search_for_text: "bad",
      flag_post: true,
      flag_type: :review,
      automation: nil,
      notify_author_pm: true,
    }
    triage(**second_rule)
  end

  # Private-message topics the post's author is allowed on.
  author_pms =
    Topic
      .where(archetype: Archetype.private_message)
      .joins(:topic_allowed_users)
      .where(topic_allowed_users: { user: post.user })

  expect(author_pms.size).to eq(1)

  # Both runs still recorded a score against the post.
  score_total = ReviewableScore.joins(:reviewable).where(reviewable: { target: post }).count
  expect(score_total).to eq(2)
end
|
|
end
|