discourse/plugins/discourse-ai/spec/evals/runners/translation_spec.rb
Sam b8abe100c5
FEATURE: add agentic execution mode for AI personas (#38230)
Introduce an "agentic" execution mode as an alternative to the
default fixed-turn/tool-limit approach. In agentic mode, personas
use a configurable token budget (`max_turn_tokens`) to govern how
long a tool-use session can run, with automatic context compression
when the conversation exceeds a configurable threshold percentage
(`compression_threshold`) of the model's context window.

Key changes:

- Add `execution_mode`, `max_turn_tokens`, and `compression_threshold`
  columns to `ai_personas` via migration
- Refactor `Bot#reply` to support token-budget loop control with a
  thread-local token accumulator, budget exhaustion hints, and a
  safety valve at 100 completions
- Add `maybe_compress_context`, which summarizes middle conversation
  messages when token usage crosses the compression threshold,
  preserving the system prompt and recent tail messages
- Update `StreamReplyCustomToolsSession` to track accumulated tokens
  across rounds and handle budget exhaustion in the custom tools path
- Discount cached tokens (Anthropic) in the token accumulator to
  avoid over-counting reused KV cache prefixes
- Update persona editor UI with execution mode selector and
  conditional fields (agentic shows token budget/compression;
  default shows max context posts)
2026-03-05 15:06:54 +11:00

66 lines
2.3 KiB
Ruby
Vendored

# frozen_string_literal: true
require_relative "../../../evals/lib/runners/translation"
require_relative "../support/runner_helper"
# Exercises Translation#run for four scenarios: a single input, a list of
# cases, locale detection, and a translation request missing its target locale.
RSpec.describe DiscourseAi::Evals::Runners::Translation do
  # `llm` is handed to the runner as the second positional argument of #run.
  fab!(:llm, :fake_model)

  let(:execution_context) { DiscourseAi::Completions::ExecutionContext.new }

  describe "#run" do
    it "translates a single piece of content when no cases are provided" do
      translator = described_class.new("post_raw_translator")
      # NOTE(review): stub_runner_bot presumably comes from ../support/runner_helper
      # and makes the bot answer with the given canned response; confirm there.
      stub_runner_bot(response: "Hola mundo")
      single_case = OpenStruct.new(args: { input: "Hello world", target_locale: "es" })

      outcome = translator.run(single_case, llm, execution_context: execution_context)

      expect(outcome[:raw]).to eq("Hola mundo")
      expect(outcome[:metadata]).to include(target_locale: "es")
    end

    it "supports multiple cases and returns metadata for each entry" do
      translator = described_class.new("short_text_translator")
      # One canned reply is consumed per invocation of the stub block.
      responses = ["Hola", "Salut"]
      stub_runner_bot { |blk| blk.call(responses.shift, nil, nil) }
      # The top-level target_locale is "es"; the second case supplies its own "fr".
      multi_case =
        OpenStruct.new(
          args: {
            target_locale: "es",
            cases: [{ input: "Hello" }, { input: "Hi there", target_locale: "fr" }],
          },
        )

      outcomes = translator.run(multi_case, llm, execution_context: execution_context)

      expect(outcomes.length).to eq(2)

      spanish_entry = outcomes[0]
      expect(spanish_entry[:raw]).to eq("Hola")
      expect(spanish_entry[:metadata]).to include(message: "Hello", target_locale: "es")

      french_entry = outcomes[1]
      expect(french_entry[:metadata]).to include(target_locale: "fr", message: "Hi there")
      expect(french_entry[:raw]).to eq("Salut")
    end

    it "invokes the locale detector without requiring a target locale" do
      detector = described_class.new("locale_detector")
      stub_runner_bot(response: "es")
      # No :target_locale key is supplied for this persona.
      detection_case = OpenStruct.new(args: { input: "¿Cómo estás?" })

      detected = detector.run(detection_case, llm, execution_context: execution_context)

      expect(detected[:raw]).to eq("es")
    end

    it "raises when translation cases omit the target locale" do
      translator = described_class.new("topic_title_translator")

      expect {
        # The eval case is built inside the block, as before, and lacks :target_locale.
        incomplete_case = OpenStruct.new(args: { input: "Hello" })
        translator.run(incomplete_case, llm, execution_context: execution_context)
      }.to raise_error(ArgumentError, /target_locale/)
    end
  end
end