mirror of
https://gh.wpcy.net/https://github.com/discourse/discourse.git
synced 2026-06-19 07:43:46 +08:00
Introduce an "agentic" execution mode as an alternative to the default fixed-turn/tool-limit approach. In agentic mode, personas use a configurable token budget (`max_turn_tokens`) to govern how long a tool-use session can run, with automatic context compression when the conversation exceeds a configurable threshold percentage (`compression_threshold`) of the model's context window.

Key changes:

- Add `execution_mode`, `max_turn_tokens`, and `compression_threshold` columns to `ai_personas` via migration
- Refactor `Bot#reply` to support token-budget loop control with a thread-local token accumulator, budget exhaustion hints, and a safety valve at 100 completions
- Add `maybe_compress_context` which summarizes middle conversation messages when token usage crosses the compression threshold, preserving system prompt and recent tail messages
- Update `StreamReplyCustomToolsSession` to track accumulated tokens across rounds and handle budget exhaustion in the custom tools path
- Discount cached tokens (Anthropic) in the token accumulator to avoid over-counting reused KV cache prefixes
- Update persona editor UI with execution mode selector and conditional fields (agentic shows token budget/compression; default shows max context posts)
66 lines
2.3 KiB
Ruby
Vendored
66 lines
2.3 KiB
Ruby
Vendored
# frozen_string_literal: true
|
|
|
|
require_relative "../../../evals/lib/runners/translation"
|
|
require_relative "../support/runner_helper"
|
|
|
|
# Specs for the Translation eval runner's #run entry point. They cover a
# single input, a batch of cases with per-case locale overrides, the locale
# detector feature, and the error raised when no target locale is supplied.
#
# NOTE(review): `stub_runner_bot` is not defined in this file; presumably it
# comes from ../support/runner_helper — confirm.
RSpec.describe DiscourseAi::Evals::Runners::Translation do
  fab!(:llm, :fake_model)

  let(:execution_context) do
    DiscourseAi::Completions::ExecutionContext.new
  end

  describe "#run" do
    it "translates a single piece of content when no cases are provided" do
      translator = described_class.new("post_raw_translator")
      stub_runner_bot(response: "Hola mundo")

      single_args = { input: "Hello world", target_locale: "es" }
      single_case = OpenStruct.new(args: single_args)

      outcome = translator.run(single_case, llm, execution_context: execution_context)

      expect(outcome[:raw]).to eq("Hola mundo")
      expect(outcome[:metadata]).to include(target_locale: "es")
    end

    it "supports multiple cases and returns metadata for each entry" do
      translator = described_class.new("short_text_translator")

      # Each stubbed bot invocation hands back the next queued translation.
      queued_translations = %w[Hola Salut]
      stub_runner_bot do |emit|
        emit.call(queued_translations.shift, nil, nil)
      end

      # The second case overrides the top-level target locale.
      batch = [{ input: "Hello" }, { input: "Hi there", target_locale: "fr" }]
      batch_case = OpenStruct.new(args: { target_locale: "es", cases: batch })

      outcomes = translator.run(batch_case, llm, execution_context: execution_context)

      expect(outcomes.length).to eq(2)

      first_outcome = outcomes[0]
      expect(first_outcome[:raw]).to eq("Hola")
      expect(first_outcome[:metadata]).to include(message: "Hello", target_locale: "es")

      second_outcome = outcomes[1]
      expect(second_outcome[:metadata]).to include(target_locale: "fr", message: "Hi there")
      expect(second_outcome[:raw]).to eq("Salut")
    end

    it "invokes the locale detector without requiring a target locale" do
      detector = described_class.new("locale_detector")
      stub_runner_bot(response: "es")

      detection_case = OpenStruct.new(args: { input: "¿Cómo estás?" })
      detection = detector.run(detection_case, llm, execution_context: execution_context)

      expect(detection[:raw]).to eq("es")
    end

    it "raises when translation cases omit the target locale" do
      translator = described_class.new("topic_title_translator")
      incomplete_case = OpenStruct.new(args: { input: "Hello" })

      expect do
        translator.run(incomplete_case, llm, execution_context: execution_context)
      end.to raise_error(ArgumentError, /target_locale/)
    end
  end
end
|