mirror of
https://gh.wpcy.net/https://github.com/discourse/discourse.git
synced 2026-06-19 07:43:46 +08:00
Introduce an "agentic" execution mode as an alternative to the default fixed-turn/tool-limit approach. In agentic mode, personas use a configurable token budget (`max_turn_tokens`) to govern how long a tool-use session can run, with automatic context compression when the conversation exceeds a configurable threshold percentage (`compression_threshold`) of the model's context window.

Key changes:
- Add `execution_mode`, `max_turn_tokens`, and `compression_threshold` columns to `ai_personas` via migration
- Refactor `Bot#reply` to support token-budget loop control with a thread-local token accumulator, budget exhaustion hints, and a safety valve at 100 completions
- Add `maybe_compress_context`, which summarizes middle conversation messages when token usage crosses the compression threshold, preserving the system prompt and recent tail messages
- Update `StreamReplyCustomToolsSession` to track accumulated tokens across rounds and handle budget exhaustion in the custom tools path
- Discount cached tokens (Anthropic) in the token accumulator to avoid over-counting reused KV cache prefixes
- Update the persona editor UI with an execution mode selector and conditional fields (agentic shows token budget/compression; default shows max context posts)
47 lines
1.5 KiB
Ruby
Vendored
47 lines
1.5 KiB
Ruby
Vendored
# frozen_string_literal: true
|
|
|
|
require_relative "../../../evals/lib/runners/discoveries"
|
|
require_relative "../support/runner_helper"
|
|
|
|
# Specs for the "discoveries" eval runner: each example calls #run with an
# eval-case-like object exposing `args`, a fabricated LLM, and an execution
# context, then inspects the returned payload(s).
RSpec.describe DiscourseAi::Evals::Runners::Discoveries do
  fab!(:llm, :fake_model)

  let(:execution_context) { DiscourseAi::Completions::ExecutionContext.new }

  # NOTE(review): stub_runner_bot is presumably provided by
  # ../support/runner_helper; the examples below rely on the stubbed response
  # text surfacing as result[:raw].
  before { stub_runner_bot(response: "Search overview") }

  describe "#run" do
    it "returns a discovery payload with the model output" do
      runner = described_class.new("discoveries")
      result =
        runner.run(
          OpenStruct.new(args: { query: "chat integrations" }),
          llm,
          execution_context: execution_context,
        )

      expect(result[:raw]).to eq("Search overview")
      expect(result[:metadata]).to include(query: "chat integrations")
    end

    it "evaluates each provided case" do
      runner = described_class.new("discoveries")
      results =
        runner.run(
          OpenStruct.new(args: { cases: [{ query: "best themes" }, { query: "login security" }] }),
          llm,
          execution_context: execution_context,
        )

      # One result per entry in :cases, returned in the order given.
      expect(results.length).to eq(2)
      expect(results.last[:metadata]).to include(query: "login security")
    end

    it "raises when the query is missing" do
      runner = described_class.new("discoveries")

      # Empty args: neither :query nor :cases is supplied.
      expect {
        runner.run(OpenStruct.new(args: {}), llm, execution_context: execution_context)
      }.to raise_error(ArgumentError, /require :query/)
    end
  end
end
|