discourse/plugins/discourse-ai/spec/lib/completions/token_usage_tracker_spec.rb
Sam b8abe100c5
FEATURE: add agentic execution mode for AI personas (#38230)
Introduce an "agentic" execution mode as an alternative to the
default fixed-turn/tool-limit approach. In agentic mode, personas
use a configurable token budget (`max_turn_tokens`) to govern how
long a tool-use session can run, with automatic context compression
when the conversation exceeds a configurable threshold percentage
(`compression_threshold`) of the model's context window.

Key changes:

- Add `execution_mode`, `max_turn_tokens`, and `compression_threshold`
  columns to `ai_personas` via migration
- Refactor `Bot#reply` to support token-budget loop control with a
  thread-local token accumulator, budget exhaustion hints, and a
  safety valve at 100 completions
- Add `maybe_compress_context` which summarizes middle conversation
  messages when token usage crosses the compression threshold,
  preserving system prompt and recent tail messages
- Update `StreamReplyCustomToolsSession` to track accumulated tokens
  across rounds and handle budget exhaustion in the custom tools path
- Discount cached tokens (Anthropic) in the token accumulator to
  avoid over-counting reused KV cache prefixes
- Update persona editor UI with execution mode selector and
  conditional fields (agentic shows token budget/compression;
  default shows max context posts)
2026-03-05 15:06:54 +11:00

61 lines
1.8 KiB
Ruby

# frozen_string_literal: true
# Specs for DiscourseAi::Completions::TokenUsageTracker covering three areas:
# accumulation from audit logs, seeding the tracker from base values, and
# validation of constructor argument combinations.
RSpec.describe DiscourseAi::Completions::TokenUsageTracker do
  # Lightweight stand-in for an audit log record. Members are positional, in
  # the order: request, cache write, cache read, response tokens.
  let(:audit_log_class) do
    Struct.new(:request_tokens, :cache_write_tokens, :cache_read_tokens, :response_tokens)
  end

  it "applies weighted request accounting from audit logs" do
    usage = described_class.new

    usage.add_from_audit_log(audit_log_class.new(1000, 0, 800, 50))

    # 1000 request tokens plus 800 cache-read tokens are expected to be
    # reported as 1080, i.e. the cache reads are not counted at face value.
    expect(usage).to have_attributes(request: 1080, response: 50, total: 1130)
  end

  it "supports starting from a previous total budget" do
    usage = described_class.new(base_total: 101)

    # An odd total is expected to split as 50 request / 51 response.
    expect(usage).to have_attributes(request: 50, response: 51, total: 101)
  end

  it "supports exact request/response initialization" do
    usage = described_class.new(base_request: 12, base_response: 34)

    expect(usage).to have_attributes(request: 12, response: 34, total: 46)
  end

  it "accumulates across multiple audit logs" do
    usage = described_class.new
    entries = [audit_log_class.new(100, 20, 50, 10), audit_log_class.new(200, 0, 0, 5)]

    entries.each { |entry| usage.add_from_audit_log(entry) }

    # Both logs contribute: response is 10 + 5, request is expected to be 325.
    expect(usage).to have_attributes(request: 325, response: 15, total: 340)
  end

  it "raises when request/response initialization is partial" do
    expect do
      described_class.new(base_request: 1)
    end.to raise_error(ArgumentError, /must both be provided/)
  end

  it "raises when total and request/response are mixed" do
    expect do
      described_class.new(base_total: 10, base_request: 1, base_response: 2)
    end.to raise_error(ArgumentError, /cannot be combined/)
  end
end