discourse/plugins/discourse-ai/evals/lib/prompts/prompt_evaluator.rb
Sam b8abe100c5
FEATURE: add agentic execution mode for AI personas (#38230)
Introduce an "agentic" execution mode as an alternative to the
default fixed-turn/tool-limit approach. In agentic mode, personas
use a configurable token budget (`max_turn_tokens`) to govern how
long a tool-use session can run, with automatic context compression
when the conversation exceeds a configurable threshold percentage
(`compression_threshold`) of the model's context window.

Key changes:

- Add `execution_mode`, `max_turn_tokens`, and `compression_threshold`
  columns to `ai_personas` via migration
- Refactor `Bot#reply` to support token-budget loop control with a
  thread-local token accumulator, budget exhaustion hints, and a
  safety valve at 100 completions
- Add `maybe_compress_context` which summarizes middle conversation
  messages when token usage crosses the compression threshold,
  preserving system prompt and recent tail messages
- Update `StreamReplyCustomToolsSession` to track accumulated tokens
  across rounds and handle budget exhaustion in the custom tools path
- Discount cached tokens (Anthropic) in the token accumulator to
  avoid over-counting reused KV cache prefixes
- Update persona editor UI with execution mode selector and
  conditional fields (agentic shows token budget/compression;
  default shows max context posts)
2026-03-05 15:06:54 +11:00

85 lines
2.2 KiB
Ruby
Vendored

# frozen_string_literal: true
# Expands eval test definitions into individual runs (one per prompt/message
# combination) and executes each through a PromptSingleTestRunner built around
# a single LLM.
class DiscourseAi::Evals::PromptEvaluator
  # @param llm_model [#to_llm] its `to_llm` result is handed to the test runner
  def initialize(llm_model)
    @llm = llm_model.to_llm
  end

  # Runs every prompt x message combination of each test definition.
  #
  # @param args [Hash, Array<Hash>] a test definition or a list of them; any
  #   non-Array value is wrapped in a one-element list
  # @param execution_context [Object, nil] passed through unchanged to
  #   `run_single_test`
  # @return [Array] the `run_single_test` results, flattened across tests and
  #   prompts (one entry per test/prompt/message combination)
  def prompt_call(args, execution_context: nil)
    # Not Array(args): a single Hash definition must stay one element.
    tests = args.is_a?(Array) ? args : [args]
    runner = DiscourseAi::Evals::PromptSingleTestRunner.new(@llm)

    with_tests_progress(total: tests.size) do |bump_progress|
      tests.flat_map do |test|
        bump_progress.call

        # Plural keys win; otherwise the singular value is wrapped in a list.
        prompts = test[:prompts] || [test[:prompt]]
        messages = test[:messages] || [test[:message]]
        followups = symbolize_followups(test)
        output_thinking = test[:output_thinking] || false
        stream = test[:stream] || false
        temperature = test[:temperature]
        tools = symbolize_tools(test[:tools])
        tool_results = test[:tool_results]
        chain_length = test[:chain_length] || 1
        max_tool_calls = test[:max_tool_calls]

        prompts.flat_map do |prompt|
          messages.map do |message|
            runner.run_single_test(
              prompt:,
              message:,
              followups:,
              output_thinking:,
              stream:,
              temperature:,
              tools:,
              tool_results:,
              chain_length:,
              max_tool_calls:,
              execution_context:,
            )
          end
        end
      end
    end
  end

  private

  # Normalizes the followups of a test definition to symbol keys.
  #
  # Reads `:followups`, falling back to a one-element list built from
  # `:followup`. Each followup and its `:message` are copied before their keys
  # are symbolized, and the message `:type`, when present, is converted to a
  # Symbol.
  #
  # @param args [Hash] a single test definition
  # @return [Array<Hash>, nil] nil when neither `:followups` nor `:followup`
  #   is set
  def symbolize_followups(args)
    return nil if args[:followups].nil? && args[:followup].nil?

    followups = args[:followups] || [args[:followup]]
    followups.map do |followup|
      followup = followup.dup.symbolize_keys!
      message = followup[:message].dup.symbolize_keys!
      message[:type] = message[:type].to_sym if message[:type]
      followup[:message] = message
      followup
    end
  end

  # Normalizes tool definitions to symbol keys, including the keys of every
  # entry under `:parameters`.
  #
  # Each tool is symbolized on a shallow copy so the hashes supplied by the
  # caller keep their original keys (the same copy-then-symbolize approach
  # `symbolize_followups` uses).
  #
  # @param tools [Array<Hash>, nil]
  # @return [Array<Hash>, nil] nil when `tools` is nil
  def symbolize_tools(tools)
    return nil if tools.nil?

    tools.map do |tool|
      symbolized = tool.dup.symbolize_keys!
      parameters = symbolized[:parameters]&.map { |param| param.transform_keys(&:to_sym) }

      # `compact` drops every nil-valued key, which also removes the
      # `:parameters` entry when the tool declares none.
      symbolized.merge(parameters: parameters).compact
    end
  end

  # Prints a single-line progress indicator to stdout around the given block.
  #
  # The block receives a lambda; each call increments the counter and rewrites
  # the "Processing test x/total" line in place. Once the block returns, the
  # line is erased ("\r" followed by the ANSI erase-to-end-of-line sequence).
  #
  # @param total [Integer] number shown as the denominator
  # @return [Object] whatever the block returns
  def with_tests_progress(total:)
    puts ""
    count = 0

    bump =
      lambda do
        count += 1
        print "\rProcessing test #{count}/#{total}"
      end

    result = yield(bump)
    print "\r\033[K"
    result
  end
end