mirror of
https://gh.wpcy.net/https://github.com/discourse/discourse.git
synced 2026-06-19 03:23:50 +08:00
Introduce an "agentic" execution mode as an alternative to the default fixed-turn/tool-limit approach. In agentic mode, personas use a configurable token budget (`max_turn_tokens`) to govern how long a tool-use session can run, with automatic context compression when the conversation exceeds a configurable threshold percentage (`compression_threshold`) of the model's context window. Key changes: - Add `execution_mode`, `max_turn_tokens`, and `compression_threshold` columns to `ai_personas` via migration - Refactor `Bot#reply` to support token-budget loop control with a thread-local token accumulator, budget exhaustion hints, and a safety valve at 100 completions - Add `maybe_compress_context` which summarizes middle conversation messages when token usage crosses the compression threshold, preserving system prompt and recent tail messages - Update `StreamReplyCustomToolsSession` to track accumulated tokens across rounds and handle budget exhaustion in the custom tools path - Discount cached tokens (Anthropic) in the token accumulator to avoid over-counting reused KV cache prefixes - Update persona editor UI with execution mode selector and conditional fields (agentic shows token budget/compression; default shows max context posts)
138 lines
4.1 KiB
Ruby
Vendored
138 lines
4.1 KiB
Ruby
Vendored
# frozen_string_literal: true

# Drives a single eval scenario against an LLM: an initial prompt/message
# exchange, an optional chain of tool-call rounds answered from canned tool
# results, and optional follow-up messages.
class DiscourseAi::Evals::PromptSingleTestRunner
  # @param llm [Object] completion client; only `generate` is called on it
  def initialize(llm)
    @llm = llm
  end

  # Run a single test with a prompt and message, and some model settings
  # @param prompt [String] the prompt to use
  # @param message [String] the message to use
  # @param followups [Array<Hash>] an array of followups (messages) to run after the initial prompt
  # @param output_thinking [Boolean] whether to output the thinking state of the model
  # @param stream [Boolean] whether to stream the output of the model
  # @param temperature [Float] the temperature to use when generating completions
  # @param tools [Array<Hash>] an array of tools to use when generating completions
  # @param tool_results [Hash, nil] canned tool outputs keyed by tool name; `{{param}}`
  #   placeholders are replaced with the parameters of the matching tool call
  # @param chain_length [Integer] maximum number of completions generated for the initial exchange
  # @param max_tool_calls [Integer, nil] maximum number of tool calls answered per model reply
  #   (nil means no limit)
  # @param execution_context [Object, nil] forwarded unchanged to the LLM's `generate`
  # @return [Hash] the prompt, message, and result of the test
  def run_single_test(
    prompt:,
    message:,
    followups:,
    output_thinking:,
    stream:,
    temperature:,
    tools:,
    tool_results: nil,
    chain_length: 1,
    max_tool_calls: nil,
    execution_context: nil
  )
    @c_prompt =
      DiscourseAi::Completions::Prompt.new(prompt, messages: [{ type: :user, content: message }])
    @c_prompt.tools = tools if tools
    @tool_results = tool_results || {}

    while chain_length > 0
      generate_result(temperature, output_thinking, stream, execution_context:)
      chain_length -= 1

      if chain_length > 0
        populate_reply(max_tool_calls:)
        # The chain only continues while the model keeps asking for tools.
        break if @c_prompt.messages.last[:type] != :tool
      end

      # Exactly one completion is left in the chain: set tool_choice to :none for it.
      @c_prompt.tool_choice = :none if chain_length == 1
    end

    if followups
      followups.each do |followup|
        generate_followup(followup, output_thinking, stream, temperature, execution_context:)
      end
    end

    { prompt:, message:, result: @result }
  end

  private

  # Appends the last model result (@result) to the prompt (@c_prompt) so the
  # chain can proceed. Every tool call in the result is followed by a
  # ToolResult built from the canned entry in @tool_results.
  #
  # @param max_tool_calls [Integer, nil] number of tool calls to answer; once it is
  #   exhausted the remaining parts of the result are dropped
  # @raise [RuntimeError] when a tool call has no canned result
  def populate_reply(max_tool_calls:)
    parts = @result.is_a?(Array) ? @result : [@result]

    reply = []
    parts.each do |part|
      if !part.is_a?(DiscourseAi::Completions::ToolCall)
        reply.push(part)
        next
      end

      canned = @tool_results[part.name]
      raise "No tool result provided for tool #{part.name}" if !canned

      # Block form of gsub: the value is inserted verbatim. A replacement
      # *string* would have backslash sequences such as "\0" or "\\1" in
      # model-supplied values interpreted as back-references.
      content =
        part.parameters.reduce(canned) do |text, (key, value)|
          text.gsub("{{#{key}}}") { value.to_s }
        end

      if max_tool_calls
        max_tool_calls -= 1
        break if max_tool_calls < 0
      end

      reply.push(part)
      reply.push(DiscourseAi::Completions::ToolResult.new(content: content, tool_call: part))
    end

    @c_prompt.push_model_response(reply)
  end

  # Records the previous result as the model's response, pushes the follow-up
  # message onto the prompt and generates the next result.
  #
  # Errors raised while generating are printed and swallowed, leaving @result
  # at its previous value.
  def generate_followup(followup, output_thinking, stream, temperature, execution_context: nil)
    @c_prompt.push_model_response(@result)
    @c_prompt.push(**set_followup_tool(followup))

    begin
      generate_result(temperature, output_thinking, stream, execution_context:)
    rescue => e
      # should not happen but it helps debugging...
      puts e
    end
  end

  # Applies the follow-up's tool list (if any) to the prompt and returns the
  # message to push.
  #
  # `:id` and `:name` may be given as a `[type, key]` pair; the pair is then
  # replaced by the value stored under `key` in the most recent prompt message
  # of that type. This allows us to dynamically set the id or name of the tool
  # call.
  #
  # @param followup [Hash] follow-up definition with `:message` and optional `:tools`
  # @return [Hash] a copy of the follow-up message with references resolved
  def set_followup_tool(followup)
    @c_prompt.tools = followup[:tools] if followup[:tools]

    # Work on a copy: writing the resolved value back into the caller's hash
    # would destroy the `[type, key]` reference, so a later run reusing the
    # same follow-up definition would silently get this run's id/name.
    followup_message = followup[:message].dup

    %i[id name].each do |key|
      reference = followup_message[key]
      next if !reference.is_a?(Array)

      type, inner_key = reference
      prev = @c_prompt.messages.reverse.find { |m| m[:type] == type.to_sym }
      followup_message[key] = prev[inner_key.to_sym] if prev
    end

    followup_message
  end

  # Generates a completion for the current prompt and stores it in @result.
  # When streaming, @result is the array of partials yielded by the LLM;
  # otherwise it is whatever `generate` returns.
  def generate_result(temperature, output_thinking, stream, execution_context: nil)
    options = { user: Discourse.system_user, temperature:, output_thinking:, execution_context: }

    @result =
      if stream
        partials = []
        @llm.generate(@c_prompt, **options) { |partial| partials << partial }
        partials
      else
        @llm.generate(@c_prompt, **options)
      end
  end
end
|