discourse/plugins/discourse-ai/lib/completions/dialects/dialect.rb
Rafael dos Santos Silva bc39aacc3d
FEATURE: Provider-native built-in tools for agents (web search) (#40809)
Adds a fourth kind of agent tool: provider-native built-in tools that
the
LLM provider executes server-side, rather than tools Discourse runs and
feeds back. The first one is web search, supported on Gemini (Google
Search
grounding), OpenAI (web search via the Responses API) and Anthropic
(Claude
web search).

Native tools are stored on the agent's `tools` column with a `native-`
prefix, flow to the prompt as a separate `native_tools` list (never as
runnable Tool classes), and each provider dialect renders them into its
own
request payload. Response processors already ignore the server-side
tool/grounding blocks, so the bot loop never tries to execute them.

They are only selectable when the agent forces a default LLM whose
provider
supports the tool; this is enforced both in the editor UI (filtered by
the
selected LLM's `supported_native_tools`) and by server-side validation.

Also fixes the Gemini endpoint sending `function_calling_config` without
any
`function_declarations`, which the API rejects when only native tools
are
present.

---------

Co-authored-by: Sam Saffron <sam.saffron@gmail.com>
2026-06-16 14:37:51 -03:00

493 lines
15 KiB
Ruby
Vendored

# frozen_string_literal: true
module DiscourseAi
module Completions
module Dialects
class Dialect
class << self
  # Whether this dialect can build requests for +llm_model+.
  # Abstract hook: the base class has no answer, concrete dialects override it.
  #
  # @raise [NotImplementedError] always, in this base implementation
  def can_translate?(llm_model)
    # `NotImplemented` is not a Ruby constant (it would surface as a NameError);
    # NotImplementedError is the built-in exception for this situation.
    raise NotImplementedError, "#{self} must implement .can_translate?"
  end

  # Dialect classes consulted by dialect_for. dialect_for returns the first
  # class that accepts the model, so the order of this list is significant.
  def all_dialects
    [
      DiscourseAi::Completions::Dialects::OpenAiResponses,
      DiscourseAi::Completions::Dialects::ChatGpt,
      DiscourseAi::Completions::Dialects::Gemini,
      DiscourseAi::Completions::Dialects::Converse,
      DiscourseAi::Completions::Dialects::Claude,
      DiscourseAi::Completions::Dialects::Command,
      DiscourseAi::Completions::Dialects::Ollama,
      DiscourseAi::Completions::Dialects::Mistral,
      DiscourseAi::Completions::Dialects::Nova,
      DiscourseAi::Completions::Dialects::OpenAiCompatible,
    ]
  end

  # Finds the first dialect class whose can_translate? accepts +llm_model+.
  # When Rails.env.local? is true the Fake dialect is tried before all others.
  #
  # @raise [DiscourseAi::Completions::Llm::UNKNOWN_MODEL] when no dialect matches
  def dialect_for(llm_model)
    candidates = Rails.env.local? ? [DiscourseAi::Completions::Dialects::Fake] : []
    candidates.concat(all_dialects)

    dialect = candidates.find { |d| d.can_translate?(llm_model) }
    raise DiscourseAi::Completions::Llm::UNKNOWN_MODEL if !dialect
    dialect
  end
end
# Stores the generic prompt, the target LLM model record and any extra options.
#
# @param generic_prompt the provider-agnostic prompt to translate
# @param llm_model the model the prompt is being translated for
# @param opts [Hash] optional settings, kept as-is for later use
def initialize(generic_prompt, llm_model, opts: {})
  @prompt, @llm_model, @opts = generic_prompt, llm_model, opts
end
# Whether this dialect supports native tool calling.
# The base dialect answers false; translate uses this answer to decide whether
# to inject a "done" marker into the final tool message.
def native_tool_support?
  false
end
# Delegates to the model: true when the configured LLM reports vision_enabled?.
def vision_support?
  llm_model.vision_enabled?
end
# Tools translated by the tools dialect. The result is memoized once it is
# truthy, so the translation runs at most once per dialect instance.
def tools
  return @tools if @tools

  @tools = tools_dialect.translated_tools
end
# Provider-native built-in tools (e.g. web search) rendered into the request
# payload; the provider executes them server-side.
# The base dialect renders none and returns a fresh empty array on each call.
def native_tools
  []
end
# The tool choice requested on the generic prompt, passed through unchanged.
def tool_choice
  prompt.tool_choice
end
# Model-voice text stating that the tool budget is exhausted and the final
# answer follows.
def self.no_more_tool_calls_text
  # note, Anthropic must never prefill with an ending whitespace
  "Tool budget EXHAUSTED for this response, no more tools will be called in this response.\n" \
    "Here is the best, complete, answer I can come up with given the information I have to address the original user query."
end
# User-voice instruction telling the model to answer without any tool or
# function calls.
def self.no_more_tool_calls_text_user
  "IT IS CRITICAL you do not use any tools or function calls in your response. " \
    "JUST REPLY with the best answer you can provide based on your existing knowledge."
end
# Instance-level accessor for the class-level "tool budget exhausted" text.
# Resolved through self.class, so a subclass overriding the class method is
# honoured.
def no_more_tool_calls_text
  self.class.no_more_tool_calls_text
end
# Instance-level accessor for the class-level "do not use tools" user
# instruction. Resolved through self.class.
def no_more_tool_calls_text_user
  self.class.no_more_tool_calls_text_user
end
# Which message roles get upload markdown stripped during translate.
# Supported options are :none/:all/:model_only; the base dialect strips nothing.
def strip_upload_markdown_mode
  :none
end
# Removes image markdown that points at uploads already attached to the
# conversation as structured content.
#
# @param messages [Array<Hash>] prompt messages (:type and :content keys are read)
# @param strip_mode [Symbol, nil] :all covers user and model messages,
#   :model_only covers model messages; any other value leaves messages untouched
# @return [Array<Hash>] the same array when nothing is eligible, otherwise a
#   new array in which covered messages are replaced by merged copies
def strip_upload_markdown(messages, strip_mode: nil)
  roles =
    if strip_mode == :all
      %i[user model]
    elsif strip_mode == :model_only
      %i[model]
    else
      []
    end
  return messages if roles.empty?

  in_scope = ->(message) { roles.include?(message[:type].to_sym) }

  # Gather ids of uploads attached as structured parts on in-scope messages.
  upload_ids = []
  messages.each do |message|
    next unless in_scope.call(message)

    parts = message[:content]
    parts = [parts] unless parts.is_a?(Array)
    parts.each do |part|
      upload_ids << part[:upload_id] if part.is_a?(Hash) && part[:upload_id]
    end
  end
  upload_ids = upload_ids.uniq
  return messages if upload_ids.empty?

  shas = Upload.where(id: upload_ids).pluck(:sha1).compact

  messages.map do |message|
    next message unless in_scope.call(message)

    parts = message[:content]
    parts = [parts] unless parts.is_a?(Array)
    stripped = parts.map { |part| part.is_a?(String) ? strip_upload_markers(part, shas) : part }

    # Content made of a single part is stored unwrapped.
    message.merge(content: stripped.length == 1 ? stripped[0] : stripped)
  end
end
# Translates the generic prompt into this dialect's list of messages.
#
# Steps: optionally strip upload markdown, expand text-document uploads, trim
# to the token budget, then map each message through the per-type translator.
# nil results from translators are dropped.
#
# @return [Array] translated messages
# @raise [ArgumentError] when a message has an unknown :type
def translate
  messages = prompt.messages
  if strip_upload_markdown_mode != :none
    messages = strip_upload_markdown(messages, strip_mode: strip_upload_markdown_mode)
  end
  messages = expand_text_document_uploads(messages)
  messages = trim_messages(messages)

  last_message = messages.last
  inject_done_on_last_tool_call = false
  # Without native tool support, a conversation that ends on a tool result
  # while tool_choice is :none gets a "done" marker on that final result.
  if !native_tool_support? && last_message && last_message[:type].to_sym == :tool &&
       prompt.tool_choice == :none
    inject_done_on_last_tool_call = true
  end

  messages
    .map do |msg|
      case msg[:type].to_sym
      when :system
        system_msg(msg)
      when :user
        user_msg(msg)
      when :model
        model_msg(msg)
      when :tool
        # Identity check, not ==: an earlier tool message with identical
        # content must not be mistaken for the last one.
        if inject_done_on_last_tool_call && msg.equal?(last_message)
          tools_dialect.inject_done { tool_msg(msg) }
        else
          tool_msg(msg)
        end
      when :tool_call
        tool_call_msg(msg)
      else
        raise ArgumentError, "Unknown message type: #{msg[:type]}"
      end
    end
    .compact
end
# Abstract hook: the base dialect provides no conversation context.
#
# @raise [NotImplementedError] always, in this base implementation
def conversation_context
  # `NotImplemented` is not a Ruby constant (it would surface as a NameError);
  # NotImplementedError is the built-in exception for this situation.
  raise NotImplementedError, "#{self.class} must implement #conversation_context"
end
# Abstract hook: token budget available for the prompt. trim_messages reads
# it as its overall limit, so concrete dialects must supply a number.
#
# @raise [NotImplementedError] always, in this base implementation
def max_prompt_tokens
  # `NotImplemented` is not a Ruby constant (it would surface as a NameError);
  # NotImplementedError is the built-in exception for this situation.
  raise NotImplementedError, "#{self.class} must implement #max_prompt_tokens"
end
attr_reader :prompt
private
attr_reader :opts, :llm_model
# Deletes markdown image references (`![alt|meta](upload://<base62>...)`) whose
# base62 id corresponds to one of +upload_shas+; other references are kept.
#
# @param markdown [String, nil] text to clean
# @param upload_shas [Array<String>] sha1 values of the uploads to strip
# @return [String, nil] +markdown+ itself when either argument is blank,
#   otherwise a new string with the matching image references removed
def strip_upload_markers(markdown, upload_shas)
  return markdown if markdown.blank? || upload_shas.blank?

  strippable = upload_shas.compact.map { |sha| Upload.base62_sha1(sha) }.to_set
  image_pattern = %r{!\[([^\]|]+)(?:\|[^\]]*)?\]\(upload://([a-zA-Z0-9]+)[^)]+\)}

  markdown.gsub(image_pattern) do |image_markdown|
    # capture 2 is the base62 id that follows upload://
    strippable.include?(Regexp.last_match(2)) ? "" : image_markdown
  end
end
# Fits the conversation into max_prompt_tokens.
#
# A leading system message is always kept, truncated to at most 60% of the
# budget. The remaining messages are walked newest-first: each is kept whole
# if it fits, or truncated to the remaining budget. Tool-call messages are
# never truncated; the walk stops at the first one that does not fit.
# Messages whose content ends up blank are skipped. If the oldest kept
# message is a tool result, it is dropped.
#
# The input hashes are never modified; kept messages are copies.
#
# @param messages [Array<Hash>] messages with :type and :content
# @return [Array<Hash>] trimmed messages in original order, or +messages+
#   itself when the prompt opts out of trimming
def trim_messages(messages)
  return messages if prompt.skip_trim

  prompt_limit = max_prompt_tokens
  current_token_count = 0
  trimmed_messages = []
  range = (0..-1)

  if messages.dig(0, :type) == :system
    max_system_tokens = prompt_limit * 0.6
    # Work on a copy so that truncation does not rewrite the caller's
    # system message, matching how every other message is dup'ed below.
    system_message = messages[0].dup
    system_size = calculate_message_token(system_message)

    if system_size > max_system_tokens
      system_message[:content] = tokenizer.truncate(
        system_message[:content],
        max_system_tokens,
        strict: SiteSetting.ai_strict_token_counting,
      )
    end

    trimmed_messages << system_message
    current_token_count += calculate_message_token(system_message)
    range = (1..-1)
  end

  reversed_trimmed_msgs = []

  messages[range].reverse.each do |msg|
    break if current_token_count >= prompt_limit

    message_tokens = calculate_message_token(msg)
    dupped_msg = msg.dup

    # Don't trim tool call metadata.
    if msg[:type] == :tool_call
      break if current_token_count + message_tokens + per_message_overhead > prompt_limit

      current_token_count += message_tokens + per_message_overhead
      reversed_trimmed_msgs << dupped_msg
      next
    end

    # Trimming content to make sure we respect token limit.
    available_tokens = prompt_limit - current_token_count - per_message_overhead
    if message_tokens > available_tokens
      dupped_msg[:content] = truncate_content_to_token_budget(
        dupped_msg[:content],
        available_tokens,
      )
      message_tokens = calculate_message_token(dupped_msg)
    end

    next if content_blank?(dupped_msg[:content])

    current_token_count += message_tokens + per_message_overhead
    reversed_trimmed_msgs << dupped_msg
  end

  # The list is newest-first here, so `last` is the oldest kept message.
  reversed_trimmed_msgs.pop if reversed_trimmed_msgs.last&.dig(:type) == :tool

  trimmed_messages.concat(reversed_trimmed_msgs.reverse)
end
# Extra tokens charged for every message while trimming, on top of its
# content. The base dialect charges nothing.
def per_message_overhead
  0
end
# Replaces `{ upload_id: ... }` parts that encode to an allowed document with
# non-blank text by `{ encoded_upload: <encoded> }`. Every other part, and
# every message whose content is not an Array, passes through unchanged.
#
# @param messages [Array<Hash>] prompt messages
# @return [Array<Hash>] messages, with array-content messages replaced by
#   merged copies
def expand_text_document_uploads(messages)
  messages.map do |message|
    parts = message[:content]
    next message unless parts.is_a?(Array)

    rewritten =
      parts.map do |part|
        upload_ref = part.is_a?(Hash) && part.key?(:upload_id)
        next part unless upload_ref

        # Only documents are requested here; images are deliberately excluded.
        encoded =
          prompt.encode_upload(
            part[:upload_id],
            allow_images: false,
            allow_documents: true,
            allowed_attachment_types: llm_model.allowed_attachment_types,
          )

        expandable =
          encoded&.dig(:kind) == :document && encoded[:text].present? &&
            document_allowed?(encoded)

        expandable ? { encoded_upload: encoded } : part
      end

    message.merge(content: rewritten)
  end
end
# Truncates message content of any supported shape to +token_budget+ tokens.
# Arrays and hashes are routed to their dedicated helpers; anything else is
# stringified and cut by the tokenizer.
#
# @return [String, Array, Hash] truncated content; "" when the budget is
#   zero or negative
def truncate_content_to_token_budget(content, token_budget)
  return "" if token_budget <= 0
  return truncate_array_content_to_token_budget(content, token_budget) if content.is_a?(Array)
  return truncate_hash_content_to_token_budget(content, token_budget) if content.is_a?(Hash)

  tokenizer.truncate(content.to_s, token_budget, strict: SiteSetting.ai_strict_token_counting)
end
# Keeps leading parts of +content+ while they fit in +token_budget+. The
# first part that does not fit ends the scan: strings and encoded uploads are
# cut down to the remaining budget (and kept if anything is left), any other
# kind of part is dropped.
#
# @return [Array] the parts that fit, in order
def truncate_array_content_to_token_budget(content, token_budget)
  budget_left = token_budget
  kept = []

  content.each do |part|
    cost = calculate_content_token(part)

    if cost <= budget_left
      kept << part
      budget_left -= cost
      next
    end

    can_shorten = part.is_a?(String) || (part.is_a?(Hash) && part.key?(:encoded_upload))
    if can_shorten
      shortened = truncate_content_to_token_budget(part, budget_left)
      kept << shortened if !content_blank?(shortened)
    end
    break
  end

  kept
end
# Truncates the text of an `{ encoded_upload: {...} }` part to +token_budget+.
# The input hash is left untouched; a copy carrying the shortened text is
# returned.
#
# @return [Hash, String] the truncated part, or "" when +content+ is not an
#   encoded upload or no text survives truncation
def truncate_hash_content_to_token_budget(content, token_budget)
  return "" unless content.key?(:encoded_upload)

  upload = content[:encoded_upload]
  shortened_text =
    tokenizer.truncate(
      upload[:text].to_s,
      token_budget,
      strict: SiteSetting.ai_strict_token_counting,
    )
  return "" unless shortened_text.present?

  { encoded_upload: upload.merge(text: shortened_text) }
end
# True when +content+ carries nothing worth sending: an array whose parts are
# all blank, an encoded upload with blank text, or any other blank value.
def content_blank?(content)
  if content.is_a?(Array)
    content.all? { |part| content_blank?(part) }
  elsif content.is_a?(Hash) && content.key?(:encoded_upload)
    content[:encoded_upload][:text].blank?
  else
    content.blank?
  end
end
# Token count of a message, measured on its :content only.
def calculate_message_token(msg)
  calculate_content_token(msg[:content])
end
# Token count for content of any supported shape: arrays are summed part by
# part, encoded uploads are measured on their text, and everything else is
# measured on its string form.
def calculate_content_token(content)
  return content.sum { |part| calculate_content_token(part) } if content.is_a?(Array)

  measured =
    if content.is_a?(Hash) && content.key?(:encoded_upload)
      content[:encoded_upload][:text]
    else
      content
    end

  tokenizer.size(measured.to_s)
end
# Tokenizer used for counting and truncating; taken from the LLM model's
# tokenizer_class.
def tokenizer
  llm_model.tokenizer_class
end
# Memoized XmlTools helper built from the prompt's tools; the base dialect
# uses it to translate tools and tool messages.
def tools_dialect
  return @tools_dialect if @tools_dialect

  @tools_dialect = DiscourseAi::Completions::Dialects::XmlTools.new(prompt.tools)
end
# Abstract hook: translate a :system message into the provider format.
#
# @raise [NotImplementedError] always, in this base implementation
def system_msg(msg)
  # `NotImplemented` is not a Ruby constant (it would surface as a NameError);
  # NotImplementedError is the built-in exception for this situation.
  raise NotImplementedError, "#{self.class} must implement #system_msg"
end
# Abstract hook: translate a :model message into the provider format.
# tool_call_msg also routes through this hook.
#
# @raise [NotImplementedError] always, in this base implementation
def model_msg(msg)
  # `NotImplemented` is not a Ruby constant (it would surface as a NameError);
  # NotImplementedError is the built-in exception for this situation.
  raise NotImplementedError, "#{self.class} must implement #model_msg"
end
# Abstract hook: translate a :user message into the provider format.
# tool_msg also routes through this hook.
#
# @raise [NotImplementedError] always, in this base implementation
def user_msg(msg)
  # `NotImplemented` is not a Ruby constant (it would surface as a NameError);
  # NotImplementedError is the built-in exception for this situation.
  raise NotImplementedError, "#{self.class} must implement #user_msg"
end
# Renders a tool call through the tools dialect and hands it to model_msg as
# the message content. The original message hash is not modified.
def tool_call_msg(msg)
  rendered_call = tools_dialect.from_raw_tool_call(msg)
  model_msg(msg.merge(content: rendered_call))
end
# Renders a tool result through the tools dialect and hands it to user_msg as
# the message content. The original message hash is not modified.
def tool_msg(msg)
  rendered_result = tools_dialect.from_raw_tool(msg)
  user_msg(msg.merge(content: rendered_result))
end
# Converts mixed message content into an array of provider-encoded parts.
#
# Consecutive strings are buffered and emitted as one text part. An upload
# (`:upload_id` or pre-encoded `:encoded_upload`) that passes all filters
# flushes the buffered text first, then emits the encoded upload. Uploads
# that are rejected are skipped without flushing, so the text around them is
# merged into a single part.
#
# NOTE(review): parts handled by +other_encoder+ do not flush the text buffer,
# so text that precedes such a part is emitted after it — confirm intended.
#
# @param content [String, Hash, Array] a single part or a list of parts
# @param upload_encoder [#call] receives the encoded upload hash; a falsy
#   result is dropped
# @param text_encoder [#call] receives each buffered text run
# @param other_encoder [#call, nil] receives any part that is neither a
#   String nor an upload hash; a falsy result is dropped
# @param allow_images [Boolean] keep uploads whose kind is :image
# @param allow_documents [Boolean] keep uploads whose kind is :document
# @param allowed_attachment_types passed through to prompt.encode_upload
# @param upload_filter [#call, nil] extra predicate on the encoded upload
# @return [Array] encoded parts
def to_encoded_content_array(
  content:,
  upload_encoder:,
  text_encoder:,
  other_encoder: nil,
  allow_images:,
  allow_documents: false,
  allowed_attachment_types: nil,
  upload_filter: nil
)
  parts = content.is_a?(Array) ? content : [content]
  buffer = +""
  result = []

  # Emits the buffered text (if any) and starts a fresh buffer.
  flush_text =
    lambda do
      unless buffer.empty?
        result << text_encoder.call(buffer)
        buffer = +""
      end
    end

  parts.each do |part|
    if part.is_a?(String)
      buffer << part
      next
    end

    upload_part = part.is_a?(Hash) && (part.key?(:upload_id) || part.key?(:encoded_upload))
    unless upload_part
      if other_encoder
        encoded_other = other_encoder.call(part)
        result << encoded_other if encoded_other
      end
      next
    end

    next if !allow_images && !allow_documents

    encoded =
      if part.key?(:encoded_upload)
        part[:encoded_upload]
      else
        prompt.encode_upload(
          part[:upload_id],
          allow_images: allow_images,
          allow_documents: allow_documents,
          allowed_attachment_types: allowed_attachment_types,
        )
      end
    next if encoded.blank?

    next if encoded[:kind] == :image && !allow_images
    next if encoded[:kind] == :document && !allow_documents
    next if upload_filter && !upload_filter.call(encoded)

    flush_text.call
    encoded_upload = upload_encoder.call(encoded)
    result << encoded_upload if encoded_upload
  end

  flush_text.call
  result
end
# Whether an encoded upload may be sent to this model. Non-documents are
# always allowed; a document is allowed only when its attachment type (derived
# from the file extension and mime type) is among the model's allowed types.
def document_allowed?(encoded)
  return true unless encoded[:kind] == :document

  permitted_types = llm_model.allowed_attachment_types
  return false if permitted_types.blank?

  extension = File.extname(encoded[:filename].to_s).delete_prefix(".")
  attachment_type =
    DiscourseAi::Completions::UploadEncoder.attachment_type_for(extension, encoded[:mime_type])

  permitted_types.include?(attachment_type)
end
end
end
end
end