discourse/plugins/discourse-ai/lib/completions/endpoints/gemini.rb
Rafael dos Santos Silva bc39aacc3d
FEATURE: Provider-native built-in tools for agents (web search) (#40809)
Adds a fourth kind of agent tool: provider-native built-in tools that the
LLM provider executes server-side, rather than tools Discourse runs and
feeds back. The first one is web search, supported on Gemini (Google Search
grounding), OpenAI (web search via the Responses API) and Anthropic (Claude
web search).

Native tools are stored on the agent's `tools` column with a `native-`
prefix, flow to the prompt as a separate `native_tools` list (never as
runnable Tool classes), and each provider dialect renders them into its own
request payload. Response processors already ignore the server-side
tool/grounding blocks, so the bot loop never tries to execute them.

They are only selectable when the agent forces a default LLM whose provider
supports the tool; this is enforced both in the editor UI (filtered by the
selected LLM's `supported_native_tools`) and by server-side validation.

Also fixes the Gemini endpoint sending `function_calling_config` without any
`function_declarations`, which the API rejects when only native tools are
present.

---------

Co-authored-by: Sam Saffron <sam.saffron@gmail.com>
2026-06-16 14:37:51 -03:00

475 lines
15 KiB
Ruby
Vendored

# frozen_string_literal: true
module DiscourseAi
module Completions
module Endpoints
class Gemini < Base
# Key under which Gemini-specific data is nested inside a Thinking's provider_info.
GEMINI_PROVIDER_KEY = :gemini
# Keys copied out of a candidate's groundingMetadata when a web-search Thinking
# is built. Frozen so the shared list cannot be mutated at runtime.
GROUNDING_METADATA_KEYS = %i[webSearchQueries groundingChunks groundingSupports].freeze
# provider_info key that carries the signed (thoughtSignature) parts collected
# from a response.
THOUGHT_SIGNATURE_PROVIDER_KEY = :thought_signature_parts
# Tells whether this endpoint class serves the given model: true only when the
# model's provider is "google".
def self.can_contact?(llm_model)
  provider = llm_model.provider
  provider == "google"
end
# Base request payload: an empty generationConfig plus safety settings that
# set every listed harm category to BLOCK_NONE (the provider default blocks
# too much). A fresh hash is built on every call.
def default_options
  safety_settings =
    %w[HARASSMENT SEXUALLY_EXPLICIT HATE_SPEECH DANGEROUS_CONTENT].map do |name|
      { category: "HARM_CATEGORY_#{name}", threshold: "BLOCK_NONE" }
    end

  { generationConfig: {}, safetySettings: safety_settings }
end
# Translates generic completion parameters into the names Gemini's
# generationConfig uses, working on a copy so the caller's hash is untouched.
#
# - :stop_sequences -> :stopSequences, :max_tokens -> :maxOutputTokens,
#   :top_p -> :topP
# - :temperature is dropped when thinking is enabled for the model or the
#   "disable_temperature" custom param is set
# - :topP is dropped when the "disable_top_p" custom param is set
# - :include_thought_summaries is always removed and remembered in
#   @include_thought_summaries (only honoured when output_thinking is on)
#
# @param model_params [Hash] generic model parameters
# @return [Hash] a new hash with Gemini-style keys
def normalize_model_params(model_params)
  model_params = model_params.dup

  # Delete the flag unconditionally. It is not a Gemini generationConfig
  # field, so it must never survive into the request payload, even when
  # thinking output is turned off.
  summaries_requested = !!model_params.delete(:include_thought_summaries)
  @include_thought_summaries = output_thinking && summaries_requested

  if model_params[:stop_sequences]
    model_params[:stopSequences] = model_params.delete(:stop_sequences)
  end

  if model_params[:max_tokens]
    model_params[:maxOutputTokens] = model_params.delete(:max_tokens)
  end

  model_params[:topP] = model_params.delete(:top_p) if model_params[:top_p]

  thinking_enabled =
    %w[minimal low medium high].include?(llm_model.lookup_custom_param("thinking_level")) ||
      llm_model.lookup_custom_param("enable_thinking")

  if thinking_enabled || llm_model.lookup_custom_param("disable_temperature")
    model_params.delete(:temperature)
  end

  model_params.delete(:topP) if llm_model.lookup_custom_param("disable_top_p")

  model_params
end
# Returns the AiApiAuditLog provider constant that stands for Gemini.
def provider_id = AiApiAuditLog::Provider::Gemini
private
# Builds the request URI from the model's base URL and API key.
# Streaming requests hit `:streamGenerateContent` with `alt=sse`; everything
# else hits `:generateContent`. The key travels in the query string.
def model_uri
  base = llm_model.url
  api_key = llm_model.api_key

  endpoint =
    if @streaming_mode
      "#{base}:streamGenerateContent?key=#{api_key}&alt=sse"
    else
      "#{base}:generateContent?key=#{api_key}"
    end

  URI(endpoint)
end
# Assembles the request body sent to Gemini.
#
# Starts from default_options, adds the prompt messages and (when present)
# the system instruction, then layers on tools, generation parameters and
# thinking configuration. Also records whether the dialect supports native
# tools and resets the current tool-call batch token.
#
# @param prompt [Hash] reads :messages and :system_instruction
# @param model_params [Hash, nil] already-normalized generation parameters
# @param dialect [#native_tool_support?, #tools, #tool_choice]
# @return [Hash] the payload
def prepare_payload(prompt, model_params, dialect)
  @native_tool_support = dialect.native_tool_support?
  @current_batch_token = nil

  tools = @native_tool_support ? dialect.tools : nil
  payload = default_options.merge(contents: prompt[:messages])

  system_instruction = prompt[:system_instruction]
  if system_instruction.present?
    payload[:systemInstruction] = {
      role: "system",
      parts: [{ text: system_instruction.to_s }],
    }
  end

  if tools.present?
    payload[:tools] = tools

    # function_calling_config only applies to function declarations; Gemini
    # rejects it when the request only carries provider-native tools (e.g.
    # google_search grounding) with no function_declarations.
    declares_functions =
      tools.any? { |tool| tool.is_a?(Hash) && tool[:function_declarations].present? }

    if declares_functions
      choice = dialect.tool_choice
      calling_config =
        if !choice.present?
          { mode: "AUTO" }
        elsif choice == :none
          { mode: "NONE" }
        else
          # A named tool forces the model to call exactly that function.
          { mode: "ANY", allowed_function_names: [choice] }
        end

      payload[:tool_config] = { function_calling_config: calling_config }
    end
  end

  generation_config = payload[:generationConfig]

  if model_params.present?
    generation_config.merge!(model_params.except(:response_format))

    # https://ai.google.dev/api/generate-content#generationconfig
    schema = model_params.dig(:response_format, :json_schema, :schema)
    if schema.present?
      generation_config[:responseSchema] = schema.except(:additionalProperties)
      generation_config[:responseMimeType] = "application/json"
    end
  end

  # A named thinking level wins over the token-budget style of configuration.
  thinking_level = llm_model.lookup_custom_param("thinking_level")
  if %w[minimal low medium high].include?(thinking_level)
    generation_config[:thinkingConfig] = { thinkingLevel: thinking_level }
  elsif llm_model.lookup_custom_param("enable_thinking")
    budget = llm_model.lookup_custom_param("thinking_tokens").to_i.clamp(0, 24_576)
    generation_config[:thinkingConfig] = { thinkingBudget: budget }
  end

  if @include_thought_summaries
    (generation_config[:thinkingConfig] ||= {})[:includeThoughts] = true
  end

  payload
end
# Wraps an already-serialized payload in a JSON POST request aimed at model_uri.
#
# @param payload [String] request body
# @return [Net::HTTP::Post]
def prepare_request(payload)
  request = Net::HTTP::Post.new(model_uri, { "Content-Type" => "application/json" })
  request.body = payload
  request
end
# Extracts the completion carried by the first part of the first candidate.
#
# In streaming mode the argument is used as an already-parsed Hash;
# otherwise it is parsed as JSON with symbolized keys.
#
# @param response_raw [String, Hash]
# @return [ToolCall, String, nil] a ToolCall for a function call, the text
#   for a text part, upload markdown for inline data, or nil when the
#   response has no usable first part
def extract_completion_from(response_raw)
  parsed =
    if @streaming_mode
      response_raw
    else
      JSON.parse(response_raw, symbolize_names: true)
    end

  part = parsed.dig(:candidates, 0, :content, :parts, 0)
  return unless part

  function_call = part[:functionCall]
  # The flag stays set once any function call was seen, but branching is done
  # on the current part: a later text part must not be treated as a tool call
  # (that used to dereference a nil function call).
  @has_function_call ||= function_call.present?

  if function_call.present?
    ToolCall.new(
      id: "tool_0",
      name: function_call[:name],
      parameters: function_call[:args],
      provider_data: provider_data_from_part(part),
    )
  elsif part[:text]
    part[:text]
  elsif part[:inlineData]
    inline_data_to_upload_markdown(part[:inlineData])
  end
end
# Incremental decoder for the server-sent-event stream returned by the
# streaming endpoint. Chunks may end anywhere, so unfinished input is kept
# in a buffer and retried once more data arrives.
class GeminiStreamingDecoder
  def initialize
    @buffer = +""
  end

  # Appends a chunk to the buffer and returns every event that can be parsed.
  #
  # Events are separated by a blank line and look like `data: {json}`. A
  # piece that is not (yet) a parseable data event is kept as the new buffer
  # so the next chunk can complete it.
  #
  # @param str [String] raw chunk from the HTTP stream
  # @return [Array<Hash>] parsed event payloads with symbolized keys
  def decode(str)
    @buffer << str

    pending = nil

    decoded =
      @buffer
        .split(/\r?\n\r?\n/)
        .filter_map do |raw_event|
          # The blank-line separator can be split across two chunks, which
          # leaves stray line breaks in front of the next event. Strip them,
          # otherwise the event never matches the "data: {" prefix and the
          # buffer is stuck for the rest of the stream.
          event = raw_event.lstrip
          next if event.empty?

          unless event.start_with?("data: {")
            pending = event
            next
          end

          begin
            JSON.parse(event[6..], symbolize_names: true)
          rescue JSON::ParserError
            # Most likely a truncated event; wait for the rest.
            pending = event
            nil
          end
        end

    @buffer = pending ? +pending : +""

    decoded
  end
end
# Decodes one complete (non-streamed) JSON response.
#
# Records token usage, then returns the decoded parts of the first candidate
# followed by any native-tool thinkings derived from that candidate.
#
# @param chunk [String] JSON response body
# @return [Array]
def decode(chunk)
  parsed = JSON.parse(chunk, symbolize_names: true)
  update_usage(parsed)

  candidate = parsed.dig(:candidates, 0)
  parts = candidate&.dig(:content, :parts)

  decoded = decode_parts(parts, batch_token: current_batch_token_for(parts))
  decoded + native_tool_thinkings_from_candidate(candidate)
end
# Decodes one raw chunk of a streamed response.
#
# The chunk goes through the streaming decoder; each event it yields has its
# usage recorded and its parts decoded in streaming mode. Results from all
# events are flattened and nils removed. The running tool index starts at -1
# the first time this is called.
#
# @param chunk [String] raw bytes from the stream
# @return [Array]
def decode_chunk(chunk)
  @tool_index ||= -1

  events = streaming_decoder.decode(chunk)

  per_event =
    events.map do |event|
      update_usage(event)

      candidate = event.dig(:candidates, 0)
      parts = candidate&.dig(:content, :parts)
      token = current_batch_token_for(parts)

      from_parts = decode_parts(parts, batch_token: token, streaming: true)
      from_parts + native_tool_thinkings_from_candidate(candidate)
    end

  per_event.flatten.compact
end
# Converts the parts of a candidate into decoded results, in order.
#
# - thought parts go through decode_thought_summary
# - function calls become ToolCall objects with ids "tool_<n>"; when
#   streaming, n continues from @tool_index, otherwise it restarts at 0 for
#   this call
# - inline data becomes upload markdown
# - anything else contributes its :text unless that is an empty string
#
# When streaming, a pending thought summary is finished before any
# non-thought part. The returned array may contain nil entries (for example
# a part without text); streaming callers compact the result.
#
# @param parts [Array<Hash>, nil]
# @param batch_token [String, nil] forwarded to provider_data_from_part
# @param streaming [Boolean]
# @return [Array]
def decode_parts(parts, batch_token:, streaming: false)
  local_tool_index = -1
  decoded = []

  (parts || []).each do |part|
    if part[:thought]
      decoded << decode_thought_summary(part[:text], streaming: streaming)
      next
    end

    decoded.concat(finish_thought_summary) if streaming

    if (function_call = part[:functionCall])
      position = streaming ? (@tool_index += 1) : (local_tool_index += 1)

      decoded << ToolCall.new(
        id: "tool_#{position}",
        name: function_call[:name],
        parameters: function_call[:args],
        provider_data: provider_data_from_part(part, batch_token: batch_token),
      )
    elsif (inline_data = part[:inlineData])
      decoded << inline_data_to_upload_markdown(inline_data)
    else
      text = part[:text]
      # nil is pushed on purpose when the part has no text at all
      decoded << text unless text == ""
    end
  end

  decoded
end
# Called when the stream ends: returns whatever finish_thought_summary
# produces for a thought summary that is still pending.
def decode_chunk_finish = finish_thought_summary
# Turns the text of a thought part into a Thinking.
#
# Returns nil when thinking output is disabled or the text is blank. While
# streaming, the text is appended to the accumulated summary and a partial
# Thinking is returned; otherwise a complete one is returned.
#
# @param text [String, nil]
# @param streaming [Boolean]
# @return [Thinking, nil]
def decode_thought_summary(text, streaming: false)
  return unless output_thinking
  return if text.blank?

  return Thinking.new(message: text, partial: false) unless streaming

  (@thought_summary ||= +"") << text
  Thinking.new(message: text, partial: true)
end
# Emits the accumulated streamed thought summary as one complete Thinking and
# clears it. Returns an empty array when nothing is pending.
#
# @return [Array<Thinking>]
def finish_thought_summary
  summary = @thought_summary
  return [] if summary.blank?

  @thought_summary = nil
  [Thinking.new(message: summary, partial: false)]
end
# Builds the Thinking entries that describe provider-side tool activity for a
# candidate.
#
# Returns [] when thinking output is disabled or the candidate is blank.
# Otherwise signed parts are tracked first, then web-search and web-fetch
# thinkings are collected; if neither exists, a thought-signature-only
# Thinking is used when one is available.
#
# @param candidate [Hash, nil]
# @return [Array<Thinking>]
def native_tool_thinkings_from_candidate(candidate)
  return [] unless output_thinking
  return [] if candidate.blank?

  track_thought_signature_parts(candidate)

  search = native_web_search_thinking(candidate[:groundingMetadata])
  fetch = native_web_fetch_thinking(candidate[:urlContextMetadata])
  thinkings = [search, fetch].compact
  return thinkings unless thinkings.blank?

  fallback = thought_signature_thinking
  thinkings << fallback if fallback
  thinkings
end
# Builds, at most once per endpoint instance, a Thinking describing a web
# search reported in grounding metadata.
#
# Only the keys in GROUNDING_METADATA_KEYS with non-nil values are kept; if
# none remain nothing is emitted. The message lists the non-blank search
# queries, or is nil when there are none.
#
# @param metadata [Hash, nil] the candidate's groundingMetadata
# @return [Thinking, nil]
def native_web_search_thinking(metadata)
  return if @emitted_grounding_metadata_thinking
  return if metadata.blank?

  kept_metadata = metadata.slice(*GROUNDING_METADATA_KEYS).compact
  return if kept_metadata.blank?

  queries = Array(metadata[:webSearchQueries]).compact_blank
  message = ("Web search: #{queries.join(", ")}" if queries.present?)

  @emitted_grounding_metadata_thinking = true

  provider_info = gemini_provider_info(grounding_metadata: kept_metadata)
  Thinking.new(message: message, partial: false, provider_info: provider_info)
end
# Builds, at most once per endpoint instance, a Thinking describing URLs the
# provider fetched, based on URL-context metadata.
#
# The message lists every non-nil :retrievedUrl found under :urlMetadata, or
# is just "Web fetch" when there are none.
#
# @param metadata [Hash, nil] the candidate's urlContextMetadata
# @return [Thinking, nil]
def native_web_fetch_thinking(metadata)
  return if @emitted_url_context_metadata_thinking
  return if metadata.blank?

  retrieved = Array(metadata[:urlMetadata]).map { |entry| entry[:retrievedUrl] }
  retrieved.compact!
  message = retrieved.present? ? "Web fetch: #{retrieved.join(", ")}" : "Web fetch"

  @emitted_url_context_metadata_thinking = true

  provider_info = gemini_provider_info(url_context_metadata: metadata)
  Thinking.new(message: message, partial: false, provider_info: provider_info)
end
# Remembers every non-function-call part of the candidate that carries a
# thoughtSignature. Each remembered entry holds the part's text (as a
# string), its signature and, when the part has that key, its :thought value.
#
# @param candidate [Hash]
def track_thought_signature_parts(candidate)
  (candidate.dig(:content, :parts) || []).each do |part|
    next if part[:functionCall]

    signature = part[:thoughtSignature]
    next if signature.blank?

    signed_part = { text: part[:text].to_s, thoughtSignature: signature }
    signed_part[:thought] = part[:thought] if part.key?(:thought)

    (@thought_signature_parts ||= []) << signed_part
  end
end
# Returns a message-less Thinking whose provider_info comes from
# gemini_provider_info, or nil when that info is blank.
#
# @return [Thinking, nil]
def thought_signature_thinking
  info = gemini_provider_info
  Thinking.new(message: nil, partial: false, provider_info: info) if info.present?
end
# Wraps the given info under GEMINI_PROVIDER_KEY, attaching a deep copy of
# any tracked thought-signature parts that have not been emitted yet and
# marking them as emitted. Returns {} when there is nothing to report.
#
# @param info [Hash] extra provider data passed as keyword arguments
# @return [Hash]
def gemini_provider_info(**info)
  tracked = @thought_signature_parts || []
  pending = tracked.drop(@emitted_thought_signature_parts_count.to_i)

  if pending.present?
    info[THOUGHT_SIGNATURE_PROVIDER_KEY] = pending.deep_dup
    @emitted_thought_signature_parts_count = @thought_signature_parts.length
  end

  return {} unless info.present?

  { GEMINI_PROVIDER_KEY => info }
end
# Remembers the prompt and candidate token counts reported in a response's
# usageMetadata. A count that is absent leaves the stored value alone.
#
# @param parsed [Hash] parsed response with symbolized keys
def update_usage(parsed)
  usage = parsed[:usageMetadata]
  return unless usage

  prompt_tokens = usage[:promptTokenCount]
  @prompt_token_count = prompt_tokens if prompt_tokens

  completion_tokens = usage[:candidatesTokenCount]
  @candidate_token_count = completion_tokens if completion_tokens
end
# Copies the token counts captured from the response onto the log entry.
# A count that was never captured leaves the log's value unchanged.
#
# @param log [#request_tokens=, #response_tokens=]
def final_log_update(log)
  prompt_tokens = @prompt_token_count
  completion_tokens = @candidate_token_count

  log.request_tokens = prompt_tokens if prompt_tokens
  log.response_tokens = completion_tokens if completion_tokens
end
# Lazily creates the GeminiStreamingDecoder and returns the same instance on
# every later call, so buffered partial events survive between chunks.
def streaming_decoder
  return @decoder if @decoder

  @decoder = GeminiStreamingDecoder.new
end
# Collects provider-specific data for a tool call: the part's thought
# signature (camelCase or snake_case key) and the batch token. Entries whose
# value is nil or false are left out.
#
# @param part [Hash]
# @param batch_token [String, nil]
# @return [Hash] with optional :thought_signature and :batch_id keys
def provider_data_from_part(part, batch_token: nil)
  signature = part[:thoughtSignature] || part[:thought_signature]

  { thought_signature: signature, batch_id: batch_token }.select { |_key, value| value }
end
# Whether any of the parts carries a non-blank functionCall.
# Returns nil (not false) when parts itself is nil.
def contains_function_call?(parts)
  return if parts.nil?

  parts.any? { |part| part[:functionCall].present? }
end
# Returns the token that groups tool calls belonging to the same batch.
#
# While consecutive responses contain function calls the same random token is
# reused; as soon as a response has none, the token is cleared and nil is
# returned.
#
# @param parts [Array<Hash>, nil]
# @return [String, nil]
def current_batch_token_for(parts)
  return (@current_batch_token = nil) unless contains_function_call?(parts)

  @current_batch_token ||= SecureRandom.hex(8)
end
# Returns the string form of the prompt (via #to_s).
def extract_prompt_for_tokenizer(prompt) = prompt.to_s
# True when native tool support was not detected for the current request
# (the flag is set in prepare_payload), false otherwise.
def xml_tools_enabled?
  @native_tool_support ? false : true
end
# Stores base64 inline data returned by the model as an upload and returns
# image markdown pointing at it.
#
# The decoded bytes are written to a temporary file, handed to UploadCreator
# on behalf of the system user, and the temporary file is always removed
# afterwards.
#
# @param inline_data [Hash] reads :mimeType and :data (base64)
# @return [String, nil] markdown for the upload, or nil when the mime type or
#   data is missing or the upload was not persisted
def inline_data_to_upload_markdown(inline_data)
  mime = inline_data[:mimeType]
  encoded = inline_data[:data]
  return unless mime && encoded

  extensions = {
    "image/png" => "png",
    "image/jpeg" => "jpg",
    "image/jpg" => "jpg",
    "image/gif" => "gif",
    "image/webp" => "webp",
  }

  file = nil
  begin
    bytes = Base64.decode64(encoded)
    # unknown mime types fall back to a generic extension
    filename = "gemini-#{SecureRandom.hex(8)}.#{extensions.fetch(mime, "bin")}"

    file = Tempfile.new(filename, binmode: true)
    file.write(bytes)
    file.rewind

    creator = UploadCreator.new(file, filename, for_system_message: true)
    upload = creator.create_for(Discourse.system_user.id)

    return "\n![image](#{upload.short_url})\n" if upload&.persisted?
  ensure
    # close! also unlinks the temporary file
    file&.close!
  end

  nil
end
end
end
end
end