discourse/plugins/discourse-ai/spec/lib/completions/dialects/gemini_spec.rb
Rafael dos Santos Silva bc39aacc3d
FEATURE: Provider-native built-in tools for agents (web search) (#40809)
Adds a fourth kind of agent tool: provider-native built-in tools that the
LLM provider executes server-side, rather than tools Discourse runs and
feeds back. The first one is web search, supported on Gemini (Google Search
grounding), OpenAI (web search via the Responses API) and Anthropic (Claude
web search).

Native tools are stored on the agent's `tools` column with a `native-`
prefix, flow to the prompt as a separate `native_tools` list (never as
runnable Tool classes), and each provider dialect renders them into its own
request payload. Response processors already ignore the server-side
tool/grounding blocks, so the bot loop never tries to execute them.

They are only selectable when the agent forces a default LLM whose provider
supports the tool; this is enforced both in the editor UI (filtered by the
selected LLM's `supported_native_tools`) and by server-side validation.

Also fixes the Gemini endpoint sending `function_calling_config` without any
`function_declarations`, which the API rejects when only native tools are
present.

---------

Co-authored-by: Sam Saffron <sam.saffron@gmail.com>
2026-06-16 14:37:51 -03:00

342 lines
11 KiB
Ruby
Vendored

# frozen_string_literal: true
require_relative "dialect_context"
RSpec.describe DiscourseAi::Completions::Dialects::Gemini do
# Shared fixtures for every example: a fabricated model record and a
# DialectContext helper wrapping the dialect under test with that model.
fab!(:model, :gemini_model)

let(:context) do
  DialectContext.new(described_class, model)
end

# Runs before each example in this file.
before do
  enable_current_plugin
end
describe "#translate" do
# The system + user scenario should yield a single user message plus the
# system instruction taken from the context helper.
it "translates a prompt written in our generic format to the Gemini format" do
  user_message = { role: "user", parts: [{ text: context.simple_user_input }] }
  expected_payload = { messages: [user_message], system_instruction: context.system_insts }

  expect(context.system_user_scenario).to eq(expected_payload)
end
describe "upload markdown stripping for image preview model" do
  fab!(:upload)

  let(:image_model) do
    Fabricate(:gemini_model, name: "gemini-2.5-flash-image-preview")
  end

  # Both the user and the model message embed an upload:// markdown image and
  # also attach the same upload; the translated text must not contain the markdown.
  it "strips upload markdown from both user and model messages" do
    short_sha = Upload.base62_sha1(upload.sha1)
    user_markdown = "User text start ![user image](upload://#{short_sha}.png) end."
    model_markdown = "Model text start ![model image](upload://#{short_sha}.png) end."

    conversation = [
      { type: :system, content: "Sys" },
      { type: :user, content: [user_markdown, { upload_id: upload.id }] },
      { type: :model, content: [model_markdown, { upload_id: upload.id }] },
    ]
    prompt = DiscourseAi::Completions::Prompt.new(nil, messages: conversation)
    dialect = described_class.new(prompt, image_model)

    expect(dialect.strip_upload_markdown_mode).to eq(:all)

    payload = dialect.translate

    # Locate the first message for each role, then flatten its parts into one string.
    user_msg, model_msg =
      %w[user model].map { |role| payload[:messages].find { |msg| msg[:role] == role } }
    user_text, model_text =
      [user_msg, model_msg].map { |msg| msg[:parts].map { |part| part[:text] }.join }

    expect(user_text).to eq("User text start end.")
    expect(model_text).to eq("Model text start end.")
  end
end
# After a function-role message the expected payload contains an extra
# model turn ("Ok.") before the next user message.
it "injects model after tool call" do
  translated = context.image_generation_scenario

  draw_call = {
    role: "model",
    parts: [{ functionCall: { name: "draw", args: { picture: "Cat" } } }],
  }
  draw_result = {
    role: "function",
    parts: [
      { functionResponse: { name: "tool_id", response: { content: "\"I'm a tool result\"" } } },
    ],
  }
  # NOTE: the injected turn is expected with a bare hash under :parts, not an array.
  injected_turn = { role: "model", parts: { text: "Ok." } }

  expected_payload = {
    messages: [
      { role: "user", parts: [{ text: "user1: draw a cat" }] },
      draw_call,
      draw_result,
      injected_turn,
      { role: "user", parts: [{ text: "user1: draw another cat" }] },
    ],
    system_instruction: context.system_insts,
  }

  expect(translated).to eq(expected_payload)
end
# In the multi-turn scenario the tool call is expected as a model
# functionCall part and the tool result as a function-role functionResponse.
it "translates tool_call and tool messages" do
  translated = context.multi_turn_scenario

  weather_call = {
    role: "model",
    parts: [{ functionCall: { name: "get_weather", args: { location: "Sydney", unit: "c" } } }],
  }
  weather_result = {
    role: "function",
    parts: [
      { functionResponse: { name: "get_weather", response: { content: "\"I'm a tool result\"" } } },
    ],
  }
  expected_messages = [
    { role: "user", parts: [{ text: "user1: This is a message by a user" }] },
    { role: "model", parts: [{ text: "I'm a previous bot reply, that's why there's no user" }] },
    { role: "user", parts: [{ text: "user1: This is a new message by a user" }] },
    weather_call,
    weather_result,
  ]
  expected_system_instruction =
    "I want you to act as a title generator for written pieces. I will provide you with a text,\nand you will generate five attention-grabbing titles. Please keep the title concise and under 20 words,\nand ensure that the meaning is maintained. Replies will utilize the language type of the topic.\n"
  expected_payload = {
    messages: expected_messages,
    system_instruction: expected_system_instruction,
  }

  expect(translated).to eq(expected_payload)
end
# A tool_call pushed with provider_data[:thought_signature] should surface
# that value as :thoughtSignature on the first part of the model message.
it "includes thoughtSignature for tool calls when provider data is present" do
  conversation = context.prompt

  [
    { type: :user, id: "user1", content: "call a tool" },
    {
      type: :tool_call,
      id: "tool_id",
      name: "get_weather",
      content: { arguments: { location: "Sydney" } }.to_json,
      provider_data: {
        thought_signature: "sig-123",
      },
    },
    { type: :tool, id: "tool_id", name: "get_weather", content: { ok: true }.to_json },
  ].each { |message| conversation.push(**message) }

  payload = context.dialect(conversation).translate
  model_message = payload[:messages].find { |msg| msg[:role] == "model" }
  first_part = model_message.fetch(:parts).first

  expect(first_part[:thoughtSignature]).to eq("sig-123")
end
# An empty-text signed part supplied via thinking_provider_info is expected
# to follow the plain text part of the model message.
it "preserves text thought signatures on model messages" do
  provider_info = {
    gemini: {
      thought_signature_parts: [{ text: "", thoughtSignature: "sig-empty" }],
    },
  }

  conversation = context.prompt
  conversation.push(type: :user, id: "user1", content: "hello")
  conversation.push(type: :model, content: "Hello world", thinking_provider_info: provider_info)

  payload = context.dialect(conversation).translate
  model_message = payload[:messages].find { |msg| msg[:role] == "model" }

  expected_parts = [{ text: "Hello world" }, { text: "", thoughtSignature: "sig-empty" }]
  expect(model_message[:parts]).to eq(expected_parts)
end
# When the signed part's text is a suffix of the model content, the expected
# output splits the content into an unsigned prefix and the signed suffix.
it "preserves signed text suffixes without merging them into unsigned text" do
  provider_info = {
    gemini: {
      thought_signature_parts: [{ text: "world", thoughtSignature: "sig-world" }],
    },
  }

  conversation = context.prompt
  conversation.push(type: :user, id: "user1", content: "hello")
  conversation.push(type: :model, content: "Hello world", thinking_provider_info: provider_info)

  payload = context.dialect(conversation).translate
  model_message = payload[:messages].find { |msg| msg[:role] == "model" }

  expected_parts = [{ text: "Hello " }, { text: "world", thoughtSignature: "sig-world" }]
  expect(model_message[:parts]).to eq(expected_parts)
end
# A signed part flagged thought: true is expected ahead of the model's text part.
it "preserves signed thought summaries before model text" do
  provider_info = {
    gemini: {
      thought_signature_parts: [
        { text: "I should greet the user.", thought: true, thoughtSignature: "sig-thought" },
      ],
    },
  }

  conversation = context.prompt
  conversation.push(type: :user, id: "user1", content: "hello")
  conversation.push(type: :model, content: "Hello world", thinking_provider_info: provider_info)

  payload = context.dialect(conversation).translate
  model_message = payload[:messages].find { |msg| msg[:role] == "model" }

  expected_parts = [
    { text: "I should greet the user.", thought: true, thoughtSignature: "sig-thought" },
    { text: "Hello world" },
  ]
  expect(model_message[:parts]).to eq(expected_parts)
end
# Two call/result pairs sharing batch_id "batch-1" are pushed interleaved;
# the translation is expected to group both calls into one model message and
# both results into one function message.
it "merges multiple tool calls from the same batch into a single model message" do
  conversation = context.prompt
  conversation.push(type: :user, id: "user1", content: "do two things")

  # First pair: only this call carries a thought signature.
  conversation.push(
    type: :tool_call,
    id: "tool_id_1",
    name: "get_weather",
    content: { arguments: { city: "Paris" } }.to_json,
    provider_data: {
      batch_id: "batch-1",
      thought_signature: "sig-A",
    },
  )
  conversation.push(
    type: :tool,
    id: "tool_id_1",
    name: "get_weather",
    content: { temp: "15C" }.to_json,
    provider_data: {
      batch_id: "batch-1",
    },
  )

  # Second pair: same batch, no signature.
  conversation.push(
    type: :tool_call,
    id: "tool_id_2",
    name: "get_weather",
    content: { arguments: { city: "London" } }.to_json,
    provider_data: {
      batch_id: "batch-1",
    },
  )
  conversation.push(
    type: :tool,
    id: "tool_id_2",
    name: "get_weather",
    content: { temp: "12C" }.to_json,
    provider_data: {
      batch_id: "batch-1",
    },
  )

  payload = context.dialect(conversation).translate
  model_message = payload[:messages].find { |msg| msg[:role] == "model" }
  function_message = payload[:messages].find { |msg| msg[:role] == "function" }

  call_parts = model_message[:parts]
  expect(call_parts.length).to eq(2)
  first_call, second_call = call_parts.values_at(0, 1)
  expect(first_call[:thoughtSignature]).to eq("sig-A")
  expect(second_call[:thoughtSignature]).to be_nil

  response_parts = function_message[:parts]
  expect(response_parts.length).to eq(2)
  first_response, second_response = response_parts.values_at(0, 1)
  expect(first_response.dig(:functionResponse, :response, :content)).to eq(
    { temp: "15C" }.to_json,
  )
  expect(second_response.dig(:functionResponse, :response, :content)).to eq(
    { temp: "12C" }.to_json,
  )
end
it "trims content if it's getting too long" do
  # Exercising truncation against 800k tokens is slow, so use a model with a
  # smaller prompt limit and build a dedicated context around it.
  model.max_prompt_tokens = 16_384
  small_context = DialectContext.new(described_class, model)

  payload = small_context.long_user_input_scenario(length: 5_000)
  last_message = payload[:messages].last
  untrimmed_length = small_context.long_message_text(length: 5_000).length

  expect(last_message[:role]).to eq("user")
  expect(last_message.dig(:parts, 0, :text).length).to be < untrimmed_length
end
# With the upload encoder stubbed to return a converted document, the user
# message should carry the converted text as a text part and no inlineData part.
it "renders converted document uploads as text parts" do
  model.update!(allowed_attachment_types: %w[docx])
  converted_text = "Uploaded document: sample.docx (13 Bytes)\n\nConverted text"

  user_turn = { type: :user, content: ["Read this: ", { upload_id: 123 }] }
  prompt = DiscourseAi::Completions::Prompt.new(nil, messages: [user_turn])

  encoded_document = {
    kind: :document,
    filename: "sample.docx",
    mime_type: "text/plain",
    text: converted_text,
    converted_from: "docx",
  }
  allow(DiscourseAi::Completions::UploadEncoder).to receive(:encode).and_return(
    [encoded_document],
  )

  payload = described_class.new(prompt, model).translate
  user_message = payload[:messages].find { |message| message[:role] == "user" }
  parts = user_message[:parts]

  expect(parts).to eq([{ text: "Read this: " }, { text: converted_text }])
  expect(parts).not_to include(hash_including(inlineData: anything))
end
end
describe "#tools" do
  # The context's tool list is expected to contain exactly one entry wrapping
  # a single get_weather function declaration.
  it "returns a list of available tools" do
    location_param = { type: :string, description: "the city name" }
    unit_param = {
      type: :string,
      description: "the unit of measurement celcius c or fahrenheit f",
      enum: %w[c f],
    }
    weather_declaration = {
      name: "get_weather",
      description: "Get the weather in a city",
      parameters: {
        type: "object",
        required: %w[location unit],
        properties: {
          "location" => location_param,
          "unit" => unit_param,
        },
      },
    }
    expected_tools = { function_declarations: [weather_declaration] }

    expect(context.dialect_tools).to contain_exactly(expected_tools)
  end
end
end