discourse/plugins/discourse-ai/spec/lib/completions/endpoints/aws_bedrock_spec.rb
Sam 3b13aaa003
DEV: update preset model versions (#39342)
Refresh several AI model presets to newer provider versions and adjust
pricing/capacity metadata accordingly. Also add support for the new
`xhigh` effort level across Anthropic and Bedrock request handling and
update the LLM admin UI/specs to match the renamed presets.
2026-04-17 17:55:24 +10:00

1605 lines
49 KiB
Ruby
Vendored

# frozen_string_literal: true
require_relative "endpoint_compliance"
require "aws-eventstream"
require "aws-sigv4"
require "aws-sdk-sts"
# Mock used by this spec for the Bedrock endpoint; it inherits everything from
# EndpointMock without overriding anything.
class BedrockMock < EndpointMock; end
RSpec.describe DiscourseAi::Completions::Endpoints::AwsBedrock do
# Endpoint under test, built from the fabricated Bedrock model.
subject(:endpoint) { described_class.new(model) }

fab!(:user)
fab!(:model, :bedrock_model)

# Mock wrapper around the endpoint; streaming examples call
# #with_chunk_array_support on it.
let(:bedrock_mock) do
  BedrockMock.new(endpoint)
end

let(:compliance) { EndpointsCompliance.new(self, endpoint, DiscourseAi::Completions::Dialects::Claude, user) }
# Frames +message+ as an AWS event-stream message.
#
# The message is serialized to JSON and Base64-encoded, stored under the
# "bytes" key of a JSON envelope, and that envelope becomes the payload of an
# Aws::EventStream::Message which is then run through the event-stream encoder.
#
# @param message [#to_json] the event to wrap
# @return whatever Aws::EventStream::Encoder#encode returns for the message
def encode_message(message)
  envelope = { bytes: Base64.encode64(message.to_json) }
  event = Aws::EventStream::Message.new(payload: StringIO.new(envelope.to_json))
  Aws::EventStream::Encoder.new.encode(event)
end
# Calls enable_current_plugin ahead of every example in this group.
before { enable_current_plugin }
it "should provide accurate max token count" do
  claude_dialect =
    DiscourseAi::Completions::Dialects::Claude.new(
      DiscourseAi::Completions::Prompt.new("hello"),
      model,
    )
  bedrock = DiscourseAi::Completions::Endpoints::AwsBedrock.new(model)

  # model name => max_tokens the endpoint is expected to default to
  expected_limits = {
    "claude-2" => 4096,
    "claude-3-5-sonnet" => 8192,
    "claude-3-5-haiku" => 8192,
  }

  expected_limits.each do |model_name, limit|
    model.name = model_name
    expect(bedrock.default_options(claude_dialect)[:max_tokens]).to eq(limit)
  end
end
describe "function calling" do
it "supports old school xml function calls" do
  # Native tools are disabled, so the tool call comes back as XML inside the text stream.
  model.provider_params["disable_native_tools"] = true
  model.save!
  llm = DiscourseAi::Completions::Llm.proxy(model)

  xml_tool_call = <<~XML.strip
    <thinking>I should be ignored</thinking>
    <search_quality_reflection>also ignored</search_quality_reflection>
    <search_quality_score>0</search_quality_score>
    <function_calls>
    <invoke>
    <tool_name>google</tool_name>
    <parameters><query>sydney weather today</query></parameters>
    </invoke>
    </function_calls>
  XML

  events = [
    { type: "message_start", message: { usage: { input_tokens: 9 } } },
    { type: "content_block_delta", delta: { text: "hello\n" } },
    { type: "content_block_delta", delta: { text: xml_tool_call } },
    { type: "message_delta", delta: { usage: { output_tokens: 25 } } },
  ]
  encoded_events = events.map { |event| encode_message(event) }

  stream_url =
    "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke-with-response-stream"
  sent_request = nil

  bedrock_mock.with_chunk_array_support do
    stub_request(:post, stream_url)
      .with do |outgoing|
        sent_request = outgoing
        true
      end
      .to_return(status: 200, body: encoded_events)

    prompt =
      DiscourseAi::Completions::Prompt.new(
        messages: [{ type: :user, content: "what is the weather in sydney" }],
      )
    google_tool = {
      name: "google",
      description: "Will search using Google",
      parameters: [
        { name: "query", description: "The search query", type: "string", required: true },
      ],
    }
    prompt.tools = [google_tool]

    partials = []
    llm.generate(prompt, user: user) { |partial| partials << partial }

    %w[Authorization X-Amz-Content-Sha256].each do |header|
      expect(sent_request.headers[header]).to be_present
    end

    # The tool definition is expected in the system prompt, not in a native "tools" key.
    payload = JSON.parse(sent_request.body)
    expect(payload["system"]).to include("<function_calls>")
    expect(payload["tools"]).to be_nil
    expect(payload["stop_sequences"]).to eq(["</function_calls>"])

    google_call =
      DiscourseAi::Completions::ToolCall.new(
        id: "tool_0",
        name: "google",
        parameters: {
          query: "sydney weather today",
        },
      )
    expect(partials).to eq(["hello\n", google_call])
  end
end
it "supports streaming function calls" do
  proxy = DiscourseAi::Completions::Llm.proxy(model)
  request = nil

  messages =
    [
      {
        type: "message_start",
        message: {
          id: "msg_bdrk_01WYxeNMk6EKn9s98r6XXrAB",
          type: "message",
          role: "assistant",
          model: "claude-3-sonnet-20240307",
          stop_sequence: nil,
          usage: {
            input_tokens: 840,
            output_tokens: 1,
          },
          content: [],
          stop_reason: nil,
        },
      },
      {
        type: "content_block_start",
        index: 0,
        delta: {
          text: "<thinking>I should be ignored</thinking>",
        },
      },
      {
        type: "content_block_start",
        index: 0,
        content_block: {
          type: "tool_use",
          id: "toolu_bdrk_014CMjxtGmKUtGoEFPgc7PF7",
          name: "google",
          input: {
          },
        },
      },
      {
        type: "content_block_delta",
        index: 0,
        delta: {
          type: "input_json_delta",
          partial_json: "",
        },
      },
      {
        type: "content_block_delta",
        index: 0,
        delta: {
          type: "input_json_delta",
          partial_json: "{\"query\": \"s",
        },
      },
      {
        type: "content_block_delta",
        index: 0,
        delta: {
          type: "input_json_delta",
          partial_json: "ydney weat",
        },
      },
      {
        type: "content_block_delta",
        index: 0,
        delta: {
          type: "input_json_delta",
          partial_json: "her today\"}",
        },
      },
      { type: "content_block_stop", index: 0 },
      {
        type: "message_delta",
        delta: {
          stop_reason: "tool_use",
          stop_sequence: nil,
        },
        usage: {
          output_tokens: 53,
        },
      },
      {
        type: "message_stop",
        "amazon-bedrock-invocationMetrics": {
          inputTokenCount: 846,
          outputTokenCount: 39,
          invocationLatency: 880,
          firstByteLatency: 402,
        },
      },
    ].map { |message| encode_message(message) }

  # Deliver the encoded stream one byte per chunk so that event frames are cut
  # at arbitrary boundaries. An argument-less String#split only breaks on ASCII
  # whitespace: it would leave the binary stream in a single chunk and drop any
  # whitespace byte that happens to occur inside a frame.
  messages = messages.join.b.chars

  bedrock_mock.with_chunk_array_support do
    stub_request(
      :post,
      "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke-with-response-stream",
    )
      .with do |inner_request|
        request = inner_request
        true
      end
      .to_return(status: 200, body: messages)

    prompt =
      DiscourseAi::Completions::Prompt.new(
        messages: [{ type: :user, content: "what is the weather in sydney" }],
      )

    tool = {
      name: "google",
      description: "Will search using Google",
      parameters: [
        { name: "query", description: "The search query", type: "string", required: true },
      ],
    }
    prompt.tools = [tool]

    response = []
    proxy.generate(prompt, user: user) { |partial| response << partial }

    expect(request.headers["Authorization"]).to be_present
    expect(request.headers["X-Amz-Content-Sha256"]).to be_present

    # The partial JSON deltas are expected to surface as one assembled tool call.
    expected_response = [
      DiscourseAi::Completions::ToolCall.new(
        id: "toolu_bdrk_014CMjxtGmKUtGoEFPgc7PF7",
        name: "google",
        parameters: {
          query: "sydney weather today",
        },
      ),
    ]
    expect(response).to eq(expected_response)

    expected = {
      "max_tokens" => 4096,
      "anthropic_version" => "bedrock-2023-05-31",
      "messages" => [{ "role" => "user", "content" => "what is the weather in sydney" }],
      "tools" => [
        {
          "name" => "google",
          "description" => "Will search using Google",
          "input_schema" => {
            "type" => "object",
            "properties" => {
              "query" => {
                "type" => "string",
                "description" => "The search query",
              },
            },
            "required" => ["query"],
          },
        },
      ],
    }
    expect(JSON.parse(request.body)).to eq(expected)

    # The logged counts match the amazon-bedrock-invocationMetrics values above,
    # not the usage blocks of the earlier events.
    log = AiApiAuditLog.order(:id).last
    expect(log.request_tokens).to eq(846)
    expect(log.response_tokens).to eq(39)
  end
end
end
describe "Claude 3 support" do
it "supports regular completions" do
  llm = DiscourseAi::Completions::Llm.proxy(model)
  sent_request = nil

  canned_reply = {
    content: [{ text: "hello sam" }],
    usage: {
      input_tokens: 10,
      output_tokens: 20,
    },
  }.to_json
  invoke_url =
    "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke"

  stub_request(:post, invoke_url)
    .with do |outgoing|
      sent_request = outgoing
      true
    end
    .to_return(status: 200, body: canned_reply)

  reply = llm.generate("hello world", user: user)

  %w[Authorization X-Amz-Content-Sha256].each do |header|
    expect(sent_request.headers[header]).to be_present
  end

  expected_payload = {
    "max_tokens" => 4096,
    "anthropic_version" => "bedrock-2023-05-31",
    "messages" => [{ "role" => "user", "content" => "hello world" }],
    "system" => "You are a helpful bot",
  }
  expect(JSON.parse(sent_request.body)).to eq(expected_payload)
  expect(reply).to eq("hello sam")

  # Token usage from the response body should land in the audit log.
  audit = AiApiAuditLog.order(:id).last
  expect(audit.request_tokens).to eq(10)
  expect(audit.response_tokens).to eq(20)
end
it "supports thinking" do
  model.provider_params.merge!("enable_reasoning" => true, "reasoning_tokens" => 10_000)
  model.save!

  llm = DiscourseAi::Completions::Llm.proxy(model)
  sent_request = nil

  canned_reply = {
    content: [{ text: "hello sam" }],
    usage: {
      input_tokens: 10,
      output_tokens: 20,
    },
  }.to_json
  invoke_url =
    "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke"

  stub_request(:post, invoke_url)
    .with do |outgoing|
      sent_request = outgoing
      true
    end
    .to_return(status: 200, body: canned_reply)

  reply = llm.generate("hello world", user: user)

  %w[Authorization X-Amz-Content-Sha256].each do |header|
    expect(sent_request.headers[header]).to be_present
  end

  # The thinking budget mirrors reasoning_tokens, and max_tokens is expected to be 40_000.
  expected_payload = {
    "max_tokens" => 40_000,
    "thinking" => {
      "type" => "enabled",
      "budget_tokens" => 10_000,
    },
    "anthropic_version" => "bedrock-2023-05-31",
    "messages" => [{ "role" => "user", "content" => "hello world" }],
    "system" => "You are a helpful bot",
  }
  expect(JSON.parse(sent_request.body)).to eq(expected_payload)
  expect(reply).to eq("hello sam")

  audit = AiApiAuditLog.order(:id).last
  expect(audit.request_tokens).to eq(10)
  expect(audit.response_tokens).to eq(20)
end
it "strips temperature and top_p when reasoning is enabled" do
  reasoning_params = {
    access_key_id: "123",
    region: "us-east-1",
    enable_reasoning: true,
    reasoning_tokens: 2048,
  }
  model.update!(provider_params: reasoning_params)

  llm = DiscourseAi::Completions::Llm.proxy(model)
  sent_request = nil

  canned_reply = {
    content: [{ text: "response" }],
    usage: {
      input_tokens: 10,
      output_tokens: 5,
    },
  }.to_json
  invoke_url =
    "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke"

  stub_request(:post, invoke_url)
    .with do |outgoing|
      sent_request = outgoing
      true
    end
    .to_return(status: 200, body: canned_reply)

  # Both sampling options are passed in explicitly and must not reach the request body.
  llm.generate("test prompt", user: user, temperature: 0.7, top_p: 0.9)

  payload = JSON.parse(sent_request.body)
  %w[temperature top_p].each { |option| expect(payload).not_to have_key(option) }
end
it "supports claude 3 streaming" do
  proxy = DiscourseAi::Completions::Llm.proxy(model)
  request = nil

  messages =
    [
      { type: "message_start", message: { usage: { input_tokens: 9 } } },
      { type: "content_block_delta", delta: { text: "hello " } },
      { type: "content_block_delta", delta: { text: "sam" } },
      { type: "message_delta", delta: { usage: { output_tokens: 25 } } },
    ].map { |message| encode_message(message) }

  # Stream one byte at a time, because the endpoint has to cope with event
  # frames that are split across chunks. An argument-less String#split only
  # breaks on ASCII whitespace, so it would leave the binary stream in a single
  # chunk and drop any whitespace byte that happens to occur inside a frame.
  messages = messages.join.b.chars

  bedrock_mock.with_chunk_array_support do
    stub_request(
      :post,
      "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke-with-response-stream",
    )
      .with do |inner_request|
        request = inner_request
        true
      end
      .to_return(status: 200, body: messages)

    response = +""
    proxy.generate("hello world", user: user) { |partial| response << partial }

    expect(request.headers["Authorization"]).to be_present
    expect(request.headers["X-Amz-Content-Sha256"]).to be_present

    expected = {
      "max_tokens" => 4096,
      "anthropic_version" => "bedrock-2023-05-31",
      "messages" => [{ "role" => "user", "content" => "hello world" }],
      "system" => "You are a helpful bot",
    }
    expect(JSON.parse(request.body)).to eq(expected)

    expect(response).to eq("hello sam")

    # Usage comes from the message_start (input) and message_delta (output) events.
    log = AiApiAuditLog.order(:id).last
    expect(log.request_tokens).to eq(9)
    expect(log.response_tokens).to eq(25)
  end
end
end
describe "parameter disabling" do
it "excludes disabled parameters from the request" do
  model.update!(
    provider_params: {
      access_key_id: "123",
      region: "us-east-1",
      disable_top_p: true,
      disable_temperature: true,
    },
  )

  llm = DiscourseAi::Completions::Llm.proxy(model)
  sent_request = nil

  canned_reply = {
    content: [{ text: "test response" }],
    usage: {
      input_tokens: 10,
      output_tokens: 5,
    },
  }.to_json
  invoke_url =
    "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke"

  stub_request(:post, invoke_url)
    .with do |outgoing|
      sent_request = outgoing
      true
    end
    .to_return(status: 200, body: canned_reply)

  # top_p and temperature are supplied on purpose; the model config disables both.
  llm.generate("test prompt", user: user, top_p: 0.9, temperature: 0.8, max_tokens: 500)

  payload = JSON.parse(sent_request.body)

  # Disabled options are dropped...
  %w[top_p temperature].each { |option| expect(payload).not_to have_key(option) }

  # ...while an option that is not disabled still goes through.
  expect(payload).to have_key("max_tokens")
  expect(payload["max_tokens"]).to eq(500)
end
end
describe "disabled tool use" do
it "sets tool_choice to none natively" do
  llm = DiscourseAi::Completions::Llm.proxy(model)
  sent_request = nil

  echo_tool = {
    name: "echo",
    description: "echo something",
    parameters: [{ name: "text", type: "string", description: "text to echo", required: true }],
  }
  prompt =
    DiscourseAi::Completions::Prompt.new(
      "You are a helpful assistant",
      messages: [{ type: :user, content: "don't use any tools please" }],
      tools: [echo_tool],
      tool_choice: :none,
    )

  reply_text = "I won't use any tools. Here's a direct response instead."
  canned_reply = {
    content: [{ text: reply_text }],
    usage: {
      input_tokens: 25,
      output_tokens: 15,
    },
  }.to_json
  invoke_url =
    "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke"

  stub_request(:post, invoke_url)
    .with do |outgoing|
      sent_request = outgoing
      true
    end
    .to_return(status: 200, body: canned_reply)

  reply = llm.generate(prompt, user: user)

  payload = JSON.parse(sent_request.body)
  expect(payload.dig("tool_choice", "type")).to eq("none")

  # Only the single user message is sent; nothing extra is appended.
  expect(payload["messages"].length).to eq(1)

  expect(reply).to eq("I won't use any tools. Here's a direct response instead.")
end
end
describe "tool_choice :none with response_format" do
it "sets tool_choice none and appends assistant prefill when structured output is disabled" do
  model.update!(
    provider_params: model.provider_params.merge("disable_native_structured_output" => true),
  )

  reply_schema = {
    type: "json_schema",
    json_schema: {
      name: "reply",
      schema: {
        type: "object",
        properties: {
          key: {
            type: "string",
          },
        },
        required: ["key"],
        additionalProperties: false,
      },
      strict: true,
    },
  }

  echo_tool = {
    name: "echo",
    description: "echo something",
    parameters: [{ name: "text", type: "string", description: "text to echo", required: true }],
  }
  prompt =
    DiscourseAi::Completions::Prompt.new(
      "You are a helpful assistant",
      messages: [{ type: :user, content: "reply as json" }],
      tools: [echo_tool],
      tool_choice: :none,
    )

  llm = DiscourseAi::Completions::Llm.proxy(model)
  sent_request = nil

  # The canned reply continues from the "{" prefill, hence no opening brace.
  canned_reply = {
    content: [{ text: "\"key\":\"value\"}" }],
    usage: {
      input_tokens: 25,
      output_tokens: 15,
    },
  }.to_json
  invoke_url =
    "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke"

  stub_request(:post, invoke_url)
    .with do |outgoing|
      sent_request = outgoing
      true
    end
    .to_return(status: 200, body: canned_reply)

  llm.generate(prompt, user: user, response_format: reply_schema)

  payload = JSON.parse(sent_request.body)
  expect(payload.dig("tool_choice", "type")).to eq("none")
  expect(payload).not_to have_key("output_config")

  user_message, prefill = payload["messages"]
  expect(payload["messages"].length).to eq(2)
  expect(user_message["role"]).to eq("user")
  expect(prefill).to eq({ "role" => "assistant", "content" => "{" })
end
end
describe "forced tool use" do
# Single "echo" tool definition shared by the examples in this group.
let(:tools) do
  echo_parameter = { name: "text", type: "string", description: "text to echo", required: true }
  [{ name: "echo", description: "echo something", parameters: [echo_parameter] }]
end

# JSON response body containing one tool_use block for the "echo" tool.
let(:tool_response_body) do
  echo_call = {
    type: "tool_use",
    id: "toolu_bdrk_014CMjxtGmKUtGoEFPgc7PF7",
    name: "echo",
    input: {
      text: "hello",
    },
  }
  token_usage = { input_tokens: 25, output_tokens: 15 }

  { content: [echo_call], usage: token_usage }.to_json
end
it "can properly force tool use" do
  llm = DiscourseAi::Completions::Llm.proxy(model)
  sent_request = nil

  prompt =
    DiscourseAi::Completions::Prompt.new(
      "You are a bot",
      messages: [{ type: :user, id: "user1", content: "echo hello" }],
      tools: tools,
      tool_choice: "echo",
    )
  invoke_url =
    "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke"

  stub_request(:post, invoke_url)
    .with do |outgoing|
      sent_request = outgoing
      true
    end
    .to_return(status: 200, body: tool_response_body)

  llm.generate(prompt, user: user)

  # The forced tool's name must show up in the request's tool_choice.
  payload = JSON.parse(sent_request.body)
  expect(payload.dig("tool_choice", "name")).to eq("echo")
end
it "skips tool_choice and injects guidance when thinking is enabled" do
  thinking_params =
    model.provider_params.merge("enable_reasoning" => true, "adaptive_thinking" => true)
  model.update!(provider_params: thinking_params)

  llm = DiscourseAi::Completions::Llm.proxy(model)
  sent_request = nil

  prompt =
    DiscourseAi::Completions::Prompt.new(
      "You are a bot",
      messages: [{ type: :user, id: "user1", content: "echo hello" }],
      tools: tools,
      tool_choice: "echo",
    )
  invoke_url =
    "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke"

  stub_request(:post, invoke_url)
    .with do |outgoing|
      sent_request = outgoing
      true
    end
    .to_return(status: 200, body: tool_response_body)

  llm.generate(prompt, user: user)

  payload = JSON.parse(sent_request.body)
  expect(payload).not_to have_key("tool_choice")

  # Instead of tool_choice, the final user message is expected to mention the tool.
  final_message = payload["messages"].last
  expect(final_message["role"]).to eq("user")
  expect(final_message["content"]).to include("'echo' tool")
end
end
describe "role-based authentication" do
it "uses assumed role credentials when role_arn is provided" do
  role_arn = "arn:aws:iam::123456789012:role/BedRockAccessRole"
  model.update!(provider_params: { region: "us-east-1", role_arn: role_arn })

  # Credentials object handed back by the stubbed AssumeRoleCredentials provider.
  assumed_creds =
    instance_double(
      Aws::Credentials,
      access_key_id: "ASSUMED_ACCESS_KEY",
      secret_access_key: "ASSUMED_SECRET_KEY",
      session_token: "ASSUMED_SESSION_TOKEN",
    )
  provider = instance_double(Aws::AssumeRoleCredentials)
  allow(provider).to receive(:credentials).and_return(assumed_creds)

  sts_client = instance_double(Aws::STS::Client)
  allow(Aws::STS::Client).to receive(:new).with(region: "us-east-1").and_return(sts_client)

  # Keyword arguments AssumeRoleCredentials.new is expected to be called with.
  assume_role_args = {
    role_arn: role_arn,
    role_session_name: "discourse-bedrock-#{Process.pid}",
    client: sts_client,
  }
  allow(Aws::AssumeRoleCredentials).to receive(:new).with(**assume_role_args).and_return(provider)

  llm = DiscourseAi::Completions::Llm.proxy(model)
  sent_request = nil

  canned_reply = {
    content: [{ text: "test response" }],
    usage: {
      input_tokens: 10,
      output_tokens: 5,
    },
  }.to_json
  invoke_url =
    "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke"

  stub_request(:post, invoke_url)
    .with do |outgoing|
      sent_request = outgoing
      true
    end
    .to_return(status: 200, body: canned_reply)

  llm.generate("test prompt", user: user)

  expect(Aws::AssumeRoleCredentials).to have_received(:new).with(**assume_role_args)

  # The request must be signed...
  %w[Authorization X-Amz-Content-Sha256].each do |header|
    expect(sent_request.headers[header]).to be_present
  end

  # ...and must carry the session token of the assumed role.
  expect(sent_request.headers["X-Amz-Security-Token"]).to eq("ASSUMED_SESSION_TOKEN")
end
it "uses regular credentials when role_arn is not provided" do
  # No role_arn here: only a direct access key and a region.
  model.update!(provider_params: { access_key_id: "DIRECT_ACCESS_KEY", region: "us-east-1" })

  llm = DiscourseAi::Completions::Llm.proxy(model)
  sent_request = nil

  canned_reply = {
    content: [{ text: "test response" }],
    usage: {
      input_tokens: 10,
      output_tokens: 5,
    },
  }.to_json
  invoke_url =
    "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke"

  stub_request(:post, invoke_url)
    .with do |outgoing|
      sent_request = outgoing
      true
    end
    .to_return(status: 200, body: canned_reply)

  # Spy on the constructor so any attempt to assume a role is detected.
  allow(Aws::AssumeRoleCredentials).to receive(:new).and_call_original

  llm.generate("test prompt", user: user)

  expect(Aws::AssumeRoleCredentials).not_to have_received(:new)

  # The request is still signed.
  %w[Authorization X-Amz-Content-Sha256].each do |header|
    expect(sent_request.headers[header]).to be_present
  end

  # Regular credentials come without a session token.
  expect(sent_request.headers["X-Amz-Security-Token"]).to be_nil
end
it "caches assumed role credentials across multiple requests" do
  role_arn = "arn:aws:iam::123456789012:role/BedRockAccessRole"
  model.update!(provider_params: { region: "us-east-1", role_arn: role_arn })

  # Credentials object handed back by the stubbed AssumeRoleCredentials provider.
  assumed_creds =
    instance_double(
      Aws::Credentials,
      access_key_id: "ASSUMED_ACCESS_KEY",
      secret_access_key: "ASSUMED_SECRET_KEY",
      session_token: "ASSUMED_SESSION_TOKEN",
    )
  provider = instance_double(Aws::AssumeRoleCredentials)
  allow(provider).to receive(:credentials).and_return(assumed_creds)

  sts_client = instance_double(Aws::STS::Client)
  allow(Aws::STS::Client).to receive(:new).with(region: "us-east-1").and_return(sts_client)

  allow(Aws::AssumeRoleCredentials).to receive(:new).with(
    role_arn: role_arn,
    role_session_name: "discourse-bedrock-#{Process.pid}",
    client: sts_client,
  ).and_return(provider)

  llm = DiscourseAi::Completions::Llm.proxy(model)

  canned_reply = {
    content: [{ text: "test response" }],
    usage: {
      input_tokens: 10,
      output_tokens: 5,
    },
  }.to_json
  invoke_url =
    "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke"
  stub_request(:post, invoke_url).to_return(status: 200, body: canned_reply)

  # Three completions in a row through the same proxy.
  ["test prompt 1", "test prompt 2", "test prompt 3"].each do |text|
    llm.generate(text, user: user)
  end

  # The credentials provider must have been constructed a single time only.
  expect(Aws::AssumeRoleCredentials).to have_received(:new).once
end
it "invalidates cache when role_arn changes" do
  first_role = "arn:aws:iam::123456789012:role/FirstRole"
  second_role = "arn:aws:iam::123456789012:role/SecondRole"

  # Start out with the first role configured.
  model.update!(provider_params: { region: "us-east-1", role_arn: first_role })

  first_creds =
    instance_double(
      Aws::Credentials,
      access_key_id: "FIRST_ACCESS_KEY",
      secret_access_key: "FIRST_SECRET_KEY",
      session_token: "FIRST_SESSION_TOKEN",
    )
  first_provider = instance_double(Aws::AssumeRoleCredentials)
  allow(first_provider).to receive(:credentials).and_return(first_creds)

  second_creds =
    instance_double(
      Aws::Credentials,
      access_key_id: "SECOND_ACCESS_KEY",
      secret_access_key: "SECOND_SECRET_KEY",
      session_token: "SECOND_SESSION_TOKEN",
    )
  second_provider = instance_double(Aws::AssumeRoleCredentials)
  allow(second_provider).to receive(:credentials).and_return(second_creds)

  sts_client = instance_double(Aws::STS::Client)
  allow(Aws::STS::Client).to receive(:new).with(region: "us-east-1").and_return(sts_client)

  # Each role ARN maps to its own stubbed provider.
  first_args = {
    role_arn: first_role,
    role_session_name: "discourse-bedrock-#{Process.pid}",
    client: sts_client,
  }
  second_args = {
    role_arn: second_role,
    role_session_name: "discourse-bedrock-#{Process.pid}",
    client: sts_client,
  }
  allow(Aws::AssumeRoleCredentials).to receive(:new).with(**first_args).and_return(first_provider)
  allow(Aws::AssumeRoleCredentials).to receive(:new).with(**second_args).and_return(
    second_provider,
  )

  llm = DiscourseAi::Completions::Llm.proxy(model)

  canned_reply = {
    content: [{ text: "test response" }],
    usage: {
      input_tokens: 10,
      output_tokens: 5,
    },
  }.to_json
  invoke_url =
    "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke"
  stub_request(:post, invoke_url).to_return(status: 200, body: canned_reply)

  # One request under the first role...
  llm.generate("test prompt 1", user: user)

  # ...then switch roles and issue another one.
  model.update!(provider_params: { region: "us-east-1", role_arn: second_role })
  llm.generate("test prompt 2", user: user)

  # Exactly one provider construction per role.
  expect(Aws::AssumeRoleCredentials).to have_received(:new).with(**first_args).once
  expect(Aws::AssumeRoleCredentials).to have_received(:new).with(**second_args).once
end
end
describe "structured output via output_config" do
it "forces the response to be a JSON and using the given JSON schema" do
  reply_schema = {
    type: "json_schema",
    json_schema: {
      name: "reply",
      schema: {
        type: "object",
        properties: {
          key: {
            type: "string",
          },
        },
        required: ["key"],
        additionalProperties: false,
      },
      strict: true,
    },
  }

  # The JSON document arrives split across several text deltas.
  text_deltas = ["{\"", "key", "\":\"", "Hello!", "\n There", "\"}"]
  events = [
    { type: "message_start", message: { usage: { input_tokens: 9 } } },
    *text_deltas.map { |text| { type: "content_block_delta", delta: { text: text } } },
    { type: "message_delta", delta: { usage: { output_tokens: 25 } } },
  ]
  encoded_events = events.map { |event| encode_message(event) }

  llm = DiscourseAi::Completions::Llm.proxy(model)
  sent_request = nil
  stream_url =
    "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke-with-response-stream"

  bedrock_mock.with_chunk_array_support do
    stub_request(:post, stream_url)
      .with do |outgoing|
        sent_request = outgoing
        true
      end
      .to_return(status: 200, body: encoded_events)

    # Keep only the last partial yielded by the stream.
    latest_partial = nil
    llm.generate("hello world", response_format: reply_schema, user: user) do |partial|
      latest_partial = partial
    end

    expected_payload = {
      "max_tokens" => 4096,
      "anthropic_version" => "bedrock-2023-05-31",
      "messages" => [{ "role" => "user", "content" => "hello world" }],
      "system" => "You are a helpful bot",
      "output_config" => {
        "format" => {
          "type" => "json_schema",
          "schema" => reply_schema.dig(:json_schema, :schema).deep_stringify_keys,
        },
      },
    }
    expect(JSON.parse(sent_request.body)).to eq(expected_payload)

    expect(latest_partial.read_buffered_property(:key)).to eq("Hello!\n There")
  end
end
it "works with JSON schema array types" do
  reply_schema = {
    type: "json_schema",
    json_schema: {
      name: "reply",
      schema: {
        type: "object",
        properties: {
          plain: {
            type: "string",
          },
          key: {
            type: "array",
            items: {
              type: "string",
            },
          },
        },
        required: %w[plain key],
        additionalProperties: false,
      },
      strict: true,
    },
  }

  # Text deltas that together spell a JSON object with an array and a string property.
  text_deltas = [
    "{\"",
    "key",
    "\":",
    " [\"",
    "Hello!",
    " I am",
    " a ",
    "chunk\",",
    "\"There",
    "\"],",
    " \"plain",
    "\":\"",
    "I'm here",
    " too\"}",
  ]
  events = [
    { type: "message_start", message: { usage: { input_tokens: 9 } } },
    *text_deltas.map { |text| { type: "content_block_delta", delta: { text: text } } },
    { type: "message_delta", delta: { usage: { output_tokens: 25 } } },
  ]
  encoded_events = events.map { |event| encode_message(event) }

  llm = DiscourseAi::Completions::Llm.proxy(model)
  sent_request = nil
  stream_url =
    "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke-with-response-stream"

  bedrock_mock.with_chunk_array_support do
    stub_request(:post, stream_url)
      .with do |outgoing|
        sent_request = outgoing
        true
      end
      .to_return(status: 200, body: encoded_events)

    # Keep only the last partial yielded by the stream.
    latest_partial = nil
    llm.generate("hello world", response_format: reply_schema, user: user) do |partial|
      latest_partial = partial
    end

    expected_payload = {
      "max_tokens" => 4096,
      "anthropic_version" => "bedrock-2023-05-31",
      "messages" => [{ "role" => "user", "content" => "hello world" }],
      "system" => "You are a helpful bot",
      "output_config" => {
        "format" => {
          "type" => "json_schema",
          "schema" => reply_schema.dig(:json_schema, :schema).deep_stringify_keys,
        },
      },
    }
    expect(JSON.parse(sent_request.body)).to eq(expected_payload)

    expect(latest_partial.read_buffered_property(:key)).to contain_exactly(
      "Hello! I am a chunk",
      "There",
    )
    expect(latest_partial.read_buffered_property(:plain)).to eq("I'm here too")
  end
end
end
describe "structured output via prefilling when native structured output is disabled" do
it "falls back to assistant message prefill" do
  model.update!(
    provider_params: model.provider_params.merge("disable_native_structured_output" => true),
  )

  reply_schema = {
    type: "json_schema",
    json_schema: {
      name: "reply",
      schema: {
        type: "object",
        properties: {
          key: {
            type: "string",
          },
        },
        required: ["key"],
        additionalProperties: false,
      },
      strict: true,
    },
  }

  # The streamed text continues from the "{" prefill, hence no opening brace.
  events = [
    { type: "message_start", message: { usage: { input_tokens: 9 } } },
    { type: "content_block_delta", delta: { text: "\"key\":\"value\"}" } },
    { type: "message_delta", delta: { usage: { output_tokens: 25 } } },
  ]
  encoded_events = events.map { |event| encode_message(event) }

  llm = DiscourseAi::Completions::Llm.proxy(model)
  sent_request = nil
  stream_url =
    "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke-with-response-stream"

  bedrock_mock.with_chunk_array_support do
    stub_request(:post, stream_url)
      .with do |outgoing|
        sent_request = outgoing
        true
      end
      .to_return(status: 200, body: encoded_events)

    latest_partial = nil
    llm.generate("hello world", response_format: reply_schema, user: user) do |partial|
      latest_partial = partial
    end

    payload = JSON.parse(sent_request.body)
    expect(payload["messages"].last).to eq({ "role" => "assistant", "content" => "{" })
    expect(payload).not_to have_key("output_config")

    expect(latest_partial.read_buffered_property(:key)).to eq("value")
  end
end
end
describe "prompt caching for Anthropic models" do
it "applies caching in always mode for Claude models" do
  caching_params = (model.provider_params || {}).tap { |config| config["prompt_caching"] = "always" }
  model.update!(provider_params: caching_params)

  events = [
    {
      type: "message_start",
      message: {
        usage: {
          input_tokens: 10,
          cache_creation_input_tokens: 100,
          cache_read_input_tokens: 50,
        },
      },
    },
    { type: "content_block_delta", delta: { text: "Cached response" } },
    { type: "message_delta", delta: { usage: { output_tokens: 5 } } },
  ]
  encoded_events = events.map { |event| encode_message(event) }

  sent_request = nil
  stream_url =
    "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke-with-response-stream"

  bedrock_mock.with_chunk_array_support do
    stub_request(:post, stream_url)
      .with do |outgoing|
        sent_request = outgoing
        true
      end
      .to_return(status: 200, body: encoded_events)

    prompt =
      DiscourseAi::Completions::Prompt.new(
        "You are a bot",
        messages: [{ type: :user, content: "hello" }],
      )
    claude_dialect = DiscourseAi::Completions::Dialects::Claude.new(prompt, model)

    streamed = +""
    endpoint.perform_completion!(claude_dialect, user) { |partial| streamed << partial }

    expect(streamed).to eq("Cached response")

    # The last content part of the last message must carry the cache marker.
    payload = JSON.parse(sent_request.body, symbolize_names: true)
    marker = payload[:messages].last[:content].last[:cache_control]
    expect(marker).to eq({ type: "ephemeral" })

    # Cache read/creation counts from message_start end up in the audit log.
    audit = AiApiAuditLog.order(:id).last
    expect(audit.cache_read_tokens).to eq(50)
    expect(audit.cache_write_tokens).to eq(100)
  end
end
it "does not apply caching in never mode" do
  caching_params = (model.provider_params || {}).tap { |config| config["prompt_caching"] = "never" }
  model.update!(provider_params: caching_params)

  events = [
    { type: "message_start", message: { usage: { input_tokens: 10 } } },
    { type: "content_block_delta", delta: { text: "No cache" } },
    { type: "message_delta", delta: { usage: { output_tokens: 5 } } },
  ]
  encoded_events = events.map { |event| encode_message(event) }

  sent_request = nil
  stream_url =
    "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke-with-response-stream"

  bedrock_mock.with_chunk_array_support do
    stub_request(:post, stream_url)
      .with do |outgoing|
        sent_request = outgoing
        true
      end
      .to_return(status: 200, body: encoded_events)

    prompt =
      DiscourseAi::Completions::Prompt.new(
        "You are a bot",
        messages: [{ type: :user, content: "hello" }],
      )
    claude_dialect = DiscourseAi::Completions::Dialects::Claude.new(prompt, model)

    streamed = +""
    endpoint.perform_completion!(claude_dialect, user) { |partial| streamed << partial }

    expect(streamed).to eq("No cache")

    # Without caching the system prompt stays a plain string (no cache_control wrapper).
    payload = JSON.parse(sent_request.body, symbolize_names: true)
    expect(payload[:system]).to eq("You are a bot")
  end
end
it "does not apply caching to non-Claude models on Bedrock" do
# Caching should only work for Anthropic Claude models, not other Bedrock models
# This test would need a Nova model setup to be fully tested
# For now, we verify the logic is only applied in Claude dialect branch
expect(endpoint.respond_to?(:should_apply_prompt_caching?)).to be(true)
end
end
describe "adaptive thinking" do
  # Stores +provider_params+ on the model, performs one non-streaming
  # completion against a stubbed Bedrock invoke endpoint and returns the JSON
  # payload that was sent, parsed into a Hash with string keys.
  def sent_payload_with(provider_params)
    model.update!(provider_params: provider_params)
    llm = DiscourseAi::Completions::Llm.proxy(model)

    seen_requests = []
    canned_response = {
      content: [text: "test response"],
      usage: {
        input_tokens: 10,
        output_tokens: 5,
      },
    }.to_json

    stub_request(
      :post,
      "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke",
    )
      # Appending returns the (truthy) array, so the stub matches every
      # request while recording it.
      .with { |inner_request| seen_requests << inner_request }
      .to_return(status: 200, body: canned_response)

    llm.generate("test prompt", user: user)

    JSON.parse(seen_requests.last.body)
  end

  it "sends adaptive thinking config when enabled" do
    provider_params = {
      access_key_id: "123",
      region: "us-east-1",
      enable_reasoning: true,
      adaptive_thinking: true,
    }

    payload = sent_payload_with(provider_params)

    expect(payload["thinking"]).to eq({ "type" => "adaptive" })
    expect(payload["max_tokens"]).to eq(32_000)
  end

  # Same as above but with an explicit reasoning token budget configured: the
  # thinking config must still be exactly the adaptive one.
  it "adaptive_thinking takes priority over enable_reasoning" do
    provider_params = {
      access_key_id: "123",
      region: "us-east-1",
      enable_reasoning: true,
      adaptive_thinking: true,
      reasoning_tokens: 10_000,
    }

    payload = sent_payload_with(provider_params)

    expect(payload["thinking"]).to eq({ "type" => "adaptive" })
    expect(payload["max_tokens"]).to eq(32_000)
  end
end
describe "effort parameter" do
  # Provider params used by every example here; only the effort level varies.
  def effort_params(effort)
    { access_key_id: "123", region: "us-east-1", effort: effort }
  end

  # Stores +provider_params+ on the model, performs one non-streaming
  # completion against a stubbed Bedrock invoke endpoint and returns the JSON
  # payload that was sent, parsed into a Hash with string keys.
  def invoke_payload_for(provider_params)
    model.update!(provider_params: provider_params)
    proxy = DiscourseAi::Completions::Llm.proxy(model)

    captured_requests = []
    content = {
      content: [text: "test response"],
      usage: {
        input_tokens: 10,
        output_tokens: 5,
      },
    }.to_json

    stub_request(
      :post,
      "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke",
    )
      # Appending returns the (truthy) array, so the stub matches every
      # request while recording it.
      .with { |inner_request| captured_requests << inner_request }
      .to_return(status: 200, body: content)

    proxy.generate("test prompt", user: user)

    JSON.parse(captured_requests.last.body)
  end

  it "includes effort in output_config when set to max" do
    request_body = invoke_payload_for(effort_params("max"))

    expect(request_body.dig("output_config", "effort")).to eq("max")
  end

  it "includes effort in output_config when set to xhigh" do
    request_body = invoke_payload_for(effort_params("xhigh"))

    expect(request_body.dig("output_config", "effort")).to eq("xhigh")
  end

  it "includes effort in output_config when set to low, medium, or high" do
    # Exercise every level named in the description, not just one of them.
    %w[low medium high].each do |effort|
      request_body = invoke_payload_for(effort_params(effort))

      expect(request_body.dig("output_config", "effort")).to eq(effort)
      expect(request_body).not_to have_key("anthropic_beta")
    end
  end

  it "omits effort when set to default" do
    request_body = invoke_payload_for(effort_params("default"))

    expect(request_body).not_to have_key("output_config")
    expect(request_body).not_to have_key("anthropic_beta")
  end

  # Streaming request with both an effort level and a JSON schema response
  # format: both settings must land in the same output_config object.
  it "merges effort and structured output format in output_config" do
    model.update!(provider_params: effort_params("high"))

    schema = {
      type: "json_schema",
      json_schema: {
        name: "reply",
        schema: {
          type: "object",
          properties: {
            key: {
              type: "string",
            },
          },
          required: ["key"],
          additionalProperties: false,
        },
        strict: true,
      },
    }

    messages =
      [
        { type: "message_start", message: { usage: { input_tokens: 9 } } },
        { type: "content_block_delta", delta: { text: "{\"key\":\"value\"}" } },
        { type: "message_delta", delta: { usage: { output_tokens: 25 } } },
      ].map { |message| encode_message(message) }

    proxy = DiscourseAi::Completions::Llm.proxy(model)
    request = nil

    bedrock_mock.with_chunk_array_support do
      stub_request(
        :post,
        "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke-with-response-stream",
      )
        .with do |inner_request|
          # Keep the outgoing request so its payload can be inspected below.
          request = inner_request
          true
        end
        .to_return(status: 200, body: messages)

      # The block is intentionally empty: it is only passed so the streaming
      # code path is used; the partials themselves are not inspected.
      proxy.generate("hello world", response_format: schema, user: user) { |_partial| }

      request_body = JSON.parse(request.body)
      expect(request_body["output_config"]).to eq(
        {
          "effort" => "high",
          "format" => {
            "type" => "json_schema",
            "schema" => schema[:json_schema][:schema].deep_stringify_keys,
          },
        },
      )
    end
  end
end
end