mirror of
https://gh.wpcy.net/https://github.com/discourse/discourse.git
synced 2026-06-19 09:49:09 +08:00
Refresh several AI model presets to newer provider versions and adjust pricing/capacity metadata accordingly. Also add support for the new `xhigh` effort level across Anthropic and Bedrock request handling and update the LLM admin UI/specs to match the renamed presets.
1605 lines
49 KiB
Ruby
Vendored
1605 lines
49 KiB
Ruby
Vendored
# frozen_string_literal: true
|
|
|
|
require_relative "endpoint_compliance"
|
|
require "aws-eventstream"
|
|
require "aws-sigv4"
|
|
require "aws-sdk-sts"
|
|
|
|
# Endpoint mock used by the Bedrock specs; adds nothing on top of EndpointMock.
class BedrockMock < EndpointMock; end
|
|
|
|
RSpec.describe DiscourseAi::Completions::Endpoints::AwsBedrock do
|
|
# Endpoint under test, built from the fabricated Bedrock model.
subject(:endpoint) { described_class.new(model) }

fab!(:user)
fab!(:model, :bedrock_model)

# Mock helper bound to the endpoint under test.
let(:bedrock_mock) { BedrockMock.new(endpoint) }

# Compliance helper for this endpoint, using the Claude dialect and the fabricated user.
let(:compliance) do
  claude_dialect = DiscourseAi::Completions::Dialects::Claude
  EndpointsCompliance.new(self, endpoint, claude_dialect, user)
end
|
|
|
|
# Builds one AWS event-stream frame for +message+.
#
# The message is serialised to JSON, Base64-encoded, wrapped as
# {"bytes": "<base64>"} JSON and handed to Aws::EventStream::Encoder.
#
# @param message [Hash] event to wrap (anything responding to #to_json)
# @return the value returned by Aws::EventStream::Encoder#encode
def encode_message(message)
  base64_json = Base64.encode64(message.to_json)
  payload_io = StringIO.new({ bytes: base64_json }.to_json)
  frame = Aws::EventStream::Message.new(payload: payload_io)
  Aws::EventStream::Encoder.new.encode(frame)
end
|
|
|
|
# Calls enable_current_plugin before every example.
before do
  enable_current_plugin
end
|
|
|
|
# Checks the default +max_tokens+ chosen by the endpoint for several Claude
# model names. The model name is changed in memory only (never saved).
it "should provide accurate max token count" do
  prompt = DiscourseAi::Completions::Prompt.new("hello")
  dialect = DiscourseAi::Completions::Dialects::Claude.new(prompt, model)

  expected_max_tokens = {
    "claude-2" => 4096,
    "claude-3-5-sonnet" => 8192,
    "claude-3-5-haiku" => 8192,
  }

  # `endpoint` is the spec's subject (described_class.new(model)); no need for
  # a local variable that shadows it.
  expected_max_tokens.each do |model_name, expected|
    model.name = model_name
    actual = endpoint.default_options(dialect)[:max_tokens]

    expect(actual).to eq(expected),
    "expected max_tokens #{expected} for #{model_name}, got #{actual.inspect}"
  end
end
|
|
|
|
describe "function calling" do
|
|
# With `disable_native_tools` set, the request must carry no native "tools"
# key, must mention <function_calls> in the system prompt, and the XML tool
# call streamed back must surface as a ToolCall after the plain text.
it "supports old school xml function calls" do
  model.provider_params["disable_native_tools"] = true
  model.save!

  llm = DiscourseAi::Completions::Llm.proxy(model)

  xml_tool_call = <<~XML.strip
    <thinking>I should be ignored</thinking>
    <search_quality_reflection>also ignored</search_quality_reflection>
    <search_quality_score>0</search_quality_score>
    <function_calls>
    <invoke>
    <tool_name>google</tool_name>
    <parameters><query>sydney weather today</query></parameters>
    </invoke>
    </function_calls>
  XML

  stream_events = [
    { type: "message_start", message: { usage: { input_tokens: 9 } } },
    { type: "content_block_delta", delta: { text: "hello\n" } },
    { type: "content_block_delta", delta: { text: xml_tool_call } },
    { type: "message_delta", delta: { usage: { output_tokens: 25 } } },
  ]
  frames = stream_events.map { |event| encode_message(event) }

  stream_url =
    "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke-with-response-stream"
  captured = nil

  bedrock_mock.with_chunk_array_support do
    stub_request(:post, stream_url)
      .with do |outgoing|
        captured = outgoing # keep the request for the assertions below
        true
      end
      .to_return(status: 200, body: frames)

    prompt =
      DiscourseAi::Completions::Prompt.new(
        messages: [{ type: :user, content: "what is the weather in sydney" }],
      )

    query_param = { name: "query", description: "The search query", type: "string", required: true }
    prompt.tools = [
      { name: "google", description: "Will search using Google", parameters: [query_param] },
    ]

    partials = []
    llm.generate(prompt, user: user) { |partial| partials << partial }

    # The request must be signed.
    %w[Authorization X-Amz-Content-Sha256].each do |header|
      expect(captured.headers[header]).to be_present
    end

    sent = JSON.parse(captured.body)
    expect(sent["system"]).to include("<function_calls>")
    expect(sent["tools"]).to be_nil
    expect(sent["stop_sequences"]).to eq(["</function_calls>"])

    google_call =
      DiscourseAi::Completions::ToolCall.new(
        id: "tool_0",
        name: "google",
        parameters: {
          query: "sydney weather today",
        },
      )

    expect(partials).to eq(["hello\n", google_call])
  end
end
|
|
|
|
# Streams a native tool_use response and checks that:
#   * the request is signed and carries the expected JSON body,
#   * the streamed input_json_delta fragments are assembled into one ToolCall,
#   * the audit log records the token counts from the
#     "amazon-bedrock-invocationMetrics" entry of the final event.
it "supports streaming function calls" do
  proxy = DiscourseAi::Completions::Llm.proxy(model)
  request = nil

  # The tool input arrives as four JSON fragments (the first one empty).
  json_deltas =
    ["", "{\"query\": \"s", "ydney weat", "her today\"}"].map do |fragment|
      {
        type: "content_block_delta",
        index: 0,
        delta: {
          type: "input_json_delta",
          partial_json: fragment,
        },
      }
    end

  events = [
    {
      type: "message_start",
      message: {
        id: "msg_bdrk_01WYxeNMk6EKn9s98r6XXrAB",
        type: "message",
        role: "assistant",
        model: "claude-3-sonnet-20240307",
        stop_sequence: nil,
        usage: {
          input_tokens: 840,
          output_tokens: 1,
        },
        content: [],
        stop_reason: nil,
      },
    },
    {
      type: "content_block_start",
      index: 0,
      delta: {
        text: "<thinking>I should be ignored</thinking>",
      },
    },
    {
      type: "content_block_start",
      index: 0,
      content_block: {
        type: "tool_use",
        id: "toolu_bdrk_014CMjxtGmKUtGoEFPgc7PF7",
        name: "google",
        input: {
        },
      },
    },
    *json_deltas,
    { type: "content_block_stop", index: 0 },
    {
      type: "message_delta",
      delta: {
        stop_reason: "tool_use",
        stop_sequence: nil,
      },
      usage: {
        output_tokens: 53,
      },
    },
    {
      type: "message_stop",
      "amazon-bedrock-invocationMetrics": {
        inputTokenCount: 846,
        outputTokenCount: 39,
        invocationLatency: 880,
        firstByteLatency: 402,
      },
    },
  ]

  # Deliver the stream one byte at a time so frames are cut at arbitrary
  # boundaries. This used to be `join("").split`, but an argument-less
  # String#split cuts on ASCII whitespace and throws those bytes away, which
  # corrupts a binary frame whenever a length/CRC byte is a whitespace value.
  messages = events.map { |message| encode_message(message) }.join.b.chars

  bedrock_mock.with_chunk_array_support do
    stub_request(
      :post,
      "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke-with-response-stream",
    )
      .with do |inner_request|
        request = inner_request
        true
      end
      .to_return(status: 200, body: messages)

    prompt =
      DiscourseAi::Completions::Prompt.new(
        messages: [{ type: :user, content: "what is the weather in sydney" }],
      )

    prompt.tools = [
      {
        name: "google",
        description: "Will search using Google",
        parameters: [
          { name: "query", description: "The search query", type: "string", required: true },
        ],
      },
    ]

    response = []
    proxy.generate(prompt, user: user) { |partial| response << partial }

    expect(request.headers["Authorization"]).to be_present
    expect(request.headers["X-Amz-Content-Sha256"]).to be_present

    expected_response = [
      DiscourseAi::Completions::ToolCall.new(
        id: "toolu_bdrk_014CMjxtGmKUtGoEFPgc7PF7",
        name: "google",
        parameters: {
          query: "sydney weather today",
        },
      ),
    ]
    expect(response).to eq(expected_response)

    expected = {
      "max_tokens" => 4096,
      "anthropic_version" => "bedrock-2023-05-31",
      "messages" => [{ "role" => "user", "content" => "what is the weather in sydney" }],
      "tools" => [
        {
          "name" => "google",
          "description" => "Will search using Google",
          "input_schema" => {
            "type" => "object",
            "properties" => {
              "query" => {
                "type" => "string",
                "description" => "The search query",
              },
            },
            "required" => ["query"],
          },
        },
      ],
    }
    expect(JSON.parse(request.body)).to eq(expected)

    # Token counts come from the invocation metrics, not the usage blocks.
    log = AiApiAuditLog.order(:id).last
    expect(log.request_tokens).to eq(846)
    expect(log.response_tokens).to eq(39)
  end
end
|
|
end
|
|
|
|
describe "Claude 3 support" do
|
|
# Non-streaming completion: verifies signing headers, the exact request body,
# the returned text and the token counts stored in the audit log.
it "supports regular completions" do
  llm = DiscourseAi::Completions::Llm.proxy(model)
  captured = nil

  canned_body = {
    content: [{ text: "hello sam" }],
    usage: {
      input_tokens: 10,
      output_tokens: 20,
    },
  }.to_json

  invoke_url =
    "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke"

  stub_request(:post, invoke_url)
    .with do |outgoing|
      captured = outgoing
      true
    end
    .to_return(status: 200, body: canned_body)

  completion = llm.generate("hello world", user: user)

  %w[Authorization X-Amz-Content-Sha256].each do |header|
    expect(captured.headers[header]).to be_present
  end

  expect(JSON.parse(captured.body)).to eq(
    {
      "max_tokens" => 4096,
      "anthropic_version" => "bedrock-2023-05-31",
      "messages" => [{ "role" => "user", "content" => "hello world" }],
      "system" => "You are a helpful bot",
    },
  )

  expect(completion).to eq("hello sam")

  audit = AiApiAuditLog.order(:id).last
  expect(audit.request_tokens).to eq(10)
  expect(audit.response_tokens).to eq(20)
end
|
|
|
|
# With reasoning enabled and a 10_000 token budget, the request body must carry
# a "thinking" section and a max_tokens of 40_000.
it "supports thinking" do
  model.provider_params["enable_reasoning"] = true
  model.provider_params["reasoning_tokens"] = 10_000
  model.save!

  llm = DiscourseAi::Completions::Llm.proxy(model)
  captured = nil

  canned_body = {
    content: [{ text: "hello sam" }],
    usage: {
      input_tokens: 10,
      output_tokens: 20,
    },
  }.to_json

  invoke_url =
    "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke"

  stub_request(:post, invoke_url)
    .with do |outgoing|
      captured = outgoing
      true
    end
    .to_return(status: 200, body: canned_body)

  completion = llm.generate("hello world", user: user)

  %w[Authorization X-Amz-Content-Sha256].each do |header|
    expect(captured.headers[header]).to be_present
  end

  expect(JSON.parse(captured.body)).to eq(
    {
      "max_tokens" => 40_000,
      "thinking" => {
        "type" => "enabled",
        "budget_tokens" => 10_000,
      },
      "anthropic_version" => "bedrock-2023-05-31",
      "messages" => [{ "role" => "user", "content" => "hello world" }],
      "system" => "You are a helpful bot",
    },
  )

  expect(completion).to eq("hello sam")

  audit = AiApiAuditLog.order(:id).last
  expect(audit.request_tokens).to eq(10)
  expect(audit.response_tokens).to eq(20)
end
|
|
|
|
# Even when the caller passes temperature/top_p, neither key may appear in the
# request body once reasoning is enabled on the model.
it "strips temperature and top_p when reasoning is enabled" do
  reasoning_params = {
    access_key_id: "123",
    region: "us-east-1",
    enable_reasoning: true,
    reasoning_tokens: 2048,
  }
  model.update!(provider_params: reasoning_params)

  llm = DiscourseAi::Completions::Llm.proxy(model)
  captured = nil

  canned_body = {
    content: [{ text: "response" }],
    usage: {
      input_tokens: 10,
      output_tokens: 5,
    },
  }.to_json

  invoke_url =
    "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke"

  stub_request(:post, invoke_url)
    .with do |outgoing|
      captured = outgoing
      true
    end
    .to_return(status: 200, body: canned_body)

  llm.generate("test prompt", user: user, temperature: 0.7, top_p: 0.9)

  sent = JSON.parse(captured.body)
  %w[temperature top_p].each { |sampling_key| expect(sent).not_to have_key(sampling_key) }
end
|
|
|
|
# Streams a plain text completion and verifies signing headers, the request
# body, the concatenated text and the token counts stored in the audit log.
it "supports claude 3 streaming" do
  proxy = DiscourseAi::Completions::Llm.proxy(model)
  request = nil

  events = [
    { type: "message_start", message: { usage: { input_tokens: 9 } } },
    { type: "content_block_delta", delta: { text: "hello " } },
    { type: "content_block_delta", delta: { text: "sam" } },
    { type: "message_delta", delta: { usage: { output_tokens: 25 } } },
  ]

  # Stream one byte at a time, because the endpoint has to cope with frames
  # that are cut at arbitrary boundaries. This used to be `join("").split`,
  # which does not do that: an argument-less String#split cuts on ASCII
  # whitespace and discards those bytes, corrupting a binary frame whenever a
  # length/CRC byte is a whitespace value.
  messages = events.map { |message| encode_message(message) }.join.b.chars

  bedrock_mock.with_chunk_array_support do
    stub_request(
      :post,
      "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke-with-response-stream",
    )
      .with do |inner_request|
        request = inner_request
        true
      end
      .to_return(status: 200, body: messages)

    response = +""
    proxy.generate("hello world", user: user) { |partial| response << partial }

    expect(request.headers["Authorization"]).to be_present
    expect(request.headers["X-Amz-Content-Sha256"]).to be_present

    expected = {
      "max_tokens" => 4096,
      "anthropic_version" => "bedrock-2023-05-31",
      "messages" => [{ "role" => "user", "content" => "hello world" }],
      "system" => "You are a helpful bot",
    }
    expect(JSON.parse(request.body)).to eq(expected)

    expect(response).to eq("hello sam")

    log = AiApiAuditLog.order(:id).last
    expect(log.request_tokens).to eq(9)
    expect(log.response_tokens).to eq(25)
  end
end
|
|
end
|
|
|
|
describe "parameter disabling" do
|
|
# With disable_top_p / disable_temperature set on the model, those keys must be
# absent from the request body while max_tokens is still passed through.
it "excludes disabled parameters from the request" do
  model.update!(
    provider_params: {
      access_key_id: "123",
      region: "us-east-1",
      disable_top_p: true,
      disable_temperature: true,
    },
  )

  llm = DiscourseAi::Completions::Llm.proxy(model)
  captured = nil

  canned_body = {
    content: [{ text: "test response" }],
    usage: {
      input_tokens: 10,
      output_tokens: 5,
    },
  }.to_json

  invoke_url =
    "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke"

  stub_request(:post, invoke_url)
    .with do |outgoing|
      captured = outgoing
      true
    end
    .to_return(status: 200, body: canned_body)

  # top_p and temperature are passed on purpose; they are expected to be dropped.
  llm.generate("test prompt", user: user, top_p: 0.9, temperature: 0.8, max_tokens: 500)

  sent = JSON.parse(captured.body)

  # Disabled parameters are not sent ...
  %w[top_p temperature].each { |disabled_key| expect(sent).not_to have_key(disabled_key) }

  # ... while the remaining one is present with the requested value.
  expect(sent).to include("max_tokens" => 500)
end
|
|
end
|
|
|
|
describe "disabled tool use" do
|
|
# A prompt with tool_choice :none must yield a request whose tool_choice type
# is "none" and which still contains a single message.
it "sets tool_choice to none natively" do
  llm = DiscourseAi::Completions::Llm.proxy(model)
  captured = nil

  echo_tool = {
    name: "echo",
    description: "echo something",
    parameters: [{ name: "text", type: "string", description: "text to echo", required: true }],
  }

  prompt =
    DiscourseAi::Completions::Prompt.new(
      "You are a helpful assistant",
      messages: [{ type: :user, content: "don't use any tools please" }],
      tools: [echo_tool],
      tool_choice: :none,
    )

  canned_body = {
    content: [{ text: "I won't use any tools. Here's a direct response instead." }],
    usage: {
      input_tokens: 25,
      output_tokens: 15,
    },
  }.to_json

  invoke_url =
    "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke"

  stub_request(:post, invoke_url)
    .with do |outgoing|
      captured = outgoing
      true
    end
    .to_return(status: 200, body: canned_body)

  completion = llm.generate(prompt, user: user)

  sent = JSON.parse(captured.body)

  expect(sent.dig("tool_choice", "type")).to eq("none")
  expect(sent["messages"].length).to eq(1)
  expect(completion).to eq("I won't use any tools. Here's a direct response instead.")
end
|
|
end
|
|
|
|
describe "tool_choice :none with response_format" do
|
|
# With native structured output disabled, a response_format request combined
# with tool_choice :none must send tool_choice "none", no output_config, and an
# extra assistant message containing "{" after the user message.
it "sets tool_choice none and appends assistant prefill when structured output is disabled" do
  prefill_params = model.provider_params.merge("disable_native_structured_output" => true)
  model.update!(provider_params: prefill_params)

  reply_schema = {
    type: "object",
    properties: {
      key: {
        type: "string",
      },
    },
    required: ["key"],
    additionalProperties: false,
  }
  response_format = {
    type: "json_schema",
    json_schema: {
      name: "reply",
      schema: reply_schema,
      strict: true,
    },
  }

  echo_tool = {
    name: "echo",
    description: "echo something",
    parameters: [{ name: "text", type: "string", description: "text to echo", required: true }],
  }

  prompt =
    DiscourseAi::Completions::Prompt.new(
      "You are a helpful assistant",
      messages: [{ type: :user, content: "reply as json" }],
      tools: [echo_tool],
      tool_choice: :none,
    )

  llm = DiscourseAi::Completions::Llm.proxy(model)
  captured = nil

  canned_body = {
    content: [{ text: "\"key\":\"value\"}" }],
    usage: {
      input_tokens: 25,
      output_tokens: 15,
    },
  }.to_json

  invoke_url =
    "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke"

  stub_request(:post, invoke_url)
    .with do |outgoing|
      captured = outgoing
      true
    end
    .to_return(status: 200, body: canned_body)

  llm.generate(prompt, user: user, response_format: response_format)

  sent = JSON.parse(captured.body)

  expect(sent.dig("tool_choice", "type")).to eq("none")
  expect(sent).not_to have_key("output_config")

  sent_messages = sent["messages"]
  expect(sent_messages.length).to eq(2)
  expect(sent_messages[0]["role"]).to eq("user")
  expect(sent_messages[1]).to eq({ "role" => "assistant", "content" => "{" })
end
|
|
end
|
|
|
|
describe "forced tool use" do
|
|
# Tool list with a single "echo" tool taking one required string parameter.
let(:tools) do
  text_param = { name: "text", type: "string", description: "text to echo", required: true }
  [{ name: "echo", description: "echo something", parameters: [text_param] }]
end
|
|
|
|
# JSON response body containing one tool_use block for "echo" plus usage counts.
let(:tool_response_body) do
  echo_call = {
    type: "tool_use",
    id: "toolu_bdrk_014CMjxtGmKUtGoEFPgc7PF7",
    name: "echo",
    input: {
      text: "hello",
    },
  }

  { content: [echo_call], usage: { input_tokens: 25, output_tokens: 15 } }.to_json
end
|
|
|
|
# A prompt with tool_choice "echo" must produce a request whose tool_choice
# names that tool.
it "can properly force tool use" do
  llm = DiscourseAi::Completions::Llm.proxy(model)
  captured = nil

  prompt =
    DiscourseAi::Completions::Prompt.new(
      "You are a bot",
      messages: [{ type: :user, id: "user1", content: "echo hello" }],
      tools: tools,
      tool_choice: "echo",
    )

  invoke_url =
    "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke"

  stub_request(:post, invoke_url)
    .with do |outgoing|
      captured = outgoing
      true
    end
    .to_return(status: 200, body: tool_response_body)

  llm.generate(prompt, user: user)

  sent = JSON.parse(captured.body)
  expect(sent.dig("tool_choice", "name")).to eq("echo")
end
|
|
|
|
# With reasoning and adaptive thinking enabled, a forced tool must not be sent
# as tool_choice; instead the last (user) message must mention the 'echo' tool.
it "skips tool_choice and injects guidance when thinking is enabled" do
  thinking_params =
    model.provider_params.merge("enable_reasoning" => true, "adaptive_thinking" => true)
  model.update!(provider_params: thinking_params)

  llm = DiscourseAi::Completions::Llm.proxy(model)
  captured = nil

  prompt =
    DiscourseAi::Completions::Prompt.new(
      "You are a bot",
      messages: [{ type: :user, id: "user1", content: "echo hello" }],
      tools: tools,
      tool_choice: "echo",
    )

  invoke_url =
    "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke"

  stub_request(:post, invoke_url)
    .with do |outgoing|
      captured = outgoing
      true
    end
    .to_return(status: 200, body: tool_response_body)

  llm.generate(prompt, user: user)

  sent = JSON.parse(captured.body)
  expect(sent).not_to have_key("tool_choice")

  final_message = sent["messages"].last
  expect(final_message["role"]).to eq("user")
  expect(final_message["content"]).to include("'echo' tool")
end
|
|
end
|
|
|
|
describe "role-based authentication" do
|
|
# When the model is configured with a role_arn, Aws::AssumeRoleCredentials must
# be built with that role and the session token must end up in the signed
# request headers.
it "uses assumed role credentials when role_arn is provided" do
  role_arn = "arn:aws:iam::123456789012:role/BedRockAccessRole"
  model.update!(provider_params: { region: "us-east-1", role_arn: role_arn })

  # Credentials object handed back by the assumed-role double.
  assumed_keys =
    instance_double(
      Aws::Credentials,
      access_key_id: "ASSUMED_ACCESS_KEY",
      secret_access_key: "ASSUMED_SECRET_KEY",
      session_token: "ASSUMED_SESSION_TOKEN",
    )
  assumed_role = instance_double(Aws::AssumeRoleCredentials, credentials: assumed_keys)

  # STS client double returned for the configured region.
  sts_client = instance_double(Aws::STS::Client)
  allow(Aws::STS::Client).to receive(:new).with(region: "us-east-1").and_return(sts_client)

  assume_role_args = {
    role_arn: role_arn,
    role_session_name: "discourse-bedrock-#{Process.pid}",
    client: sts_client,
  }
  allow(Aws::AssumeRoleCredentials).to receive(:new).with(**assume_role_args).and_return(
    assumed_role,
  )

  llm = DiscourseAi::Completions::Llm.proxy(model)
  captured = nil

  canned_body = {
    content: [{ text: "test response" }],
    usage: {
      input_tokens: 10,
      output_tokens: 5,
    },
  }.to_json

  invoke_url =
    "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke"

  stub_request(:post, invoke_url)
    .with do |outgoing|
      captured = outgoing
      true
    end
    .to_return(status: 200, body: canned_body)

  llm.generate("test prompt", user: user)

  # AssumeRoleCredentials was created with the expected arguments.
  expect(Aws::AssumeRoleCredentials).to have_received(:new).with(**assume_role_args)

  # The request was signed ...
  %w[Authorization X-Amz-Content-Sha256].each do |header|
    expect(captured.headers[header]).to be_present
  end
  # ... and carries the session token of the assumed role.
  expect(captured.headers["X-Amz-Security-Token"]).to eq("ASSUMED_SESSION_TOKEN")
end
|
|
|
|
# Without a role_arn, Aws::AssumeRoleCredentials must never be instantiated and
# the signed request must not carry a security token header.
it "uses regular credentials when role_arn is not provided" do
  direct_params = { access_key_id: "DIRECT_ACCESS_KEY", region: "us-east-1" }
  model.update!(provider_params: direct_params)

  llm = DiscourseAi::Completions::Llm.proxy(model)
  captured = nil

  canned_body = {
    content: [{ text: "test response" }],
    usage: {
      input_tokens: 10,
      output_tokens: 5,
    },
  }.to_json

  invoke_url =
    "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke"

  stub_request(:post, invoke_url)
    .with do |outgoing|
      captured = outgoing
      true
    end
    .to_return(status: 200, body: canned_body)

  # Spy on the constructor so its (non-)use can be asserted afterwards.
  allow(Aws::AssumeRoleCredentials).to receive(:new).and_call_original

  llm.generate("test prompt", user: user)

  expect(Aws::AssumeRoleCredentials).not_to have_received(:new)

  # Signed with the regular credentials ...
  %w[Authorization X-Amz-Content-Sha256].each do |header|
    expect(captured.headers[header]).to be_present
  end
  # ... so there is no session token header.
  expect(captured.headers["X-Amz-Security-Token"]).to be_nil
end
|
|
|
|
# Three consecutive generate calls with the same role_arn must build
# Aws::AssumeRoleCredentials exactly once.
it "caches assumed role credentials across multiple requests" do
  role_arn = "arn:aws:iam::123456789012:role/BedRockAccessRole"
  model.update!(provider_params: { region: "us-east-1", role_arn: role_arn })

  # Credentials object handed back by the assumed-role double.
  assumed_keys =
    instance_double(
      Aws::Credentials,
      access_key_id: "ASSUMED_ACCESS_KEY",
      secret_access_key: "ASSUMED_SECRET_KEY",
      session_token: "ASSUMED_SESSION_TOKEN",
    )
  assumed_role = instance_double(Aws::AssumeRoleCredentials, credentials: assumed_keys)

  # STS client double returned for the configured region.
  sts_client = instance_double(Aws::STS::Client)
  allow(Aws::STS::Client).to receive(:new).with(region: "us-east-1").and_return(sts_client)

  allow(Aws::AssumeRoleCredentials).to receive(:new).with(
    role_arn: role_arn,
    role_session_name: "discourse-bedrock-#{Process.pid}",
    client: sts_client,
  ).and_return(assumed_role)

  llm = DiscourseAi::Completions::Llm.proxy(model)

  canned_body = {
    content: [{ text: "test response" }],
    usage: {
      input_tokens: 10,
      output_tokens: 5,
    },
  }.to_json

  invoke_url =
    "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke"
  stub_request(:post, invoke_url).to_return(status: 200, body: canned_body)

  # Several calls through the same proxy.
  ["test prompt 1", "test prompt 2", "test prompt 3"].each do |text|
    llm.generate(text, user: user)
  end

  # Built only once (the original note says the cache lives in LlmModel).
  expect(Aws::AssumeRoleCredentials).to have_received(:new).once
end
|
|
|
|
# After the model's role_arn is changed between two requests,
# Aws::AssumeRoleCredentials must have been built once for each role.
it "invalidates cache when role_arn changes" do
  first_role_arn = "arn:aws:iam::123456789012:role/FirstRole"
  second_role_arn = "arn:aws:iam::123456789012:role/SecondRole"

  model.update!(provider_params: { region: "us-east-1", role_arn: first_role_arn })

  # Doubles for the first role.
  first_keys =
    instance_double(
      Aws::Credentials,
      access_key_id: "FIRST_ACCESS_KEY",
      secret_access_key: "FIRST_SECRET_KEY",
      session_token: "FIRST_SESSION_TOKEN",
    )
  first_assumed = instance_double(Aws::AssumeRoleCredentials, credentials: first_keys)

  # Doubles for the second role.
  second_keys =
    instance_double(
      Aws::Credentials,
      access_key_id: "SECOND_ACCESS_KEY",
      secret_access_key: "SECOND_SECRET_KEY",
      session_token: "SECOND_SESSION_TOKEN",
    )
  second_assumed = instance_double(Aws::AssumeRoleCredentials, credentials: second_keys)

  sts_client = instance_double(Aws::STS::Client)
  allow(Aws::STS::Client).to receive(:new).with(region: "us-east-1").and_return(sts_client)

  session_name = "discourse-bedrock-#{Process.pid}"
  first_args = { role_arn: first_role_arn, role_session_name: session_name, client: sts_client }
  second_args = { role_arn: second_role_arn, role_session_name: session_name, client: sts_client }

  # The constructor hands back different credentials depending on the role_arn.
  allow(Aws::AssumeRoleCredentials).to receive(:new).with(**first_args).and_return(first_assumed)
  allow(Aws::AssumeRoleCredentials).to receive(:new).with(**second_args).and_return(
    second_assumed,
  )

  llm = DiscourseAi::Completions::Llm.proxy(model)

  canned_body = {
    content: [{ text: "test response" }],
    usage: {
      input_tokens: 10,
      output_tokens: 5,
    },
  }.to_json

  invoke_url =
    "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke"
  stub_request(:post, invoke_url).to_return(status: 200, body: canned_body)

  # Request made while the first role is configured.
  llm.generate("test prompt 1", user: user)

  # Switch the model to the second role.
  model.update!(provider_params: { region: "us-east-1", role_arn: second_role_arn })

  # Request made after the switch.
  llm.generate("test prompt 2", user: user)

  # One construction per role.
  expect(Aws::AssumeRoleCredentials).to have_received(:new).with(**first_args).once
  expect(Aws::AssumeRoleCredentials).to have_received(:new).with(**second_args).once
end
|
|
end
|
|
|
|
describe "structured output via output_config" do
|
|
# A response_format schema must be forwarded as "output_config" in the request
# body, and the streamed JSON text must be readable through the structured
# output object yielded to the block.
it "forces the response to be a JSON and using the given JSON schema" do
  reply_schema = {
    type: "object",
    properties: {
      key: {
        type: "string",
      },
    },
    required: ["key"],
    additionalProperties: false,
  }
  schema = { type: "json_schema", json_schema: { name: "reply", schema: reply_schema, strict: true } }

  # Text fragments that together form {"key":"Hello!\n There"}.
  text_fragments = ["{\"", "key", "\":\"", "Hello!", "\n There", "\"}"]
  stream_events = [
    { type: "message_start", message: { usage: { input_tokens: 9 } } },
    *text_fragments.map { |text| { type: "content_block_delta", delta: { text: text } } },
    { type: "message_delta", delta: { usage: { output_tokens: 25 } } },
  ]
  frames = stream_events.map { |event| encode_message(event) }

  llm = DiscourseAi::Completions::Llm.proxy(model)
  captured = nil
  stream_url =
    "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke-with-response-stream"

  bedrock_mock.with_chunk_array_support do
    stub_request(:post, stream_url)
      .with do |outgoing|
        captured = outgoing
        true
      end
      .to_return(status: 200, body: frames)

    # Only the last yielded partial is kept.
    latest_output = nil
    llm.generate("hello world", response_format: schema, user: user) do |partial|
      latest_output = partial
    end

    expect(JSON.parse(captured.body)).to eq(
      {
        "max_tokens" => 4096,
        "anthropic_version" => "bedrock-2023-05-31",
        "messages" => [{ "role" => "user", "content" => "hello world" }],
        "system" => "You are a helpful bot",
        "output_config" => {
          "format" => {
            "type" => "json_schema",
            "schema" => schema[:json_schema][:schema].deep_stringify_keys,
          },
        },
      },
    )

    expect(latest_output.read_buffered_property(:key)).to eq("Hello!\n There")
  end
end
|
|
|
|
# Same as the plain JSON schema example, but the schema has an array property:
# the streamed array items and the plain string must both be readable from the
# structured output object.
it "works with JSON schema array types" do
  reply_schema = {
    type: "object",
    properties: {
      plain: {
        type: "string",
      },
      key: {
        type: "array",
        items: {
          type: "string",
        },
      },
    },
    required: %w[plain key],
    additionalProperties: false,
  }
  schema = { type: "json_schema", json_schema: { name: "reply", schema: reply_schema, strict: true } }

  # Text fragments forming {"key": ["Hello! I am a chunk","There"], "plain":"I'm here too"}.
  text_fragments = [
    "{\"",
    "key",
    "\":",
    " [\"",
    "Hello!",
    " I am",
    " a ",
    "chunk\",",
    "\"There",
    "\"],",
    " \"plain",
    "\":\"",
    "I'm here",
    " too\"}",
  ]
  stream_events = [
    { type: "message_start", message: { usage: { input_tokens: 9 } } },
    *text_fragments.map { |text| { type: "content_block_delta", delta: { text: text } } },
    { type: "message_delta", delta: { usage: { output_tokens: 25 } } },
  ]
  frames = stream_events.map { |event| encode_message(event) }

  llm = DiscourseAi::Completions::Llm.proxy(model)
  captured = nil
  stream_url =
    "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke-with-response-stream"

  bedrock_mock.with_chunk_array_support do
    stub_request(:post, stream_url)
      .with do |outgoing|
        captured = outgoing
        true
      end
      .to_return(status: 200, body: frames)

    # Only the last yielded partial is kept.
    latest_output = nil
    llm.generate("hello world", response_format: schema, user: user) do |partial|
      latest_output = partial
    end

    expect(JSON.parse(captured.body)).to eq(
      {
        "max_tokens" => 4096,
        "anthropic_version" => "bedrock-2023-05-31",
        "messages" => [{ "role" => "user", "content" => "hello world" }],
        "system" => "You are a helpful bot",
        "output_config" => {
          "format" => {
            "type" => "json_schema",
            "schema" => schema[:json_schema][:schema].deep_stringify_keys,
          },
        },
      },
    )

    expect(latest_output.read_buffered_property(:key)).to contain_exactly(
      "Hello! I am a chunk",
      "There",
    )
    expect(latest_output.read_buffered_property(:plain)).to eq("I'm here too")
  end
end
|
|
end
|
|
|
|
describe "structured output via prefilling when native structured output is disabled" do
  # With "disable_native_structured_output" set, the request is expected to
  # end with an assistant message containing "{" and to carry no
  # output_config; the streamed reply therefore starts after the brace.
  it "falls back to assistant message prefill" do
    model.update!(
      provider_params: model.provider_params.merge("disable_native_structured_output" => true),
    )

    reply_schema = {
      type: "object",
      properties: {
        key: {
          type: "string",
        },
      },
      required: ["key"],
      additionalProperties: false,
    }
    schema = { type: "json_schema", json_schema: { name: "reply", schema: reply_schema, strict: true } }

    events = [
      { type: "message_start", message: { usage: { input_tokens: 9 } } },
      # No leading "{" here: it is supplied by the prefilled assistant turn.
      { type: "content_block_delta", delta: { text: "\"key\":\"value\"}" } },
      { type: "message_delta", delta: { usage: { output_tokens: 25 } } },
    ]
    chunks = events.map { |event| encode_message(event) }

    stream_url =
      "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke-with-response-stream"
    llm = DiscourseAi::Completions::Llm.proxy(model)
    request = nil

    bedrock_mock.with_chunk_array_support do
      stub_request(:post, stream_url)
        .with do |incoming|
          request = incoming
          true
        end
        .to_return(status: 200, body: chunks)

      structured_output = nil
      llm.generate("hello world", response_format: schema, user: user) do |partial|
        structured_output = partial
      end

      sent_payload = JSON.parse(request.body)
      expect(sent_payload["messages"].last).to eq({ "role" => "assistant", "content" => "{" })
      expect(sent_payload).not_to have_key("output_config")

      expect(structured_output.read_buffered_property(:key)).to eq("value")
    end
  end
end
|
|
|
|
describe "prompt caching for Anthropic models" do
  # In "always" mode the last content part of the last message must carry an
  # ephemeral cache_control marker, and the cache token counts reported in
  # the stream must be recorded on the audit log.
  it "applies caching in always mode for Claude models" do
    model.update!(
      provider_params: (model.provider_params || {}).merge("prompt_caching" => "always"),
    )

    events = [
      {
        type: "message_start",
        message: {
          usage: {
            input_tokens: 10,
            cache_creation_input_tokens: 100,
            cache_read_input_tokens: 50,
          },
        },
      },
      { type: "content_block_delta", delta: { text: "Cached response" } },
      { type: "message_delta", delta: { usage: { output_tokens: 5 } } },
    ]
    chunks = events.map { |event| encode_message(event) }

    stream_url =
      "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke-with-response-stream"
    request = nil

    bedrock_mock.with_chunk_array_support do
      stub_request(:post, stream_url)
        .with do |incoming|
          # Keep the outgoing request so its body can be inspected below.
          request = incoming
          true
        end
        .to_return(status: 200, body: chunks)

      prompt =
        DiscourseAi::Completions::Prompt.new(
          "You are a bot",
          messages: [{ type: :user, content: "hello" }],
        )
      dialect = DiscourseAi::Completions::Dialects::Claude.new(prompt, model)

      result = +""
      endpoint.perform_completion!(dialect, user) { |partial| result << partial }

      expect(result).to eq("Cached response")

      parsed_body = JSON.parse(request.body, symbolize_names: true)
      expect(parsed_body[:messages].last[:content].last[:cache_control]).to eq(
        { type: "ephemeral" },
      )

      log = AiApiAuditLog.order(:id).last
      expect(log.cache_read_tokens).to eq(50)
      expect(log.cache_write_tokens).to eq(100)
    end
  end

  # In "never" mode the request must contain no cache_control marker anywhere.
  it "does not apply caching in never mode" do
    model.update!(
      provider_params: (model.provider_params || {}).merge("prompt_caching" => "never"),
    )

    events = [
      { type: "message_start", message: { usage: { input_tokens: 10 } } },
      { type: "content_block_delta", delta: { text: "No cache" } },
      { type: "message_delta", delta: { usage: { output_tokens: 5 } } },
    ]
    chunks = events.map { |event| encode_message(event) }

    stream_url =
      "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke-with-response-stream"
    request = nil

    bedrock_mock.with_chunk_array_support do
      stub_request(:post, stream_url)
        .with do |incoming|
          request = incoming
          true
        end
        .to_return(status: 200, body: chunks)

      prompt =
        DiscourseAi::Completions::Prompt.new(
          "You are a bot",
          messages: [{ type: :user, content: "hello" }],
        )
      dialect = DiscourseAi::Completions::Dialects::Claude.new(prompt, model)

      result = +""
      endpoint.perform_completion!(dialect, user) { |partial| result << partial }

      expect(result).to eq("No cache")

      # Verify cache_control was NOT added: the system prompt stays a plain
      # string, and the marker is absent from the whole serialized payload
      # (system prompt and messages alike).
      parsed_body = JSON.parse(request.body, symbolize_names: true)
      expect(parsed_body[:system]).to eq("You are a bot")
      expect(request.body).not_to include("cache_control")
    end
  end

  it "does not apply caching to non-Claude models on Bedrock" do
    # Caching should only work for Anthropic Claude models, not other Bedrock models.
    # Fully testing this would need a Nova model setup; for now this is only a
    # smoke check that the endpoint exposes the caching predicate.
    expect(endpoint).to respond_to(:should_apply_prompt_caching?)
  end
end
|
|
|
|
describe "adaptive thinking" do
  # Both examples perform a non-streaming completion against a stubbed
  # Bedrock invoke endpoint and inspect the JSON payload that was sent.
  it "sends adaptive thinking config when enabled" do
    model.update!(
      provider_params: {
        access_key_id: "123",
        region: "us-east-1",
        enable_reasoning: true,
        adaptive_thinking: true,
      },
    )

    invoke_url =
      "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke"
    reply = {
      content: [{ text: "test response" }],
      usage: {
        input_tokens: 10,
        output_tokens: 5,
      },
    }.to_json

    llm = DiscourseAi::Completions::Llm.proxy(model)
    request = nil

    stub_request(:post, invoke_url)
      .with do |incoming|
        request = incoming
        true
      end
      .to_return(status: 200, body: reply)

    llm.generate("test prompt", user: user)

    sent_payload = JSON.parse(request.body)
    expect(sent_payload["thinking"]).to eq({ "type" => "adaptive" })
    expect(sent_payload["max_tokens"]).to eq(32_000)
  end

  # Same setup plus an explicit reasoning_tokens budget; the payload must
  # still carry only the adaptive thinking config.
  it "adaptive_thinking takes priority over enable_reasoning" do
    model.update!(
      provider_params: {
        access_key_id: "123",
        region: "us-east-1",
        enable_reasoning: true,
        adaptive_thinking: true,
        reasoning_tokens: 10_000,
      },
    )

    invoke_url =
      "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke"
    reply = {
      content: [{ text: "test response" }],
      usage: {
        input_tokens: 10,
        output_tokens: 5,
      },
    }.to_json

    llm = DiscourseAi::Completions::Llm.proxy(model)
    request = nil

    stub_request(:post, invoke_url)
      .with do |incoming|
        request = incoming
        true
      end
      .to_return(status: 200, body: reply)

    llm.generate("test prompt", user: user)

    sent_payload = JSON.parse(request.body)
    expect(sent_payload["thinking"]).to eq({ "type" => "adaptive" })
    expect(sent_payload["max_tokens"]).to eq(32_000)
  end
end
|
|
|
|
describe "effort parameter" do
  # Sets the model's "effort" provider param, performs a non-streaming
  # completion against a stubbed Bedrock invoke endpoint, and returns the
  # parsed JSON body of the request that was sent.
  def request_body_for_effort(effort)
    model.update!(provider_params: { access_key_id: "123", region: "us-east-1", effort: effort })

    reply = {
      content: [{ text: "test response" }],
      usage: {
        input_tokens: 10,
        output_tokens: 5,
      },
    }.to_json

    sent_request = nil
    stub_request(
      :post,
      "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke",
    )
      .with do |incoming|
        # Keep the outgoing request; returning true matches every request.
        sent_request = incoming
        true
      end
      .to_return(status: 200, body: reply)

    DiscourseAi::Completions::Llm.proxy(model).generate("test prompt", user: user)

    JSON.parse(sent_request.body)
  end

  it "includes effort in output_config when set to max" do
    request_body = request_body_for_effort("max")

    expect(request_body.dig("output_config", "effort")).to eq("max")
  end

  it "includes effort in output_config when set to xhigh" do
    request_body = request_body_for_effort("xhigh")

    expect(request_body.dig("output_config", "effort")).to eq("xhigh")
  end

  it "includes effort in output_config when set to low, medium, or high" do
    # Exercise every level named in the description, not just one of them.
    %w[low medium high].each do |effort|
      request_body = request_body_for_effort(effort)

      expect(request_body.dig("output_config", "effort")).to eq(effort)
      expect(request_body).not_to have_key("anthropic_beta")
    end
  end

  it "omits effort when set to default" do
    request_body = request_body_for_effort("default")

    expect(request_body).not_to have_key("output_config")
    expect(request_body).not_to have_key("anthropic_beta")
  end

  # When both an effort level and a JSON schema response format are in play,
  # output_config must contain both keys side by side.
  it "merges effort and structured output format in output_config" do
    model.update!(provider_params: { access_key_id: "123", region: "us-east-1", effort: "high" })

    schema = {
      type: "json_schema",
      json_schema: {
        name: "reply",
        schema: {
          type: "object",
          properties: {
            key: {
              type: "string",
            },
          },
          required: ["key"],
          additionalProperties: false,
        },
        strict: true,
      },
    }

    messages =
      [
        { type: "message_start", message: { usage: { input_tokens: 9 } } },
        { type: "content_block_delta", delta: { text: "{\"key\":\"value\"}" } },
        { type: "message_delta", delta: { usage: { output_tokens: 25 } } },
      ].map { |message| encode_message(message) }

    proxy = DiscourseAi::Completions::Llm.proxy(model)
    request = nil

    bedrock_mock.with_chunk_array_support do
      stub_request(
        :post,
        "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke-with-response-stream",
      )
        .with do |inner_request|
          request = inner_request
          true
        end
        .to_return(status: 200, body: messages)

      # The block is only there to request streaming; partials are ignored.
      proxy.generate("hello world", response_format: schema, user: user) { |partial| }

      request_body = JSON.parse(request.body)
      expect(request_body["output_config"]).to eq(
        {
          "effort" => "high",
          "format" => {
            "type" => "json_schema",
            "schema" => schema[:json_schema][:schema].deep_stringify_keys,
          },
        },
      )
    end
  end
end
|
|
end
|