mirror of
https://gh.wpcy.net/https://github.com/discourse/discourse.git
synced 2026-06-19 08:23:45 +08:00
## Summary
Image uploads delivered through the `aws_bedrock_converse` LLM provider
were rejected by Bedrock with `Could not process image` whenever an
agent / LLM had `vision_enabled` set to true.
Two related bugs are fixed:
### 1. `Dialects::Converse#upload_node` — base64 string passed where raw bytes expected
In `plugins/discourse-ai/lib/completions/dialects/converse.rb`, image
content was emitted as:
```ruby
source: { bytes: details[:base64] }
```
`details[:base64]` is the upload's base64-encoded string (as produced by
`UploadEncoder`), but `Aws::BedrockRuntime::Client#converse` expects
**raw bytes** on the `:bytes` key — the SDK then base64-encodes them on
the wire. Passing the already-base64-encoded string causes Bedrock to
receive **doubly-encoded** data, which it cannot decode into a valid
image. Decoding back to raw bytes via
`Base64.decode64(details[:base64])` resolves the round-trip.
### 2. `AwsBedrockConverse#perform_completion!` — JSON-logging fails on binary payloads
With raw bytes now flowing through `sdk_params`, the subsequent
`sdk_params.to_json` call (used to record the request in `start_log`)
raises `EncodingError` because PNG/JPEG bytes are not valid UTF-8. The
call is wrapped in `begin / rescue EncodingError` so the request can
still proceed; a placeholder string is recorded in the audit log instead
of the binary payload.
## Test plan
- A new spec case in
`plugins/discourse-ai/spec/lib/completions/dialects/converse_spec.rb`
asserts that `details[:base64]` is decoded back to raw bytes before
being emitted as `source: { bytes: ... }`. This guards against
regression.
- Verified end-to-end against `us.anthropic.claude-sonnet-4-6` via
Bedrock Converse on `ap-northeast-1` → `us-east-1` cross-region
inference profile: with this patch the model correctly describes
uploaded PNG attachments (a Loupe Browser version warning dialog)
instead of returning `Could not process image`.
## Reproduction (before the fix)
1. Configure an `aws_bedrock_converse` LLM in Discourse and assign it to
an `AiAgent` with `vision_enabled: true`.
2. Wire up `llm_triage` (or any path that goes through
`Dialects::Converse#upload_node`) to reply to a topic that contains an
image upload.
3. Observe:
`DiscourseAi::Completions::Endpoints::Base::CompletionFailed: The model
returned the following errors: Could not process image`
## Discovered while
Standing up a Discourse instance with Bedrock-backed AI as part of an
internal forum spike. Happy to iterate on the patch (e.g. tighten the
log fallback or extract a helper) if reviewers prefer a different shape.
🤖 Generated with [Claude Code](https://claude.com/claude-code)
---------
Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Co-authored-by: Rafael Silva <xfalcox@gmail.com>
203 lines
5.8 KiB
Ruby
Vendored
203 lines
5.8 KiB
Ruby
Vendored
# frozen_string_literal: true
|
|
|
|
module DiscourseAi
|
|
module Completions
|
|
module Dialects
|
|
class Converse < Dialect
|
|
class << self
  # Reports whether this dialect is the right translator for a model.
  #
  # @param llm_model [#provider] object exposing the configured provider name
  # @return [Boolean] true only when the provider is "aws_bedrock_converse"
  def can_translate?(llm_model)
    llm_model.provider == "aws_bedrock_converse"
  end
end
|
|
|
|
# Plain container for the three parts of a translated Converse request:
# the system section, the message list and the tool configuration.
class ConversePrompt
  attr_reader :system, :messages, :tool_config

  # @param system the system section; #translate passes either nil or an
  #   array of the form [{ text: ... }]
  # @param messages the already translated messages
  # @param tool_config optional tool configuration, nil when omitted
  def initialize(system, messages, tool_config = nil)
    @system = system
    @messages = messages
    @tool_config = tool_config
  end

  # @return [String] the system section converted with #to_s
  #
  # NOTE(review): #translate hands an Array to the constructor, for which
  # #to_s yields the inspect-style representation rather than the bare
  # prompt text — confirm that callers expect this.
  def system_prompt
    system.to_s
  end

  # @return [Boolean] whether a non-blank tool configuration was supplied
  def has_tools?
    tool_config.present?
  end
end
|
|
|
|
# Converts the generic message list produced by the parent dialect into a
# ConversePrompt.
#
# Steps, in order:
# 1. a leading "system" message is removed from the list and kept aside;
# 2. every remaining message is reduced to { role:, content: } with the
#    content assembled by #build_content;
# 3. a short "OK" filler turn of the opposite role is inserted between two
#    consecutive messages sharing the "user" or "assistant" role;
# 4. the tool configuration is read from #tools_dialect.
#
# @return [ConversePrompt]
def translate
  messages = super

  system = messages.shift[:content] if messages.first&.dig(:role) == "system"

  converse_messages =
    messages.map { |msg| { role: msg[:role], content: build_content(msg) } }

  # Converse API requires alternating user/assistant roles
  opposite_role = { "user" => "assistant", "assistant" => "user" }
  interleaved = []
  previous_role = nil

  converse_messages.each do |message|
    role = message[:role]

    # Only the two conversational roles get a filler; any other repeated
    # role has no entry in the table and passes through untouched.
    filler_role = opposite_role[role] if previous_role == role
    interleaved << { role: filler_role, content: [{ text: "OK" }] } if filler_role

    interleaved << message
    previous_role = role
  end

  tool_config = tools_dialect.translated_tools

  # A blank system prompt is passed through as nil/blank rather than being
  # wrapped in a text node.
  ConversePrompt.new(system.presence && [{ text: system }], interleaved, tool_config)
end
|
|
|
|
# Prompt token budget, taken straight from the model configuration.
#
# @return whatever llm_model.max_prompt_tokens returns
def max_prompt_tokens
  llm_model.max_prompt_tokens
end
|
|
|
|
# This dialect always reports native tool support.
#
# @return [true]
def native_tool_support?
  true
end
|
|
|
|
# Lazily builds the ConverseTools helper for the prompt's tools and reuses
# the same instance on later calls.
#
# @return [DiscourseAi::Completions::Dialects::ConverseTools]
def tools_dialect
  @tools_dialect ||= DiscourseAi::Completions::Dialects::ConverseTools.new(prompt.tools)
end
|
|
|
|
private
|
|
|
|
# Normalises a message's content into a fresh array of content nodes.
#
# - Array content is copied element by element,
# - Hash content becomes a single node,
# - String content is wrapped as { text: ... },
# - anything else (including nil) contributes nothing.
#
# Entries of msg[:images], when present, are appended after the content.
#
# @param msg [Hash] message with :content and optionally :images
# @return [Array] new array; the message's own content array is not mutated
def build_content(msg)
  existing_content = msg[:content]

  content =
    case existing_content
    when Array
      existing_content.dup
    when Hash
      [existing_content]
    when String
      [{ text: existing_content }]
    else
      []
    end

  msg[:images]&.each { |image| content << image }

  content
end
|
|
|
|
# Maps an image MIME type to the value used for the image node's :format.
#
# Matching ignores letter case and surrounding whitespace, since media type
# names are case-insensitive. "image/jpeg" and every unrecognised or
# missing type resolve to "jpeg".
#
# @param mime_type [String, nil]
# @return [String] one of "jpeg", "png", "gif", "webp"
def detect_format(mime_type)
  case mime_type.to_s.strip.downcase
  when "image/png"
    "png"
  when "image/gif"
    "gif"
  when "image/webp"
    "webp"
  else
    "jpeg"
  end
end
|
|
|
|
# Builds the intermediate system message; its content is passed through
# unchanged.
#
# @param msg [Hash] message carrying :content
# @return [Hash] { role: "system", content: ... }
def system_msg(msg)
  { role: "system", content: msg[:content] }
end
|
|
|
|
# Builds the intermediate user message.
#
# The content (a single value or a possibly nested array) is flattened and
# handed to to_encoded_content_array, which is defined outside this file.
# Text parts become { text: ... } nodes, uploads are rendered by
# #upload_node, and #converse_upload_allowed? is supplied as the upload
# filter. Images are only allowed when vision_support? says so; documents
# are always allowed.
#
# @param msg [Hash] message carrying :content
# @return [Hash] { role: "user", content: <encoded content array> }
def user_msg(msg)
  content_array = [msg[:content]].flatten

  content_array =
    to_encoded_content_array(
      content: content_array,
      upload_encoder: ->(details) { upload_node(details) },
      text_encoder: ->(text) { { text: text } },
      allow_images: vision_support?,
      allow_documents: true,
      allowed_attachment_types: llm_model.allowed_attachment_types,
      upload_filter: ->(encoded) { converse_upload_allowed?(encoded) },
    )

  { role: "user", content: content_array }
end
|
|
|
|
# Upload filter used by #user_msg.
#
# Images always pass. Any other upload passes only when document_allowed?
# accepts it and it carries non-blank :text.
#
# @param encoded [Hash] encoded upload with :kind and optionally :text
# @return [Boolean]
def converse_upload_allowed?(encoded)
  return true if encoded[:kind] == :image

  document_allowed?(encoded) && encoded[:text].present?
end
|
|
|
|
# Renders one encoded upload as a Converse content node.
#
# - An upload with non-blank :text becomes a plain { text: ... } node.
# - Otherwise only images are rendered; any other kind yields nil.
# - For images, :format is used when given, else it is derived from
#   :mime_type via #detect_format.
#
# @param details [Hash] encoded upload (:text, :kind, :format, :mime_type,
#   :base64)
# @return [Hash, nil]
def upload_node(details)
  return { text: details[:text] } if details[:text].present?
  return if details[:kind] != :image

  {
    image: {
      format: details[:format] || detect_format(details[:mime_type]),
      source: {
        # AWS SDK for Ruby expects raw bytes here and will base64-encode them
        # on the wire. Passing the already-base64-encoded string causes Bedrock
        # to receive doubly-encoded data and respond with
        # "Could not process image".
        bytes: Base64.decode64(details[:base64]),
      },
    },
  }
end
|
|
|
|
# Builds the intermediate assistant message.
#
# Reasoning data returned by #converse_reasoning is emitted first:
# - a reasoning_text node when the message has :thinking and the provider
#   info carries a signature,
# - a redacted_content node when the provider info carries one.
# The message's own content follows: a String is wrapped as { text: ... },
# an Array is appended as is, anything else is ignored.
#
# The provider info is read with symbol keys first and string keys as a
# fallback, matching #converse_reasoning, which accepts the outer
# "bedrock_converse" key in either form.
#
# @param msg [Hash] message with :content, :thinking and
#   :thinking_provider_info
# @return [Hash] { role: "assistant", content: [...] }
def model_msg(msg)
  content = []

  provider_info = converse_reasoning(msg)
  if provider_info.present?
    signature = provider_info[:signature] || provider_info["signature"]
    if msg[:thinking] && signature
      content << {
        reasoning_content: {
          reasoning_text: {
            text: msg[:thinking],
            signature: signature,
          },
        },
      }
    end

    redacted_content = provider_info[:redacted_content] || provider_info["redacted_content"]
    if redacted_content
      content << {
        reasoning_content: {
          redacted_content: redacted_content,
        },
      }
    end
  end

  text = msg[:content]
  case text
  when String
    content << { text: text }
  when Array
    content.concat(text)
  end

  { role: "assistant", content: content }
end
|
|
|
|
# Extracts the Bedrock Converse entry from a message's
# :thinking_provider_info.
#
# The entry is looked up under the symbol key first and the string key
# second.
#
# @param message [Hash]
# @return the stored entry, or nil when the provider info is blank or has
#   no Bedrock Converse entry
def converse_reasoning(message)
  info = message[:thinking_provider_info]
  return if info.blank?

  info[:bedrock_converse] || info["bedrock_converse"]
end
|
|
|
|
# Builds the intermediate message for a tool result. The content comes
# from tools_dialect.from_raw_tool and is sent under the "user" role.
#
# @param msg [Hash] raw tool result message
# @return [Hash] { role: "user", content: ... }
def tool_msg(msg)
  translated = tools_dialect.from_raw_tool(msg)
  { role: "user", content: translated }
end
|
|
|
|
# Builds the intermediate message for a tool invocation. The content comes
# from tools_dialect.from_raw_tool_call and is sent under the "assistant"
# role.
#
# @param msg [Hash] raw tool call message
# @return [Hash] { role: "assistant", content: ... }
def tool_call_msg(msg)
  translated = tools_dialect.from_raw_tool_call(msg)
  { role: "assistant", content: translated }
end
|
|
end
|
|
end
|
|
end
|
|
end
|