discourse/plugins/discourse-ai/lib/completions/endpoints/anthropic_prompt_cache.rb
Sam 1dbe92f222
FIX: update model presets to Claude 4.6 and refresh OpenRouter models (#37934)
- Update Anthropic presets from Claude 4.5 to Claude 4.6 (Opus and
  Sonnet), increasing Opus max output tokens to 128k
- Add AWS Bedrock model ID mappings for Claude 4.5 and 4.6
- Remove anthropic-beta headers for prompt caching and effort
  features, which are now GA and no longer require beta flags
- Update OpenRouter model presets: MiniMax M2.1 → M2.5,
  Z-AI GLM-4.7 → GLM-5, refresh Kimi K2.5 token limits and pricing
- Broaden effort parameter description (no longer Opus-only)
- Update eval LLM config, locales, and specs accordingly

The most important change here is the beta header removal: keeping the
headers causes features to break, since the beta flags are no longer supported
2026-02-20 15:49:33 +11:00

55 lines
1.6 KiB
Ruby
Vendored

# frozen_string_literal: true
module DiscourseAi
  module Completions
    module Endpoints
      # Mixin with helpers for deciding whether a request should carry a
      # cache marker and for stamping that marker onto an outgoing payload.
      #
      # The including object must respond to +llm_model+, and that model must
      # respond to +lookup_custom_param+.
      module AnthropicPromptCache
        # Decides whether prompt caching should be applied for +prompt+, based
        # on the model's "prompt_caching" custom param.
        #
        # Modes handled:
        # * "always"       - always true
        # * "tool_results" - true only when one of the last five prompt
        #                    messages has a "tool_result" content entry
        # * anything else (including "never" and an unset param) - false
        #
        # @param prompt [#messages] object whose +messages+ are hash-like
        #   entries read via +[:content]+
        # @return [Boolean]
        def should_apply_prompt_caching?(prompt)
          mode = llm_model.lookup_custom_param("prompt_caching") || "never"

          case mode
          when "always"
            true
          when "tool_results"
            recent_messages = prompt.messages.last(5)

            recent_messages.any? do |message|
              body = message[:content]
              # A lone value is treated as a one-element list so hash content
              # and array content go through the same check; anything that is
              # not a Hash simply fails the predicate.
              entries = body.is_a?(Array) ? body : [body]
              entries.any? { |entry| entry.is_a?(Hash) && entry[:type] == "tool_result" }
            end
          else
            # Covers "never" as well as any unrecognised mode.
            false
          end
        end

        # Marks the final message of +payload+ with an "ephemeral"
        # +cache_control+ entry, mutating +payload+ in place.
        #
        # * String content is replaced by a single-element array holding a
        #   "text" entry that carries the original text and the marker.
        # * Array content gets the marker added to its last element, but only
        #   when that element is a Hash.
        # * Any other content, or missing/empty messages, is left untouched.
        #
        # @param payload [Hash] request payload, read via +payload[:messages]+
        # @param prompt [Object] accepted for interface compatibility; not read
        # @return [Array, Hash, nil] the value of the last assignment made, or
        #   nil when nothing was changed
        def apply_anthropic_cache_control!(payload, prompt)
          return unless payload[:messages].present?

          final_message = payload[:messages].last
          body = final_message[:content]

          case body
          when String
            final_message[:content] = [
              { type: "text", text: body, cache_control: { type: "ephemeral" } },
            ]
          when Array
            tail = body.last
            tail[:cache_control] = { type: "ephemeral" } if tail.is_a?(Hash)
          end
        end
      end
    end
  end
end