# discourse/plugins/discourse-ai/app/models/llm_model.rb
#
# 589 lines
# 17 KiB
# Ruby
# Vendored

# frozen_string_literal: true
class LlmModel < ActiveRecord::Base
# TODO: Drop this override once the 20251212144720_populate_ai_bot_enabled_llms_setting
# migration has been promoted to pre-deploy.
self.ignored_columns = ["enabled_chat_bot"]
# Upper bound for the ids given to companion bot users (see toggle_companion_user).
FIRST_BOT_USER_ID = -1200

# Provider identifiers of the two AWS Bedrock integrations.
BEDROCK_PROVIDER_NAME = "aws_bedrock"
BEDROCK_CONVERSE_PROVIDER_NAME = "aws_bedrock_converse"

# Attachment types used when a model has none configured (currently none).
DEFAULT_ALLOWED_ATTACHMENT_TYPES = [].freeze

# Alternate spellings of an attachment type mapped to the canonical one.
ATTACHMENT_TYPE_ALIASES = { "markdown" => "md", "md" => "md", "htm" => "html", "text" => "txt" }.freeze

# One entry per billable token category. :tokens names the attribute holding the token count
# on a usage record, :cost names the llm_models price column it is multiplied by.
COST_COMPONENTS = {
  input: { tokens: :request_tokens, cost: :input_cost },
  output: { tokens: :response_tokens, cost: :output_cost },
  cache_read: { tokens: :cache_read_tokens, cost: :cached_input_cost },
  cache_write: { tokens: :cache_write_tokens, cost: :cache_write_cost },
}.freeze
# Builds the SQL expression "tokens * price" for one cost component.
#
# component - a key of COST_COMPONENTS (raises KeyError for anything else).
# table     - name of the table holding the token count columns; it is quoted through the
#             connection. The price column is always read from llm_models.
#
# Returns a String SQL fragment; NULL token counts and prices are treated as 0.
def self.spending_component_sql(component, table)
  columns = COST_COMPONENTS.fetch(component)
  quoted_table = connection.quote_table_name(table.to_s)
  token_expr = "COALESCE(#{quoted_table}.#{columns[:tokens]}, 0)"
  price_expr = "COALESCE(llm_models.#{columns[:cost]}, 0)"
  "#{token_expr} * #{price_expr}"
end
# Builds the SQL expression for the total spend: the per-component expressions of every
# COST_COMPONENTS entry, in declaration order, joined with " + ".
#
# table - name of the table holding the token count columns.
def self.spending_sql(table)
  terms = COST_COMPONENTS.each_key.map { |component| spending_component_sql(component, table) }
  terms.join(" + ")
end
# Computes what a usage record cost on this model.
#
# record - any object responding to the :tokens readers listed in COST_COMPONENTS; missing
#          (nil) counts are treated as 0, as are nil prices on the model.
#
# Returns a Float: sum of tokens * price over all components, divided by 1,000,000 and rounded
# to 6 decimals (so prices are presumably expressed per million tokens - confirm with callers).
def spending_for(record)
  micro_total =
    COST_COMPONENTS.values.sum do |component|
      token_count = record.public_send(component[:tokens]).to_i
      unit_price = public_send(component[:cost]).to_f
      token_count * unit_price
    end

  (micro_total / 1_000_000.0).round(6)
end
# Records owned by this model; they are destroyed together with it.
has_many :llm_quotas, dependent: :destroy
has_one :llm_credit_allocation, dependent: :destroy
has_many :llm_feature_credit_costs, dependent: :destroy

# Companion bot user. It is created lazily by toggle_companion_user and reset to nil by
# cleanup_companion_user, so the association is explicitly optional (matching ai_secret)
# instead of depending on the application-wide belongs_to default.
belongs_to :user, optional: true
belongs_to :ai_secret, optional: true
# Identity and capability attributes.
validates :display_name, presence: true, length: { maximum: 100 }
validates :tokenizer, presence: true, inclusion: DiscourseAi::Completions::Llm.tokenizer_names
validates :provider, presence: true, inclusion: DiscourseAi::Completions::Llm.provider_names

# Every provider except the two Bedrock ones must be given an endpoint URL.
validates :url,
          presence: true,
          unless: -> { [BEDROCK_PROVIDER_NAME, BEDROCK_CONVERSE_PROVIDER_NAME].include?(provider) }
validates :name, presence: true
validate :api_key_or_secret_present
validates :max_prompt_tokens, numericality: { greater_than: 0 }

# Prices and the output token cap are optional, but may not be negative when present.
validates :input_cost,
          :cached_input_cost,
          :cache_write_cost,
          :output_cost,
          :max_output_tokens,
          numericality: { greater_than_or_equal_to: 0 },
          allow_nil: true
validate :required_provider_params
# Models whose id appears among the keys of LlmEnumerator.global_usage.
scope :in_use, -> { where(id: DiscourseAi::Configuration::LlmEnumerator.global_usage.keys) }
# Parses the pipe-separated SiteSetting.ai_bot_enabled_llms into Integer ids.
# Entries that do not convert to a non-zero integer (blank, non-numeric, "0") are dropped.
def self.enabled_chat_bot_ids
  raw_ids = SiteSetting.ai_bot_enabled_llms.split("|")
  raw_ids.map(&:to_i).select(&:nonzero?)
end
# True when this model's id is listed in the enabled chat bot ids site setting.
def enabled_chat_bot?
  enabled_ids = self.class.enabled_chat_bot_ids
  enabled_ids.include?(id)
end
# Describes the provider-specific settings that can be configured for each provider.
#
# Returns a newly built Hash keyed by provider (Symbol). Each value maps a param name to either
# a bare field type (:text, :secret, :checkbox) or a Hash with :type plus optional :values,
# :default, :depends_on, :hidden_if and :tooltip entries. Entry order is preserved exactly.
#
# When SiteSetting.ai_llm_temperature_top_p_enabled is falsy, the :disable_temperature and
# :disable_top_p entries are removed from every provider before returning.
def self.provider_params
  # The builders are lambdas so every use yields new objects: nothing is shared between
  # providers or between calls, and the result can be mutated freely.
  anthropic_style_reasoning = -> do
    {
      enable_reasoning: :checkbox,
      adaptive_thinking: { type: :checkbox, depends_on: :enable_reasoning },
      reasoning_tokens: { type: :number, depends_on: :enable_reasoning, hidden_if: :adaptive_thinking },
      effort: {
        type: :enum,
        values: ["default", *DiscourseAi::Completions::Endpoints::AnthropicShared::EFFORT_VALUES],
        default: "default",
      },
    }
  end

  # Sampling toggles hidden while reasoning / adaptive thinking is switched on.
  reasoning_gated_sampling = -> do
    {
      disable_temperature: { type: :checkbox, hidden_if: %i[enable_reasoning adaptive_thinking] },
      disable_top_p: { type: :checkbox, hidden_if: %i[enable_reasoning adaptive_thinking] },
    }
  end

  # Sampling toggles hidden while a reasoning effort is selected.
  effort_gated_sampling = -> do
    {
      disable_temperature: { type: :checkbox, hidden_if: :reasoning_effort },
      disable_top_p: { type: :checkbox, hidden_if: :reasoning_effort },
    }
  end

  prompt_caching = -> { { type: :enum, values: %w[never tool_results always], default: "tool_results" } }
  reasoning_effort = -> do
    { type: :enum, values: %w[default none minimal low medium high xhigh], default: "default" }
  end
  service_tier = -> { { type: :enum, values: %w[default auto flex priority], default: "default" } }

  # vLLM reasoning parsers whose option label equals their id.
  vllm_parsers = %w[
    deepseek_r1
    qwen3
    deepseek_v3
    deepseek_v4
    gemma4
    granite
    glm45
    hunyuan_a13b
    cohere_command3
    ernie45
    holo2
    minimax_m2_append_think
  ]

  params = {
    aws_bedrock: {
      access_key_id: :secret,
      role_arn: :text,
      region: :text,
      inference_profile_arn: :text,
      **anthropic_style_reasoning.call,
      disable_native_tools: :checkbox,
      disable_native_structured_output: :checkbox,
      **reasoning_gated_sampling.call,
      prompt_caching: prompt_caching.call,
    },
    aws_bedrock_converse: {
      access_key_id: :secret,
      role_arn: :text,
      region: :text,
      **anthropic_style_reasoning.call,
      **reasoning_gated_sampling.call,
      prompt_caching: prompt_caching.call,
      extra_model_fields: :text,
    },
    anthropic: {
      **anthropic_style_reasoning.call,
      disable_native_tools: :checkbox,
      disable_native_structured_output: :checkbox,
      **reasoning_gated_sampling.call,
      prompt_caching: prompt_caching.call,
    },
    open_ai: {
      organization: :text,
      disable_native_tools: :checkbox,
      reasoning_effort: reasoning_effort.call,
      **effort_gated_sampling.call,
      disable_streaming: :checkbox,
      service_tier: service_tier.call,
    },
    groq: {
      disable_native_tools: :checkbox,
      reasoning_effort: reasoning_effort.call,
      **effort_gated_sampling.call,
      disable_streaming: :checkbox,
    },
    mistral: {
      disable_native_tools: :checkbox,
    },
    google: {
      disable_native_tools: :checkbox,
      enable_thinking: :checkbox,
      thinking_level: {
        type: :enum,
        values: %w[default minimal low medium high],
        default: "default",
        depends_on: :enable_thinking,
      },
      thinking_tokens: { type: :number, depends_on: :enable_thinking, hidden_if: :thinking_level },
      disable_temperature: { type: :checkbox, hidden_if: :enable_thinking },
      disable_top_p: :checkbox,
    },
    azure: {
      disable_native_tools: :checkbox,
      reasoning_effort: reasoning_effort.call,
      **effort_gated_sampling.call,
      disable_streaming: :checkbox,
      service_tier: service_tier.call,
    },
    hugging_face: {
      disable_system_prompt: :checkbox,
      disable_native_tools: :checkbox,
    },
    vllm: {
      disable_system_prompt: :checkbox,
      disable_native_tools: :checkbox,
      reasoning_parser: {
        type: :enum,
        values: [
          { id: "default", name: "Server default" },
          *vllm_parsers.map { |parser| { id: parser, name: parser } },
        ],
        default: "default",
        tooltip: "discourse_ai.llms.provider_field_hints.reasoning_parser",
      },
      thinking_override: {
        type: :enum,
        values: [
          { id: "default", name: "Server default" },
          { id: "on", name: "Force on" },
          { id: "off", name: "Force off" },
        ],
        default: "default",
        depends_on: :reasoning_parser,
        tooltip: "discourse_ai.llms.provider_field_hints.thinking_override",
      },
      reasoning_effort: {
        type: :enum,
        values: [
          { id: "default", name: "Server default" },
          { id: "none", name: "None" },
          { id: "low", name: "Low" },
          { id: "medium", name: "Medium" },
          { id: "high", name: "High" },
        ],
        default: "default",
        depends_on: :reasoning_parser,
        tooltip: "discourse_ai.llms.provider_field_hints.reasoning_effort",
      },
      thinking_token_budget: {
        type: :number,
        depends_on: :reasoning_parser,
        tooltip: "discourse_ai.llms.provider_field_hints.thinking_token_budget",
      },
      **effort_gated_sampling.call,
      disable_streaming: :checkbox,
    },
    ollama: {
      disable_system_prompt: :checkbox,
      enable_native_tool: :checkbox,
      disable_streaming: :checkbox,
    },
    open_router: {
      disable_native_tools: :checkbox,
      provider_order: :text,
      provider_quantizations: :text,
      disable_streaming: :checkbox,
      disable_temperature: :checkbox,
      disable_top_p: :checkbox,
    },
  }

  return params if SiteSetting.ai_llm_temperature_top_p_enabled

  # Sampling controls are switched off site-wide: hide their toggles everywhere.
  # each_value returns the receiver, so params is still the return value.
  params.each_value do |provider_config|
    %i[disable_temperature disable_top_p].each { |toggle| provider_config.delete(toggle) }
  end
end
# Hands this record to DiscourseAi::Completions::Llm.proxy and returns whatever it builds.
# NOTE(review): presumably a completion client configured from this model - Llm.proxy is not
# visible here, confirm against its definition.
def to_llm
DiscourseAi::Completions::Llm.proxy(self)
end
# String form of the record id ("" while the record has no id yet).
def identifier
  id.to_s
end
# Brings the companion bot user in line with the current settings.
#
# When the AI bot is enabled and this model is in the enabled list, the companion user is
# created (if missing) or re-activated. Otherwise cleanup_companion_user is run.
# Does nothing for the model named "fake" in production.
def toggle_companion_user
  return if name == "fake" && Rails.env.production?

  bot_wanted = SiteSetting.ai_bot_enabled && enabled_chat_bot?
  return cleanup_companion_user unless bot_wanted

  if user
    # Existing companion: just make sure it is active again.
    user.active = true
    user.save!(validate: false)
  else
    # One below the lowest existing user id, but never above FIRST_BOT_USER_ID.
    lowest_free_id = DB.query_single(<<~SQL).first
      SELECT min(id) - 1 FROM users
    SQL

    companion =
      User.new(
        id: [FIRST_BOT_USER_ID, lowest_free_id].min,
        email: "no_email_#{SecureRandom.hex}",
        name: name.titleize,
        username: UserNameSuggester.suggest(name),
        active: true,
        approved: true,
        admin: true,
        moderator: true,
        trust_level: TrustLevel[4],
      )
    # Saved without validations (the placeholder email above is not a real address).
    companion.save!(validate: false)
    update!(user: companion)
  end
end
# Retires the companion bot user, if there is one.
#
# A companion that has authored posts is only deactivated; one without any posts is
# destroyed and the association is cleared.
def cleanup_companion_user
  return unless user

  # Raw query so that deleted posts are counted too. The id is passed as a bind parameter
  # rather than interpolated into the SQL string.
  has_posts =
    DB.query_single("SELECT 1 FROM posts WHERE user_id = :user_id LIMIT 1", user_id: user.id).present?

  if has_posts
    user.update!(active: false) if user.active
  else
    user.destroy!
    update!(user: nil)
  end
end
# Resolves the tokenizer attribute (a class name stored as a String) to the constant it
# names, via String#constantize.
def tokenizer_class
tokenizer.constantize
end
# Canonicalizes a list of attachment types.
#
# value - a single type, an Array of types, or nil. Each entry is stringified, downcased,
#         stripped and mapped through ATTACHMENT_TYPE_ALIASES; blanks and duplicates are
#         dropped (first occurrence wins).
#
# Returns an Array of Strings, or DEFAULT_ALLOWED_ATTACHMENT_TYPES when nothing is left.
def self.normalize_attachment_types(value)
  cleaned =
    Array(value).map do |raw|
      type = raw.to_s.downcase.strip
      ATTACHMENT_TYPE_ALIASES.fetch(type, type)
    end
  cleaned = cleaned.reject(&:blank?).uniq

  cleaned.empty? ? DEFAULT_ALLOWED_ATTACHMENT_TYPES : cleaned
end
# Reader for the allowed_attachment_types column that always returns a normalized list;
# a blank stored value falls back to DEFAULT_ALLOWED_ATTACHMENT_TYPES.
def allowed_attachment_types
  stored = self[:allowed_attachment_types]
  source = stored.present? ? stored : DEFAULT_ALLOWED_ATTACHMENT_TYPES
  self.class.normalize_attachment_types(source)
end
# Writer for the allowed_attachment_types column; the value is normalized before it is stored.
def allowed_attachment_types=(value)
  normalized_types = self.class.normalize_attachment_types(value)
  self[:allowed_attachment_types] = normalized_types
end
# Reads one provider-specific setting from the provider_params column.
#
# key - name of the setting, looked up in provider_params exactly as given.
#
# Returns nil when the setting is unset, or when its definition declares :depends_on and one
# of those settings is not active. For settings of type :secret whose stored value is all
# digits, the value is treated as an AiSecret id and the secret itself is returned when that
# record exists. In every other case the stored value is returned unchanged.
def lookup_custom_param(key)
  raw = provider_params&.dig(key)
  return raw if raw.nil?

  definition = self.class.provider_params.dig(provider&.to_sym, key.to_sym)
  detailed = definition.is_a?(Hash)

  if detailed && definition[:depends_on]
    prerequisites = Array(definition[:depends_on])
    return nil unless prerequisites.all? { |dependency| param_active?(dependency) }
  end

  secret_field = definition == :secret || (detailed && definition[:type] == :secret)
  if secret_field && raw.to_s.match?(/\A\d+\z/)
    secret_record = AiSecret.find_by(id: raw.to_i)
    return secret_record.secret if secret_record
  end

  raw
end
# True for persisted records with a negative id (the convention this model uses for
# seeded LLMs); false for unsaved records and regular positive ids.
def seeded?
  !id.nil? && id.negative?
end
# Resolves the API key to use for this model.
#
# Seeded models read DISCOURSE_AI_SEEDED_LLM_API_KEY_<abs id> from the environment and fall
# back to the api_key column. Other models prefer the linked AiSecret and fall back to the
# api_key column when none is linked.
def api_key
  if seeded?
    return ENV.fetch("DISCOURSE_AI_SEEDED_LLM_API_KEY_#{id.abs}") { self[:api_key] }
  end

  return ai_secret.secret if ai_secret.present?

  self[:api_key]
end
# True only for seeded models that have a credit allocation attached.
def credit_system_enabled?
  return false unless seeded?

  llm_credit_allocation.present?
end
# Assume-role credentials for the aws_bedrock provider.
#
# Returns nil for any other provider or when no role_arn is configured. Otherwise returns an
# Aws::AssumeRoleCredentials built from the configured role_arn and region, memoized on this
# instance for as long as both settings stay the same.
def aws_bedrock_credentials
  return nil unless provider == BEDROCK_PROVIDER_NAME

  role_arn = lookup_custom_param("role_arn")
  return nil if role_arn.blank?

  region = lookup_custom_param("region")

  # Invalidate the cache when either input changes. The STS client is created for a specific
  # region, so keying the cache on role_arn alone would keep using the old region.
  if @cached_role_arn != role_arn || @cached_sts_region != region
    @cached_role_arn = role_arn
    @cached_sts_region = region
    @aws_bedrock_credentials = nil
  end

  @aws_bedrock_credentials ||=
    begin
      # Loaded lazily so the STS SDK is only required when role-based auth is actually used.
      require "aws-sdk-sts" unless defined?(Aws::STS)
      Aws::AssumeRoleCredentials.new(
        role_arn: role_arn,
        role_session_name: "discourse-bedrock-#{Process.pid}",
        client: Aws::STS::Client.new(region: region),
      )
    end
end
private
# Whether a provider setting counts as switched on: any stored value other than nil, false,
# "false", "default" or the empty string.
def param_active?(key)
  setting = provider_params&.dig(key.to_s)
  ![nil, false, "false", "default", ""].include?(setting)
end
# Validation: a model needs either an existing AiSecret or an api_key value.
#
# Skipped for seeded models and for the Bedrock Converse provider, which supports
# auto-resolved credentials (env vars, instance profile).
def api_key_or_secret_present
  return if seeded? || provider == BEDROCK_CONVERSE_PROVIDER_NAME

  if ai_secret_id.present?
    # A linked secret satisfies the requirement, provided the record really exists.
    return if AiSecret.exists?(ai_secret_id)

    errors.add(:ai_secret_id, I18n.t("discourse_ai.llm_models.secret_not_found"))
    return
  end

  errors.add(:base, I18n.t("discourse_ai.llm_models.secret_required")) if self[:api_key].blank?
end
# Validation of provider-specific settings for the Bedrock providers.
#
# Both Bedrock providers need a region. The classic Bedrock provider additionally needs an
# access_key_id or a role_arn; for Converse these are optional because the SDK can
# auto-resolve credentials.
def required_provider_params
  classic_bedrock = provider == BEDROCK_PROVIDER_NAME
  return unless classic_bedrock || provider == BEDROCK_CONVERSE_PROVIDER_NAME

  if lookup_custom_param("region").blank?
    errors.add(:base, I18n.t("discourse_ai.llm_models.missing_provider_param", param: "region"))
  end

  return unless classic_bedrock

  if lookup_custom_param("access_key_id").blank? && lookup_custom_param("role_arn").blank?
    errors.add(:base, I18n.t("discourse_ai.llm_models.bedrock_missing_auth"))
  end
end
end
# == Schema Information
#
# Table name: llm_models
#
# id :bigint not null, primary key
# allowed_attachment_types :text default([]), not null, is an Array
# api_key :string
# cache_write_cost :float default(0.0)
# cached_input_cost :float
# display_name :string
# input_cost :float
# max_output_tokens :integer
# max_prompt_tokens :integer not null
# name :string not null
# output_cost :float
# provider :string not null
# provider_params :jsonb
# tokenizer :string not null
# url :string
# vision_enabled :boolean default(FALSE), not null
# created_at :datetime not null
# updated_at :datetime not null
# ai_secret_id :bigint
# user_id :integer
#
# Indexes
#
# index_llm_models_on_ai_secret_id (ai_secret_id)
#