discourse/plugins/discourse-ai/app/models/llm_model.rb

# frozen_string_literal: true

class LlmModel < ActiveRecord::Base
  # TODO: Remove this line after 20251212144720_populate_ai_bot_enabled_llms_setting migration
  # has been promoted to pre-deploy
  self.ignored_columns = %w[enabled_chat_bot]

  # Upper bound for the id assigned to a newly created companion bot user;
  # toggle_companion_user picks this value or a lower one.
  FIRST_BOT_USER_ID = -1200
  # Provider identifiers that get special treatment in the validations and in
  # the credential helpers below.
  BEDROCK_PROVIDER_NAME = "aws_bedrock"
  BEDROCK_CONVERSE_PROVIDER_NAME = "aws_bedrock_converse"
  # Fallback list used when a model has no attachment types configured.
  DEFAULT_ALLOWED_ATTACHMENT_TYPES = [].freeze
  # Alternative spellings of an attachment type mapped to the form that
  # normalize_attachment_types stores.
  ATTACHMENT_TYPE_ALIASES = {
    "markdown" => "md",
    "md" => "md",
    "htm" => "html",
    "text" => "txt",
  }.freeze

  # Billable usage components. Each entry pairs the token-count attribute read
  # from a usage record (or usage table) with the cost attribute of this model
  # it is multiplied by. spending_for divides the summed products by 1,000,000,
  # so costs are presumably expressed per million tokens.
  COST_COMPONENTS = {
    input: {
      tokens: :request_tokens,
      cost: :input_cost,
    },
    output: {
      tokens: :response_tokens,
      cost: :output_cost,
    },
    cache_read: {
      tokens: :cache_read_tokens,
      cost: :cached_input_cost,
    },
    cache_write: {
      tokens: :cache_write_tokens,
      cost: :cache_write_cost,
    },
  }.freeze

  # Builds the SQL expression for one cost component: the token column of
  # +table+ multiplied by the matching cost column of llm_models, with NULLs
  # treated as zero.
  #
  # component - a key of COST_COMPONENTS; an unknown key raises KeyError.
  # table     - name of the usage table (quoted through the connection).
  #
  # Returns a String SQL fragment. The result is not divided by 1,000,000.
  def self.spending_component_sql(component, table)
    tokens_column, cost_column = COST_COMPONENTS.fetch(component).values_at(:tokens, :cost)
    quoted_table = connection.quote_table_name(table.to_s)

    "COALESCE(#{quoted_table}.#{tokens_column}, 0) * COALESCE(llm_models.#{cost_column}, 0)"
  end

  # Builds the SQL expression for the total spend of a usage row by adding the
  # per-component expressions of every entry in COST_COMPONENTS.
  #
  # table - name of the usage table the token columns live on.
  #
  # Returns a String of component expressions joined with " + ".
  def self.spending_sql(table)
    fragments = COST_COMPONENTS.each_key.map { |component| spending_component_sql(component, table) }
    fragments.join(" + ")
  end

  # Computes what +record+ cost when priced with this model.
  #
  # record - any object responding to the token readers named in
  #          COST_COMPONENTS (request_tokens, response_tokens, ...).
  #
  # Missing token counts and missing costs count as zero. The summed products
  # are divided by 1,000,000 and rounded to six decimals.
  #
  # Returns a Float.
  def spending_for(record)
    component_totals =
      COST_COMPONENTS.values.map do |component|
        token_count = record.public_send(component[:tokens]).to_i
        unit_cost = public_send(component[:cost]).to_f
        token_count * unit_cost
      end

    (component_totals.sum / 1_000_000.0).round(6)
  end

  # Dependent records are destroyed together with the model.
  has_many :llm_quotas, dependent: :destroy
  has_one :llm_credit_allocation, dependent: :destroy
  has_many :llm_feature_credit_costs, dependent: :destroy
  # Companion bot user, managed by toggle_companion_user / cleanup_companion_user.
  belongs_to :user
  # Optional stored secret; when present it is what api_key returns for
  # non-seeded models.
  belongs_to :ai_secret, optional: true

  validates :display_name, presence: true, length: { maximum: 100 }
  validates :tokenizer, presence: true, inclusion: DiscourseAi::Completions::Llm.tokenizer_names
  validates :provider, presence: true, inclusion: DiscourseAi::Completions::Llm.provider_names
  # A URL is mandatory except for the two Bedrock providers.
  validates :url,
            presence: true,
            unless: -> { provider.in?([BEDROCK_PROVIDER_NAME, BEDROCK_CONVERSE_PROVIDER_NAME]) }
  validates :name, presence: true
  validate :api_key_or_secret_present
  validates :max_prompt_tokens, numericality: { greater_than: 0 }
  # Costs and the output-token cap are optional, but must not be negative when set.
  validates :input_cost,
            :cached_input_cost,
            :cache_write_cost,
            :output_cost,
            :max_output_tokens,
            numericality: {
              greater_than_or_equal_to: 0,
            },
            allow_nil: true
  validate :required_provider_params
  # Models whose ids appear as keys of LlmEnumerator.global_usage.
  scope :in_use,
        -> do
          model_ids = DiscourseAi::Configuration::LlmEnumerator.global_usage.keys
          where(id: model_ids)
        end

  # Parses the pipe-separated ai_bot_enabled_llms site setting.
  #
  # Returns an Array of Integer model ids; entries that are empty, zero or not
  # numeric are dropped.
  def self.enabled_chat_bot_ids
    SiteSetting.ai_bot_enabled_llms.split("|").filter_map { |raw_id| raw_id.to_i.nonzero? }
  end

  # True when this model's id is listed in the ai_bot_enabled_llms site
  # setting (see LlmModel.enabled_chat_bot_ids).
  def enabled_chat_bot?
    self.class.enabled_chat_bot_ids.include?(id)
  end

  # Describes the provider-specific settings that can be stored in the
  # provider_params column.
  #
  # Returns a Hash keyed by provider (Symbol). Each value maps a parameter
  # name to either a bare type Symbol (:secret, :text, :checkbox) or a Hash
  # holding :type plus optional :values, :default, :depends_on, :hidden_if and
  # :tooltip entries. The Hash is rebuilt on every call.
  #
  # :depends_on is honoured by lookup_custom_param, which returns nil for a
  # parameter whose dependency is not active. :hidden_if, :values, :default
  # and :tooltip are not read anywhere in this class; presumably they drive
  # the admin form — confirm against the consumer.
  def self.provider_params
    params = {
      aws_bedrock: {
        access_key_id: :secret,
        role_arn: :text,
        region: :text,
        inference_profile_arn: :text,
        enable_reasoning: :checkbox,
        adaptive_thinking: {
          type: :checkbox,
          depends_on: :enable_reasoning,
        },
        reasoning_tokens: {
          type: :number,
          depends_on: :enable_reasoning,
          hidden_if: :adaptive_thinking,
        },
        effort: {
          type: :enum,
          values: ["default", *DiscourseAi::Completions::Endpoints::AnthropicShared::EFFORT_VALUES],
          default: "default",
        },
        disable_native_tools: :checkbox,
        disable_native_structured_output: :checkbox,
        disable_temperature: {
          type: :checkbox,
          hidden_if: %i[enable_reasoning adaptive_thinking],
        },
        disable_top_p: {
          type: :checkbox,
          hidden_if: %i[enable_reasoning adaptive_thinking],
        },
        prompt_caching: {
          type: :enum,
          values: %w[never tool_results always],
          default: "tool_results",
        },
      },
      aws_bedrock_converse: {
        access_key_id: :secret,
        role_arn: :text,
        region: :text,
        enable_reasoning: :checkbox,
        adaptive_thinking: {
          type: :checkbox,
          depends_on: :enable_reasoning,
        },
        reasoning_tokens: {
          type: :number,
          depends_on: :enable_reasoning,
          hidden_if: :adaptive_thinking,
        },
        effort: {
          type: :enum,
          values: ["default", *DiscourseAi::Completions::Endpoints::AnthropicShared::EFFORT_VALUES],
          default: "default",
        },
        disable_temperature: {
          type: :checkbox,
          hidden_if: %i[enable_reasoning adaptive_thinking],
        },
        disable_top_p: {
          type: :checkbox,
          hidden_if: %i[enable_reasoning adaptive_thinking],
        },
        prompt_caching: {
          type: :enum,
          values: %w[never tool_results always],
          default: "tool_results",
        },
        extra_model_fields: :text,
      },
      anthropic: {
        enable_reasoning: :checkbox,
        adaptive_thinking: {
          type: :checkbox,
          depends_on: :enable_reasoning,
        },
        reasoning_tokens: {
          type: :number,
          depends_on: :enable_reasoning,
          hidden_if: :adaptive_thinking,
        },
        effort: {
          type: :enum,
          values: ["default", *DiscourseAi::Completions::Endpoints::AnthropicShared::EFFORT_VALUES],
          default: "default",
        },
        disable_native_tools: :checkbox,
        disable_native_structured_output: :checkbox,
        disable_temperature: {
          type: :checkbox,
          hidden_if: %i[enable_reasoning adaptive_thinking],
        },
        disable_top_p: {
          type: :checkbox,
          hidden_if: %i[enable_reasoning adaptive_thinking],
        },
        prompt_caching: {
          type: :enum,
          values: %w[never tool_results always],
          default: "tool_results",
        },
      },
      open_ai: {
        organization: :text,
        disable_native_tools: :checkbox,
        reasoning_effort: {
          type: :enum,
          values: %w[default none minimal low medium high xhigh],
          default: "default",
        },
        disable_temperature: {
          type: :checkbox,
          hidden_if: :reasoning_effort,
        },
        disable_top_p: {
          type: :checkbox,
          hidden_if: :reasoning_effort,
        },
        disable_streaming: :checkbox,
        service_tier: {
          type: :enum,
          values: %w[default auto flex priority],
          default: "default",
        },
      },
      groq: {
        disable_native_tools: :checkbox,
        reasoning_effort: {
          type: :enum,
          values: %w[default none minimal low medium high xhigh],
          default: "default",
        },
        disable_temperature: {
          type: :checkbox,
          hidden_if: :reasoning_effort,
        },
        disable_top_p: {
          type: :checkbox,
          hidden_if: :reasoning_effort,
        },
        disable_streaming: :checkbox,
      },
      mistral: {
        disable_native_tools: :checkbox,
      },
      google: {
        disable_native_tools: :checkbox,
        enable_thinking: :checkbox,
        thinking_level: {
          type: :enum,
          values: %w[default minimal low medium high],
          default: "default",
          depends_on: :enable_thinking,
        },
        thinking_tokens: {
          type: :number,
          depends_on: :enable_thinking,
          hidden_if: :thinking_level,
        },
        disable_temperature: {
          type: :checkbox,
          hidden_if: :enable_thinking,
        },
        disable_top_p: :checkbox,
      },
      azure: {
        disable_native_tools: :checkbox,
        reasoning_effort: {
          type: :enum,
          values: %w[default none minimal low medium high xhigh],
          default: "default",
        },
        disable_temperature: {
          type: :checkbox,
          hidden_if: :reasoning_effort,
        },
        disable_top_p: {
          type: :checkbox,
          hidden_if: :reasoning_effort,
        },
        disable_streaming: :checkbox,
        service_tier: {
          type: :enum,
          values: %w[default auto flex priority],
          default: "default",
        },
      },
      hugging_face: {
        disable_system_prompt: :checkbox,
        disable_native_tools: :checkbox,
      },
      vllm: {
        disable_system_prompt: :checkbox,
        disable_native_tools: :checkbox,
        reasoning_parser: {
          type: :enum,
          values: [
            { id: "default", name: "Server default" },
            { id: "deepseek_r1", name: "deepseek_r1" },
            { id: "qwen3", name: "qwen3" },
            { id: "deepseek_v3", name: "deepseek_v3" },
            { id: "deepseek_v4", name: "deepseek_v4" },
            { id: "gemma4", name: "gemma4" },
            { id: "granite", name: "granite" },
            { id: "glm45", name: "glm45" },
            { id: "hunyuan_a13b", name: "hunyuan_a13b" },
            { id: "cohere_command3", name: "cohere_command3" },
            { id: "ernie45", name: "ernie45" },
            { id: "holo2", name: "holo2" },
            { id: "minimax_m2_append_think", name: "minimax_m2_append_think" },
          ],
          default: "default",
          tooltip: "discourse_ai.llms.provider_field_hints.reasoning_parser",
        },
        thinking_override: {
          type: :enum,
          values: [
            { id: "default", name: "Server default" },
            { id: "on", name: "Force on" },
            { id: "off", name: "Force off" },
          ],
          default: "default",
          depends_on: :reasoning_parser,
          tooltip: "discourse_ai.llms.provider_field_hints.thinking_override",
        },
        reasoning_effort: {
          type: :enum,
          values: [
            { id: "default", name: "Server default" },
            { id: "none", name: "None" },
            { id: "low", name: "Low" },
            { id: "medium", name: "Medium" },
            { id: "high", name: "High" },
          ],
          default: "default",
          depends_on: :reasoning_parser,
          tooltip: "discourse_ai.llms.provider_field_hints.reasoning_effort",
        },
        thinking_token_budget: {
          type: :number,
          depends_on: :reasoning_parser,
          tooltip: "discourse_ai.llms.provider_field_hints.thinking_token_budget",
        },
        disable_temperature: {
          type: :checkbox,
          hidden_if: :reasoning_effort,
        },
        disable_top_p: {
          type: :checkbox,
          hidden_if: :reasoning_effort,
        },
        disable_streaming: :checkbox,
      },
      ollama: {
        disable_system_prompt: :checkbox,
        enable_native_tool: :checkbox,
        disable_streaming: :checkbox,
      },
      open_router: {
        disable_native_tools: :checkbox,
        provider_order: :text,
        provider_quantizations: :text,
        disable_streaming: :checkbox,
        disable_temperature: :checkbox,
        disable_top_p: :checkbox,
      },
    }

    # When the temperature/top_p site setting is off, strip the corresponding
    # toggles from every provider so they are not offered at all.
    unless SiteSetting.ai_llm_temperature_top_p_enabled
      params.each_value do |provider_config|
        provider_config.delete(:disable_temperature)
        provider_config.delete(:disable_top_p)
      end
    end

    params
  end

  # Wraps this record with DiscourseAi::Completions::Llm.proxy and returns the
  # result.
  def to_llm
    DiscourseAi::Completions::Llm.proxy(self)
  end

  # Identifier of this model: the record id rendered as a String
  # ("" for a record that has no id yet).
  def identifier
    id.to_s
  end

  # Brings the companion bot user in line with the current configuration.
  #
  # Does nothing for the model named "fake" in production. When the AI bot is
  # enabled and this model is one of the enabled chat bots, the companion user
  # is created (or re-activated if it already exists); otherwise it is handed
  # to cleanup_companion_user.
  def toggle_companion_user
    return if name == "fake" && Rails.env.production?

    bot_wanted = SiteSetting.ai_bot_enabled && enabled_chat_bot?
    return cleanup_companion_user unless bot_wanted

    if user
      # A companion already exists: just make sure it is active.
      user.active = true
      return user.save!(validate: false)
    end

    # One below the lowest id currently present in the users table.
    lowest_free_id = DB.query_single(<<~SQL).first
      SELECT min(id) - 1 FROM users
    SQL

    # The companion is saved without validations, with a placeholder email and
    # with admin, moderator and trust level 4 privileges.
    companion =
      User.new(
        id: [FIRST_BOT_USER_ID, lowest_free_id].min,
        email: "no_email_#{SecureRandom.hex}",
        name: name.titleize,
        username: UserNameSuggester.suggest(name),
        active: true,
        approved: true,
        admin: true,
        moderator: true,
        trust_level: TrustLevel[4],
      )
    companion.save!(validate: false)
    update!(user: companion)
  end

  # Retires the companion bot user, if there is one.
  #
  # A companion that has authored posts is only deactivated, so its posts keep
  # their author; a companion without posts is destroyed and unlinked from
  # this model.
  def cleanup_companion_user
    return unless user

    # Queries the posts table directly (will include deleted posts). The id is
    # bound as a named parameter rather than interpolated into the SQL text.
    has_posts =
      DB.query_single(
        "SELECT 1 FROM posts WHERE user_id = :user_id LIMIT 1",
        user_id: user.id,
      ).present?

    if has_posts
      user.update!(active: false) if user.active
    else
      user.destroy!
      update!(user: nil)
    end
  end

  # Returns the constant whose name is stored in the tokenizer attribute.
  def tokenizer_class
    tokenizer.constantize
  end

  # Canonicalises a list of attachment types.
  #
  # value - a single type, an Array of types, or nil.
  #
  # Every entry is stringified, downcased and stripped, then mapped through
  # ATTACHMENT_TYPE_ALIASES. Blank entries and duplicates are dropped; the
  # order of first appearance is kept.
  #
  # Returns an Array of Strings, or DEFAULT_ALLOWED_ATTACHMENT_TYPES when
  # nothing is left.
  def self.normalize_attachment_types(value)
    cleaned =
      Array(value).each_with_object([]) do |raw, seen|
        type = raw.to_s.downcase.strip
        type = ATTACHMENT_TYPE_ALIASES.fetch(type, type)
        seen << type unless type.blank? || seen.include?(type)
      end

    cleaned.empty? ? DEFAULT_ALLOWED_ATTACHMENT_TYPES : cleaned
  end

  # Attachment types accepted by this model, in canonical form.
  #
  # Reads the stored column value, falls back to
  # DEFAULT_ALLOWED_ATTACHMENT_TYPES when it is blank, and runs the result
  # through normalize_attachment_types.
  def allowed_attachment_types
    stored_types = self[:allowed_attachment_types].presence
    self.class.normalize_attachment_types(stored_types || DEFAULT_ALLOWED_ATTACHMENT_TYPES)
  end

  # Stores +value+ after passing it through normalize_attachment_types, so the
  # column only ever holds canonical attachment types.
  def allowed_attachment_types=(value)
    self[:allowed_attachment_types] = self.class.normalize_attachment_types(value)
  end

  # Reads one provider-specific setting from provider_params.
  #
  # key - parameter name as a String or Symbol. The stored hash is first
  #       queried with the key as given and, when that finds nothing, with
  #       its String form, so Symbol keys work against string-keyed data.
  #
  # Returns nil when the parameter is unset, or when its definition in
  # LlmModel.provider_params declares :depends_on and any of those
  # dependencies is not active. For :secret parameters whose stored value is
  # all digits, the value is treated as an AiSecret id and the secret itself
  # is returned when such a record exists. Otherwise returns the stored value.
  def lookup_custom_param(key)
    value = provider_params&.dig(key)
    value = provider_params&.dig(key.to_s) if value.nil? && !key.is_a?(String)
    return if value.nil?

    param_def = self.class.provider_params.dig(provider&.to_sym, key.to_sym)

    if param_def.is_a?(Hash) && param_def[:depends_on]
      dependencies = Array(param_def[:depends_on])
      return nil if dependencies.any? { |dependency| !param_active?(dependency) }
    end

    secret_param =
      param_def == :secret || (param_def.is_a?(Hash) && param_def[:type] == :secret)

    if secret_param && value.to_s.match?(/\A\d+\z/)
      resolved = AiSecret.find_by(id: value.to_i)
      # An unknown id falls through and the raw stored value is returned.
      return resolved.secret if resolved
    end

    value
  end

  # True when the record has an id and that id is negative.
  def seeded?
    return false unless id.present?

    id.negative?
  end

  # Resolves the API key to use for this model.
  #
  # Seeded models prefer the DISCOURSE_AI_SEEDED_LLM_API_KEY_<abs id>
  # environment variable and fall back to the api_key column. Other models
  # use the linked AiSecret when there is one, else the api_key column.
  def api_key
    if seeded?
      env_override = ENV["DISCOURSE_AI_SEEDED_LLM_API_KEY_#{id.abs}"]
      return env_override || self[:api_key]
    end

    return ai_secret.secret if ai_secret.present?

    self[:api_key]
  end

  # True only for seeded models (see seeded?) that have an
  # llm_credit_allocation record.
  def credit_system_enabled?
    seeded? && llm_credit_allocation.present?
  end

  # Assume-role credentials for the aws_bedrock provider.
  #
  # Returns nil for any other provider or when no role_arn is configured.
  # Otherwise returns an Aws::AssumeRoleCredentials built from the configured
  # role_arn and an STS client for the configured region. The object is
  # memoized per model instance and rebuilt whenever role_arn or region
  # changes, since both are inputs to it.
  def aws_bedrock_credentials
    return nil unless provider == BEDROCK_PROVIDER_NAME

    role_arn = lookup_custom_param("role_arn")
    return nil if role_arn.blank?

    region = lookup_custom_param("region")

    # Invalidate the memo when any setting the credentials were built from changed.
    cache_key = [role_arn, region]
    if @aws_bedrock_credentials_key != cache_key
      @aws_bedrock_credentials_key = cache_key
      @aws_bedrock_credentials = nil
    end

    @aws_bedrock_credentials ||=
      begin
        # Loaded lazily so the STS SDK is only required when role auth is used.
        require "aws-sdk-sts" unless defined?(Aws::STS)

        Aws::AssumeRoleCredentials.new(
          role_arn: role_arn,
          role_session_name: "discourse-bedrock-#{Process.pid}",
          client: Aws::STS::Client.new(region: region),
        )
      end
  end

  private

  # Whether the provider param +key+ counts as switched on.
  #
  # A param is inactive when it is missing/nil, false, or one of the strings
  # "false", "default" or ""; any other stored value makes it active.
  def param_active?(key)
    current = provider_params&.dig(key.to_s)
    ![nil, false, "false", "default", ""].include?(current)
  end

  # Validation: a model needs either a linked AiSecret or an api_key.
  #
  # Seeded models are exempt, and so is the Bedrock Converse provider, which
  # supports auto-resolved credentials (env vars, instance profile). When an
  # ai_secret_id is set, only its existence is checked.
  def api_key_or_secret_present
    return if seeded? || provider == BEDROCK_CONVERSE_PROVIDER_NAME

    if ai_secret_id.present?
      return if AiSecret.exists?(ai_secret_id)

      errors.add(:ai_secret_id, I18n.t("discourse_ai.llm_models.secret_not_found"))
      return
    end

    errors.add(:base, I18n.t("discourse_ai.llm_models.secret_required")) if self[:api_key].blank?
  end

  # Validation: provider params that the Bedrock providers cannot work without.
  #
  # Both Bedrock providers need a region. The classic Bedrock provider also
  # needs access_key_id or role_arn; for Converse those stay optional because
  # the SDK can auto-resolve credentials.
  def required_provider_params
    case provider
    when BEDROCK_PROVIDER_NAME
      region_missing = lookup_custom_param("region").blank?
      if region_missing
        errors.add(:base, I18n.t("discourse_ai.llm_models.missing_provider_param", param: "region"))
      end

      access_key_missing = lookup_custom_param("access_key_id").blank?
      if access_key_missing && lookup_custom_param("role_arn").blank?
        errors.add(:base, I18n.t("discourse_ai.llm_models.bedrock_missing_auth"))
      end
    when BEDROCK_CONVERSE_PROVIDER_NAME
      region_missing = lookup_custom_param("region").blank?
      if region_missing
        errors.add(:base, I18n.t("discourse_ai.llm_models.missing_provider_param", param: "region"))
      end
    end
  end
end

# == Schema Information
#
# Table name: llm_models
#
#  id                       :bigint           not null, primary key
#  allowed_attachment_types :text             default([]), not null, is an Array
#  api_key                  :string
#  cache_write_cost         :float            default(0.0)
#  cached_input_cost        :float
#  display_name             :string
#  input_cost               :float
#  max_output_tokens        :integer
#  max_prompt_tokens        :integer          not null
#  name                     :string           not null
#  output_cost              :float
#  provider                 :string           not null
#  provider_params          :jsonb
#  tokenizer                :string           not null
#  url                      :string
#  vision_enabled           :boolean          default(FALSE), not null
#  created_at               :datetime         not null
#  updated_at               :datetime         not null
#  ai_secret_id             :bigint
#  user_id                  :integer
#
# Indexes
#
#  index_llm_models_on_ai_secret_id  (ai_secret_id)
#