discourse/plugins/discourse-ai/lib/completions/report.rb

# frozen_string_literal: true
module DiscourseAi
  module Completions
    class Report
      UNKNOWN_FEATURE = "unknown"
      USER_LIMIT = 50
      LLM_MODEL_JOIN = "LEFT JOIN llm_models ON llm_models.id = ai_api_request_stats.llm_id"
      LLM_MODEL_ID_PATTERN = /^-?\d+$/

      attr_reader :start_date, :end_date, :base_query, :timezone

      def initialize(
        start_date: 30.days.ago,
        end_date: Time.current,
        timezone: Time.zone.name,
        exact_range: false
      )
        @timezone = timezone

        Time.zone = timezone # Set the timezone for parsing dates in the user's timezone
        @start_date = exact_range ? start_date : start_date.beginning_of_day
        @end_date = exact_range ? end_date : end_date.end_of_day
        Time.zone = nil # Reset to default timezone

        @base_query = AiApiRequestStat.between(@start_date, @end_date)
      end

      def total_tokens
        stats.total_tokens || 0
      end

      def total_cache_read_tokens
        stats.total_cache_read_tokens || 0
      end

      def total_cache_write_tokens
        stats.total_cache_write_tokens || 0
      end

      def total_request_tokens
        stats.total_request_tokens || 0
      end

      def total_response_tokens
        stats.total_response_tokens || 0
      end

      def total_requests
        stats.total_requests || 0
      end

      def total_spending
        total =
          total_input_spending + total_output_spending + total_cache_read_spending +
            total_cache_write_spending
        total.round(2)
      end

      def total_input_spending
        spending_component_total(:input)
      end

      def total_output_spending
        spending_component_total(:output)
      end

      def total_cache_read_spending
        spending_component_total(:cache_read)
      end

      def total_cache_write_spending
        spending_component_total(:cache_write)
      end

      def spending_component_total(component)
        info = LlmModel::COST_COMPONENTS.fetch(component)
        cost_attr = info[:cost]
        tokens_attr = "total_#{info[:tokens]}"

        model_costs.sum do |row|
          row.public_send(cost_attr).to_f * row.public_send(tokens_attr).to_i / 1_000_000.0
        end
      end

      def stats
        @stats ||= base_query.select("SUM(usage_count) as total_requests", *token_total_columns)[0]
      end

      def model_costs
        @model_costs ||=
          base_query
            .joins(LLM_MODEL_JOIN)
            .group(
              "llm_models.name, llm_models.input_cost, llm_models.output_cost, llm_models.cached_input_cost, llm_models.cache_write_cost",
            )
            .select(
              "llm_models.name",
              "llm_models.input_cost",
              "llm_models.output_cost",
              "llm_models.cached_input_cost",
              "llm_models.cache_write_cost",
              *token_total_columns,
            )
      end

      def guess_period(period = nil)
        period = nil if %i[day month hour].include?(period)
        period ||
          case @end_date - @start_date
          when 0..3.days
            :hour
          when 3.days..90.days
            :day
          else
            :month
          end
      end

      def tokens_by_period(period = nil)
        period = guess_period(period)
        results =
          base_query
            .group("DATE_TRUNC('#{period}', created_at)")
            .order("DATE_TRUNC('#{period}', created_at)")
            .select("DATE_TRUNC('#{period}', created_at) as period", *token_total_columns)

        # Convert periods to user's timezone
        results.map do |row|
          row.period = row.period.in_time_zone(timezone)
          row
        end
      end

      def user_breakdown
        stats =
          base_query
            .joins(LLM_MODEL_JOIN)
            .group(:user_id)
            .order("usage_count DESC")
            .limit(USER_LIMIT)
            .select(:user_id, "SUM(usage_count) as usage_count", *token_count_and_total_columns)

        User
          .joins("JOIN (#{stats.to_sql}) as stats ON stats.user_id = users.id")
          .select("users.*", "stats.*")
          .order("stats.usage_count DESC")
      end

      def feature_breakdown
        base_query
          .joins(LLM_MODEL_JOIN)
          .group(:feature_name)
          .order("usage_count DESC")
          .select(
            "case when coalesce(feature_name, '') = '' then '#{UNKNOWN_FEATURE}' else feature_name end as feature_name",
            "SUM(usage_count) as usage_count",
            *token_count_and_total_columns,
          )
      end

      def model_breakdown
        base_query
          .joins(LLM_MODEL_JOIN)
          .group(
            "COALESCE(llm_models.id::text, ai_api_request_stats.language_model)",
            "COALESCE(llm_models.display_name, ai_api_request_stats.language_model)",
            "llm_models.input_cost",
            "llm_models.output_cost",
            "llm_models.cached_input_cost",
            "llm_models.cache_write_cost",
          )
          .order("usage_count DESC")
          .select(
            "COALESCE(llm_models.display_name, ai_api_request_stats.language_model) as llm_label",
            "COALESCE(llm_models.id::text, ai_api_request_stats.language_model) as llm_id",
            "SUM(usage_count) as usage_count",
            *token_count_and_total_columns,
          )
      end

      def feature_model_breakdown
        base_query
          .joins(LLM_MODEL_JOIN)
          .group(
            :feature_name,
            "COALESCE(llm_models.id::text, ai_api_request_stats.language_model)",
            "COALESCE(llm_models.display_name, ai_api_request_stats.language_model)",
            "llm_models.input_cost",
            "llm_models.output_cost",
            "llm_models.cached_input_cost",
            "llm_models.cache_write_cost",
          )
          .order("feature_name, usage_count DESC")
          .select(
            "CASE WHEN COALESCE(feature_name, '') = '' THEN '#{UNKNOWN_FEATURE}' ELSE feature_name END as feature_name",
            "COALESCE(llm_models.id::text, ai_api_request_stats.language_model) as llm_id",
            "COALESCE(llm_models.display_name, ai_api_request_stats.language_model) as llm_label",
            "SUM(usage_count) as usage_count",
            *token_count_and_total_columns,
          )
      end

      def tokens_per_hour
        tokens_by_period(:hour)
      end

      def tokens_per_day
        tokens_by_period(:day)
      end

      def tokens_per_month
        tokens_by_period(:month)
      end

      def filter_by_feature(feature_name)
        if feature_name == UNKNOWN_FEATURE
          @base_query = base_query.where("coalesce(feature_name, '') = ''")
        else
          @base_query = base_query.where(feature_name: feature_name)
        end
        self
      end

      def filter_by_model(model_identifier)
        if model_identifier.to_s.match?(LLM_MODEL_ID_PATTERN)
          model = LlmModel.find_by(id: model_identifier)
          if model
            @base_query =
              base_query.where(
                "llm_id = ? OR (llm_id IS NULL AND language_model = ?)",
                model.id,
                model.name,
              )
          else
            @base_query = base_query.where(llm_id: model_identifier)
          end
        else
          @base_query = base_query.where(language_model: model_identifier)
        end
        self
      end

      def token_total_columns
        [
          "SUM(COALESCE(request_tokens, 0) + COALESCE(response_tokens, 0) + COALESCE(cache_read_tokens, 0) + COALESCE(cache_write_tokens, 0)) as total_tokens",
          "SUM(COALESCE(cache_read_tokens, 0)) as total_cache_read_tokens",
          "SUM(COALESCE(cache_write_tokens,0)) as total_cache_write_tokens",
          "SUM(COALESCE(request_tokens,0)) as total_request_tokens",
          "SUM(COALESCE(response_tokens,0)) as total_response_tokens",
        ]
      end

      def token_count_and_total_columns
        spending_columns =
          LlmModel::COST_COMPONENTS.keys.map do |component|
            expr = LlmModel.spending_component_sql(component, :ai_api_request_stats)
            "SUM(#{expr}) / 1000000.0 as #{component}_spending"
          end
        [*token_total_columns, *spending_columns]
      end
    end
  end
end