discourse/plugins/discourse-ai/lib/completions/report.rb

263 lines
8.2 KiB
Ruby
Vendored

# frozen_string_literal: true
module DiscourseAi
module Completions
class Report
UNKNOWN_FEATURE = "unknown"
USER_LIMIT = 50
LLM_MODEL_JOIN = "LEFT JOIN llm_models ON llm_models.id = ai_api_request_stats.llm_id"
LLM_MODEL_ID_PATTERN = /^-?\d+$/
attr_reader :start_date, :end_date, :base_query, :timezone
def initialize(
start_date: 30.days.ago,
end_date: Time.current,
timezone: Time.zone.name,
exact_range: false
)
@timezone = timezone
Time.zone = timezone # Set the timezone for parsing dates in the user's timezone
@start_date = exact_range ? start_date : start_date.beginning_of_day
@end_date = exact_range ? end_date : end_date.end_of_day
Time.zone = nil # Reset to default timezone
@base_query = AiApiRequestStat.between(@start_date, @end_date)
end
def total_tokens
stats.total_tokens || 0
end
def total_cache_read_tokens
stats.total_cache_read_tokens || 0
end
def total_cache_write_tokens
stats.total_cache_write_tokens || 0
end
def total_request_tokens
stats.total_request_tokens || 0
end
def total_response_tokens
stats.total_response_tokens || 0
end
def total_requests
stats.total_requests || 0
end
def total_spending
total =
total_input_spending + total_output_spending + total_cache_read_spending +
total_cache_write_spending
total.round(2)
end
def total_input_spending
spending_component_total(:input)
end
def total_output_spending
spending_component_total(:output)
end
def total_cache_read_spending
spending_component_total(:cache_read)
end
def total_cache_write_spending
spending_component_total(:cache_write)
end
def spending_component_total(component)
info = LlmModel::COST_COMPONENTS.fetch(component)
cost_attr = info[:cost]
tokens_attr = "total_#{info[:tokens]}"
model_costs.sum do |row|
row.public_send(cost_attr).to_f * row.public_send(tokens_attr).to_i / 1_000_000.0
end
end
def stats
@stats ||= base_query.select("SUM(usage_count) as total_requests", *token_total_columns)[0]
end
def model_costs
@model_costs ||=
base_query
.joins(LLM_MODEL_JOIN)
.group(
"llm_models.name, llm_models.input_cost, llm_models.output_cost, llm_models.cached_input_cost, llm_models.cache_write_cost",
)
.select(
"llm_models.name",
"llm_models.input_cost",
"llm_models.output_cost",
"llm_models.cached_input_cost",
"llm_models.cache_write_cost",
*token_total_columns,
)
end
def guess_period(period = nil)
period = nil if %i[day month hour].include?(period)
period ||
case @end_date - @start_date
when 0..3.days
:hour
when 3.days..90.days
:day
else
:month
end
end
def tokens_by_period(period = nil)
period = guess_period(period)
results =
base_query
.group("DATE_TRUNC('#{period}', created_at)")
.order("DATE_TRUNC('#{period}', created_at)")
.select("DATE_TRUNC('#{period}', created_at) as period", *token_total_columns)
# Convert periods to user's timezone
results.map do |row|
row.period = row.period.in_time_zone(timezone)
row
end
end
def user_breakdown
stats =
base_query
.joins(LLM_MODEL_JOIN)
.group(:user_id)
.order("usage_count DESC")
.limit(USER_LIMIT)
.select(:user_id, "SUM(usage_count) as usage_count", *token_count_and_total_columns)
User
.joins("JOIN (#{stats.to_sql}) as stats ON stats.user_id = users.id")
.select("users.*", "stats.*")
.order("stats.usage_count DESC")
end
def feature_breakdown
base_query
.joins(LLM_MODEL_JOIN)
.group(:feature_name)
.order("usage_count DESC")
.select(
"case when coalesce(feature_name, '') = '' then '#{UNKNOWN_FEATURE}' else feature_name end as feature_name",
"SUM(usage_count) as usage_count",
*token_count_and_total_columns,
)
end
def model_breakdown
base_query
.joins(LLM_MODEL_JOIN)
.group(
"COALESCE(llm_models.id::text, ai_api_request_stats.language_model)",
"COALESCE(llm_models.display_name, ai_api_request_stats.language_model)",
"llm_models.input_cost",
"llm_models.output_cost",
"llm_models.cached_input_cost",
"llm_models.cache_write_cost",
)
.order("usage_count DESC")
.select(
"COALESCE(llm_models.display_name, ai_api_request_stats.language_model) as llm_label",
"COALESCE(llm_models.id::text, ai_api_request_stats.language_model) as llm_id",
"SUM(usage_count) as usage_count",
*token_count_and_total_columns,
)
end
def feature_model_breakdown
base_query
.joins(LLM_MODEL_JOIN)
.group(
:feature_name,
"COALESCE(llm_models.id::text, ai_api_request_stats.language_model)",
"COALESCE(llm_models.display_name, ai_api_request_stats.language_model)",
"llm_models.input_cost",
"llm_models.output_cost",
"llm_models.cached_input_cost",
"llm_models.cache_write_cost",
)
.order("feature_name, usage_count DESC")
.select(
"CASE WHEN COALESCE(feature_name, '') = '' THEN '#{UNKNOWN_FEATURE}' ELSE feature_name END as feature_name",
"COALESCE(llm_models.id::text, ai_api_request_stats.language_model) as llm_id",
"COALESCE(llm_models.display_name, ai_api_request_stats.language_model) as llm_label",
"SUM(usage_count) as usage_count",
*token_count_and_total_columns,
)
end
def tokens_per_hour
tokens_by_period(:hour)
end
def tokens_per_day
tokens_by_period(:day)
end
def tokens_per_month
tokens_by_period(:month)
end
def filter_by_feature(feature_name)
if feature_name == UNKNOWN_FEATURE
@base_query = base_query.where("coalesce(feature_name, '') = ''")
else
@base_query = base_query.where(feature_name: feature_name)
end
self
end
def filter_by_model(model_identifier)
if model_identifier.to_s.match?(LLM_MODEL_ID_PATTERN)
model = LlmModel.find_by(id: model_identifier)
if model
@base_query =
base_query.where(
"llm_id = ? OR (llm_id IS NULL AND language_model = ?)",
model.id,
model.name,
)
else
@base_query = base_query.where(llm_id: model_identifier)
end
else
@base_query = base_query.where(language_model: model_identifier)
end
self
end
def token_total_columns
[
"SUM(COALESCE(request_tokens, 0) + COALESCE(response_tokens, 0) + COALESCE(cache_read_tokens, 0) + COALESCE(cache_write_tokens, 0)) as total_tokens",
"SUM(COALESCE(cache_read_tokens, 0)) as total_cache_read_tokens",
"SUM(COALESCE(cache_write_tokens,0)) as total_cache_write_tokens",
"SUM(COALESCE(request_tokens,0)) as total_request_tokens",
"SUM(COALESCE(response_tokens,0)) as total_response_tokens",
]
end
def token_count_and_total_columns
spending_columns =
LlmModel::COST_COMPONENTS.keys.map do |component|
expr = LlmModel.spending_component_sql(component, :ai_api_request_stats)
"SUM(#{expr}) / 1000000.0 as #{component}_spending"
end
[*token_total_columns, *spending_columns]
end
end
end
end