discourse/plugins/discourse-ai/evals/lib/features.rb
Roman Rizzi 3a647c8e50
FEATURE: Use evals to compare LLMs and Personas' prompts (#36027)
Implemented an eval “comparison matrix” that lets you run the same evals
across multiple personas or multiple LLMs and have a judge model declare
a winner with per-candidate scores. The CLI adds --compare
personas|llms, keeps persona selection (auto-prepending default for
persona mode), and always ensures a judge is configured. A dedicated
ComparisonRunner reuses Workbench results to build candidate outputs and
sends them to Judge#compare, which crafts a rubric-aware comparison
prompt and parses structured winner/ratings JSON. Outputs are streamed
to the console and individual run logs still get written. README
documents how to use the new flag and what each mode does.
2025-11-18 10:39:52 -03:00

77 lines
2 KiB
Ruby

# frozen_string_literal: true
module DiscourseAi
  module Evals
    # Lists the feature keys exposed by a set of modules and validates
    # feature keys against that list.
    #
    # Each module is expected to respond to `name` and `features`, and each
    # feature to respond to `name`. A feature key is the string
    # "<module name>:<feature name>".
    class Features
      # Keys that are always accepted by #valid_feature_key?, even though
      # they are not derived from any module's registered features.
      CUSTOM_FEATURE_KEYS = %w[
        custom:prompt
        custom:pdf_to_text
        custom:image_to_text
        custom:edit_artifact
      ].freeze

      # @param modules [Enumerable] objects responding to `name` and `features`;
      #   defaults to DiscourseAi::Configuration::Module.all
      # @param output [#puts] destination used by #print
      def initialize(modules: DiscourseAi::Configuration::Module.all, output: $stdout)
        @modules = modules
        @output = output
      end

      # Writes every module name to `output`, followed by one line per
      # feature key, or a placeholder line when the module has no features.
      def print
        module_entries.each do |module_name, module_features|
          output.puts module_name

          if module_features.empty?
            output.puts " - no registered features"
            next
          end

          module_features.each { |entry| output.puts " - #{entry[:key]}" }
        end
      end

      # Groups evals by their feature.
      #
      # @param evals [Array, nil] objects responding to `feature` and `id`
      # @return [Hash] feature => sorted list of the ids of its evals
      def feature_map(evals)
        Array(evals).group_by(&:feature).transform_values { |grouped| grouped.map(&:id).sort }
      end

      # @return [Array<String>] every feature key, in module order
      def feature_keys
        entries.map { |entry| entry[:key] }
      end

      # @param key [String]
      # @return [Boolean] true when `key` is one of CUSTOM_FEATURE_KEYS or a
      #   key built from the configured modules
      def valid_feature_key?(key)
        CUSTOM_FEATURE_KEYS.include?(key) || feature_keys.include?(key)
      end

      # Terminates the process with exit status 1 when `feature_key` is
      # present but unknown. Blank keys are accepted without checking.
      #
      # @param feature_key [String, nil]
      # @return [nil] when the key is blank or valid
      # @raise [SystemExit] when the key is unknown
      def validate_feature!(feature_key)
        return if feature_key.blank?
        return if valid_feature_key?(feature_key)

        # Use $stderr rather than the STDERR constant so that redirecting or
        # capturing $stderr also applies to this message.
        $stderr.puts(
          "Unknown feature '#{feature_key}'. Run with --list-features to view valid keys.",
        )
        exit 1
      end

      private

      attr_reader :modules, :output

      # Memoized list of [module name, feature entries] pairs.
      def module_entries
        @module_entries ||= modules.map { |mod| [mod.name, entries_for_module(mod)] }
      end

      # Memoized flat list of the feature entries of all modules.
      def entries
        @entries ||= module_entries.flat_map { |(_, m_entries)| m_entries }
      end

      # Builds (and caches per module) one entry hash per feature of `mod`.
      def entries_for_module(mod)
        feature_entries_by_module[mod] ||= Array(mod.features).map do |feature|
          { key: feature_key(mod, feature), module_name: mod.name }
        end
      end

      # Cache backing #entries_for_module, keyed by module object.
      def feature_entries_by_module
        @feature_entries_by_module ||= {}
      end

      # @return [String] "<module name>:<feature name>"
      def feature_key(mod, feature)
        "#{mod.name}:#{feature.name}"
      end
    end
  end
end