discourse/plugins/discourse-ai/spec/evals/recorder_spec.rb
Sam e3fae646d4
DEV: AI persona to agent migration (#38319)
Co-authored-by: Keegan George <kgeorge13@gmail.com>
2026-03-10 15:59:45 +11:00

160 lines
4.7 KiB
Ruby
Vendored

# frozen_string_literal: true
require_relative "../../evals/lib/recorder"
require_relative "../../evals/lib/structured_logger"
require_relative "../../evals/lib/eval"
# Specs for DiscourseAi::Evals::Recorder.
#
# Every collaborator handed to the recorder (eval case, plain logger,
# structured logger, console formatter) is a verifying double, so the examples
# observe only the calls made on those doubles and the text that ends up in
# the injected +output+ stream.
RSpec.describe DiscourseAi::Evals::Recorder do
  subject(:recorder) do
    described_class.new(
      eval_case,
      logger,
      "/tmp/example.json",
      structured_logger,
      total_targets: 1,
      agent_key: agent_key,
      output: output,
    )
  end

  # --- simple values; nested groups override these to change a scenario ---
  let(:agent_key) { "default" }
  let(:output) { StringIO.new }
  let(:root_started) { true }
  let(:child_step) { {} }

  # Expected text of the ArgumentError in the "not started" scenarios. The
  # wording is copied verbatim because raise_error compares a String argument
  # against the raised message exactly.
  let(:missing_root_message) { "You didn't instantiated this object with #with_cassette" }

  # --- collaborators ---
  let(:eval_case) do
    instance_double("DiscourseAi::Evals::Eval", id: "example-eval", to_json: { foo: "bar" })
  end

  let(:logger) { instance_double(Logger, info: nil, error: nil) }

  let(:structured_logger) do
    instance_double(
      DiscourseAi::Evals::StructuredLogger,
      start_root: nil,
      root_started?: root_started,
      add_child_step: child_step,
      append_entry: nil,
      finish_root: nil,
      to_trace_event_json: "{}",
      path: "/tmp/example.json",
    )
  end

  let(:formatter) do
    instance_double(
      DiscourseAi::Evals::ConsoleFormatter,
      announce_start: nil,
      record_result: nil,
      record_skip: nil,
      pause_progress_line: nil,
      record_comparison_judged: nil,
      record_comparison_expected: nil,
      finalize: nil,
    )
  end

  # Any ConsoleFormatter constructed while an example runs is the double above.
  before do
    allow(DiscourseAi::Evals::ConsoleFormatter).to receive(:new).and_return(formatter)
  end

  describe "#execution_context" do
    it "exposes recorder loggers through an explicit completion context" do
      execution_context = recorder.execution_context

      expect(execution_context.audit_logger).to eq(logger)
      expect(execution_context.structured_audit_logger).to eq(structured_logger)
    end
  end

  describe "#running" do
    it "starts a root structured log step for the eval" do
      recorder.running

      expect(structured_logger).to have_received(:start_root).with(
        name: "Evaluating example-eval (agent: default)",
        args: {
          foo: "bar",
          agent_key: "default",
        },
      )
      expect(logger).to have_received(:info).with(
        "Starting evaluation 'example-eval' (agent: default)",
      )
    end
  end

  describe "#record_llm_skip" do
    it "logs the skip reason when the structured log is active" do
      recorder.record_llm_skip("gpt-4", "vision-only feature")

      expect(logger).to have_received(:info).with(
        "Skipping LLM: gpt-4 - Reason: vision-only feature",
      )
    end

    context "when structured logging has not started" do
      let(:root_started) { false }

      it "raises an informative error" do
        expect { recorder.record_llm_skip("gpt-4", "vision-only feature") }.to raise_error(
          ArgumentError,
          missing_root_message,
        )
      end
    end
  end

  describe "#record_llm_results" do
    # One passing result followed by one failing result carrying full details.
    let(:results) do
      [
        { result: :pass },
        {
          result: :fail,
          message: "Mismatch",
          expected_output: "ideal",
          actual_output: "oops",
          context: "details",
        },
      ]
    end

    let(:start_time) { Time.utc(2024, 1, 1, 12, 0, 0) }
    let(:now) { Time.utc(2024, 1, 1, 12, 1, 0) }

    # Pin the clock so the expected ended_at value is deterministic.
    before do
      allow(Time).to receive(:now).and_return(now)
    end

    it "records structured log entries and prints human friendly output" do
      recorder.record_llm_results("gpt-4", results, start_time)

      expect(structured_logger).to have_received(:add_child_step).with(
        name: "Evaluating with LLM: gpt-4",
      )

      # One entry per result: :good for the pass, :bad for the failure.
      %i[good bad].each do |entry_name|
        expect(structured_logger).to have_received(:append_entry).with(
          step: child_step,
          name: entry_name,
          started_at: start_time,
          ended_at: now.utc,
        )
      end

      expect(logger).to have_received(:info).with("Evaluating with LLM: gpt-4")
      expect(logger).to have_received(:error).with("Evaluation failed with LLM: gpt-4")

      printed = output.string
      expect(printed).to include("gpt-4: ")
      expect(printed).to include("Passed 🟢")
      expect(printed).to include("Failed 🔴")
      expect(printed).to include("---- Expected ----\nideal")
      expect(printed).to include("---- Actual ----\noops")
    end

    context "when structured logging has not started" do
      let(:root_started) { false }

      it "raises an informative error" do
        expect { recorder.record_llm_results("gpt-4", results, start_time) }.to raise_error(
          ArgumentError,
          missing_root_message,
        )
      end
    end
  end
end