discourse/plugins/discourse-ai/spec/lib/agents/tools/db_schema_spec.rb
Natalie Tay fbcbdc46d8
FIX: Data explorer agent reliability for schema and plurality (#40152)
Reported through a few user tests, the agent was unreliable in three
ways:
- `tags.tag_name` instead of `tags.name`
- did not use `current_user_id` for "my posts" prompts
- plural nouns as singular
- and used unparse-able date defaults like "today".

Few issues:
- `DbSchema` tool was returning a dense one-line-per-table comma that
qwen was unable to deal with. Now line-per-column so schema accuracy
originally flaky is now 5/5 PASSING on qwen and Gemini.
- The prompt was teaching the wrong thing where the `-- null boolean
:opt_flag = #null` example made models use `#null` as a default value.
We now have a "Parameter rules" section, ISO date examples that match
the "no natural-language defaults" rule below them, explicit
`current_user_id` guidance for first-person prompts, and a plural-noun
rule that applies to each plural noun independently in the same prompt
(e.g. "categories and tags" → BOTH list params, not one of each).
- Eval runner now captures `name` and `description` separately, not just
`sql`. The description text is graded directly rather than grading the
SQL string.

Tested against qwen 3.5 122B (our hosted model) + Gemini 3.1 Flash Lite
(judge GPT-5.2): 20/20 each. New eval cases ship in this PR
https://github.com/discourse/discourse-ai-evals/pull/18
2026-05-19 16:23:17 +08:00

69 lines
2.3 KiB
Ruby
Vendored

# frozen_string_literal: true
RSpec.describe DiscourseAi::Agents::Tools::DbSchema do
fab!(:llm_model)
let(:bot_user) { DiscourseAi::AiBot::EntryPoint.find_user_from_model(llm_model.name) }
let(:llm) { DiscourseAi::Completions::Llm.proxy(llm_model) }
before do
enable_current_plugin
SiteSetting.ai_bot_enabled = true
end
def invoke(tables)
described_class.new({ tables: tables }, bot_user: bot_user, llm: llm).invoke
end
describe "#invoke" do
it "returns one column per line with table headers" do
info = invoke("posts,topics")[:schema_info]
expect(info).to match(/^TABLE posts$/)
expect(info).to match(/^TABLE topics$/)
expect(info).to match(/^ id integer, PK$/)
end
it "marks foreign-key columns using Discourse's curated map" do
info = invoke("topic_tags,topics")[:schema_info]
expect(info).to match(/^ topic_id integer, FK → topics/)
expect(info).to match(/^ user_id integer, FK → users/)
expect(info).to match(/^ last_post_user_id integer, FK → users/) # via *_user_id rule
end
it "does not invent FK targets for non-FK _id columns" do
info = invoke("topics")[:schema_info]
# Previous heuristic produced misleading targets like "FK → externals" / "FK → featured_user1s".
# Now we only mark FKs the Discourse map actually knows about.
expect(info).not_to include("FK → externals")
expect(info).not_to include("FK → featured_user1s")
end
it "simplifies long postgres type names" do
info = invoke("topics")[:schema_info]
expect(info).to include("varchar")
expect(info).to include("timestamp")
expect(info).not_to include("character varying")
expect(info).not_to include("timestamp without time zone")
end
it "marks nullable columns" do
info = invoke("topics")[:schema_info]
expect(info).to match(/^ deleted_at timestamp, null$/)
end
it "reports missing tables instead of silently dropping them" do
info = invoke("topics,not_a_real_table")[:schema_info]
expect(info).to include("TABLE topics")
expect(info).to include("TABLES NOT FOUND: not_a_real_table")
end
it "returns the requested tables string" do
expect(invoke("posts,topics")[:tables]).to eq("posts,topics")
end
end
end