discourse/plugins/discourse-ai/lib/admin_dashboard/admin_dashboard_facts.rb
Natalie Tay 02ebe73274
DEV: More updates to the highlight dash with more specific conditions (#40994)
- Groups the AI summary into clearer areas: getting people in, getting
people to participate, and support health. (lenses)
- Helps the AI connect related numbers instead of listing random stats.
(categorize KPI metrics)
- Notices when the community is creating many more or many fewer new
topics. (topic-volume signal)
- Shows when unanswered topics are a meaningful share of new topics, not
just a raw count. (unanswered-topic percentage)
- Makes the AI less likely to invent causes, traffic sources, or mention
missing data. (more grounding rules)
- Also for longer date ranges, we skip certain unmeaningful metrics
2026-06-18 11:20:21 +08:00

374 lines
13 KiB
Ruby
Vendored

# frozen_string_literal: true
module DiscourseAi
module AdminDashboard
# Computes a digested, significance-gated set of facts about a community for
# a period. Everything here is deterministic ground truth: the headline
# metrics (consistent with the admin dashboard tiles) plus richer internal and
# external signals, each pre-analysed so the LLM only has to phrase them.
#
# Returns:
# {
# period: { start_date:, end_date:, days: },
# trend: :growing | :steady | :declining | :mixed,
# metrics: [ { label:, value:, delta_pct: } ], # the 4 tile KPIs, always
# signals: [ { key:, headline:, score: } ], # richer facts, gated
# }
class AdminDashboardFacts
BROWSER_REQ_TYPES = %i[
page_view_anon_browser
page_view_anon_browser_mobile
page_view_logged_in_browser
page_view_logged_in_browser_mobile
].freeze
MAX_SIGNALS = 6
MAX_LANDING_TOPIC_DAYS = 93
MAX_STAFF_RATIO_DAYS = 93
METRIC_LABELS = {
new_signups: "new sign-ups",
dau_mau: "DAU/MAU stickiness (a percentage, not a user count)",
new_contributors: "new contributors",
accepted_solutions: "questions resolved (accepted solutions)",
}.freeze
def self.compute(start_date:, end_date:)
new(start_date: start_date, end_date: end_date).compute
end
def initialize(start_date:, end_date:)
@end_date = parse(end_date) || Date.current
@start_date = parse(start_date) || (@end_date - 30)
@start_date, @end_date = @end_date, @start_date if @start_date > @end_date
length = (@end_date - @start_date).to_i
@period_days = length + 1
@prev_end = @start_date - 1
@prev_start = @start_date - (length + 1)
end
def compute
kpis = AdminDashboardHighlights.build(start_date: @start_date, end_date: @end_date)[:kpis]
signals =
[
hot_topic,
topic_volume,
unanswered_gap,
staff_ratio,
traffic_volume,
traffic_spike,
geography,
landing_topic,
].compact.sort_by { |signal| -signal[:score] }.first(MAX_SIGNALS)
{
period: {
start_date: @start_date.to_s,
end_date: @end_date.to_s,
days: @period_days,
},
trend: trend(kpis),
metrics: kpis.map { |kpi| metric_for(kpi) },
signals: signals,
}
end
private
def parse(value)
Time.zone.parse(value.to_s)&.to_date
rescue ArgumentError, TypeError
nil
end
def metric_for(kpi)
key = kpi[:type]&.to_sym
label = METRIC_LABELS[kpi[:type]&.to_sym] || kpi[:type].to_s.tr("_", " ")
{
key: key,
category: metric_category(key),
label: label,
value: kpi[:value],
delta_pct: kpi[:percent_change],
}
end
def metric_category(key)
case key
when :new_signups
:acquisition
when :dau_mau, :new_contributors
:participation
when :accepted_solutions
:support
end
end
def trend(kpis)
deltas = kpis.map { |kpi| kpi[:percent_change] }.compact
up = deltas.count { |d| d >= 10 }
down = deltas.count { |d| d <= -10 }
return :mixed if up.positive? && down.positive?
return :growing if up >= 2
return :declining if down >= 2
:steady
end
def delta_pct(current, previous)
return nil if previous.blank? || previous.zero?
(((current - previous).to_f / previous) * 100).round
end
def signal(key, headline, score:, category:)
{ key: key, category: category, headline: headline, score: score }
end
# ---- internal signals --------------------------------------------------
def new_topics_count(start_date, end_date)
DB.query_single(<<~SQL, start_date: start_date, end_date: end_date).first.to_i
SELECT COUNT(*)
FROM topics t
WHERE t.created_at >= :start_date
AND t.created_at < (:end_date::date + 1)
AND t.deleted_at IS NULL
AND t.visible = true
AND t.archetype = 'regular'
SQL
end
def topic_volume
current = new_topics_count(@start_date, @end_date)
previous = new_topics_count(@prev_start, @prev_end)
delta = delta_pct(current, previous)
return if delta.nil? || delta.abs < 30
return if delta.positive? && current < 5
return if delta.negative? && previous < 5
direction = delta.positive? ? "up" : "down"
signal(
:topic_volume,
"New topics were #{direction} #{delta.abs}% versus the previous period",
score: [delta.abs / 200.0, 0.8].min,
category: :participation,
)
end
def hot_topic
row = DB.query(<<~SQL, start_date: @start_date, end_date: @end_date).first
SELECT t.id, t.title, COUNT(p.id) FILTER (WHERE p.post_number > 1) AS replies
FROM topics t
JOIN posts p ON p.topic_id = t.id AND p.deleted_at IS NULL AND p.post_type = 1
WHERE t.created_at >= :start_date
AND t.created_at < (:end_date::date + 1)
AND t.deleted_at IS NULL
AND t.visible = true
AND t.archetype = 'regular'
GROUP BY t.id, t.title
ORDER BY replies DESC
LIMIT 1
SQL
return if row.nil? || row.replies.to_i < 10
avg = DB.query_single(<<~SQL, start_date: @start_date, end_date: @end_date).first.to_f
SELECT AVG(reply_count) FROM (
SELECT COUNT(p.id) FILTER (WHERE p.post_number > 1) AS reply_count
FROM topics t
JOIN posts p ON p.topic_id = t.id AND p.deleted_at IS NULL AND p.post_type = 1
WHERE t.created_at >= :start_date AND t.created_at < (:end_date::date + 1)
AND t.deleted_at IS NULL AND t.visible = true AND t.archetype = 'regular'
GROUP BY t.id
) per_topic
SQL
return if avg.positive? && row.replies < (avg * 3)
signal(
:hot_topic,
"Busiest discussion: \"#{row.title}\" with #{row.replies} replies",
score: 0.7,
category: :participation,
)
end
def unanswered_gap
count = DB.query_single(<<~SQL, start_date: @start_date, end_date: @end_date).first.to_i
SELECT COUNT(*)
FROM topics t
WHERE t.created_at >= :start_date
AND t.created_at < (:end_date::date + 1)
AND t.posts_count = 1
AND t.deleted_at IS NULL
AND t.visible = true
AND t.archetype = 'regular'
SQL
return if count < 5
total = new_topics_count(@start_date, @end_date)
share = total.positive? ? ((count.to_f / total) * 100).round : 0
headline = "#{count} new topics received no reply"
headline = "#{headline} (#{share}% of new topics)" if share >= 10
signal(
:unanswered_gap,
headline,
score: [0.4 + (share / 100.0), 0.9].min,
category: :support,
)
end
def staff_ratio
return if @period_days > MAX_STAFF_RATIO_DAYS
row = DB.query(<<~SQL, start_date: @start_date, end_date: @end_date).first
SELECT
COUNT(*) FILTER (WHERE u.admin OR u.moderator) AS staff,
COUNT(*) AS total
FROM posts p
JOIN users u ON u.id = p.user_id
WHERE p.created_at >= :start_date
AND p.created_at < (:end_date::date + 1)
AND p.deleted_at IS NULL
AND p.post_type = 1
AND u.id > 0
SQL
return if row.nil? || row.total.to_i < 20
pct = ((row.staff.to_f / row.total) * 100).round
return if pct < 40
signal(
:staff_ratio,
"Staff wrote #{pct}% of posts this period",
score: pct / 100.0,
category: :participation,
)
end
# ---- external signals --------------------------------------------------
def daily_browser_pageviews(start_date, end_date)
req_types = BROWSER_REQ_TYPES.map { |type| ApplicationRequest.req_types[type] }
DB.query(<<~SQL, req_types: req_types, start_date: start_date, end_date: end_date)
SELECT date, SUM(count)::int AS count
FROM application_requests
WHERE req_type IN (:req_types) AND date >= :start_date AND date <= :end_date
GROUP BY date
ORDER BY date ASC
SQL
end
def traffic_volume
current = daily_browser_pageviews(@start_date, @end_date).sum { |r| r.count }
previous = daily_browser_pageviews(@prev_start, @prev_end).sum { |r| r.count }
delta = delta_pct(current, previous)
return if delta.nil? || delta.abs < 30
direction = delta.positive? ? "up" : "down"
signal(
:traffic_volume,
"Browser pageviews were #{direction} #{delta.abs}% versus the previous period",
score: [delta.abs / 200.0, 0.8].min,
category: :acquisition,
)
end
def traffic_spike
daily = daily_browser_pageviews(@start_date, @end_date)
return if daily.size < 3
counts = daily.map(&:count).sort
median = counts[counts.size / 2].to_f
peak = daily.max_by(&:count)
return if median.zero? || peak.count < (median * 3)
multiple = (peak.count / median).round(1)
source = dominant_referrer_for(peak.date)
headline =
if source
"Traffic spiked on #{peak.date} (#{multiple}x the typical day), with #{source} as the top external referrer"
else
"Traffic spiked on #{peak.date} (#{multiple}x the typical day)"
end
signal(:traffic_spike, headline, score: [multiple / 10.0, 0.95].min, category: :acquisition)
end
def dominant_referrer_for(date)
rows = DB.query(<<~SQL, date: date)
SELECT normalized_referrer, count
FROM browser_pageview_referrer_daily_rollups
WHERE date = :date AND normalized_referrer IS NOT NULL
ORDER BY count DESC
LIMIT 5
SQL
return if rows.blank?
total = rows.sum(&:count)
top = rows.find { |row| external_referrer?(row.normalized_referrer) }
return if top.nil?
return if total.zero? || (top.count.to_f / total) < 0.4
top.normalized_referrer
end
def external_referrer?(referrer)
site_host = BrowserPageviewReferrerInspector.normalize_host(Discourse.current_hostname)
referrer = referrer.to_s
return false if referrer.blank? || site_host.blank?
referrer != site_host && !referrer.start_with?("#{site_host}/") &&
!referrer.start_with?("#{site_host}?")
end
def geography
current = DB.query(<<~SQL, start_date: @start_date, end_date: @end_date)
SELECT country_code, SUM(count)::int AS count
FROM browser_pageview_country_daily_rollups
WHERE date >= :start_date AND date <= :end_date AND country_code IS NOT NULL
GROUP BY country_code
ORDER BY count DESC
LIMIT 5
SQL
return if current.blank?
total = current.sum(&:count)
top = current.first
return if total.zero? || (top.count.to_f / total) < 0.35
share = ((top.count.to_f / total) * 100).round
signal(
:geography,
"#{share}% of browser pageviews came from #{top.country_code}",
score: share / 100.0,
category: :acquisition,
)
end
def landing_topic
return if @period_days > MAX_LANDING_TOPIC_DAYS
row = DB.query(<<~SQL, start_date: @start_date, end_date: @end_date).first
SELECT e.topic_id, t.title, COUNT(*) AS visits
FROM browser_pageview_events e
JOIN topics t ON t.id = e.topic_id
WHERE e.created_at >= :start_date
AND e.created_at < (:end_date::date + 1)
AND e.topic_id IS NOT NULL
AND e.normalized_referrer IS NOT NULL
GROUP BY e.topic_id, t.title
ORDER BY visits DESC
LIMIT 1
SQL
return if row.nil? || row.visits.to_i < 50
signal(
:landing_topic,
"External visitors mostly landed on \"#{row.title}\" (#{row.visits} visits)",
score: 0.6,
category: :acquisition,
)
end
end
end
end