discourse/app/services/admin_dashboard_site_traffic.rb
Alan Guo Xiang Tan 437ab337d2
FEATURE: Add top countries and top referrers cards to the admin dashboard (#40215)
This commit adds two new cards to the redesigned admin dashboard's Site
Traffic section: top countries and top referrers, both sourced from
`browser_pageview_events`.

Key technical decisions:

1. Gate the cards on the `persist_browser_pageview_events` site setting.
The cards have no data source unless browser pageview events are being
persisted, so they are omitted from the dashboard.

2. Normalize referrers at write time. A new `normalized_referrer` column
on `browser_pageview_events` is populated by
`BrowserPageviewReferrerInspector`, which strips scheme, `www.`, port,
fragment, trailing slashes, and common tracking query params. Doing this
at insert time avoids per-row string operations at query time.

3. Count browser pageviews by country and by referrer in two new report
concerns. `Reports::TopCountriesByBrowserPageviews` groups by
`country_code` and `Reports::TopReferrersByBrowserPageviews` groups by
`normalized_referrer`. Both compute share of total browser pageviews and
rank the top 5 in SQL. The country report drops MaxMind reserved codes
(unknown, anonymous proxy, satellite). The referrer report drops
same-host referrals. Both also exclude anonymous browser pageviews
(`user_id IS NULL`) when the `login_required` site setting is enabled,
since only logged-in browser pageviews are meaningful on a closed forum.

4. Fetch each report through the existing dashboard service.
`AdminDashboardSiteTraffic#build` returns one entry per card with a `{
rows:, error: }` shape, e.g.:

   ```ruby
   {
     top_countries: {
       rows: [
         { country_code: "US", count: 142, percent: 35 },
         { country_code: "GB", count: 89, percent: 22 }
       ],
       error: nil
     },
     top_referrers: {
       rows: [
         { normalized_referrer: "news.ycombinator.com/item?id=1", count: 47, percent: 12 },
         { normalized_referrer: "reddit.com/r/discourse", count: 31, percent: 8 }
       ],
       error: nil
     }
   }
   ```

On report failure, `rows: []` and `error: "timeout"` (or another error name, as a string).
This lets the UI render rows, error, or empty state independently.
Healthy responses are cached via `Report.find_cached`.
`SiteSetting.login_required` and `Discourse.current_hostname` flow into
`opts[:filters]` so toggling either invalidates the cache. Timeouts skip
the cache so the next request retries.

5. Use `Intl.DisplayNames` for country names instead of locale files.
`Intl.DisplayNames` is a built-in browser API that returns a localized
country name for an ISO 3166-1 alpha-2 code, avoiding ~250 translation
strings per locale.
2026-05-22 12:59:16 +08:00

314 lines
8.8 KiB
Ruby
Vendored

# frozen_string_literal: true
class AdminDashboardSiteTraffic
# Length, in days, of the reporting window used when no usable range is given
# (the fallback start is DEFAULT_RANGE_DAYS - 1 days before today).
DEFAULT_RANGE_DAYS = 30
# Maps each chart series id to the request-type name used to look up that
# series' translated label and its color.
SERIES_LABEL_REQS = {
logged_in: "page_view_logged_in_browser",
anonymous: "page_view_anon_browser",
embedded: "page_view_embed",
crawlers: "page_view_crawler",
}.freeze
# Both constants are implementation details of this class.
private_constant :DEFAULT_RANGE_DAYS
private_constant :SERIES_LABEL_REQS
# Convenience entry point: instantiates the service for the given range and
# returns the result of the instance-level #build.
#
# @param start_date first day of the reporting window (forwarded to .new)
# @param end_date last day of the reporting window (forwarded to .new)
# @return whatever the instance's #build returns
def self.build(start_date:, end_date:)
  service = new(start_date: start_date, end_date: end_date)
  service.build
end
# Resolves the reporting window from the supplied bounds.
#
# A bound that #parse_date cannot turn into a time falls back to its default:
# the start defaults to the beginning of the day (DEFAULT_RANGE_DAYS - 1) days
# ago, the end to the end of the current day. When the resolved start date
# falls after the resolved end date, both bounds are reset to those defaults.
#
# @param start_date first day of the window, parsed via #parse_date
# @param end_date last day of the window, parsed via #parse_date
def initialize(start_date:, end_date:)
  default_start = -> { (DEFAULT_RANGE_DAYS - 1).days.ago.beginning_of_day }
  default_end = -> { Time.zone.now.end_of_day }

  @start_date = parse_date(start_date) || default_start.call
  @end_date = parse_date(end_date)&.end_of_day || default_end.call
  return if @start_date.to_date <= @end_date.to_date

  # An inverted range is discarded entirely rather than partially corrected.
  @start_date = default_start.call
  @end_date = default_end.call
end
# Assembles the dashboard payload for the configured window.
#
# The result always contains :kpis and :pageview_series. The :top_countries
# and :top_referrers cards are added only when the
# persist_browser_pageview_events site setting is enabled.
#
# @return [Hash] the payload described above
def build
  window_rows = traffic_rows(start_date.to_date, end_date.to_date)
  previous_rows = traffic_rows(prior_start_date, prior_end_date)
  with_embedded = include_embedded_series?
  window_totals = build_totals(window_rows, include_embedded: with_embedded)

  payload = {}
  payload[:kpis] = kpis(window_totals, previous_rows)
  payload[:pageview_series] = pageview_series(window_rows, include_embedded: with_embedded)
  return payload unless SiteSetting.persist_browser_pageview_events

  payload[:top_countries] = fetch_card("top_countries_by_browser_pageviews")
  payload[:top_referrers] = fetch_card("top_referrers_by_browser_pageviews")
  payload
end
# Everything below this marker is internal to the service.
private
# Resolved bounds of the reporting window, assigned in #initialize.
attr_reader :start_date, :end_date
# Loads one report-backed card and converts it to a `{ rows:, error: }` hash.
#
# A cached report is used when available. Otherwise the report is computed and
# written to the cache unless it timed out. The login_required setting and the
# current hostname are passed along as report filters.
#
# @param type [String] report type name
# @return [Hash] `{ rows: Array, error: String or nil }`
def fetch_card(type)
  report_opts = {
    start_date: start_date,
    end_date: end_date,
    filters: {
      login_required: SiteSetting.login_required,
      host: Discourse.current_hostname,
    },
    limit: 5,
    wrap_exceptions_in_test: true,
  }

  cache_hit = Report.find_cached(type, report_opts)
  return cached_to_payload(cache_hit) if cache_hit

  fresh = Report.find(type, report_opts)
  return { rows: [], error: "exception" } if fresh.nil?

  # Timeouts skip the cache so the next request retries instead of being
  # pinned to the error for the full 35-minute TTL.
  Report.cache(fresh) unless fresh.error == :timeout

  if fresh.error.present?
    { rows: [], error: fresh.error.to_s }
  else
    { rows: fresh.data, error: nil }
  end
end
# Converts a cached report hash into the `{ rows:, error: }` card shape.
#
# A present :error yields empty rows and the error as a string. Otherwise the
# entries under :data (or none, when :data is missing) are returned with
# symbolized keys.
#
# @param cached [Hash] cached report, read through its :error and :data keys
# @return [Hash] `{ rows: Array, error: String or nil }`
def cached_to_payload(cached)
  cached_error = cached[:error]

  if cached_error.present?
    { rows: [], error: cached_error.to_s }
  else
    entries = cached[:data] || []
    { rows: entries.map(&:symbolize_keys), error: nil }
  end
end
# Lists the chart series to render, in display order.
#
# Only :logged_in is returned when login is required. Otherwise the list is
# :logged_in, :anonymous, optionally :embedded, then :crawlers.
#
# @param include_embedded [Boolean] whether to include the :embedded series
# @return [Array<Symbol>]
def series_ids(include_embedded:)
  return %i[logged_in] if login_required?

  [:logged_in, :anonymous, (:embedded if include_embedded), :crawlers].compact
end
# Builds the KPI section of the payload.
#
# :browser_pageviews is always present. :logged_in_share is included only when
# #logged_in_share_value returns something other than nil.
#
# @param totals [Hash] totals for the current window
# @param prior_rows [Array] rows of the comparison window
# @return [Hash]
def kpis(totals, prior_rows)
  pageviews = browser_pageviews_kpi(totals, prior_rows)
  share = logged_in_share_value(totals)
  return { browser_pageviews: pageviews } if share.nil?

  { browser_pageviews: pageviews, logged_in_share: { value: share } }
end
# Builds the browser-pageviews KPI: the human pageview total, merged with the
# trend fields when #build_trend produces any.
#
# @param totals [Hash] totals for the current window; reads :human
# @param prior_rows [Array] rows of the comparison window
# @return [Hash] `{ value: }` plus the trend keys when a trend is available
def browser_pageviews_kpi(totals, prior_rows)
  base = { value: totals[:human] }
  trend = build_trend(totals, prior_rows)
  trend.blank? ? base : base.merge(trend)
end
# Percentage of human pageviews made by logged-in users, rounded to an
# integer.
#
# @param totals [Hash] reads :human and :logged_in
# @return [Integer, nil] nil when login is required; 0 when there are no
#   human pageviews
def logged_in_share_value(totals)
  return nil if login_required?

  human = totals[:human]
  return 0 unless human.positive?

  (totals[:logged_in].to_f / human * 100).round
end
# Builds one chart series hash per id returned by #series_ids.
#
# Each series carries its request name, label, color, and one data point per
# row.
#
# @param rows [Array] daily traffic rows
# @param include_embedded [Boolean] forwarded to #series_ids
# @return [Array<Hash>] hashes with :req, :label, :color and :data
def pageview_series(rows, include_embedded:)
  series_ids(include_embedded: include_embedded).map do |series_id|
    entry = {}
    entry[:req] = series_req(series_id)
    entry[:label] = series_label(series_id)
    entry[:color] = series_color(series_id)
    entry[:data] = rows.map { |row| pageview_series_point(row, series_id) }
    entry
  end
end
# Converts one traffic row into a chart point for the given series.
#
# @param row [#date] row exposing `date` plus a reader named after the series
# @param id [Symbol] series id, used as the reader name on the row
# @return [Hash] `{ x: ISO 8601 date string, y: Integer }`
def pageview_series_point(row, id)
  day = row.date.iso8601
  count = row.public_send(id).to_i
  { x: day, y: count }
end
# Request-type name for a series, taken from the currently selected names.
#
# @param id [Symbol] series id
# @return the name stored under `id`
# @raise [KeyError] when `id` is not among the selected request type names
def series_req(id)
  names_by_series = selected_request_type_names
  names_by_series.fetch(id)
end
# Translated label for a series, looked up under
# `reports.site_traffic.xaxis.<label request name>`.
#
# @param id [Symbol] series id
# @return the result of `I18n.t` for that key
def series_label(id)
  translation_key = "reports.site_traffic.xaxis.#{series_label_req(id)}"
  I18n.t(translation_key)
end
# Request-type name used for a series' label and color lookups.
#
# @param id [Symbol] series id
# @return [String] the SERIES_LABEL_REQS entry for `id`
# @raise [KeyError] when `id` has no entry in SERIES_LABEL_REQS
def series_label_req(id)
  label_reqs = SERIES_LABEL_REQS
  label_reqs.fetch(id)
end
# Color for a series, read from Reports::SiteTraffic::SERIES_COLORS using the
# series' label request name as the key.
#
# @param id [Symbol] series id
# @return the color stored for that request name
# @raise [KeyError] when no color is registered for the request name
def series_color(id)
  palette = Reports::SiteTraffic::SERIES_COLORS
  palette.fetch(series_label_req(id))
end
# Returns the current value of the login_required site setting.
def login_required?
SiteSetting.login_required
end
# First day of the comparison window, which spans as many days as the
# selected window and ends on #prior_end_date.
def prior_start_date
  last_prior_day = prior_end_date
  last_prior_day - (selected_day_count - 1)
end
# Last day of the comparison window: the day before the selected window
# starts.
def prior_end_date
  start_date.to_date.prev_day
end
# Whether the comparison window can be used for a trend. Currently this is
# exactly the result of #prior_period_tracking_started?.
def prior_period_complete?
prior_period_tracking_started?
end
# Whether application_requests holds at least one human pageview row dated on
# or before the first day of the comparison window.
#
# Only the logged-in request type is considered when login is required;
# otherwise the logged-in and anonymous types are both considered. The answer
# is memoized per instance, including a false answer.
#
# @return [Boolean]
def prior_period_tracking_started?
  if instance_variable_defined?(:@prior_period_tracking_started)
    return @prior_period_tracking_started
  end

  req_type_filter =
    if login_required?
      "req_type = :logged_in_req_type"
    else
      "req_type IN (:logged_in_req_type, :anonymous_req_type)"
    end

  sql = <<~SQL
    SELECT 1
    FROM application_requests
    WHERE date <= :prior_start_date
    AND #{req_type_filter}
    LIMIT 1
  SQL

  bindings = {
    prior_start_date: prior_start_date,
    logged_in_req_type: selected_request_types[:logged_in],
    anonymous_req_type: selected_request_types[:anonymous],
  }

  @prior_period_tracking_started = DB.query_single(sql, **bindings).present?
end
# Parses a user-supplied date in the current time zone and snaps it to the
# beginning of that day.
#
# @param value [Object] anything; converted with `to_s` before parsing
# @return the parsed time at the beginning of its day, or nil when `value` is
#   blank, the parser returns nil, or parsing raises ArgumentError/TypeError
def parse_date(value)
  return if value.blank?

  parsed = Time.zone.parse(value.to_s)
  parsed.nil? ? nil : parsed.beginning_of_day
rescue ArgumentError, TypeError
  nil
end
# Maps each series id to the ApplicationRequest.req_types value registered for
# its selected request-type name. Memoized per instance.
#
# @return [Hash] series id => req_types value
def selected_request_types
  @selected_request_types ||=
    selected_request_type_names.each_with_object({}) do |(series_id, name), types|
      types[series_id] = ApplicationRequest.req_types[name]
    end
end
# Request-type names backing each series. Memoized per instance.
#
# The logged-in and anonymous names depend on the use_legacy_pageviews site
# setting; the crawler and embedded names are always the same.
#
# @return [Hash{Symbol => String}] keyed by :logged_in, :anonymous, :crawlers
#   and :embedded, in that order
def selected_request_type_names
  @selected_request_type_names ||= begin
    human_names =
      if SiteSetting.use_legacy_pageviews
        { logged_in: "page_view_logged_in", anonymous: "page_view_anon" }
      else
        { logged_in: "page_view_logged_in_browser", anonymous: "page_view_anon_browser" }
      end

    human_names.merge(crawlers: "page_view_crawler", embedded: "page_view_embed")
  end
end
# Queries per-day request counts for an inclusive date range.
#
# The query generates one row per day in the range and left-joins
# application_requests, so days without matching rows still appear, with zero
# counts. Each row exposes date, logged_in, anonymous, crawlers and embedded,
# ordered by date ascending.
#
# @param range_start_date first day of the range (cast to date in SQL)
# @param range_end_date last day of the range (cast to date in SQL)
# @return the result of `DB.query`
def traffic_rows(range_start_date, range_end_date)
  sql = <<~SQL
    WITH dates AS (
    SELECT
    request_date::date AS date
    FROM generate_series(
    CAST(:start_date AS date),
    CAST(:end_date AS date),
    INTERVAL '1 day'
    ) request_date
    )
    SELECT
    dates.date,
    COALESCE(SUM(CASE WHEN ar.req_type = :logged_in_req_type THEN ar.count ELSE 0 END), 0)::bigint AS logged_in,
    COALESCE(SUM(CASE WHEN ar.req_type = :anonymous_req_type THEN ar.count ELSE 0 END), 0)::bigint AS anonymous,
    COALESCE(SUM(CASE WHEN ar.req_type = :crawler_req_type THEN ar.count ELSE 0 END), 0)::bigint AS crawlers,
    COALESCE(SUM(CASE WHEN ar.req_type = :embedded_req_type THEN ar.count ELSE 0 END), 0)::bigint AS embedded
    FROM dates
    LEFT JOIN application_requests ar
    ON ar.date = dates.date
    AND ar.req_type IN (
    :logged_in_req_type,
    :anonymous_req_type,
    :crawler_req_type,
    :embedded_req_type
    )
    GROUP BY dates.date
    ORDER BY dates.date ASC
  SQL

  bindings = {
    start_date: range_start_date,
    end_date: range_end_date,
    logged_in_req_type: selected_request_types[:logged_in],
    anonymous_req_type: selected_request_types[:anonymous],
    crawler_req_type: selected_request_types[:crawlers],
    embedded_req_type: selected_request_types[:embedded],
  }

  DB.query(sql, **bindings)
end
# Sums the daily rows into per-series totals.
#
# Anonymous and crawler totals are forced to 0 when login is required, and the
# embedded total is 0 unless `include_embedded` is set. :human is the
# logged-in total plus the anonymous total.
#
# @param rows [Array] daily traffic rows
# @param include_embedded [Boolean] whether embedded pageviews are counted
# @return [Hash] :logged_in, :anonymous, :embedded, :crawlers and :human
def build_totals(rows, include_embedded:)
  open_site = !login_required?

  totals = {
    logged_in: sum_rows(rows, :logged_in),
    anonymous: open_site ? sum_rows(rows, :anonymous) : 0,
    embedded: include_embedded ? sum_rows(rows, :embedded) : 0,
    crawlers: open_site ? sum_rows(rows, :crawlers) : 0,
  }
  totals[:human] = totals[:logged_in] + totals[:anonymous]
  totals
end
# Computes the change in human pageviews against the comparison window.
#
# No trend (nil) is reported when the comparison window is not usable, when
# it had no human pageviews, when both windows have the same count, or when
# the change is smaller than 0.05 percent in magnitude.
#
# @param totals [Hash] totals for the current window; reads :human
# @param prior_rows [Array] rows of the comparison window
# @return [Hash, nil] `{ percent_change:, comparison_period: { start_date:,
#   end_date: } }` with ISO 8601 dates, or nil
def build_trend(totals, prior_rows)
  return unless prior_period_complete?

  current = totals[:human]
  previous = prior_human(prior_rows)
  return if previous.zero?
  return if current == previous

  change = (current - previous).to_f / previous * 100
  return if change.abs < 0.05

  period = { start_date: prior_start_date.iso8601, end_date: prior_end_date.iso8601 }
  { percent_change: format_trend_percent_change(change), comparison_period: period }
end
# Human pageviews in the comparison window: logged-in pageviews, plus
# anonymous pageviews unless login is required.
#
# @param prior_rows [Array] rows of the comparison window
# @return [Integer]
def prior_human(prior_rows)
  human = sum_rows(prior_rows, :logged_in)
  human += sum_rows(prior_rows, :anonymous) unless login_required?
  human
end
# Rounds a percent change for display: one decimal place when its magnitude
# is below 1, otherwise the nearest integer.
#
# @param percent_change [Numeric]
# @return [Numeric] the rounded value
def format_trend_percent_change(percent_change)
  return percent_change.round(1) if percent_change.abs < 1

  percent_change.round
end
# Adds up one field across all rows, coercing each value with `to_i`.
#
# @param rows [Array] objects exposing a public reader named `field`
# @param field [Symbol] reader to call on each row
# @return [Integer] 0 for an empty list
def sum_rows(rows, field)
  rows.inject(0) { |total, row| total + row.public_send(field).to_i }
end
# Whether the embedded series is shown: login must not be required, embedding
# must be enabled, and at least one EmbeddableHost must exist.
def include_embedded_series?
  return false if login_required?

  embedding_enabled? && EmbeddableHost.exists?
end
# Whether either embedding site setting (embed_topics_list or embed_full_app)
# is enabled. embed_full_app is consulted only when embed_topics_list is off.
def embedding_enabled?
  topics_list = SiteSetting.embed_topics_list
  topics_list || SiteSetting.embed_full_app
end
# Number of calendar days in the selected window, counting both end points.
#
# @return [Integer]
def selected_day_count
  last_day = end_date.to_date
  first_day = start_date.to_date
  (last_day - first_day).to_i.succ
end
end