mirror of
https://gh.wpcy.net/https://github.com/discourse/discourse.git
synced 2026-05-27 05:21:05 +08:00
Previously, persisted browser pageview tracking could raise when `RequestTracker` attempted to insert a `BrowserPageviewEvent` during a PostgreSQL readonly transaction. Transaction readonly is enabled for example, here: https://github.com/discourse/discourse/blob/main/app/models/report.rb#L250
812 lines
26 KiB
Ruby
Vendored
812 lines
26 KiB
Ruby
Vendored
# frozen_string_literal: true
|
|
|
|
require "method_profiler"
|
|
require "middleware/anonymous_cache"
|
|
require "http_user_agent_encoder"
|
|
require "discourse_lograge"
|
|
|
|
class Middleware::RequestTracker
|
|
@@detailed_request_loggers = nil
|
|
@@ip_skipper = nil
|
|
@@bpv_notifications_enabled = !Rails.env.test?
|
|
|
|
def self.bpv_notifications_enabled
|
|
@@bpv_notifications_enabled
|
|
end
|
|
|
|
def self.bpv_notifications_enabled=(value)
|
|
@@bpv_notifications_enabled = value
|
|
end
|
|
|
|
# You can add exceptions to our app rate limiter in the app.yml ENV section.
|
|
# example:
|
|
#
|
|
# env:
|
|
# DISCOURSE_MAX_REQS_PER_IP_EXCEPTIONS: >-
|
|
# 14.15.16.32/27
|
|
# 216.148.1.2
|
|
#
|
|
STATIC_IP_SKIPPER =
|
|
ENV["DISCOURSE_MAX_REQS_PER_IP_EXCEPTIONS"]&.split&.map { |ip| IPAddr.new(ip) }
|
|
|
|
MAX_URL_LENGTH = 2000
|
|
MAX_SESSION_ID_LENGTH = 32
|
|
MAX_USER_AGENT_LENGTH = 1000
|
|
MAX_IP_ADDRESS_LENGTH = 45
|
|
|
|
# register callbacks for detailed request loggers called on every request
|
|
# example:
|
|
#
|
|
# Middleware::RequestTracker.detailed_request_logger(->|env, data| do
|
|
# # do stuff with env and data
|
|
# end
|
|
def self.register_detailed_request_logger(callback)
|
|
MethodProfiler.ensure_discourse_instrumentation!
|
|
(@@detailed_request_loggers ||= []) << callback
|
|
end
|
|
|
|
def self.unregister_detailed_request_logger(callback)
|
|
@@detailed_request_loggers.delete(callback)
|
|
@detailed_request_loggers = nil if @@detailed_request_loggers.length == 0
|
|
end
|
|
|
|
# used for testing
|
|
def self.unregister_ip_skipper
|
|
@@ip_skipper = nil
|
|
end
|
|
|
|
# Register a custom `ip_skipper`, a function that will skip rate limiting
|
|
# for any IP that returns true.
|
|
#
|
|
# For example, if you never wanted to rate limit 1.2.3.4
|
|
#
|
|
# ```
|
|
# Middleware::RequestTracker.register_ip_skipper do |ip|
|
|
# ip == "1.2.3.4"
|
|
# end
|
|
# ```
|
|
def self.register_ip_skipper(&blk)
|
|
raise "IP skipper is already registered!" if @@ip_skipper
|
|
@@ip_skipper = blk
|
|
end
|
|
|
|
def self.ip_skipper
|
|
@@ip_skipper
|
|
end
|
|
|
|
def self.reset_rate_limiters_stack
|
|
@@stack =
|
|
begin
|
|
# Update the documentation for the `add_request_rate_limiter` plugin API if this list changes.
|
|
default_rate_limiters = [
|
|
RequestTracker::RateLimiters::User,
|
|
RequestTracker::RateLimiters::IP,
|
|
]
|
|
|
|
stack = RequestTracker::RateLimiters::Stack.new
|
|
default_rate_limiters.each { |limiter| stack.append(limiter) }
|
|
stack
|
|
end
|
|
end
|
|
|
|
def self.rate_limiters_stack
|
|
@@stack ||= reset_rate_limiters_stack
|
|
end
|
|
|
|
def initialize(app, settings = {})
|
|
@app = app
|
|
end
|
|
|
|
def self.log_request(data)
|
|
if data[:is_api]
|
|
ApplicationRequest.increment!(:api)
|
|
elsif data[:is_user_api]
|
|
ApplicationRequest.increment!(:user_api)
|
|
elsif data[:track_view]
|
|
if data[:is_crawler]
|
|
ApplicationRequest.increment!(:page_view_crawler)
|
|
WebCrawlerRequest.increment!(data[:user_agent])
|
|
elsif data[:is_embed]
|
|
# Embed pageviews are counted in the browser/beacon branches below so
|
|
# community-traffic counters stay unpolluted by iframe traffic.
|
|
elsif data[:has_auth_cookie]
|
|
ApplicationRequest.increment!(:page_view_logged_in)
|
|
ApplicationRequest.increment!(:page_view_logged_in_mobile) if data[:is_mobile]
|
|
elsif !SiteSetting.login_required
|
|
ApplicationRequest.increment!(:page_view_anon)
|
|
ApplicationRequest.increment!(:page_view_anon_mobile) if data[:is_mobile]
|
|
end
|
|
end
|
|
|
|
if tracks_browser_page_view?(data)
|
|
if data[:is_beacon]
|
|
if data[:is_embed]
|
|
ApplicationRequest.increment!(:page_view_embed)
|
|
elsif data[:has_auth_cookie]
|
|
ApplicationRequest.increment!(:page_view_logged_in_browser_beacon)
|
|
if data[:is_mobile]
|
|
ApplicationRequest.increment!(:page_view_logged_in_browser_mobile_beacon)
|
|
end
|
|
else
|
|
ApplicationRequest.increment!(:page_view_anon_browser_beacon)
|
|
ApplicationRequest.increment!(:page_view_anon_browser_mobile_beacon) if data[:is_mobile]
|
|
end
|
|
trigger_beacon_browser_pageview_event(data)
|
|
else
|
|
if data[:is_embed]
|
|
ApplicationRequest.increment!(:page_view_embed)
|
|
elsif data[:has_auth_cookie]
|
|
ApplicationRequest.increment!(:page_view_logged_in_browser)
|
|
ApplicationRequest.increment!(:page_view_logged_in_browser_mobile) if data[:is_mobile]
|
|
else
|
|
ApplicationRequest.increment!(:page_view_anon_browser)
|
|
ApplicationRequest.increment!(:page_view_anon_browser_mobile) if data[:is_mobile]
|
|
end
|
|
trigger_browser_pageview_event(data)
|
|
end
|
|
|
|
if data[:topic_id].present? && (!data[:has_auth_cookie] || data[:current_user_id].present?) &&
|
|
!data[:is_embed]
|
|
TopicsController.defer_topic_view(
|
|
data[:topic_id],
|
|
data[:request_remote_ip],
|
|
data[:current_user_id],
|
|
)
|
|
end
|
|
end
|
|
|
|
ApplicationRequest.increment!(:http_total)
|
|
|
|
status = data[:status]
|
|
if status >= 500
|
|
ApplicationRequest.increment!(:http_5xx)
|
|
elsif data[:is_background]
|
|
ApplicationRequest.increment!(:http_background)
|
|
elsif status >= 400
|
|
ApplicationRequest.increment!(:http_4xx)
|
|
elsif status >= 300
|
|
ApplicationRequest.increment!(:http_3xx)
|
|
elsif status >= 200
|
|
ApplicationRequest.increment!(:http_2xx)
|
|
end
|
|
end
|
|
|
|
def self.get_data(env, result, timing, request = nil)
|
|
status, headers = result
|
|
|
|
# result may be nil if the downstream app raised an exception
|
|
status = status.to_i
|
|
headers ||= {}
|
|
|
|
request ||= Rack::Request.new(env)
|
|
helper = Middleware::AnonymousCache::Helper.new(env, request)
|
|
|
|
# Since ActionDispatch::RemoteIp middleware is run before this middleware,
|
|
# we have access to the normalised remote IP based on ActionDispatch::RemoteIp::GetIp
|
|
#
|
|
# NOTE: Locally with MessageBus requests, the remote IP ends up as ::1 because
|
|
# of the X-Forwarded-For header set...somewhere, whereas all other requests
|
|
# end up as 127.0.0.1.
|
|
request_remote_ip = env["action_dispatch.remote_ip"].to_s.slice(0, MAX_IP_ADDRESS_LENGTH)
|
|
|
|
view_tracking_data = extract_view_tracking_data(env, status, headers)
|
|
|
|
auth_cookie = Auth::DefaultCurrentUserProvider.find_v0_auth_cookie(request)
|
|
auth_cookie ||= Auth::DefaultCurrentUserProvider.find_v1_auth_cookie(env)
|
|
has_auth_cookie = auth_cookie.present?
|
|
|
|
is_api ||= !!env[Auth::DefaultCurrentUserProvider::API_KEY_ENV]
|
|
is_user_api ||= !!env[Auth::DefaultCurrentUserProvider::USER_API_KEY_ENV]
|
|
|
|
is_message_bus = request.path.start_with?("#{Discourse.base_path}/message-bus/")
|
|
is_topic_timings = request.path.start_with?("#{Discourse.base_path}/topics/timings")
|
|
|
|
current_user_id = nil
|
|
current_username = nil
|
|
|
|
if view_tracking_data[:browser_page_view]
|
|
begin
|
|
if auth_cookie.is_a?(Hash)
|
|
current_user_id = auth_cookie[:user_id]
|
|
current_username = auth_cookie[:username]
|
|
else
|
|
user = CurrentUser.lookup_from_env(env)
|
|
if user
|
|
current_user_id = user.id
|
|
current_username = user.username
|
|
end
|
|
end
|
|
rescue Discourse::InvalidAccess => err
|
|
# This error is raised when the API key is invalid, no need to stop the show.
|
|
Discourse.warn_exception(
|
|
err,
|
|
message: "RequestTracker.get_data failed with an invalid API key error",
|
|
)
|
|
end
|
|
end
|
|
|
|
request_data = {
|
|
status: status,
|
|
is_crawler: helper.is_crawler?,
|
|
has_auth_cookie: has_auth_cookie,
|
|
current_user_id: current_user_id,
|
|
current_username: current_username,
|
|
is_api: is_api,
|
|
is_user_api: is_user_api,
|
|
is_background: is_message_bus || is_topic_timings,
|
|
is_mobile: helper.is_mobile?,
|
|
timing: timing,
|
|
queue_seconds: env[Middleware::ProcessingRequest::REQUEST_QUEUE_SECONDS_ENV_KEY],
|
|
request_remote_ip: request_remote_ip,
|
|
occurred_at: Time.zone.now,
|
|
}.merge(view_tracking_data)
|
|
|
|
if request_data[:is_background]
|
|
request_data[:background_type] = if is_message_bus
|
|
if request.query_string.include?("dlp=t")
|
|
"message-bus-dlp"
|
|
elsif env["HTTP_DONT_CHUNK"]
|
|
"message-bus-dontchunk"
|
|
else
|
|
"message-bus"
|
|
end
|
|
else
|
|
"topic-timings"
|
|
end
|
|
end
|
|
|
|
if request_data[:is_crawler]
|
|
user_agent = env["HTTP_USER_AGENT"]
|
|
user_agent = HttpUserAgentEncoder.ensure_utf8(user_agent) if user_agent
|
|
request_data[:user_agent] = user_agent
|
|
end
|
|
|
|
if cache = headers["X-Discourse-Cached"]
|
|
request_data[:cache] = cache
|
|
end
|
|
|
|
request_data
|
|
end
|
|
|
|
def log_request_info(env, result, info, request = nil)
|
|
# We've got to skip this on error ... its just logging
|
|
data =
|
|
begin
|
|
self.class.get_data(env, result, info, request)
|
|
rescue StandardError => err
|
|
Discourse.warn_exception(err, message: "RequestTracker.get_data failed")
|
|
|
|
# This is super hard to find if in testing, we should still raise in this case.
|
|
raise err if Rails.env.test?
|
|
|
|
nil
|
|
end
|
|
|
|
if data
|
|
if result && (headers = result[1])
|
|
headers["X-Discourse-TrackView"] = "1" if data[:track_view]
|
|
headers["X-Discourse-BrowserPageView"] = "1" if data[:browser_page_view]
|
|
end
|
|
|
|
if @@detailed_request_loggers
|
|
@@detailed_request_loggers.each { |logger| logger.call(env, data) }
|
|
end
|
|
|
|
log_later(data, env, request)
|
|
end
|
|
end
|
|
|
|
def call(env)
|
|
result = nil
|
|
info = nil
|
|
gc_stat_timing = nil
|
|
|
|
# Doing this before the app.call will allow us to have this data available
|
|
# in the MessageBus middleware to add headers in the 004-message_bus.rb initializer.
|
|
if !env["discourse.request_tracker.skip"]
|
|
env["discourse.view_tracking_data"] = self.class.extract_view_tracking_data(env, nil, nil)
|
|
end
|
|
|
|
request = Rack::Request.new(env)
|
|
cookie = find_auth_cookie(env)
|
|
|
|
if error_details = rate_limit(request, cookie)
|
|
available_in, error_code = error_details
|
|
|
|
message = <<~TEXT
|
|
Slow down, you're making too many requests.
|
|
Please retry again in #{available_in} seconds.
|
|
Error code: #{error_code}.
|
|
TEXT
|
|
|
|
headers = {
|
|
"Content-Type" => "text/plain",
|
|
"Retry-After" => available_in.to_s,
|
|
"Discourse-Rate-Limit-Error-Code" => error_code,
|
|
}
|
|
|
|
if username = cookie&.[](:username)
|
|
headers["X-Discourse-Username"] = username
|
|
end
|
|
|
|
return 429, headers, [message]
|
|
end
|
|
|
|
if !cookie
|
|
if error_details = check_crawler_limits(env)
|
|
available_in, error_code = error_details
|
|
message = "Too many crawling requests. Error code: #{error_code}."
|
|
|
|
headers = {
|
|
"Content-Type" => "text/plain",
|
|
"Retry-After" => available_in.to_s,
|
|
"Discourse-Rate-Limit-Error-Code" => error_code,
|
|
}
|
|
|
|
return 429, headers, [message]
|
|
end
|
|
end
|
|
|
|
env["discourse.request_tracker"] = self
|
|
|
|
if self.class.is_beacon_tracking_request?(request)
|
|
if self.class.same_origin_beacon_request?(request)
|
|
result = [204, {}, []]
|
|
else
|
|
env["discourse.request_tracker.skip"] = true
|
|
result = [403, {}, []]
|
|
end
|
|
return result
|
|
end
|
|
|
|
if self.class.is_pageview_tracking_request?(request)
|
|
result = [204, {}, []]
|
|
return result
|
|
end
|
|
|
|
MethodProfiler.start
|
|
|
|
if SiteSetting.instrument_gc_stat_per_request
|
|
gc_stat_timing = GCStatInstrumenter.instrument { result = @app.call(env) }
|
|
else
|
|
result = @app.call(env)
|
|
end
|
|
|
|
info = MethodProfiler.stop
|
|
|
|
# possibly transferred?
|
|
if info && (headers = result[1])
|
|
headers["X-Runtime"] = "%0.6f" % info[:total_duration]
|
|
|
|
if GlobalSetting.enable_performance_http_headers
|
|
if redis = info[:redis]
|
|
headers["X-Redis-Calls"] = redis[:calls].to_s
|
|
headers["X-Redis-Time"] = "%0.6f" % redis[:duration]
|
|
end
|
|
|
|
if sql = info[:sql]
|
|
headers["X-Sql-Calls"] = sql[:calls].to_s
|
|
headers["X-Sql-Time"] = "%0.6f" % sql[:duration]
|
|
end
|
|
|
|
if queue = env[Middleware::ProcessingRequest::REQUEST_QUEUE_SECONDS_ENV_KEY]
|
|
headers["X-Queue-Time"] = "%0.6f" % queue
|
|
end
|
|
end
|
|
end
|
|
|
|
if env[Auth::DefaultCurrentUserProvider::BAD_TOKEN] && (headers = result[1])
|
|
headers["Discourse-Logged-Out"] = "1"
|
|
end
|
|
|
|
result
|
|
ensure
|
|
if (limiters = env["DISCOURSE_RATE_LIMITERS"]) && env["DISCOURSE_IS_ASSET_PATH"]
|
|
limiters.each(&:rollback!)
|
|
|
|
env["DISCOURSE_ASSET_RATE_LIMITERS"].each do |limiter|
|
|
limiter.performed!
|
|
rescue RateLimiter::LimitExceeded
|
|
# skip
|
|
end
|
|
end
|
|
|
|
if !env["discourse.request_tracker.skip"]
|
|
info.merge!(gc_stat_timing) if gc_stat_timing
|
|
log_request_info(env, result, info, request)
|
|
end
|
|
end
|
|
|
|
def log_later(data, env, request)
|
|
Scheduler::Defer.later("Track view") do
|
|
unless Discourse.pg_readonly_mode?
|
|
self.class.log_request(data)
|
|
instrument_browser_page_view(env, request, data)
|
|
end
|
|
rescue ActiveRecord::ReadOnlyError
|
|
# Just noop if ActiveRecord is preventing writes
|
|
end
|
|
end
|
|
|
|
def find_auth_cookie(env)
|
|
min_allowed_timestamp = Time.now.to_i - (UserAuthToken::ROTATE_TIME_MINS + 1) * 60
|
|
cookie = Auth::DefaultCurrentUserProvider.find_v1_auth_cookie(env)
|
|
cookie if cookie && cookie[:issued_at] >= min_allowed_timestamp
|
|
end
|
|
|
|
def is_private_ip?(ip)
|
|
ip = IPAddr.new(ip)
|
|
!!(ip && (ip.private? || ip.loopback?))
|
|
rescue IPAddr::AddressFamilyError, IPAddr::InvalidAddressError
|
|
false
|
|
end
|
|
|
|
def rate_limit(request, cookie)
|
|
warn =
|
|
GlobalSetting.max_reqs_per_ip_mode == "warn" ||
|
|
GlobalSetting.max_reqs_per_ip_mode == "warn+block"
|
|
|
|
block =
|
|
GlobalSetting.max_reqs_per_ip_mode == "block" ||
|
|
GlobalSetting.max_reqs_per_ip_mode == "warn+block"
|
|
|
|
return if !block && !warn
|
|
|
|
ip = request.ip
|
|
|
|
if !GlobalSetting.max_reqs_rate_limit_on_private
|
|
return if is_private_ip?(ip)
|
|
end
|
|
|
|
return if @@ip_skipper&.call(ip)
|
|
return if STATIC_IP_SKIPPER&.any? { |entry| entry.include?(ip) }
|
|
|
|
rate_limiter = self.class.rate_limiters_stack.active_rate_limiter(request, cookie)
|
|
return nil if rate_limiter.nil?
|
|
rate_limit_key = rate_limiter.rate_limit_key
|
|
error_code_identifier = rate_limiter.error_code_identifier
|
|
global = rate_limiter.rate_limit_globally?
|
|
|
|
limiter10 =
|
|
RateLimiter.new(
|
|
nil,
|
|
"global_limit_10_#{rate_limit_key}",
|
|
GlobalSetting.max_reqs_per_ip_per_10_seconds,
|
|
10.seconds,
|
|
global:,
|
|
aggressive: true,
|
|
error_code: "#{error_code_identifier}_10_secs_limit",
|
|
)
|
|
|
|
limiter60 =
|
|
RateLimiter.new(
|
|
nil,
|
|
"global_limit_60_#{rate_limit_key}",
|
|
GlobalSetting.max_reqs_per_ip_per_minute,
|
|
1.minute,
|
|
global:,
|
|
error_code: "#{error_code_identifier}_60_secs_limit",
|
|
aggressive: true,
|
|
)
|
|
|
|
limiter_assets10 =
|
|
RateLimiter.new(
|
|
nil,
|
|
"global_limit_10_assets_#{rate_limit_key}",
|
|
GlobalSetting.max_asset_reqs_per_ip_per_10_seconds,
|
|
10.seconds,
|
|
error_code: "#{error_code_identifier}_assets_10_secs_limit",
|
|
global:,
|
|
)
|
|
|
|
request.env["DISCOURSE_RATE_LIMITERS"] = [limiter10, limiter60]
|
|
request.env["DISCOURSE_ASSET_RATE_LIMITERS"] = [limiter_assets10]
|
|
|
|
if !limiter_assets10.can_perform?
|
|
if warn
|
|
Discourse.warn(
|
|
"Global asset rate limit exceeded for #{rate_limiter.class.name}: #{rate_limit_key}: 10 second rate limit",
|
|
uri: request.env["REQUEST_URI"],
|
|
)
|
|
end
|
|
|
|
return limiter_assets10.seconds_to_wait(Time.now.to_i), limiter_assets10.error_code if block
|
|
end
|
|
|
|
begin
|
|
type = 10
|
|
limiter10.performed!
|
|
|
|
type = 60
|
|
limiter60.performed!
|
|
|
|
nil
|
|
rescue RateLimiter::LimitExceeded => e
|
|
if warn
|
|
Discourse.warn(
|
|
"Global rate limit exceeded for #{rate_limiter.class.name}: #{rate_limit_key}: #{type} second rate limit",
|
|
uri: request.env["REQUEST_URI"],
|
|
)
|
|
end
|
|
|
|
if block
|
|
[e.available_in, e.error_code]
|
|
else
|
|
nil
|
|
end
|
|
end
|
|
end
|
|
|
|
def check_crawler_limits(env)
|
|
slow_down_agents = SiteSetting.slow_down_crawler_user_agents
|
|
return if slow_down_agents.blank?
|
|
|
|
user_agent = HttpUserAgentEncoder.ensure_utf8(env["HTTP_USER_AGENT"])&.downcase
|
|
return if user_agent.blank?
|
|
|
|
return if !CrawlerDetection.crawler?(user_agent)
|
|
|
|
slow_down_agents
|
|
.downcase
|
|
.split("|")
|
|
.each do |crawler|
|
|
if user_agent.include?(crawler)
|
|
key = "#{crawler}_crawler_rate_limit"
|
|
limiter =
|
|
RateLimiter.new(nil, key, 1, SiteSetting.slow_down_crawler_rate, error_code: key)
|
|
limiter.performed!
|
|
break
|
|
end
|
|
end
|
|
nil
|
|
rescue RateLimiter::LimitExceeded => e
|
|
[e.available_in, e.error_code]
|
|
end
|
|
|
|
def self.extract_view_tracking_data(env, status, headers)
|
|
request = Rack::Request.new(env)
|
|
status ||= 200
|
|
headers ||= {}
|
|
|
|
return extract_beacon_view_tracking_data(env) if is_beacon_tracking_request?(request)
|
|
|
|
is_html_request = headers["Content-Type"]&.include?("text/html")
|
|
is_ajax_request = request.xhr?
|
|
|
|
# This Discourse-Track-View request header is set in `lib/ajax.js`,
|
|
# whenever the user navigates between Ember routes, to indicate a
|
|
# browser page view.
|
|
env_track_view = env["HTTP_DISCOURSE_TRACK_VIEW"]
|
|
explicit_track_view = status == 200 && %w[1 true].include?(env_track_view)
|
|
|
|
# An HTML response to a GET request is tracked implicitly, these do
|
|
# not count as browser page views but they do count as legacy page views.
|
|
implicit_track_view =
|
|
status == 200 && !%w[0 false].include?(env_track_view) && request.get? && !is_ajax_request &&
|
|
is_html_request
|
|
|
|
# This Discourse-Deferred-Track-View header is piggybacked on a
|
|
# follow-up MessageBus request after a real browser loads up a page
|
|
# to avoid bots influencing browser page views when loading HTML
|
|
# versions of a page.
|
|
#
|
|
# See `scripts/pageview.js` and `instance-initializers/page-tracking.js`
|
|
env_deferred_track_view = env["HTTP_DISCOURSE_TRACK_VIEW_DEFERRED"]
|
|
deferred_track_view = %w[1 true].include?(env_deferred_track_view)
|
|
|
|
# This only indicates that we are tracking a page view of some kind, not
|
|
# using an API key. In #log_request is where we are determining which
|
|
# of these count as browser page views.
|
|
#
|
|
# TL;DR -- Explicit and Deferred page views count as browser page views (BPVs),
|
|
# explicit and implicit page views count as legacy page views.
|
|
#
|
|
# If this is true, then the X-Discourse-TrackView header is included in
|
|
# the response.
|
|
#
|
|
# If the page view is explicit or deferred, then the X-Discourse-BrowserPageView header
|
|
# is included in the response.
|
|
track_view = !!(explicit_track_view || implicit_track_view)
|
|
browser_page_view = !!(explicit_track_view || deferred_track_view)
|
|
|
|
topic_id = env["HTTP_DISCOURSE_TRACK_VIEW_TOPIC_ID"]&.to_i
|
|
tracking_url = env["HTTP_DISCOURSE_TRACK_VIEW_URL"]&.slice(0, MAX_URL_LENGTH)
|
|
tracking_referrer = env["HTTP_DISCOURSE_TRACK_VIEW_REFERRER"]&.slice(0, MAX_URL_LENGTH)
|
|
tracking_session_id =
|
|
env["HTTP_DISCOURSE_TRACK_VIEW_SESSION_ID"]&.slice(0, MAX_SESSION_ID_LENGTH)
|
|
user_agent = env["HTTP_USER_AGENT"]&.slice(0, MAX_USER_AGENT_LENGTH)
|
|
|
|
# An embedded pageview is either an initial HTML load carrying `?embed_mode=true`,
|
|
# or a subsequent XHR from inside the embed iframe which sets the header below.
|
|
# Use `request.GET` (query-string only) rather than `request.params` so a missing
|
|
# or malformed request body cannot raise from here.
|
|
is_embed =
|
|
request.GET["embed_mode"] == "true" ||
|
|
%w[1 true].include?(env["HTTP_DISCOURSE_TRACK_VIEW_EMBED"])
|
|
|
|
{
|
|
track_view: track_view,
|
|
explicit_track_view: explicit_track_view,
|
|
deferred_track_view: deferred_track_view,
|
|
implicit_track_view: implicit_track_view,
|
|
browser_page_view: browser_page_view,
|
|
is_embed: is_embed,
|
|
topic_id: topic_id,
|
|
tracking_url: tracking_url,
|
|
tracking_referrer: tracking_referrer,
|
|
tracking_session_id: tracking_session_id,
|
|
user_agent: user_agent,
|
|
}
|
|
end
|
|
|
|
def self.tracks_browser_page_view?(data)
|
|
return false unless data[:browser_page_view]
|
|
return false if data[:is_crawler]
|
|
return true if data[:has_auth_cookie]
|
|
return false if SiteSetting.login_required
|
|
return false if data[:is_beacon] && CrawlerDetection.crawler_ip?(data[:request_remote_ip])
|
|
true
|
|
end
|
|
|
|
def self.is_beacon_tracking_request?(request)
|
|
SiteSetting.use_beacon_for_browser_page_views && request.post? &&
|
|
request.path == Discourse.beacon_pv_tracking_path
|
|
end
|
|
|
|
def self.same_origin_beacon_request?(request)
|
|
origin = request.get_header("HTTP_ORIGIN").presence || request.referer.presence
|
|
return false if origin.blank?
|
|
|
|
canonical_uri = URI.parse(Discourse.base_url_no_prefix)
|
|
origin_uri = URI.parse(origin)
|
|
|
|
canonical_uri.scheme == origin_uri.scheme && canonical_uri.host == origin_uri.host &&
|
|
canonical_uri.port == origin_uri.port
|
|
rescue URI::Error
|
|
false
|
|
end
|
|
|
|
def self.is_pageview_tracking_request?(request)
|
|
request.post? && request.path == "#{Discourse.base_path}/pageview"
|
|
end
|
|
|
|
def self.extract_beacon_view_tracking_data(env)
|
|
body = env["rack.input"]&.read
|
|
env["rack.input"]&.rewind
|
|
data =
|
|
begin
|
|
JSON.parse(body)
|
|
rescue JSON::ParserError
|
|
{}
|
|
end
|
|
|
|
topic_id = data["topic_id"]&.to_i
|
|
tracking_url = data["url"]&.slice(0, MAX_URL_LENGTH)
|
|
tracking_referrer = data["referrer"]&.slice(0, MAX_URL_LENGTH)
|
|
tracking_session_id = data["session_id"]&.slice(0, MAX_SESSION_ID_LENGTH)
|
|
user_agent = env["HTTP_USER_AGENT"]&.slice(0, MAX_USER_AGENT_LENGTH)
|
|
is_embed = data["embed"] == true
|
|
|
|
{
|
|
track_view: false,
|
|
explicit_track_view: false,
|
|
deferred_track_view: true,
|
|
implicit_track_view: false,
|
|
browser_page_view: true,
|
|
is_beacon: true,
|
|
is_embed: is_embed,
|
|
topic_id: topic_id,
|
|
tracking_url: tracking_url,
|
|
tracking_referrer: tracking_referrer,
|
|
tracking_session_id: tracking_session_id,
|
|
user_agent: user_agent,
|
|
}
|
|
end
|
|
private_class_method :extract_beacon_view_tracking_data
|
|
|
|
def self.trigger_browser_pageview_event(data)
|
|
if SiteSetting.persist_browser_pageview_events
|
|
persist_browser_pageview_event(build_browser_pageview_event_payload(data))
|
|
elsif SiteSetting.trigger_browser_pageview_events
|
|
DiscourseEvent.trigger(:browser_pageview, build_browser_pageview_event_payload(data))
|
|
end
|
|
end
|
|
private_class_method :trigger_browser_pageview_event
|
|
|
|
def self.persist_browser_pageview_event(payload)
|
|
Scheduler::Defer.later "Create BrowserPageviewEvent" do
|
|
BrowserPageviewEvent.create!(
|
|
url: payload[:url],
|
|
ip_address: payload[:ip_address],
|
|
country_code: payload[:country_code],
|
|
asn: payload[:asn],
|
|
referrer: payload[:referrer],
|
|
normalized_referrer: BrowserPageviewReferrerInspector.normalize(payload[:referrer]),
|
|
user_agent: payload[:user_agent],
|
|
session_id: payload[:session_id],
|
|
user_id: payload[:user_id],
|
|
topic_id: payload[:topic_id],
|
|
created_at: payload[:occurred_at],
|
|
)
|
|
rescue ActiveRecord::StatementInvalid => e
|
|
if e.cause.is_a?(PG::ReadOnlySqlTransaction)
|
|
# Skip recording browser pageviews when PostgreSQL is in read-only transaction mode.
|
|
elsif e.cause.is_a?(PG::NotNullViolation) && e.cause.message.include?("ip_address")
|
|
Rails.logger.debug("Discarding BrowserPageviewEvent: invalid IP #{payload[:ip_address]}")
|
|
else
|
|
raise
|
|
end
|
|
rescue => e
|
|
Rails.logger.error(
|
|
"Failed to create BrowserPageviewEvent with payload #{payload}: #{e.message}",
|
|
)
|
|
end
|
|
end
|
|
private_class_method :persist_browser_pageview_event
|
|
|
|
def self.trigger_beacon_browser_pageview_event(data)
|
|
if SiteSetting.trigger_browser_pageview_events
|
|
DiscourseEvent.trigger(:beacon_browser_pageview, build_browser_pageview_event_payload(data))
|
|
end
|
|
end
|
|
private_class_method :trigger_beacon_browser_pageview_event
|
|
|
|
def self.build_browser_pageview_event_payload(data)
|
|
ip_info = DiscourseIpInfo.get(data[:request_remote_ip])
|
|
{
|
|
user_id: data[:current_user_id],
|
|
url: data[:tracking_url],
|
|
ip_address: data[:request_remote_ip],
|
|
country_code: ip_info[:country_code],
|
|
asn: ip_info[:asn],
|
|
user_agent: data[:user_agent],
|
|
referrer: data[:tracking_referrer],
|
|
session_id: data[:tracking_session_id],
|
|
topic_id: data[:topic_id],
|
|
occurred_at: data[:occurred_at],
|
|
}
|
|
end
|
|
private_class_method :build_browser_pageview_event_payload
|
|
|
|
private
|
|
|
|
def instrument_browser_page_view(env, request, data)
|
|
return unless data[:browser_page_view]
|
|
return unless DiscourseLograge.enabled?
|
|
return unless self.class.bpv_notifications_enabled
|
|
|
|
request ||= Rack::Request.new(env)
|
|
|
|
if data[:is_beacon]
|
|
action = "beacon"
|
|
path = request.fullpath
|
|
else
|
|
action = "piggyback"
|
|
path = "#{Discourse.base_path}/pageview"
|
|
end
|
|
|
|
payload = {
|
|
controller: "PageviewController",
|
|
action: action,
|
|
method: "POST",
|
|
path: path,
|
|
format: :json,
|
|
status: 204,
|
|
params: {
|
|
},
|
|
headers: ActionDispatch::Request.new(env).headers,
|
|
custom_payload:
|
|
DiscourseLograge.custom_payload(
|
|
ip: data[:request_remote_ip],
|
|
username: data[:current_username],
|
|
tracked: self.class.tracks_browser_page_view?(data),
|
|
url: data[:tracking_url],
|
|
referrer: data[:tracking_referrer],
|
|
session_id: data[:tracking_session_id],
|
|
topic_id: data[:topic_id],
|
|
),
|
|
}
|
|
|
|
ActiveSupport::Notifications.instrument("process_action.action_controller", payload)
|
|
end
|
|
end
|