mirror of
https://github.com/discourse/discourse.git
synced 2025-09-06 10:50:21 +08:00
FEATURE: control which web crawlers can access using a whitelist or blacklist
This commit is contained in:
parent
cbbeedf53b
commit
ced7e9a691
22 changed files with 722 additions and 97 deletions
|
@ -28,4 +28,25 @@ module CrawlerDetection
|
|||
end
|
||||
|
||||
end
|
||||
|
||||
# Given a user_agent for which crawler? returns true, decide whether its request should be allowed.
|
||||
def self.allow_crawler?(user_agent)
|
||||
return true if SiteSetting.whitelisted_crawler_user_agents.blank? &&
|
||||
SiteSetting.blacklisted_crawler_user_agents.blank?
|
||||
|
||||
@whitelisted_matchers ||= {}
|
||||
@blacklisted_matchers ||= {}
|
||||
|
||||
if SiteSetting.whitelisted_crawler_user_agents.present?
|
||||
whitelisted = @whitelisted_matchers[SiteSetting.whitelisted_crawler_user_agents] ||= to_matcher(SiteSetting.whitelisted_crawler_user_agents)
|
||||
!user_agent.nil? && user_agent.match?(whitelisted)
|
||||
else
|
||||
blacklisted = @blacklisted_matchers[SiteSetting.blacklisted_crawler_user_agents] ||= to_matcher(SiteSetting.blacklisted_crawler_user_agents)
|
||||
user_agent.nil? || !user_agent.match?(blacklisted)
|
||||
end
|
||||
end
|
||||
|
||||
def self.is_blocked_crawler?(user_agent)
|
||||
crawler?(user_agent) && !allow_crawler?(user_agent)
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue