FIX: Various watched words improvements
- Client-side censoring fixed for non-Chrome browsers (the regular expression was rewritten to avoid lookbehind)
- Regex generation is now done on the server, to reduce repeated logic and make it easier to extend in plugins
- Censor tests are moved to Ruby, to ensure everything works end-to-end
- If "watched words regular expressions" is enabled, warn the admin when the generated regex is invalid
parent 4c6a0313f2
commit 39e0442de9
13 changed files with 134 additions and 178 deletions
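The commit message notes that the client-side pattern was rewritten to avoid lookbehind, which non-Chrome engines (notably Safari) could not compile at the time. A minimal sketch of that idea follows (illustrative only; the word list, helper name, and sample output are assumptions, not the Discourse code):

// Sketch only: the old client pattern used a lookbehind, which some engines
// could not compile:
//   new RegExp("((?<!\\w)(?:whiz))(?!\\w)", "ig")   // throws on those engines
// The rewritten pattern keeps the word in a capture group and uses a leading
// non-word-or-start alternative instead (word list here is hypothetical):
const censoredRegexp = new RegExp("(?:\\W|^)(whiz|badword)(?=\\W|$)", "ig");

function censorSketch(text, replacementLetter = "■") {
  // Mask only the captured word, not the leading boundary character.
  return text.replace(censoredRegexp, (fullMatch, word) =>
    fullMatch.replace(word, replacementLetter.repeat(word.length))
  );
}

censorSketch("gee whiz, no badword here."); // -> "gee ■■■■, no ■■■■■■■ here."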
@@ -13,7 +13,7 @@ function getOpts(opts) {
     getURL: Discourse.getURLWithCDN,
     currentUser: Discourse.__container__.lookup("current-user:main"),
-    censoredWords: site.censored_words,
+    censoredRegexp: site.censored_regexp,
     siteSettings,
     formatUsername
   },

@@ -97,7 +97,7 @@ const Topic = RestModel.extend({
   fancyTitle(title) {
     let fancyTitle = censor(
       emojiUnescape(title || ""),
-      Discourse.Site.currentProp("censored_words")
+      Discourse.Site.currentProp("censored_regexp")
     );

     if (Discourse.SiteSettings.support_mixed_text_direction) {
@@ -1,75 +1,19 @@
-function escapeRegexp(text) {
-  return text.replace(/[-/\\^$*+?.()|[\]{}]/g, "\\$&").replace(/\*/g, "S*");
-}
-
-function createCensorRegexp(patterns) {
-  return new RegExp(`((?<!\\w)(?:${patterns.join("|")}))(?!\\w)`, "ig");
-}
-
-export function censorFn(
-  censoredWords,
-  replacementLetter,
-  watchedWordsRegularExpressions
-) {
-  let patterns = [];
-
-  replacementLetter = replacementLetter || "■";
-
-  if (censoredWords && censoredWords.length) {
-    patterns = censoredWords.split("|");
-    if (!watchedWordsRegularExpressions) {
-      patterns = patterns.map(t => `(${escapeRegexp(t)})`);
-    }
-  }
-
-  if (patterns.length) {
-    let censorRegexp;
-
-    try {
-      if (watchedWordsRegularExpressions) {
-        censorRegexp = new RegExp(
-          "((?:" + patterns.join("|") + "))(?![^\\(]*\\))",
-          "ig"
-        );
-      } else {
-        censorRegexp = createCensorRegexp(patterns);
-      }
-
-      if (censorRegexp) {
-        return function(text) {
-          let original = text;
-
-          try {
-            let m = censorRegexp.exec(text);
-            const fourCharReplacement = new Array(5).join(replacementLetter);
-
-            while (m && m[0]) {
-              if (m[0].length > original.length) {
-                return original;
-              } // regex is dangerous
-              if (watchedWordsRegularExpressions) {
-                text = text.replace(censorRegexp, fourCharReplacement);
-              } else {
-                const replacement = new Array(m[0].length + 1).join(
-                  replacementLetter
-                );
-                text = text.replace(
-                  createCensorRegexp([escapeRegexp(m[0])]),
-                  replacement
-                );
-              }
-              m = censorRegexp.exec(text);
-            }
-
-            return text;
-          } catch (e) {
-            return original;
-          }
-        };
-      }
-    } catch (e) {
-      // fall through
-    }
-  }
-
+export function censorFn(regexpString, replacementLetter) {
+  if (regexpString) {
+    let censorRegexp = new RegExp(regexpString, "ig");
+    replacementLetter = replacementLetter || "■";
+
+    return function(text) {
+      text = text.replace(censorRegexp, (fullMatch, ...groupMatches) => {
+        const stringMatch = groupMatches.find(g => typeof g === "string");
+        return fullMatch.replace(
+          stringMatch,
+          new Array(stringMatch.length + 1).join(replacementLetter)
+        );
+      });
+
+      return text;
+    };
+  }
+
   return function(t) {

@@ -77,6 +21,6 @@ export function censorFn(
   };
 }

-export function censor(text, censoredWords, replacementLetter) {
-  return censorFn(censoredWords, replacementLetter)(text);
+export function censor(text, censoredRegexp, replacementLetter) {
+  return censorFn(censoredRegexp, replacementLetter)(text);
 }
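A short usage sketch of the new censorFn/censor exports from the hunk above. The pattern string here is an illustrative stand-in; in production the server generates it from the watched words:

// Assuming the censor / censorFn exports shown above are in scope;
// the pattern below is a hypothetical server-generated value.
const censoredRegexp = "(?:\\W|^)(whiz)(?=\\W|$)";

censor("gee whiz.", censoredRegexp);        // -> "gee ■■■■."
censorFn(censoredRegexp, "*")("gee whiz."); // -> "gee ****."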
@@ -29,15 +29,11 @@ export function setup(helper) {
   });

   helper.registerPlugin(md => {
-    const words = md.options.discourse.censoredWords;
+    const censoredRegexp = md.options.discourse.censoredRegexp;

-    if (words && words.length > 0) {
+    if (censoredRegexp) {
       const replacement = String.fromCharCode(9632);
-      const censor = censorFn(
-        words,
-        replacement,
-        md.options.discourse.watchedWordsRegularExpressions
-      );
+      const censor = censorFn(censoredRegexp, replacement);
       md.core.ruler.push("censored", state => censorTree(state, censor));
     }
   });
@@ -29,7 +29,7 @@ export function buildOptions(state) {
     lookupUploadUrls,
     previewing,
     linkify,
-    censoredWords,
+    censoredRegexp,
     disableEmojis
   } = state;

@@ -67,7 +67,7 @@ export function buildOptions(state) {
     formatUsername,
     emojiUnicodeReplacer,
     lookupUploadUrls,
-    censoredWords,
+    censoredRegexp,
     allowedHrefSchemes: siteSettings.allowed_href_schemes
       ? siteSettings.allowed_href_schemes.split("|")
       : null,
@@ -96,7 +96,7 @@ class AdminDashboardData
     :image_magick_check, :failing_emails_check,
     :subfolder_ends_in_slash_check,
     :pop3_polling_configuration, :email_polling_errored_recently,
-    :out_of_date_themes, :unreachable_themes
+    :out_of_date_themes, :unreachable_themes, :watched_words_check

   add_problem_check do
     sidekiq_check || queue_size_check

@@ -224,6 +224,17 @@ class AdminDashboardData
     I18n.t('dashboard.force_https_warning', base_path: Discourse.base_path) unless SiteSetting.force_https
   end

+  def watched_words_check
+    WatchedWord.actions.keys.each do |action|
+      begin
+        WordWatcher.word_matcher_regexp(action, raise_errors: true)
+      rescue RegexpError => e
+        return I18n.t('dashboard.watched_word_regexp_error', base_path: Discourse.base_path, action: action)
+      end
+    end
+    nil
+  end
+
   def out_of_date_themes
     old_themes = RemoteTheme.out_of_date_themes
     return unless old_themes.present?
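The new watched_words_check above compiles each action's pattern with raise_errors: true and reports the first invalid one. A rough JavaScript analogue of that compile-and-report idea, for illustration only (the function name and structure are assumptions, not part of the commit):

// Illustrative only: compile each pattern, collect the ones that fail.
function findInvalidWatchedWordPatterns(patternsByAction) {
  const problems = [];
  for (const [action, pattern] of Object.entries(patternsByAction)) {
    try {
      new RegExp(pattern, "ig"); // same compile step the server performs
    } catch (e) {
      problems.push({ action, error: e.message });
    }
  }
  return problems;
}

findInvalidWatchedWordPatterns({ censor: "(unbalanced" });
// -> [{ action: "censor", error: "Invalid regular expression: ..." }]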
@@ -30,7 +30,7 @@ class SiteSerializer < ApplicationSerializer
     :wizard_required,
     :topic_featured_link_allowed_category_ids,
     :user_themes,
-    :censored_words,
+    :censored_regexp,
     :shared_drafts_category_id
   )

@@ -156,8 +156,8 @@ class SiteSerializer < ApplicationSerializer
     scope.topic_featured_link_allowed_category_ids
   end

-  def censored_words
-    WordWatcher.words_for_action(:censor).join('|')
+  def censored_regexp
+    WordWatcher.word_matcher_regexp(:censor)&.source
   end

   def shared_drafts_category_id
@@ -20,7 +20,10 @@ class WordWatcher
       end
     end

-  def self.word_matcher_regexp(action)
+  # This regexp is run in miniracer, and the client JS app
+  # Make sure it is compatible with major browsers when changing
+  # hint: non-chrome browsers do not support 'lookbehind'
+  def self.word_matcher_regexp(action, raise_errors: false)
     words = get_cached_words(action)
     if words
       words = words.map do |w|

@@ -31,10 +34,13 @@ class WordWatcher
       regexp = words.join('|')
       if !SiteSetting.watched_words_regular_expressions?
         regexp = "(#{regexp})"
-        regexp = "(?<!\\w)(#{regexp})(?!\\w)"
+        regexp = "(?:\\W|^)#{regexp}(?=\\W|$)"
       end
       Regexp.new(regexp, Regexp::IGNORECASE)
     end
+  rescue RegexpError => e
+    raise if raise_errors
+    nil # Admin will be alerted via admin_dashboard_data.rb
   end

   def self.word_to_regexp(word)
@@ -1304,6 +1304,7 @@ en:
       force_https_warning: "Your website is using SSL. But `<a href='%{base_path}/admin/site_settings/category/all_results?filter=force_https'>force_https</a>` is not yet enabled in your site settings."
       out_of_date_themes: "Updates are available for the following themes:"
       unreachable_themes: "We were unable to check for updates on the following themes:"
+      watched_word_regexp_error: "The regular expression for %{action} watched words is invalid. Please check your <a href='%{base_path}/admin/logs/watched_words'>Watched Word settings</a>, or disable the 'watched words regular expressions' site setting."

     site_settings:
       censored_words: "Words that will be automatically replaced with ■■■■"
@@ -162,7 +162,7 @@ module PrettyText
       __optInput.customEmoji = #{custom_emoji.to_json};
       __optInput.emojiUnicodeReplacer = __emojiUnicodeReplacer;
       __optInput.lookupUploadUrls = __lookupUploadUrls;
-      __optInput.censoredWords = #{WordWatcher.words_for_action(:censor).join('|').to_json};
+      __optInput.censoredRegexp = #{WordWatcher.word_matcher_regexp(:censor)&.source.to_json};
     JS

     if opts[:topicId]
@@ -1042,15 +1042,88 @@ HTML
     expect(PrettyText.cook("abcde ^:;-P")).to include("emoji")
   end

-  it 'can censor words correctly' do
-    begin
-      ['apple', 'banana'].each { |w| Fabricate(:watched_word, word: w, action: WatchedWord.actions[:censor]) }
-      expect(PrettyText.cook('yay banana yay')).not_to include('banana')
-      expect(PrettyText.cook('yay `banana` yay')).not_to include('banana')
-      expect(PrettyText.cook("# banana")).not_to include('banana')
-      expect(PrettyText.cook("# banana")).to include("\u25a0\u25a0")
-    ensure
-      $redis.flushall
-    end
-  end
+  describe "censoring" do
+    after(:all) { $redis.flushall }
+
+    def expect_cooked_match(raw, expected_cooked)
+      expect(PrettyText.cook(raw)).to eq(expected_cooked)
+    end
+
+    context "with basic words" do
+      fab!(:watched_words) do
+        ["shucks", "whiz", "whizzer", "a**le", "badword*", "shuck$", "café", "$uper"].each do |word|
+          Fabricate(:watched_word, action: WatchedWord.actions[:censor], word: word)
+        end
+      end
+
+      it "works correctly" do
+        expect_cooked_match("aw shucks, golly gee whiz.",
+          "<p>aw ■■■■■■, golly gee ■■■■.</p>")
+      end
+
+      it "doesn't censor words unless they have boundaries." do
+        expect_cooked_match("you are a whizzard! I love cheesewhiz. Whiz.",
+          "<p>you are a whizzard! I love cheesewhiz. ■■■■.</p>")
+      end
+
+      it "censors words even if previous partial matches exist." do
+        expect_cooked_match("you are a whizzer! I love cheesewhiz. Whiz.",
+          "<p>you are a ■■■■■■■! I love cheesewhiz. ■■■■.</p>")
+      end
+
+      it "won't break links by censoring them." do
+        expect_cooked_match("The link still works. [whiz](http://www.whiz.com)",
+          '<p>The link still works. <a href="http://www.whiz.com" rel="nofollow noopener">■■■■</a></p>')
+      end
+
+      it "escapes regexp characters" do
+        expect_cooked_match(
+          "I have a pen, I have an a**le",
+          "<p>I have a pen, I have an ■■■■■</p>"
+        )
+      end
+
+      it "works for words ending in non-word characters" do
+        expect_cooked_match(
+          "Aw shuck$, I can't fix the problem with money",
+          "<p>Aw ■■■■■■, I can't fix the problem with money</p>")
+      end
+
+      it "works for words ending in accented characters" do
+        expect_cooked_match(
+          "Let's go to a café today",
+          "<p>Let's go to a ■■■■ today</p>")
+      end
+
+      it "works for words starting with non-word characters" do
+        expect_cooked_match(
+          "Discourse is $uper amazing",
+          "<p>Discourse is ■■■■■ amazing</p>")
+      end
+
+      it "handles * as wildcard" do
+        expect_cooked_match(
+          "No badword or apple here plz.",
+          "<p>No ■■■■■■■ or ■■■■■ here plz.</p>")
+      end
+    end
+
+    context "with watched words as regular expressions" do
+      before { SiteSetting.watched_words_regular_expressions = true }
+      it "supports words as regular expressions" do
+        ["xyz*", "plee+ase"].each do |word|
+          Fabricate(:watched_word, action: WatchedWord.actions[:censor], word: word)
+        end
+
+        expect_cooked_match("Pleased to meet you, but pleeeease call me later, xyz123",
+          "<p>Pleased to meet you, but ■■■■■■■■■ call me later, ■■■123</p>")
+      end
+
+      it "supports custom boundaries" do
+        Fabricate(:watched_word, action: WatchedWord.actions[:censor], word: "\\btown\\b")
+        expect_cooked_match("Meet downtown in your town at the townhouse on Main St.",
+          "<p>Meet downtown in your ■■■■ at the townhouse on Main St.</p>")
+      end
+    end
+  end
@@ -24,7 +24,7 @@ describe WordWatcher
     it "is correct when watched_words_regular_expressions = false" do
       SiteSetting.watched_words_regular_expressions = false
       regexp = WordWatcher.word_matcher_regexp(:block)
-      expect(regexp.inspect).to eq("/(?<!\\w)((#{word1}|#{word2}))(?!\\w)/i")
+      expect(regexp.inspect).to eq("/(?:\\W|^)(#{word1}|#{word2})(?=\\W|$)/i")
     end
   end
 end
@@ -21,7 +21,6 @@ const rawOpts = {
     enable_markdown_linkify: true,
     markdown_linkify_tlds: "com"
   },
-  censoredWords: "shucks|whiz|whizzer|a**le|badword*|shuck$|café|$uper",
   getURL: url => url
 };
@@ -974,89 +973,15 @@ QUnit.test("images", assert => {
 });

 QUnit.test("censoring", assert => {
-  assert.cooked(
-    "aw shucks, golly gee whiz.",
-    "<p>aw ■■■■■■, golly gee ■■■■.</p>",
-    "it censors words in the Site Settings"
-  );
-
-  assert.cooked(
-    "you are a whizzard! I love cheesewhiz. Whiz.",
-    "<p>you are a whizzard! I love cheesewhiz. ■■■■.</p>",
-    "it doesn't censor words unless they have boundaries."
-  );
-
-  assert.cooked(
-    "you are a whizzer! I love cheesewhiz. Whiz.",
-    "<p>you are a ■■■■■■■! I love cheesewhiz. ■■■■.</p>",
-    "it censors words even if previous partial matches exist."
-  );
-
-  assert.cooked(
-    "The link still works. [whiz](http://www.whiz.com)",
-    '<p>The link still works. <a href="http://www.whiz.com">■■■■</a></p>',
-    "it won't break links by censoring them."
-  );
-
-  assert.cooked(
-    "Call techapj the computer whiz at 555-555-1234 for free help.",
-    "<p>Call techapj the computer ■■■■ at 555-555-1234 for free help.</p>",
-    "uses both censored words and patterns from site settings"
-  );
-
-  assert.cooked(
-    "I have a pen, I have an a**le",
-    "<p>I have a pen, I have an ■■■■■</p>",
-    "it escapes regexp chars"
-  );
-
-  assert.cooked(
-    "Aw shuck$, I can't fix the problem with money",
-    "<p>Aw ■■■■■■, I can't fix the problem with money</p>",
-    "it works for words ending in non-word characters"
-  );
-
-  assert.cooked(
-    "Let's go to a café today",
-    "<p>Let's go to a ■■■■ today</p>",
-    "it works for words ending in accented characters"
-  );
-
-  assert.cooked(
-    "Discourse is $uper amazing",
-    "<p>Discourse is ■■■■■ amazing</p>",
-    "it works for words starting with non-word characters"
-  );
-
-  assert.cooked(
-    "No badword or apple here plz.",
-    "<p>No ■■■■■■■ or ■■■■■ here plz.</p>",
-    "it handles * as wildcard"
-  );
-
   assert.cookedOptions(
     "Pleased to meet you, but pleeeease call me later, xyz123",
     {
-      siteSettings: {
-        watched_words_regular_expressions: true
-      },
-      censoredWords: "xyz*|plee+ase"
+      censoredRegexp: "(xyz*|plee+ase)"
     },
-    "<p>Pleased to meet you, but ■■■■ call me later, ■■■■123</p>",
-    "supports words as regular expressions"
+    "<p>Pleased to meet you, but ■■■■■■■■■ call me later, ■■■123</p>",
+    "supports censoring"
   );
-
-  assert.cookedOptions(
-    "Meet downtown in your town at the townhouse on Main St.",
-    {
-      siteSettings: {
-        watched_words_regular_expressions: true
-      },
-      censoredWords: "\\btown\\b"
-    },
-    "<p>Meet downtown in your ■■■■ at the townhouse on Main St.</p>",
-    "supports words as regular expressions"
-  );
+  // More tests in pretty_text_spec.rb
 });

 QUnit.test("code blocks/spans hoisting", assert => {