diff --git a/app/assets/javascripts/discourse/tests/unit/lib/allow-lister-test.js b/app/assets/javascripts/discourse/tests/unit/lib/allow-lister-test.js index 0a759c16bc3..257af29f14d 100644 --- a/app/assets/javascripts/discourse/tests/unit/lib/allow-lister-test.js +++ b/app/assets/javascripts/discourse/tests/unit/lib/allow-lister-test.js @@ -21,6 +21,7 @@ module("Unit | Utility | allowLister", function () { "custom.foo", "custom.baz", "custom[data-*]", + "custom[data-custom-*=foo]", "custom[rel=nofollow]", ]); @@ -38,11 +39,12 @@ module("Unit | Utility | allowLister", function () { custom: { class: ["foo", "baz"], "data-*": ["*"], + "data-custom-*": ["foo"], rel: ["nofollow", "test"], }, }, }, - "Expecting a correct white list" + "Expecting a correct allow list" ); allowLister.disable("test"); @@ -53,7 +55,7 @@ module("Unit | Utility | allowLister", function () { tagList: {}, attrList: {}, }, - "Expecting an empty white list" + "Expecting an empty allow list" ); }); }); diff --git a/app/assets/javascripts/discourse/tests/unit/lib/pretty-text-test.js b/app/assets/javascripts/discourse/tests/unit/lib/pretty-text-test.js index 8e4bda3cd55..e3166ee7e29 100644 --- a/app/assets/javascripts/discourse/tests/unit/lib/pretty-text-test.js +++ b/app/assets/javascripts/discourse/tests/unit/lib/pretty-text-test.js @@ -893,8 +893,8 @@ eviltrout
assert.cooked( "```eviltrout\nhello\n```", - 'hello\n
',
- "it doesn't not allowlist all classes"
+ 'hello\n
',
+ "it converts to custom block unknown code names"
);
assert.cooked(
diff --git a/app/assets/javascripts/discourse/tests/unit/lib/sanitizer-test.js b/app/assets/javascripts/discourse/tests/unit/lib/sanitizer-test.js
index 5dbca0b4f43..a1efa2b8bc6 100644
--- a/app/assets/javascripts/discourse/tests/unit/lib/sanitizer-test.js
+++ b/app/assets/javascripts/discourse/tests/unit/lib/sanitizer-test.js
@@ -1,6 +1,7 @@
import PrettyText, { buildOptions } from "pretty-text/pretty-text";
import { module, test } from "qunit";
-import { hrefAllowed } from "pretty-text/sanitizer";
+import { hrefAllowed, sanitize } from "pretty-text/sanitizer";
+import AllowLister from "pretty-text/allow-lister";
module("Unit | Utility | sanitizer", function () {
test("sanitize", function (assert) {
@@ -250,4 +251,67 @@ module("Unit | Utility | sanitizer", function () {
"escape single quotes"
);
});
+
+ test("correctly sanitizes complex data attributes rules", function (assert) {
+ const allowLister = new AllowLister();
+
+ allowLister.allowListFeature("test", [
+ "pre[data-*]",
+ "code[data-custom-*=foo]",
+ "div[data-cat-*]",
+ ]);
+ allowLister.enable("test");
+
+ assert.strictEqual(sanitize("", allowLister), "");
+ assert.strictEqual(sanitize("", allowLister), "");
+ assert.strictEqual(sanitize("", allowLister), "");
+ assert.strictEqual(sanitize("", allowLister), "");
+ assert.strictEqual(sanitize("", allowLister), "");
+ assert.strictEqual(sanitize("", allowLister), "");
+
+ assert.strictEqual(
+ sanitize("", allowLister),
+ ''
+ );
+
+ assert.strictEqual(
+ sanitize("", allowLister),
+ ''
+ );
+
+ assert.strictEqual(
+ sanitize("
", allowLister),
+ "
"
+ );
+
+ assert.strictEqual(
+ sanitize("
", allowLister),
+ "
"
+ );
+
+ assert.strictEqual(
+ sanitize("
", allowLister),
+ "
"
+ );
+
+ assert.strictEqual(
+ sanitize("
", allowLister),
+ '
'
+ );
+
+ assert.strictEqual(
+ sanitize("
", allowLister),
+ "
"
+ );
+
+ assert.strictEqual(
+ sanitize("", allowLister),
+ ''
+ );
+
+ assert.strictEqual(
+ sanitize("", allowLister),
+ ''
+ );
+ });
});
diff --git a/app/assets/javascripts/pretty-text/addon/sanitizer.js b/app/assets/javascripts/pretty-text/addon/sanitizer.js
index 1b61c6c243a..19017184a9c 100644
--- a/app/assets/javascripts/pretty-text/addon/sanitizer.js
+++ b/app/assets/javascripts/pretty-text/addon/sanitizer.js
@@ -41,6 +41,18 @@ export function hrefAllowed(href, extraHrefMatchers) {
}
}
+function testDataAttribute(forTag, name, value) {
+ return Object.keys(forTag).find((k) => {
+ const nameWithMatcher = `^${k.replace(/\*$/, "\\w+?")}`;
+ const validValues = forTag[k];
+
+ return (
+ new RegExp(nameWithMatcher).test(name) &&
+ (validValues.includes("*") ? true : validValues.includes(value))
+ );
+ });
+}
+
export function sanitize(text, allowLister) {
if (!text) {
return "";
@@ -72,12 +84,13 @@ export function sanitize(text, allowLister) {
const forTag = allowList.attrList[tag];
if (forTag) {
const forAttr = forTag[name];
+
if (
(forAttr &&
(forAttr.indexOf("*") !== -1 || forAttr.indexOf(value) !== -1)) ||
(name.indexOf("data-html-") === -1 &&
name.indexOf("data-") === 0 &&
- forTag["data-*"]) ||
+ (forTag["data-*"] || testDataAttribute(forTag, name, value))) ||
(tag === "a" &&
name === "href" &&
hrefAllowed(value, extraHrefMatchers)) ||
diff --git a/app/assets/javascripts/pretty-text/engines/discourse-markdown/code.js b/app/assets/javascripts/pretty-text/engines/discourse-markdown/code.js
index 1fdab1bf29c..d73516677f2 100644
--- a/app/assets/javascripts/pretty-text/engines/discourse-markdown/code.js
+++ b/app/assets/javascripts/pretty-text/engines/discourse-markdown/code.js
@@ -1,35 +1,78 @@
// we need a custom renderer for code blocks cause we have a slightly non compliant
// format with special handling for text and so on
-
const TEXT_CODE_CLASSES = ["text", "pre", "plain"];
+function extractTokenInfo(info, md) {
+ if (!info) {
+ return;
+ }
+
+ info = info.trim();
+
+ const matches = info.match(/(^\s*\S*)\s*(.*)/i);
+ if (!matches) {
+ return;
+ }
+
+ // ensure the token has only valid chars
+ // c++, strucuted-text and p91, are all valid
+ if (!/^[\w+-]*$/i.test(matches[1])) {
+ return;
+ }
+
+ const ASCII_REGEX = /[^\x00-\x7F]/;
+ const tag = md.utils.unescapeAll(matches[1].replace(ASCII_REGEX, ""));
+ const extractedData = { tag, attributes: {} };
+
+ if (matches[2]?.length) {
+ md.utils
+ .unescapeAll(matches[2].replace(ASCII_REGEX, ""))
+ .split(",")
+ .forEach((potentialPair) => {
+ const [key, value] = potentialPair.trim().split(/\s+/g)[0].split("=");
+
+ // invalid pairs would get caught here and not used, eg `foo=`
+ if (key && value) {
+ extractedData.attributes[key] = value;
+ }
+ });
+ }
+
+ return extractedData;
+}
+
function render(tokens, idx, options, env, slf, md) {
- let token = tokens[idx],
- info = token.info ? md.utils.unescapeAll(token.info) : "",
- langName = md.options.discourse.defaultCodeLang,
- className,
- escapedContent = md.utils.escapeHtml(token.content);
+ const token = tokens[idx];
+ const escapedContent = md.utils.escapeHtml(token.content);
+ const tokenInfo = extractTokenInfo(token.info, md);
+ const tag = tokenInfo?.tag || md.options.discourse.defaultCodeLang;
+ const attributes = tokenInfo?.attributes || {};
- if (info) {
- // strip off any additional languages
- info = info.trim().split(/\s+/g)[0];
+ let className;
+
+ const acceptableCodeClasses =
+ md.options.discourse.acceptableCodeClasses || [];
+
+ if (TEXT_CODE_CLASSES.indexOf(tag) > -1) {
+ className = "lang-nohighlight";
+ } else if (acceptableCodeClasses.indexOf(tag) > -1) {
+ className = `lang-${tag}`;
+ } else {
+ className = "lang-nohighlight";
+ attributes["wrap"] = tag;
}
- const acceptableCodeClasses = md.options.discourse.acceptableCodeClasses;
- if (
- acceptableCodeClasses &&
- info &&
- acceptableCodeClasses.indexOf(info) !== -1
- ) {
- langName = info;
- }
+ const dataAttributes = Object.keys(attributes)
+ .map((key) => {
+ const value = md.utils.escapeHtml(attributes[key]);
+ key = md.utils.escapeHtml(key);
+ return `data-code-${key}="${value}"`;
+ })
+ .join(" ");
- className =
- TEXT_CODE_CLASSES.indexOf(info) !== -1
- ? "lang-nohighlight"
- : "lang-" + langName;
-
- return `${escapedContent}
\n`;
+ return `${escapedContent}
\n`;
}
export function setup(helper) {
@@ -41,6 +84,8 @@ export function setup(helper) {
.concat(["auto", "nohighlight"]);
});
+ helper.allowList(["pre[data-code-*]"]);
+
helper.allowList({
custom(tag, name, value) {
if (tag === "code" && name === "class") {
diff --git a/config/locales/server.en.yml b/config/locales/server.en.yml
index cefe57911f4..974ba8622a9 100644
--- a/config/locales/server.en.yml
+++ b/config/locales/server.en.yml
@@ -2188,7 +2188,7 @@ en:
display_name_on_posts: "Show a user's full name on their posts in addition to their @username."
show_time_gap_days: "If two posts are made this many days apart, display the time gap in the topic."
short_progress_text_threshold: "After the number of posts in a topic goes above this number, the progress bar will only show the current post number. If you change the progress bar's width, you may need to change this value."
- default_code_lang: "Default programming language syntax highlighting applied to GitHub code blocks (auto, nohighlight, ruby, python etc.)"
+ default_code_lang: "Default programming language syntax highlighting applied to code blocks (auto, nohighlight, ruby, python etc.)"
warn_reviving_old_topic_age: "When someone starts replying to a topic where the last reply is older than this many days, a warning will be displayed. Disable by setting to 0."
autohighlight_all_code: "Force apply code highlighting to all preformatted code blocks even when they didn't explicitly specify the language."
highlighted_languages: "Included syntax highlighting rules. (Warning: including too many languages may impact performance) see: https://highlightjs.org/static/demo for a demo"
diff --git a/spec/components/pretty_text_spec.rb b/spec/components/pretty_text_spec.rb
index 73827007340..6db1a86e350 100644
--- a/spec/components/pretty_text_spec.rb
+++ b/spec/components/pretty_text_spec.rb
@@ -527,12 +527,27 @@ describe PrettyText do
end
it 'can include code class correctly' do
+ SiteSetting.highlighted_languages += '|c++|structured-text|p21'
+
# keep in mind spaces should be trimmed per spec
expect(PrettyText.cook("``` ruby the mooby\n`````")).to eq('
')
expect(PrettyText.cook("```cpp\ncpp\n```")).to match_html("cpp\n
")
expect(PrettyText.cook("```\ncpp\n```")).to match_html("cpp\n
")
expect(PrettyText.cook("```text\ncpp\n```")).to match_html("cpp\n
")
-
+ expect(PrettyText.cook("```custom\ncustom content\n```")).to match_html("custom content\n
")
+ expect(PrettyText.cook("```custom foo=bar\ncustom content\n```")).to match_html("custom content
")
+ expect(PrettyText.cook("```INVALID a=1\n```")).to match_html("\n
")
+ expect(PrettyText.cook("```INVALID a=1, foo=bar , baz=2\n```")).to match_html("\n
")
+ expect(PrettyText.cook("```text\n```")).to match_html("\n
")
+ expect(PrettyText.cook("```auto\n```")).to match_html("\n
")
+ expect(PrettyText.cook("```ruby startline=3 $%@#\n```")).to match_html("\n
")
+ expect(PrettyText.cook("```mermaid a_-你=17\n```")).to match_html("\n
")
+ expect(PrettyText.cook("```mermaid foo=\n```")).to match_html("\n
")
+ expect(PrettyText.cook("```mermaid foo= begin admin o\n```")).to match_html("\n
")
+ expect(PrettyText.cook("```c++\nc++\n```")).to match_html("c++\n
")
+ expect(PrettyText.cook("```structured-text\nstructured-text\n```")).to match_html("structured-text\n
")
+ expect(PrettyText.cook("```p21\np21\n```")).to match_html("p21\n
")
+ expect(PrettyText.cook("")).to match_html("")
end
it 'indents code correctly' do
@@ -553,7 +568,7 @@ describe PrettyText do
it "strips out unicode bidirectional (bidi) override characters and replaces with a highlighted span" do
code = <<~MD
X
- ```js
+ ```auto
var isAdmin = false;
/* begin admin only */ if (isAdmin) {
console.log("You are an admin.");
diff --git a/spec/integrity/common_mark_spec.rb b/spec/integrity/common_mark_spec.rb
index 852e3ea0505..fbf7a9ee16c 100644
--- a/spec/integrity/common_mark_spec.rb
+++ b/spec/integrity/common_mark_spec.rb
@@ -34,6 +34,8 @@ describe "CommonMark" do
cooked.gsub!(" class=\"lang-auto\"", '')
cooked.gsub!(/(.*)<\/span>/, "\\1")
cooked.gsub!(/<\/a>/, "")
+ # we support data-attributes which is not in the spec
+ cooked.gsub!("", '') # we don't care about this cooked.gsub!("\n", "") html.gsub!("\n", "")