diff --git a/app/assets/javascripts/discourse/tests/unit/lib/allow-lister-test.js b/app/assets/javascripts/discourse/tests/unit/lib/allow-lister-test.js index 0a759c16bc3..257af29f14d 100644 --- a/app/assets/javascripts/discourse/tests/unit/lib/allow-lister-test.js +++ b/app/assets/javascripts/discourse/tests/unit/lib/allow-lister-test.js @@ -21,6 +21,7 @@ module("Unit | Utility | allowLister", function () { "custom.foo", "custom.baz", "custom[data-*]", + "custom[data-custom-*=foo]", "custom[rel=nofollow]", ]); @@ -38,11 +39,12 @@ module("Unit | Utility | allowLister", function () { custom: { class: ["foo", "baz"], "data-*": ["*"], + "data-custom-*": ["foo"], rel: ["nofollow", "test"], }, }, }, - "Expecting a correct white list" + "Expecting a correct allow list" ); allowLister.disable("test"); @@ -53,7 +55,7 @@ module("Unit | Utility | allowLister", function () { tagList: {}, attrList: {}, }, - "Expecting an empty white list" + "Expecting an empty allow list" ); }); }); diff --git a/app/assets/javascripts/discourse/tests/unit/lib/pretty-text-test.js b/app/assets/javascripts/discourse/tests/unit/lib/pretty-text-test.js index 8e4bda3cd55..e3166ee7e29 100644 --- a/app/assets/javascripts/discourse/tests/unit/lib/pretty-text-test.js +++ b/app/assets/javascripts/discourse/tests/unit/lib/pretty-text-test.js @@ -893,8 +893,8 @@ eviltrout

assert.cooked( "```eviltrout\nhello\n```", - '
hello\n
', - "it doesn't not allowlist all classes" + '
hello\n
', + "it converts to custom block unknown code names" ); assert.cooked( diff --git a/app/assets/javascripts/discourse/tests/unit/lib/sanitizer-test.js b/app/assets/javascripts/discourse/tests/unit/lib/sanitizer-test.js index 5dbca0b4f43..a1efa2b8bc6 100644 --- a/app/assets/javascripts/discourse/tests/unit/lib/sanitizer-test.js +++ b/app/assets/javascripts/discourse/tests/unit/lib/sanitizer-test.js @@ -1,6 +1,7 @@ import PrettyText, { buildOptions } from "pretty-text/pretty-text"; import { module, test } from "qunit"; -import { hrefAllowed } from "pretty-text/sanitizer"; +import { hrefAllowed, sanitize } from "pretty-text/sanitizer"; +import AllowLister from "pretty-text/allow-lister"; module("Unit | Utility | sanitizer", function () { test("sanitize", function (assert) { @@ -250,4 +251,67 @@ module("Unit | Utility | sanitizer", function () { "escape single quotes" ); }); + + test("correctly sanitizes complex data attributes rules", function (assert) { + const allowLister = new AllowLister(); + + allowLister.allowListFeature("test", [ + "pre[data-*]", + "code[data-custom-*=foo]", + "div[data-cat-*]", + ]); + allowLister.enable("test"); + + assert.strictEqual(sanitize("", allowLister), ""); + assert.strictEqual(sanitize("", allowLister), ""); + assert.strictEqual(sanitize("", allowLister), ""); + assert.strictEqual(sanitize("", allowLister), ""); + assert.strictEqual(sanitize("", allowLister), ""); + assert.strictEqual(sanitize("", allowLister), ""); + + assert.strictEqual( + sanitize("
", allowLister),
+      '
'
+    );
+
+    assert.strictEqual(
+      sanitize("
", allowLister),
+      '
'
+    );
+
+    assert.strictEqual(
+      sanitize("", allowLister),
+      ""
+    );
+
+    assert.strictEqual(
+      sanitize("", allowLister),
+      ""
+    );
+
+    assert.strictEqual(
+      sanitize("", allowLister),
+      ""
+    );
+
+    assert.strictEqual(
+      sanitize("", allowLister),
+      ''
+    );
+
+    assert.strictEqual(
+      sanitize("", allowLister),
+      ""
+    );
+
+    assert.strictEqual(
+      sanitize("
", allowLister), + '
' + ); + + assert.strictEqual( + sanitize("
", allowLister), + '
' + ); + }); }); diff --git a/app/assets/javascripts/pretty-text/addon/sanitizer.js b/app/assets/javascripts/pretty-text/addon/sanitizer.js index 1b61c6c243a..19017184a9c 100644 --- a/app/assets/javascripts/pretty-text/addon/sanitizer.js +++ b/app/assets/javascripts/pretty-text/addon/sanitizer.js @@ -41,6 +41,18 @@ export function hrefAllowed(href, extraHrefMatchers) { } } +function testDataAttribute(forTag, name, value) { + return Object.keys(forTag).find((k) => { + const nameWithMatcher = `^${k.replace(/\*$/, "\\w+?")}`; + const validValues = forTag[k]; + + return ( + new RegExp(nameWithMatcher).test(name) && + (validValues.includes("*") ? true : validValues.includes(value)) + ); + }); +} + export function sanitize(text, allowLister) { if (!text) { return ""; @@ -72,12 +84,13 @@ export function sanitize(text, allowLister) { const forTag = allowList.attrList[tag]; if (forTag) { const forAttr = forTag[name]; + if ( (forAttr && (forAttr.indexOf("*") !== -1 || forAttr.indexOf(value) !== -1)) || (name.indexOf("data-html-") === -1 && name.indexOf("data-") === 0 && - forTag["data-*"]) || + (forTag["data-*"] || testDataAttribute(forTag, name, value))) || (tag === "a" && name === "href" && hrefAllowed(value, extraHrefMatchers)) || diff --git a/app/assets/javascripts/pretty-text/engines/discourse-markdown/code.js b/app/assets/javascripts/pretty-text/engines/discourse-markdown/code.js index 1fdab1bf29c..d73516677f2 100644 --- a/app/assets/javascripts/pretty-text/engines/discourse-markdown/code.js +++ b/app/assets/javascripts/pretty-text/engines/discourse-markdown/code.js @@ -1,35 +1,78 @@ // we need a custom renderer for code blocks cause we have a slightly non compliant // format with special handling for text and so on - const TEXT_CODE_CLASSES = ["text", "pre", "plain"]; +function extractTokenInfo(info, md) { + if (!info) { + return; + } + + info = info.trim(); + + const matches = info.match(/(^\s*\S*)\s*(.*)/i); + if (!matches) { + return; + } + + // ensure the token has only valid chars + // c++, strucuted-text and p91, are all valid + if (!/^[\w+-]*$/i.test(matches[1])) { + return; + } + + const ASCII_REGEX = /[^\x00-\x7F]/; + const tag = md.utils.unescapeAll(matches[1].replace(ASCII_REGEX, "")); + const extractedData = { tag, attributes: {} }; + + if (matches[2]?.length) { + md.utils + .unescapeAll(matches[2].replace(ASCII_REGEX, "")) + .split(",") + .forEach((potentialPair) => { + const [key, value] = potentialPair.trim().split(/\s+/g)[0].split("="); + + // invalid pairs would get caught here and not used, eg `foo=` + if (key && value) { + extractedData.attributes[key] = value; + } + }); + } + + return extractedData; +} + function render(tokens, idx, options, env, slf, md) { - let token = tokens[idx], - info = token.info ? md.utils.unescapeAll(token.info) : "", - langName = md.options.discourse.defaultCodeLang, - className, - escapedContent = md.utils.escapeHtml(token.content); + const token = tokens[idx]; + const escapedContent = md.utils.escapeHtml(token.content); + const tokenInfo = extractTokenInfo(token.info, md); + const tag = tokenInfo?.tag || md.options.discourse.defaultCodeLang; + const attributes = tokenInfo?.attributes || {}; - if (info) { - // strip off any additional languages - info = info.trim().split(/\s+/g)[0]; + let className; + + const acceptableCodeClasses = + md.options.discourse.acceptableCodeClasses || []; + + if (TEXT_CODE_CLASSES.indexOf(tag) > -1) { + className = "lang-nohighlight"; + } else if (acceptableCodeClasses.indexOf(tag) > -1) { + className = `lang-${tag}`; + } else { + className = "lang-nohighlight"; + attributes["wrap"] = tag; } - const acceptableCodeClasses = md.options.discourse.acceptableCodeClasses; - if ( - acceptableCodeClasses && - info && - acceptableCodeClasses.indexOf(info) !== -1 - ) { - langName = info; - } + const dataAttributes = Object.keys(attributes) + .map((key) => { + const value = md.utils.escapeHtml(attributes[key]); + key = md.utils.escapeHtml(key); + return `data-code-${key}="${value}"`; + }) + .join(" "); - className = - TEXT_CODE_CLASSES.indexOf(info) !== -1 - ? "lang-nohighlight" - : "lang-" + langName; - - return `
${escapedContent}
\n`; + return `${escapedContent}\n`; } export function setup(helper) { @@ -41,6 +84,8 @@ export function setup(helper) { .concat(["auto", "nohighlight"]); }); + helper.allowList(["pre[data-code-*]"]); + helper.allowList({ custom(tag, name, value) { if (tag === "code" && name === "class") { diff --git a/config/locales/server.en.yml b/config/locales/server.en.yml index cefe57911f4..974ba8622a9 100644 --- a/config/locales/server.en.yml +++ b/config/locales/server.en.yml @@ -2188,7 +2188,7 @@ en: display_name_on_posts: "Show a user's full name on their posts in addition to their @username." show_time_gap_days: "If two posts are made this many days apart, display the time gap in the topic." short_progress_text_threshold: "After the number of posts in a topic goes above this number, the progress bar will only show the current post number. If you change the progress bar's width, you may need to change this value." - default_code_lang: "Default programming language syntax highlighting applied to GitHub code blocks (auto, nohighlight, ruby, python etc.)" + default_code_lang: "Default programming language syntax highlighting applied to code blocks (auto, nohighlight, ruby, python etc.)" warn_reviving_old_topic_age: "When someone starts replying to a topic where the last reply is older than this many days, a warning will be displayed. Disable by setting to 0." autohighlight_all_code: "Force apply code highlighting to all preformatted code blocks even when they didn't explicitly specify the language." highlighted_languages: "Included syntax highlighting rules. (Warning: including too many languages may impact performance) see: https://highlightjs.org/static/demo for a demo" diff --git a/spec/components/pretty_text_spec.rb b/spec/components/pretty_text_spec.rb index 73827007340..6db1a86e350 100644 --- a/spec/components/pretty_text_spec.rb +++ b/spec/components/pretty_text_spec.rb @@ -527,12 +527,27 @@ describe PrettyText do end it 'can include code class correctly' do + SiteSetting.highlighted_languages += '|c++|structured-text|p21' + # keep in mind spaces should be trimmed per spec expect(PrettyText.cook("``` ruby the mooby\n`````")).to eq('
') expect(PrettyText.cook("```cpp\ncpp\n```")).to match_html("
cpp\n
") expect(PrettyText.cook("```\ncpp\n```")).to match_html("
cpp\n
") expect(PrettyText.cook("```text\ncpp\n```")).to match_html("
cpp\n
") - + expect(PrettyText.cook("```custom\ncustom content\n```")).to match_html("
custom content\n
") + expect(PrettyText.cook("```custom foo=bar\ncustom content\n```")).to match_html("
custom content
") + expect(PrettyText.cook("```INVALID a=1\n```")).to match_html("
\n
") + expect(PrettyText.cook("```INVALID a=1, foo=bar , baz=2\n```")).to match_html("
\n
") + expect(PrettyText.cook("```text\n```")).to match_html("
\n
") + expect(PrettyText.cook("```auto\n```")).to match_html("
\n
") + expect(PrettyText.cook("```ruby startline=3 $%@#\n```")).to match_html("
\n
") + expect(PrettyText.cook("```mermaid a_-你=17\n```")).to match_html("
\n
") + expect(PrettyText.cook("```mermaid foo=\n```")).to match_html("
\n
") + expect(PrettyText.cook("```mermaid foo=‮ begin admin o\n```")).to match_html("
\n
") + expect(PrettyText.cook("```c++\nc++\n```")).to match_html("
c++\n
") + expect(PrettyText.cook("```structured-text\nstructured-text\n```")).to match_html("
structured-text\n
") + expect(PrettyText.cook("```p21\np21\n```")).to match_html("
p21\n
") + expect(PrettyText.cook("
")).to match_html("
")
     end
 
     it 'indents code correctly' do
@@ -553,7 +568,7 @@ describe PrettyText do
     it "strips out unicode bidirectional (bidi) override characters and replaces with a highlighted span" do
       code = <<~MD
          X
-         ```js
+         ```auto
          var isAdmin = false;
          /*‮ begin admin only */⁦ if (isAdmin) ⁩ ⁦ {
          console.log("You are an admin.");
diff --git a/spec/integrity/common_mark_spec.rb b/spec/integrity/common_mark_spec.rb
index 852e3ea0505..fbf7a9ee16c 100644
--- a/spec/integrity/common_mark_spec.rb
+++ b/spec/integrity/common_mark_spec.rb
@@ -34,6 +34,8 @@ describe "CommonMark" do
         cooked.gsub!(" class=\"lang-auto\"", '')
         cooked.gsub!(/(.*)<\/span>/, "\\1")
         cooked.gsub!(/<\/a>/, "")
+        # we support data-attributes which is not in the spec
+        cooked.gsub!("
", '
')
         # we don't care about this
         cooked.gsub!("
\n
", "
") html.gsub!("
\n
", "
")