diff --git a/app/assets/javascripts/discourse/components/d-editor.js.es6 b/app/assets/javascripts/discourse/components/d-editor.js.es6
index eda16824b70..ba821726cbe 100644
--- a/app/assets/javascripts/discourse/components/d-editor.js.es6
+++ b/app/assets/javascripts/discourse/components/d-editor.js.es6
@@ -9,6 +9,7 @@ import { emojiUrlFor } from 'discourse/lib/text';
import { getRegister } from 'discourse-common/lib/get-owner';
import { findRawTemplate } from 'discourse/lib/raw-templates';
import { determinePostReplaceSelection, clipboardData } from 'discourse/lib/utilities';
+import toMarkdown from 'discourse/lib/to-markdown';
import { ajax } from 'discourse/lib/ajax';
import { popupAjaxError } from 'discourse/lib/ajax-error';
import deprecated from 'discourse-common/lib/deprecated';
@@ -647,7 +648,7 @@ export default Ember.Component.extend({
const { clipboard, types } = clipboardData(e);
let plainText = clipboard.getData("text/plain");
- const html = clipboard.getData("text/html");
+ let html = clipboard.getData("text/html");
let handled = false;
if (plainText) {
@@ -657,30 +658,19 @@ export default Ember.Component.extend({
this.appEvents.trigger('composer:insert-text', table);
handled = true;
}
+
+ if (html && html.includes("urn:schemas-microsoft-com:office:word")) {
+ html = ""; // use plain text data for microsoft word
+ }
}
if (this.siteSettings.enable_rich_text_paste && html && !handled) {
- const placeholder = `${ plainText || I18n.t('pasting') }`;
- const self = this;
+ const markdown = toMarkdown(html);
- this.appEvents.trigger('composer:insert-text', placeholder);
- handled = true;
-
- ajax('/composer/parse_html', {
- type: 'POST',
- data: { html }
- }).then(response => {
- if (response.markdown) {
- self.appEvents.trigger('composer:replace-text', placeholder, response.markdown);
- } else if (!plainText) {
- self.appEvents.trigger('composer:replace-text', placeholder, "");
- }
- }).catch(error => {
- if (!plainText) {
- self.appEvents.trigger('composer:replace-text', placeholder, "");
- popupAjaxError(error);
- }
- });
+ if (!plainText || plainText.length < markdown.length) {
+ this.appEvents.trigger('composer:insert-text', markdown);
+ handled = true;
+ }
}
const uploadFiles = types.includes("Files") && !plainText && !handled;
diff --git a/test/javascripts/helpers/parse-html.js.es6 b/app/assets/javascripts/discourse/helpers/parse-html.js.es6
similarity index 99%
rename from test/javascripts/helpers/parse-html.js.es6
rename to app/assets/javascripts/discourse/helpers/parse-html.js.es6
index 0d3bab90d56..c9469fa6b9f 100644
--- a/test/javascripts/helpers/parse-html.js.es6
+++ b/app/assets/javascripts/discourse/helpers/parse-html.js.es6
@@ -5,4 +5,4 @@ export default function parseHTML(rawHtml) {
parser.parseComplete(rawHtml);
return builder.dom;
-}
\ No newline at end of file
+}
diff --git a/app/assets/javascripts/discourse/lib/to-markdown.js.es6 b/app/assets/javascripts/discourse/lib/to-markdown.js.es6
new file mode 100644
index 00000000000..c20a738d3ea
--- /dev/null
+++ b/app/assets/javascripts/discourse/lib/to-markdown.js.es6
@@ -0,0 +1,285 @@
+import parseHTML from 'discourse/helpers/parse-html';
+
+const trimLeft = text => text.replace(/^\s+/,"");
+const trimRight = text => text.replace(/\s+$/,"");
+
+class Tag {
+ constructor(name, prefix = "", suffix = "") {
+ this.name = name;
+ this.prefix = prefix;
+ this.suffix = suffix;
+ }
+
+ decorate(text) {
+ if (this.prefix || this.suffix) {
+ return [this.prefix, text, this.suffix].join("");
+ }
+
+ return text;
+ }
+
+ toMarkdown() {
+ const text = this.element.innerMarkdown();
+
+ if (text && text.trim()) {
+ return this.decorate(text);
+ }
+
+ return text;
+ }
+
+ static blocks() {
+ return ["address", "article", "aside", "blockquote", "dd", "div", "dl", "dt", "fieldset",
+ "figcaption", "figure", "footer", "form", "header", "hgroup", "hr", "main", "nav",
+ "ol", "p", "pre", "section", "table", "ul"];
+ }
+
+ static headings() {
+ return ["h1", "h2", "h3", "h4", "h5", "h6"];
+ }
+
+ static emphases() {
+ return [ ["b", "**"], ["strong", "**"], ["i", "_"], ["em", "_"], ["s", "~~"], ["strike", "~~"] ];
+ }
+
+ static slices() {
+ return ["dt", "dd", "tr", "thead", "tbody", "tfoot"];
+ }
+
+ static trimmable() {
+ return [...Tag.blocks(), ...Tag.headings(), ...Tag.slices(), "li", "td", "th", "br", "hr"];
+ }
+
+ static block(name, prefix, suffix) {
+ return class extends Tag {
+ constructor() {
+ super(name, prefix, suffix);
+ }
+
+ decorate(text) {
+ return `\n\n${this.prefix}${text}${this.suffix}\n\n`;
+ }
+ };
+ }
+
+ static heading(name, i) {
+ const prefix = `${[...Array(i)].map(() => "#").join("")} `;
+ return Tag.block(name, prefix, "");
+ }
+
+ static emphasis(name, decorator) {
+ return class extends Tag {
+ constructor() {
+ super(name, decorator, decorator);
+ }
+
+ decorate(text) {
+ text = text.trim();
+
+ if (text.includes("\n")) {
+ this.prefix = `<${this.name}>`;
+ this.suffix = `${this.name}>`;
+ }
+
+ return super.decorate(text);
+ }
+ };
+ }
+
+ static replace(name, text) {
+ return class extends Tag {
+ constructor() {
+ super(name, "", "");
+ this.text = text;
+ }
+
+ toMarkdown() {
+ return this.text;
+ }
+ };
+ }
+
+ static link() {
+ return class extends Tag {
+ constructor() {
+ super("a");
+ }
+
+ decorate(text) {
+ const attr = this.element.attributes;
+
+ if (attr && attr.href && text !== attr.href) {
+ return "[" + text + "](" + attr.href + ")";
+ }
+
+ return text;
+ }
+ };
+ }
+
+ static image() {
+ return class extends Tag {
+ constructor() {
+ super("img");
+ }
+
+ toMarkdown() {
+ const e = this.element;
+ const attr = e.attributes;
+ const pAttr = e.parent && e.parent.attributes;
+ const src = (attr && attr.src) || (pAttr && pAttr.src);
+
+ if (src) {
+ const alt = (attr && attr.alt) || (pAttr && pAttr.alt) || "";
+ return "";
+ }
+
+ return "";
+ }
+ };
+ }
+
+ static slice(name, prefix, suffix) {
+ return class extends Tag {
+ constructor() {
+ super(name, prefix, suffix);
+ }
+
+ decorate(text) {
+ if (!this.element.next) {
+ this.suffix = "";
+ }
+ return `${text}${this.suffix}`;
+ }
+ };
+ }
+
+ static cell(name) {
+ return Tag.slice(name, "", " ");
+ }
+
+ static li() {
+ return class extends Tag.slice("li", "", "\n") {
+ decorate(text) {
+ const indent = this.element.filterParentNames("ul").slice(1).map(() => " ").join("");
+ return super.decorate(`${indent}* ${trimLeft(text)}`);
+ }
+ };
+ }
+
+}
+
+const tags = [
+ ...Tag.blocks().map((b) => Tag.block(b)),
+ ...Tag.headings().map((h, i) => Tag.heading(h, i + 1)),
+ ...Tag.slices().map((s) => Tag.slice(s, "", "\n")),
+ ...Tag.emphases().map((e) => Tag.emphasis(e[0], e[1])),
+ Tag.cell("td"), Tag.cell("th"),
+ Tag.replace("br", "\n"), Tag.replace("hr", "\n---\n"), Tag.replace("head", ""),
+ Tag.li(), Tag.link(), Tag.image(),
+
+ // TO-DO CREATE: code, tbody, ins, del, blockquote, small, large
+ // UPDATE: ol, pre, thead, th, td
+];
+
+class Element {
+ constructor(element, parent, previous, next) {
+ this.name = element.name;
+ this.type = element.type;
+ this.data = element.data;
+ this.children = element.children;
+ this.attributes = element.attributes;
+
+ if (parent) {
+ this.parent = parent;
+ this.parentNames = (parent.parentNames || []).slice();
+ this.parentNames.push(parent.name);
+ }
+ this.previous = previous;
+ this.next = next;
+ }
+
+ tag() {
+ const tag = new (tags.filter(t => (new t().name === this.name))[0] || Tag)();
+ tag.element = this;
+ return tag;
+ }
+
+ innerMarkdown() {
+ return Element.parseChildren(this);
+ }
+
+ leftTrimmable() {
+ return this.previous && Tag.trimmable().includes(this.previous.name);
+ }
+
+ rightTrimmable() {
+ return this.next && Tag.trimmable().includes(this.next.name);
+ }
+
+ text() {
+ let text = this.data || "";
+
+ if (this.leftTrimmable()) {
+ text = trimLeft(text);
+ }
+
+ if (this.rightTrimmable()) {
+ text = trimRight(text);
+ }
+
+ text = text.replace(/[ \t]+/g, " ");
+
+ return text;
+ }
+
+ toMarkdown() {
+ switch(this.type) {
+ case "text":
+ return this.text();
+ break;
+ case "tag":
+ return this.tag().toMarkdown();
+ break;
+ }
+ }
+
+ filterParentNames(name) {
+ return this.parentNames.filter(p => p === name);
+ }
+
+ static toMarkdown(element, parent, prev, next) {
+ return new Element(element, parent, prev, next).toMarkdown();
+ }
+
+ static parseChildren(parent) {
+ return Element.parse(parent.children, parent);
+ }
+
+ static parse(elements, parent = null) {
+ if (elements) {
+ let result = [];
+
+ for (let i = 0; i < elements.length; i++) {
+ const prev = (i === 0) ? null : elements[i-1];
+ const next = (i === elements.length) ? null : elements[i+1];
+
+ result.push(Element.toMarkdown(elements[i], parent, prev, next));
+ }
+
+ return result.join("");
+ }
+
+ return "";
+ }
+}
+
+export default function toMarkdown(html) {
+ try {
+ let markdown = Element.parse(parseHTML(html)).trim();
+ markdown = markdown.replace(/^/, "").replace(/<\/b>$/, "").trim(); // fix for google doc copy paste
+ return markdown.replace(/\r/g, "").replace(/\n \n/g, "\n\n").replace(/\n{3,}/g, "\n\n");
+ } catch(err) {
+ return "";
+ }
+}
diff --git a/app/assets/javascripts/vendor.js b/app/assets/javascripts/vendor.js
index 4b500d18bc8..6a7bd345eca 100644
--- a/app/assets/javascripts/vendor.js
+++ b/app/assets/javascripts/vendor.js
@@ -37,3 +37,4 @@
//= require virtual-dom
//= require virtual-dom-amd
//= require highlight.js
+//= require htmlparser.js
diff --git a/test/javascripts/lib/category-badge-test.js.es6 b/test/javascripts/lib/category-badge-test.js.es6
index ffd837a5e61..6dc1ba2c513 100644
--- a/test/javascripts/lib/category-badge-test.js.es6
+++ b/test/javascripts/lib/category-badge-test.js.es6
@@ -2,7 +2,7 @@ import createStore from 'helpers/create-store';
QUnit.module("lib:category-link");
-import parseHTML from 'helpers/parse-html';
+import parseHTML from 'discourse/helpers/parse-html';
import { categoryBadgeHTML } from "discourse/helpers/category-link";
QUnit.test("categoryBadge without a category", assert => {
@@ -44,4 +44,4 @@ QUnit.test("allowUncategorized", assert => {
assert.blank(categoryBadgeHTML(uncategorized), "it doesn't return HTML for uncategorized by default");
assert.present(categoryBadgeHTML(uncategorized, {allowUncategorized: true}), "it returns HTML");
-});
\ No newline at end of file
+});
diff --git a/test/javascripts/lib/to-markdown-test.js.es6 b/test/javascripts/lib/to-markdown-test.js.es6
new file mode 100644
index 00000000000..96ee32db5ac
--- /dev/null
+++ b/test/javascripts/lib/to-markdown-test.js.es6
@@ -0,0 +1,126 @@
+import toMarkdown from 'discourse/lib/to-markdown';
+
+QUnit.module("lib:to-markdown");
+
+QUnit.test("converts styles between normal words", assert => {
+ const html = `Line with styles between words.`;
+ const markdown = `Line with ~~styles~~ **_between_** words.`;
+ assert.equal(toMarkdown(html), markdown);
+});
+
+QUnit.test("converts inline nested styles", assert => { // NOTE(review): both HTML fixtures below contain no tags although the expected markdown has emphasis markers; the markup looks lost — restore from upstream
+ let html = `Italicised line with some random bold words.`;
+ let markdown = `_Italicised line with **some random** **bold** words._`;
+ assert.equal(toMarkdown(html), markdown); // single-line case: nested emphasis is expected as markdown markers
+ // multi-line case follows; presumably the expected value also lost a wrapping HTML tag — TODO confirm
+ html = `Italicised line
+ with some
+ random bold words.`;
+ markdown = `Italicised line\n with some\n random ~~bold~~ words.`;
+ assert.equal(toMarkdown(html), markdown);
+});
+
+QUnit.test("converts a link", assert => {
+ const html = `Discourse`;
+ const markdown = `[Discourse](https://discourse.org)`;
+ assert.equal(toMarkdown(html), markdown);
+});
+
+QUnit.test("put raw URL instead of converting the link", assert => {
+ let url = "https://discourse.org";
+ const html = () => `${url}`;
+
+ assert.equal(toMarkdown(html()), url);
+
+ url = "discourse.org/t/topic-slug/1";
+ assert.equal(toMarkdown(html()), url);
+});
+
+QUnit.test("skip empty link", assert => {
+ assert.equal(toMarkdown(``), "");
+});
+
+QUnit.test("converts heading tags", assert => { // NOTE(review): the fixture below has no heading tags left, only their text; the markup looks lost — restore from upstream
+ const html = `
+ Heading 1
+ Heading 2
+
+ \t Heading 3
+
+
+ Heading 4
+
+
+
+Heading 5
+
+
+
+
+Heading 6
+ `;
+ const markdown = `# Heading 1\n\n## Heading 2\n\n### Heading 3\n\n#### Heading 4\n\n##### Heading 5\n\n###### Heading 6`; // one to six hashes, headings separated by exactly one blank line
+ assert.equal(toMarkdown(html), markdown);
+});
+
+QUnit.test("converts ul and ol list tags", assert => { // NOTE(review): the HTML fixture below is empty; the list markup looks lost — restore from upstream
+ const html = `
+
+
+ `;
+ const markdown = `* Item 1\n* Item 2\n\n * Sub Item 1\n * Sub Item 2\n\n * Sub _Sub_ Item 1\n * Sub **Sub** Item 2\n\n* Item 3`; // NOTE(review): nested levels presumably differed in indent width before whitespace was collapsed — confirm
+ assert.equal(toMarkdown(html), markdown);
+});
+
+QUnit.test("stripes unwanted inline tags", assert => {
+ const html = `
+
+
+
+
Lorem ipsum dolor sit amet, consectetur elit.
Ut minim veniam, laboris
Heading 1 | Head 2 |
---|---|
Lorem | ipsum |
dolor | sit amet |