mirror of
https://github.com/discourse/discourse.git
synced 2025-08-21 19:11:18 +08:00
FEATURE: convert incoming emails in HTML to markdown
- remove incoming_email_prefer_html site setting - remove HtmlCleaner class
This commit is contained in:
parent
e155cb6db1
commit
b76674f640
6 changed files with 10 additions and 157 deletions
|
@ -1306,7 +1306,6 @@ en:
|
||||||
reply_by_email_enabled: "Enable replying to topics via email."
|
reply_by_email_enabled: "Enable replying to topics via email."
|
||||||
reply_by_email_address: "Template for reply by email incoming email address, for example: %{reply_key}@reply.example.com or replies+%{reply_key}@example.com"
|
reply_by_email_address: "Template for reply by email incoming email address, for example: %{reply_key}@reply.example.com or replies+%{reply_key}@example.com"
|
||||||
alternative_reply_by_email_addresses: "List of alternative templates for reply by email incoming email addresses. Example: %{reply_key}@reply.example.com|replies+%{reply_key}@example.com"
|
alternative_reply_by_email_addresses: "List of alternative templates for reply by email incoming email addresses. Example: %{reply_key}@reply.example.com|replies+%{reply_key}@example.com"
|
||||||
incoming_email_prefer_html: "Use the HTML instead of the text for incoming email. May cause unexpected formatting issues!"
|
|
||||||
|
|
||||||
disable_emails: "Prevent Discourse from sending any kind of emails"
|
disable_emails: "Prevent Discourse from sending any kind of emails"
|
||||||
|
|
||||||
|
|
|
@ -646,8 +646,6 @@ email:
|
||||||
pop3_polling_username: ''
|
pop3_polling_username: ''
|
||||||
pop3_polling_password: ''
|
pop3_polling_password: ''
|
||||||
log_mail_processing_failures: false
|
log_mail_processing_failures: false
|
||||||
incoming_email_prefer_html:
|
|
||||||
default: false
|
|
||||||
email_in:
|
email_in:
|
||||||
default: false
|
default: false
|
||||||
client: true
|
client: true
|
||||||
|
|
|
@ -1,132 +0,0 @@
|
||||||
module Email
|
|
||||||
# HtmlCleaner cleans up the extremely dirty HTML that many email clients
|
|
||||||
# generate by stripping out any excess divs or spans, removing styling in
|
|
||||||
# the process (which also makes the html more suitable to be parsed as
|
|
||||||
# Markdown).
|
|
||||||
class HtmlCleaner
|
|
||||||
# Elements to hoist all children out of
|
|
||||||
HTML_HOIST_ELEMENTS = %w(div span font table tbody th tr td)
|
|
||||||
# Node types to always delete
|
|
||||||
HTML_DELETE_ELEMENT_TYPES = [
|
|
||||||
Nokogiri::XML::Node::DTD_NODE,
|
|
||||||
Nokogiri::XML::Node::COMMENT_NODE,
|
|
||||||
]
|
|
||||||
|
|
||||||
# Private variables:
|
|
||||||
# @doc - nokogiri document
|
|
||||||
# @out - same as @doc, but only if trimming has occurred
|
|
||||||
def initialize(html)
|
|
||||||
if String === html
|
|
||||||
@doc = Nokogiri::HTML(html)
|
|
||||||
else
|
|
||||||
@doc = html
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
class << self
|
|
||||||
# Email::HtmlCleaner.trim(inp, opts={})
|
|
||||||
#
|
|
||||||
# Arguments:
|
|
||||||
# inp - Either an HTML string or a Nokogiri document.
|
|
||||||
# Options:
|
|
||||||
# :return => :doc, :string
|
|
||||||
# Specify the desired return type.
|
|
||||||
# Defaults to the type of the input.
|
|
||||||
# A value of :string is equivalent to calling get_document_text()
|
|
||||||
# on the returned document.
|
|
||||||
def trim(inp, opts={})
|
|
||||||
cleaner = HtmlCleaner.new(inp)
|
|
||||||
|
|
||||||
opts[:return] ||= ((String === inp) ? :string : :doc)
|
|
||||||
|
|
||||||
if opts[:return] == :string
|
|
||||||
cleaner.output_html
|
|
||||||
else
|
|
||||||
cleaner.output_document
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
# Email::HtmlCleaner.get_document_text(doc)
|
|
||||||
#
|
|
||||||
# Get the body portion of the document, including html, as a string.
|
|
||||||
def get_document_text(doc)
|
|
||||||
body = doc.xpath('//body')
|
|
||||||
if body
|
|
||||||
body.inner_html
|
|
||||||
else
|
|
||||||
doc.inner_html
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def output_document
|
|
||||||
@out ||= begin
|
|
||||||
doc = @doc
|
|
||||||
trim_process_node doc
|
|
||||||
add_newlines doc
|
|
||||||
doc
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def output_html
|
|
||||||
HtmlCleaner.get_document_text(output_document)
|
|
||||||
end
|
|
||||||
|
|
||||||
private
|
|
||||||
|
|
||||||
def add_newlines(doc)
|
|
||||||
# Replace <br> tags with a markdown \n
|
|
||||||
doc.xpath('//br').each do |br|
|
|
||||||
br.replace(new_linebreak_node doc, 2)
|
|
||||||
end
|
|
||||||
# Surround <p> tags with newlines, to help with line-wise postprocessing
|
|
||||||
# and ensure markdown paragraphs
|
|
||||||
doc.xpath('//p').each do |p|
|
|
||||||
p.before(new_linebreak_node doc)
|
|
||||||
p.after(new_linebreak_node doc, 2)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def new_linebreak_node(doc, count=1)
|
|
||||||
Nokogiri::XML::Text.new("\n" * count, doc)
|
|
||||||
end
|
|
||||||
|
|
||||||
def trim_process_node(node)
|
|
||||||
if should_hoist?(node)
|
|
||||||
hoisted = trim_hoist_element node
|
|
||||||
hoisted.each { |child| trim_process_node child }
|
|
||||||
elsif should_delete?(node)
|
|
||||||
node.remove
|
|
||||||
else
|
|
||||||
if children = node.children
|
|
||||||
children.each { |child| trim_process_node child }
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
node
|
|
||||||
end
|
|
||||||
|
|
||||||
def trim_hoist_element(element)
|
|
||||||
hoisted = []
|
|
||||||
element.children.each do |child|
|
|
||||||
element.before(child)
|
|
||||||
hoisted << child
|
|
||||||
end
|
|
||||||
element.remove
|
|
||||||
hoisted
|
|
||||||
end
|
|
||||||
|
|
||||||
def should_hoist?(node)
|
|
||||||
return false unless node.element?
|
|
||||||
HTML_HOIST_ELEMENTS.include? node.name
|
|
||||||
end
|
|
||||||
|
|
||||||
def should_delete?(node)
|
|
||||||
return true if HTML_DELETE_ELEMENT_TYPES.include? node.type
|
|
||||||
return true if node.element? && node.name == 'head'
|
|
||||||
return true if node.text? && node.text.strip.blank?
|
|
||||||
|
|
||||||
false
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
|
@ -1,7 +1,7 @@
|
||||||
require "digest"
|
require "digest"
|
||||||
require_dependency "new_post_manager"
|
require_dependency "new_post_manager"
|
||||||
require_dependency "post_action_creator"
|
require_dependency "post_action_creator"
|
||||||
require_dependency "email/html_cleaner"
|
require_dependency "html_to_markdown"
|
||||||
|
|
||||||
module Email
|
module Email
|
||||||
|
|
||||||
|
@ -188,18 +188,18 @@ module Email
|
||||||
text = fix_charset(@mail)
|
text = fix_charset(@mail)
|
||||||
end
|
end
|
||||||
|
|
||||||
if html.present? && (SiteSetting.incoming_email_prefer_html || text.blank?)
|
|
||||||
html = Email::HtmlCleaner.new(html).output_html
|
|
||||||
html = trim_discourse_markers(html)
|
|
||||||
html, elided = EmailReplyTrimmer.trim(html, true)
|
|
||||||
return [html, elided]
|
|
||||||
end
|
|
||||||
|
|
||||||
if text.present?
|
if text.present?
|
||||||
text = trim_discourse_markers(text)
|
text = trim_discourse_markers(text)
|
||||||
text, elided = EmailReplyTrimmer.trim(text, true)
|
text, elided = EmailReplyTrimmer.trim(text, true)
|
||||||
return [text, elided]
|
return [text, elided]
|
||||||
end
|
end
|
||||||
|
|
||||||
|
if html.present?
|
||||||
|
markdown = HtmlToMarkdown.new(html).to_markdown
|
||||||
|
markdown = trim_discourse_markers(markdown)
|
||||||
|
markdown, elided = EmailReplyTrimmer.trim(markdown, true)
|
||||||
|
return [markdown, elided]
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def fix_charset(mail_part)
|
def fix_charset(mail_part)
|
||||||
|
|
|
@ -115,7 +115,7 @@ class HtmlToMarkdown
|
||||||
RUBY
|
RUBY
|
||||||
end
|
end
|
||||||
|
|
||||||
WHITELISTED ||= %w{del ins kbd s small strike sub sup table tbody td tfoot th thead tr}
|
WHITELISTED ||= %w{del ins kbd s small strike sub sup}
|
||||||
WHITELISTED.each do |tag|
|
WHITELISTED.each do |tag|
|
||||||
class_eval <<-RUBY
|
class_eval <<-RUBY
|
||||||
def visit_#{tag}(node)
|
def visit_#{tag}(node)
|
||||||
|
|
|
@ -157,7 +157,7 @@ describe Email::Receiver do
|
||||||
expect(topic.posts.last.cooked).not_to match(/<br/)
|
expect(topic.posts.last.cooked).not_to match(/<br/)
|
||||||
|
|
||||||
expect { process(:html_reply) }.to change { topic.posts.count }
|
expect { process(:html_reply) }.to change { topic.posts.count }
|
||||||
expect(topic.posts.last.raw).to eq("This is a <b>HTML</b> reply ;)")
|
expect(topic.posts.last.raw).to eq("This is a **HTML** reply ;)")
|
||||||
|
|
||||||
expect { process(:hebrew_reply) }.to change { topic.posts.count }
|
expect { process(:hebrew_reply) }.to change { topic.posts.count }
|
||||||
expect(topic.posts.last.raw).to eq("שלום! מה שלומך היום?")
|
expect(topic.posts.last.raw).to eq("שלום! מה שלומך היום?")
|
||||||
|
@ -174,18 +174,6 @@ describe Email::Receiver do
|
||||||
expect(topic.posts.last.raw).to eq("This is the *text* part.")
|
expect(topic.posts.last.raw).to eq("This is the *text* part.")
|
||||||
end
|
end
|
||||||
|
|
||||||
it "prefers html over text when site setting is enabled" do
|
|
||||||
SiteSetting.incoming_email_prefer_html = true
|
|
||||||
expect { process(:text_and_html_reply) }.to change { topic.posts.count }
|
|
||||||
expect(topic.posts.last.raw).to eq('This is the <b>html</b> part.')
|
|
||||||
end
|
|
||||||
|
|
||||||
it "uses text when prefer_html site setting is enabled but no html is available" do
|
|
||||||
SiteSetting.incoming_email_prefer_html = true
|
|
||||||
expect { process(:text_reply) }.to change { topic.posts.count }
|
|
||||||
expect(topic.posts.last.raw).to eq("This is a text reply :)")
|
|
||||||
end
|
|
||||||
|
|
||||||
it "removes the 'on <date>, <contact> wrote' quoting line" do
|
it "removes the 'on <date>, <contact> wrote' quoting line" do
|
||||||
expect { process(:on_date_contact_wrote) }.to change { topic.posts.count }
|
expect { process(:on_date_contact_wrote) }.to change { topic.posts.count }
|
||||||
expect(topic.posts.last.raw).to eq("This is the actual reply.")
|
expect(topic.posts.last.raw).to eq("This is the actual reply.")
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue