2
0
Fork 0
mirror of https://github.com/discourse/discourse.git synced 2025-09-05 08:59:27 +08:00

FEATURE: convert incoming emails in HTML to markdown

- remove incoming_email_prefer_html site setting
- remove HtmlCleaner class
This commit is contained in:
Régis Hanol 2017-04-26 16:49:06 +02:00
parent e155cb6db1
commit b76674f640
6 changed files with 10 additions and 157 deletions

View file

@ -1,132 +0,0 @@
module Email
  # HtmlCleaner cleans up the extremely dirty HTML that many email clients
  # generate by stripping out any excess divs or spans, removing styling in
  # the process (which also makes the html more suitable to be parsed as
  # Markdown).
  class HtmlCleaner
    # Elements to hoist all children out of
    HTML_HOIST_ELEMENTS = %w(div span font table tbody th tr td).freeze

    # Node types to always delete
    HTML_DELETE_ELEMENT_TYPES = [
      Nokogiri::XML::Node::DTD_NODE,
      Nokogiri::XML::Node::COMMENT_NODE,
    ].freeze

    # Private variables:
    #   @doc - nokogiri document
    #   @out - same as @doc, but only set once trimming has occurred
    #
    # html - Either a HTML string (parsed with Nokogiri::HTML) or an already
    #        parsed Nokogiri document, which is used as-is and modified in
    #        place by the trimming methods.
    def initialize(html)
      @doc = html.is_a?(String) ? Nokogiri::HTML(html) : html
    end

    class << self
      # Email::HtmlCleaner.trim(inp, opts={})
      #
      # Arguments:
      #   inp - Either a HTML string or a Nokogiri document.
      # Options:
      #   :return => :doc, :string
      #     Specify the desired return type.
      #     Defaults to the type of the input.
      #     A value of :string is equivalent to calling get_document_text()
      #     on the returned document.
      #
      # The opts hash is only read; it is never modified.
      def trim(inp, opts = {})
        # Resolve the default into a local so the caller's hash is left untouched.
        return_type = opts[:return] || (inp.is_a?(String) ? :string : :doc)
        cleaner = HtmlCleaner.new(inp)

        if return_type == :string
          cleaner.output_html
        else
          cleaner.output_document
        end
      end

      # Email::HtmlCleaner.get_document_text(doc)
      #
      # Get the body portion of the document, including html, as a string.
      # Falls back to the whole document's inner html when there is no <body>.
      def get_document_text(doc)
        body = doc.xpath('//body')
        # xpath returns a NodeSet, which is truthy even when it matched nothing,
        # so emptiness has to be checked explicitly for the fallback to apply.
        if body.empty?
          doc.inner_html
        else
          body.inner_html
        end
      end
    end

    # Returns the trimmed document. The work is done on first call and the
    # result is memoized in @out; the trimming mutates @doc in place.
    def output_document
      @out ||= begin
        doc = @doc
        trim_process_node(doc)
        add_newlines(doc)
        doc
      end
    end

    # Returns the trimmed document's body as a HTML string.
    def output_html
      HtmlCleaner.get_document_text(output_document)
    end

    private

    # Inserts literal newline text nodes so that the resulting markup is
    # friendlier to line-wise / Markdown post-processing.
    def add_newlines(doc)
      # Replace <br> tags with two newlines
      doc.xpath('//br').each do |br|
        br.replace(new_linebreak_node(doc, 2))
      end

      # Surround <p> tags with newlines, to help with line-wise postprocessing
      # and ensure markdown paragraphs
      doc.xpath('//p').each do |p|
        p.before(new_linebreak_node(doc))
        p.after(new_linebreak_node(doc, 2))
      end
    end

    # Builds a text node containing `count` newline characters, owned by doc.
    def new_linebreak_node(doc, count = 1)
      Nokogiri::XML::Text.new("\n" * count, doc)
    end

    # Recursively walks the tree rooted at node: hoistable elements are
    # replaced by their (processed) children, deletable nodes are removed,
    # and everything else is kept while its children are processed.
    # Returns node.
    def trim_process_node(node)
      if should_hoist?(node)
        trim_hoist_element(node).each { |child| trim_process_node(child) }
      elsif should_delete?(node)
        node.remove
      else
        node.children.each { |child| trim_process_node(child) }
      end
      node
    end

    # Moves every child of element to just before it (preserving order),
    # removes the now-empty element and returns the moved children.
    def trim_hoist_element(element)
      hoisted = element.children.map do |child|
        element.before(child)
        child
      end
      element.remove
      hoisted
    end

    # True for element nodes whose tag name is listed in HTML_HOIST_ELEMENTS.
    def should_hoist?(node)
      node.element? && HTML_HOIST_ELEMENTS.include?(node.name)
    end

    # True for DTD and comment nodes, the <head> element, and text nodes
    # that contain only whitespace.
    def should_delete?(node)
      return true if HTML_DELETE_ELEMENT_TYPES.include?(node.type)
      return true if node.element? && node.name == 'head'
      return true if node.text? && node.text.strip.blank?
      false
    end
  end
end

View file

@ -1,7 +1,7 @@
require "digest"
require_dependency "new_post_manager"
require_dependency "post_action_creator"
require_dependency "email/html_cleaner"
require_dependency "html_to_markdown"
module Email
@ -188,18 +188,18 @@ module Email
text = fix_charset(@mail)
end
if html.present? && (SiteSetting.incoming_email_prefer_html || text.blank?)
html = Email::HtmlCleaner.new(html).output_html
html = trim_discourse_markers(html)
html, elided = EmailReplyTrimmer.trim(html, true)
return [html, elided]
end
if text.present?
text = trim_discourse_markers(text)
text, elided = EmailReplyTrimmer.trim(text, true)
return [text, elided]
end
if html.present?
markdown = HtmlToMarkdown.new(html).to_markdown
markdown = trim_discourse_markers(markdown)
markdown, elided = EmailReplyTrimmer.trim(markdown, true)
return [markdown, elided]
end
end
def fix_charset(mail_part)