diff --git a/lib/html_to_markdown.rb b/lib/html_to_markdown.rb
index 30c394648d6..5cfbc9b6495 100644
--- a/lib/html_to_markdown.rb
+++ b/lib/html_to_markdown.rb
@@ -136,7 +136,7 @@ class HtmlToMarkdown
end
def visit_img(node)
- if is_valid_url?(node["src"]) && is_visible_img?(node)
+ if is_valid_src?(node["src"]) && is_visible_img?(node)
if @opts[:keep_img_tags]
@stack[-1].markdown << node.to_html
else
@@ -147,7 +147,7 @@ class HtmlToMarkdown
end
def visit_a(node)
- if is_valid_url?(node["href"])
+ if is_valid_href?(node["href"])
@stack[-1].markdown << "["
traverse(node)
@stack[-1].markdown << "](#{node["href"]})"
@@ -206,14 +206,20 @@ class HtmlToMarkdown
(lines + [""]).join("\n")
end
- def is_valid_url?(url)
- url.present? && (url.start_with?("http") || url.start_with?("www."))
+ def is_valid_href?(href) # link target must be non-blank and begin with "http" or "www."
+ href.present? && href.start_with?("http", "www.")
+ end
+
+ def is_valid_src?(src) # image source check; "cid:" sources pass only when @opts[:keep_cid_imgs] is set
+ return false unless src.present?
+ prefixes = @opts[:keep_cid_imgs] ? %w[http www. cid:] : %w[http www.]
+ src.start_with?(*prefixes)
end
def is_visible_img?(img)
- return false if img["width"].present? && img["width"].to_i == 0
+ return false if img["width"].present? && img["width"].to_i == 0 # width attribute given and to_i yields 0 (non-numeric text also yields 0)
return false if img["height"].present? && img["height"].to_i == 0
- return false if img["style"].present? && img["style"][/(width|height)\s*:\s*0/]
+ return false if img["style"].present? && img["style"][/(width|height)\s*:\s*0/] # inline style has a width/height value beginning with 0
true
end
diff --git a/spec/components/html_to_markdown_spec.rb b/spec/components/html_to_markdown_spec.rb
index 091bc79ee80..8a4a8077f8a 100644
--- a/spec/components/html_to_markdown_spec.rb
+++ b/spec/components/html_to_markdown_spec.rb
@@ -3,8 +3,8 @@ require 'html_to_markdown'
describe HtmlToMarkdown do
- def html_to_markdown(html)
- HtmlToMarkdown.new(html).to_markdown
+ def html_to_markdown(html, opts={}) # spec helper: opts is passed straight through to HtmlToMarkdown.new
+ HtmlToMarkdown.new(html, opts).to_markdown
end
it "remove whitespaces" do
@@ -55,14 +55,15 @@ describe HtmlToMarkdown do
expect(html_to_markdown(%Q{Discourse})).to eq("Discourse")
end
- HTML_WITH_IMG ||= %Q{
}
+ HTML_WITH_IMG ||= %Q{
}
+ HTML_WITH_CID_IMG ||= %Q{
}
it "converts
" do
expect(html_to_markdown(HTML_WITH_IMG)).to eq("")
end
it "keeps
with 'keep_img_tags'" do
- expect(HtmlToMarkdown.new(HTML_WITH_IMG, keep_img_tags: true).to_markdown).to eq(HTML_WITH_IMG)
+ expect(html_to_markdown(HTML_WITH_IMG, keep_img_tags: true)).to eq(HTML_WITH_IMG)
end
it "removes empty & invalid
" do
@@ -71,6 +72,11 @@ describe HtmlToMarkdown do
expect(html_to_markdown(%Q{
})).to eq("")
end
+ it "keeps
with src='cid:' with 'keep_cid_imgs'" do
+ expect(html_to_markdown(HTML_WITH_CID_IMG, keep_cid_imgs: true)).to eq("")
+ expect(html_to_markdown(HTML_WITH_CID_IMG, keep_img_tags: true, keep_cid_imgs: true)).to eq("
")
+ end
+
it "skips hidden
" do
expect(html_to_markdown(%Q{
})).to eq("")
expect(html_to_markdown(%Q{
})).to eq("")