diff --git a/app/models/topic_embed.rb b/app/models/topic_embed.rb
index 61554f6f5cf..1e9f06a3223 100644
--- a/app/models/topic_embed.rb
+++ b/app/models/topic_embed.rb
@@ -113,7 +113,8 @@ class TopicEmbed < ActiveRecord::Base
fd = FinalDestination.new(
url,
validate_uri: true,
- max_redirects: 5
+ max_redirects: 5,
+ follow_canonical: true,
)
url = fd.resolve
diff --git a/lib/final_destination.rb b/lib/final_destination.rb
index 3831abff43a..e3805479c83 100644
--- a/lib/final_destination.rb
+++ b/lib/final_destination.rb
@@ -63,7 +63,8 @@ class FinalDestination
end
@status = :ready
- @http_verb = @force_get_hosts.any? { |host| hostname_matches?(host) } ? :get : :head
+ @follow_canonical = @opts[:follow_canonical]
+ @http_verb = http_verb(@force_get_hosts, @follow_canonical)
@cookie = nil
@limited_ips = []
@verbose = @opts[:verbose] || false
@@ -77,6 +78,14 @@ class FinalDestination
20
end
+ def http_verb(force_get_hosts, follow_canonical)
+ if follow_canonical || force_get_hosts.any? { |host| hostname_matches?(host) }
+ :get
+ else
+ :head
+ end
+ end
+
def timeout
@timeout || FinalDestination.connection_timeout
end
@@ -215,6 +224,18 @@ class FinalDestination
end
end
+ if @follow_canonical
+ next_url = uri(fetch_canonical_url(response.body))
+
+ if next_url.to_s.present? && next_url != @uri
+ @follow_canonical = false
+ @uri = next_url
+ @http_verb = http_verb(@force_get_hosts, @follow_canonical)
+
+ return resolve
+ end
+ end
+
@content_type = response.headers['Content-Type'] if response.headers.has_key?('Content-Type')
@status = :resolved
return @uri
@@ -458,4 +479,12 @@ class FinalDestination
end
end
+ def fetch_canonical_url(body)
+ return if body.blank?
+ canonical_link = Nokogiri::HTML5(body).at("link[rel='canonical']")
+
+ return if canonical_link.nil?
+
+ canonical_link['href']
+ end
end
diff --git a/spec/components/final_destination_spec.rb b/spec/components/final_destination_spec.rb
index 8c626eb6575..8b781324bd5 100644
--- a/spec/components/final_destination_spec.rb
+++ b/spec/components/final_destination_spec.rb
@@ -49,6 +49,13 @@ describe FinalDestination do
}
end
+ def canonical_follow(from, dest)
+ stub_request(:get, from).to_return(
+ status: 200,
+ body: "
"
+ )
+ end
+
def redirect_response(from, dest)
stub_request(:head, from).to_return(
status: 302,
@@ -175,6 +182,39 @@ describe FinalDestination do
end
end
+ context 'follows canonical links' do
+ it 'resolves the canonical link as the final destination' do
+ canonical_follow("https://eviltrout.com", "https://codinghorror.com/blog")
+ stub_request(:head, "https://codinghorror.com/blog").to_return(doc_response)
+
+ final = FinalDestination.new('https://eviltrout.com', opts.merge(follow_canonical: true))
+
+ expect(final.resolve.to_s).to eq("https://codinghorror.com/blog")
+ expect(final.redirected?).to eq(false)
+ expect(final.status).to eq(:resolved)
+ end
+
+ it "does not follow the canonical link if it's the same as the current URL" do
+ canonical_follow("https://eviltrout.com", "https://eviltrout.com")
+
+ final = FinalDestination.new('https://eviltrout.com', opts.merge(follow_canonical: true))
+
+ expect(final.resolve.to_s).to eq("https://eviltrout.com")
+ expect(final.redirected?).to eq(false)
+ expect(final.status).to eq(:resolved)
+ end
+
+ it "does not follow the canonical link if it's invalid" do
+ canonical_follow("https://eviltrout.com", "")
+
+ final = FinalDestination.new('https://eviltrout.com', opts.merge(follow_canonical: true))
+
+ expect(final.resolve.to_s).to eq("https://eviltrout.com")
+ expect(final.redirected?).to eq(false)
+ expect(final.status).to eq(:resolved)
+ end
+ end
+
context "GET can be forced" do
before do
stub_request(:head, 'https://force.get.com/posts?page=4')
diff --git a/spec/models/topic_embed_spec.rb b/spec/models/topic_embed_spec.rb
index 694ad884248..8659eb7d569 100644
--- a/spec/models/topic_embed_spec.rb
+++ b/spec/models/topic_embed_spec.rb
@@ -190,18 +190,20 @@ describe TopicEmbed do
end
describe '.find_remote' do
+ fab!(:embeddable_host) { Fabricate(:embeddable_host) }
+ let(:file) { StringIO.new }
+
+ before do
+ TopicEmbed.stubs(:open).returns file
+ end
context ".title_scrub" do
-
let(:url) { 'http://eviltrout.com/123' }
let(:contents) { "Through the Looking Glass - Classic Bookssome content here" }
- fab!(:embeddable_host) { Fabricate(:embeddable_host) }
- let!(:file) { StringIO.new }
before do
file.stubs(:read).returns contents
- TopicEmbed.stubs(:open).returns file
- stub_request(:head, url)
+ stub_request(:get, url)
end
it "doesn't scrub the title by default" do
@@ -214,44 +216,38 @@ describe TopicEmbed do
response = TopicEmbed.find_remote(url)
expect(response.title).to eq("Through the Looking Glass")
end
-
end
context 'post with allowed classes "foo" and "emoji"' do
fab!(:user) { Fabricate(:user) }
let(:url) { 'http://eviltrout.com/123' }
let(:contents) { "my normal size emoji
Hi
" }
- fab!(:embeddable_host) { Fabricate(:embeddable_host) }
- let!(:file) { StringIO.new }
-
- response = nil
before do
SiteSetting.allowed_embed_classnames = 'emoji, foo'
file.stubs(:read).returns contents
- TopicEmbed.stubs(:open).returns file
- stub_request(:head, url)
- response = TopicEmbed.find_remote(url)
+ stub_request(:get, url)
+ @response = TopicEmbed.find_remote(url)
end
it "has no author tag" do
- expect(response.author).to be_blank
+ expect(@response.author).to be_blank
end
it 'img node has emoji class' do
- expect(response.body).to have_tag('img', with: { class: 'emoji' })
+ expect(@response.body).to have_tag('img', with: { class: 'emoji' })
end
it 'img node has foo class' do
- expect(response.body).to have_tag('img', with: { class: 'foo' })
+ expect(@response.body).to have_tag('img', with: { class: 'foo' })
end
it 'p node has foo class' do
- expect(response.body).to have_tag('p', with: { class: 'foo' })
+ expect(@response.body).to have_tag('p', with: { class: 'foo' })
end
it 'nodes removes classes other than emoji' do
- expect(response.body).to have_tag('img', without: { class: 'other' })
+ expect(@response.body).to have_tag('img', without: { class: 'other' })
end
end
@@ -259,67 +255,56 @@ describe TopicEmbed do
fab!(:user) { Fabricate(:user, username: 'eviltrout') }
let(:url) { 'http://eviltrout.com/321' }
let(:contents) { 'rich and morty' }
- fab!(:embeddable_host) { Fabricate(:embeddable_host) }
- let!(:file) { StringIO.new }
-
- response = nil
before(:each) do
file.stubs(:read).returns contents
TopicEmbed.stubs(:open).returns file
- stub_request(:head, url)
- response = TopicEmbed.find_remote(url)
+ stub_request(:get, url)
end
it "has no author tag" do
+ response = TopicEmbed.find_remote(url)
+
expect(response.author).to eq(user)
end
end
context 'post with no allowed classes' do
-
fab!(:user) { Fabricate(:user) }
let(:url) { 'http://eviltrout.com/123' }
let(:contents) { "my normal size emoji
Hi
" }
- fab!(:embeddable_host) { Fabricate(:embeddable_host) }
- let!(:file) { StringIO.new }
-
- response = nil
before(:each) do
SiteSetting.allowed_embed_classnames = ''
file.stubs(:read).returns contents
- TopicEmbed.stubs(:open).returns file
- stub_request(:head, url)
- response = TopicEmbed.find_remote(url)
+ stub_request(:get, url)
+ @response = TopicEmbed.find_remote(url)
end
it 'img node doesn\'t have emoji class' do
- expect(response.body).to have_tag('img', without: { class: 'emoji' })
+ expect(@response.body).to have_tag('img', without: { class: 'emoji' })
end
it 'img node doesn\'t have foo class' do
- expect(response.body).to have_tag('img', without: { class: 'foo' })
+ expect(@response.body).to have_tag('img', without: { class: 'foo' })
end
it 'p node doesn\'t foo class' do
- expect(response.body).to have_tag('p', without: { class: 'foo' })
+ expect(@response.body).to have_tag('p', without: { class: 'foo' })
end
it 'img node doesn\'t have other class' do
- expect(response.body).to have_tag('img', without: { class: 'other' })
+ expect(@response.body).to have_tag('img', without: { class: 'other' })
end
end
context "non-ascii URL" do
let(:url) { 'http://eviltrout.com/test/ماهی' }
let(:contents) { "سلاماین یک پاراگراف آزمون است." }
- fab!(:embeddable_host) { Fabricate(:embeddable_host) }
- let!(:file) { StringIO.new }
before do
- stub_request(:head, url)
- stub_request(:get, url).to_return(body: contents).then.to_raise
+ stub_request(:get, url)
+ file.stubs(:read).returns contents
end
it "doesn't throw an error" do
@@ -331,12 +316,10 @@ describe TopicEmbed do
context "encoded URL" do
let(:url) { 'http://example.com/hello%20world' }
let(:contents) { "Hello World!" }
- fab!(:embeddable_host) { Fabricate(:embeddable_host) }
- let!(:file) { StringIO.new }
before do
- stub_request(:head, url)
- stub_request(:get, url).to_return(body: contents).then.to_raise
+ stub_request(:get, url)
+ file.stubs(:read).returns contents
end
it "doesn't throw an error" do
@@ -356,17 +339,15 @@ describe TopicEmbed do
context "emails" do
let(:url) { 'http://example.com/foo' }
let(:contents) { '