2
0
Fork 0
mirror of https://github.com/discourse/discourse.git synced 2025-09-05 08:59:27 +08:00

PERF: ability to crawl for titles without extra HEAD req

Also introduces a much more aggressive timeout for title crawling
and adds gzip support for the body that is crawled.
This commit is contained in:
Sam 2018-01-29 15:36:52 +11:00
parent 1f6adbea5c
commit fa5880e04f
4 changed files with 176 additions and 26 deletions

View file

@ -207,6 +207,32 @@ describe FinalDestination do
end
end
describe '.get' do
  # Streams a GET through a 302 redirect and checks both the value returned
  # by #get and the chunk that was handed to the block.
  it "can correctly stream with a redirect" do
    # presumably resets any cached https preference for this host so the
    # http:// request below is really issued -- confirm against FinalDestination
    FinalDestination.clear_https_cache!("wikipedia.com")

    stub_request(:get, "http://wikipedia.com/")
      .to_return(status: 302, body: "", headers: { "location" => "https://wikipedia.com/" })

    # webmock does not do chunks, so the stubbed body is expected in one piece
    stub_request(:get, "https://wikipedia.com/")
      .to_return(status: 200, body: "<html><head>", headers: {})

    # declared outside the block so the assignment made inside it stays visible
    chunk = nil

    result = FinalDestination.new("http://wikipedia.com", opts).get do |_resp, c|
      chunk = c
      # stop after the first chunk
      # NOTE(review): relies on #get catching :done -- confirm
      throw :done
    end

    expect(result).to eq("https://wikipedia.com/")
    expect(chunk).to eq("<html><head>")
  end
end
describe '.validate_uri' do
context "host lookups" do
it "works for various hosts" do

View file

@ -54,7 +54,18 @@ describe RetrieveTitle do
)
expect(title).to eq("Video Title")
end
end
context "crawl" do
  # Checks that RetrieveTitle.crawl returns the <title> text of a stubbed page.
  it "can properly extract a title from a url" do
    url = "https://brelksdjflaskfj.com/amazing"

    # we still resolve the IP address for every host, so the lookup is stubbed
    IPSocket.stubs(:getaddress).returns('100.2.3.4')

    stub_request(:get, url)
      .to_return(status: 200, body: "<html><title>very amazing</title>")

    title = RetrieveTitle.crawl(url)
    expect(title).to eq("very amazing")
  end
end
end