mirror of
https://github.com/discourse/discourse.git
synced 2025-09-05 08:59:27 +08:00
PERF: ability to crawl for titles without extra HEAD req
Also introduces a much more aggressive timeout for title crawling and adds gzip compression for the body that is crawled
This commit is contained in:
parent
1f6adbea5c
commit
fa5880e04f
4 changed files with 176 additions and 26 deletions
|
@ -207,6 +207,32 @@ describe FinalDestination do
|
|||
end
|
||||
end
|
||||
|
||||
describe '.get' do
|
||||
|
||||
it "can correctly stream with a redirect" do
|
||||
|
||||
FinalDestination.clear_https_cache!("wikipedia.com")
|
||||
|
||||
stub_request(:get, "http://wikipedia.com/").
|
||||
to_return(status: 302, body: "" , headers: { "location" => "https://wikipedia.com/" })
|
||||
|
||||
# webmock does not do chunks
|
||||
stub_request(:get, "https://wikipedia.com/").
|
||||
to_return(status: 200, body: "<html><head>" , headers: {})
|
||||
|
||||
result = nil
|
||||
chunk = nil
|
||||
|
||||
result = FinalDestination.new("http://wikipedia.com", opts).get do |resp, c|
|
||||
chunk = c
|
||||
throw :done
|
||||
end
|
||||
|
||||
expect(result).to eq("https://wikipedia.com/")
|
||||
expect(chunk).to eq("<html><head>")
|
||||
end
|
||||
end
|
||||
|
||||
describe '.validate_uri' do
|
||||
context "host lookups" do
|
||||
it "works for various hosts" do
|
||||
|
|
|
@ -54,7 +54,18 @@ describe RetrieveTitle do
|
|||
)
|
||||
expect(title).to eq("Video Title")
|
||||
end
|
||||
end
|
||||
|
||||
context "crawl" do
|
||||
it "can properly extract a title from a url" do
|
||||
stub_request(:get, "https://brelksdjflaskfj.com/amazing")
|
||||
.to_return(status: 200, body: "<html><title>very amazing</title>")
|
||||
|
||||
# we still resolve the IP address for every host
|
||||
IPSocket.stubs(:getaddress).returns('100.2.3.4')
|
||||
|
||||
expect(RetrieveTitle.crawl("https://brelksdjflaskfj.com/amazing")).to eq("very amazing")
|
||||
end
|
||||
end
|
||||
|
||||
end
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue