mirror of
https://gh.wpcy.net/https://github.com/discourse/discourse.git
synced 2026-05-24 02:41:20 +08:00
We currently rely on Open Graph data to render YouTube oneboxes.
However, YouTube does not always give us a response with the Open Graph
data necessary, leading to empty preview HTML like this that shows
as a broken image in the composer:
```
<img src="" width="480" height="360" title=" - YouTube" style="aspect-ratio: 480 / 360;">
```
In addition, our old method of parsing YouTube script tags for JSON
which contains information for the video, introduced in
4d669d2b71,
seems to no longer work reliably, possibly due to changes in YouTube's
JS structure.
To fix this, we switch to using YouTube's oEmbed endpoint which gives us
all the metadata we need for oneboxes in JSON format, including title,
author,
thumbnail URL, and video dimensions. This approach is more robust and
less likely to break
due to changes in YouTube's page structure.
c.f.
https://meta.discourse.org/t/youtube-uris-fail-to-render-thumbnails-when-oneboxed/387673
205 lines
5.4 KiB
Ruby
Vendored
205 lines
5.4 KiB
Ruby
Vendored
# frozen_string_literal: true
|
|
|
|
require "cgi"
|
|
require "onebox/normalizer"
|
|
require "onebox/open_graph"
|
|
require "onebox/oembed"
|
|
require "onebox/json_ld"
|
|
|
|
module Onebox
|
|
module Engine
|
|
module StandardEmbed
|
|
def self.oembed_providers
|
|
@@oembed_providers ||= {}
|
|
end
|
|
|
|
def self.add_oembed_provider(regexp, endpoint)
|
|
oembed_providers[regexp] = endpoint
|
|
end
|
|
|
|
def self.opengraph_providers
|
|
@@opengraph_providers ||= []
|
|
end
|
|
|
|
def self.add_opengraph_provider(regexp)
|
|
opengraph_providers << regexp
|
|
end
|
|
|
|
# Some oembed providers (like meetup.com) don't provide links to themselves
|
|
add_oembed_provider(%r{www\.meetup\.com/}, "http://api.meetup.com/oembed")
|
|
add_oembed_provider(%r{www\.mixcloud\.com/}, "https://www.mixcloud.com/oembed/")
|
|
# In order to support Private Videos
|
|
add_oembed_provider(%r{vimeo\.com/}, "https://vimeo.com/api/oembed.json")
|
|
# NYT requires login so use oembed only
|
|
add_oembed_provider(%r{nytimes\.com/}, "https://www.nytimes.com/svc/oembed/json/")
|
|
# YouTube's oEmbed for reliable metadata (thumbnails, titles)
|
|
add_oembed_provider(/youtube\.com|youtu\.be/, "https://www.youtube.com/oembed")
|
|
|
|
def always_https?
|
|
AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.https_hosts) || super
|
|
end
|
|
|
|
def raw
|
|
return @raw if defined?(@raw)
|
|
|
|
@raw = {}
|
|
|
|
set_opengraph_data_on_raw
|
|
set_twitter_data_on_raw
|
|
set_oembed_data_on_raw
|
|
set_json_ld_data_on_raw
|
|
set_favicon_data_on_raw
|
|
set_description_on_raw
|
|
|
|
@raw
|
|
end
|
|
|
|
protected
|
|
|
|
def html_doc
|
|
return @html_doc if defined?(@html_doc)
|
|
|
|
headers = nil
|
|
headers = { "Cookie" => options[:cookie] } if options[:cookie]
|
|
|
|
@html_doc = Onebox::Helpers.fetch_html_doc(url, headers)
|
|
end
|
|
|
|
def get_oembed
|
|
@oembed ||= Onebox::Oembed.new(get_json_response)
|
|
end
|
|
|
|
def get_opengraph
|
|
@opengraph ||= ::Onebox::OpenGraph.new(html_doc)
|
|
end
|
|
|
|
def get_twitter
|
|
return {} unless html_doc
|
|
|
|
twitter = {}
|
|
|
|
html_doc
|
|
.css("meta")
|
|
.each do |m|
|
|
if (m["property"] && m["property"][/^twitter:(.+)$/i]) ||
|
|
(m["name"] && m["name"][/^twitter:(.+)$/i])
|
|
value = (m["content"] || m["value"]).to_s
|
|
twitter[$1.tr("-:", "_").to_sym] ||= value if (value.present? && value != "0 minutes")
|
|
end
|
|
end
|
|
|
|
twitter
|
|
end
|
|
|
|
def get_favicon
|
|
return nil unless html_doc
|
|
|
|
favicon =
|
|
html_doc.css(
|
|
'link[rel="shortcut icon"], link[rel="icon shortcut"], link[rel="shortcut"], link[rel="icon"]',
|
|
).first
|
|
favicon = favicon.nil? ? nil : (favicon["href"].nil? ? nil : favicon["href"].strip)
|
|
|
|
return nil if favicon.blank?
|
|
|
|
absolute_url = Onebox::Helpers.get_absolute_image_url(favicon, url)
|
|
|
|
return nil if absolute_url.length > UrlHelper::MAX_URL_LENGTH
|
|
|
|
absolute_url
|
|
end
|
|
|
|
def get_description
|
|
return nil unless html_doc
|
|
|
|
description = html_doc.at("meta[name='description']").to_h["content"]
|
|
description ||= html_doc.at("meta[name='Description']").to_h["content"]
|
|
|
|
description
|
|
end
|
|
|
|
def get_json_response
|
|
oembed_url = get_oembed_url
|
|
|
|
return "{}" if oembed_url.blank?
|
|
|
|
begin
|
|
Onebox::Helpers.fetch_response(oembed_url)
|
|
rescue StandardError
|
|
"{}"
|
|
end
|
|
rescue Errno::ECONNREFUSED, Net::HTTPError, Net::HTTPFatalError, MultiJson::LoadError
|
|
"{}"
|
|
end
|
|
|
|
def get_oembed_url
|
|
oembed_url = nil
|
|
|
|
StandardEmbed.oembed_providers.each do |regexp, endpoint|
|
|
if url =~ regexp
|
|
oembed_url = "#{endpoint}?url=#{url}"
|
|
break
|
|
end
|
|
end
|
|
|
|
if html_doc
|
|
if oembed_url.blank?
|
|
application_json = html_doc.at("//link[@type='application/json+oembed']/@href")
|
|
oembed_url = application_json.value if application_json
|
|
end
|
|
|
|
if oembed_url.blank?
|
|
text_json = html_doc.at("//link[@type='text/json+oembed']/@href")
|
|
oembed_url ||= text_json.value if text_json
|
|
end
|
|
end
|
|
|
|
oembed_url
|
|
end
|
|
|
|
def get_json_ld
|
|
@json_ld ||= Onebox::JsonLd.new(html_doc)
|
|
end
|
|
|
|
def set_from_normalizer_data(normalizer)
|
|
normalizer.data.each do |k, _|
|
|
v = normalizer.public_send(k)
|
|
@raw[k] ||= v unless v.nil?
|
|
end
|
|
end
|
|
|
|
def set_opengraph_data_on_raw
|
|
og = get_opengraph
|
|
set_from_normalizer_data(og)
|
|
@raw.except!(:title_attr)
|
|
end
|
|
|
|
def set_twitter_data_on_raw
|
|
twitter = get_twitter
|
|
twitter.each { |k, v| @raw[k] ||= v if v.present? }
|
|
end
|
|
|
|
def set_oembed_data_on_raw
|
|
oembed = get_oembed
|
|
set_from_normalizer_data(oembed)
|
|
end
|
|
|
|
def set_json_ld_data_on_raw
|
|
json_ld = get_json_ld
|
|
set_from_normalizer_data(json_ld)
|
|
end
|
|
|
|
def set_favicon_data_on_raw
|
|
favicon = get_favicon
|
|
@raw[:favicon] = favicon if favicon.present?
|
|
end
|
|
|
|
def set_description_on_raw
|
|
unless @raw[:description]
|
|
description = get_description
|
|
@raw[:description] = description if description.present?
|
|
end
|
|
end
|
|
end
|
|
end
|
|
end
|