discourse/lib/onebox/engine/youtube_onebox.rb
Martin Brennan 2b04fbeb30
FIX: Use oEmbed for YouTube oneboxing (#35959)
We currently rely on Open Graph data to render YouTube oneboxes.
However, YouTube does not always give us a response with the Open Graph
data necessary, leading to empty preview HTML like this that shows
as a broken image in the composer:

```
<img src="" width="480" height="360" title=" - YouTube" style="aspect-ratio: 480 / 360;">
```

In addition, our old method of parsing YouTube script tags for JSON
which contains information for the video, introduced in

4d669d2b71,
seems to no longer work reliably, possibly due to changes in YouTube's
JS structure.

To fix this, we switch to using YouTube's oEmbed endpoint which gives us
all the metadata we need for oneboxes in JSON format, including title,
author,
thumbnail URL, and video dimensions. This approach is more robust and
less likely to break
due to changes in YouTube's page structure.

c.f.
https://meta.discourse.org/t/youtube-uris-fail-to-render-thumbnails-when-oneboxed/387673
2025-11-12 14:13:16 +10:00

220 lines
6.7 KiB
Ruby
Vendored

# frozen_string_literal: true
module Onebox
module Engine
class YoutubeOnebox
include Engine
include StandardEmbed
matches_domain("youtube.com", "www.youtube.com", "m.youtube.com", "youtu.be")
requires_iframe_origins "https://www.youtube.com"
always_https
# Reports whether +path+ is one this engine is willing to handle.
#
# Any path with at least one character after the leading slash qualifies;
# the bare root path ("/") and the empty path do not.
#
# The pattern is anchored with \A and \z rather than ^ and $: in Ruby the
# latter match at every line boundary, so a multi-line string would pass as
# soon as any one of its lines looked like a path.
#
# @param path [String] path component of the candidate URL
# @return [Boolean]
def self.matches_path(path)
  path.match?(%r{\A/.+\z})
end
# Values written into the width/height attributes of the placeholder <img>
# and of the player <iframe>s generated by this engine.
WIDTH = 480
HEIGHT = 360
# Builds the address of YouTube's embeddable player page for a video.
#
# @param video_id [String] the video identifier to append to the embed path
# @return [String] the embed URL
def self.embed_url(video_id)
  embed_base = "https://www.youtube.com/embed/"
  "#{embed_base}#{video_id}"
end
# Looks up the thumbnail and title used to render a video placeholder.
#
# Sources are tried in this order:
#   1. oEmbed data (used only when it has both a thumbnail URL and a title);
#   2. the JSON passed to `ytcfg.set(...)` in a <script> tag of the video's
#      embed page (legacy; may be missing or malformed).
#
# The outcome is memoized, including a nil outcome, so repeated calls (for
# example from both placeholder_html and video_title) do not repeat the
# lookups after a failure. An error raised while fetching the embed page
# itself is not swallowed and is not memoized.
#
# @return [Hash, nil] a hash with :image and :title keys, or nil when the URL
#   has no video id or neither source produced usable data
def parse_embed_response
  return @parse_embed_response if defined?(@parse_embed_response)

  # Only video URLs (not channels, playlists, etc.) can be resolved here.
  # This is a cheap check before any HTTP request is made.
  unless video_id
    @parse_embed_response = nil
    return
  end

  # Try oEmbed first (most reliable).
  begin
    oembed_data = get_oembed.data
    if oembed_data[:thumbnail_url] && oembed_data[:title]
      @parse_embed_response = {
        image: oembed_data[:thumbnail_url],
        title: oembed_data[:title],
      }
      return @parse_embed_response
    end
  rescue StandardError
    # Fall through to the embed page.
  end

  # Deliberately outside the rescue below: a failed fetch propagates to the caller.
  @embed_doc ||= Onebox::Helpers.fetch_html_doc(self.class.embed_url(video_id))

  @parse_embed_response =
    begin
      script_tag =
        @embed_doc.xpath("//script").find { |tag| tag.to_s.include?("ytcfg.set") }.to_s
      match = script_tag.match(/ytcfg\.set\((?<json>.*)\)/)
      yt_json = ::JSON.parse(match[:json])
      renderer =
        ::JSON.parse(yt_json["PLAYER_VARS"]["embedded_player_response"])["embedPreview"][
          "thumbnailPreviewRenderer"
        ]
      title = renderer["title"]["runs"].first["text"]
      image = "https://img.youtube.com/vi/#{video_id}/hqdefault.jpg"
      { image: image, title: title }
    rescue StandardError
      # Missing script tag, unexpected JSON shape, etc.: nothing usable.
      nil
    end
end
# Builds the lightweight placeholder markup for this URL.
#
# For video and playlist URLs this is a single <img> tag whose source and
# title come from parse_embed_response, falling back to Open Graph data when
# that returns nil. For every other URL the full to_html output is used.
#
# Both the image URL and the title originate from remote responses, so both
# are HTML-escaped before being placed inside the single-quoted attributes.
# A missing image or title is rendered as an empty attribute rather than
# raising.
#
# @return [String, nil] placeholder markup (nil only if to_html returns nil)
def placeholder_html
  return to_html unless video_id || list_id

  result = parse_embed_response || get_opengraph.data
  image = CGI.escapeHTML(result[:image].to_s)
  title = CGI.escapeHTML(result[:title].to_s)

  "<img src='#{image}' width='#{WIDTH}' height='#{HEIGHT}' title='#{title}'>"
end
# Renders the final onebox markup for this URL.
#
# * URL with a video id: a hidden thumbnail <img> followed by the player
#   <iframe> for that video.
# * URL with only a list id: a "videoseries" player <iframe> for the list.
# * Anything else (e.g. channel pages): the markup produced by
#   AllowlistedGenericOnebox, with protocol-relative URLs that directly
#   follow a quote rewritten to https.
#
# @return [String, nil] the markup, or nil when the generic onebox is blank
def to_html
  if video_id
    <<-HTML
<img class="youtube-thumbnail onebox" style="display: none;" src="#{video_thumbnail_url}">
<iframe
src="#{self.class.embed_url(video_id)}?#{embed_params}"
width="#{WIDTH}"
height="#{HEIGHT}"
frameborder="0"
allowfullscreen
class="youtube-onebox"
></iframe>
HTML
  elsif list_id
    <<-HTML
<iframe
src="https://www.youtube.com/embed/videoseries?list=#{list_id}&wmode=transparent&rel=0&autohide=1&showinfo=1&enablejsapi=1"
width="#{WIDTH}"
height="#{HEIGHT}"
frameborder="0"
allowfullscreen
class="youtube-onebox"
></iframe>
HTML
  else
    # for channel pages
    html = Onebox::Engine::AllowlistedGenericOnebox.new(@url, @timeout).to_html
    return if html.blank?

    # Turn `"//host/...` / `'//host/...` into `"https://host/...`. The quote
    # is captured and re-emitted; replacing the whole match would strip the
    # attribute's opening quote and leave malformed markup.
    html.gsub!(%r{(['"])//}, '\1https://')
    html
  end
end
# Title of the video, taken from parse_embed_response or, when that returns
# nil, from the Open Graph data. A truthy title is cached for later calls.
#
# @return [String, nil]
def video_title
  return @video_title if @video_title

  metadata = parse_embed_response
  metadata = get_opengraph.data unless metadata
  @video_title = metadata[:title]
end
private
# Extracts the video id from the URL, trying each supported URL shape in
# turn and keeping the first id found:
#
#   http://youtu.be/afyK1HSFfgw
#   https://www.youtube.com/embed/vsF0K3Ou1v0
#   https://www.youtube.com/shorts/wi2jAtpBl0Y
#   https://www.youtube.com/live/eJemwqO0SDw
#   https://www.youtube.com/watch?v=Z0UISCEe52Y
#
# The candidate is passed through sanitize_yt_id before being returned.
# A truthy result is cached.
#
# @return [String, nil]
def video_id
  @video_id ||=
    begin
      path = uri.path
      path_patterns = [
        ["/embed/", %r{/embed/([\w\-]+)}],
        ["/shorts/", %r{/shorts/([\w\-]+)}],
        ["/live/", %r{/live/([\w\-]+)}],
      ]

      # Short links carry the id as the first path segment.
      candidate = uri.host.include?("youtu.be") ? path[%r{/([\w\-]+)}, 1] : nil

      path_patterns.each do |marker, pattern|
        break if candidate
        next unless path.include?(marker)

        candidate = path[pattern, 1]
      end

      # Regular watch URLs carry the id in the query string.
      candidate ||= params["v"]
      sanitize_yt_id(candidate)
    end
end
# Playlist id taken from the "list" query parameter, after being checked by
# sanitize_yt_id. A truthy result is cached.
#
# @return [String, nil]
def list_id
  return @list_id if @list_id

  @list_id = sanitize_yt_id(params["list"])
end
# Accepts an id only when the whole string consists of word characters
# and hyphens; anything else (including nil) yields nil.
#
# @param raw [String, nil] candidate id
# @return [String, nil] +raw+ itself when acceptable, otherwise nil
def sanitize_yt_id(raw)
  return if raw.nil?
  return unless raw.match?(/\A[\w-]+\z/)

  raw
end
# Builds the query string appended to the embed URL of a video.
#
# Always includes feature=oembed and wmode=opaque. Depending on the original
# URL it may also carry: list, start, end, loop (+ playlist), rel and
# enablejsapi.
#
# @return [String] URL-encoded query string
def embed_params
  query = { "feature" => "oembed", "wmode" => "opaque" }

  playlist = list_id
  query["list"] = playlist if playlist

  # The start offset may come from ?start=, ?t= or a "#t=" fragment, in that
  # order of precedence. Time strings are converted by parse_timestring.
  start = params["start"] || params["t"]
  unless start
    fragment = uri.fragment
    # drop the leading "t=" from the fragment
    start = fragment[2..-1] if fragment && fragment.start_with?("t=")
  end
  query["start"] = parse_timestring(start) if start

  finish = params["end"]
  query["end"] = parse_timestring(finish) if finish

  # Official workaround for looping videos
  # https://developers.google.com/youtube/player_parameters#loop
  # Presence of the key is enough, so a bare "&loop" works.
  query.merge!("loop" => 1, "playlist" => video_id) if params.key?("loop")

  # https://developers.google.com/youtube/player_parameters#rel
  query["rel"] = 0 if params.key?("rel")

  # https://developers.google.com/youtube/player_parameters#enablejsapi
  query["enablejsapi"] = params["enablejsapi"] if params.key?("enablejsapi")

  URI.encode_www_form(query)
end
# Converts a time string such as "1h2m3s", "1m30s" or "90" into seconds.
#
# Each of the hours ("<n>h"), minutes ("<n>m") and seconds ("<n>" or "<n>s")
# parts is optional; a part that is absent counts as zero. The pattern is
# not anchored to the end of the string and every part is optional, so a
# string that does not start with a time specification yields 0.
#
# @param string [String, nil]
# @return [Integer, nil] total seconds, or nil when +string+ is nil
def parse_timestring(string)
  parts = /(\d+h)?(\d+m)?(\d+s?)?/.match(string)
  return unless parts

  hours, minutes, seconds = parts.captures.map(&:to_i)
  (hours * 3600) + (minutes * 60) + seconds
end
def params
return {} unless uri.query
# This mapping is necessary because CGI.parse returns a hash of keys to arrays.
# And *that* is necessary because querystrings support arrays, so they
# force you to deal with it to avoid security issues that would pop up
# if one day it suddenly gave you an array.
#
# However, we aren't interested. Just take the first one.
@params ||=
begin
p = {}
CGI.parse(uri.query).each { |k, v| p[k] = v.first }
p
end
rescue StandardError
{}
end
# Address of the "maxresdefault" thumbnail image for the current video id.
#
# @return [String]
def video_thumbnail_url
  id = video_id
  "https://img.youtube.com/vi/#{id}/maxresdefault.jpg"
end
end
end
end