mirror of
https://gh.wpcy.net/https://github.com/discourse/discourse.git
synced 2026-05-07 05:15:58 +08:00
Some checks are pending
Licenses / run (push) Waiting to run
Linting / run (push) Waiting to run
Publish Assets / publish-assets (push) Waiting to run
Tests / core backend (push) Waiting to run
Tests / plugins backend (push) Waiting to run
Tests / core frontend (Chrome) (push) Waiting to run
Tests / plugins frontend (push) Waiting to run
Tests / themes frontend (push) Waiting to run
Tests / core system (push) Waiting to run
Tests / plugins system (push) Waiting to run
Tests / themes system (push) Waiting to run
Tests / core frontend (Firefox ESR) (push) Waiting to run
Tests / core frontend (Firefox Evergreen) (push) Waiting to run
Tests / chat system (push) Waiting to run
Tests / merge (push) Blocked by required conditions
Lazily loaded videos currently won't be picked up by search engines as they do not contain an iframe or VideoObject. This commit uses a new API `build_plugin_html "server:topic-show-crawler-post-end"` to insert populated [ld+json](https://developers.google.com/search/docs/appearance/structured-data/intro-structured-data#supported-formats) into the end of each post which has a video. Results: - https://search.google.com/test/rich-results/result?id=vegb8Qkz7HThsMbFaeN2AQ - https://search.google.com/test/rich-results/result?id=asumX41RneM2tMstchv45A - (vimeo) https://search.google.com/test/rich-results/result?id=EZWOMJCkDqEQIHfOLkSkcw Note: There were originally two ways around this - add each VideoObject to the `post.cook` all the time agnostic of crawler view - or add them to the `<head>` with appropriate metadata, needing to duplicate all post meta data within a `isPartOf` attribute. I chose to create a new api so that we can satisfy both "only add the meta data in crawler view" and prevent post metadata duplication.
197 lines
5.8 KiB
Ruby
197 lines
5.8 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
module Onebox
|
|
module Engine
|
|
class YoutubeOnebox
|
|
include Engine
|
|
include StandardEmbed
|
|
|
|
# Hosts handed to the matches_domain class macro (the macro is defined outside this file).
# NOTE(review): presumably this restricts the engine to URLs on these YouTube hosts — confirm against the macro.
matches_domain("youtube.com", "www.youtube.com", "m.youtube.com", "youtu.be")
|
|
# Origin handed to the requires_iframe_origins macro; it is the same origin used by the
# iframe src URLs this class builds in embed_url and to_html.
# NOTE(review): presumably used to allow-list iframe sources — macro is defined elsewhere, confirm.
requires_iframe_origins "https://www.youtube.com"
|
|
# NOTE(review): macro defined outside this file; presumably forces handled URLs to https — confirm.
always_https
|
|
|
|
# Whether +path+ (a URL path component) is one this engine handles.
#
# Accepts any path made of a leading slash followed by at least one more
# character, i.e. everything except the empty path and the bare root "/".
#
# The pattern is anchored with \A and \z instead of ^ and $: the latter match
# at every line boundary, so a multi-line string containing one matching line
# would otherwise be accepted.
#
# Returns true or false.
def self.matches_path(path)
  path.match?(%r{\A/.+\z})
end
|
|
|
|
# Value written into the width attribute of the <img> placeholder and the <iframe> embeds built below.
WIDTH = 480
|
|
# Value written into the height attribute of the <img> placeholder and the <iframe> embeds built below.
HEIGHT = 360
|
|
|
|
# Builds the YouTube embed-player URL for the given video id.
# The id is appended as-is (via string interpolation) after the fixed
# embed base URL.
def self.embed_url(video_id)
  embed_base = "https://www.youtube.com/embed"
  "#{embed_base}/#{video_id}"
end
|
|
|
|
# Extracts the preview image URL and title for the current video from the
# YouTube embed page.
#
# The embed page is fetched once (cached in @embed_doc). The first <script>
# whose text contains "ytcfg.set" is located, the JSON argument of that call
# is parsed, and the title is read from the nested, JSON-encoded
# "embedded_player_response" payload. The image URL is not read from the
# page; it is built from the video id.
#
# Returns { image: String, title: String }, or nil when there is no video id
# or the page cannot be parsed. The outcome - including a failed parse - is
# memoized, so the page is parsed (and fetched) at most once per instance.
# Errors raised by the fetch itself are not rescued here.
def parse_embed_response
  return unless video_id
  return @parse_embed_response if defined?(@parse_embed_response)

  @embed_doc ||= Onebox::Helpers.fetch_html_doc(self.class.embed_url(video_id))

  @parse_embed_response =
    begin
      script_tag = @embed_doc.xpath("//script").find { |tag| tag.to_s.include?("ytcfg.set") }.to_s
      match = script_tag.match(/ytcfg\.set\((?<json>.*)\)/)

      yt_json = ::JSON.parse(match[:json])
      # The player response is itself a JSON document stored as a string.
      player_response = ::JSON.parse(yt_json["PLAYER_VARS"]["embedded_player_response"])
      renderer = player_response["embedPreview"]["thumbnailPreviewRenderer"]

      {
        image: "https://img.youtube.com/vi/#{video_id}/hqdefault.jpg",
        title: renderer["title"]["runs"].first["text"],
      }
    rescue StandardError
      # Any missing key / unexpected page shape means "no embed data";
      # callers fall back to OpenGraph data.
      nil
    end
end
|
|
|
|
# HTML shown in place of the real embed.
#
# For a video or playlist URL this is an <img> tag whose image and title come
# from parse_embed_response, falling back to get_opengraph.data when that
# returns nil. For any other URL it is simply to_html.
#
# Both the image URL and the title are HTML-escaped before being placed in
# the single-quoted attributes, because the fallback data originates from a
# remote page. A missing image or title is rendered as an empty attribute
# instead of raising.
def placeholder_html
  return to_html unless video_id || list_id

  preview = parse_embed_response || get_opengraph.data
  image = CGI.escapeHTML(preview[:image].to_s)
  title = CGI.escapeHTML(preview[:title].to_s)

  "<img src='#{image}' width='#{WIDTH}' height='#{HEIGHT}' title='#{title}'>"
end
|
|
|
|
# Renders the embed HTML for the URL.
#
# * video URL    -> <iframe> pointing at the embed player, with the query
#                   string produced by embed_params
# * playlist URL -> <iframe> pointing at the "videoseries" embed for the list
# * anything else (e.g. channel pages) -> the HTML produced by
#   AllowlistedGenericOnebox, with protocol-relative URLs upgraded to https.
#
# Returns nil when the generic onebox yields blank HTML.
def to_html
  if video_id
    <<-HTML
      <iframe
        src="#{self.class.embed_url(video_id)}?#{embed_params}"
        width="#{WIDTH}"
        height="#{HEIGHT}"
        frameborder="0"
        allowfullscreen
        class="youtube-onebox"
      ></iframe>
    HTML
  elsif list_id
    <<-HTML
      <iframe
        src="https://www.youtube.com/embed/videoseries?list=#{list_id}&wmode=transparent&rel=0&autohide=1&showinfo=1&enablejsapi=1"
        width="#{WIDTH}"
        height="#{HEIGHT}"
        frameborder="0"
        allowfullscreen
        class="youtube-onebox"
      ></iframe>
    HTML
  else
    # for channel pages
    html = Onebox::Engine::AllowlistedGenericOnebox.new(@url, @timeout).to_html
    return if html.blank?

    # Turn protocol-relative attribute values (="//host/...") into https URLs.
    # The opening quote is captured and re-emitted so the attribute stays
    # quoted; non-destructive gsub also works when the HTML string is frozen.
    html.gsub(%r{(['"])//}, '\1https://')
  end
end
|
|
|
|
# Title of the video: taken from parse_embed_response when it yields data,
# otherwise from get_opengraph.data. A truthy title is cached in @video_title.
def video_title
  @video_title ||= (parse_embed_response || get_opengraph.data)[:title]
end
|
|
|
|
private
|
|
|
|
# Video id extracted from the URL, or nil when none is found or the candidate
# fails sanitize_yt_id. A truthy id is cached in @video_id.
#
# Sources are tried in this order; the first one that yields a value wins:
#   http://youtu.be/afyK1HSFfgw                  (first path segment)
#   https://www.youtube.com/embed/vsF0K3Ou1v0
#   https://www.youtube.com/shorts/wi2jAtpBl0Y
#   https://www.youtube.com/live/eJemwqO0SDw
#   https://www.youtube.com/watch?v=Z0UISCEe52Y  ("v" query parameter)
def video_id
  @video_id ||=
    begin
      path = uri.path
      candidate = uri.host["youtu.be"] ? path[%r{/([\w\-]+)}, 1] : nil

      prefixed_patterns = {
        "/embed/" => %r{/embed/([\w\-]+)},
        "/shorts/" => %r{/shorts/([\w\-]+)},
        "/live/" => %r{/live/([\w\-]+)},
      }
      prefixed_patterns.each do |marker, pattern|
        break if candidate
        candidate = path[pattern, 1] if path[marker]
      end

      sanitize_yt_id(candidate || params["v"])
    end
end
|
|
|
|
# Playlist id taken from the "list" query parameter after passing it through
# sanitize_yt_id; nil when absent or rejected. A truthy id is cached in @list_id.
def list_id
  return @list_id if @list_id

  @list_id = sanitize_yt_id(params["list"])
end
|
|
|
|
# Returns +raw+ unchanged when the whole string consists only of word
# characters and hyphens; returns nil for nil or for anything else.
def sanitize_yt_id(raw)
  return nil if raw.nil?

  raw if raw.match?(/\A[\w-]+\z/)
end
|
|
|
|
# Builds the URL-encoded query string appended to the embed player URL.
#
# Always contains feature=oembed and wmode=opaque; the remaining entries are
# added, in this order, only when the source URL carries them:
# list, start, end, loop + playlist, rel, enablejsapi.
def embed_params
  query = { "feature" => "oembed", "wmode" => "opaque" }
  query["list"] = list_id if list_id

  # Start time: "start" param, then "t" param, then a "#t=..." fragment
  # (with the leading "t=" removed).
  fragment_time =
    lambda do
      fragment = uri.fragment
      fragment[2..-1] if fragment && fragment.start_with?("t=")
    end
  start_time = params["start"] || params["t"] || fragment_time.call
  query["start"] = parse_timestring(start_time) if start_time

  end_time = params["end"]
  query["end"] = parse_timestring(end_time) if end_time

  # Official workaround for looping videos
  # https://developers.google.com/youtube/player_parameters#loop
  # Key presence is checked (not the value) so a bare "&loop" is enough.
  if params.key?("loop")
    query["loop"] = 1
    query["playlist"] = video_id
  end

  # https://developers.google.com/youtube/player_parameters#rel
  query["rel"] = 0 if params.key?("rel")

  # https://developers.google.com/youtube/player_parameters#enablejsapi
  query["enablejsapi"] = params["enablejsapi"] if params.key?("enablejsapi")

  URI.encode_www_form(query)
end
|
|
|
|
# Converts a time string such as "1h2m3s", "2m", "45s" or "90" into a number
# of seconds. The three optional groups are hours ("<n>h"), minutes ("<n>m")
# and seconds ("<n>" with an optional "s"); a group that is absent counts as 0.
# Returns nil when the regexp match itself yields nothing (e.g. a nil argument).
def parse_timestring(string)
  return unless string =~ /(\d+h)?(\d+m)?(\d+s?)?/

  hours, minutes, seconds = Regexp.last_match.captures.map(&:to_i)
  (hours * 3600) + (minutes * 60) + seconds
end
|
|
|
|
# Query-string parameters of the URL as a flat Hash of String => String/nil.
#
# CGI.parse maps every key to an Array of values (query strings may repeat a
# key); only the first value per key is kept here, so a key given without a
# value maps to nil. The parsed Hash is cached in @params.
#
# Returns an empty Hash when the URL has no query string or when anything
# raised while reading/parsing it.
def params
  query = uri.query
  return {} unless query

  @params ||= CGI.parse(query).each_with_object({}) { |(key, values), flat| flat[key] = values.first }
rescue StandardError
  {}
end
|
|
end
|
|
end
|
|
end
|