mirror of
https://github.com/discourse/discourse.git
synced 2025-10-04 17:32:34 +08:00
FIX: Add ld+json VideoObject to each lazy video (#34678)
Some checks are pending
Licenses / run (push) Waiting to run
Linting / run (push) Waiting to run
Publish Assets / publish-assets (push) Waiting to run
Tests / core backend (push) Waiting to run
Tests / plugins backend (push) Waiting to run
Tests / core frontend (Chrome) (push) Waiting to run
Tests / plugins frontend (push) Waiting to run
Tests / themes frontend (push) Waiting to run
Tests / core system (push) Waiting to run
Tests / plugins system (push) Waiting to run
Tests / themes system (push) Waiting to run
Tests / core frontend (Firefox ESR) (push) Waiting to run
Tests / core frontend (Firefox Evergreen) (push) Waiting to run
Tests / chat system (push) Waiting to run
Tests / merge (push) Blocked by required conditions
Some checks are pending
Licenses / run (push) Waiting to run
Linting / run (push) Waiting to run
Publish Assets / publish-assets (push) Waiting to run
Tests / core backend (push) Waiting to run
Tests / plugins backend (push) Waiting to run
Tests / core frontend (Chrome) (push) Waiting to run
Tests / plugins frontend (push) Waiting to run
Tests / themes frontend (push) Waiting to run
Tests / core system (push) Waiting to run
Tests / plugins system (push) Waiting to run
Tests / themes system (push) Waiting to run
Tests / core frontend (Firefox ESR) (push) Waiting to run
Tests / core frontend (Firefox Evergreen) (push) Waiting to run
Tests / chat system (push) Waiting to run
Tests / merge (push) Blocked by required conditions
Lazily loaded videos currently won't be picked up by search engines as they do not contain an iframe or VideoObject. This commit uses a new API `build_plugin_html "server:topic-show-crawler-post-end"` to insert populated [ld+json](https://developers.google.com/search/docs/appearance/structured-data/intro-structured-data#supported-formats) into the end of each post which has a video. Results: - https://search.google.com/test/rich-results/result?id=vegb8Qkz7HThsMbFaeN2AQ - https://search.google.com/test/rich-results/result?id=asumX41RneM2tMstchv45A - (vimeo) https://search.google.com/test/rich-results/result?id=EZWOMJCkDqEQIHfOLkSkcw Note: There were originally two ways around this - add each VideoObject to the `post.cook` all the time agnostic of crawler view - or add them to the `<head>` with appropriate metadata, needing to duplicate all post meta data within a `isPartOf` attribute. I chose to create a new api so that we can satisfy both "only add the meta data in crawler view" and prevent post metadata duplication.
This commit is contained in:
parent
7ca6fe119e
commit
a9b1fb9b43
12 changed files with 402 additions and 11 deletions
|
@ -593,9 +593,9 @@ module ApplicationHelper
|
|||
current_user&.user_option&.homepage || HomepageHelper.resolve(request, current_user)
|
||||
end
|
||||
|
||||
def build_plugin_html(name)
|
||||
def build_plugin_html(name, **kwargs)
|
||||
return "" unless allow_plugins?
|
||||
DiscoursePluginRegistry.build_html(name, controller) || ""
|
||||
DiscoursePluginRegistry.build_html(name, controller, **kwargs) || ""
|
||||
end
|
||||
|
||||
# If there is plugin HTML return that, otherwise yield to the template
|
||||
|
|
|
@ -120,6 +120,8 @@
|
|||
<% end %>
|
||||
</div>
|
||||
<% end %>
|
||||
|
||||
<%= build_plugin_html "server:topic-show-crawler-post-end", post: post %>
|
||||
</div>
|
||||
<% end %>
|
||||
<% end %>
|
||||
|
|
|
@ -229,9 +229,9 @@ class DiscoursePluginRegistry
|
|||
html_builders[name] << block
|
||||
end
|
||||
|
||||
def self.build_html(name, ctx = nil)
|
||||
def self.build_html(name, ctx = nil, **kwargs)
|
||||
builders = html_builders[name] || []
|
||||
builders.map { |b| b.call(ctx) }.join("\n").html_safe
|
||||
builders.map { |b| b.call(ctx, **kwargs) }.join("\n").html_safe
|
||||
end
|
||||
|
||||
def self.seed_paths
|
||||
|
|
|
@ -14,6 +14,10 @@ module Onebox
|
|||
path.match?(%r{^(/@.+/video/\d+|/v/\d+)(/\w+)?/?$})
|
||||
end
|
||||
|
||||
def self.embed_url(video_id)
|
||||
"https://www.tiktok.com/embed/v2/#{video_id}"
|
||||
end
|
||||
|
||||
def placeholder_html
|
||||
<<-HTML
|
||||
<img
|
||||
|
@ -33,7 +37,7 @@ module Onebox
|
|||
<<-HTML
|
||||
<iframe
|
||||
class="tiktok-onebox"
|
||||
src="https://www.tiktok.com/embed/v2/#{oembed_data.embed_product_id}"
|
||||
src="#{self.class.embed_url(oembed_data.embed_product_id)}"
|
||||
sandbox="allow-popups allow-popups-to-escape-sandbox allow-scripts allow-top-navigation allow-same-origin"
|
||||
frameborder="0"
|
||||
seamless="seamless"
|
||||
|
|
|
@ -14,13 +14,17 @@ module Onebox
|
|||
path.match?(%r{^/\d+(/\w+)?/?$})
|
||||
end
|
||||
|
||||
def self.embed_url(video_id)
|
||||
"https://player.vimeo.com/video/#{video_id}"
|
||||
end
|
||||
|
||||
def placeholder_html
|
||||
::Onebox::Helpers.video_placeholder_html
|
||||
end
|
||||
|
||||
def to_html
|
||||
video_src = Nokogiri::HTML5.fragment(oembed_data[:html]).at_css("iframe")&.[]("src")
|
||||
video_src = "https://player.vimeo.com/video/#{oembed_data[:video_id]}" if video_src.blank?
|
||||
video_src = self.class.embed_url(oembed_data[:video_id]) if video_src.blank?
|
||||
video_src = video_src.gsub("autoplay=1", "").chomp("?")
|
||||
|
||||
<<-HTML
|
||||
|
|
|
@ -17,11 +17,15 @@ module Onebox
|
|||
WIDTH = 480
|
||||
HEIGHT = 360
|
||||
|
||||
def self.embed_url(video_id)
|
||||
"https://www.youtube.com/embed/#{video_id}"
|
||||
end
|
||||
|
||||
def parse_embed_response
|
||||
return unless video_id
|
||||
return @parse_embed_response if defined?(@parse_embed_response)
|
||||
|
||||
embed_url = "https://www.youtube.com/embed/#{video_id}"
|
||||
embed_url = self.class.embed_url(video_id)
|
||||
@embed_doc ||= Onebox::Helpers.fetch_html_doc(embed_url)
|
||||
|
||||
begin
|
||||
|
@ -60,7 +64,7 @@ module Onebox
|
|||
if video_id
|
||||
<<-HTML
|
||||
<iframe
|
||||
src="https://www.youtube.com/embed/#{video_id}?#{embed_params}"
|
||||
src="#{self.class.embed_url(video_id)}?#{embed_params}"
|
||||
width="#{WIDTH}"
|
||||
height="#{HEIGHT}"
|
||||
frameborder="0"
|
||||
|
|
|
@ -0,0 +1,102 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module DiscourseLazyVideos
|
||||
class CrawlerPostEnd
|
||||
attr_reader :controller, :post
|
||||
|
||||
PROVIDER_NAMES = { "youtube" => "YouTube", "vimeo" => "Vimeo", "tiktok" => "TikTok" }.freeze
|
||||
SCRIPT_ESCAPE_REGEX = %r{</script}i.freeze
|
||||
LAZY_VIDEO_CONTAINER = "lazy-video-container".freeze
|
||||
|
||||
def initialize(controller, post)
|
||||
@controller = controller
|
||||
@post = post
|
||||
end
|
||||
|
||||
def html
|
||||
return "" if !controller.instance_of?(TopicsController)
|
||||
return "" if !SiteSetting.lazy_videos_enabled
|
||||
return "" if !post
|
||||
return "" if post.cooked.exclude?(LAZY_VIDEO_CONTAINER)
|
||||
|
||||
generate_video_schemas
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def generate_video_schemas
|
||||
videos = extract_videos_from_post(post)
|
||||
return "" if videos.empty?
|
||||
|
||||
@post_excerpt ||= post.excerpt(200, strip_links: true, text_entities: true)
|
||||
|
||||
videos
|
||||
.each_with_object([]) do |video, scripts|
|
||||
schema = build_video_object(video, post, @post_excerpt)
|
||||
next if !schema
|
||||
|
||||
scripts << build_json_script(schema)
|
||||
end
|
||||
.join("\n")
|
||||
end
|
||||
|
||||
def extract_videos_from_post(post)
|
||||
@parsed_doc ||= Nokogiri::HTML5.fragment(post.cooked)
|
||||
videos = []
|
||||
|
||||
@parsed_doc
|
||||
.css(".lazy-video-container")
|
||||
.each do |container|
|
||||
video_data = {
|
||||
provider: container["data-provider-name"],
|
||||
id: container["data-video-id"],
|
||||
title: container["data-video-title"],
|
||||
url: container.at_css("a")&.[]("href"),
|
||||
thumbnail: container.at_css("img")&.[]("src"),
|
||||
}
|
||||
|
||||
videos << video_data if video_data[:provider] && video_data[:id]
|
||||
end
|
||||
|
||||
videos
|
||||
end
|
||||
|
||||
def build_video_object(video, post, post_excerpt)
|
||||
embed_url = get_embed_url(video[:provider], video[:id])
|
||||
return nil if !embed_url
|
||||
|
||||
schema = {
|
||||
"@context" => "https://schema.org",
|
||||
"@type" => "VideoObject",
|
||||
"name" => video[:title] || "#{PROVIDER_NAMES[video[:provider]] || video[:provider]} Video",
|
||||
"embedUrl" => embed_url,
|
||||
"url" => post.full_url,
|
||||
"uploadDate" => post.created_at.iso8601,
|
||||
}
|
||||
|
||||
schema["description"] = post_excerpt if post_excerpt.present?
|
||||
schema["thumbnailUrl"] = video[:thumbnail] if video[:thumbnail]
|
||||
schema["contentUrl"] = video[:url] if video[:url]
|
||||
|
||||
schema
|
||||
end
|
||||
|
||||
def build_json_script(schema)
|
||||
json = MultiJson.dump(schema).gsub(SCRIPT_ESCAPE_REGEX, "<\\/script")
|
||||
"<script type=\"application/ld+json\">#{json}</script>"
|
||||
end
|
||||
|
||||
def get_embed_url(provider, video_id)
|
||||
case provider
|
||||
when "youtube"
|
||||
Onebox::Engine::YoutubeOnebox.embed_url(video_id)
|
||||
when "vimeo"
|
||||
Onebox::Engine::VimeoOnebox.embed_url(video_id)
|
||||
when "tiktok"
|
||||
Onebox::Engine::TiktokOnebox.embed_url(video_id)
|
||||
else
|
||||
nil
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
|
@ -10,11 +10,16 @@ enabled_site_setting :lazy_videos_enabled
|
|||
|
||||
register_asset "stylesheets/lazy-videos.scss"
|
||||
|
||||
require_relative "lib/lazy-videos/lazy_youtube"
|
||||
require_relative "lib/lazy-videos/lazy_vimeo"
|
||||
require_relative "lib/lazy-videos/lazy_tiktok"
|
||||
require_relative "lib/discourse_lazy_videos/lazy_youtube"
|
||||
require_relative "lib/discourse_lazy_videos/lazy_vimeo"
|
||||
require_relative "lib/discourse_lazy_videos/lazy_tiktok"
|
||||
require_relative "lib/discourse_lazy_videos/crawler_post_end"
|
||||
|
||||
after_initialize do
|
||||
register_html_builder("server:topic-show-crawler-post-end") do |controller, post:|
|
||||
DiscourseLazyVideos::CrawlerPostEnd.new(controller, post).html
|
||||
end
|
||||
|
||||
on(:reduce_cooked) do |fragment|
|
||||
fragment
|
||||
.css(".lazy-video-container")
|
||||
|
|
|
@ -0,0 +1,270 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
describe DiscourseLazyVideos::CrawlerPostEnd do
|
||||
fab!(:topic)
|
||||
fab!(:post1) { Fabricate(:post, topic: topic, post_number: 1) }
|
||||
fab!(:post2) { Fabricate(:post, topic: topic, post_number: 2) }
|
||||
|
||||
let(:controller) { TopicsController.new }
|
||||
let(:topic_view) { TopicView.new(topic) }
|
||||
let(:post_crawler_schema) { described_class.new(controller, post1) }
|
||||
|
||||
before do
|
||||
SiteSetting.lazy_videos_enabled = true
|
||||
controller.instance_variable_set(:@topic_view, topic_view)
|
||||
end
|
||||
|
||||
describe "#html" do
|
||||
context "when not on TopicsController" do
|
||||
let(:controller) { ApplicationController.new }
|
||||
let(:schema) { described_class.new(controller, post1) }
|
||||
|
||||
it "returns empty string" do
|
||||
expect(schema.html).to eq("")
|
||||
end
|
||||
end
|
||||
|
||||
context "when lazy_videos_enabled is false" do
|
||||
before { SiteSetting.lazy_videos_enabled = false }
|
||||
|
||||
it "returns empty string" do
|
||||
expect(post_crawler_schema.html).to eq("")
|
||||
end
|
||||
end
|
||||
|
||||
context "when no post is set" do
|
||||
let(:schema) { described_class.new(controller, nil) }
|
||||
|
||||
it "returns empty string" do
|
||||
expect(schema.html).to eq("")
|
||||
end
|
||||
end
|
||||
|
||||
context "when post has no videos" do
|
||||
it "returns empty string" do
|
||||
post1.update!(cooked: "<p>Just some text without videos</p>")
|
||||
|
||||
expect(post_crawler_schema.html).to eq("")
|
||||
end
|
||||
end
|
||||
|
||||
context "when first post has a YouTube video" do
|
||||
before { post1.update!(cooked: <<~HTML) }
|
||||
<div class="youtube-onebox lazy-video-container"
|
||||
data-video-id="dQw4w9WgXcQ"
|
||||
data-video-title="Rick Astley - Never Gonna Give You Up"
|
||||
data-provider-name="youtube">
|
||||
<a href="https://www.youtube.com/watch?v=dQw4w9WgXcQ">
|
||||
<img src="https://img.youtube.com/vi/dQw4w9WgXcQ/maxresdefault.jpg"
|
||||
title="Rick Astley - Never Gonna Give You Up">
|
||||
</a>
|
||||
</div>
|
||||
HTML
|
||||
|
||||
it "generates VideoObject schema" do
|
||||
html = post_crawler_schema.html
|
||||
|
||||
expect(html).to include('<script type="application/ld+json">')
|
||||
expect(html).to include('"@type":"VideoObject"')
|
||||
expect(html).to include('"name":"Rick Astley - Never Gonna Give You Up"')
|
||||
expect(html).to include('"embedUrl":"https://www.youtube.com/embed/dQw4w9WgXcQ"')
|
||||
expect(html).to include(
|
||||
'"thumbnailUrl":"https://img.youtube.com/vi/dQw4w9WgXcQ/maxresdefault.jpg"',
|
||||
)
|
||||
end
|
||||
|
||||
it "includes post URL and upload date" do
|
||||
html = post_crawler_schema.html
|
||||
|
||||
expect(html).to include('"url":"' + post1.full_url)
|
||||
expect(html).to include('"uploadDate":"' + post1.created_at.iso8601)
|
||||
end
|
||||
end
|
||||
|
||||
context "when non-first post has a video" do
|
||||
before { post2.update!(cooked: <<~HTML) }
|
||||
<div class="vimeo-onebox lazy-video-container"
|
||||
data-video-id="123456"
|
||||
data-video-title="Vimeo Video"
|
||||
data-provider-name="vimeo">
|
||||
<a href="https://vimeo.com/123456">
|
||||
<img src="https://example.com/thumbnail.jpg">
|
||||
</a>
|
||||
</div>
|
||||
HTML
|
||||
|
||||
it "uses the comment post URL" do
|
||||
schema = described_class.new(controller, post2)
|
||||
html = schema.html
|
||||
|
||||
expect(html).to include('"url":"' + post2.full_url)
|
||||
end
|
||||
end
|
||||
|
||||
context "when post has multiple videos" do
|
||||
before { post1.update!(cooked: <<~HTML) }
|
||||
<div class="youtube-onebox lazy-video-container"
|
||||
data-video-id="video1"
|
||||
data-video-title="Video 1"
|
||||
data-provider-name="youtube">
|
||||
<a href="https://www.youtube.com/watch?v=video1">
|
||||
<img src="https://img.youtube.com/vi/video1/maxresdefault.jpg">
|
||||
</a>
|
||||
</div>
|
||||
<div class="vimeo-onebox lazy-video-container"
|
||||
data-video-id="123456"
|
||||
data-video-title="Video 2"
|
||||
data-provider-name="vimeo">
|
||||
<a href="https://vimeo.com/123456">
|
||||
<img src="https://example.com/thumbnail.jpg">
|
||||
</a>
|
||||
</div>
|
||||
HTML
|
||||
|
||||
it "generates separate schemas for each video" do
|
||||
html = post_crawler_schema.html
|
||||
|
||||
expect(html.scan('<script type="application/ld+json">').count).to eq(2)
|
||||
expect(html).to include('"embedUrl":"https://www.youtube.com/embed/video1"')
|
||||
expect(html).to include('"embedUrl":"https://player.vimeo.com/video/123456"')
|
||||
end
|
||||
end
|
||||
|
||||
context "when post has a TikTok video" do
|
||||
before { post1.update!(cooked: <<~HTML) }
|
||||
<div class="tiktok-onebox lazy-video-container"
|
||||
data-video-id="7181751442041220378"
|
||||
data-video-title="TikTok Video"
|
||||
data-provider-name="tiktok">
|
||||
<a href="https://www.tiktok.com/@user/video/7181751442041220378">
|
||||
<img src="https://example.com/tiktok-thumbnail.jpg">
|
||||
</a>
|
||||
</div>
|
||||
HTML
|
||||
|
||||
it "generates TikTok VideoObject schema" do
|
||||
html = post_crawler_schema.html
|
||||
|
||||
expect(html).to include('"embedUrl":"https://www.tiktok.com/embed/v2/7181751442041220378"')
|
||||
end
|
||||
end
|
||||
|
||||
context "with escaping and security" do
|
||||
before { post1.update!(cooked: <<~HTML) }
|
||||
<div class="youtube-onebox lazy-video-container"
|
||||
data-video-id="test123"
|
||||
data-video-title="Title with "quotes" and <tags>"
|
||||
data-provider-name="youtube">
|
||||
<a href="https://youtube.com/watch?v=test123">
|
||||
<img src="https://img.youtube.com/vi/test123/maxresdefault.jpg">
|
||||
</a>
|
||||
</div>
|
||||
HTML
|
||||
|
||||
it "properly escapes special characters" do
|
||||
html = post_crawler_schema.html
|
||||
|
||||
expect(html).to include('<script type="application/ld+json">')
|
||||
expect(html).to include("</script>")
|
||||
|
||||
parsed = JSON.parse(html.match(%r{<script[^>]*>(.*?)</script>}m)[1])
|
||||
expect(parsed["name"]).to eq('Title with "quotes" and <tags>')
|
||||
end
|
||||
end
|
||||
|
||||
context "with missing video data" do
|
||||
before { post1.update!(cooked: <<~HTML) }
|
||||
<div class="lazy-video-container"
|
||||
data-video-id="test123">
|
||||
<a href="https://youtube.com/watch?v=test123">
|
||||
<img src="https://img.youtube.com/vi/test123/maxresdefault.jpg">
|
||||
</a>
|
||||
</div>
|
||||
HTML
|
||||
|
||||
it "returns empty when provider is missing" do
|
||||
expect(post_crawler_schema.html).to eq("")
|
||||
end
|
||||
end
|
||||
|
||||
context "with invalid provider" do
|
||||
before { post1.update!(cooked: <<~HTML) }
|
||||
<div class="lazy-video-container"
|
||||
data-video-id="test123"
|
||||
data-provider-name="invalid">
|
||||
<a href="https://example.com/video">
|
||||
<img src="https://example.com/thumb.jpg">
|
||||
</a>
|
||||
</div>
|
||||
HTML
|
||||
|
||||
it "returns empty for unsupported providers" do
|
||||
expect(post_crawler_schema.html).to eq("")
|
||||
end
|
||||
end
|
||||
|
||||
context "when contentUrl is provided" do
|
||||
before { post1.update!(cooked: <<~HTML) }
|
||||
<div class="youtube-onebox lazy-video-container"
|
||||
data-video-id="test123"
|
||||
data-video-title="Test Video"
|
||||
data-provider-name="youtube">
|
||||
<a href="https://www.youtube.com/watch?v=test123">
|
||||
<img src="https://img.youtube.com/vi/test123/maxresdefault.jpg">
|
||||
</a>
|
||||
</div>
|
||||
HTML
|
||||
|
||||
it "includes contentUrl from the anchor href" do
|
||||
html = post_crawler_schema.html
|
||||
|
||||
expect(html).to include('"contentUrl":"https://www.youtube.com/watch?v=test123"')
|
||||
end
|
||||
end
|
||||
|
||||
context "when post has both text content and video" do
|
||||
before do
|
||||
post1.update!(
|
||||
raw: "Check out this amazing video! It's a classic that everyone should watch.",
|
||||
)
|
||||
|
||||
post1.update_columns(cooked: <<~HTML)
|
||||
<p>Check out this amazing video! It's a classic that everyone should watch.</p>
|
||||
<div class="youtube-onebox lazy-video-container"
|
||||
data-video-id="test123"
|
||||
data-video-title="Test Video"
|
||||
data-provider-name="youtube">
|
||||
<a href="https://www.youtube.com/watch?v=test123">
|
||||
<img src="https://img.youtube.com/vi/test123/maxresdefault.jpg">
|
||||
</a>
|
||||
</div>
|
||||
HTML
|
||||
|
||||
post1.reload
|
||||
end
|
||||
|
||||
it "includes description from post excerpt" do
|
||||
expect(post1.raw).to include("Check out this amazing video")
|
||||
expect(post1.cooked).to include("lazy-video-container")
|
||||
|
||||
excerpt = post1.excerpt(200, strip_links: true, text_entities: true)
|
||||
expect(excerpt).to be_present
|
||||
expect(excerpt).to include("Check out")
|
||||
|
||||
html = post_crawler_schema.html
|
||||
|
||||
if html.empty?
|
||||
puts "Controller class: #{controller.class}"
|
||||
puts "Crawler post: #{controller.instance_variable_get(:@crawler_post)&.id}"
|
||||
puts "Topic view: #{controller.instance_variable_get(:@topic_view)}"
|
||||
puts "SiteSetting.lazy_videos_enabled: #{SiteSetting.lazy_videos_enabled}"
|
||||
end
|
||||
|
||||
expect(html).not_to be_empty
|
||||
expect(html).to include('"description":')
|
||||
parsed = JSON.parse(html.match(%r{<script[^>]*>(.*?)</script>}m)[1])
|
||||
expect(parsed["description"]).to include("Check out this amazing video")
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
Loading…
Add table
Add a link
Reference in a new issue