From fc311dbe3bf281cfd52a21194e99ca226373b7a2 Mon Sep 17 00:00:00 2001 From: Robin Ward Date: Wed, 10 Aug 2016 15:40:58 -0400 Subject: [PATCH] FEATURE: An option to search more recent posts for very large sites. On very large forums, searching posts can be slow, so this commit introduces the ability to search only the most recent posts first, falling back to a broader search if there aren't enough results. Enable `search_prefer_recent_posts` and you can customize how many recent posts to filter with `search_recent_posts_size` --- config/locales/server.en.yml | 2 ++ config/site_settings.yml | 3 +++ lib/search.rb | 29 ++++++++++++++++++++++++++++- spec/components/search_spec.rb | 17 +++++++++++++++++ spec/models/search_observer_spec.rb | 24 +++++++----------------- 5 files changed, 57 insertions(+), 18 deletions(-) diff --git a/config/locales/server.en.yml b/config/locales/server.en.yml index 50188377cf9..d13a167408c 100644 --- a/config/locales/server.en.yml +++ b/config/locales/server.en.yml @@ -828,6 +828,8 @@ en: min_private_message_title_length: "Minimum allowed title length for a message in characters" min_search_term_length: "Minimum valid search term length in characters" search_tokenize_chinese_japanese_korean: "Force search to tokenize Chinese/Japanese/Korean even on non CJK sites" + search_prefer_recent_posts: "If searching your large forum is slow, this option tries an index of more recent posts first" + search_recent_posts_size: "How many recent posts to keep in the index" allow_uncategorized_topics: "Allow topics to be created without a category. WARNING: If there are any uncategorized topics, you must recategorize them before turning this off." allow_duplicate_topic_titles: "Allow topics with identical, duplicate titles." 
unique_posts_mins: "How many minutes before a user can make a post with the same content again" diff --git a/config/site_settings.yml b/config/site_settings.yml index 8d6128c3b6a..d2e9fa2f059 100644 --- a/config/site_settings.yml +++ b/config/site_settings.yml @@ -1047,6 +1047,9 @@ uncategorized: default: 3 search_tokenize_chinese_japanese_korean: false + search_prefer_recent_posts: false + search_recent_posts_size: 100000 + max_similar_results: 5 minimum_topics_similar: 50 diff --git a/lib/search.rb b/lib/search.rb index 007aa7a2d5e..c09e03ba1ba 100644 --- a/lib/search.rb +++ b/lib/search.rb @@ -128,6 +128,24 @@ class Search end end + def self.min_post_id_no_cache + return 0 unless SiteSetting.search_prefer_recent_posts? + + offset = Post.count - SiteSetting.search_recent_posts_size + return 0 if offset <= 0 + + Post.order(:id).offset(offset).limit(1).pluck(:id)[0] + end + + def self.min_post_id(opts=nil) + return 0 unless SiteSetting.search_prefer_recent_posts? + + # It can be quite slow to count all the posts so let's cache it + Rails.cache.fetch("search-min-post-id:#{SiteSetting.search_recent_posts_size}", expires_in: 1.day) do + min_post_id_no_cache + end + end + attr_accessor :term def initialize(term, opts=nil) @@ -545,7 +563,16 @@ class Search posts = posts.joins('JOIN users u ON u.id = posts.user_id') posts = posts.where("posts.raw || ' ' || u.username || ' ' || COALESCE(u.name, '') ilike ?", "%#{term_without_quote}%") else + + posts = posts.where("post_search_data.search_data @@ #{ts_query}") + + min_id = Search.min_post_id + if min_id > 0 + fast_query = posts.dup.where("post_search_data.post_id >= #{min_id}") + posts = fast_query if fast_query.dup.count >= 20 + end + exact_terms = @term.scan(/"([^"]+)"/).flatten exact_terms.each do |exact| posts = posts.where("posts.raw ilike ?", "%#{exact}%") @@ -669,7 +696,7 @@ class Search .to_sql else posts_query(@limit, aggregate_search: true, - private_messages: opts[:private_messages]) + private_messages: 
opts[:private_messages]) .select('topics.id', "#{min_or_max}(post_number) post_number") .group('topics.id') .to_sql diff --git a/spec/components/search_spec.rb b/spec/components/search_spec.rb index 287676e16af..8c478b8b2e9 100644 --- a/spec/components/search_spec.rb +++ b/spec/components/search_spec.rb @@ -644,4 +644,21 @@ describe Search do end end + context "#min_post_id" do + it "returns 0 when prefer_recent_posts is disabled" do + SiteSetting.search_prefer_recent_posts = false + expect(Search.min_post_id_no_cache).to eq(0) + end + + it "returns a value when prefer_recent_posts is enabled" do + SiteSetting.search_prefer_recent_posts = true + SiteSetting.search_recent_posts_size = 1 + + Fabricate(:post) + p2 = Fabricate(:post) + + expect(Search.min_post_id_no_cache).to eq(p2.id) + end + end + end diff --git a/spec/models/search_observer_spec.rb b/spec/models/search_observer_spec.rb index dc0a2daaf38..fccf8612207 100644 --- a/spec/models/search_observer_spec.rb +++ b/spec/models/search_observer_spec.rb @@ -2,14 +2,6 @@ require 'rails_helper' describe SearchObserver do - def get_row(post_id) - SqlBuilder.map_exec( - OpenStruct, - "select * from post_search_data where post_id = :post_id", - post_id: post_id - ).first - end - it 'correctly indexes chinese' do SiteSetting.default_locale = 'zh_CN' data = "你好世界" @@ -17,8 +9,8 @@ describe SearchObserver do SearchObserver.update_posts_index(99, "你好世界", "", nil) - row = get_row(99) - expect(row.raw_data.split(' ').length).to eq(2) + raw_data = PostSearchData.where(post_id: 99).pluck(:raw_data)[0] + expect(raw_data.split(' ').length).to eq(2) end it 'correctly indexes a post' do @@ -26,15 +18,13 @@ describe SearchObserver do SearchObserver.update_posts_index(99, data, "", nil) - row = get_row(99) - - expect(row.raw_data).to eq("This is a test") - expect(row.locale).to eq("en") + raw_data, locale = PostSearchData.where(post_id: 99).pluck(:raw_data, :locale)[0] + expect(raw_data).to eq("This is a test") + expect(locale).to 
eq("en") SearchObserver.update_posts_index(99, "tester", "", nil) - row = get_row(99) - - expect(row.raw_data).to eq("tester") + raw_data = PostSearchData.where(post_id: 99).pluck(:raw_data)[0] + expect(raw_data).to eq("tester") end end