From 139314c2133fb74a9e94ffbdc92f3551de279bea Mon Sep 17 00:00:00 2001 From: Sam Date: Mon, 10 Aug 2015 17:41:14 +1000 Subject: [PATCH] FIX: search for 1.5 etc was broken FEATURE: search by exact phrase Use PG tokenizer to split up search phrase instead of relying on Ruby code This produces much more accurate tokenization --- lib/search.rb | 22 ++++++++++++++-------- spec/components/search_spec.rb | 8 ++++++++ 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/lib/search.rb b/lib/search.rb index 0a07dcf9f79..2dd6b100392 100644 --- a/lib/search.rb +++ b/lib/search.rb @@ -385,6 +385,10 @@ class Search posts = posts.where("posts.raw || ' ' || u.username || ' ' || u.name ilike ?", "%#{@term}%") else posts = posts.where("post_search_data.search_data @@ #{ts_query}") + exact_terms = @term.scan(/"([^"]+)"/).flatten + exact_terms.each do |exact| + posts = posts.where("posts.raw ilike ?", "%#{exact}%") + end end end @@ -457,20 +461,22 @@ class Search end def self.ts_query(term, locale = nil, joiner = "&") + + + data = Post.exec_sql("SELECT to_tsvector(:locale, :term)", + locale: locale || long_locale, + term: term + ).values[0][0] + locale = Post.sanitize(locale) if locale - all_terms = term.gsub(/[\p{P}\p{S}]+/, ' ').squish.split + all_terms = data.scan(/'([^']+)'\:\d+/).flatten query = Post.sanitize(all_terms.map {|t| "#{PG::Connection.escape_string(t)}:*"}.join(" #{joiner} ")) "TO_TSQUERY(#{locale || query_locale}, #{query})" end def ts_query(locale=nil) - if !locale - @ts_query ||= begin - Search.ts_query(@term, locale) - end - else - Search.ts_query(@term, locale) - end + @ts_query_cache ||= {} + @ts_query_cache[(locale || query_locale) + " " + @term] ||= Search.ts_query(@term, locale) end def aggregate_search(opts = {}) diff --git a/spec/components/search_spec.rb b/spec/components/search_spec.rb index 1ded92fbc77..f2b15a86d40 100644 --- a/spec/components/search_spec.rb +++ b/spec/components/search_spec.rb @@ -389,6 +389,14 @@ describe Search do end + it 'can search numbers correctly, and match exact phrases' do + topic = Fabricate(:topic, created_at: 3.months.ago) + Fabricate(:post, raw: '3.0 eta is in 2 days horrah', topic: topic) + + expect(Search.execute('3.0 eta').posts.length).to eq(1) + expect(Search.execute('"3.0, eta is"').posts.length).to eq(0) + end + it 'can find by status' do post = Fabricate(:post, raw: 'hi this is a test 123 123') topic = post.topic