From 2f3ba0a791337f809fa87ad4db3bdaf395eea2b1 Mon Sep 17 00:00:00 2001 From: Kirill Pimenov Date: Thu, 28 Feb 2013 23:03:52 +0400 Subject: [PATCH 1/3] Better SQL substitutions --- lib/search.rb | 45 +++++++++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/lib/search.rb b/lib/search.rb index ab2c0787c46..a0570ff2791 100644 --- a/lib/search.rb +++ b/lib/search.rb @@ -19,10 +19,11 @@ module Search u.username AS title, u.email, NULL AS color - FROM users AS u - JOIN users_search s on s.id = u.id - WHERE s.search_data @@ TO_TSQUERY('english', :query) - ORDER BY last_posted_at desc + FROM users AS u + JOIN users_search s on s.id = u.id + WHERE s.search_data @@ TO_TSQUERY(:locale, :query) + ORDER BY last_posted_at desc + LIMIT :limit " end @@ -36,14 +37,16 @@ module Search FROM topics AS ft JOIN posts AS p ON p.topic_id = ft.id AND p.post_number = 1 JOIN posts_search s on s.id = p.id - WHERE s.search_data @@ TO_TSQUERY('english', :query) + WHERE s.search_data @@ TO_TSQUERY(:locale, :query) AND ft.deleted_at IS NULL AND ft.visible AND ft.archetype <> '#{Archetype.private_message}' ORDER BY - TS_RANK_CD(TO_TSVECTOR('english', ft.title), TO_TSQUERY('english', :query)) desc, - TS_RANK_CD(search_data, TO_TSQUERY('english', :query)) desc, - bumped_at desc" + TS_RANK_CD(TO_TSVECTOR(:locale, ft.title), TO_TSQUERY(:locale, :query)) desc, + TS_RANK_CD(search_data, TO_TSQUERY(:locale, :query)) desc, + bumped_at desc + LIMIT :limit + " end @@ -57,14 +60,16 @@ module Search FROM topics AS ft JOIN posts AS p ON p.topic_id = ft.id AND p.post_number <> 1 JOIN posts_search s on s.id = p.id - WHERE s.search_data @@ TO_TSQUERY('english', :query) + WHERE s.search_data @@ TO_TSQUERY(:locale, :query) AND ft.deleted_at IS NULL and p.deleted_at IS NULL AND ft.visible AND ft.archetype <> '#{Archetype.private_message}' ORDER BY - TS_RANK_CD(TO_TSVECTOR('english', ft.title), TO_TSQUERY('english', :query)) desc, - TS_RANK_CD(search_data, TO_TSQUERY('english', :query)) desc, - bumped_at desc" + TS_RANK_CD(TO_TSVECTOR(:locale, ft.title), TO_TSQUERY(:locale, :query)) desc, + TS_RANK_CD(search_data, TO_TSQUERY(:locale, :query)) desc, + bumped_at desc + LIMIT :limit + " end def self.category_query_sql @@ -76,15 +81,16 @@ module Search c.color FROM categories AS c JOIN categories_search s on s.id = c.id - WHERE s.search_data @@ TO_TSQUERY('english', :query) + WHERE s.search_data @@ TO_TSQUERY(:locale, :query) ORDER BY topics_month desc + LIMIT :limit " end def self.query(term, type_filter=nil) return nil if term.blank? - sanitized_term = term.gsub(/[^0-9a-zA-Z_ ]/, '') + sanitized_term = PG::Connection.escape_string(term) #term.gsub(/[^0-9a-zA-Z_ ]/, '') # really short terms are totally pointless return nil if sanitized_term.blank? || sanitized_term.length < self.min_search_term_length @@ -94,14 +100,13 @@ module Search if type_filter.present? raise Discourse::InvalidAccess.new("invalid type filter") unless Search.facets.include?(type_filter) - sql = Search.send("#{type_filter}_query_sql") << " LIMIT #{Search.per_facet * Search.facets.size}" - db_result = ActiveRecord::Base.exec_sql(sql , query: terms.join(" & ")) + sql = Search.send("#{type_filter}_query_sql") + db_result = ActiveRecord::Base.exec_sql(sql , query: terms.join(" & "), locale: 'english', limit: Search.per_facet * Search.facets.size) else db_result = [] [user_query_sql, category_query_sql, topic_query_sql].each do |sql| - sql << " LIMIT " << (Search.per_facet + 1).to_s - db_result += ActiveRecord::Base.exec_sql(sql , query: terms.join(" & ")).to_a + db_result += ActiveRecord::Base.exec_sql(sql , query: terms.join(" & "),locale: 'english', limit: (Search.per_facet + 1)).to_a end end @@ -118,8 +123,8 @@ module Search end if expected_topics > 0 - tmp = ActiveRecord::Base.exec_sql "#{post_query_sql} limit :per_facet", - query: terms.join(" & "), per_facet: expected_topics * 3 + tmp = ActiveRecord::Base.exec_sql post_query_sql, + query: terms.join(" & "), locale: 'english', limit: expected_topics * 3 topic_ids = Set.new db_result.map{|r| r["id"]} From ee0396198e900797aeae2fa63499036026518f08 Mon Sep 17 00:00:00 2001 From: Kirill Pimenov Date: Thu, 28 Feb 2013 23:14:22 +0400 Subject: [PATCH 2/3] Added basic locale recognition for expected FTS stemming --- lib/search.rb | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/lib/search.rb b/lib/search.rb index a0570ff2791..9aa8fdce07a 100644 --- a/lib/search.rb +++ b/lib/search.rb @@ -87,6 +87,16 @@ module Search " end + def self.current_locale_long + case I18n.locale # Currently-present in /conf/locales/* only, sorry :-( Add as needed + when :ru then 'russian' + when :fr then 'french' + when :nl then 'dutch' + when :sv then 'swedish' + else 'english' + end + end + def self.query(term, type_filter=nil) return nil if term.blank? @@ -101,12 +111,12 @@ module Search if type_filter.present? raise Discourse::InvalidAccess.new("invalid type filter") unless Search.facets.include?(type_filter) sql = Search.send("#{type_filter}_query_sql") - db_result = ActiveRecord::Base.exec_sql(sql , query: terms.join(" & "), locale: 'english', limit: Search.per_facet * Search.facets.size) + db_result = ActiveRecord::Base.exec_sql(sql , query: terms.join(" & "), locale: current_locale_long, limit: Search.per_facet * Search.facets.size) else db_result = [] [user_query_sql, category_query_sql, topic_query_sql].each do |sql| - db_result += ActiveRecord::Base.exec_sql(sql , query: terms.join(" & "),locale: 'english', limit: (Search.per_facet + 1)).to_a + db_result += ActiveRecord::Base.exec_sql(sql , query: terms.join(" & "),locale: current_locale_long, limit: (Search.per_facet + 1)).to_a end end @@ -124,7 +134,7 @@ module Search if expected_topics > 0 tmp = ActiveRecord::Base.exec_sql post_query_sql, - query: terms.join(" & "), locale: 'english', limit: expected_topics * 3 + query: terms.join(" & "), locale: current_locale_long, limit: expected_topics * 3 topic_ids = Set.new db_result.map{|r| r["id"]} From f639397aff9baaa779307ce3275bf9bd0eabdca3 Mon Sep 17 00:00:00 2001 From: Kirill Pimenov Date: Thu, 28 Feb 2013 23:52:35 +0400 Subject: [PATCH 3/3] Correct stripping of non BasicLatin characters --- lib/search.rb | 3 ++- spec/components/search_spec.rb | 19 +++++++++++++++++-- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/lib/search.rb b/lib/search.rb index 9aa8fdce07a..cf0d45b15c4 100644 --- a/lib/search.rb +++ b/lib/search.rb @@ -100,7 +100,8 @@ module Search def self.query(term, type_filter=nil) return nil if term.blank? - sanitized_term = PG::Connection.escape_string(term) #term.gsub(/[^0-9a-zA-Z_ ]/, '') + sanitized_term = PG::Connection.escape_string(term.gsub(/[:()&!]/,'')) # Instead of original term.gsub(/[^0-9a-zA-Z_ ]/, '') + # We are stripping only symbols taking place in FTS and simply sanitizing the rest. # really short terms are totally pointless return nil if sanitized_term.blank? || sanitized_term.length < self.min_search_term_length diff --git a/spec/components/search_spec.rb b/spec/components/search_spec.rb index 422b4de200f..432886e7702 100644 --- a/spec/components/search_spec.rb +++ b/spec/components/search_spec.rb @@ -1,3 +1,5 @@ +# encoding: utf-8 + require 'spec_helper' require 'search' @@ -70,8 +72,7 @@ describe Search do end it 'escapes non alphanumeric characters' do - ActiveRecord::Base.expects(:exec_sql).never - Search.query(':!$').should be_blank + Search.query(':!$);}]>@\#\"\'').should be_blank # There are at least three levels of sanitation for Search.query! end it 'works when given two terms with spaces' do @@ -123,6 +124,20 @@ describe Search do end + context 'cyrillic topic' do + let!(:cyrillic_topic) { Fabricate(:topic) do + user + title { sequence(:title) { |i| "Тестовая запись #{i}" } } + end + } + let!(:post) {Fabricate(:post, topic: cyrillic_topic, user: cyrillic_topic.user)} + let(:result) { first_of_type(Search.query('запись'), 'topic') } + + it 'finds something when given cyrillic query' do + result.should be_present + end + end + context 'categories' do let!(:category) { Fabricate(:category) }