2
0
Fork 0
mirror of https://github.com/discourse/discourse.git synced 2025-09-10 21:01:33 +08:00

FIX: Improve Topic.similar_to with better Topic#title matches.

This changes the PostgreSQL full-text search so that the given title is
matched only against lexemes that are formed from the title. Likewise, the
given raw will only be matched against lexemes that are formed from the post's raw.
This commit is contained in:
Guo Xiang Tan 2020-07-28 11:53:25 +08:00
parent 14003abc37
commit 597d542c33
No known key found for this signature in database
GPG key ID: FBD110179AAC1F20
3 changed files with 49 additions and 20 deletions

View file

@ -581,9 +581,17 @@ class Topic < ActiveRecord::Base
return [] if title.blank?
raw = raw.presence || ""
search_data = "#{title} #{raw[0...MAX_SIMILAR_BODY_LENGTH]}".strip
filter_words = Search.prepare_data(search_data)
ts_query = Search.ts_query(term: filter_words, joiner: "|")
title_tsquery = Search.set_tsquery_weight_filter(
Search.prepare_data(title.strip),
'A'
)
raw_tsquery = Search.set_tsquery_weight_filter(
Search.prepare_data(raw[0...MAX_SIMILAR_BODY_LENGTH].strip),
'B'
)
tsquery = Search.to_tsquery(term: "#{title_tsquery} & #{raw_tsquery}", joiner: "|")
candidates = Topic
.visible
@ -591,9 +599,9 @@ class Topic < ActiveRecord::Base
.secured(Guardian.new(user))
.joins("JOIN topic_search_data s ON topics.id = s.topic_id")
.joins("LEFT JOIN categories c ON topics.id = c.topic_id")
.where("search_data @@ #{ts_query}")
.where("search_data @@ #{tsquery}")
.where("c.topic_id IS NULL")
.order("ts_rank(search_data, #{ts_query}) DESC")
.order("ts_rank(search_data, #{tsquery}) DESC")
.limit(SiteSetting.max_similar_results * 3)
candidate_ids = candidates.pluck(:id)