2
0
Fork 0
mirror of https://github.com/discourse/discourse.git synced 2025-10-03 17:21:20 +08:00

FEATURE: Use similarity in user search (#20112)

Currently, when doing `@mention` for users we have 0 tolerance for typos and misspellings.

With this patch, if a user search doesn't return enough results, we fall back to `pg_trgm` similarity matching to find more matches based on trigrams of usernames and names.

It also introduces GiST indexes on those fields in order to improve performance of this search, going from 130ms down to 15ms in my tests.

This is all gated behind a feature flag and can be enabled by running `SiteSetting.user_search_similar_results = true` in the Rails console.
This commit is contained in:
Rafael dos Santos Silva 2023-02-02 13:35:04 -03:00 committed by GitHub
parent ca2b2d034f
commit 14cf8eacf1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 96 additions and 8 deletions

View file

@ -2222,12 +2222,14 @@ end
#
# Indexes
#
# idx_users_admin (id) WHERE admin
# idx_users_moderator (id) WHERE moderator
# index_users_on_last_posted_at (last_posted_at)
# index_users_on_last_seen_at (last_seen_at)
# index_users_on_secure_identifier (secure_identifier) UNIQUE
# index_users_on_uploaded_avatar_id (uploaded_avatar_id)
# index_users_on_username (username) UNIQUE
# index_users_on_username_lower (username_lower) UNIQUE
# idx_users_admin (id) WHERE admin
# idx_users_moderator (id) WHERE moderator
# index_users_on_last_posted_at (last_posted_at)
# index_users_on_last_seen_at (last_seen_at)
# index_users_on_name_trgm (name) USING gist
# index_users_on_secure_identifier (secure_identifier) UNIQUE
# index_users_on_uploaded_avatar_id (uploaded_avatar_id)
# index_users_on_username (username) UNIQUE
# index_users_on_username_lower (username_lower) UNIQUE
# index_users_on_username_lower_trgm (username_lower) USING gist
#

View file

@ -160,6 +160,8 @@ class UserSearch
.each { |id| users << id }
end

return users.to_a if users.size >= @limit

# 5. last seen users (for search auto-suggestions)
if @last_seen_users
scoped_users
@ -169,6 +171,32 @@ class UserSearch
.each { |id| users << id }
end

return users.to_a if users.size >= @limit

if SiteSetting.user_search_similar_results
# 6. similar usernames
if @term.present?
scoped_users
.where("username_lower <-> ? < 1", @term)
.order(["username_lower <-> ? ASC", @term])
.limit(@limit - users.size)
.pluck(:id)
.each { |id| users << id }
end

return users.to_a if users.size >= @limit

# 7. similar names
if SiteSetting.enable_names? && @term.present?
scoped_users
.where("name <-> ? < 1", @term)
.order(["name <-> ? ASC", @term])
.limit(@limit - users.size)
.pluck(:id)
.each { |id| users << id }
end
end

users.to_a
end


View file

@ -2194,6 +2194,9 @@ backups:
client: true

search:
user_search_similar_results:
default: false
hidden: true
prioritize_exact_search_title_match:
default: false
hidden: true

View file

@ -0,0 +1,24 @@
# frozen_string_literal: true

# Adds GiST trigram indexes (gist_trgm_ops operator class) on
# users.username_lower and users.name.
class AddTrigramIndexesToUsers < ActiveRecord::Migration[7.0]
  # The indexes below are built with `algorithm: :concurrently`, so the
  # migration opts out of the wrapping DDL transaction.
  disable_ddl_transaction!

  def change
    # column => index name; iteration order matches the order the indexes
    # are created in (username_lower first, then name).
    trigram_indexes = {
      username_lower: "index_users_on_username_lower_trgm",
      name: "index_users_on_name_trgm",
    }

    trigram_indexes.each do |column, index_name|
      add_index(
        :users,
        column,
        using: "gist",
        opclass: :gist_trgm_ops,
        algorithm: :concurrently,
        name: index_name,
      )
    end
  end
end

View file

@ -267,4 +267,35 @@ RSpec.describe UserSearch do
expect(results[2]).to eq("mrorange")
end
end

context "when using SiteSetting.user_search_similar_results" do
  it "should find the user even with a typo if the setting is enabled" do
    rafael = Fabricate(:user, username: "rafael", name: "Rafael Silva")
    codinghorror = Fabricate(:user, username: "codinghorror", name: "Jeff Atwood")
    pfaffman = Fabricate(:user, username: "pfaffman")
    zogstrip = Fabricate(:user, username: "zogstrip", name: "Régis Hanol")

    # Misspelled search term => user it should resolve to once similarity
    # matching is switched on. First table targets usernames, second names.
    username_typos = { "rafel" => rafael, "codding" => codinghorror, "pffman" => pfaffman }
    name_typos = {
      "silvia" => rafael,
      "atwod" => codinghorror,
      "regis" => zogstrip,
      "reg" => zogstrip,
    }

    [username_typos, name_typos].each do |typos|
      # With the flag off, none of the misspelled terms return anything.
      SiteSetting.user_search_similar_results = false
      typos.each_key { |term| expect(UserSearch.new(term).search).to be_blank }

      # With the flag on, each misspelled term finds its intended user.
      SiteSetting.user_search_similar_results = true
      typos.each do |term, expected_user|
        expect(UserSearch.new(term).search).to include(expected_user)
      end
    end
  end
end
end