mirror of
https://gh.wpcy.net/https://github.com/discourse/discourse.git
synced 2026-06-19 06:43:54 +08:00
Rename plugin API key scope resources to use shorter, cleaner names:
- automations_trigger → automation (action: post → trigger_automation)
- discourse_ai → ai
- discourse_data_explorer → data_explorer

Includes reversible migrations to update existing api_key_scopes records in the database, and updated locale keys to match the new resource names.

Also adds safety improvements:
- ApiKeyScope#permits? returns false when mapping is nil, gracefully handling renamed or removed scopes
- ApiKeyScopeSerializer uses safe navigation to avoid errors for missing scope mappings
- Scope resources are now sorted alphabetically in both the API response and the admin UI
482 lines
15 KiB
Ruby
Vendored
482 lines
15 KiB
Ruby
Vendored
# frozen_string_literal: true
|
|
|
|
describe DiscourseAi::Embeddings::EmbeddingsController do
|
|
context "when performing a topic search" do
|
|
# Embedding definition whose id is selected as the active model below.
fab!(:vector_def, :open_ai_embedding_def)

before do
  enable_current_plugin

  SiteSetting.min_search_term_length = 3
  SiteSetting.ai_embeddings_selected_model = vector_def.id

  # "test" is the query most examples search for; clear its cache entry first.
  DiscourseAi::Embeddings::SemanticSearch.clear_cache_for("test")
  SearchIndexer.enable
end

# A parent category and a child category nested under it.
fab!(:category)
fab!(:subcategory) do
  Fabricate(:category, parent_category_id: category.id)
end

# A topic (with one post) fabricated without an explicit category.
fab!(:topic)
fab!(:post) do
  Fabricate(:post, topic: topic)
end

# A topic (with one post) placed in the subcategory.
fab!(:topic_in_subcategory) do
  Fabricate(:topic, category: subcategory)
end
fab!(:post_in_subcategory) do
  Fabricate(:post, topic: topic_in_subcategory)
end
|
|
|
|
# Generates an embedding representation for +topic+ while the OpenAI
# embeddings endpoint is stubbed to answer 200 with a constant
# 1536-element vector of 0.1.
def index(topic)
  embeddings_vector = DiscourseAi::Embeddings::Vector.instance

  canned_body = JSON.dump({ data: [{ embedding: [0.1] * 1536 }] })
  stub_request(:post, "https://api.openai.com/v1/embeddings").to_return(
    status: 200,
    body: canned_body,
  )

  embeddings_vector.generate_representation_from(topic)
end
|
|
|
|
# Registers an OpenAI embeddings stub for +query+, using the model name
# configured on +vector_def+ and a constant 1536-element vector.
def stub_embedding(query)
  model_name = vector_def.lookup_custom_param("model_name")
  fake_embedding = [0.049382] * 1536

  EmbeddingsGenerationStubs.openai_service(model_name, query, fake_embedding)
end
|
|
|
|
# Creates an API key owned by +user+ carrying a single scope
# (resource "ai", action "search") and returns the key record.
def create_api_key(user)
  ApiKey
    .create!(user: user)
    .tap do |api_key|
      ApiKeyScope.create!(resource: "ai", action: "search", api_key_id: api_key.id)
    end
end
|
|
|
|
it "is able to make API requests using a scoped API key" do
  index(topic)

  search_term = "test"
  stub_embedding(search_term)

  # Authenticate as the topic's author through API headers rather than a session.
  owner = topic.user
  scoped_key = create_api_key(owner)
  auth_headers = { "Api-Key" => scoped_key.key, "Api-Username" => owner.username }

  get "/discourse-ai/embeddings/semantic-search.json?q=#{search_term}&hyde=false",
      headers: auth_headers

  expect(response.status).to eq(200)

  returned_ids = response.parsed_body["topics"].map { |t| t["id"] }
  expect(returned_ids).to contain_exactly(topic.id)
end
|
|
|
|
context "when rate limiting is enabled" do
  before { RateLimiter.enable }

  # Performs one semantic search (hyde=false) for a freshly randomised query,
  # after stubbing its embedding, and returns the response status.
  # When +remote_addr+ is given it is sent as the request's REMOTE_ADDR.
  def fresh_search_status(remote_addr = nil)
    query = "test #{SecureRandom.hex}"
    stub_embedding(query)
    path = "/discourse-ai/embeddings/semantic-search.json?q=#{query}&hyde=false"

    if remote_addr
      get path, env: { "REMOTE_ADDR" => remote_addr }
    else
      get path
    end

    response.status
  end

  it "will rate limit correctly" do
    stub_const(described_class, :MAX_HYDE_SEARCHES_PER_MINUTE, 1) do
      stub_const(described_class, :MAX_SEARCHES_PER_MINUTE, 2) do
        # Two searches fit within the stubbed limit; the third is rejected.
        expect(fresh_search_status).to eq(200)
        expect(fresh_search_status).to eq(200)
        expect(fresh_search_status).to eq(429)
      end
    end
  end

  it "applies per-IP rate limiting for anonymous users" do
    stub_const(described_class, :MAX_SEARCHES_PER_MINUTE, 1) do
      expect(fresh_search_status("1.2.3.4")).to eq(200)

      # Same address again: over the limit.
      expect(fresh_search_status("1.2.3.4")).to eq(429)

      # A different address is still allowed.
      expect(fresh_search_status("5.6.7.8")).to eq(200)
    end
  end

  it "applies a global anonymous rate limit across all IPs" do
    stub_const(described_class, :MAX_SEARCHES_PER_MINUTE, 100) do
      stub_const(described_class, :MAX_ANON_SEARCHES_PER_MINUTE, 2) do
        # Every request comes from a distinct address, yet the third is rejected.
        expect(fresh_search_status("1.2.3.4")).to eq(200)
        expect(fresh_search_status("5.6.7.8")).to eq(200)
        expect(fresh_search_status("9.10.11.12")).to eq(429)
      end
    end
  end
end
|
|
|
|
it "returns results correctly when performing a non Hyde search" do
  [topic, topic_in_subcategory].each { |indexed_topic| index(indexed_topic) }

  search_term = "test"
  stub_embedding(search_term)

  get "/discourse-ai/embeddings/semantic-search.json?q=#{search_term}&hyde=false"

  expect(response.status).to eq(200)

  # Both indexed topics come back, in any order.
  found_ids = response.parsed_body["topics"].map { |t| t["id"] }
  expect(found_ids).to contain_exactly(topic.id, topic_in_subcategory.id)
end
|
|
|
|
it "is able to filter to a specific category (including sub categories)" do
  [topic, topic_in_subcategory].each { |indexed_topic| index(indexed_topic) }

  filtered_query = "test category:#{category.slug}"
  # The embedding stub is registered for the bare term, not the full filtered query.
  stub_embedding("test")

  get "/discourse-ai/embeddings/semantic-search.json?q=#{filtered_query}&hyde=false"

  expect(response.status).to eq(200)

  matched_ids = response.parsed_body["topics"].map { |t| t["id"] }
  expect(matched_ids).to eq([topic_in_subcategory.id])
end
|
|
|
|
it "doesn't skip HyDE if the hyde param is missing" do
  assign_fake_provider_to(:ai_default_llm_model)
  [topic, topic_in_subcategory].each { |indexed_topic| index(indexed_topic) }

  filtered_query = "test category:#{category.slug}"
  stub_embedding("test")

  # One canned LLM completion is prepared for the duration of the request.
  canned_completions = ["Hyde #{filtered_query}"]

  DiscourseAi::Completions::Llm.with_prepared_responses(canned_completions) do
    get "/discourse-ai/embeddings/semantic-search.json?q=#{filtered_query}"

    expect(response.status).to eq(200)

    matched_ids = response.parsed_body["topics"].map { |t| t["id"] }
    expect(matched_ids).to eq([topic_in_subcategory.id])
  end
end
|
|
|
|
context "with HYDE site setting" do
  before do
    assign_fake_provider_to(:ai_default_llm_model)
    [topic, topic_in_subcategory].each { |indexed_topic| index(indexed_topic) }
  end

  # Runs the given block with a single canned LLM completion
  # ("Hyde <term>") prepared.
  def with_hyde_completion(term, &block)
    DiscourseAi::Completions::Llm.with_prepared_responses(["Hyde #{term}"], &block)
  end

  it "uses HYDE when site setting is enabled and no hyde param is provided" do
    SiteSetting.ai_embeddings_semantic_search_use_hyde = true

    term = "test"
    stub_embedding("test")

    with_hyde_completion(term) do
      get "/discourse-ai/embeddings/semantic-search.json?q=#{term}"

      expect(response.status).to eq(200)
    end
  end

  it "doesn't use HYDE when site setting is disabled and no hyde param is provided" do
    SiteSetting.ai_embeddings_semantic_search_use_hyde = false

    term = "test"
    stub_embedding("test")

    get "/discourse-ai/embeddings/semantic-search.json?q=#{term}"

    expect(response.status).to eq(200)
  end

  it "overrides site setting when hyde=true param is provided" do
    SiteSetting.ai_embeddings_semantic_search_use_hyde = false

    term = "test"
    stub_embedding("test")

    with_hyde_completion(term) do
      get "/discourse-ai/embeddings/semantic-search.json?q=#{term}&hyde=true"

      expect(response.status).to eq(200)
    end
  end

  it "overrides site setting when hyde=false param is provided" do
    SiteSetting.ai_embeddings_semantic_search_use_hyde = true

    term = "test"
    stub_embedding("test")

    get "/discourse-ai/embeddings/semantic-search.json?q=#{term}&hyde=false"

    expect(response.status).to eq(200)
  end

  it "handles hyde=0 as false" do
    SiteSetting.ai_embeddings_semantic_search_use_hyde = true

    term = "test"
    stub_embedding("test")

    get "/discourse-ai/embeddings/semantic-search.json?q=#{term}&hyde=0"

    expect(response.status).to eq(200)
  end

  it "handles hyde=1 as true" do
    SiteSetting.ai_embeddings_semantic_search_use_hyde = false

    term = "test"
    stub_embedding("test")

    with_hyde_completion(term) do
      get "/discourse-ai/embeddings/semantic-search.json?q=#{term}&hyde=1"

      expect(response.status).to eq(200)
    end
  end
end
|
|
end
|
|
|
|
context "when embedding API fails" do
  fab!(:vector_def, :open_ai_embedding_def)
  fab!(:user)

  before do
    enable_current_plugin

    SiteSetting.min_search_term_length = 3
    SiteSetting.ai_embeddings_selected_model = vector_def.id

    DiscourseAi::Embeddings::SemanticSearch.clear_cache_for("test")
    sign_in(user)
  end

  # Makes the OpenAI embeddings endpoint answer 429 with a quota error body.
  def stub_quota_exceeded
    error_body = JSON.dump({ error: { message: "You exceeded your current quota" } })

    stub_request(:post, "https://api.openai.com/v1/embeddings").to_return(
      status: 429,
      body: error_body,
    )
  end

  it "returns empty results instead of 500 error for semantic search" do
    stub_quota_exceeded

    get "/discourse-ai/embeddings/semantic-search.json?q=test&hyde=false"

    expect(response.status).to eq(200)
    expect(response.parsed_body["topics"]).to be_blank
  end

  it "returns empty results instead of 500 error for quick search" do
    SiteSetting.ai_embeddings_semantic_quick_search_enabled = true
    stub_quota_exceeded

    get "/discourse-ai/embeddings/quick-search.json?q=test"

    expect(response.status).to eq(200)
    expect(response.parsed_body["topics"]).to be_blank
  end
end
|
|
|
|
context "when performing a quick search" do
|
|
# Embedding definition selected as the active model, and the signed-in user.
fab!(:vector_def, :open_ai_embedding_def)
fab!(:user)

before do
  enable_current_plugin

  SiteSetting.min_search_term_length = 3
  SiteSetting.ai_embeddings_selected_model = vector_def.id
  SiteSetting.ai_embeddings_semantic_quick_search_enabled = true

  DiscourseAi::Embeddings::SemanticSearch.clear_cache_for("test")
  SearchIndexer.enable

  # Quick-search examples in this context run as a logged-in user.
  sign_in(user)
end

# A topic with one post whose raw text contains the word "test".
fab!(:topic)
fab!(:post) do
  Fabricate(:post, topic: topic, raw: "This is a test post")
end
|
|
|
|
# Generates an embedding representation for +topic+ with the OpenAI
# embeddings endpoint stubbed to return a constant 1536-element vector.
def index(topic)
  embeddings_vector = DiscourseAi::Embeddings::Vector.instance

  stubbed_response = {
    status: 200,
    body: JSON.dump({ data: [{ embedding: [0.1] * 1536 }] }),
  }
  stub_request(:post, "https://api.openai.com/v1/embeddings").to_return(**stubbed_response)

  embeddings_vector.generate_representation_from(topic)
end
|
|
|
|
# Registers an OpenAI embeddings stub for +query+ that yields a constant
# 1536-element vector, keyed by the model name stored on +vector_def+.
def stub_embedding(query)
  EmbeddingsGenerationStubs.openai_service(
    vector_def.lookup_custom_param("model_name"),
    query,
    Array.new(1536, 0.049382),
  )
end
|
|
|
|
it "returns 400 when query is too short" do
  # "ab" is two characters; min_search_term_length is set to 3 in this context.
  too_short = "ab"

  get "/discourse-ai/embeddings/quick-search.json?q=#{too_short}"

  expect(response.status).to eq(400)
end
|
|
|
|
it "returns results successfully with valid query" do
  index(topic)
  stub_embedding("test")

  get "/discourse-ai/embeddings/quick-search.json?q=test"

  expect(response.status).to eq(200)

  # The payload carries posts as well as the single indexed topic.
  payload = response.parsed_body
  expect(payload["posts"]).to be_present

  topic_ids = payload["topics"].map { |t| t["id"] }
  expect(topic_ids).to contain_exactly(topic.id)
end
|
|
|
|
context "when rate limiting is enabled" do
  before { RateLimiter.enable }

  it "rate limits non-cached queries" do
    # 61 requests, each with a distinct query; only the status of the
    # final request is asserted.
    # NOTE(review): 61 presumably means "default per-minute limit + 1" - confirm against the controller.
    (0...61).each do |n|
      term = "test#{n}"
      stub_embedding(term)
      get "/discourse-ai/embeddings/quick-search.json?q=#{term}"
    end

    expect(response.status).to eq(429)
  end

  it "does not rate limit cached queries" do
    cached_term = "cached_query"

    # Replace SemanticSearch with a double that reports the query as cached.
    search_double = instance_double(DiscourseAi::Embeddings::SemanticSearch)
    allow(DiscourseAi::Embeddings::SemanticSearch).to receive(:new).and_return(search_double)
    allow(search_double).to receive(:cached_query?).with(cached_term).and_return(true)
    allow(search_double).to receive(:search_for_topics).with(
      cached_term,
      1,
      hyde: false,
    ).and_return([])

    1.upto(100) { get "/discourse-ai/embeddings/quick-search.json?q=#{cached_term}" }

    expect(response.status).to eq(200)
  end
end
|
|
end
|
|
|
|
context "when anonymous quick search rate limiting" do
  fab!(:vector_def, :open_ai_embedding_def)

  before do
    enable_current_plugin

    SiteSetting.min_search_term_length = 3
    SiteSetting.ai_embeddings_selected_model = vector_def.id
    SiteSetting.ai_embeddings_semantic_quick_search_enabled = true
    RateLimiter.enable

    # Every example here replaces SemanticSearch with a double that treats
    # each query as uncached and returns no topics, so no embedding request
    # is stubbed in this context.
    semantic_search = instance_double(DiscourseAi::Embeddings::SemanticSearch)
    allow(DiscourseAi::Embeddings::SemanticSearch).to receive(:new).and_return(semantic_search)
    allow(semantic_search).to receive(:cached_query?).and_return(false)
    allow(semantic_search).to receive(:search_for_topics).and_return([])
  end

  # Performs a quick search for +query+ with +remote_addr+ as the request's
  # REMOTE_ADDR (no user is signed in in this context) and returns the
  # response status.
  def quick_search_status(query, remote_addr)
    get "/discourse-ai/embeddings/quick-search.json?q=#{query}",
        env: {
          "REMOTE_ADDR" => remote_addr,
        }

    response.status
  end

  it "applies per-IP rate limiting for anonymous users" do
    stub_const(described_class, :MAX_SEARCHES_PER_MINUTE, 1) do
      expect(quick_search_status("test_query1", "1.2.3.4")).to eq(200)

      # Second request from the same address exceeds the stubbed limit.
      expect(quick_search_status("test_query2", "1.2.3.4")).to eq(429)

      # A different address is unaffected.
      expect(quick_search_status("test_query3", "5.6.7.8")).to eq(200)
    end
  end

  it "applies a global anonymous rate limit across all IPs" do
    stub_const(described_class, :MAX_SEARCHES_PER_MINUTE, 100) do
      stub_const(described_class, :MAX_ANON_SEARCHES_PER_MINUTE, 2) do
        # Each request uses a distinct address; the third one is still rejected.
        expect(quick_search_status("test_q1", "1.2.3.4")).to eq(200)
        expect(quick_search_status("test_q2", "5.6.7.8")).to eq(200)
        expect(quick_search_status("test_q3", "9.10.11.12")).to eq(429)
      end
    end
  end
end
|
|
end
|