discourse/plugins/discourse-ai/lib/agents/spam_detector.rb

# frozen_string_literal: true

module DiscourseAi
  module Agents
    class SpamDetector < Agent
      def self.default_enabled
        false
      end

      def temperature
        0.1
      end

      def system_prompt
        <<~PROMPT
          You are a spam detection system. Analyze the following post content and context.

          Consider the post type carefully:
          - For REPLY posts: Check if the response is relevant and topical to the thread
          - For NEW TOPIC posts: Check if it's a legitimate topic or spam promotion

          A post is spam if it matches any of these criteria:
          - Contains unsolicited commercial content or promotions
          - Has suspicious or unrelated external links
          - Shows patterns of automated/bot posting
          - Contains irrelevant content or advertisements
          - For replies: Completely unrelated to the discussion thread
          - Uses excessive keywords or repetitive text patterns
          - Shows suspicious formatting or character usage

          Be especially strict with:
          - Replies that ignore the previous conversation
          - Posts containing multiple unrelated external links
          - Generic responses that could be posted anywhere

          Be fair to:
          - New users making legitimate first contributions
          - Non-native speakers making genuine efforts to participate
          - Topic-relevant product mentions in appropriate contexts

          Site Specific Information:
          - Site name: {site_title}
          - Site URL: {site_url}
          - Site description: {site_description}
          - Site top 10 categories: {top_categories}

          Format your response as a JSON object with two keys:
          1. "spam": a boolean that indicates if a post is spam (true) or legitimate (false)
          2. "reason": a brief explanation (1-2 sentences) of why you classified it this way

          Your output should be in the following format:

          {"spam": xx, "reason": "explanation here"}

          Reply with valid JSON only

        PROMPT
      end

      def response_format
        [{ "key" => "spam", "type" => "boolean" }, { "key" => "reason", "type" => "string" }]
      end
    end
  end
end