# discourse/plugins/discourse-ai/spec/jobs/scheduled/summaries_backfill_spec.rb

# frozen_string_literal: true

RSpec.describe Jobs::SummariesBackfill do
  subject(:job) { described_class.new }

  # Hourly cap fed into the site setting below; 24 guarantees two summaries per batch.
  let(:limit) { 24 }
  # The budget is split into intervals. The job runs every five minutes.
  let(:intervals) { 12 }

  # Topic shared by the examples; individual examples tweak it with `update!`.
  fab!(:topic) do
    Fabricate(:topic, last_posted_at: 2.hours.ago, highest_post_number: 2, word_count: 200)
  end

  before do
    enable_current_plugin
    assign_fake_provider_to(:ai_default_llm_model)

    SiteSetting.ai_summarization_enabled = true
    SiteSetting.ai_summary_backfill_maximum_topics_per_hour = limit
    SiteSetting.ai_summary_gists_enabled = true
  end

  describe "#current_budget" do
    let(:type) { AiSummary.summary_types[:complete] }
    # Budget the examples expect when nothing has been counted against it.
    let(:full_budget) { limit / intervals }

    context "when no summary has been backfilled yet" do
      it "returns the full budget" do
        expect(job.current_budget(type)).to eq(full_budget)
      end

      it "ignores summaries generated by users" do
        Fabricate(:ai_summary, target: topic, origin: AiSummary.origins[:human])

        expect(job.current_budget(type)).to eq(full_budget)
      end

      it "only accounts for summaries of the given type" do
        # The gist has to be system-generated. A human-origin gist would be skipped
        # for the same reason as in the example above, so the summary-type filter
        # would never be exercised.
        Fabricate(:topic_ai_gist, target: topic, origin: AiSummary.origins[:system])

        expect(job.current_budget(type)).to eq(full_budget)
      end
    end
  end

  describe "#backfill_candidates" do
    let(:type) { AiSummary.summary_types[:complete] }

    it "only selects posts with enough words" do
      topic.update!(word_count: 100)

      expect(job.backfill_candidates(type)).to be_empty
    end

    it "ignores up to date summaries" do
      # highest_target_number matches the shared topic's highest_post_number (2).
      Fabricate(:ai_summary, target: topic, highest_target_number: 2, updated_at: 10.minutes.ago)

      expect(job.backfill_candidates(type)).to be_empty
    end

    it "ignores outdated summaries updated less than five minutes ago" do
      # The summary is behind the topic (1 < 2) but was touched only four minutes ago.
      Fabricate(:ai_summary, target: topic, highest_target_number: 1, updated_at: 4.minutes.ago)

      expect(job.backfill_candidates(type)).to be_empty
    end

    it "orders candidates by topic#last_posted_at" do
      topic.update!(last_posted_at: 1.minute.ago)
      topic_2 = Fabricate(:topic, word_count: 200, last_posted_at: 2.minutes.ago)

      # `eq` on an array is order-sensitive; `contain_exactly` would accept any
      # ordering and so could never fail this example for the reason it names.
      expect(job.backfill_candidates(type).map(&:id)).to eq([topic.id, topic_2.id])
    end

    it "prioritizes topics without summaries" do
      topic_2 =
        Fabricate(:topic, word_count: 200, last_posted_at: 2.minutes.ago, highest_post_number: 1)
      topic.update!(last_posted_at: 1.minute.ago)
      Fabricate(:ai_summary, target: topic, updated_at: 1.hour.ago, highest_target_number: 1)

      # `topic` was posted in more recently but already has an (outdated) summary,
      # so the summary-less `topic_2` is expected first. Order-sensitive on purpose.
      expect(job.backfill_candidates(type).map(&:id)).to eq([topic_2.id, topic.id])
    end

    it "respects max age setting" do
      SiteSetting.ai_summary_backfill_topic_max_age_days = 1
      topic.update!(last_posted_at: 2.days.ago)

      expect(job.backfill_candidates(type)).to be_empty
    end
  end

  describe "#execute" do
    it "backfills a batch" do
      # `other_topic` has no summaries at all; `topic` gets a stale summary and a
      # stale gist that only cover its first post.
      other_topic =
        Fabricate(:topic, word_count: 200, last_posted_at: 2.minutes.ago, highest_post_number: 1)
      topic.update!(last_posted_at: 1.minute.ago)
      %i[ai_summary topic_ai_gist].each do |fabricator|
        Fabricate(fabricator, target: topic, updated_at: 3.hours.ago, highest_target_number: 1)
      end

      new_summary = "Summary of topic_2"
      new_gist = "Gist of topic_2"
      refreshed_summary = "Updated summary of topic"
      refreshed_gist = "Updated gist of topic"

      # Responses are consumed in this order: both gists first, then both summaries.
      prepared = [new_gist, refreshed_gist, new_summary, refreshed_summary]
      DiscourseAi::Completions::Llm.with_prepared_responses(prepared) { job.execute({}) }

      expect(AiSummary.complete.find_by(target: other_topic).summarized_text).to eq(new_summary)
      expect(AiSummary.gist.find_by(target: other_topic).summarized_text).to eq(new_gist)
      expect(AiSummary.complete.find_by(target: topic).summarized_text).to eq(refreshed_summary)
      expect(AiSummary.gist.find_by(target: topic).summarized_text).to eq(refreshed_gist)

      summary_types = %i[complete gist].map { |name| AiSummary.summary_types[name] }
      expect_empty_queues =
        lambda do
          summary_types.each do |summary_type|
            expect(job.backfill_candidates(summary_type)).to be_empty
          end
        end

      # Queue has to be empty if we just generated all summaries
      expect_empty_queues.call

      # Queue still empty when they are up to date and time passes.
      AiSummary.update_all(updated_at: 20.minutes.ago)
      expect_empty_queues.call
    end

    it "updates the highest_target_number if the summary turned to be up to date" do
      previous_highest = topic.highest_post_number
      bumped_highest = previous_highest + 1
      summary =
        Fabricate(
          :ai_summary,
          target: topic,
          updated_at: 3.hours.ago,
          highest_target_number: previous_highest,
        )
      topic.update!(highest_post_number: bumped_highest)

      # No prepared responses here. We don't perform a completion call.
      job.execute({})

      expect(summary.reload.highest_target_number).to eq(bumped_highest)
    end

    it "caches the LlmModel and reuses it for all summaries in a batch" do
      # A second candidate, so the batch contains more than one topic.
      Fabricate(:topic, word_count: 200, last_posted_at: 2.minutes.ago, highest_post_number: 1)
      topic.update!(last_posted_at: 1.minute.ago)

      # Count LlmModel.find_by calls: the matcher block bumps the counter and
      # always reports a match, so the stub answers every call.
      lookups = 0
      LlmModel
        .stubs(:find_by)
        .with do
          lookups += 1
          true
        end
        .returns(LlmModel.last)

      DiscourseAi::Completions::Llm.with_prepared_responses(
        %w[gist_1 gist_2 summary_1 summary_2],
      ) { job.execute({}) }

      # Should only call LlmModel.find_by once for the entire batch, not once per topic
      expect(lookups).to eq(1)
    end
  end
end