discourse/plugins/discourse-ai/lib/automation/report_context_generator.rb
Sam 0bad05d036
DEV: Extract reusable PostsFilter into core (#40436)
Promotes the posts query-string filter that previously lived in
discourse-ai's `Research::Filter` to a core `PostsFilter` class so it
can be shared across core and plugins. The parser gains alias support
(`categories`, `exclude_category`, `exclude_tag`, `created_after`, ...),
`-`/`=`/`-=` exclusion prefixes, an `option_info` helper for
autocomplete, and an `add_filter`/`remove_filter` extension API backed
by a new `posts_filter_options` plugin modifier.

Consumers are updated to build on the shared class:

* discourse-ai: `Research::Filter` becomes a thin subclass, the
  researcher tool documents the new exclusion syntax, and the report
  context generator builds its relation from a PostsFilter query string
  instead of hand-rolled SQL.
* discourse-assign: registers an `assigned_to:` filter (supporting
  `nobody`, `*` and usernames) and contributes its autocomplete entry.
* discourse-workflows: adds an `action:post` node with create/get/list
  operations built on PostsFilter, and extracts a shared `PostHelper`
  mixin reused by the existing create_post node.

Adds core locale strings and specs for the new class.

We also added two new APIs to node context to facilitate working with
posts:

#### `exec_ctx.create_post`

Creates a post while enforcing workflow actor permissions and preventing
recursive workflow execution.

```rb
post =
  exec_ctx.create_post(
    user: author,
    raw: "Reply body",
    topic_id: topic_id,
    reply_to_post_number: reply_to_post_number,
  )
```

Arguments:

- `user:` required `User` object used as the post author.
- `raw:` required raw post body.
- `topic_id:` required topic id where the post should be created.
- `reply_to_post_number:` optional post number to reply to.

The helper verifies that the author can see the topic, rejects closed or
archived topics, and creates the post with `skip_workflows: true`.

#### `exec_ctx.serialize_post`

Serializes a post into the standard Discourse Workflows post output
shape.

```rb
data =
  exec_ctx.serialize_post(
    post,
    guardian: actor.guardian,
    include_raw: true,
    include_cooked: false,
  )
```

Arguments:

- `post` required `Post` record.
- `guardian:` optional guardian used for permission-aware fields such as
visible tags. Defaults to the system guardian.
- `include_raw:` optional boolean. Defaults to `true`.
- `include_cooked:` optional boolean. Defaults to `false`.

Use this helper whenever a workflow node outputs post data. It keeps
post outputs consistent across action and trigger nodes.

---------

Co-authored-by: discourse-patch-triage[bot] <272280883+discourse-patch-triage[bot]@users.noreply.github.com>
Co-authored-by: Joffrey JAFFEUX <j.jaffeux@gmail.com>
2026-06-02 14:52:47 +02:00

252 lines
8.2 KiB
Ruby
Vendored

# frozen_string_literal: true
module DiscourseAi
module Automation
class ReportContextGenerator
# Convenience entry point: builds a generator from the given keyword
# arguments and returns the result of its #generate call.
def self.generate(**args)
  generator = new(**args)
  generator.generate
end
# Stores the report options, builds the filtered posts relation and, when
# the DiscourseSolved constant is defined, loads the accepted answers for
# the topics covered by that relation.
#
# start_date / duration bound the posts window (see build_posts_relation).
# category_ids / tags / exclude_category_ids / exclude_tags are turned into
# a PostsFilter query string.
# allow_secure_categories selects the system user's guardian instead of an
# anonymous one.
# max_posts caps how many posts format_topics collects.
# tokens_per_post is the truncation budget handed to the tokenizer.
# prioritized_group_ids, when present, adds group-specific sections.
def initialize(
  start_date:,
  duration:,
  category_ids: nil,
  tags: nil,
  allow_secure_categories: false,
  max_posts: 200,
  tokens_per_post: 100,
  tokenizer: nil,
  prioritized_group_ids: [],
  exclude_category_ids: nil,
  exclude_tags: nil
)
  @start_date = start_date
  @duration = duration
  @category_ids = category_ids
  @tags = tags
  @allow_secure_categories = allow_secure_categories
  @max_posts = max_posts
  @tokenizer = tokenizer || DiscourseAi::Tokenizer::OpenAiTokenizer
  @tokens_per_post = tokens_per_post
  @prioritized_group_ids = prioritized_group_ids

  # Must run after the ivars above are set: the relation builder reads them.
  @posts =
    build_posts_relation(exclude_category_ids: exclude_category_ids, exclude_tags: exclude_tags)

  # topic_id => [answer_post_id, ...]; stays empty when the solved plugin
  # constant is not defined.
  @solutions = {}
  return unless defined?(DiscourseSolved)

  solved_pairs =
    DiscourseSolved::TopicAnswer
      .joins(:solved_topic)
      .where("discourse_solved_solved_topics.topic_id": @posts.select(:topic_id))
      .pluck(
        "discourse_solved_solved_topics.topic_id",
        "discourse_solved_topic_answers.answer_post_id",
      )

  @solutions = solved_pairs.group_by(&:first).transform_values { |pairs| pairs.map(&:last) }
end
# Returns the posts relation for the report window: regular, non-hidden
# posts created in [@start_date, @start_date + @duration) on visible,
# non-deleted topics, further narrowed by PostsFilter using the
# category/tag query string and the chosen guardian.
def build_posts_relation(exclude_category_ids:, exclude_tags:)
  window_end = @start_date + @duration

  base_scope = Post.where("posts.created_at >= ?", @start_date)
  base_scope = base_scope.joins(topic: :category).includes(:topic, :user)
  base_scope =
    base_scope
      .where("topics.visible")
      .where("posts.created_at < ?", window_end)
      .where("posts.post_type = ?", Post.types[:regular])
      .where("posts.hidden_at IS NULL")
      .where("topics.deleted_at IS NULL")

  query = posts_filter_query(exclude_category_ids: exclude_category_ids, exclude_tags: exclude_tags)

  # The system user's guardian is used only when secure categories are
  # explicitly allowed; otherwise an anonymous guardian is used.
  guardian =
    if @allow_secure_categories
      Discourse.system_user.guardian
    else
      Guardian.new
    end

  PostsFilter.new(guardian: guardian, scope: base_scope).filter_from_query_string(query)
end
# Assembles the space-separated filter query string from the configured
# include/exclude categories and tags. Blank options are skipped; each
# remaining option is emitted as "key:v1,v2,...".
def posts_filter_query(exclude_category_ids:, exclude_tags:)
  # Hash order defines the order of the terms in the resulting string.
  filters = {
    "category" => @category_ids,
    "tag" => @tags,
    "exclude_category" => exclude_category_ids,
    "exclude_tag" => exclude_tags,
  }

  terms =
    filters.filter_map do |key, values|
      "#{key}:#{Array(values).join(",")}" if values.present?
    end

  terms.join(" ")
end
# Builds the per-topic entry used by format_topics: a text header (title,
# id, solved flag, category, visible tags, creation time), the topic's
# creation time, and an empty hash that add_posts fills with posts.
def format_topic(topic)
  header = ["", "### #{topic.title}", "topic_id: #{topic.id}"]
  header << "solved: true" if @solutions.key?(topic.id)
  header << "category: #{topic.category&.name}"

  # We may make this optional, but for now only tags visible to an
  # anonymous guardian are listed.
  visible_tag_names = topic.tags.visible(Guardian.new).pluck(:name)
  header << "tags: #{visible_tag_names.join(", ")}" if visible_tag_names.present?

  created_at = topic.created_at
  header << created_at.strftime("%Y-%m-%d %H:%M")

  { created_at: created_at, info: header.join("\n"), posts: {} }
end
# Builds the per-post entry: the like count plus a text block with post
# number, solution flag, timestamp, author, likes and the (possibly
# truncated) raw body.
def format_post(post)
  lines = ["", "post_number: #{post.post_number}"]
  lines << "solution: true" if @solutions[post.topic_id]&.include?(post.id)
  lines << post.created_at.strftime("%Y-%m-%d %H:%M")
  lines << "user: #{post.user&.username}"
  lines << "likes: #{post.like_count}"

  truncated =
    @tokenizer.truncate(post.raw, @tokens_per_post, strict: SiteSetting.ai_strict_token_counting)

  # Only label the body as an excerpt when truncation actually shortened it.
  body = truncated.length < post.raw.length ? "excerpt: #{truncated}..." : truncated
  lines << body

  { likes: post.like_count, info: lines.join("\n") }
end
# Builds the text of the "## Summary" section: report window, post and
# topic counts, the ten users with the most likes on their posts in the
# window and, when prioritized groups are configured, the same ranking
# restricted to members of those groups.
def format_summary
  # NOTE(review): topics use a strict ">" on created_at while the posts
  # relation uses ">=" for the same start date — confirm this is intended.
  # `distinct` takes a boolean flag, not a column; the selected column is
  # what gets de-duplicated.
  topic_count =
    @posts.where("topics.created_at > ?", @start_date).select(:topic_id).distinct.count

  buffer = []
  buffer << "Start Date: #{@start_date.to_date}"
  buffer << "End Date: #{(@start_date + @duration).to_date}"
  buffer << "New posts: #{@posts.count}"
  buffer << "New topics: #{topic_count}"

  top_users =
    Post
      .where(id: @posts.select(:id))
      .joins(:user)
      .group(:user_id, :username)
      .select("user_id, username, sum(posts.like_count) like_count, count(posts.id) post_count")
      .order("sum(posts.like_count) desc")
      .limit(10)

  # Shared line format for both the global and the group-restricted lists.
  describe_user =
    lambda do |user|
      "@#{user.username} (#{user.like_count} likes, #{user.post_count} posts)"
    end

  buffer << "Top users:"
  top_users.each { |user| buffer << describe_user.call(user) }

  if @prioritized_group_ids.present?
    group_labels =
      Group
        .where(id: @prioritized_group_ids)
        .pluck(:name, :full_name)
        .map do |name, full_name|
          if full_name.present?
            "#{name} (#{full_name[0..100].gsub("\n", " ")})"
          else
            name
          end
        end

    # Pluralise on the number of groups, not on commas in the joined text:
    # a single group's full name may itself contain a comma.
    group_noun = group_labels.size > 1 ? "groups" : "group"

    buffer << ""
    buffer << "Top users in #{group_labels.join(", ")} #{group_noun}:"

    group_users = GroupUser.where(group_id: @prioritized_group_ids).select(:user_id)
    top_users.where(user_id: group_users).each { |user| buffer << describe_user.call(user) }
  end

  buffer.join("\n")
end
# Builds the text of the "## Topics" section. Posts are collected in three
# passes (prioritized-group posts, then the most liked posts up to the
# remaining budget, then the last post of every collected topic), topics
# are ordered by the total likes of their collected posts, and gaps
# between non-consecutive post numbers are marked with "...".
def format_topics
  topics = {}
  prioritized_count = 0

  # Note: this reassigns @posts, so later readers see the ordered relation.
  @posts = @posts.order("posts.like_count desc, posts.created_at desc")

  if @prioritized_group_ids.present?
    group_members = GroupUser.where(group_id: @prioritized_group_ids)
    prioritized_posts = @posts.where(user_id: group_members.select(:user_id)).limit(@max_posts)
    prioritized_count += add_posts(prioritized_posts, topics)
  end

  add_posts(@posts.limit(@max_posts), topics, limit: @max_posts - prioritized_count)

  # we need last posts in all topics
  # they may have important info
  closing_posts =
    @posts.where("posts.post_number = topics.highest_post_number").where(
      "topics.id IN (?)",
      topics.keys,
    )
  add_posts(closing_posts, topics)

  topics.each_value do |entry|
    entry[:post_likes] = entry[:posts].sum { |_, post_info| post_info[:likes] }
  end

  ranked = topics.sort { |(_, left), (_, right)| right[:post_likes] <=> left[:post_likes] }

  lines = []
  ranked.each do |_topic_id, entry|
    lines << entry[:info]

    previous_number = 0
    # Post numbers are unique hash keys, so sorting by key is unambiguous.
    entry[:posts]
      .sort_by { |number, _| number }
      .each do |number, post_info|
        lines << "\n..." if number > previous_number + 1
        lines << post_info[:info]
        previous_number = number
      end
  end

  lines.join("\n")
end
# Renders the full report context: a "## Summary" section followed by a
# "## Topics" section, joined with newlines.
def generate
  sections = ["## Summary", format_summary, "\n## Topics", format_topics]
  sections.join("\n")
end
# Adds posts from `relation` into the `topics` hash (topic_id => entry
# built by format_topic), skipping posts whose post_number is already
# recorded for that topic.
#
# relation - enumerable of posts; each must respond to topic_id,
#            post_number and topic.
# topics   - hash mutated in place.
# limit    - optional maximum number of NEW posts to add; nil means no cap.
#
# Returns the number of posts newly added.
def add_posts(relation, topics, limit: nil)
  added = 0

  # An exhausted budget must add nothing. Checking the limit only after an
  # insertion would let one extra post through when limit is 0 (or less).
  # Returning here also avoids enumerating the relation at all.
  return added if limit && limit <= 0

  relation.each do |post|
    topic_entry = (topics[post.topic_id] ||= format_topic(post.topic))
    next if topic_entry[:posts][post.post_number]

    topic_entry[:posts][post.post_number] = format_post(post)
    added += 1
    break if limit && added >= limit
  end

  added
end
end
end
end