discourse/lib/email/cleaner.rb
Michael Brown 371eff1f66 FIX: Email::Cleaner must clear the encoding of replaced bodies
If this is *not* done, then the decoded body will already be considered to be
encoded, e.g.:

```
pry(main)> puts part
Content-Type: text/html;
 charset=utf-8
Content-Transfer-Encoding: base64

PGh0bWw+PGhlYWQ+PC9oZWFkPjxib2R5Pjxicj48YnI+PGRpdiBjbGFzcz0i
cHJvdG9ubWFpbF9zaWduYXR1cmVfYmxvY2stdXNlciI+PC9kaXY+PGJyPjxi
cj5TZW50IGZyb20gPGEgdGFyZ2V0PSJfYmxhbmsiIGhyZWY9Imh0dHBzOi8v
cHJvdG9uLm1lL21haWwvaG9tZSIgcmVsPSJub3JlZmVycmVyIj5Qcm90b24g
TWFpbDwvYT4gZm9yIEFuZHJvaWQuPC9ib2R5PjwvaHRtbD4=

pry(main)> part.body = part.body.decoded
=> "<html><head></head><body><br><br><div class=\"protonmail_signature_block-user\"></div><br><br>Sent from <a target=\"_blank\" href=\"https://proton.me/mail/home\" rel=\"noreferrer\">Proton Mail</a> for Android.</body></html>"

pry(main)> puts part
Content-Type: text/html;
 charset=utf-8
Content-Transfer-Encoding: base64

htmlhead/headbodybrbrdivclassprotonmailsignatureblockuser/di
vbrbrSentfromatargetblankhrefhttps//protonme/mail/homerelnor
eferrerProtonMail/aforAndroid/body/htmk=
```

Clearing the CTE indicates to the Mail gem that the content needs to be encoded
if necessary.
2026-01-15 13:05:25 -05:00

56 lines
1.6 KiB
Ruby

# frozen_string_literal: true
module Email
  # Produces a cleaned-up textual copy of an email: attachments can be
  # dropped, each part's body can be shortened to a configured length, and
  # NUL bytes are stripped from the serialised result.
  class Cleaner
    # @param mail anything accepted by +Mail.new+
    # @param remove_attachments [Boolean] drop attachments during #execute
    # @param truncate [Boolean] shorten every part's body during #execute
    # @param rejected [Boolean] use the rejected-email length limit instead
    #   of the regular one when truncating
    def initialize(mail, remove_attachments: true, truncate: true, rejected: false)
      @remove_attachments = remove_attachments
      @truncate = truncate
      @rejected = rejected

      @mail = Mail.new(mail)
      @mail.charset = "UTF-8"

      # Clear the body's part sort order. The default ordering
      # ("text/plain", "text/enriched", "text/html", "multipart/alternative")
      # may incorrectly place postscripts - for instance ones appended by
      # mailing lists, see https://meta.discourse.org/t/377793/21 - ahead of
      # the "real content" of the message.
      @mail.body.set_sort_order([])
    end

    # Applies the configured clean-up steps and serialises the message.
    #
    # @return [String] the message as text, without NUL bytes
    def execute
      if @remove_attachments
        @mail.without_attachments!
      end

      if @truncate
        truncate!
      end

      remove_null_byte(@mail.to_s)
    end

    # Deletes IncomingEmail records that carry a rejection message and were
    # created before the cutoff derived from
    # +SiteSetting.delete_rejected_email_after_days+.
    def self.delete_rejected!
      cutoff = SiteSetting.delete_rejected_email_after_days.days.ago
      IncomingEmail.delete_by("rejection_message IS NOT NULL AND created_at < ?", cutoff)
    end

    private

    # Replaces every part's body with its decoded content cut down to
    # #truncate_limit (nothing is appended as an omission marker).
    def truncate!
      parts.each do |part|
        shortened = part.body.decoded.truncate(truncate_limit, omission: "")
        part.body = shortened

        # The new body is decoded content. Keeping the previous
        # Content-Transfer-Encoding would tell Mail that it is *already*
        # encoded, so clear it and let the serialiser pick a suitable one.
        part.content_transfer_encoding = nil
      end
    end

    # The message's parts, or the message itself wrapped in an array when it
    # is not multipart.
    def parts
      return @mail.parts if @mail.multipart?

      [@mail]
    end

    # Maximum body length to keep; depends on whether the email was rejected.
    def truncate_limit
      if @rejected
        SiteSetting.raw_rejected_email_max_length
      else
        SiteSetting.raw_email_max_length
      end
    end

    # Removes NUL bytes from +message+ in place and returns that same string.
    def remove_null_byte(message)
      message.tap { |text| text.gsub!("\x00", "") }
    end
  end
end