2
0
Fork 0
mirror of https://github.com/discourse/discourse.git synced 2025-08-21 19:11:18 +08:00
discourse/lib/upload_recovery.rb
Sam Saffron ebcb571de7 FIX: allow upload recovery to recover uploads with sha mismatch
Filename on disk may mismatch sha of file in some old 1X setups. This will
attempt to recover file even if sha1 mismatches. We had an old bug that
caused this.

This also adds `uploads:fix_relative_upload_links` which attempts to replace
urls of the format `/upload/default/...` with `upload://`
2019-05-22 15:24:36 +10:00

179 lines
4.3 KiB
Ruby

# frozen_string_literal: true
class UploadRecovery
def initialize(dry_run: false)
@dry_run = dry_run
end
def recover(posts = Post)
posts.have_uploads.find_each do |post|
begin
analyzer = PostAnalyzer.new(post.raw, post.topic_id)
analyzer.cooked_stripped.css("img", "a").each do |media|
if media.name == "img" && orig_src = media["data-orig-src"]
if dom_class = media["class"]
if (Post.white_listed_image_classes & dom_class.split).count > 0
next
end
end
if @dry_run
puts "#{post.full_url} #{orig_src}"
else
recover_post_upload(post, Upload.sha1_from_short_url(orig_src))
end
elsif url = (media["href"] || media["src"])
data = Upload.extract_url(url)
next unless data
sha1 = data[2]
unless upload = Upload.get_from_url(url)
if @dry_run
puts "#{post.full_url} #{url}"
else
recover_post_upload(post, sha1)
end
end
end
end
rescue => e
raise e unless @dry_run
puts "#{post.full_url} #{e.class}: #{e.message}"
end
end
end
private
def recover_post_upload(post, sha1)
return unless valid_sha1?(sha1)
attributes = {
post: post,
sha1: sha1
}
if Discourse.store.external?
recover_post_upload_from_s3(attributes)
else
recover_post_upload_from_local(attributes)
end
end
def ensure_upload!(post:, sha1:, upload:)
return if !upload.persisted?
if upload.sha1 != sha1
STDERR.puts "Warning #{post.url} had an incorrect sha, remapping #{sha1} to #{upload.sha1}"
post.raw = post.raw.gsub(sha1, upload.sha1)
post.save!
end
post.rebake!
end
def recover_post_upload_from_local(post:, sha1:)
recover_from_local(sha1: sha1, user_id: post.user_id) do |upload|
ensure_upload!(post: post, sha1: sha1, upload: upload)
end
end
def recover_post_upload_from_s3(post:, sha1:)
recover_from_s3(sha1: sha1, user_id: post.user_id) do |upload|
ensure_upload!(post: post, sha1: sha1, upload: upload)
end
end
def recover_from_local(sha1:, user_id:)
public_path = Rails.root.join("public")
@paths ||= begin
Dir.glob(File.join(
public_path,
'uploads',
'tombstone',
RailsMultisite::ConnectionManagement.current_db,
'original',
'**',
'*.*'
)).concat(Dir.glob(File.join(
public_path,
'uploads',
RailsMultisite::ConnectionManagement.current_db,
'original',
'**',
'*.*'
)))
end
@paths.each do |path|
if path =~ /#{sha1}/
begin
tmp = Tempfile.new
tmp.write(File.read(path))
tmp.rewind
upload = create_upload(tmp, File.basename(path), user_id)
yield upload if block_given?
ensure
tmp&.close
end
end
end
end
def recover_from_s3(sha1:, user_id:)
@object_keys ||= begin
s3_helper = Discourse.store.s3_helper
s3_helper.list("original").map(&:key).concat(
s3_helper.list("#{FileStore::S3Store::TOMBSTONE_PREFIX}original").map(&:key)
)
end
@object_keys.each do |key|
if key =~ /#{sha1}/
tombstone_prefix = FileStore::S3Store::TOMBSTONE_PREFIX
if key.include?(tombstone_prefix)
old_key = key
key = key.sub(tombstone_prefix, "")
Discourse.store.s3_helper.copy(
old_key,
key,
options: { acl: "public-read" }
)
end
url = "https:#{SiteSetting.Upload.absolute_base_url}/#{key}"
begin
tmp = FileHelper.download(
url,
max_file_size: SiteSetting.max_image_size_kb.kilobytes,
tmp_file_name: "recover_from_s3"
)
if tmp
upload = create_upload(tmp, File.basename(key), user_id)
yield upload if block_given?
end
ensure
tmp&.close
end
end
end
end
def create_upload(file, filename, user_id)
UploadCreator.new(file, filename).create_for(user_id)
end
def valid_sha1?(sha1)
sha1.present? && sha1.length == Upload::SHA1_LENGTH
end
end