2
0
Fork 0
mirror of https://github.com/discourse/discourse.git synced 2025-09-07 12:02:53 +08:00
discourse/lib/tasks/uploads.rake
Martin Brennan abca91cc4d
FEATURE: Add rake task to disable secure media (#8669)
* Add a rake task to disable secure media. This sets all uploads to `secure: false`, changes the upload ACL to public, and rebakes all the posts using the uploads to make sure they point to the correct URLs. This is in a transaction for each upload with the upload being updated the last step, so if the task fails it can be resumed.
* Also allow viewing media via the secure url if secure media is disabled, redirecting to the normal CDN url, because otherwise media links will be broken while we go and rebake all the posts + update ACLs
2020-01-07 12:27:24 +10:00

1148 lines
35 KiB
Ruby

# frozen_string_literal: true
require "db_helper"
require "digest/sha1"
require "base62"
################################################################################
# gather #
################################################################################
require_dependency "rake_helpers"
task "uploads:gather" => :environment do
ENV["RAILS_DB"] ? gather_uploads : gather_uploads_for_all_sites
end
def gather_uploads_for_all_sites
RailsMultisite::ConnectionManagement.each_connection { gather_uploads }
end
def file_exists?(path)
File.exists?(path) && File.size(path) > 0
rescue
false
end
def gather_uploads
public_directory = "#{Rails.root}/public"
current_db = RailsMultisite::ConnectionManagement.current_db
puts "", "Gathering uploads for '#{current_db}'...", ""
Upload.where("url ~ '^\/uploads\/'")
.where("url !~ '^\/uploads\/#{current_db}'")
.find_each do |upload|
begin
old_db = upload.url[/^\/uploads\/([^\/]+)\//, 1]
from = upload.url.dup
to = upload.url.sub("/uploads/#{old_db}/", "/uploads/#{current_db}/")
source = "#{public_directory}#{from}"
destination = "#{public_directory}#{to}"
# create destination directory & copy file unless it already exists
unless file_exists?(destination)
`mkdir -p '#{File.dirname(destination)}'`
`cp --link '#{source}' '#{destination}'`
end
# ensure file has been succesfuly copied over
raise unless file_exists?(destination)
# remap links in db
DbHelper.remap(from, to)
rescue
putc "!"
else
putc "."
end
end
puts "", "Done!"
end
################################################################################
# backfill_shas #
################################################################################
task "uploads:backfill_shas" => :environment do
RailsMultisite::ConnectionManagement.each_connection do |db|
puts "Backfilling #{db}..."
Upload.where(sha1: nil).find_each do |u|
begin
path = Discourse.store.path_for(u)
u.sha1 = Upload.generate_digest(path)
u.save!
putc "."
rescue => e
puts "Skipping #{u.original_filename} (#{u.url}) #{e.message}"
end
end
end
puts "", "Done"
end
################################################################################
# migrate_from_s3 #
################################################################################
task "uploads:migrate_from_s3" => :environment do
ENV["RAILS_DB"] ? migrate_from_s3 : migrate_all_from_s3
end
def guess_filename(url, raw)
begin
uri = URI.parse("http:#{url}")
f = uri.open("rb", read_timeout: 5, redirect: true, allow_redirections: :all)
filename = if f.meta && f.meta["content-disposition"]
f.meta["content-disposition"][/filename="([^"]+)"/, 1].presence
end
filename ||= raw[/<a class="attachment" href="(?:https?:)?#{Regexp.escape(url)}">([^<]+)<\/a>/, 1].presence
filename ||= File.basename(url)
filename
rescue
nil
ensure
f.try(:close!) rescue nil
end
end
def migrate_all_from_s3
RailsMultisite::ConnectionManagement.each_connection { migrate_from_s3 }
end
def migrate_from_s3
require "file_store/s3_store"
# make sure S3 is disabled
if SiteSetting.Upload.enable_s3_uploads
puts "You must disable S3 uploads before running that task."
return
end
db = RailsMultisite::ConnectionManagement.current_db
puts "Migrating uploads from S3 to local storage for '#{db}'..."
max_file_size = [SiteSetting.max_image_size_kb, SiteSetting.max_attachment_size_kb].max.kilobytes
Post
.where("user_id > 0")
.where("raw LIKE '%.s3%.amazonaws.com/%' OR raw LIKE '%(upload://%'")
.find_each do |post|
begin
updated = false
post.raw.gsub!(/(\/\/[\w.-]+amazonaws\.com\/(original|optimized)\/([a-z0-9]+\/)+\h{40}([\w.-]+)?)/i) do |url|
begin
if filename = guess_filename(url, post.raw)
file = FileHelper.download("http:#{url}", max_file_size: max_file_size, tmp_file_name: "from_s3", follow_redirect: true)
sha1 = Upload.generate_digest(file)
origin = nil
existing_upload = Upload.find_by(sha1: sha1)
if existing_upload&.url&.start_with?("//")
filename = existing_upload.original_filename
origin = existing_upload.origin
existing_upload.destroy
end
new_upload = UploadCreator.new(file, filename, origin: origin).create_for(post.user_id || -1)
if new_upload&.save
updated = true
url = new_upload.url
end
end
url
rescue
url
end
end
post.raw.gsub!(/(upload:\/\/[0-9a-zA-Z]+\.\w+)/) do |url|
begin
if sha1 = Upload.sha1_from_short_url(url)
if upload = Upload.find_by(sha1: sha1)
if upload.url.start_with?("//")
file = FileHelper.download("http:#{upload.url}", max_file_size: max_file_size, tmp_file_name: "from_s3", follow_redirect: true)
filename = upload.original_filename
origin = upload.origin
upload.destroy
new_upload = UploadCreator.new(file, filename, origin: origin).create_for(post.user_id || -1)
if new_upload&.save
updated = true
url = new_upload.url
end
end
end
end
url
rescue
url
end
end
if updated
post.save!
post.rebake!
putc "#"
else
putc "."
end
rescue
putc "X"
end
end
puts "Done!"
end
################################################################################
# migrate_to_s3 #
################################################################################
task "uploads:migrate_to_s3" => :environment do
ENV["RAILS_DB"] ? migrate_to_s3 : migrate_to_s3_all_sites
end
def migrate_to_s3_all_sites
RailsMultisite::ConnectionManagement.each_connection do
begin
migrate_to_s3
rescue RuntimeError => e
if ENV["SKIP_FAILED"]
puts e
else
raise e unless ENV["SKIP_FAILED"]
end
end
end
end
def migration_successful?(db, should_raise = false)
success = true
failure_message = "S3 migration failed for db '#{db}'."
prefix = ENV["MIGRATE_TO_MULTISITE"] ? "uploads/#{db}/original/" : "original/"
base_url = File.join(SiteSetting.Upload.s3_base_url, prefix)
count = Upload.by_users.where("url NOT LIKE '#{base_url}%'").count
error_message = "#{count} of #{Upload.count} uploads are not migrated to S3. #{failure_message}"
raise error_message if count > 0 && should_raise
success &&= count == 0
puts error_message if count > 0
cdn_path = SiteSetting.cdn_path("/uploads/#{db}/original").sub(/https?:/, "")
count = Post.where("cooked LIKE '%#{cdn_path}%'").count
error_message = "#{count} posts are not remapped to new S3 upload URL. #{failure_message}"
raise error_message if count > 0 && should_raise
success &&= count == 0
puts error_message if count > 0
Rake::Task['posts:missing_uploads'].invoke('single_site')
count = PostCustomField.where(name: Post::MISSING_UPLOADS).count
error_message = "rake posts:missing_uploads identified #{count} issues. #{failure_message}"
raise error_message if count > 0 && should_raise
success &&= count == 0
puts error_message if count > 0
count = Post.where('baked_version <> ? OR baked_version IS NULL', Post::BAKED_VERSION).count
if count > 0
puts "#{count} posts still require rebaking and will be rebaked during regular job"
if count > 100
puts "To speed up migrations of posts we recommend you run 'rake posts:rebake_uncooked_posts'"
end
success = false
else
puts "No posts require rebaking"
end
success
end
task "uploads:s3_migration_status" => :environment do
success = true
RailsMultisite::ConnectionManagement.each_connection do
db = RailsMultisite::ConnectionManagement.current_db
success &&= migration_successful?(db)
end
queued_jobs = Sidekiq::Stats.new.queues.sum { |_ , x| x }
if queued_jobs > 50
puts "WARNING: There are #{queued_jobs} jobs queued! Wait till Sidekiq clears backlog prior to migrating site to a new host"
exit 1
end
if !success
puts "Site is not ready for migration"
exit 1
end
puts "All sites appear to have uploads in order!"
end
def migrate_to_s3
# we don't want have migrated state, ensure we run all jobs here
Jobs.run_immediately!
db = RailsMultisite::ConnectionManagement.current_db
dry_run = !!ENV["DRY_RUN"]
puts "Checking if #{db} already migrated..."
return puts "Already migrated #{db}!" if migration_successful?(db)
puts "*" * 30 + " DRY RUN " + "*" * 30 if dry_run
puts "Migrating uploads to S3 for '#{db}'..."
if Upload.by_users.where("url NOT LIKE '//%' AND url NOT LIKE '#{GlobalSetting.relative_url_root}/uploads/#{db}/original/_X/%'").exists?
puts <<~TEXT
Some uploads were not migrated to the new scheme. Please run these commands in the rails console
SiteSetting.migrate_to_new_scheme = true
Jobs::MigrateUploadScheme.new.execute(nil)
TEXT
exit 1
end
unless ENV["DISCOURSE_S3_BUCKET"].present? &&
ENV["DISCOURSE_S3_REGION"].present? &&
(
(
ENV["DISCOURSE_S3_ACCESS_KEY_ID"].present? &&
ENV["DISCOURSE_S3_SECRET_ACCESS_KEY"].present?
) ||
ENV["DISCOURSE_S3_USE_IAM_PROFILE"].present?
)
puts <<~TEXT
Please provide the following environment variables
- DISCOURSE_S3_BUCKET
- DISCOURSE_S3_REGION
and either
- DISCOURSE_S3_ACCESS_KEY_ID
- DISCOURSE_S3_SECRET_ACCESS_KEY
or
- DISCOURSE_S3_USE_IAM_PROFILE
TEXT
exit 2
end
if SiteSetting.Upload.s3_cdn_url.blank?
puts "Please provide the 'DISCOURSE_S3_CDN_URL' environment variable"
exit 3
end
bucket_has_folder_path = true if ENV["DISCOURSE_S3_BUCKET"].include? "/"
public_directory = Rails.root.join("public").to_s
opts = {
region: ENV["DISCOURSE_S3_REGION"],
access_key_id: ENV["DISCOURSE_S3_ACCESS_KEY_ID"],
secret_access_key: ENV["DISCOURSE_S3_SECRET_ACCESS_KEY"]
}
# S3::Client ignores the `region` option when an `endpoint` is provided.
# Without `region`, non-default region bucket creation will break for S3, so we can only
# define endpoint when not using S3 i.e. when SiteSetting.s3_endpoint is provided.
opts[:endpoint] = SiteSetting.s3_endpoint if SiteSetting.s3_endpoint.present?
s3 = Aws::S3::Client.new(opts)
if bucket_has_folder_path
bucket, folder = S3Helper.get_bucket_and_folder_path(ENV["DISCOURSE_S3_BUCKET"])
folder = File.join(folder, "/")
else
bucket, folder = ENV["DISCOURSE_S3_BUCKET"], ""
end
puts "Uploading files to S3..."
print " - Listing local files"
local_files = []
IO.popen("cd #{public_directory} && find uploads/#{db}/original -type f").each do |file|
local_files << file.chomp
putc "." if local_files.size % 1000 == 0
end
puts " => #{local_files.size} files"
print " - Listing S3 files"
s3_objects = []
prefix = ENV["MIGRATE_TO_MULTISITE"] ? "uploads/#{db}/original/" : "original/"
options = { bucket: bucket, prefix: folder + prefix }
loop do
response = s3.list_objects_v2(options)
s3_objects.concat(response.contents)
putc "."
break if response.next_continuation_token.blank?
options[:continuation_token] = response.next_continuation_token
end
puts " => #{s3_objects.size} files"
puts " - Syncing files to S3"
synced = 0
failed = []
skip_etag_verify = ENV["SKIP_ETAG_VERIFY"].present?
local_files.each do |file|
path = File.join(public_directory, file)
name = File.basename(path)
etag = Digest::MD5.file(path).hexdigest unless skip_etag_verify
key = file[file.index(prefix)..-1]
key.prepend(folder) if bucket_has_folder_path
original_path = file.sub("uploads/#{db}", "")
if s3_object = s3_objects.find { |obj| obj.key.ends_with?(original_path) }
next if File.size(path) == s3_object.size && (skip_etag_verify || s3_object.etag[etag])
end
options = {
acl: "public-read",
body: File.open(path, "rb"),
bucket: bucket,
content_type: MiniMime.lookup_by_filename(name)&.content_type,
key: key,
}
if !FileHelper.is_supported_image?(name)
upload = Upload.find_by(url: "/#{file}")
if upload&.original_filename
options[:content_disposition] =
%Q{attachment; filename="#{upload.original_filename}"}
end
if upload&.secure
options[:acl] = "private"
end
end
etag ||= Digest::MD5.file(path).hexdigest
if dry_run
puts "#{file} => #{options[:key]}"
synced += 1
elsif s3.put_object(options).etag[etag]
putc "."
synced += 1
else
putc "X"
failed << path
end
end
puts
failure_message = "S3 migration failed for db '#{db}'."
if failed.size > 0
puts "Failed to upload #{failed.size} files"
puts failed.join("\n")
raise failure_message
elsif s3_objects.size + synced >= local_files.size
puts "Updating the URLs in the database..."
from = "/uploads/#{db}/original/"
to = "#{SiteSetting.Upload.s3_base_url}/#{prefix}"
if dry_run
puts "REPLACING '#{from}' WITH '#{to}'"
else
DbHelper.remap(from, to, anchor_left: true)
end
[
[
"src=\"/uploads/#{db}/original/(\\dX/(?:[a-f0-9]/)*[a-f0-9]{40}[a-z0-9\\.]*)",
"src=\"#{SiteSetting.Upload.s3_base_url}/#{prefix}\\1"
],
[
"src='/uploads/#{db}/original/(\\dX/(?:[a-f0-9]/)*[a-f0-9]{40}[a-z0-9\\.]*)",
"src='#{SiteSetting.Upload.s3_base_url}/#{prefix}\\1"
],
[
"href=\"/uploads/#{db}/original/(\\dX/(?:[a-f0-9]/)*[a-f0-9]{40}[a-z0-9\\.]*)",
"href=\"#{SiteSetting.Upload.s3_base_url}/#{prefix}\\1"
],
[
"href='/uploads/#{db}/original/(\\dX/(?:[a-f0-9]/)*[a-f0-9]{40}[a-z0-9\\.]*)",
"href='#{SiteSetting.Upload.s3_base_url}/#{prefix}\\1"
],
[
"\\[img\\]/uploads/#{db}/original/(\\dX/(?:[a-f0-9]/)*[a-f0-9]{40}[a-z0-9\\.]*)\\[/img\\]",
"[img]#{SiteSetting.Upload.s3_base_url}/#{prefix}\\1[/img]"
]
].each do |from_url, to_url|
if dry_run
puts "REPLACING '#{from_url}' WITH '#{to_url}'"
else
DbHelper.regexp_replace(from_url, to_url)
end
end
unless dry_run
# Legacy inline image format
Post.where("raw LIKE '%![](/uploads/default/original/%)%'").each do |post|
regexp = /!\[\](\/uploads\/#{db}\/original\/(\dX\/(?:[a-f0-9]\/)*[a-f0-9]{40}[a-z0-9\.]*))/
post.raw.scan(regexp).each do |upload_url, _|
upload = Upload.get_from_url(upload_url)
post.raw = post.raw.gsub("![](#{upload_url})", "![](#{upload.short_url})")
end
post.save!(validate: false)
end
end
if Discourse.asset_host.present?
# Uploads that were on local CDN will now be on S3 CDN
from = "#{Discourse.asset_host}/uploads/#{db}/original/"
to = "#{SiteSetting.Upload.s3_cdn_url}/#{prefix}"
if dry_run
puts "REMAPPING '#{from}' TO '#{to}'"
else
DbHelper.remap(from, to)
end
end
# Uploads that were on base hostname will now be on S3 CDN
from = "#{Discourse.base_url}/uploads/#{db}/original/"
to = "#{SiteSetting.Upload.s3_cdn_url}/#{prefix}"
if dry_run
puts "REMAPPING '#{from}' TO '#{to}'"
else
DbHelper.remap(from, to)
end
unless dry_run
puts "Removing old optimized images..."
OptimizedImage
.joins("LEFT JOIN uploads u ON optimized_images.upload_id = u.id")
.where("u.id IS NOT NULL AND u.url LIKE '//%' AND optimized_images.url NOT LIKE '//%'")
.delete_all
puts "Flagging all posts containing lightboxes for rebake..."
count = Post.where("cooked LIKE '%class=\"lightbox\"%'").update_all(baked_version: nil)
puts "#{count} posts were flagged for a rebake"
end
end
migration_successful?(db, true)
puts "Done!"
end
################################################################################
# clean_up #
################################################################################
task "uploads:clean_up" => :environment do
ENV["RAILS_DB"] ? clean_up_uploads : clean_up_uploads_all_sites
end
def clean_up_uploads_all_sites
RailsMultisite::ConnectionManagement.each_connection { clean_up_uploads }
end
def clean_up_uploads
db = RailsMultisite::ConnectionManagement.current_db
puts "Cleaning up uploads and thumbnails for '#{db}'..."
if Discourse.store.external?
puts "This task only works for internal storages."
exit 1
end
puts <<~OUTPUT
This task will remove upload records and files permanently.
Would you like to take a full backup before the clean up? (Y/N)
OUTPUT
if STDIN.gets.chomp.downcase == 'y'
puts "Starting backup..."
backuper = BackupRestore::Backuper.new(Discourse.system_user.id)
backuper.run
exit 1 unless backuper.success
end
public_directory = Rails.root.join("public").to_s
##
## DATABASE vs FILE SYSTEM
##
# uploads & avatars
Upload.find_each do |upload|
path = File.join(public_directory, upload.url)
if !File.exists?(path)
upload.destroy!
putc "#"
else
putc "."
end
end
# optimized images
OptimizedImage.find_each do |optimized_image|
path = File.join(public_directory, optimized_image.url)
if !File.exists?(path)
optimized_image.destroy!
putc "#"
else
putc "."
end
end
##
## FILE SYSTEM vs DATABASE
##
uploads_directory = File.join(public_directory, 'uploads', db).to_s
# avatars (no avatar should be stored in that old directory)
FileUtils.rm_rf("#{uploads_directory}/avatars")
# uploads and optimized images
Dir.glob("#{uploads_directory}/**/*.*").each do |file_path|
sha1 = Upload.generate_digest(file_path)
url = file_path.split(public_directory, 2)[1]
if (Upload.where(sha1: sha1).empty? &&
Upload.where(url: url).empty?) &&
(OptimizedImage.where(sha1: sha1).empty? &&
OptimizedImage.where(url: url).empty?)
FileUtils.rm(file_path)
putc "#"
else
putc "."
end
end
puts "Removing empty directories..."
puts `find #{uploads_directory} -type d -empty -exec rmdir {} \\;`
puts "Done!"
end
################################################################################
# missing files #
################################################################################
# list all missing uploads and optimized images
task "uploads:missing_files" => :environment do
if ENV["RAILS_DB"]
list_missing_uploads(skip_optimized: ENV['SKIP_OPTIMIZED'])
else
RailsMultisite::ConnectionManagement.each_connection do |db|
if ENV["SKIP_EXTERNAL"] == "1" && Discourse.store.external?
puts "#{RailsMultisite::ConnectionManagement.current_db} has uploads stored externally skipping!"
else
if Discourse.store.external?
puts "-" * 80
puts "WARNING! WARNING! WARNING!"
puts "-" * 80
puts
puts <<~TEXT
#{RailsMultisite::ConnectionManagement.current_db} has uploads on S3!
validating without inventory is likely to take an enormous amount of time.
We recommend you run SKIP_EXTERNAL=1 rake uploads:missing to skip validating if on a multisite.
TEXT
end
list_missing_uploads(skip_optimized: ENV['SKIP_OPTIMIZED'])
end
end
end
end
def list_missing_uploads(skip_optimized: false)
Discourse.store.list_missing_uploads(skip_optimized: skip_optimized)
end
task "uploads:missing" => :environment do
Rake::Task["uploads:missing_files"].invoke
end
################################################################################
# regenerate_missing_optimized #
################################################################################
# regenerate missing optimized images
task "uploads:regenerate_missing_optimized" => :environment do
if ENV["RAILS_DB"]
regenerate_missing_optimized
else
RailsMultisite::ConnectionManagement.each_connection { regenerate_missing_optimized }
end
end
def regenerate_missing_optimized
db = RailsMultisite::ConnectionManagement.current_db
puts "Regenerating missing optimized images for '#{db}'..."
if Discourse.store.external?
puts "This task only works for internal storages."
return
end
public_directory = "#{Rails.root}/public"
missing_uploads = Set.new
avatar_upload_ids = UserAvatar.all.pluck(:custom_upload_id, :gravatar_upload_id).flatten.compact
default_scope = OptimizedImage.includes(:upload)
[
default_scope
.where("optimized_images.upload_id IN (?)", avatar_upload_ids),
default_scope
.where("optimized_images.upload_id NOT IN (?)", avatar_upload_ids)
.where("LENGTH(COALESCE(url, '')) > 0")
.where("width > 0 AND height > 0")
].each do |scope|
scope.find_each do |optimized_image|
upload = optimized_image.upload
next unless optimized_image.url =~ /^\/[^\/]/
next unless upload.url =~ /^\/[^\/]/
thumbnail = "#{public_directory}#{optimized_image.url}"
original = "#{public_directory}#{upload.url}"
if !File.exists?(thumbnail) || File.size(thumbnail) <= 0
# make sure the original image exists locally
if (!File.exists?(original) || File.size(original) <= 0) && upload.origin.present?
# try to fix it by redownloading it
begin
downloaded = FileHelper.download(
upload.origin,
max_file_size: SiteSetting.max_image_size_kb.kilobytes,
tmp_file_name: "discourse-missing",
follow_redirect: true
) rescue nil
if downloaded && downloaded.size > 0
FileUtils.mkdir_p(File.dirname(original))
File.open(original, "wb") { |f| f.write(downloaded.read) }
end
ensure
downloaded.try(:close!) if downloaded.respond_to?(:close!)
end
end
if File.exists?(original) && File.size(original) > 0
FileUtils.mkdir_p(File.dirname(thumbnail))
OptimizedImage.resize(original, thumbnail, optimized_image.width, optimized_image.height)
putc "#"
else
missing_uploads << original
putc "X"
end
else
putc "."
end
end
end
puts "", "Done"
if missing_uploads.size > 0
puts "Missing uploads:"
missing_uploads.sort.each { |u| puts u }
end
end
################################################################################
# migrate_to_new_scheme #
################################################################################
task "uploads:start_migration" => :environment do
SiteSetting.migrate_to_new_scheme = true
puts "Migration started!"
end
task "uploads:stop_migration" => :environment do
SiteSetting.migrate_to_new_scheme = false
puts "Migration stoped!"
end
task "uploads:analyze", [:cache_path, :limit] => :environment do |_, args|
now = Time.zone.now
current_db = RailsMultisite::ConnectionManagement.current_db
puts "Analyzing uploads for '#{current_db}'... This may take awhile...\n"
cache_path = args[:cache_path]
current_db = RailsMultisite::ConnectionManagement.current_db
uploads_path = Rails.root.join('public', 'uploads', current_db)
path =
if cache_path
cache_path
else
path = "/tmp/#{current_db}-#{now.to_i}-paths.txt"
FileUtils.touch("/tmp/#{now.to_i}-paths.txt")
`find #{uploads_path} -type f -printf '%s %h/%f\n' > #{path}`
path
end
extensions = {}
paths_count = 0
File.readlines(path).each do |line|
size, file_path = line.split(" ", 2)
paths_count += 1
extension = File.extname(file_path).chomp.downcase
extensions[extension] ||= {}
extensions[extension]["count"] ||= 0
extensions[extension]["count"] += 1
extensions[extension]["size"] ||= 0
extensions[extension]["size"] += size.to_i
end
uploads_count = Upload.count
optimized_images_count = OptimizedImage.count
puts <<~REPORT
Report for '#{current_db}'
-----------#{'-' * current_db.length}
Number of `Upload` records in DB: #{uploads_count}
Number of `OptimizedImage` records in DB: #{optimized_images_count}
**Total DB records: #{uploads_count + optimized_images_count}**
Number of images in uploads folder: #{paths_count}
------------------------------------#{'-' * paths_count.to_s.length}
REPORT
helper = Class.new do
include ActionView::Helpers::NumberHelper
end
helper = helper.new
printf "%-15s | %-15s | %-15s\n", 'extname', 'total size', 'count'
puts "-" * 45
extensions.sort_by { |_, value| value['size'] }.reverse.each do |extname, value|
printf "%-15s | %-15s | %-15s\n", extname, helper.number_to_human_size(value['size']), value['count']
end
puts "\n"
limit = args[:limit] || 10
sql = <<~SQL
SELECT
users.username,
COUNT(uploads.user_id) AS num_of_uploads,
SUM(uploads.filesize) AS total_size_of_uploads,
COUNT(optimized_images.id) AS num_of_optimized_images
FROM users
INNER JOIN uploads ON users.id = uploads.user_id
INNER JOIN optimized_images ON uploads.id = optimized_images.upload_id
GROUP BY users.id
ORDER BY total_size_of_uploads DESC
LIMIT #{limit}
SQL
puts "Users using the most disk space"
puts "-------------------------------\n"
printf "%-25s | %-25s | %-25s | %-25s\n", 'username', 'total size of uploads', 'number of uploads', 'number of optimized images'
puts "-" * 110
DB.query_single(sql).each do |username, num_of_uploads, total_size_of_uploads, num_of_optimized_images|
printf "%-25s | %-25s | %-25s | %-25s\n", username, helper.number_to_human_size(total_size_of_uploads), num_of_uploads, num_of_optimized_images
end
puts "\n"
puts "List of file paths @ #{path}"
puts "Duration: #{Time.zone.now - now} seconds"
end
task "uploads:fix_incorrect_extensions" => :environment do
UploadFixer.fix_all_extensions
end
task "uploads:recover_from_tombstone" => :environment do
Rake::Task["uploads:recover"].invoke
end
task "uploads:recover" => :environment do
dry_run = ENV["DRY_RUN"].present?
stop_on_error = ENV["STOP_ON_ERROR"].present?
if ENV["RAILS_DB"]
UploadRecovery.new(dry_run: dry_run, stop_on_error: stop_on_error).recover
else
RailsMultisite::ConnectionManagement.each_connection do |db|
UploadRecovery.new(dry_run: dry_run, stop_on_error: stop_on_error).recover
end
end
end
task "uploads:disable_secure_media" => :environment do
RailsMultisite::ConnectionManagement.each_connection do |db|
unless Discourse.store.external?
puts "This task only works for external storage."
exit 1
end
puts "Disabling secure media and resetting uploads to not secure in #{db}...", ""
SiteSetting.secure_media = false
secure_uploads = Upload.includes(:posts).where(secure: true)
secure_upload_count = secure_uploads.count
i = 0
secure_uploads.find_each(batch_size: 20).each do |upload|
Upload.transaction do
upload.secure = false
RakeHelpers.print_status_with_label("Updating ACL for upload #{upload.id}.......", i, secure_upload_count)
Discourse.store.update_upload_ACL(upload)
RakeHelpers.print_status_with_label("Rebaking posts for upload #{upload.id}.......", i, secure_upload_count)
upload.posts.each(&:rebake!)
upload.save
i += 1
end
end
RakeHelpers.print_status_with_label("Rebaking and updating complete! ", i, secure_upload_count)
puts ""
end
puts "Secure media is now disabled!", ""
end
##
# Run this task whenever the secure_media or login_required
# settings are changed for a Discourse instance to update
# the upload secure flag and S3 upload ACLs.
task "uploads:ensure_correct_acl" => :environment do
RailsMultisite::ConnectionManagement.each_connection do |db|
unless Discourse.store.external?
puts "This task only works for external storage."
exit 1
end
puts "Ensuring correct ACL for uploads in #{db}...", ""
Upload.transaction do
mark_secure_in_loop_because_no_login_required = false
# First of all only get relevant uploads (supported media).
#
# Also only get uploads that are not for a theme or a site setting, so only
# get post related uploads.
uploads_with_supported_media = Upload.includes(:posts, :optimized_images).where(
"LOWER(original_filename) SIMILAR TO '%\.(jpg|jpeg|png|gif|svg|ico|mp3|ogg|wav|m4a|mov|mp4|webm|ogv)'"
).joins(:post_uploads)
puts "There are #{uploads_with_supported_media.count} upload(s) with supported media that could be marked secure.", ""
# Simply mark all these uploads as secure if login_required because no anons will be able to access them
if SiteSetting.login_required?
mark_all_as_secure_login_required(uploads_with_supported_media)
else
# If NOT login_required, then we have to go for the other slower flow, where in the loop
# we mark the upload as secure if the first post it is used in is with_secure_media?
mark_secure_in_loop_because_no_login_required = true
puts "Marking posts as secure in the next step because login_required is false."
end
puts "", "Rebaking #{uploads_with_supported_media.count} upload posts and updating ACLs in S3.", ""
upload_ids_to_mark_as_secure, uploads_skipped_because_of_error = update_acls_and_rebake_upload_posts(
uploads_with_supported_media, mark_secure_in_loop_because_no_login_required
)
log_rebake_errors(uploads_skipped_because_of_error)
mark_specific_uploads_as_secure_no_login_required(upload_ids_to_mark_as_secure)
end
end
puts "", "Done"
end
def mark_all_as_secure_login_required(uploads_with_supported_media)
puts "Marking #{uploads_with_supported_media.count} upload(s) as secure because login_required is true.", ""
uploads_with_supported_media.update_all(secure: true)
puts "Finished marking upload(s) as secure."
end
def log_rebake_errors(uploads_skipped_because_of_error)
return if uploads_skipped_because_of_error.empty?
puts "Skipped the following uploads due to error:", ""
uploads_skipped_because_of_error.each do |message|
puts message
end
end
def mark_specific_uploads_as_secure_no_login_required(upload_ids_to_mark_as_secure)
return if upload_ids_to_mark_as_secure.empty?
puts "Marking #{upload_ids_to_mark_as_secure.length} uploads as secure because their first post contains secure media."
Upload.where(id: upload_ids_to_mark_as_secure).update_all(secure: true)
puts "Finished marking uploads as secure."
end
def update_acls_and_rebake_upload_posts(uploads_with_supported_media, mark_secure_in_loop_because_no_login_required)
upload_ids_to_mark_as_secure = []
uploads_skipped_because_of_error = []
i = 0
uploads_with_supported_media.find_each(batch_size: 50) do |upload_with_supported_media|
RakeHelpers.print_status_with_label("Updating ACL for upload.......", i, uploads_with_supported_media.count)
Discourse.store.update_upload_ACL(upload_with_supported_media)
RakeHelpers.print_status_with_label("Rebaking posts for upload.....", i, uploads_with_supported_media.count)
begin
upload_with_supported_media.posts.each { |post| post.rebake! }
if mark_secure_in_loop_because_no_login_required
first_post_with_upload = upload_with_supported_media.posts.order(sort_order: :asc).first
mark_secure = first_post_with_upload ? first_post_with_upload.with_secure_media? : false
upload_ids_to_mark_as_secure << upload_with_supported_media.id if mark_secure
end
rescue => e
uploads_skipped_because_of_error << "#{upload_with_supported_media.original_filename} (#{upload_with_supported_media.url}) #{e.message}"
end
i += 1
end
RakeHelpers.print_status_with_label("Rebaking complete! ", i, uploads_with_supported_media.count)
puts ""
[upload_ids_to_mark_as_secure, uploads_skipped_because_of_error]
end
def inline_uploads(post)
replaced = false
original_raw = post.raw
post.raw = post.raw.gsub(/(\((\/uploads\S+).*\))/) do
upload = Upload.find_by(url: $2)
if !upload
data = Upload.extract_url($2)
if data && sha1 = data[2]
upload = Upload.find_by(sha1: sha1)
if !upload
sha_map = JSON.parse(post.custom_fields["UPLOAD_SHA1_MAP"] || "{}")
if mapped_sha = sha_map[sha1]
upload = Upload.find_by(sha1: mapped_sha)
end
end
end
end
result = $1
if upload&.id
result.sub!($2, upload.short_url)
replaced = true
else
puts "Upload not found #{$2} in Post #{post.id} - #{post.url}"
end
result
end
if replaced
puts "Corrected image urls in #{post.full_url} raw backup stored in custom field"
post.custom_fields["BACKUP_POST_RAW"] = original_raw
post.save_custom_fields
post.save!(validate: false)
post.rebake!
end
end
def inline_img_tags(post)
replaced = false
original_raw = post.raw
post.raw = post.raw.gsub(/(<img\s+src=["'](\/uploads\/[^'"]*)["'].*>)/i) do
next if $2.include?("..")
upload = Upload.find_by(url: $2)
if !upload
data = Upload.extract_url($2)
if data && sha1 = data[2]
upload = Upload.find_by(sha1: sha1)
end
end
if !upload
local_file = File.join(Rails.root, "public", $2)
if File.exist?(local_file)
File.open(local_file) do |f|
upload = UploadCreator.new(f, "image").create_for(post.user_id)
end
end
end
if upload
replaced = true
"![image](#{upload.short_url})"
else
puts "skipping missing upload in #{post.full_url} #{$1}"
$1
end
end
if replaced
puts "Corrected image urls in #{post.full_url} raw backup stored in custom field"
post.custom_fields["BACKUP_POST_RAW"] = original_raw
post.save_custom_fields
post.save!(validate: false)
post.rebake!
end
end
def fix_relative_links
Post.where('raw like ?', '%](/uploads%').find_each do |post|
inline_uploads(post)
end
Post.where("raw ilike ?", '%<img%src=%/uploads/%>%').find_each do |post|
inline_img_tags(post)
end
end
task "uploads:fix_relative_upload_links" => :environment do
if RailsMultisite::ConnectionManagement.current_db != "default"
fix_relative_links
else
RailsMultisite::ConnectionManagement.each_connection do
fix_relative_links
end
end
end