mirror of
https://github.com/discourse/discourse.git
synced 2025-10-03 17:21:20 +08:00
FIX: Store context for failed uploads (#33546)
This change makes it easier to review failed uploads, especially downloadable ones, by standardizing `skip_reason` and adding a `skip_details` column to the uploads DB's `uploads` table for extended failure information.
This commit is contained in:
parent
e7f856169a
commit
6f73f31a45
1 changed files with 56 additions and 41 deletions
|
@ -25,6 +25,11 @@ module BulkImport
|
|||
UploadMetadata = Struct.new(:original_filename, :origin_url, :description)
|
||||
|
||||
class UploadsImporter
|
||||
class DownloadFailedError < StandardError
|
||||
end
|
||||
class UploadSizeExceededError < DownloadFailedError
|
||||
end
|
||||
|
||||
TRANSACTION_SIZE = 1000
|
||||
QUEUE_SIZE = 1000
|
||||
|
||||
|
@ -129,17 +134,18 @@ module BulkImport
|
|||
current_count = 0
|
||||
|
||||
while !(params = status_queue.pop).nil?
|
||||
begin
|
||||
if params.delete(:skipped) == true
|
||||
skipped_count += 1
|
||||
elsif (error_message = params.delete(:error)) || params[:upload].nil?
|
||||
error_count += 1
|
||||
puts "", "Failed to create upload: #{params[:id]} (#{error_message})", ""
|
||||
end
|
||||
case params.delete(:status)
|
||||
when :error
|
||||
error_count += 1
|
||||
puts "", "Failed to create upload: #{params[:id]} (#{params[:skip_details]})", ""
|
||||
when :skipped
|
||||
skipped_count += 1
|
||||
end
|
||||
|
||||
begin
|
||||
insert(<<~SQL, params)
|
||||
INSERT INTO uploads (id, upload, markdown, skip_reason)
|
||||
VALUES (:id, :upload, :markdown, :skip_reason)
|
||||
INSERT INTO uploads (id, upload, markdown, skip_reason, skip_details)
|
||||
VALUES (:id, :upload, :markdown, :skip_reason, :skip_details)
|
||||
SQL
|
||||
rescue StandardError => e
|
||||
puts "", "Failed to insert upload: #{params[:id]} (#{e.message}))", ""
|
||||
|
@ -195,10 +201,11 @@ module BulkImport
|
|||
|
||||
if !file_exists
|
||||
status_queue << {
|
||||
status: :skipped,
|
||||
id: row["id"],
|
||||
upload: nil,
|
||||
skipped: true,
|
||||
skip_reason: "file not found",
|
||||
skip_reason: "file_not_found",
|
||||
skip_details: nil,
|
||||
}
|
||||
next
|
||||
end
|
||||
|
@ -242,21 +249,24 @@ module BulkImport
|
|||
|
||||
if upload_okay
|
||||
status_queue << {
|
||||
status: :ok,
|
||||
id: row["id"],
|
||||
upload: upload.attributes.to_json,
|
||||
markdown:
|
||||
UploadMarkdown.new(upload).to_markdown(display_name: metadata.description),
|
||||
skip_reason: nil,
|
||||
skip_details: nil,
|
||||
}
|
||||
break
|
||||
elsif retry_count >= 3
|
||||
error_message ||= upload&.errors&.full_messages&.join(", ") || "unknown error"
|
||||
status_queue << {
|
||||
status: :error,
|
||||
id: row["id"],
|
||||
upload: nil,
|
||||
markdown: nil,
|
||||
error: "too many retries: #{error_message}",
|
||||
skip_reason: "too many retries",
|
||||
skip_reason: "too_many_retries",
|
||||
skip_details: "Too many retries: #{error_message}",
|
||||
}
|
||||
break
|
||||
end
|
||||
|
@ -266,11 +276,12 @@ module BulkImport
|
|||
end
|
||||
rescue StandardError => e
|
||||
status_queue << {
|
||||
status: :error,
|
||||
id: row["id"],
|
||||
upload: nil,
|
||||
markdown: nil,
|
||||
error: e.message,
|
||||
skip_reason: "error",
|
||||
skip_reason: e.is_a?(DownloadFailedError) ? "download_error" : "error",
|
||||
skip_details: e.message,
|
||||
}
|
||||
ensure
|
||||
data_file&.close!
|
||||
|
@ -288,42 +299,47 @@ module BulkImport
|
|||
|
||||
def download_file(url:, id:, retry_count: 0)
|
||||
path = download_cache_path(id)
|
||||
original_filename = nil
|
||||
|
||||
if File.exist?(path) && (original_filename = get_original_filename(id))
|
||||
return path, original_filename
|
||||
end
|
||||
|
||||
fd = FinalDestination.new(url)
|
||||
file = nil
|
||||
original_filename = nil
|
||||
|
||||
fd.get do |response, chunk, uri|
|
||||
if file.nil?
|
||||
check_response!(response, uri)
|
||||
original_filename = extract_filename_from_response(response, uri)
|
||||
file = File.open(path, "wb")
|
||||
begin
|
||||
fd = FinalDestination.new(url)
|
||||
|
||||
fd.get do |response, chunk, uri|
|
||||
if file.nil?
|
||||
check_response!(response, uri)
|
||||
original_filename = extract_filename_from_response(response, uri)
|
||||
file = File.open(path, "wb")
|
||||
end
|
||||
|
||||
file.write(chunk)
|
||||
|
||||
if (file_size = file.size) > MAX_FILE_SIZE
|
||||
File.unlink(path)
|
||||
|
||||
raise UploadSizeExceededError,
|
||||
"Upload size #{file.size} bytes exceeds the limit of #{MAX_FILE_SIZE} bytes"
|
||||
end
|
||||
end
|
||||
|
||||
file.write(chunk)
|
||||
return unless file
|
||||
|
||||
if file.size > MAX_FILE_SIZE
|
||||
file.close
|
||||
file.unlink
|
||||
file = nil
|
||||
throw :done
|
||||
end
|
||||
end
|
||||
|
||||
if file
|
||||
file.close
|
||||
insert(
|
||||
"INSERT INTO downloads (id, original_filename) VALUES (?, ?)",
|
||||
[id, original_filename],
|
||||
)
|
||||
return path, original_filename
|
||||
end
|
||||
|
||||
nil
|
||||
[path, original_filename]
|
||||
rescue StandardError => e
|
||||
raise DownloadFailedError, "Failed to download upload from #{url}: #{e.message}"
|
||||
ensure
|
||||
file&.close
|
||||
end
|
||||
end
|
||||
|
||||
def download_cache_path(id)
|
||||
|
@ -337,10 +353,8 @@ module BulkImport
|
|||
|
||||
def check_response!(response, uri)
|
||||
if uri.blank?
|
||||
code = response.code.to_i
|
||||
|
||||
if code >= 400
|
||||
raise "#{code} Error"
|
||||
if response.code.to_i >= 400
|
||||
response.value
|
||||
else
|
||||
throw :done
|
||||
end
|
||||
|
@ -695,7 +709,8 @@ module BulkImport
|
|||
id TEXT PRIMARY KEY NOT NULL,
|
||||
upload JSON_TEXT,
|
||||
markdown TEXT,
|
||||
skip_reason TEXT
|
||||
skip_reason TEXT,
|
||||
skip_details TEXT
|
||||
)
|
||||
SQL
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue