mirror of
https://github.com/discourse/discourse.git
synced 2025-10-03 17:21:20 +08:00
In #33558 we removed automatic_backups_enabled setting, and instead rely on backup_frequency being blank to disable. There was a big oversight there with the site setting type system, which will coerce the value to an integer. It also makes it so you can't blank the value out in the UI. This is a "fix forward" solution where instead of "set to blank to disable" we do "set to 0 to disable". This works along the grain of the site setting type system for a workable fix where we don't have to deal with the irreversible migration in the previous change. We can potentially go and add in "nullable" to the type system at a later point.
799 lines
22 KiB
Ruby
799 lines
22 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
require_relative "base"
|
|
require "mysql2"
|
|
require "htmlentities"
|
|
require "parallel"
|
|
|
|
class BulkImport::VBulletin < BulkImport::Base
|
|
TABLE_PREFIX = ENV["TABLE_PREFIX"] || "vb_"
|
|
SUSPENDED_TILL = Date.new(3000, 1, 1)
|
|
ATTACHMENT_DIR = ENV["ATTACHMENT_DIR"] || "/shared/import/data/attachments"
|
|
AVATAR_DIR = ENV["AVATAR_DIR"] || "/shared/import/data/customavatars"
|
|
|
|
def initialize
|
|
super
|
|
|
|
host = ENV["DB_HOST"] || "localhost"
|
|
username = ENV["DB_USERNAME"] || "root"
|
|
password = ENV["DB_PASSWORD"]
|
|
database = ENV["DB_NAME"] || "vbulletin"
|
|
charset = ENV["DB_CHARSET"] || "utf8"
|
|
|
|
@html_entities = HTMLEntities.new
|
|
@encoding = CHARSET_MAP[charset]
|
|
|
|
@client =
|
|
Mysql2::Client.new(
|
|
host: host,
|
|
username: username,
|
|
password: password,
|
|
database: database,
|
|
encoding: charset,
|
|
reconnect: true,
|
|
)
|
|
|
|
@client.query_options.merge!(as: :array, cache_rows: false)
|
|
|
|
@has_post_thanks = mysql_query(<<-SQL).to_a.count > 0
|
|
SELECT `COLUMN_NAME`
|
|
FROM `INFORMATION_SCHEMA`.`COLUMNS`
|
|
WHERE `TABLE_SCHEMA`='#{database}'
|
|
AND `TABLE_NAME`='user'
|
|
AND `COLUMN_NAME` LIKE 'post_thanks_%'
|
|
SQL
|
|
|
|
@user_ids_by_email = {}
|
|
end
|
|
|
|
def execute
|
|
# enable as per requirement:
|
|
# SiteSetting.backup_frequency = 0
|
|
# SiteSetting.disable_emails = "non-staff"
|
|
# SiteSetting.authorized_extensions = '*'
|
|
# SiteSetting.max_image_size_kb = 102400
|
|
# SiteSetting.max_attachment_size_kb = 102400
|
|
# SiteSetting.clean_up_uploads = false
|
|
# SiteSetting.clean_orphan_uploads_grace_period_hours = 43200
|
|
|
|
import_groups
|
|
import_users
|
|
import_group_users
|
|
|
|
import_user_emails
|
|
import_user_stats
|
|
|
|
import_user_passwords
|
|
import_user_salts
|
|
import_user_profiles
|
|
|
|
import_categories
|
|
import_topics
|
|
import_posts
|
|
|
|
import_likes
|
|
|
|
import_private_topics
|
|
import_topic_allowed_users
|
|
import_private_posts
|
|
|
|
create_permalink_file
|
|
import_attachments
|
|
import_avatars
|
|
import_signatures
|
|
end
|
|
|
|
def execute_after
|
|
max_age = SiteSetting.delete_user_max_post_age
|
|
SiteSetting.delete_user_max_post_age = 50 * 365
|
|
|
|
merge_duplicated_users
|
|
|
|
SiteSetting.delete_user_max_post_age = max_age
|
|
end
|
|
|
|
def import_groups
|
|
puts "", "Importing groups..."
|
|
|
|
groups = mysql_stream <<-SQL
|
|
SELECT usergroupid, title, description, usertitle
|
|
FROM #{TABLE_PREFIX}usergroup
|
|
WHERE usergroupid > #{@last_imported_group_id}
|
|
ORDER BY usergroupid
|
|
SQL
|
|
|
|
create_groups(groups) do |row|
|
|
{
|
|
imported_id: row[0],
|
|
name: normalize_text(row[1]),
|
|
bio_raw: normalize_text(row[2]),
|
|
title: normalize_text(row[3]),
|
|
}
|
|
end
|
|
end
|
|
|
|
def import_users
|
|
puts "", "Importing users..."
|
|
|
|
users = mysql_stream <<-SQL
|
|
SELECT u.userid, username, email, joindate, birthday, ipaddress, u.usergroupid, bandate, liftdate
|
|
FROM #{TABLE_PREFIX}user u
|
|
LEFT JOIN #{TABLE_PREFIX}userban ub ON ub.userid = u.userid
|
|
WHERE u.userid > #{@last_imported_user_id}
|
|
ORDER BY u.userid
|
|
SQL
|
|
|
|
create_users(users) do |row|
|
|
u = {
|
|
imported_id: row[0],
|
|
username: normalize_text(row[1]),
|
|
name: normalize_text(row[1]),
|
|
email: row[2],
|
|
created_at: Time.zone.at(row[3]),
|
|
date_of_birth: parse_birthday(row[4]),
|
|
primary_group_id: group_id_from_imported_id(row[6]),
|
|
}
|
|
u[:ip_address] = row[5][/\b(?:\d{1,3}\.){3}\d{1,3}\b/] if row[5].present?
|
|
if row[7]
|
|
u[:suspended_at] = Time.zone.at(row[7])
|
|
u[:suspended_till] = row[8] > 0 ? Time.zone.at(row[8]) : SUSPENDED_TILL
|
|
end
|
|
u
|
|
end
|
|
end
|
|
|
|
def import_user_emails
|
|
puts "", "Importing user emails..."
|
|
|
|
users = mysql_stream <<-SQL
|
|
SELECT u.userid, email, joindate
|
|
FROM #{TABLE_PREFIX}user u
|
|
WHERE u.userid > #{@last_imported_user_id}
|
|
ORDER BY u.userid
|
|
SQL
|
|
|
|
create_user_emails(users) do |row|
|
|
user_id, email = row[0..1]
|
|
|
|
@user_ids_by_email[email.downcase] ||= []
|
|
user_ids = @user_ids_by_email[email.downcase] << user_id
|
|
|
|
if user_ids.count > 1
|
|
# fudge email to avoid conflicts; accounts from the 2nd and on will later be merged back into the first
|
|
# NOTE: gsub! is used to avoid creating a new (frozen) string
|
|
email.gsub!(/^/, SecureRandom.hex)
|
|
end
|
|
|
|
{
|
|
imported_id: user_id,
|
|
imported_user_id: user_id,
|
|
email: email,
|
|
created_at: Time.zone.at(row[2]),
|
|
}
|
|
end
|
|
|
|
# for debugging purposes; not used operationally
|
|
save_duplicated_users
|
|
end
|
|
|
|
def import_user_stats
|
|
puts "", "Importing user stats..."
|
|
|
|
users = mysql_stream <<-SQL
|
|
SELECT u.userid, joindate, posts, COUNT(t.threadid) AS threads, p.dateline
|
|
#{", post_thanks_user_amount, post_thanks_thanked_times" if @has_post_thanks}
|
|
FROM #{TABLE_PREFIX}user u
|
|
LEFT OUTER JOIN #{TABLE_PREFIX}post p ON p.postid = u.lastpostid
|
|
LEFT OUTER JOIN #{TABLE_PREFIX}thread t ON u.userid = t.postuserid
|
|
WHERE u.userid > #{@last_imported_user_id}
|
|
GROUP BY u.userid
|
|
ORDER BY u.userid
|
|
SQL
|
|
|
|
create_user_stats(users) do |row|
|
|
user = {
|
|
imported_id: row[0],
|
|
imported_user_id: row[0],
|
|
new_since: Time.zone.at(row[1]),
|
|
post_count: row[2],
|
|
topic_count: row[3],
|
|
first_post_created_at: row[4] && Time.zone.at(row[4]),
|
|
}
|
|
|
|
if @has_post_thanks
|
|
user[:likes_given] = row[5]
|
|
user[:likes_received] = row[6]
|
|
end
|
|
|
|
user
|
|
end
|
|
end
|
|
|
|
def import_group_users
|
|
puts "", "Importing group users..."
|
|
|
|
group_users = mysql_stream <<-SQL
|
|
SELECT usergroupid, userid
|
|
FROM #{TABLE_PREFIX}user
|
|
WHERE userid > #{@last_imported_user_id}
|
|
SQL
|
|
|
|
create_group_users(group_users) do |row|
|
|
{ group_id: group_id_from_imported_id(row[0]), user_id: user_id_from_imported_id(row[1]) }
|
|
end
|
|
end
|
|
|
|
def import_user_passwords
|
|
puts "", "Importing user passwords..."
|
|
|
|
user_passwords = mysql_stream <<-SQL
|
|
SELECT userid, password
|
|
FROM #{TABLE_PREFIX}user
|
|
WHERE userid > #{@last_imported_user_id}
|
|
ORDER BY userid
|
|
SQL
|
|
|
|
create_custom_fields("user", "password", user_passwords) do |row|
|
|
{ record_id: user_id_from_imported_id(row[0]), value: row[1] }
|
|
end
|
|
end
|
|
|
|
def import_user_salts
|
|
puts "", "Importing user salts..."
|
|
|
|
user_salts = mysql_stream <<-SQL
|
|
SELECT userid, salt
|
|
FROM #{TABLE_PREFIX}user
|
|
WHERE userid > #{@last_imported_user_id}
|
|
AND LENGTH(COALESCE(salt, '')) > 0
|
|
ORDER BY userid
|
|
SQL
|
|
|
|
create_custom_fields("user", "salt", user_salts) do |row|
|
|
{ record_id: user_id_from_imported_id(row[0]), value: row[1] }
|
|
end
|
|
end
|
|
|
|
def import_user_profiles
|
|
puts "", "Importing user profiles..."
|
|
|
|
user_profiles = mysql_stream <<-SQL
|
|
SELECT userid, homepage, profilevisits
|
|
FROM #{TABLE_PREFIX}user
|
|
WHERE userid > #{@last_imported_user_id}
|
|
ORDER BY userid
|
|
SQL
|
|
|
|
create_user_profiles(user_profiles) do |row|
|
|
{
|
|
user_id: user_id_from_imported_id(row[0]),
|
|
website:
|
|
(
|
|
begin
|
|
URI.parse(row[1]).to_s
|
|
rescue StandardError
|
|
nil
|
|
end
|
|
),
|
|
views: row[2],
|
|
}
|
|
end
|
|
end
|
|
|
|
def import_categories
|
|
puts "", "Importing categories..."
|
|
|
|
categories = mysql_query(<<-SQL).to_a
|
|
select
|
|
forumid,
|
|
parentid,
|
|
case
|
|
when forumid in (
|
|
select distinct forumid from (
|
|
select forumid, title, count(title)
|
|
from forum
|
|
group by replace(replace(title, ':', ''), '&', '')
|
|
having count(title) > 1
|
|
) as duplicated_forum_ids
|
|
)
|
|
then
|
|
-- deduplicate by fudging the title; categories will needed to be manually merged later
|
|
concat(title, '_DUPLICATE_', forumid)
|
|
else
|
|
title
|
|
end as title,
|
|
description,
|
|
displayorder
|
|
from forum
|
|
order by forumid
|
|
SQL
|
|
|
|
return if categories.empty?
|
|
|
|
parent_categories = categories.select { |c| c[1] == -1 }
|
|
children_categories = categories.select { |c| c[1] != -1 }
|
|
|
|
parent_category_ids = Set.new parent_categories.map { |c| c[0] }
|
|
|
|
# cut down the tree to only 2 levels of categories
|
|
children_categories.each do |cc|
|
|
cc[1] = categories.find { |c| c[0] == cc[1] }[1] until parent_category_ids.include?(cc[1])
|
|
end
|
|
|
|
puts "", "Importing parent categories..."
|
|
create_categories(parent_categories) do |row|
|
|
{
|
|
imported_id: row[0],
|
|
name: normalize_text(row[2]),
|
|
description: normalize_text(row[3]),
|
|
position: row[4],
|
|
}
|
|
end
|
|
|
|
puts "", "Importing children categories..."
|
|
create_categories(children_categories) do |row|
|
|
{
|
|
imported_id: row[0],
|
|
name: normalize_text(row[2]),
|
|
description: normalize_text(row[3]),
|
|
position: row[4],
|
|
parent_category_id: category_id_from_imported_id(row[1]),
|
|
}
|
|
end
|
|
end
|
|
|
|
def import_topics
|
|
puts "", "Importing topics..."
|
|
|
|
topics = mysql_stream <<-SQL
|
|
SELECT threadid, title, forumid, postuserid, open, dateline, views, visible, sticky
|
|
FROM #{TABLE_PREFIX}thread t
|
|
WHERE threadid > #{@last_imported_topic_id}
|
|
AND EXISTS (SELECT 1 FROM #{TABLE_PREFIX}post p WHERE p.threadid = t.threadid)
|
|
ORDER BY threadid
|
|
SQL
|
|
|
|
create_topics(topics) do |row|
|
|
created_at = Time.zone.at(row[5])
|
|
|
|
t = {
|
|
imported_id: row[0],
|
|
title: normalize_text(row[1]),
|
|
category_id: category_id_from_imported_id(row[2]),
|
|
user_id: user_id_from_imported_id(row[3]),
|
|
closed: row[4] == 0,
|
|
created_at: created_at,
|
|
views: row[6],
|
|
visible: row[7] == 1,
|
|
}
|
|
|
|
t[:pinned_at] = created_at if row[8] == 1
|
|
|
|
t
|
|
end
|
|
end
|
|
|
|
def import_posts
|
|
puts "", "Importing posts..."
|
|
|
|
posts = mysql_stream <<-SQL
|
|
SELECT postid, p.threadid, parentid, userid, p.dateline, p.visible, pagetext
|
|
#{", post_thanks_amount" if @has_post_thanks}
|
|
|
|
FROM #{TABLE_PREFIX}post p
|
|
JOIN #{TABLE_PREFIX}thread t ON t.threadid = p.threadid
|
|
WHERE postid > #{@last_imported_post_id}
|
|
ORDER BY postid
|
|
SQL
|
|
|
|
create_posts(posts) do |row|
|
|
topic_id = topic_id_from_imported_id(row[1])
|
|
replied_post_topic_id = topic_id_from_imported_post_id(row[2])
|
|
reply_to_post_number =
|
|
topic_id == replied_post_topic_id ? post_number_from_imported_id(row[2]) : nil
|
|
|
|
post = {
|
|
imported_id: row[0],
|
|
topic_id: topic_id,
|
|
reply_to_post_number: reply_to_post_number,
|
|
user_id: user_id_from_imported_id(row[3]),
|
|
created_at: Time.zone.at(row[4]),
|
|
hidden: row[5] != 1,
|
|
raw: normalize_text(row[6]),
|
|
}
|
|
|
|
post[:like_count] = row[7] if @has_post_thanks
|
|
post
|
|
end
|
|
end
|
|
|
|
def import_likes
|
|
return unless @has_post_thanks
|
|
puts "", "Importing likes..."
|
|
|
|
@imported_likes = Set.new
|
|
@last_imported_post_id = 0
|
|
|
|
post_thanks = mysql_stream <<-SQL
|
|
SELECT postid, userid, date
|
|
FROM #{TABLE_PREFIX}post_thanks
|
|
WHERE postid > #{@last_imported_post_id}
|
|
ORDER BY postid
|
|
SQL
|
|
|
|
create_post_actions(post_thanks) do |row|
|
|
post_id = post_id_from_imported_id(row[0])
|
|
user_id = user_id_from_imported_id(row[1])
|
|
|
|
next if post_id.nil? || user_id.nil?
|
|
next if @imported_likes.add?([post_id, user_id]).nil?
|
|
|
|
{
|
|
post_id: post_id_from_imported_id(row[0]),
|
|
user_id: user_id_from_imported_id(row[1]),
|
|
post_action_type_id: 2,
|
|
created_at: Time.zone.at(row[2]),
|
|
}
|
|
end
|
|
end
|
|
|
|
def import_private_topics
|
|
puts "", "Importing private topics..."
|
|
|
|
@imported_topics = {}
|
|
|
|
topics = mysql_stream <<-SQL
|
|
SELECT pmtextid, title, fromuserid, touserarray, dateline
|
|
FROM #{TABLE_PREFIX}pmtext
|
|
WHERE pmtextid > (#{@last_imported_private_topic_id - PRIVATE_OFFSET})
|
|
ORDER BY pmtextid
|
|
SQL
|
|
|
|
create_topics(topics) do |row|
|
|
title = extract_pm_title(row[1])
|
|
user_ids = [row[2], row[3].scan(/i:(\d+)/)].flatten.map(&:to_i).sort
|
|
key = [title, user_ids]
|
|
|
|
next if @imported_topics.has_key?(key)
|
|
@imported_topics[key] = row[0] + PRIVATE_OFFSET
|
|
{
|
|
archetype: Archetype.private_message,
|
|
imported_id: row[0] + PRIVATE_OFFSET,
|
|
title: title,
|
|
user_id: user_id_from_imported_id(row[2]),
|
|
created_at: Time.zone.at(row[4]),
|
|
}
|
|
end
|
|
end
|
|
|
|
def import_topic_allowed_users
|
|
puts "", "Importing topic allowed users..."
|
|
|
|
allowed_users = Set.new
|
|
|
|
mysql_stream(<<-SQL).each do |row|
|
|
SELECT pmtextid, touserarray
|
|
FROM #{TABLE_PREFIX}pmtext
|
|
WHERE pmtextid > (#{@last_imported_private_topic_id - PRIVATE_OFFSET})
|
|
ORDER BY pmtextid
|
|
SQL
|
|
next unless topic_id = topic_id_from_imported_id(row[0] + PRIVATE_OFFSET)
|
|
row[1]
|
|
.scan(/i:(\d+)/)
|
|
.flatten
|
|
.each do |id|
|
|
next unless user_id = user_id_from_imported_id(id)
|
|
allowed_users << [topic_id, user_id]
|
|
end
|
|
end
|
|
|
|
create_topic_allowed_users(allowed_users) { |row| { topic_id: row[0], user_id: row[1] } }
|
|
end
|
|
|
|
def import_private_posts
|
|
puts "", "Importing private posts..."
|
|
|
|
posts = mysql_stream <<-SQL
|
|
SELECT pmtextid, title, fromuserid, touserarray, dateline, message
|
|
FROM #{TABLE_PREFIX}pmtext
|
|
WHERE pmtextid > #{@last_imported_private_post_id - PRIVATE_OFFSET}
|
|
ORDER BY pmtextid
|
|
SQL
|
|
|
|
create_posts(posts) do |row|
|
|
title = extract_pm_title(row[1])
|
|
user_ids = [row[2], row[3].scan(/i:(\d+)/)].flatten.map(&:to_i).sort
|
|
key = [title, user_ids]
|
|
|
|
next unless topic_id = topic_id_from_imported_id(@imported_topics[key])
|
|
|
|
{
|
|
imported_id: row[0] + PRIVATE_OFFSET,
|
|
topic_id: topic_id,
|
|
user_id: user_id_from_imported_id(row[2]),
|
|
created_at: Time.zone.at(row[4]),
|
|
raw: normalize_text(row[5]),
|
|
}
|
|
end
|
|
end
|
|
|
|
def create_permalink_file
|
|
puts "", "Creating Permalink File...", ""
|
|
|
|
total = Topic.listable_topics.count
|
|
start = Time.now
|
|
|
|
i = 0
|
|
File.open(File.expand_path("../vb_map.csv", __FILE__), "w") do |f|
|
|
Topic.listable_topics.find_each do |topic|
|
|
i += 1
|
|
pcf = topic.posts.includes(:_custom_fields).where(post_number: 1).first.custom_fields
|
|
if pcf && pcf["import_id"]
|
|
id = pcf["import_id"].split("-").last
|
|
|
|
f.print ["XXX#{id} YYY#{topic.id}"].to_csv
|
|
print "\r%7d/%7d - %6d/sec" % [i, total, i.to_f / (Time.now - start)] if i % 5000 == 0
|
|
end
|
|
end
|
|
end
|
|
end
|
|
|
|
# find the uploaded file information from the db
|
|
def find_upload(post, attachment_id)
|
|
sql =
|
|
"SELECT a.attachmentid attachment_id, a.userid user_id, a.filename filename
|
|
FROM #{TABLE_PREFIX}attachment a
|
|
WHERE a.attachmentid = #{attachment_id}"
|
|
results = mysql_query(sql)
|
|
|
|
unless row = results.first
|
|
puts "Couldn't find attachment record for attachment_id = #{attachment_id} post.id = #{post.id}"
|
|
return
|
|
end
|
|
|
|
attachment_id = row[0]
|
|
user_id = row[1]
|
|
db_filename = row[2]
|
|
|
|
filename =
|
|
File.join(ATTACHMENT_DIR, user_id.to_s.split("").join("/"), "#{attachment_id}.attach")
|
|
real_filename = db_filename
|
|
real_filename.prepend SecureRandom.hex if real_filename[0] == "."
|
|
|
|
unless File.exist?(filename)
|
|
puts "Attachment file #{row.inspect} doesn't exist"
|
|
return nil
|
|
end
|
|
|
|
upload = create_upload(post.user.id, filename, real_filename)
|
|
|
|
if upload.nil? || upload.errors.any?
|
|
puts "Upload not valid :("
|
|
puts upload.errors.inspect if upload
|
|
return
|
|
end
|
|
|
|
[upload, real_filename]
|
|
rescue Mysql2::Error => e
|
|
puts "SQL Error"
|
|
puts e.message
|
|
puts sql
|
|
end
|
|
|
|
def import_attachments
|
|
puts "", "importing attachments..."
|
|
|
|
RateLimiter.disable
|
|
current_count = 0
|
|
total_count = Post.count
|
|
success_count = 0
|
|
fail_count = 0
|
|
|
|
attachment_regex = %r{\[attach[^\]]*\](\d+)\[/attach\]}i
|
|
|
|
Post.find_each do |post|
|
|
current_count += 1
|
|
print_status current_count, total_count
|
|
|
|
new_raw = post.raw.dup
|
|
new_raw.gsub!(attachment_regex) do |s|
|
|
matches = attachment_regex.match(s)
|
|
attachment_id = matches[1]
|
|
|
|
upload, filename = find_upload(post, attachment_id)
|
|
unless upload
|
|
fail_count += 1
|
|
next
|
|
# should we strip invalid attach tags?
|
|
end
|
|
|
|
html_for_upload(upload, filename)
|
|
end
|
|
|
|
if new_raw != post.raw
|
|
PostRevisor.new(post).revise!(
|
|
post.user,
|
|
{ raw: new_raw },
|
|
bypass_bump: true,
|
|
edit_reason: "Import attachments from vBulletin",
|
|
)
|
|
end
|
|
|
|
success_count += 1
|
|
end
|
|
|
|
puts "", "imported #{success_count} attachments... failed: #{fail_count}."
|
|
RateLimiter.enable
|
|
end
|
|
|
|
def import_avatars
|
|
if AVATAR_DIR && File.exist?(AVATAR_DIR)
|
|
puts "", "importing user avatars"
|
|
|
|
RateLimiter.disable
|
|
start = Time.now
|
|
count = 0
|
|
|
|
Dir.foreach(AVATAR_DIR) do |item|
|
|
print "\r%7d - %6d/sec" % [count, count.to_f / (Time.now - start)]
|
|
|
|
next if item == (".") || item == ("..") || item == (".DS_Store")
|
|
next unless item =~ /avatar(\d+)_(\d).gif/
|
|
scan = item.scan(/avatar(\d+)_(\d).gif/)
|
|
next if scan[0][0].blank?
|
|
u = UserCustomField.find_by(name: "import_id", value: scan[0][0]).try(:user)
|
|
next if u.blank?
|
|
# raise "User not found for id #{user_id}" if user.blank?
|
|
|
|
photo_real_filename = File.join(AVATAR_DIR, item)
|
|
puts "#{photo_real_filename} not found" unless File.exist?(photo_real_filename)
|
|
|
|
upload = create_upload(u.id, photo_real_filename, File.basename(photo_real_filename))
|
|
count += 1
|
|
if upload.persisted?
|
|
u.import_mode = false
|
|
u.create_user_avatar
|
|
u.import_mode = true
|
|
u.user_avatar.update(custom_upload_id: upload.id)
|
|
u.update(uploaded_avatar_id: upload.id)
|
|
else
|
|
puts "Error: Upload did not persist for #{u.username} #{photo_real_filename}!"
|
|
end
|
|
end
|
|
|
|
puts "", "imported #{count} avatars..."
|
|
RateLimiter.enable
|
|
end
|
|
end
|
|
|
|
def import_signatures
|
|
puts "Importing user signatures..."
|
|
|
|
total_count = mysql_query(<<-SQL).first[0].to_i
|
|
SELECT COUNT(userid) count
|
|
FROM #{TABLE_PREFIX}sigparsed
|
|
SQL
|
|
current_count = 0
|
|
|
|
user_signatures = mysql_stream <<-SQL
|
|
SELECT userid, signatureparsed
|
|
FROM #{TABLE_PREFIX}sigparsed
|
|
ORDER BY userid
|
|
SQL
|
|
|
|
user_signatures.each do |sig|
|
|
current_count += 1
|
|
print_status current_count, total_count
|
|
user_id = sig[0]
|
|
user_sig = sig[1]
|
|
next if user_id.blank? || user_sig.blank?
|
|
|
|
u = UserCustomField.find_by(name: "import_id", value: user_id).try(:user)
|
|
next if u.blank?
|
|
|
|
# can not hold dupes
|
|
UserCustomField.where(
|
|
user_id: u.id,
|
|
name: %w[see_signatures signature_raw signature_cooked],
|
|
).destroy_all
|
|
|
|
user_sig.gsub!(%r{\[/?sigpic\]}i, "")
|
|
|
|
UserCustomField.create!(user_id: u.id, name: "see_signatures", value: true)
|
|
UserCustomField.create!(user_id: u.id, name: "signature_raw", value: user_sig)
|
|
UserCustomField.create!(
|
|
user_id: u.id,
|
|
name: "signature_cooked",
|
|
value: PrettyText.cook(user_sig, omit_nofollow: false),
|
|
)
|
|
end
|
|
end
|
|
|
|
def merge_duplicated_users
|
|
count = 0
|
|
total_count = 0
|
|
|
|
duplicated = {}
|
|
@user_ids_by_email
|
|
.select { |e, ids| ids.count > 1 }
|
|
.each_with_index do |(email, ids), i|
|
|
duplicated[email] = [ids, i]
|
|
count += 1
|
|
total_count += ids.count
|
|
end
|
|
|
|
puts "", "Merging #{total_count} duplicated users across #{count} distinct emails..."
|
|
|
|
start = Time.now
|
|
|
|
Parallel.each duplicated do |email, (user_ids, i)|
|
|
# nothing to do about these - they will remain a randomized hex string
|
|
next unless email.presence
|
|
|
|
# queried one by one to ensure ordering
|
|
first, *rest =
|
|
user_ids.map do |id|
|
|
UserCustomField.includes(:user).find_by!(name: "import_id", value: id).user
|
|
end
|
|
|
|
rest.each do |dup|
|
|
UserMerger.new(dup, first).merge!
|
|
first.reload
|
|
printf "."
|
|
end
|
|
|
|
print "\n%6d/%6d - %6d/sec" % [i, count, i.to_f / (Time.now - start)] if i % 10 == 0
|
|
end
|
|
|
|
puts
|
|
end
|
|
|
|
def save_duplicated_users
|
|
File.open("duplicated_users.json", "w+") { |f| f.puts @user_ids_by_email.to_json }
|
|
end
|
|
|
|
def read_duplicated_users
|
|
@user_ids_by_email = JSON.parse File.read("duplicated_users.json")
|
|
end
|
|
|
|
def extract_pm_title(title)
|
|
normalize_text(title).scrub.gsub(/^Re\s*:\s*/i, "")
|
|
end
|
|
|
|
def parse_birthday(birthday)
|
|
return if birthday.blank?
|
|
date_of_birth =
|
|
begin
|
|
Date.strptime(birthday.gsub(/[^\d-]+/, ""), "%m-%d-%Y")
|
|
rescue StandardError
|
|
nil
|
|
end
|
|
return if date_of_birth.nil?
|
|
if date_of_birth.year < 1904
|
|
Date.new(1904, date_of_birth.month, date_of_birth.day)
|
|
else
|
|
date_of_birth
|
|
end
|
|
end
|
|
|
|
def print_status(current, max, start_time = nil)
|
|
if start_time.present?
|
|
elapsed_seconds = Time.now - start_time
|
|
elements_per_minute = "[%.0f items/min] " % [current / elapsed_seconds.to_f * 60]
|
|
else
|
|
elements_per_minute = ""
|
|
end
|
|
|
|
print "\r%9d / %d (%5.1f%%) %s" % [current, max, current / max.to_f * 100, elements_per_minute]
|
|
end
|
|
|
|
def mysql_stream(sql)
|
|
@client.query(sql, stream: true)
|
|
end
|
|
|
|
def mysql_query(sql)
|
|
@client.query(sql)
|
|
end
|
|
end
|
|
|
|
BulkImport::VBulletin.new.run
|