diff --git a/script/import_scripts/nodebb/nodebb.rb b/script/import_scripts/nodebb/nodebb.rb
new file mode 100644
index 00000000000..1c9acfef222
--- /dev/null
+++ b/script/import_scripts/nodebb/nodebb.rb
@@ -0,0 +1,527 @@
+require_relative '../base.rb'
+require_relative './redis'
+
+class ImportScripts::NodeBB < ImportScripts::Base
+  # CHANGE THESE BEFORE RUNNING THE IMPORTER
+  # ATTACHMENT_DIR needs to be an absolute path, not a relative one
+  ATTACHMENT_DIR = '/Users/orlando/www/orlando/NodeBB/public/uploads'
+  BATCH_SIZE = 2000
+
+  def initialize
+    super
+
+    adapter = NodeBB::Redis
+
+    @client = adapter.new(
+      host: "localhost",
+      port: "6379",
+      db: 0
+    )
+
+    load_merged_posts
+  end
+
+  def load_merged_posts
+    puts 'loading merged posts with topics...'
+
+    # keep track of the posts that were merged
+    # into their topics
+    #
+    # { post_id: discourse_post_id }
+    @merged_posts_map = {}
+
+    PostCustomField.where(name: 'import_merged_post_id').pluck(:post_id, :value).each do |post_id, import_id|
+      post = Post.find(post_id)
+      topic_id = post.topic_id
+      nodebb_post_id = post.custom_fields['import_merged_post_id']
+
+      @merged_posts_map[nodebb_post_id] = topic_id
+    end
+  end
+
+  def execute
+    import_groups
+    import_categories
+    import_users
+    add_users_to_groups
+    import_topics
+    import_posts
+    import_attachments
+    post_process_posts
+  end
+
+  def import_groups
+    puts '', 'importing groups'
+
+    groups = @client.groups
+    total_count = groups.count
+    progress_count = 0
+    start_time = Time.now
+
+    create_groups(groups) do |group|
+      {
+        id: group["name"],
+        name: group["slug"]
+      }
+    end
+  end
+
+  def import_categories
+    puts "", "importing top level categories..."
+
+    category_map = @client.categories
+    category_ids = category_map.keys
+    categories = category_map.values
+
+    top_level_categories = categories.select { |c| c["parentCid"] == "0" }
+
+    create_categories(top_level_categories) do |category|
+      {
+        id: category["cid"],
+        name: category["name"],
+        position: category["order"],
+        description: category["description"],
+      }
+    end
+
+    puts "", "importing child categories..."
+
+    children_categories = categories.select { |c| c["parentCid"] != "0" }
+    top_level_category_ids = Set.new(top_level_categories.map { |c| c["cid"] })
+
+    # cut down the tree to only 2 levels of categories
+    children_categories.each do |cc|
+      while !top_level_category_ids.include?(cc["parentCid"])
+        cc["parentCid"] = categories.detect { |c| c["cid"] == cc["parentCid"] }["parentCid"]
+      end
+    end
+
+    create_categories(children_categories) do |category|
+      {
+        id: category["cid"],
+        name: category["name"],
+        position: category["order"],
+        description: category["description"],
+        parent_category_id: category_id_from_imported_category_id(category["parentCid"])
+      }
+    end
+  end
+
+  def import_users
+    puts "", "importing users"
+
+    users = @client.users
+    user_count = users.count
+
+    # we use this group to grant admin to users
+    admin_group = @client.group("administrators")
+
+    create_users(users, total: user_count) do |user|
+      username = user["username"]
+      email = user["email"]
+
+      # skip users without a username
+      next unless username
+
+      # use a fake email for users without one
+      email = fake_email if email.blank?
+
+      # use user.suspended to handle banned users
+      if user["banned"] == "1"
+        suspended_at = Time.now
+        suspended_till = Time.now + 100.years
+      end
+
+      {
+        id: user["uid"],
+        name: user["fullname"],
+        username: username,
+        email: email,
+        admin: admin_group["member_ids"].include?(user["uid"]),
+        website: user["website"],
+        location: user["location"],
+        suspended_at: suspended_at,
+        suspended_till: suspended_till,
+        primary_group_id: group_id_from_imported_group_id(user["groupTitle"]),
+        created_at: user["joindate"],
+        custom_fields: {
+          import_pass: user["password"]
+        },
+        post_create_action: proc do |u|
+          import_profile_picture(user, u)
+          import_profile_background(user, u)
+        end
+      }
+    end
+  end
+
+  def import_profile_picture(old_user, imported_user)
+    picture = old_user["picture"]
+
+    return if picture.blank?
+
+    # URI.scheme returns nil for internal URLs
+    uri = URI.parse(picture)
+    is_external = uri.scheme
+
+    if is_external
+      # download external image
+      begin
+        string_io = open(picture, read_timeout: 5)
+      rescue Net::ReadTimeout
+        puts "timeout downloading avatar for user #{imported_user.id}"
+        return nil
+      end
+
+      # bail out if the download failed
+      return unless string_io
+
+      # try to get filename from headers
+      if string_io.meta["content-disposition"]
+        filename = string_io.meta["content-disposition"].match(/filename=(\"?)(.+)\1/)[2]
+      end
+
+      # try to get it from path
+      filename = File.basename(picture) unless filename
+
+      # can't determine filename, skip upload
+      if !filename
+        puts "Can't determine filename, skipping avatar upload for user #{imported_user.id}"
+        return
+      end
+
+      # write tmp file
+      file = Tempfile.new(filename, encoding: 'ascii-8bit')
+      file.write string_io.read
+      file.rewind
+
+      upload = UploadCreator.new(file, filename).create_for(imported_user.id)
+    else
+      # remove "/assets/uploads/" from attachment
+      picture = picture.gsub("/assets/uploads", "")
+      filepath = File.join(ATTACHMENT_DIR, picture)
+      filename = File.basename(picture)
+
+      unless File.exists?(filepath)
+        puts "Avatar file doesn't exist: #{filename}"
+        return nil
+      end
+
+      upload = create_upload(imported_user.id, filepath, filename)
+    end
+
+    return if !upload.persisted?
+
+    imported_user.create_user_avatar
+    imported_user.user_avatar.update(custom_upload_id: upload.id)
+    imported_user.update(uploaded_avatar_id: upload.id)
+  ensure
+    string_io.close rescue nil
+    file.close rescue nil
+    file.unlink rescue nil
+  end
+
+  def import_profile_background(old_user, imported_user)
+    picture = old_user["cover:url"]
+
+    return if picture.blank?
+
+    # URI.scheme returns nil for internal URLs
+    uri = URI.parse(picture)
+    is_external = uri.scheme
+
+    if is_external
+      begin
+        string_io = open(picture, read_timeout: 5)
+      rescue Net::ReadTimeout
+        return nil
+      end
+
+      if string_io.meta["content-disposition"]
+        filename = string_io.meta["content-disposition"].match(/filename=(\"?)(.+)\1/)[2]
+      end
+
+      filename = File.basename(picture) unless filename
+
+      # can't determine filename, skip upload
+      if !filename
+        puts "Can't determine filename, skipping background upload for user #{imported_user.id}"
+        return
+      end
+
+      # write tmp file
+      file = Tempfile.new(filename, encoding: 'ascii-8bit')
+      file.write string_io.read
+      file.rewind
+
+      upload = UploadCreator.new(file, filename).create_for(imported_user.id)
+    else
+      # remove "/assets/uploads/" from attachment
+      picture = picture.gsub("/assets/uploads", "")
+      filepath = File.join(ATTACHMENT_DIR, picture)
+      filename = File.basename(picture)
+
+      unless File.exists?(filepath)
+        puts "Background file doesn't exist: #{filename}"
+        return nil
+      end
+
+      upload = create_upload(imported_user.id, filepath, filename)
+    end
+
+    return if !upload.persisted?
+
+    imported_user.user_profile.update(profile_background: upload.url)
+  ensure
+    string_io.close rescue nil
+    file.close rescue nil
+    file.unlink rescue nil
+  end
+
+  def add_users_to_groups
+    puts "", "adding users to groups..."
+
+    groups = @client.groups
+    total_count = groups.count
+    progress_count = 0
+    start_time = Time.now
+
+    groups.each do |group|
+      dgroup = find_group_by_import_id(group["name"])
+
+      # skip if we already migrated this group
+      next if dgroup.custom_fields['import_users_added']
+
+      group_member_ids = group["member_ids"].map { |uid| user_id_from_imported_user_id(uid) }
+      group_owner_ids = group["owner_ids"].map { |uid| user_id_from_imported_user_id(uid) }
+
+      # add members
+      dgroup.bulk_add(group_member_ids)
+
+      # reload group
+      dgroup.reload
+
+      # add owners
+      owners = User.find(group_owner_ids)
+      owners.each { |owner| dgroup.add_owner(owner) }
+
+      dgroup.custom_fields['import_users_added'] = true
+      dgroup.save
+
+      progress_count += 1
+      print_status(progress_count, total_count, start_time)
+    end
+  end
+
+  def import_topics
+    puts "", "importing topics..."
+
+    topic_count = @client.topic_count
+
+    batches(BATCH_SIZE) do |offset|
+      topics = @client.topics(offset, BATCH_SIZE)
+
+      break if topics.size < 1
+
+      create_posts(topics, total: topic_count, offset: offset) do |topic|
+        # skip deleted topics
+        if topic["deleted"] == "1"
+          puts "Topic with id #{topic["tid"]} was deleted, skipping"
+          next
+        end
+
+        topic_id = "t#{topic["tid"]}"
+        raw = topic["mainpost"]["content"]
+
+        data = {
+          id: topic_id,
+          user_id: user_id_from_imported_user_id(topic["uid"]) || Discourse::SYSTEM_USER_ID,
+          title: topic["title"],
+          category: category_id_from_imported_category_id(topic["cid"]),
+          raw: raw,
+          created_at: topic["timestamp"],
+          views: topic["viewcount"],
+          closed: topic["locked"] == "1",
+          post_create_action: proc do |p|
+            # keep track of this to use in import_posts
+            p.custom_fields["import_merged_post_id"] = topic["mainPid"]
+            p.save
+            @merged_posts_map[topic["mainPid"]] = p.id
+          end
+        }
+
+        data[:pinned_at] = data[:created_at] if topic["pinned"] == "1"
+
+        data
+      end
+    end
+  end
+
+  def import_posts
+    puts "", "importing posts..."
+
+    post_count = @client.post_count
+
+    batches(BATCH_SIZE) do |offset|
+      posts = @client.posts(offset, BATCH_SIZE)
+
+      break if posts.size < 1
+
+      create_posts(posts, total: post_count, offset: offset) do |post|
+        # skip if it was already imported as a topic's first post
+        next if @merged_posts_map[post["pid"]]
+
+        # skip if it's deleted
+        next if post["deleted"] == "1"
+
+        raw = post["content"]
+        post_id = "p#{post["pid"]}"
+
+        next if raw.blank?
+        topic = topic_lookup_from_imported_post_id("t#{post["tid"]}")
+
+        unless topic
+          puts "Topic with id #{post["tid"]} not found, skipping"
+          next
+        end
+
+        data = {
+          id: post_id,
+          user_id: user_id_from_imported_user_id(post["uid"]) || Discourse::SYSTEM_USER_ID,
+          topic_id: topic[:topic_id],
+          raw: raw,
+          created_at: post["timestamp"],
+          post_create_action: proc do |p|
+            post["upvoted_by"].each do |upvoter_id|
+              user = User.new
+              user.id = user_id_from_imported_user_id(upvoter_id) || Discourse::SYSTEM_USER_ID
+
+              begin
+                PostAction.act(user, p, PostActionType.types[:like])
+              rescue PostAction::AlreadyActed
+              end
+            end
+          end
+        }
+
+        if post['toPid']
+          # the parent might have been imported as a topic's first post
+          parent_id = topic_lookup_from_imported_post_id("t#{post['toPid']}").try(:[], :post_number)
+
+          # otherwise look it up as a regular post
+          parent_id ||= topic_lookup_from_imported_post_id("p#{post['toPid']}").try(:[], :post_number)
+
+          if parent_id
+            data[:reply_to_post_number] = parent_id
+          else
+            puts "Post with id #{post["toPid"]} not found for reply"
+          end
+        end
+
+        data
+      end
+    end
+  end
+
+  def post_process_posts
+    puts "", "Postprocessing posts..."
+
+    current = 0
+    max = Post.count
+    start_time = Time.now
+
+    Post.find_each do |post|
+      begin
+        next if post.custom_fields['import_post_processing']
+
+        new_raw = postprocess_post(post)
+        if new_raw != post.raw
+          post.raw = new_raw
+          post.custom_fields['import_post_processing'] = true
+          post.save
+        end
+      ensure
+        print_status(current += 1, max, start_time)
+      end
+    end
+  end
+
+  def import_attachments
+    puts '', 'importing attachments...'
+
+    current = 0
+    max = Post.count
+    start_time = Time.now
+
+    Post.find_each do |post|
+      current += 1
+      print_status(current, max, start_time)
+
+      new_raw = post.raw.dup
+      new_raw.gsub!(/\[(.*)\]\((\/assets\/uploads\/files\/.*)\)/) do
+        image_md = Regexp.last_match[0]
+        text, filepath = $1, $2
+        filepath = filepath.gsub("/assets/uploads", ATTACHMENT_DIR)
+
+        # if the file exists,
+        # upload the attachment and return the html for it
+        if File.exists?(filepath)
+          filename = File.basename(filepath)
+          upload = create_upload(post.user_id, filepath, filename)
+
+          html_for_upload(upload, filename)
+        else
+          puts "File with path #{filepath} not found for post #{post.id}, upload will be broken"
+          image_md
+        end
+      end
+
+      if new_raw != post.raw
+        PostRevisor.new(post).revise!(post.user, { raw: new_raw }, bypass_bump: true, edit_reason: 'Import attachments from NodeBB')
+      end
+    end
+  end
+
+  def postprocess_post(post)
+    raw = post.raw
+
+    # [link to post](/post/:id)
+    raw = raw.gsub(/\[(.*)\]\(\/post\/(\d+).*\)/) do
+      text, post_id = $1, $2
+
+      if topic_lookup = topic_lookup_from_imported_post_id("p#{post_id}")
+        url = topic_lookup[:url]
+        "[#{text}](#{url})"
+      else
+        "/404"
+      end
+    end
+
+    # [link to topic](/topic/:id)
+    raw = raw.gsub(/\[(.*)\]\(\/topic\/(\d+).*\)/) do
+      text, topic_id = $1, $2
+
+      if topic_lookup = topic_lookup_from_imported_post_id("t#{topic_id}")
+        url = topic_lookup[:url]
+        "[#{text}](#{url})"
+      else
+        "/404"
+      end
+    end
+
+    # replace dashes with underscores in @username mentions
+    raw = raw.gsub(/@([a-zA-Z0-9-]+)/) do
+      username = $1
+
+      username.gsub('-', '_')
+    end
+
+    raw
+  end
+
+  def fake_email
+    SecureRandom.hex << "@domain.com"
+  end
+end
+
+ImportScripts::NodeBB.new.perform
diff --git a/script/import_scripts/nodebb/redis.rb b/script/import_scripts/nodebb/redis.rb
new file mode 100644
index 00000000000..e77e33f3578
--- /dev/null
+++ b/script/import_scripts/nodebb/redis.rb
@@ -0,0 +1,109 @@
+require 'redis'
+
+module NodeBB
+  class Redis
+    attr_reader :redis
+
+    def initialize(params)
+      @redis = ::Redis.new(params)
+    end
+
+    def groups
+      group_keys = redis.zrange('groups:visible:createtime', 0, -1)
+
+      group_keys.map { |group_key| group(group_key) }
+    end
+
+    def group(id)
+      group = redis.hgetall("group:#{id}")
+      group["createtime"] = timestamp_to_date(group["createtime"])
+      group["member_ids"] = redis.zrange("group:#{id}:members", 0, -1)
+      group["owner_ids"] = redis.smembers("group:#{id}:owners")
+
+      group
+    end
+
+    def users
+      user_keys = redis.zrange('users:joindate', 0, -1)
+
+      user_keys.map { |user_key| user(user_key) }
+    end
+
+    def user(id)
+      user = redis.hgetall("user:#{id}")
+
+      user["joindate"] = timestamp_to_date(user["joindate"])
+      user["lastonline"] = timestamp_to_date(user["lastonline"])
+
+      user
+    end
+
+    def categories
+      category_keys = redis.zrange('categories:cid', 0, -1)
+
+      {}.tap do |categories|
+        category_keys.each do |category_key|
+          category = redis.hgetall("category:#{category_key}")
+
+          categories[category['cid']] = category
+        end
+      end
+    end
+
+    def topics(offset = 0, page_size = 2000)
+      # redis ranges are inclusive on both ends,
+      # so move the offset up to continue at the next item
+      offset = offset + 1 unless offset == 0
+      from = offset
+      to = page_size + offset
+
+      topic_keys = redis.zrange('topics:tid', from, to)
+
+      topic_keys.map { |topic_key| topic(topic_key) }
+    end
+
+    def topic(id)
+      topic = redis.hgetall("topic:#{id}")
+
+      topic["lastposttime"] = timestamp_to_date(topic["lastposttime"])
+      topic["timestamp"] = timestamp_to_date(topic["timestamp"])
+      topic["mainpost"] = post(topic["mainPid"])
+
+      topic
+    end
+
+    def topic_count
+      redis.zcard('topics:tid')
+    end
+
+    def posts(offset = 0, page_size = 2000)
+      # redis ranges are inclusive on both ends,
+      # so move the offset up to continue at the next item
+      offset = offset + 1 unless offset == 0
+      from = offset
+      to = page_size + offset
+
+      post_keys = redis.zrange('posts:pid', from, to)
+
+      post_keys.map { |post_key| post(post_key) }
+    end
+
+    def post(id)
+      post = redis.hgetall("post:#{id}")
+      post["timestamp"] = timestamp_to_date(post["timestamp"])
+      post["upvoted_by"] = redis.smembers("pid:#{id}:upvote")
+
+      post
+    end
+
+    def post_count
+      redis.zcard('posts:pid')
+    end
+
+    private
+
+    def timestamp_to_date(createtime)
+      Time.at(createtime[0..-4].to_i).utc if createtime # NodeBB timestamps are in milliseconds
+    end
+  end
+end