diff --git a/script/import_scripts/drupal.rb b/script/import_scripts/drupal.rb index c97c2e632d9..a10615fe320 100644 --- a/script/import_scripts/drupal.rb +++ b/script/import_scripts/drupal.rb @@ -1,16 +1,21 @@ # frozen_string_literal: true require "mysql2" +require "htmlentities" require File.expand_path(File.dirname(__FILE__) + "/base.rb") class ImportScripts::Drupal < ImportScripts::Base - DRUPAL_DB = ENV['DRUPAL_DB'] || "newsite3" + DRUPAL_DB = ENV['DRUPAL_DB'] || "drupal" VID = ENV['DRUPAL_VID'] || 1 + BATCH_SIZE = 1000 + ATTACHMENT_DIR = "/root/files/upload" def initialize super + @htmlentities = HTMLEntities.new + @client = Mysql2::Client.new( host: "localhost", username: "root", @@ -19,142 +24,210 @@ class ImportScripts::Drupal < ImportScripts::Base ) end - def categories_query - @client.query("SELECT tid, name, description FROM taxonomy_term_data WHERE vid = #{VID}") - end - def execute - create_users(@client.query("SELECT uid id, name, mail email, created FROM users;")) do |row| - { id: row['id'], username: row['name'], email: row['email'], created_at: Time.zone.at(row['created']) } - end - # You'll need to edit the following query for your Drupal install: - # - # * Drupal allows duplicate category names, so you may need to exclude some categories or rename them here. - # * Table name may be term_data. - # * May need to select a vid other than 1. - create_categories(categories_query) do |c| - { id: c['tid'], name: c['name'], description: c['description'] } - end + import_users + import_categories # "Nodes" in Drupal are divided into types. Here we import two types, # and will later import all the comments/replies for each node. # You will need to figure out what the type names are on your install and edit the queries to match. if ENV['DRUPAL_IMPORT_BLOG'] - create_blog_topics + import_blog_topics end - create_forum_topics + import_forum_topics - create_replies + import_replies + import_likes + mark_topics_as_solved + import_sso_records + import_attachments + postprocess_posts + create_permalinks + import_gravatars + end - begin - create_admin(email: 'neil.lalonde@discourse.org', username: UserNameSuggester.suggest('neil')) - rescue => e - puts '', "Failed to create admin user" - puts e.message + def import_users + puts "", "importing users" + + user_count = mysql_query("SELECT count(uid) count FROM users").first["count"] + + last_user_id = -1 + + batches(BATCH_SIZE) do |offset| + users = mysql_query(<<-SQL + SELECT uid, + name username, + mail email, + created + FROM users + WHERE uid > #{last_user_id} + ORDER BY uid + LIMIT #{BATCH_SIZE} + SQL + ).to_a + + break if users.empty? + + last_user_id = users[-1]["uid"] + + users.reject! { |u| @lookup.user_already_imported?(u["uid"]) } + + create_users(users, total: user_count, offset: offset) do |user| + email = user["email"].presence || fake_email + email = fake_email unless email[EmailValidator.email_regex] + + username = @htmlentities.decode(user["username"]).strip + + { + id: user["uid"], + name: username, + email: email, + created_at: Time.zone.at(user["created"]) + } + end end end - def create_blog_topics - puts '', "creating blog topics" + def import_categories + # You'll need to edit the following query for your Drupal install: + # + # * Drupal allows duplicate category names, so you may need to exclude some categories or rename them here. + # * Table name may be term_data. + # * May need to select a vid other than 1 - create_category({ - name: 'Blog', - user_id: -1, - description: "Articles from the blog" - }, nil) unless Category.find_by_name('Blog') + puts "", "importing categories" - results = @client.query(" - SELECT n.nid nid, n.title title, n.uid uid, n.created created, n.sticky sticky, - f.body_value body - FROM node n, - field_data_body f - WHERE n.type = 'blog' - AND n.nid = f.entity_id - AND n.status = 1 - ", cache_rows: false) + categories = mysql_query(<<-SQL + SELECT tid, + name, + description + FROM taxonomy_term_data + WHERE vid = #{VID} + SQL + ).to_a - create_posts(results) do |row| + create_categories(categories) do |category| { - id: "nid:#{row['nid']}", - user_id: user_id_from_imported_user_id(row['uid']) || -1, - category: 'Blog', - raw: row['body'], - created_at: Time.zone.at(row['created']), - pinned_at: row['sticky'].to_i == 1 ? Time.zone.at(row['created']) : nil, - title: row['title'].try(:strip), - custom_fields: { import_id: "nid:#{row['nid']}" } + id: category['tid'], + name: @htmlentities.decode(category['name']).strip, + description: @htmlentities.decode(category['description']).strip } end end - def create_forum_topics - puts '', "creating forum topics" + def import_blog_topics + puts '', "importing blog topics" - total_count = @client.query(" + create_category( + { + name: 'Blog', + description: "Articles from the blog" + }, + nil) unless Category.find_by_name('Blog') + + blogs = mysql_query(<<-SQL + SELECT n.nid nid, n.title title, n.uid uid, n.created created, n.sticky sticky, + f.body_value body + FROM node n, + field_data_body f + WHERE n.type = 'article' + AND n.nid = f.entity_id + AND n.status = 1 + SQL + ).to_a + + category_id = Category.find_by_name('Blog').id + + create_posts(blogs) do |topic| + { + id: "nid:#{topic['nid']}", + user_id: user_id_from_imported_user_id(topic['uid']) || -1, + category: category_id, + raw: topic['body'], + created_at: Time.zone.at(topic['created']), + pinned_at: topic['sticky'].to_i == 1 ? Time.zone.at(topic['created']) : nil, + title: topic['title'].try(:strip), + custom_fields: { import_id: "nid:#{topic['nid']}" } + } + end + end + + def import_forum_topics + puts '', "importing forum topics" + + total_count = mysql_query(<<-SQL SELECT COUNT(*) count FROM forum_index fi, node n WHERE n.type = 'forum' AND fi.nid = n.nid - AND n.status = 1;").first['count'] + AND n.status = 1 + SQL + ).first['count'] - batch_size = 1000 - - batches(batch_size) do |offset| - results = @client.query(" + batches(BATCH_SIZE) do |offset| + results = mysql_query(<<-SQL SELECT fi.nid nid, fi.title title, fi.tid tid, n.uid uid, fi.created created, fi.sticky sticky, - f.body_value body - FROM forum_index fi, - node n, - field_data_body f + f.body_value body, + nc.totalcount views, + fl.timestamp solved + FROM forum_index fi + LEFT JOIN node n ON fi.nid = n.nid + LEFT JOIN field_data_body f ON f.entity_id = n.nid + LEFT JOIN flagging fl ON fl.entity_id = n.nid + AND fl.fid = 7 + LEFT JOIN node_counter nc ON nc.nid = n.nid WHERE n.type = 'forum' - AND fi.nid = n.nid - AND n.nid = f.entity_id AND n.status = 1 - LIMIT #{batch_size} + LIMIT #{BATCH_SIZE} OFFSET #{offset}; - ", cache_rows: false) + SQL + ).to_a break if results.size < 1 next if all_records_exist? :posts, results.map { |p| "nid:#{p['nid']}" } create_posts(results, total: total_count, offset: offset) do |row| - { + raw = preprocess_raw(row['body']) + topic = { id: "nid:#{row['nid']}", user_id: user_id_from_imported_user_id(row['uid']) || -1, category: category_id_from_imported_category_id(row['tid']), - raw: row['body'], + raw: raw, created_at: Time.zone.at(row['created']), pinned_at: row['sticky'].to_i == 1 ? Time.zone.at(row['created']) : nil, - title: row['title'].try(:strip) + title: row['title'].try(:strip), + views: row['views'] } + topic[:custom_fields] = { import_solved: true } if row['solved'].present? + topic end end end - def create_replies + def import_replies puts '', "creating replies in topics" - total_count = @client.query(" + total_count = mysql_query(<<-SQL SELECT COUNT(*) count FROM comment c, node n WHERE n.nid = c.nid AND c.status = 1 - AND n.type IN ('blog', 'forum') - AND n.status = 1;").first['count'] + AND n.type IN ('article', 'forum') + AND n.status = 1 + SQL + ).first['count'] - batch_size = 1000 - - batches(batch_size) do |offset| - results = @client.query(" + batches(BATCH_SIZE) do |offset| + results = mysql_query(<<-SQL SELECT c.cid, c.pid, c.nid, c.uid, c.created, f.comment_body_value body FROM comment c, @@ -165,9 +238,10 @@ class ImportScripts::Drupal < ImportScripts::Base AND c.status = 1 AND n.type IN ('blog', 'forum') AND n.status = 1 - LIMIT #{batch_size} - OFFSET #{offset}; - ", cache_rows: false) + LIMIT #{BATCH_SIZE} + OFFSET #{offset} + SQL + ).to_a break if results.size < 1 @@ -176,11 +250,12 @@ class ImportScripts::Drupal < ImportScripts::Base create_posts(results, total: total_count, offset: offset) do |row| topic_mapping = topic_lookup_from_imported_post_id("nid:#{row['nid']}") if topic_mapping && topic_id = topic_mapping[:topic_id] + raw = preprocess_raw(row['body']) h = { id: "cid:#{row['cid']}", topic_id: topic_id, user_id: user_id_from_imported_user_id(row['uid']) || -1, - raw: row['body'], + raw: raw, created_at: Time.zone.at(row['created']), } if row['pid'] @@ -196,6 +271,265 @@ class ImportScripts::Drupal < ImportScripts::Base end end + def import_likes + puts "", "importing post likes" + + batches(BATCH_SIZE) do |offset| + likes = mysql_query(<<-SQL + SELECT flagging_id, + fid, + entity_id, + uid + FROM flagging + WHERE fid = 5 + OR fid = 6 + LIMIT #{BATCH_SIZE} + OFFSET #{offset} + SQL + ).to_a + + break if likes.empty? + + likes.each do |l| + identifier = l['fid'] == 5 ? 'nid' : 'cid' + next unless user_id = user_id_from_imported_user_id(l['uid']) + next unless post_id = post_id_from_imported_post_id("#{identifier}:#{l['entity_id']}") + next unless user = User.find_by(id: user_id) + next unless post = Post.find_by(id: post_id) + PostActionCreator.like(user, post) rescue nil + end + end + end + + def mark_topics_as_solved + puts "", "marking topics as solved" + + solved_topics = TopicCustomField.where(name: "import_solved").where(value: true).pluck(:topic_id) + + solved_topics.each do |topic_id| + next unless topic = Topic.find(topic_id) + next unless post = topic.posts.last + post_id = post.id + + PostCustomField.create!(post_id: post_id, name: "is_accepted_answer", value: true) + TopicCustomField.create!(topic_id: topic_id, name: "accepted_answer_post_id", value: post_id) + end + end + + def import_sso_records + puts "", "importing sso records" + + start_time = Time.now + current_count = 0 + + users = UserCustomField.where(name: "import_id") + + total_count = users.count + + return if users.empty? + + users.each do |ids| + user_id = ids.user_id + external_id = ids.value + next unless user = User.find(user_id) + + begin + current_count += 1 + print_status(current_count, total_count, start_time) + SingleSignOnRecord.create!(user_id: user.id, external_id: external_id, external_email: user.email, last_payload: '') + rescue + next + end + end + end + + def import_attachments + puts "", "importing attachments" + + current_count = 0 + success_count = 0 + fail_count = 0 + + total_count = mysql_query(<<-SQL + SELECT count(field_post_attachment_fid) count + FROM field_data_field_post_attachment + SQL + ).first["count"] + + batches(BATCH_SIZE) do |offset| + attachments = mysql_query(<<-SQL + SELECT * + FROM field_data_field_post_attachment fp + LEFT JOIN file_managed fm + ON fp.field_post_attachment_fid = fm.fid + LIMIT #{BATCH_SIZE} + OFFSET #{offset} + SQL + ).to_a + + break if attachments.size < 1 + + attachments.each do |attachment| + current_count += 1 + print_status current_count, total_count + + identifier = attachment['entity_type'] == "comment" ? "cid" : "nid" + next unless user_id = user_id_from_imported_user_id(attachment['uid']) + next unless post_id = post_id_from_imported_post_id("#{identifier}:#{attachment['entity_id']}") + next unless user = User.find(user_id) + next unless post = Post.find(post_id) + + begin + new_raw = post.raw.dup + upload, filename = find_upload(post, attachment) + + unless upload + fail_count += 1 + next + end + + upload_html = html_for_upload(upload, filename) + new_raw = "#{new_raw}\n\n#{upload_html}" unless new_raw.include?(upload_html) + + if new_raw != post.raw + PostRevisor.new(post).revise!(post.user, { raw: new_raw }, bypass_bump: true, edit_reason: "Import attachment from Drupal") + else + puts '', 'Skipped upload: already imported' + end + + success_count += 1 + rescue => e + puts e + end + end + end + end + + def create_permalinks + puts '', 'creating permalinks...' + + Topic.listable_topics.find_each do |topic| + begin + tcf = topic.custom_fields + if tcf && tcf['import_id'] + node_id = tcf['import_id'][/nid:(\d+)/, 1] + slug = "/topic/#{node_id}" + Permalink.create(url: slug, topic_id: topic.id) + end + rescue => e + puts e.message + puts "Permalink creation failed for id #{topic.id}" + end + end + end + + def find_upload(post, attachment) + uri = attachment['uri'][/public:\/\/upload\/(.+)/, 1] + real_filename = CGI.unescapeHTML(uri) + file = File.join(ATTACHMENT_DIR, real_filename) + + unless File.exists?(file) + puts "Attachment file #{attachment['filename']} doesn't exist" + + tmpfile = "attachments_failed.txt" + filename = File.join('/tmp/', tmpfile) + File.open(filename, 'a') { |f| + f.puts attachment['filename'] + } + end + + upload = create_upload(post.user.id || -1, file, real_filename) + + if upload.nil? || upload.errors.any? + puts "Upload not valid" + puts upload.errors.inspect if upload + return + end + + [upload, real_filename] + end + + def preprocess_raw(raw) + return if raw.blank? + # quotes on new lines + raw.gsub!(/\[quote\](.+?)\[\/quote\]/im) { |quote| + quote.gsub!(/\[quote\](.+?)\[\/quote\]/im) { "\n#{$1}\n" } + quote.gsub!(/\n(.+?)/) { "\n> #{$1}" } + } + + # [QUOTE=]...[/QUOTE] + raw.gsub!(/\[quote=([^;\]]+)\](.+?)\[\/quote\]/im) do + username, quote = $1, $2 + "\n[quote=\"#{username}\"]\n#{quote}\n[/quote]\n" + end + + raw.strip! + raw + end + + def postprocess_posts + puts '', 'postprocessing posts' + + current = 0 + max = Post.count + + Post.find_each do |post| + begin + raw = post.raw + new_raw = raw.dup + + # replace old topic to new topic links + new_raw.gsub!(/https:\/\/site.com\/forum\/topic\/(\d+)/im) do + post_id = post_id_from_imported_post_id("nid:#{$1}") + next unless post_id + topic = Post.find(post_id).topic + "https://community.site.com/t/-/#{topic.id}" + end + + # replace old comment to reply links + new_raw.gsub!(/https:\/\/site.com\/comment\/(\d+)#comment-\d+/im) do + post_id = post_id_from_imported_post_id("cid:#{$1}") + next unless post_id + post_ref = Post.find(post_id) + "https://community.site.com/t/-/#{post_ref.topic_id}/#{post_ref.post_number}" + end + + if raw != new_raw + post.raw = new_raw + post.save + end + rescue + puts '', "Failed rewrite on post: #{post.id}" + ensure + print_status(current += 1, max) + end + end + end + + def import_gravatars + puts '', 'importing gravatars' + current = 0 + max = User.count + User.find_each do |user| + begin + user.create_user_avatar(user_id: user.id) unless user.user_avatar + user.user_avatar.update_gravatar! + rescue + puts '', 'Failed avatar update on user #{user.id}' + ensure + print_status(current += 1, max) + end + end + end + + def parse_datetime(time) + DateTime.strptime(time, '%s') + end + + def mysql_query(sql) + @client.query(sql, cache_rows: true) + end + end if __FILE__ == $0