discourse/migrations/tooling/scripts/benchmarks/database_write.rb
Gerhard Schlager 06b32204c0
MT: Split the migrations tooling into separate gems (#40492)
Previously, the migrations tooling was a single flat `migrations/` tree,
autoloaded by one global Zeitwerk loader and driven by a Thor CLI, so each
planned next step had nowhere clean to land.

This change splits it into four `path:`-referenced gems — `migrations-core`,
`migrations-tooling`, `migrations-converters`, and `migrations-importer` —
served by a single Samovar-based `disco` binary, without rewriting any domain
logic.

### Why now

The DSL refactor that replaced the IntermediateDB YAML config just landed,
which is the cheapest moment to do this. Everything queued behind it — column
coverage verification, the `discourse-migrations` validation plugin, the
transformer framework, and private converter isolation — either has nowhere
clean to land in the flat tree or would have to be retrofitted into a gem
layout later. Doing the split now, while it's still a pure move (suite green,
no domain logic touched), is far cheaper than after another round of features
has built on the flat layout.

### What changes

- **Four gems under `migrations/`**, all `path:`-referenced from the root
  `Gemfile` (nothing is published to RubyGems): `core` (CLI framework, UI, DB
  infrastructure, IntermediateDB, and the conversion framework), `tooling`
  (schema DSL and `schema` commands), `converters` (implementations and source
  adapters), and `importer` (row and uploads import).
- **A single CLI binary:** `migrations/bin/cli` (Thor) becomes `disco`
  (Samovar), with each gem registering its own commands. Same surface —
  `convert`, `import`, `upload`, `schema generate|validate|…` — and Rails is
  still booted lazily.
- **Isolated test suites:** each gem runs its own no-Rails specs in a new CI
  job, while the existing job keeps running the Rails-integration specs.
2026-06-02 22:20:03 +02:00

203 lines
4.2 KiB
Ruby
Executable file
Vendored

#!/usr/bin/env ruby
# frozen_string_literal: true
require "bundler/inline"
require "benchmark"
require "tempfile"
gemfile(true) do
source "https://rubygems.org"
gem "extralite-bundle", require: "extralite"
gem "sqlite3"
gem "duckdb"
end
# Number of rows every backend inserts during one benchmark run.
ROW_COUNT = 50_000_000

# Values written for each row, in column order: [some_text, some_number].
# Frozen because the same array instance is handed to every backend on every
# insert; nothing should be able to mutate it between runs.
SOME_DATA = ["The quick, brown fox jumps over a lazy dog.", 1_234_567_890].freeze
# Yields the path of a fresh temporary file and deletes the file afterwards,
# whether or not the block raises.
#
# @yieldparam path [String] location of the temporary file
# @return [Object] whatever the block returns
def with_db_path
  tempfile = Tempfile.new
  yield tempfile.path
ensure
  # `tempfile` is still nil when Tempfile.new itself raised; safe navigation
  # keeps that original error from being replaced by a NoMethodError.
  tempfile&.close
  tempfile&.unlink
end
module Sqlite
# Batch size the benchmark classes in this module compare their statement
# counter against when deciding to commit the open transaction.
TRANSACTION_SIZE = 1000
# Schema of the single table both SQLite benchmarks write into.
CREATE_TABLE_SQL = <<~SQL
CREATE TABLE foo
(
id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
some_text TEXT,
some_number INTEGER
)
SQL
# Parameterized insert shared by both benchmarks; `id` is not supplied, so it
# is left to the AUTOINCREMENT primary key declared above.
INSERT_SQL = "INSERT INTO foo (some_text, some_number) VALUES (?, ?)"
# Times bulk inserts through the `sqlite3` gem, using one prepared statement
# and transactions of TRANSACTION_SIZE rows, against a temporary database file.
class Sqlite3Benchmark
  # Creates the temporary database, applies the pragmas, creates the table and
  # prepares the insert statement.
  #
  # @param row_count [Integer] number of rows inserted by {#run}
  def initialize(row_count)
    @row_count = row_count
    @tempfile = Tempfile.new
    @connection = SQLite3::Database.new(@tempfile.path)
    @connection.journal_mode = "wal"
    @connection.synchronous = "off"
    @connection.temp_store = "memory"
    @connection.locking_mode = "normal"
    # A negative cache_size is a memory budget in KiB (see SQLite's
    # `PRAGMA cache_size`), so this is roughly 10 MB, not 10_000 pages.
    @connection.cache_size = -10_000
    @connection.execute(CREATE_TABLE_SQL)
    @stmt = @connection.prepare(INSERT_SQL)
    @statement_counter = 0
  end

  # Inserts `row_count` copies of SOME_DATA, then releases the statement, the
  # connection and the temporary file -- also when an insert raises.
  def run
    @row_count.times { insert(SOME_DATA) }
  ensure
    close
  end

  private

  # Executes the prepared insert, opening a transaction for the first row of a
  # batch and committing once exactly TRANSACTION_SIZE rows were written.
  def insert(*parameters)
    begin_transaction if @statement_counter == 0
    @stmt.execute(*parameters)

    @statement_counter += 1
    return if @statement_counter < TRANSACTION_SIZE

    commit_transaction
    @statement_counter = 0
  end

  def begin_transaction
    return if @connection.transaction_active?
    @connection.transaction(:deferred)
  end

  def commit_transaction
    return unless @connection.transaction_active?
    @connection.commit
  end

  # Commits the trailing partial batch and tears everything down.
  def close
    commit_transaction
    @stmt.close
    @connection.close
    @tempfile.close
    @tempfile.unlink
  end
end
# Times bulk inserts through the `extralite` gem, using one prepared statement
# and transactions of TRANSACTION_SIZE rows, against a temporary database file.
class ExtraliteBenchmark
  # Creates the temporary database, applies the pragmas, creates the table and
  # prepares the insert statement.
  #
  # @param row_count [Integer] number of rows inserted by {#run}
  def initialize(row_count)
    @row_count = row_count
    @tempfile = Tempfile.new
    @connection = Extralite::Database.new(@tempfile.path)
    @connection.pragma(
      journal_mode: "wal",
      synchronous: "off",
      temp_store: "memory",
      locking_mode: "normal",
      # A negative cache_size is a memory budget in KiB (see SQLite's
      # `PRAGMA cache_size`), so this is roughly 10 MB, not 10_000 pages.
      cache_size: -10_000,
    )
    @connection.execute(CREATE_TABLE_SQL)
    @stmt = @connection.prepare(INSERT_SQL)
    @statement_counter = 0
  end

  # Inserts `row_count` copies of SOME_DATA, then releases the statement, the
  # connection and the temporary file -- also when an insert raises.
  def run
    @row_count.times { insert(SOME_DATA) }
  ensure
    close
  end

  private

  # Executes the prepared insert, opening a transaction for the first row of a
  # batch and committing once exactly TRANSACTION_SIZE rows were written.
  def insert(*parameters)
    begin_transaction if @statement_counter == 0
    @stmt.execute(*parameters)

    @statement_counter += 1
    return if @statement_counter < TRANSACTION_SIZE

    commit_transaction
    @statement_counter = 0
  end

  def begin_transaction
    return if @connection.transaction_active?
    @connection.execute("BEGIN DEFERRED TRANSACTION")
  end

  def commit_transaction
    return unless @connection.transaction_active?
    @connection.execute("COMMIT")
  end

  # Commits the trailing partial batch and tears everything down.
  def close
    commit_transaction
    @stmt.close
    @connection.close
    @tempfile.close
    @tempfile.unlink
  end
end
end
# Times bulk inserts into a temporary DuckDB database through the gem's
# appender interface.
class DuckDbBenchmark
  # Same columns as the SQLite table, but without AUTOINCREMENT: #run supplies
  # the id of every row itself.
  CREATE_TABLE_SQL = <<~SQL
    CREATE TABLE foo
    (
    id INTEGER NOT NULL PRIMARY KEY,
    some_text TEXT,
    some_number INTEGER
    )
  SQL

  # Reserves a unique temporary path, opens a DuckDB database there, creates
  # the table and opens an appender for it.
  #
  # @param row_count [Integer] number of rows appended by {#run}
  def initialize(row_count)
    @row_count = row_count
    @tempfile = Tempfile.new
    # Only the unique path is needed: release the handle and delete the empty
    # placeholder so the database file is created by DuckDB at that path
    # (presumably DuckDB rejects the empty placeholder -- the file was removed
    # before opening here originally as well).
    @tempfile.close
    File.unlink(@tempfile.path)
    @db = DuckDB::Database.open(@tempfile.path)
    @connection = @db.connect
    @connection.query(CREATE_TABLE_SQL)
    @appender = @connection.appender("foo")
  end

  # Appends `row_count` rows (the loop index as id, followed by SOME_DATA),
  # then closes everything and removes the database file -- also when an
  # append raises.
  def run
    @row_count.times do |id|
      @appender.begin_row
      @appender.append(id)
      @appender.append(SOME_DATA[0])
      @appender.append(SOME_DATA[1])
      @appender.end_row
    end
  ensure
    close
  end

  private

  # Closes appender, connection and database, then deletes the database file
  # so the benchmark cleans up after itself like the SQLite benchmarks do.
  def close
    @appender.close
    @connection.close
    @db.close
    @tempfile.unlink
  end
end
# Runs each backend once with ROW_COUNT rows and prints a timing table.
# Construction happens inside the measured block, so database setup is part of
# every reported time.
contenders = {
  "SQLite3" => Sqlite::Sqlite3Benchmark,
  "Extralite" => Sqlite::ExtraliteBenchmark,
  "DuckDB" => DuckDbBenchmark,
}

Benchmark.bm(15) do |bm|
  contenders.each do |label, benchmark_class|
    bm.report(label) { benchmark_class.new(ROW_COUNT).run }
  end
end