ランダムなサンプルデータ生成 (MySQL, PostgreSQL, Oracle, SQL Server)
MySQL
1 Sakilaデータベース
MySQL側の準備
- RDSでは、SYSアクセス(SUPER権限)は提供されていません。
- 自動バックアップ有効時にバイナリログも有効になっているが、この状況でトリガーを作成すると、以下のエラーが返されます。そのため、
log_bin_trust_function_creators
のパラメータを有効にしておくERROR 1419 (HY000) at line 214: You do not have the SUPER privilege and binary logging is enabled (you *might* want to use the less safe log_bin_trust_function_creators variable)
以下を実行。すると、
sakila
というデータベースが作成され、複数のサンプルデータを挿入できる。$ mysql -h xxxxxxxxxx.xxxxxxxxxx.ap-northeast-1.rds.amazonaws.com -u hayashier -p < <( \ wget 'http://downloads.mysql.com/docs/sakila-db.zip' \ && unzip -o sakila-db.zip 1>/dev/null \ && cat sakila-db/sakila-schema.sql sakila-db/sakila-data.sql \ )
以下のようなテーブルが確認できます。
mysql> use sakila; mysql> show tables; +----------------------------+ | Tables_in_sakila | +----------------------------+ | actor | | actor_info | | address | | category | | city | | country | | customer | | customer_list | | film | | film_actor | | film_category | | film_list | | film_text | | inventory | | language | | nicer_but_slower_film_list | | payment | | rental | | sales_by_film_category | | sales_by_store | | staff | | staff_list | | store | +----------------------------+ 23 rows in set (0.00 sec)
2 sysbench
データベース作成
CREATE DATABASE sbtest;
sysbenchインストール
# Build Requirements, for RHEL/CentOS/Amazon Linux AMI $ sudo yum -y install make automake libtool pkgconfig libaio-devel # For MySQL support, replace with mysql-devel on RHEL/CentOS 5 $ sudo yum -y install mariadb-devel openssl-devel # For PostgreSQL support $ sudo yum -y install postgresql-devel # For sysbench RPM $ curl -s https://packagecloud.io/install/repositories/akopytov/sysbench/script.rpm.sh | sudo bash $ sudo yum -y install sysbench
sysbench実行
- データ生成という点では最初のコマンドの実行のみでOKです。
sbtest1
というテーブルを作成されます。2つめはベンチマークを取るときのコマンド実行例です。MySQL
# Generate data $ sysbench --db-driver=mysql --table-size=10000000 --mysql-host=xxxxxxxxxx.xxxxxxxxxx.ap-northeast-1.rds.amazonaws.com --mysql-user=hayashier --mysql-password=Test1234 --mysql-db=sbtest oltp_read_write prepare # Run benchmark $ sysbench --db-driver=mysql --mysql-host=xxxxxxxxxx.xxxxxxxxxx.ap-northeast-1.rds.amazonaws.com --mysql-user=hayashier --mysql-password=Test1234 --mysql-db=sbtest --threads=32 --time=1800 oltp_read_write run
PostgreSQL
# Generate data $ sysbench --db-driver=pgsql --table-size=10000000 --pgsql-host=xxxxxxxxxx.xxxxxxxxxx.ap-northeast-1.rds.amazonaws.com --pgsql-user=hayashier --pgsql-password=Test1234 --pgsql-db=sbtest oltp_read_write prepare # Run benchmark $ sysbench --db-driver=pgsql --pgsql-host=xxxxxxxxxx.xxxxxxxxxx.ap-northeast-1.rds.amazonaws.com --pgsql-user=hayashier --pgsql-password=Test1234 --pgsql-db=sbtest --threads=32 --time=1800 oltp_read_write run
- データ生成という点では最初のコマンドの実行のみでOKです。
Others
3 小さめのデータ
create database test; use test; create table t1 (id int primary key); insert into t1 values (1), (22), (333); select * from t1;
4 小さめのデータ
use test; create table t1 (id int not null, text varchar(32)); insert into t1 values (1, 'a'); insert into t1 values (2, 'b'); insert into t1 values (3, 'c'); insert into t1 values (4, 'd'); insert into t1 values (5, 'e'); select * from t1;
5 大きめのデータ
use test; create table bigtable (id int not null, text varchar(600)); insert into bigtable (id, text) values (1, 'J04...8kB59'); insert into bigtable (id,text) select 0,text from bigtable; insert into bigtable (id,text) select 0,text from bigtable; insert into bigtable (id,text) select 0,text from bigtable; insert into bigtable (id,text) select 0,text from bigtable; insert into bigtable (id,text) select 0,text from bigtable; insert into bigtable (id,text) select 0,text from bigtable; insert into bigtable (id,text) select 0,text from bigtable; insert into bigtable (id,text) select 0,text from bigtable; insert into bigtable (id,text) select 0,text from bigtable; insert into bigtable (id,text) select 0,text from bigtable; insert into bigtable (id,text) select 0,text from bigtable; insert into bigtable (id,text) select 0,text from bigtable;
6 大きめのデータ
use test; create table bigtable_inc (id int not null AUTO_INCREMENT, text varchar(600), PRIMARY KEY (id)); insert into bigtable_inc (id, text) values (1, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'); insert into bigtable_inc (text) select text from bigtable_inc; insert into bigtable_inc (text) select text from bigtable_inc; insert into bigtable_inc (text) select text from bigtable_inc; insert into bigtable_inc (text) select text from bigtable_inc; insert into bigtable_inc (text) select text from bigtable_inc; insert into bigtable_inc (text) select text from bigtable_inc; insert into bigtable_inc (text) select text from bigtable_inc; insert into bigtable_inc (text) select text from bigtable_inc; insert into bigtable_inc (text) select text from bigtable_inc; insert into bigtable_inc (text) select text from bigtable_inc; insert into bigtable_inc (text) select text from bigtable_inc; insert into bigtable_inc (text) select text from bigtable_inc; insert into bigtable_inc (text) select text from bigtable_inc; insert into bigtable_inc (text) select text from bigtable_inc; insert into bigtable_inc (text) select text from bigtable_inc; insert into bigtable_inc (text) select text from bigtable_inc; insert into bigtable_inc (text) select text from bigtable_inc; insert into bigtable_inc (text) select text from bigtable_inc; insert into bigtable_inc (text) select text from bigtable_inc; insert into bigtable_inc (text) select text from bigtable_inc;
7
set global tmp_table_size = 268435456; set global max_heap_table_size = 268435456; create table table1 (id int primary key auto_increment, s1 varchar(32), s2 varchar(32)); insert into pstest.table1 values (null, md5(rand()), md5(rand())); insert into pstest.table1 select null, md5(rand()), md5(rand()) from pstest.table1; : truncate table performance_schema.memory_summary_global_by_event_name;
8
use test; create table randint (id int not null AUTO_INCREMENT, num BIGINT, text varchar(600), PRIMARY KEY (id)); insert into randint (id, num, text) values (1, FLOOR(RAND() * 10000000000000000), 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'); insert into randint (num, text) select FLOOR(RAND() * 10000000000000000), text from randint; insert into randint (num, text) select FLOOR(RAND() * 10000000000000000), text from randint; insert into randint (num, text) select FLOOR(RAND() * 10000000000000000), text from randint; insert into randint (num, text) select FLOOR(RAND() * 10000000000000000), text from randint; insert into randint (num, text) select FLOOR(RAND() * 10000000000000000), text from randint; insert into randint (num, text) select FLOOR(RAND() * 10000000000000000), text from randint; insert into randint (num, text) select FLOOR(RAND() * 10000000000000000), text from randint; insert into randint (num, text) select FLOOR(RAND() * 10000000000000000), text from randint; insert into randint (num, text) select FLOOR(RAND() * 10000000000000000), text from randint; insert into randint (num, text) select FLOOR(RAND() * 10000000000000000), text from randint; insert into randint (num, text) select FLOOR(RAND() * 10000000000000000), text from randint; insert into randint (num, text) select FLOOR(RAND() * 10000000000000000), text from randint;
9 値にちょっとしたバラエティを作る版
CREATE TABLE item ( id INT PRIMARY KEY AUTO_INCREMENT, name VARCHAR(10), description VARCHAR(30), price INT UNSIGNED, created_at DATETIME ); INSERT INTO item () VALUES (); INSERT INTO item (id) SELECT 0 FROM item; INSERT INTO item (id) SELECT 0 FROM item; INSERT INTO item (id) SELECT 0 FROM item; INSERT INTO item (id) SELECT 0 FROM item; INSERT INTO item (id) SELECT 0 FROM item; INSERT INTO item (id) SELECT 0 FROM item; UPDATE item SET name = CONCAT('item-', id), description = SUBSTRING(MD5(RAND()), 1, 30), price = CEIL(RAND() * 10000), created_at = ADDTIME(CONCAT_WS(' ','2017-01-01' + INTERVAL RAND() * 180 DAY, '00:00:00'), SEC_TO_TIME(FLOOR(0 + (RAND() * 86401))));
10 ストアドプロシージャを利用した版
CREATE TABLE t1 (id int, score int); delimiter // CREATE PROCEDURE insert_random_data() BEGIN SET @limit = 20; SET @pos = 0; WHILE @limit > @pos DO insert into t1 values (FLOOR(RAND() * 100000), FLOOR(RAND() * 100000)), (FLOOR(RAND() * 100000), FLOOR(RAND() * 100000)), (FLOOR(RAND() * 100000), FLOOR(RAND() * 100000)), (FLOOR(RAND() * 100000), FLOOR(RAND() * 100000)), (FLOOR(RAND() * 100000), FLOOR(RAND() * 100000)); SET @pos = @pos + 1; END WHILE; END // delimiter ; CALL insert_random_data(); DROP PROCEDURE insert_random_data;
PostgreSQL
1 PostgreSQLのチュートリアルのデータベース
データベース作成
CREATE DATABASE dvdrental;
pg_restoreコマンドを実行
- RDSには
superuser
の権限がないため、--no-owner
オプションを指定しないと、ERROR: role "postgres" does not exist
のようなエラーが出て実行できません。
$ pg_restore -h xxxxxxxxxx.xxxxxxxxxx.ap-northeast-1.rds.amazonaws.com --no-owner -U hayashier -d dvdrental $( \ wget 'https://www.postgresqltutorial.com/wp-content/uploads/2019/05/dvdrental.zip' \ && unzip -o dvdrental.zip 1>/dev/null \ && echo "./dvdrental.tar" \ )
- RDSには
以下のようなテーブルが確認できます。
test=> \l List of databases Name | Owner | Encoding | Collate | Ctype | Access privileges ------------+----------+----------+-------------+-------------+----------------------- db1 | hayashier | UTF8 | en_US.UTF-8 | en_US.UTF-8 | dvdrental | hayashier | UTF8 | en_US.UTF-8 | en_US.UTF-8 | postgres | hayashier | UTF8 | en_US.UTF-8 | en_US.UTF-8 | rdsadmin | rdsadmin | UTF8 | en_US.UTF-8 | en_US.UTF-8 | rdsadmin=CTc/rdsadmin sbtest | hayashier | UTF8 | en_US.UTF-8 | en_US.UTF-8 | sysbenchdb | hayashier | UTF8 | en_US.UTF-8 | en_US.UTF-8 | template0 | rdsadmin | UTF8 | en_US.UTF-8 | en_US.UTF-8 | =c/rdsadmin + | | | | | rdsadmin=CTc/rdsadmin template1 | hayashier | UTF8 | en_US.UTF-8 | en_US.UTF-8 | =c/hayashier + | | | | | hayashier=CTc/hayashier test | hayashier | UTF8 | en_US.UTF-8 | en_US.UTF-8 | =Tc/hayashier + | | | | | hayashier=CTc/hayashier test2 | hayashier | UTF8 | en_US.UTF-8 | en_US.UTF-8 | testdb | hayashier | UTF8 | en_US.UTF-8 | en_US.UTF-8 | u1db1 | hayashier | UTF8 | en_US.UTF-8 | en_US.UTF-8 | (12 rows) test=> \c dvdrental SSL connection (protocol: TLSv1.2, cipher: ECDHE-RSA-AES256-GCM-SHA384, bits: 256, compression: off) You are now connected to database "dvdrental" as user "hayashier". dvdrental=> \dt List of relations Schema | Name | Type | Owner --------+---------------+-------+---------- public | actor | table | hayashier public | address | table | hayashier public | category | table | hayashier public | city | table | hayashier public | country | table | hayashier public | customer | table | hayashier public | film | table | hayashier public | film_actor | table | hayashier public | film_category | table | hayashier public | inventory | table | hayashier public | language | table | hayashier public | payment | table | hayashier public | rental | table | hayashier public | staff | table | hayashier public | store | table | hayashier (15 rows)
2 sysbench
- MySQLの項を参照
3
CREATE TABLE tables ( user_id INTEGER, affiliate_code VARCHAR(50), timestamp timestamp not null default current_timestamp ); INSERT INTO tables ( user_id, affiliate_code, timestamp ) SELECT s as user_id, 'u_' || s as user_code, CAST('2017-1-1' as TIMESTAMP) + CAST(s.a ||' seconds' as interval) as timestamp FROM GENERATE_SERIES(1,1000) as s(a);
Oracle
with SQL*Plus
set pages 200 set lines 200 create table test_random (col1 number,col2 number, col3 varchar2(100), col4 number); DECLARE TYPE tbl_ins IS TABLE OF test_random%ROWTYPE INDEX BY BINARY_INTEGER; w_ins tbl_ins; BEGIN dbms_random.SEED(1); FOR i IN 1 .. 1000000 LOOP w_ins(i).col1 := i; w_ins(i).col2 := mod(i,3); w_ins(i).col3 := 'ABC'||dbms_random.string('x',10); w_ins(i).col4 := ceil(dbms_random.value(1, 10000000000)); END LOOP; FORALL i in 1 .. 1000000 INSERT INTO test_random VALUES w_ins(i); COMMIT; END; / select * from test_random where ROWNUM <= 5 order by col1;
SQL Server
- AdventureWorksのデータセットを利用。ローカルの場合は、データの復元で、対象のファイルを指定することで復元できるが、RDSの場合は、S3にファイルを配置し、RDS用に用意されたストアドプロシージャを実行する必要がある。
リストア
exec msdb.dbo.rds_restore_database @restore_db_name='testdb', @s3_arn_to_restore_from='arn:aws:s3:::sqlserver-backup-test/AdventureWorks2019.bak';
進捗確認
exec msdb.dbo.rds_task_status @db_name='testdb'
Notes
fake2dbも便利かも
References
- MySQL
- PostgreSQL
- SQL Server