· 7 years ago · Sep 28, 2018, 05:24 AM
1#!/usr/bin/perl -w
2# mysql2pgsql
3# MySQL to PostgreSQL dump file converter
4#
5# For usage: perl mysql2pgsql.perl --help
6#
7# ddl statments are changed but none or only minimal real data
8# formatting are done.
9# data consistency is up to the DBA.
10#
11# (c) 2004-2007 Jose M Duarte and Joseph Speigle ... gborg
12#
13# (c) 2000-2004 Maxim Rudensky <fonin@omnistaronline.com>
14# (c) 2000 Valentine Danilchuk <valdan@ziet.zhitomir.ua>
15# All rights reserved.
16#
17# Redistribution and use in source and binary forms, with or without
18# modification, are permitted provided that the following conditions
19# are met:
20# 1. Redistributions of source code must retain the above copyright
21# notice, this list of conditions and the following disclaimer.
22# 2. Redistributions in binary form must reproduce the above copyright
23# notice, this list of conditions and the following disclaimer in the
24# documentation and/or other materials provided with the distribution.
25# 3. All advertising materials mentioning features or use of this software
26# must display the following acknowledgement:
27# This product includes software developed by the Max Rudensky
28# and its contributors.
29# 4. Neither the name of the author nor the names of its contributors
30# may be used to endorse or promote products derived from this software
31# without specific prior written permission.
32# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
33# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
34# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
35# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
36# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
37# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
38# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
39# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
40# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
41# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
42# SUCH DAMAGE.
43
44use Getopt::Long;
45
46use POSIX;
47
48use strict;
49use warnings;
50
51
52# main sections
53# -------------
54# 1 variable declarations
55# 2 subroutines
56# 3 get commandline options and specify help statement
57# 4 loop through file and process
58# 5. print_plpgsql function prototype
59
60#################################################################
61# 1. variable declarations
62#################################################################
63# command line options
64my( $ENC_IN, $ENC_OUT, $PRESERVE_CASE, $HELP, $DEBUG, $SCHEMA, $LOWERCASE, $CHAR2VARCHAR, $NODROP, $SEP_FILE, $opt_debug, $opt_help, $opt_schema, $opt_preserve_case, $opt_char2varchar, $opt_nodrop, $opt_sepfile, $opt_enc_in, $opt_enc_out );
65# variables for constructing pre-create-table entities
66my $pre_create_sql=''; # comments, 'enum' constraints preceding create table statement
67my $auto_increment_seq= ''; # so we can easily substitute it if we need a default value
68my $create_sql=''; # all the datatypes in the create table section
69my $post_create_sql=''; # create indexes, foreign keys, table comments
70my $function_create_sql = ''; # for the set (function,trigger) and CURRENT_TIMESTAMP ( function,trigger )
71# constraints
72my ($type, $column_valuesStr, @column_values, $value );
73my %constraints=(); # holds values constraints used to emulate mysql datatypes (e.g. year, set)
74# datatype conversion variables
75my ( $index,$seq);
76my ( $column_name, $col, $quoted_column);
77my ( @year_holder, $year, $constraint_table_name);
78my $table=""; # table_name for create sql statements
79my $table_no_quotes=""; # table_name for create sql statements
80my $sl = '^\s+\w+\s+'; # matches the column name
81my $tables_first_timestamp_column= 1; # decision to print warnings about default_timestamp not being in postgres
82my $mysql_numeric_datatypes = "TINYINT|SMALLINT|MEDIUMINT|INT|INTEGER|BIGINT|REAL|DOUBLE|FLOAT|DECIMAL|NUMERIC";
83my $mysql_datetime_datatypes = "|DATE|TIME|TIMESTAMP|DATETIME|YEAR";
84my $mysql_text_datatypes = "CHAR|VARCHAR|BINARY|VARBINARY|TINYBLOB|BLOB|MEDIUMBLOB|LONGBLOB|TINYTEXT|TEXT|MEDIUMTEXT|LONGTEXT|ENUM|SET";
85my $mysql_datatypesStr = $mysql_numeric_datatypes . "|". $mysql_datetime_datatypes . "|". $mysql_text_datatypes ;
86# handling INSERT INTO statements
87my $rowRe = qr{
88 \( # opening parens
89 ( # (start capture)
90 (?: # (start group)
91 ' # string start
92 [^'\\]* # up to string-end or backslash (escape)
93 (?: # (start group)
94 \\. # gobble escaped character
95 [^'\\]* # up to string-end of backslash
96 )* # (end group, repeat zero or more)
97 ' # string end
98 | # (OR)
99 .*? # everything else (not strings)
100 )* # (end group, repeat zero or more)
101 ) # (end capture)
102 \) # closing parent
103}x;
104
105my ($insert_table, $valueString);
106#
107########################################################
108# 2. subroutines
109#
110# get_identifier
111# print_post_create_sql()
112# quote_and_lc()
113# make_plpgsql($table,$column_name) -- at end of file
114########################################################
115
116# returns an identifier with the given suffix doing controlled
117# truncation if necessary
118sub get_identifier($$$) {
119 my ($table, $col, $suffix) = @_;
120 my $name = '';
121 $table=~s/\"//g; # make sure that $table doesn't have quotes so we don't end up with redundant quoting
122 # in the case of multiple columns
123 my @cols = split(/,/,$col);
124 $col =~ s/,//g;
125 # in case all columns together too long we have to truncate them
126 if (length($col) > 55) {
127 my $totaltocut = length($col)-55;
128 my $tocut = ceil($totaltocut / @cols);
129 @cols = map {substr($_,0,abs(length($_)-$tocut))} @cols;
130 $col="";
131 foreach (@cols){
132 $col.=$_;
133 }
134 }
135
136 my $max_table_length = 63 - length("_${col}_$suffix");
137
138 if (length($table) > $max_table_length) {
139 $table = substr($table, length($table) - $max_table_length, $max_table_length);
140 }
141 return quote_and_lc("${table}_${col}_${suffix}");
142}
143
144
145#
146#
147# called when we encounter next CREATE TABLE statement
148# also called at EOF to print out for last table
149# prints comments, indexes, foreign key constraints (the latter 2 possibly to a separate file)
150sub print_post_create_sql() {
151 my ( @create_idx_comments_constraints_commandsArr, $stmts, $table_field_combination);
152 my %stmts;
153 # loop to check for duplicates in $post_create_sql
154 # Needed because of duplicate key declarations ( PRIMARY KEY and KEY), auto_increment columns
155
156 @create_idx_comments_constraints_commandsArr = split(';\n?', $post_create_sql);
157 if ($SEP_FILE) {
158 open(SEP_FILE, ">>:encoding($ENC_OUT)", $SEP_FILE) or die "Unable to open $SEP_FILE for output: $!\n";
159 }
160
161 foreach (@create_idx_comments_constraints_commandsArr) {
162 if (m/CREATE INDEX "*(\S+)"*\s/i) { # CREATE INDEX korean_english_wordsize_idx ON korean_english USING btree (wordsize);
163 $table_field_combination = $1;
164 # if this particular table_field_combination was already used do not print the statement:
165 if ($SEP_FILE) {
166 print SEP_FILE "$_;\n" if !defined($stmts{$table_field_combination});
167 } else {
168 print OUT "$_;\n" if !defined($stmts{$table_field_combination});
169 }
170 $stmts{$table_field_combination} = 1;
171 }
172 elsif (m/COMMENT/i) { # COMMENT ON object IS 'text'; but comment may be part of table name so use 'elsif'
173 print OUT "$_;\n"
174 } else { # foreign key constraint or comments (those preceded by -- )
175 if ($SEP_FILE) {
176 print SEP_FILE "$_;\n";
177 } else {
178 print OUT "$_;\n"
179 }
180 }
181 }
182
183 if ($SEP_FILE) {
184 close SEP_FILE;
185 }
186 $post_create_sql='';
187 # empty %constraints for next " create table" statement
188}
189
190# quotes a string or a multicolumn string (comma separated)
191# and optionally lowercase (if LOWERCASE is set)
192# lowercase .... if user wants default postgres behavior
193# quotes .... to preserve keywords and to preserve case when case-sensitive tables are to be used
194sub quote_and_lc($)
195{
196 my $col = shift;
197 if ($LOWERCASE) {
198 $col = lc($col);
199 }
200 if ($col =~ m/,/) {
201 my @cols = split(/,\s?/, $col);
202 @cols = map {"\"$_\""} @cols;
203 return join(', ', @cols);
204 } else {
205 return "\"$col\"";
206 }
207}
208
209########################################################
210# 3. get commandline options and maybe print help
211########################################################
212
213GetOptions("help", "debug"=> \$opt_debug, "schema=s" => \$SCHEMA, "preserve_case" => \$opt_preserve_case, "char2varchar" => \$opt_char2varchar, "nodrop" => \$opt_nodrop, "sepfile=s" => \$opt_sepfile, "enc_in=s" => \$opt_enc_in, "enc_out=s" => \$opt_enc_out );
214
215$HELP = $opt_help || 0;
216$DEBUG = $opt_debug || 0;
217$PRESERVE_CASE = $opt_preserve_case || 0;
218if ($PRESERVE_CASE == 1) { $LOWERCASE = 0; }
219else { $LOWERCASE = 1; }
220$CHAR2VARCHAR = $opt_char2varchar || 0;
221$NODROP = $opt_nodrop || 0;
222$SEP_FILE = $opt_sepfile || 0;
223$ENC_IN = $opt_enc_in || 'utf8';
224$ENC_OUT = $opt_enc_out || 'utf8';
225
226if (($HELP) || ! defined($ARGV[0]) || ! defined($ARGV[1])) {
227 print "\n\nUsage: perl $0 {--help --debug --preserve_case --char2varchar --nodrop --schema --sepfile --enc_in --enc_out } mysql.sql pg.sql\n";
228 print "\t* OPTIONS WITHOUT ARGS\n";
229 print "\t--help: prints this message \n";
230 print "\t--debug: output the commented-out mysql line above the postgres line in pg.sql \n";
231 print "\t--preserve_case: prevents automatic case-lowering of column and table names\n";
232 print "\t\tIf you want to preserve case, you must set this flag. For example,\n";
233 print "\t\tIf your client application quotes table and column-names and they have cases in them, set this flag\n";
234 print "\t--char2varchar: converts all char fields to varchar\n";
235 print "\t--nodrop: strips out DROP TABLE statements\n";
236 print "\t\totherise harmless warnings are printed by psql when the dropped table does not exist\n";
237 print "\n\t* OPTIONS WITH ARGS\n";
238 print "\t--schema: outputs a line into the postgres sql file setting search_path \n";
239 print "\t--sepfile: output foreign key constraints and indexes to a separate file so that it can be\n";
240 print "\t\timported after large data set is inserted from another dump file\n";
241 print "\t--enc_in: encoding of mysql in file (default utf8) \n";
242 print "\t--enc_out: encoding of postgres out file (default utf8) \n";
243 print "\n\t* REQUIRED ARGUMENTS\n";
244 if (defined ($ARGV[0])) {
245 print "\tmysql.sql ($ARGV[0])\n";
246 } else {
247 print "\tmysql.sql (undefined)\n";
248 }
249 if (defined ($ARGV[1])) {
250 print "\tpg.sql ($ARGV[1])\n";
251 } else {
252 print "\tpg.sql (undefined)\n";
253 }
254 print "\n";
255 exit 1;
256}
257########################################################
258# 4. process through mysql_dump.sql file
259# in a big loop
260########################################################
261
262# open in and out files
263open(IN,"<:encoding($ENC_IN)", $ARGV[0]) || die "can't open mysql dump file $ARGV[0]";
264open(OUT,">:encoding($ENC_OUT)", $ARGV[1]) || die "can't open pg dump file $ARGV[1]";
265
266# output header
267print OUT "--\n";
268print OUT "-- Generated from mysql2pgsql.perl\n";
269print OUT "-- http://gborg.postgresql.org/project/mysql2psql/\n";
270print OUT "-- (c) 2001 - 2007 Jose M. Duarte, Joseph Speigle\n";
271print OUT "--\n";
272print OUT "\n";
273print OUT "-- warnings are printed for drop tables if they do not exist\n";
274print OUT "-- please see http://archives.postgresql.org/pgsql-novice/2004-10/msg00158.php\n\n";
275print OUT "-- ##############################################################\n";
276
277if ($SCHEMA ) {
278 print OUT "set search_path='" . $SCHEMA . "'\\g\n" ;
279}
280
281# loop through mysql file on a per-line basis
282while(<IN>) {
283
284############## flow #########################
285# (the lines are directed to different string variables at different times)
286#
287# handle drop table , unlock, connect statements
288# if ( start of create table) {
289# print out post_create table (indexes, foreign key constraints, comments from previous table)
290# add drop table statement if !$NODROP to pre_create_sql
291# next;
292# }
293# else if ( inside create table) {
294# add comments in this portion to create_sql
295# if ( end of create table) {
296# delete mysql-unique CREATE TABLE commands
297# print pre_create_sql
298# print the constraint tables for set and year datatypes
299# print create_sql
300# print function_create_sql (this is for the enum columns only)
301# next;
302# }
303# do substitutions
304# -- NUMERIC DATATYPES
305# -- CHARACTER DATATYPES
306# -- DATE AND TIME DATATYPES
307# -- KEY AND UNIQUE CREATIONS
308# and append them to create_sql
309# } else {
310# print inserts on-the-spot (this script only changes default timestamp of 0000-00-00)
311# }
312# LOOP until EOF
313#
314########################################################
315
316
317if (!/^\s*insert into/i) { # not inside create table so don't worry about data corruption
318 s/`//g; # '`pgsql uses no backticks to denote table name (CREATE TABLE `sd`) or around field
319 # and table names like mysql
320 # doh! we hope all dashes and special chars are caught by the regular expressions :)
321}
322if (/^\s*USE\s*([^;]*);/) {
323 print OUT "\\c ". $1;
324 next;
325}
326if (/^(UN)?LOCK TABLES/i || /drop\s+table/i ) {
327
328 # skip
329 # DROP TABLE is added when we see the CREATE TABLE
330 next;
331}
332if (/(create\s+table\s+)([-_\w]+)\s/i) { # example: CREATE TABLE `english_english`
333 print_post_create_sql(); # for last table
334 $tables_first_timestamp_column= 1; # decision to print warnings about default_timestamp not being in postgres
335 $create_sql = '';
336 $table_no_quotes = $2 ;
337 $table=quote_and_lc($2);
338 if ( !$NODROP ) { # always print drop table if user doesn't explicitly say not to
339 # to drop a table that is referenced by a view or a foreign-key constraint of another table,
340 # CASCADE must be specified. (CASCADE will remove a dependent view entirely, but in the
341 # in the foreign-key case it will only remove the foreign-key constraint, not the other table entirely.)
342 # (source: 8.1.3 docs, section "drop table")
343 warn "table $table will be dropped CASCADE\n";
344 $pre_create_sql .= "DROP TABLE $table CASCADE\\g\n"; # custom dumps may be missing the 'dump' commands
345 }
346
347 s/(create\s+table\s+)([-_\w]+)\s/$1 $table /i;
348 if ($DEBUG) {
349 $create_sql .= '-- ' . $_;
350 }
351 $create_sql .= $_;
352 next;
353}
354if ($create_sql ne "") { # we are inside create table statement so lets process datatypes
355 # print out comments or empty lines in context
356 if ($DEBUG) {
357 $create_sql .= '-- ' . $_;
358 }
359 if (/^#/ || /^$/ || /^\s*--/) {
360 s/^#/--/; # Two hyphens (--) is the SQL-92 standard indicator for comments
361 $create_sql.=$_;
362 next;
363 }
364
365 if (/\).*;/i) { # end of create table squence
366
367 s/INSERT METHOD[=\s+][^;\s]+//i;
368 s/PASSWORD=[^;\s]+//i;
369 s/ROW_FORMAT=(?:DEFAULT|DYNAMIC|FIXED|COMPRESSED|REDUNDANT|COMPACT)+//i;
370 s/DELAY KEY WRITE=[^;\s]+//i;
371 s/INDEX DIRECTORY[=\s+][^;\s]+//i;
372 s/DATA DIRECTORY=[^;\s]+//i;
373 s/CONNECTION=[^;\s]+//i;
374 s/CHECKSUM=[^;\s]+//i;
375 s/Type=[^;\s]+//i; # ISAM , # older versions
376 s/COLLATE=[^;\s]+//i; # table's collate
377 s/COLLATE\s+[^;\s]+//i; # table's collate
378 # possible AUTO_INCREMENT starting index, it is used in mysql 5.0.26, not sure since which version
379 if (/AUTO_INCREMENT=(\d+)/i) {
380 # should take < ---- ) ENGINE=MyISAM AUTO_INCREMENT=16 DEFAULT CHARSET=latin1;
381 # and should ouput ---> CREATE SEQUENCE "rhm_host_info_id_seq" START WITH 16;
382 my $start_value = $1;
383 print $auto_increment_seq . "--\n";
384 # print $pre_create_sql . "--\n";
385 $pre_create_sql =~ s/(CREATE SEQUENCE $auto_increment_seq )/$1 START WITH $start_value /;
386 }
387 s/AUTO_INCREMENT=\d+//i;
388 s/PACK_KEYS=\d//i; # mysql 5.0.22
389 s/DEFAULT CHARSET=[^;\s]+//i; # my mysql version is 4.1.11
390 s/ENGINE\s*=\s*[^;\s]+//i; # my mysql version is 4.1.11
391 s/ROW_FORMAT=[^;\s]+//i; # my mysql version is 5.0.22
392 s/MIN_ROWS=[^;\s]+//i;
393 s/MAX_ROWS=[^;\s]+//i;
394 s/AVG_ROW_LENGTH=[^;\s]+//i;
395 if (/COMMENT='([^']*)'/) { # ) ENGINE=MyISAM DEFAULT CHARSET=utf8 COMMENT='must be country zones';
396 $post_create_sql.="COMMENT ON TABLE $table IS '$1'\;"; # COMMENT ON table_name IS 'text';
397 s/COMMENT='[^']*'//i;
398 }
399 $create_sql =~ s/,$//g; # strip last , inside create table
400 # make sure we end in a comma, as KEY statments are turned
401 # into post_create_sql indices
402 # they often are the last line so leaving a 'hanging comma'
403 my @array = split("\n", $create_sql);
404 for (my $a = $#array; $a >= 0; $a--) { #loop backwards
405 if ($a == $#array && $array[$a] =~ m/,\s*$/) { # for last line
406 $array[$a] =~ s/,\s*$//;
407 next;
408 }
409 if ($array[$a] !~ m/create table/i) { # i.e. if there was more than one column in table
410 if ($a != $#array && $array[$a] !~ m/,\s*$/ ) { # for second to last
411 $array[$a] =~ s/$/,/;
412 last;
413 }
414 elsif ($a != $#array && $array[$a] =~ m/,\s*$/ ) { # for second to last
415 last;
416 }
417 }
418 }
419 $create_sql = join("\n", @array) . "\n";
420 $create_sql .= $_;
421
422 # put comments out first
423 print OUT $pre_create_sql;
424
425 # create separate table to reference and to hold mysql's possible set data-type
426 # values. do that table's creation before create table
427 # definition
428 foreach $column_name (keys %constraints) {
429 $type=$constraints{$column_name}{'type'};
430 $column_valuesStr = $constraints{$column_name}{'values'};
431 $constraint_table_name = get_identifier(${table},${column_name} ,"constraint_table");
432 if ($type eq 'set') {
433 print OUT qq~DROP TABLE $constraint_table_name CASCADE\\g\n~ ;
434 print OUT qq~create table $constraint_table_name ( set_values varchar UNIQUE)\\g\n~ ;
435 $function_create_sql .= make_plpgsql($table,$column_name);
436 } elsif ($type eq 'year') {
437 print OUT qq~DROP TABLE $constraint_table_name CASCADE\\g\n~ ;
438 print OUT qq~create table $constraint_table_name ( year_values varchar UNIQUE)\\g\n~ ;
439 }
440 @column_values = split /,/, $column_valuesStr;
441 foreach $value (@column_values) {
442 print OUT qq~insert into $constraint_table_name values ( $value )\\g\n~; # ad ' for ints and varchars
443 }
444 }
445
446 # print create table and reset create table vars
447 # when moving from each "create table" to "insert" part of dump
448 print OUT $create_sql;
449 print OUT $function_create_sql;
450 $pre_create_sql="";
451 $auto_increment_seq="";
452 $create_sql="";
453 $function_create_sql='';
454 %constraints=();
455 # the post_create_sql for this table is output at the beginning of the next table def
456 # in case we want to make indexes after doing inserting
457 next;
458 }
459 if (/^\s*(\w+)\s+.*COMMENT\s*'([^']*)'/) { #`zone_country_id` int(11) COMMENT 'column comment here',
460 $quoted_column=quote_and_lc($1);
461 $post_create_sql.="COMMENT ON COLUMN $table"."."." $quoted_column IS '$2'\;"; # COMMENT ON table_name.column_name IS 'text';
462 s/COMMENT\s*'[^']*'//i;
463 }
464
465
466 # NUMERIC DATATYPES
467 #
468 # auto_increment -> sequences
469 # UNSIGNED conversions
470 # TINYINT
471 # SMALLINT
472 # MEDIUMINT
473 # INT, INTEGER
474 # BIGINT
475 #
476 # DOUBLE [PRECISION], REAL
477 # DECIMAL(M,D), NUMERIC(M,D)
478 # FLOAT(p)
479 # FLOAT
480
481 s/(\w*int)\(\d+\)/$1/g; # hack of the (n) stuff for e.g. mediumint(2) int(3)
482
483 if (/^(\s*)(\w+)\s*.*numeric.*auto_increment/i) { # int,auto_increment -> serial
484 $seq = get_identifier($table, $2, 'seq');
485 $quoted_column=quote_and_lc($2);
486 $pre_create_sql.= "DROP SEQUENCE $seq CASCADE\;\n\n"; # cascade will force drop of table, too
487 $pre_create_sql.= "CREATE SEQUENCE $seq \;\n\n";
488 $auto_increment_seq = $seq ; # save in case we have the AUTO_INCREMENT=16 to default to
489 # Note: Before PostgreSQL 8.1, the arguments of the sequence functions were of type text, not regclass,
490 # and the above-described conversion from a text string to an OID value would happen at run time during
491 # each call. For backwards compatibility, this facility still exists, but internally it is now handled
492 # as an implicit coercion from text to regclass before the function is invoked. (source: 8.1.3 manual, section 9.12)
493 s/^(\s*)(\w+)\s*.*NUMERIC(.*)auto_increment([^,]*)/$1 $quoted_column numeric $3 DEFAULT nextval('$seq') $4/ig;
494 # MYSQL: data_id mediumint(8) unsigned NOT NULL auto_increment,
495 $create_sql.=$_;
496 next;
497 }
498 if (/^\s*(\w+)\s+.*int.*auto_increment/i) { # example: data_id mediumint(8) unsigned NOT NULL auto_increment,
499 # int,auto_increment -> serial (same as what is done below)
500 # for postgres side see http://www.postgresql.org/docs/7.4/interactive/datatype.html#DATATYPE-SERIAL
501 $seq = get_identifier($table, $1, 'seq');
502 $quoted_column=quote_and_lc($1);
503 $pre_create_sql.= "DROP SEQUENCE $seq CASCADE \;\n\n"; # cascade will force drop of table, too
504 $auto_increment_seq= $seq ; # save in case we have the AUTO_INCREMENT=16 to default to
505 $pre_create_sql.= "CREATE SEQUENCE $seq \;\n\n";
506 s/(\s*)(\w+)\s+.*int.*auto_increment[^,]*/$1 $quoted_column integer DEFAULT nextval('$seq') NOT NULL/ig;
507 $create_sql.=$_;
508 next;
509 }
510
511
512
513
514 # convert UNSIGNED to CHECK constraints
515 if (m/^(\s*)(\w+)\s+((float|double|double precision|real|decimal|numeric))(.*)unsigned/i) {
516 $quoted_column = quote_and_lc($2);
517 s/^(\s*)(\w+)\s+((float|double|double precision|real|decimal|numeric))(.*)unsigned/$1 $quoted_column $3 $4 CHECK ($quoted_column >= 0)/i;
518 }
519 # example: `wordsize` tinyint(3) unsigned default NULL,
520 if (m/^(\s+)(\w+)\s+(\w+)\s+unsigned/i) {
521 $quoted_column=quote_and_lc($2);
522 s/^(\s+)(\w+)\s+(\w+)\s+unsigned/$1 $quoted_column $3 CHECK ($quoted_column >= 0)/i;
523 }
524 if (m/^(\s*)(\w+)\s+(bigint.*)unsigned/) {
525 $quoted_column=quote_and_lc($2);
526 # see http://archives.postgresql.org/pgsql-general/2005-07/msg01178.php
527 # and see http://www.postgresql.org/docs/8.2/interactive/datatype-numeric.html
528 # see http://dev.mysql.com/doc/refman/5.1/en/numeric-types.html max size == 20 digits
529 s/^(\s*)(\w+)\s+bigint(.*)unsigned/$1 $quoted_column NUMERIC (20,0) CHECK ($quoted_column >= 0)/i;
530
531 }
532
533 # int type conversion
534 # TINYINT (signed) -128 to 127 (unsigned) 0 255
535 # SMALLINT A small integer. The signed range is -32768 to 32767. The unsigned range is 0 to 65535.
536 # MEDIUMINT A medium-sized integer. The signed range is -8388608 to 8388607. The unsigned range is 0 to 16777215.
537 # INT A normal-size integer. The signed range is -2147483648 to 2147483647. The unsigned range is 0 to 4294967295.
538 # BIGINT The signed range is -9223372036854775808 to 9223372036854775807. The unsigned range is 0 to 18446744073709551615
539 # for postgres see http://www.postgresql.org/docs/8.2/static/datatype-numeric.html#DATATYPE-INT
540 s/^(\s+"*\w+"*\s+)tinyint/$1 smallint/i;
541 s/^(\s+"*\w+"*\s+)mediumint/$1 integer/i;
542
543 # the floating point types
544 # double -> double precision
545 # double(n,m) -> double precision
546 # float - no need for conversion
547 # float(n) - no need for conversion
548 # float(n,m) -> double precision
549
550 s/(^\s*\w+\s+)double(\(\d+,\d+\))?/$1double precision/i;
551 s/float(\(\d+,\d+\))/double precision/i;
552
553 #
554 # CHARACTER TYPES
555 #
556 # set
557 # enum
558 # binary(M), VARBINARy(M), tinyblob, tinytext,
559 # bit
560 # char(M), varchar(M)
561 # blob -> text
562 # mediumblob
563 # longblob, longtext
564 # text -> text
565 # mediumtext
566 # longtext
567 # mysql docs: A BLOB is a binary large object that can hold a variable amount of data.
568
569 # set
570 # For example, a column specified as SET('one', 'two') NOT NULL can have any of these values:
571 # ''
572 # 'one'
573 # 'two'
574 # 'one,two'
575 if (/(\w*)\s+set\(((?:['"]\w+['"]\s*,*)+(?:['"]\w+['"])*)\)(.*)$/i) { # example: `au_auth` set('r','w','d') NOT NULL default '',
576 $column_name = $1;
577 $constraints{$column_name}{'values'} = $2; # 'abc','def', ...
578 $constraints{$column_name}{'type'} = "set"; # 'abc','def', ...
579 $_ = qq~ $column_name varchar , ~;
580 $column_name = quote_and_lc($1);
581 $create_sql.=$_;
582 next;
583
584 }
585 if (/(\S*)\s+enum\(((?:['"][^'"]+['"]\s*,)+['"][^'"]+['"])\)(.*)$/i) { # enum handling
586 # example: `test` enum('?','+','-') NOT NULL default '?'
587 # $2 is the values of the enum 'abc','def', ...
588 $quoted_column=quote_and_lc($1);
589 # "test" NOT NULL default '?' CONSTRAINT test_test_constraint CHECK ("test" IN ('?','+','-'))
590 $_ = qq~ $quoted_column varchar CHECK ($quoted_column IN ( $2 ))$3\n~; # just assume varchar?
591 $create_sql.=$_;
592 next;
593 }
594 # Take care of "binary" option for char and varchar
595 # (pre-4.1.2, it indicated a byte array; from 4.1.2, indicates
596 # a binary collation)
597 s/(?:var)?char(?:\(\d+\))? (?:byte|binary)/bytea/i;
598 if (m/(?:var)?binary\s*\(\d+\)/i) { # c varBINARY(3) in Mysql
599 warn "WARNING in table '$table' '$_': binary type is converted to bytea (unsized) for Postgres\n";
600 }
601 s/(?:var)?binary(?:\(\d+\))?/bytea/i; # c varBINARY(3) in Mysql
602 s/bit(?:\(\d+\))?/bytea/i; # bit datatype -> bytea
603
604 # large datatypes
605 s/\w*blob/bytea/gi;
606 s/tinytext/text/gi;
607 s/mediumtext/text/gi;
608 s/longtext/text/gi;
609
610 # char -> varchar -- if specified as a command line option
611 # PostgreSQL would otherwise pad with spaces as opposed
612 # to MySQL! Your user interface may depend on this!
613 if ($CHAR2VARCHAR) {
614 s/(^\s+\S+\s+)char/${1}varchar/gi;
615 }
616
617 # nuke column's collate and character set
618 s/(\S+)\s+character\s+set\s+\w+/$1/gi;
619 s/(\S+)\s+collate\s+\w+/$1/gi;
620
621 #
622 # DATE AND TIME TYPES
623 #
624 # date time
625 # year
626 # datetime
627 # timestamp
628
629 # date time
630 # these are the same types in postgres, just do the replacement of 0000-00-00 date
631
632 if (m/default '(\d+)-(\d+)-(\d+)([^']*)'/i) { # we grab the year, month and day
633 # NOTE: times of 00:00:00 are possible and are okay
634 my $time = '';
635 my $year=$1;
636 my $month= $2;
637 my $day = $3;
638 if ($4) {
639 $time = $4;
640 }
641 if ($year eq "0000") { $year = '1970'; }
642 if ($month eq "00") { $month = '01'; }
643 if ($day eq "00") { $day = '01'; }
644 s/default '[^']+'/default '$year-$month-$day$time'/i; # finally we replace with $datetime
645 }
646
647 # convert mysql's year datatype to a constraint
648 if (/(\w*)\s+year\(4\)(.*)$/i) { # can be integer OR string 1901-2155
649 $constraint_table_name = get_identifier($table,$1 ,"constraint_table");
650 $column_name=quote_and_lc($1);
651 @year_holder = ();
652 $year='';
653 for (1901 .. 2155) {
654 $year = "'$_'";
655 unless ($year =~ /2155/) { $year .= ','; }
656 push( @year_holder, $year);
657 }
658 $constraints{$column_name}{'values'} = join('','',@year_holder); # '1901','1902', ...
659 $constraints{$column_name}{'type'} = "year";
660 $_ = qq~ $column_name varchar CONSTRAINT ${table}_${column_name}_constraint REFERENCES $constraint_table_name ("year_values") $2\n~;
661 $create_sql.=$_;
662 next;
663 } elsif (/(\w*)\s+year\(2\)(.*)$/i) { # same for a 2-integer string
664 $constraint_table_name = get_identifier($table,$1 ,"constraint_table");
665 $column_name=quote_and_lc($1);
666 @year_holder = ();
667 $year='';
668 for (1970 .. 2069) {
669 $year = "'$_'";
670 if ($year =~ /2069/) { next; }
671 push( @year_holder, $year);
672 }
673 push( @year_holder, '0000');
674 $constraints{$column_name}{'values'} = join(',',@year_holder); # '1971','1972', ...
675 $constraints{$column_name}{'type'} = "year"; # 'abc','def', ...
676 $_ = qq~ $1 varchar CONSTRAINT ${table}_${column_name}_constraint REFERENCES $constraint_table_name ("year_values") $2\n~;
677 $create_sql.=$_;
678 next;
679 }
680
681 # datetime
682 # Default on a dump from MySQL 5.0.22 is in the same form as datetime so let it flow down
683 # to the timestamp section and deal with it there
684 s/(${sl})datetime /$1timestamp without time zone /i;
685
686 # change not null datetime field to null valid ones
687 # (to support remapping of "zero time" to null
688 # s/($sl)datetime not null/$1timestamp without time zone/i;
689
690
691 # timestamps
692 #
693 # nuke datetime representation (not supported in PostgreSQL)
694 # change default time of 0000-00-00 to 1970-01-01
695
696 # we may possibly need to create a trigger to provide
697 # equal functionality with ON UPDATE CURRENT TIMESTAMP
698
699
700 if (m/${sl}timestamp/i) {
701 if ( m/ON UPDATE CURRENT_TIMESTAMP/i ) { # the ... default CURRENT_TIMESTAMP only applies for blank inserts, not updates
702 s/ON UPDATE CURRENT_TIMESTAMP//i ;
703 m/^\s*(\w+)\s+timestamp/i ;
704 # automatic trigger creation
705 $table_no_quotes =~ s/"//g;
706$function_create_sql .= " CREATE OR REPLACE FUNCTION update_". $table_no_quotes . "() RETURNS trigger AS '
707BEGIN
708 NEW.$1 := CURRENT_TIMESTAMP;
709 RETURN NEW;
710END;
711' LANGUAGE 'plpgsql';
712
713-- before INSERT is handled by 'default CURRENT_TIMESTAMP'
714CREATE TRIGGER add_current_date_to_".$table_no_quotes." BEFORE UPDATE ON ". $table . " FOR EACH ROW EXECUTE PROCEDURE
715update_".$table_no_quotes."();\n";
716
717 }
718 if ($tables_first_timestamp_column && m/DEFAULT NULL/i) {
719 # DEFAULT NULL is the same as DEFAULT CURRENT_TIMESTAMP for the first TIMESTAMP column. (MYSQL manual)
720 s/($sl)(timestamp\s+)default null/$1 $2 DEFAULT CURRENT_TIMESTAMP/i;
721 }
722 $tables_first_timestamp_column= 0;
723 if (m/${sl}timestamp\s*\(\d+\)/i) { # fix for timestamps with width spec not handled (ID: 1628)
724 warn "WARNING for in table '$table' '$_': your default timestamp width is being ignored for table $table \n";
725 s/($sl)timestamp(?:\(\d+\))/$1datetime/i;
726 }
727 } # end timestamp section
728
729 # KEY AND UNIQUE CREATIONS
730 #
731 # unique
732 if ( /^\s+unique\s+\(([^(]+)\)/i ) { # example UNIQUE `name` (`name`), same as UNIQUE KEY
733 # POSTGRESQL: treat same as mysql unique
734 $quoted_column = quote_and_lc($1);
735 s/\s+unique\s+\(([^(]+)\)/ unique ($quoted_column) /i;
736 $create_sql.=$_;
737 next;
738 } elsif ( /^\s+unique\s+key\s*(\w+)\s*\(([^(]+)\)/i ) { # example UNIQUE KEY `name` (`name`)
739 # MYSQL: unique key: allows null=YES, allows duplicates=NO (*)
740 # ... new ... UNIQUE KEY `unique_fullname` (`fullname`) in my mysql v. Ver 14.12 Distrib 5.1.7-beta
741 # POSTGRESQL: treat same as mysql unique
742 # just quote columns
743 $quoted_column = quote_and_lc($2);
744 s/\s+unique\s+key\s*(\w+)\s*\(([^(]+)\)/ unique ($quoted_column) /i;
745 $create_sql.=$_;
746 # the index corresponding to the 'key' is automatically created
747 next;
748 }
749 # keys
750 if ( /^\s+fulltext key\s+/i) { # example: FULLTEXT KEY `commenttext` (`commenttext`)
751 # that is key as a word in the first check for a match
752 # the tsvector datatype is made for these types of things
753 # example mysql file:
754 # what is tsvector datatype?
755 # http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/docs/tsearch-V2-intro.html
756 warn "dba must do fulltext key transformation for $table\n";
757 next;
758 }
759 if ( /^(\s+)constraint (\S+) foreign key \((\S+)\) references (\S+) \((\S+)\)(.*)/i ) {
760 $quoted_column =quote_and_lc($3);
761 $col=quote_and_lc($5);
762 $post_create_sql .= "ALTER TABLE $table ADD FOREIGN KEY ($quoted_column) REFERENCES " . quote_and_lc($4) . " ($col);\n";
763 next;
764 }
765 if ( /^\s*primary key\s*\(([^)]+)\)([,\s]+)/i ) { # example PRIMARY KEY (`name`)
766 # MYSQL: primary key: allows null=NO , allows duplicates=NO
767 # POSTGRESQL: When an index is declared unique, multiple table rows with equal indexed values will not be
768 # allowed. Null values are not considered equal.
769 # POSTGRESQL quote's source: 8.1.3 docs section 11.5 "unique indexes"
770 # so, in postgres, we need to add a NOT NULL to the UNIQUE constraint
771 # and, primary key (mysql) == primary key (postgres) so that we *really* don't need change anything
772 $quoted_column = quote_and_lc($1);
773 s/(\s*)primary key\s+\(([^)]+)\)([,\s]+)/$1 primary key ($quoted_column)$3/i;
774 # indexes are automatically created for unique columns
775 $create_sql.=$_;
776 next;
777 } elsif (m/^\s+key\s[-_\s\w]+\((.+)\)/i ) { # example: KEY `idx_mod_english_def_word` (`word`),
778 # regular key: allows null=YES, allows duplicates=YES
779 # MYSQL: KEY is normally a synonym for INDEX. http://dev.mysql.com/doc/refman/5.1/en/create-table.html
780 #
781 # * MySQL: ALTER TABLE {$table} ADD KEY $column ($column)
782 # * PostgreSQL: CREATE INDEX {$table}_$column_idx ON {$table}($column) // Please note the _idx "extension"
783 # PRIMARY KEY (`postid`),
784 # KEY `ownerid` (`ownerid`)
785 # create an index for everything which has a key listed for it.
786 my $col = $1;
787 # TODO we don't have a translation for the substring syntax in text columns in MySQL (e.g. "KEY my_idx (mytextcol(20))")
788 # for now just getting rid of the brackets and numbers (the substring specifier):
789 $col=~s/\(\d+\)//g;
790 $quoted_column = quote_and_lc($col);
791 if ($col =~ m/,/) {
792 $col = s/,/_/;
793 }
794 $index = get_identifier($table, $col, 'idx');
795 $post_create_sql.="CREATE INDEX $index ON $table USING btree ($quoted_column)\;";
796 # just create index do not add to create table statement
797 next;
798 }
799
800 # handle 'key' declared at end of column
801 if (/\w+.*primary key/i) { # mysql: key is normally just a synonym for index
802 # just leave as is ( postgres has primary key type)
803
804
805 } elsif (/(\w+\s+(?:$mysql_datatypesStr)\s+.*)key/i) { # mysql: key is normally just a synonym for index
806 # I can't find a reference for 'key' in a postgres command without using the word 'primary key'
807 s/$1key/$1/i ;
808 $index = get_identifier($table, $1, 'idx');
809 $quoted_column =quote_and_lc($1);
810 $post_create_sql.="CREATE INDEX $index ON $table USING btree ($quoted_column) \;";
811 $create_sql.=$_;
812 }
813
814
815
816 # do we really need this anymore?
817 # remap colums with names of existing system attribute
818 if (/"oid"/i) {
819 s/"oid"/"_oid"/g;
820 print STDERR "WARNING: table $table uses column \"oid\" which is renamed to \"_oid\"\nYou should fix application manually! Press return to continue.";
821 my $wait=<STDIN>;
822 }
823
824 s/oid/_oid/i if (/key/i && /oid/i); # fix oid in key
825
826 # FINAL QUOTING OF ALL COLUMNS
827 # quote column names which were not already quoted
828 # perhaps they were not quoted because they were not explicitly handled
829 if (!/^\s*"(\w+)"(\s+)/i) {
830 /^(\s*)(\w+)(\s+)(.*)$/i ;
831 $quoted_column= quote_and_lc($2);
832 s/^(\s*)(\w+)(\s+)(.*)$/$1 $quoted_column $3 $4 /;
833 }
834 $create_sql.=$_;
835 # END of if ($create_sql ne "") i.e. were inside create table statement so processed datatypes
836}
837# add "not in create table" comments or empty lines to pre_create_sql
838elsif (/^#/ || /^$/ || /^\s*--/) {
839 s/^#/--/; # Two hyphens (--) is the SQL-92 standard indicator for comments
840 $pre_create_sql .= $_ ; # printed above create table statement
841 next;
842}
843elsif (/^\s*insert into/i) { # not inside create table and doing insert
844 # fix mysql's zero/null value for timestamps
845 s/'0000-00-00/'1970-01-01/gi;
846 # commented out to fix bug "Field contents interpreted as a timestamp", what was the point of this line anyway?
847 #s/([12]\d\d\d)([01]\d)([0-3]\d)([0-2]\d)([0-6]\d)([0-6]\d)/'$1-$2-$3 $4:$5:$6'/;
848
849 #---- fix data in inserted data: (from MS world)
850 s!\x96!-!g; # --
851 s!\x93!"!g; # ``
852 s!\x94!"!g; # ''
853 s!\x85!... !g; # \ldots
854 s!\x92!`!g;
855
856 print OUT $pre_create_sql; # print comments preceding the insert section
857 $pre_create_sql="";
858 $auto_increment_seq = "";
859
860 s/'((?:.*?(?:\\')?.*?)*)'([),])/E'$1'$2/g;
861 # for the E'' see http://www.postgresql.org/docs/8.2/interactive/release-8-1.html
862
863 # split 'extended' INSERT INTO statements to something PostgreSQL can understand
864 ( $insert_table, $valueString) = $_ =~ m/^INSERT\s+INTO\s+['`"]*(.*?)['`"]*\s+VALUES\s*(.*)/i;
865 $insert_table = quote_and_lc($insert_table);
866 # parse valueString
867 my @rows = $valueString =~ m/$rowRe/g;
868
869 s/^INSERT INTO.*?\);//i; # hose the statement which is to be replaced whether a run-on or not
870 # only convert INSERT INTO statements with multiple values
871 if (@rows > 1)
872 {
873 for my $row (@rows)
874 {
875 print OUT qq(INSERT INTO $insert_table VALUES ($row);\n);
876 }
877
878 # end command
879 print OUT "\n";
880 } else { # guarantee table names are quoted
881 print OUT qq(INSERT INTO $insert_table VALUES $valueString \n);
882 }
883
884} else { #
885 print OUT $_ ; # example: /*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
886}
887# keep looping and get next line of IN file
888
889} # END while(<IN>)
890
891print_post_create_sql(); # in case there is extra from the last table
892
893#################################################################
894# 5. print_plgsql function prototype
895# emulate the set datatype with the following plpgsql function
896# looks ugly so putting at end of file
897#################################################################
898#
899sub make_plpgsql {
900my ($table,$column_name) = ($_[0],$_[1]);
901$table=~s/\"//g; # make sure that $table doesn't have quotes so we don't end up with redundant quoting
902my $constraint_table = get_identifier($table,$column_name ,"constraint_table");
903return "
904-- this function is called by the insert/update trigger
905-- it checks if the INSERT/UPDATE for the 'set' column
906-- contains members which comprise a valid mysql set
907-- this TRIGGER function therefore acts like a constraint
908-- provided limited functionality for mysql's set datatype
909-- just verifies and matches for string representations of the set at this point
910-- though the set datatype uses bit comparisons, the only supported arguments to our
911-- set datatype are VARCHAR arguments
912-- to add a member to the set add it to the ".$table."_".$column_name." table
913CREATE OR REPLACE FUNCTION check_".$table."_".$column_name."_set( ) RETURNS TRIGGER AS \$\$\n
914DECLARE
915----
916arg_str VARCHAR ;
917argx VARCHAR := '';
918nobreak INT := 1;
919rec_count INT := 0;
920psn INT := 0;
921str_in VARCHAR := NEW.$column_name;
922----
923BEGIN
924----
925IF str_in IS NULL THEN RETURN NEW ; END IF;
926arg_str := REGEXP_REPLACE(str_in, '\\',\\'', ','); -- str_in is CONSTANT
927arg_str := REGEXP_REPLACE(arg_str, '^\\'', '');
928arg_str := REGEXP_REPLACE(arg_str, '\\'\$', '');
929-- RAISE NOTICE 'arg_str %',arg_str;
930psn := POSITION(',' in arg_str);
931IF psn > 0 THEN
932 psn := psn - 1; -- minus-1 from comma position
933 -- RAISE NOTICE 'psn %',psn;
934 argx := SUBSTRING(arg_str FROM 1 FOR psn); -- get one set member
935 psn := psn + 2; -- go to first starting letter
936 arg_str := SUBSTRING(arg_str FROM psn); -- hack it off
937ELSE
938 psn := 0; -- minus-1 from comma position
939 argx := arg_str;
940END IF;
941-- RAISE NOTICE 'argx %',argx;
942-- RAISE NOTICE 'new arg_str: %',arg_str;
943WHILE nobreak LOOP
944 EXECUTE 'SELECT count(*) FROM $constraint_table WHERE set_values = ' || quote_literal(argx) INTO rec_count;
945 IF rec_count = 0 THEN RAISE EXCEPTION 'one of the set values was not found';
946 END IF;
947 IF psn > 0 THEN
948 psn := psn - 1; -- minus-1 from comma position
949 -- RAISE NOTICE 'psn %',psn;
950 argx := SUBSTRING(arg_str FROM 1 FOR psn); -- get one set member
951 psn := psn + 2; -- go to first starting letter
952 arg_str := SUBSTRING(arg_str FROM psn); -- hack it off
953 psn := POSITION(',' in arg_str);
954 ELSE nobreak = 0;
955 END IF;
956 -- RAISE NOTICE 'next argx % and next arg_str %', argx, arg_str;
957END LOOP;
958RETURN NEW;
959----
960END;
961\$\$ LANGUAGE 'plpgsql' VOLATILE;
962
963drop trigger set_test ON $table;
964-- make a trigger for each set field
965-- make trigger and hard-code in column names
966-- see http://archives.postgresql.org/pgsql-interfaces/2005-02/msg00020.php
967CREATE TRIGGER set_test
968BEFORE INSERT OR UPDATE ON $table FOR EACH ROW
969EXECUTE PROCEDURE check_".$table."_".$column_name."_set();\n";
970} # end sub make_plpgsql();