r26085 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r26084‎ | r26085 | r26086 >
Date:20:09, 24 September 2007
Author:greg
Status:old
Tags:
Comment:
Add column type brains, map out all non-standard stuff
Modified paths:
  • /trunk/phase3/maintenance/postgres/compare_schemas.pl (modified) (history)

Diff [purge]

Index: trunk/phase3/maintenance/postgres/compare_schemas.pl
@@ -112,6 +112,8 @@
113113 }
114114 elsif (/^ (\w+) $datatype$typeval$typeval2{0,3},?$/) {
115115 $info{$table}{column}{$1} = $2;
 116+ my $extra = $3 || '';
 117+ $info{$table}{columnfull}{$1} = "$2$extra";
116118 }
117119 elsif (/^ ($indextype)(?: (\w+))? \(([\w, \(\)]+)\),?$/) {
118120 $info{$table}{lc $1.'_name'} = $2 ? $2 : '';
@@ -209,6 +211,168 @@
210212 }
211213 }
212214
 215+## Which column types are okay to map from mysql to postgres?
 216+my $COLMAP = q{
 217+## INTS:
 218+tinyint SMALLINT
 219+int INTEGER SERIAL
 220+bigint BIGINT
 221+real NUMERIC
 222+float NUMERIC
 223+
 224+## TEXT:
 225+varchar(32) TEXT
 226+varchar(70) TEXT
 227+varchar(255) TEXT
 228+varchar TEXT
 229+text TEXT
 230+tinytext TEXT
 231+ENUM TEXT
 232+
 233+## TIMESTAMPS:
 234+varbinary(14) TIMESTAMPTZ
 235+binary(14) TIMESTAMPTZ
 236+datetime TIMESTAMPTZ
 237+timestamp TIMESTAMPTZ
 238+
 239+## BYTEA:
 240+mediumblob BYTEA
 241+
 242+## OTHER:
 243+bool CHAR # Sigh
 244+
 245+};
 246+## Allow specific exceptions to the above
 247+my $COLMAPOK = q{
 248+## User inputted text strings:
 249+ar_comment tinyblob TEXT
 250+fa_description tinyblob TEXT
 251+img_description tinyblob TEXT
 252+ipb_reason tinyblob TEXT
 253+log_action varbinary(10) TEXT
 254+oi_description tinyblob TEXT
 255+rev_comment tinyblob TEXT
 256+rc_log_action varbinary(255) TEXT
 257+rc_log_type varbinary(255) TEXT
 258+
 259+## Simple text-only strings:
 260+ar_flags tinyblob TEXT
 261+fa_minor_mime varbinary(32) TEXT
 262+fa_storage_group varbinary(16) TEXT # Just 'deleted' for now, should stay plain text
 263+fa_storage_key varbinary(64) TEXT # sha1 plus text extension
 264+ipb_address tinyblob TEXT # IP address or username
 265+ipb_range_end tinyblob TEXT # hexadecimal
 266+ipb_range_start tinyblob TEXT # hexadecimal
 267+img_minor_mime varbinary(32) TEXT
 268+img_sha1 varbinary(32) TEXT
 269+job_cmd varbinary(60) TEXT # Should we limit to 60 as well?
 270+keyname varbinary(255) TEXT # No tablename prefix (objectcache)
 271+ll_lang varbinary(20) TEXT # Language code
 272+log_params blob TEXT # LF separated list of args
 273+log_type varbinary(10) TEXT
 274+oi_minor_mime varbinary(32) TEXT
 275+oi_sha1 varbinary(32) TEXT
 276+old_flags tinyblob TEXT
 277+old_text mediumblob TEXT
 278+page_restrictions tinyblob TEXT # CSV string
 279+pf_server varchar(30) TEXT
 280+pr_level varbinary(60) TEXT
 281+pr_type varbinary(60) TEXT
 282+qc_type varbinary(32) TEXT
 283+qcc_type varbinary(32) TEXT
 284+qci_type varbinary(32) TEXT
 285+rc_params blob TEXT
 286+ug_group varbinary(16) TEXT
 287+user_email_token binary(32) TEXT
 288+user_ip varbinary(40) TEXT
 289+user_newpassword tinyblob TEXT
 290+user_options blob TEXT
 291+user_password tinyblob TEXT
 292+user_token binary(32) TEXT
 293+
 294+## Text URLs:
 295+el_index blob TEXT
 296+el_to blob TEXT
 297+iw_url blob TEXT
 298+tb_url blob TEXT
 299+tc_url varbinary(255) TEXT
 300+
 301+## Deprecated or not yet used:
 302+ar_text mediumblob TEXT
 303+job_params blob TEXT
 304+log_deleted tinyint INTEGER # Not used yet, but keep it INTEGER for safety
 305+rc_type tinyint CHAR
 306+
 307+## Number tweaking:
 308+fa_bits int SMALLINT # bits per pixel
 309+fa_height int SMALLINT
 310+fa_width int SMALLINT # Hope we don't see an image this wide...
 311+hc_id int BIGINT # Odd that site_stats is all bigint...
 312+img_bits int SMALLINT # bits per image should stay sane
 313+oi_bits int SMALLINT
 314+
 315+## True binary fields, usually due to gzdeflate and/or serialize:
 316+math_inputhash varbinary(16) BYTEA
 317+math_outputhash varbinary(16) BYTEA
 318+
 319+## Namespaces: not need for such a high range
 320+ar_namespace int SMALLINT
 321+job_namespace int SMALLINT
 322+log_namespace int SMALLINT
 323+page_namespace int SMALLINT
 324+pl_namespace int SMALLINT
 325+qc_namespace int SMALLINT
 326+rc_namespace int SMALLINT
 327+rd_namespace int SMALLINT
 328+tl_namespace int SMALLINT
 329+wl_namespace int SMALLINT
 330+
 331+## "Bools"
 332+ar_minor_edit tinyint CHAR
 333+iw_trans tinyint CHAR
 334+page_is_new tinyint CHAR
 335+page_is_redirect tinyint CHAR
 336+rc_bot tinyint CHAR
 337+rc_deleted tinyint CHAR
 338+rc_minor tinyint CHAR
 339+rc_new tinyint CHAR
 340+rc_patrolled tinyint CHAR
 341+rev_deleted tinyint CHAR
 342+rev_minor_edit tinyint CHAR
 343+
 344+## Easy enough to change if a wiki ever does grow this big:
 345+ss_good_articles bigint INTEGER
 346+ss_total_edits bigint INTEGER
 347+ss_total_pages bigint INTEGER
 348+ss_total_views bigint INTEGER
 349+ss_users bigint INTEGER
 350+
 351+## True IP - keep an eye on these, coders tend to make textual assumptions
 352+rc_ip varbinary(40) CIDR # Want to keep an eye on this
 353+
 354+## Others:
 355+tc_time int TIMESTAMPTZ
 356+
 357+
 358+};
 359+
 360+my %colmap;
 361+for (split /\n/ => $COLMAP) {
 362+ next unless /^\w/;
 363+ s/(.*?)#.*/$1/;
 364+ my ($col,@maps) = split / +/, $_;
 365+ for (@maps) {
 366+ $colmap{$col}{$_} = 1;
 367+ }
 368+}
 369+
 370+my %colmapok;
 371+for (split /\n/ => $COLMAPOK) {
 372+ next unless /^\w/;
 373+ my ($col,$old,$new) = split / +/, $_;
 374+ $colmapok{$col}{$old}{$new} = 1;
 375+}
 376+
213377 ## Old but not new
214378 for my $t (sort keys %{$old{$oldfile}}) {
215379 if (!exists $new{$t} and !exists $ok{OLD}{$t}) {
@@ -218,6 +382,7 @@
219383 next if exists $ok{OLD}{$t} and !$ok{OLD}{$t};
220384 my $newt = exists $ok{OLD}{$t} ? $ok{OLD}{$t} : $t;
221385 my $oldcol = $old{$oldfile}{$t}{column};
 386+ my $oldcolfull = $old{$oldfile}{$t}{columnfull};
222387 my $newcol = $new{$newt}{column};
223388 for my $c (keys %$oldcol) {
224389 if (!exists $newcol->{$c}) {
@@ -225,11 +390,22 @@
226391 next;
227392 }
228393 }
229 - for my $c (keys %$newcol) {
 394+ for my $c (sort keys %$newcol) {
230395 if (!exists $oldcol->{$c}) {
231396 print "Column $t.$c not in $oldfile\n";
232397 next;
233398 }
 399+ ## Column types (roughly) match up?
 400+ my $new = $newcol->{$c};
 401+ my $old = $oldcolfull->{$c};
 402+
 403+ ## Known exceptions:
 404+ next if exists $colmapok{$c}{$old}{$new};
 405+
 406+ $old =~ s/ENUM.*/ENUM/;
 407+ if (! exists $colmap{$old}{$new}) {
 408+ print "Column types for $t.$c do not match: $old does not map to $new\n";
 409+ }
234410 }
235411 }
236412 ## New but not old:

Follow-up revisions

RevisionCommit summaryAuthorDate
r26135Merged revisions 26012-26133 via svnmerge from...david21:15, 25 September 2007

Status & tagging log