Index: trunk/phase3/maintenance/postgres/compare_schemas.pl |
— | — | @@ -112,6 +112,8 @@ |
113 | 113 | } |
114 | 114 | elsif (/^ (\w+) $datatype$typeval$typeval2{0,3},?$/) { |
115 | 115 | $info{$table}{column}{$1} = $2; |
| 116 | + my $extra = $3 || ''; |
| 117 | + $info{$table}{columnfull}{$1} = "$2$extra"; |
116 | 118 | } |
117 | 119 | elsif (/^ ($indextype)(?: (\w+))? \(([\w, \(\)]+)\),?$/) { |
118 | 120 | $info{$table}{lc $1.'_name'} = $2 ? $2 : ''; |
— | — | @@ -209,6 +211,168 @@ |
210 | 212 | } |
211 | 213 | } |
212 | 214 | |
| 215 | +## Which column types are okay to map from mysql to postgres? |
| 216 | +my $COLMAP = q{ |
| 217 | +## INTS: |
| 218 | +tinyint SMALLINT |
| 219 | +int INTEGER SERIAL |
| 220 | +bigint BIGINT |
| 221 | +real NUMERIC |
| 222 | +float NUMERIC |
| 223 | + |
| 224 | +## TEXT: |
| 225 | +varchar(32) TEXT |
| 226 | +varchar(70) TEXT |
| 227 | +varchar(255) TEXT |
| 228 | +varchar TEXT |
| 229 | +text TEXT |
| 230 | +tinytext TEXT |
| 231 | +ENUM TEXT |
| 232 | + |
| 233 | +## TIMESTAMPS: |
| 234 | +varbinary(14) TIMESTAMPTZ |
| 235 | +binary(14) TIMESTAMPTZ |
| 236 | +datetime TIMESTAMPTZ |
| 237 | +timestamp TIMESTAMPTZ |
| 238 | + |
| 239 | +## BYTEA: |
| 240 | +mediumblob BYTEA |
| 241 | + |
| 242 | +## OTHER: |
| 243 | +bool CHAR # Sigh |
| 244 | + |
| 245 | +}; |
| 246 | +## Allow specific exceptions to the above |
| 247 | +my $COLMAPOK = q{ |
| 248 | +## User inputted text strings: |
| 249 | +ar_comment tinyblob TEXT |
| 250 | +fa_description tinyblob TEXT |
| 251 | +img_description tinyblob TEXT |
| 252 | +ipb_reason tinyblob TEXT |
| 253 | +log_action varbinary(10) TEXT |
| 254 | +oi_description tinyblob TEXT |
| 255 | +rev_comment tinyblob TEXT |
| 256 | +rc_log_action varbinary(255) TEXT |
| 257 | +rc_log_type varbinary(255) TEXT |
| 258 | + |
| 259 | +## Simple text-only strings: |
| 260 | +ar_flags tinyblob TEXT |
| 261 | +fa_minor_mime varbinary(32) TEXT |
| 262 | +fa_storage_group varbinary(16) TEXT # Just 'deleted' for now, should stay plain text |
| 263 | +fa_storage_key varbinary(64) TEXT # sha1 plus text extension |
| 264 | +ipb_address tinyblob TEXT # IP address or username |
| 265 | +ipb_range_end tinyblob TEXT # hexadecimal |
| 266 | +ipb_range_start tinyblob TEXT # hexadecimal |
| 267 | +img_minor_mime varbinary(32) TEXT |
| 268 | +img_sha1 varbinary(32) TEXT |
| 269 | +job_cmd varbinary(60) TEXT # Should we limit to 60 as well? |
| 270 | +keyname varbinary(255) TEXT # No tablename prefix (objectcache) |
| 271 | +ll_lang varbinary(20) TEXT # Language code |
| 272 | +log_params blob TEXT # LF separated list of args |
| 273 | +log_type varbinary(10) TEXT |
| 274 | +oi_minor_mime varbinary(32) TEXT |
| 275 | +oi_sha1 varbinary(32) TEXT |
| 276 | +old_flags tinyblob TEXT |
| 277 | +old_text mediumblob TEXT |
| 278 | +page_restrictions tinyblob TEXT # CSV string |
| 279 | +pf_server varchar(30) TEXT |
| 280 | +pr_level varbinary(60) TEXT |
| 281 | +pr_type varbinary(60) TEXT |
| 282 | +qc_type varbinary(32) TEXT |
| 283 | +qcc_type varbinary(32) TEXT |
| 284 | +qci_type varbinary(32) TEXT |
| 285 | +rc_params blob TEXT |
| 286 | +ug_group varbinary(16) TEXT |
| 287 | +user_email_token binary(32) TEXT |
| 288 | +user_ip varbinary(40) TEXT |
| 289 | +user_newpassword tinyblob TEXT |
| 290 | +user_options blob TEXT |
| 291 | +user_password tinyblob TEXT |
| 292 | +user_token binary(32) TEXT |
| 293 | + |
| 294 | +## Text URLs: |
| 295 | +el_index blob TEXT |
| 296 | +el_to blob TEXT |
| 297 | +iw_url blob TEXT |
| 298 | +tb_url blob TEXT |
| 299 | +tc_url varbinary(255) TEXT |
| 300 | + |
| 301 | +## Deprecated or not yet used: |
| 302 | +ar_text mediumblob TEXT |
| 303 | +job_params blob TEXT |
| 304 | +log_deleted tinyint INTEGER # Not used yet, but keep it INTEGER for safety |
| 305 | +rc_type tinyint CHAR |
| 306 | + |
| 307 | +## Number tweaking: |
| 308 | +fa_bits int SMALLINT # bits per pixel |
| 309 | +fa_height int SMALLINT |
| 310 | +fa_width int SMALLINT # Hope we don't see an image this wide... |
| 311 | +hc_id int BIGINT # Odd that site_stats is all bigint... |
| 312 | +img_bits int SMALLINT # bits per image should stay sane |
| 313 | +oi_bits int SMALLINT |
| 314 | + |
| 315 | +## True binary fields, usually due to gzdeflate and/or serialize: |
| 316 | +math_inputhash varbinary(16) BYTEA |
| 317 | +math_outputhash varbinary(16) BYTEA |
| 318 | + |
| 319 | +## Namespaces: not need for such a high range |
| 320 | +ar_namespace int SMALLINT |
| 321 | +job_namespace int SMALLINT |
| 322 | +log_namespace int SMALLINT |
| 323 | +page_namespace int SMALLINT |
| 324 | +pl_namespace int SMALLINT |
| 325 | +qc_namespace int SMALLINT |
| 326 | +rc_namespace int SMALLINT |
| 327 | +rd_namespace int SMALLINT |
| 328 | +tl_namespace int SMALLINT |
| 329 | +wl_namespace int SMALLINT |
| 330 | + |
| 331 | +## "Bools" |
| 332 | +ar_minor_edit tinyint CHAR |
| 333 | +iw_trans tinyint CHAR |
| 334 | +page_is_new tinyint CHAR |
| 335 | +page_is_redirect tinyint CHAR |
| 336 | +rc_bot tinyint CHAR |
| 337 | +rc_deleted tinyint CHAR |
| 338 | +rc_minor tinyint CHAR |
| 339 | +rc_new tinyint CHAR |
| 340 | +rc_patrolled tinyint CHAR |
| 341 | +rev_deleted tinyint CHAR |
| 342 | +rev_minor_edit tinyint CHAR |
| 343 | + |
| 344 | +## Easy enough to change if a wiki ever does grow this big: |
| 345 | +ss_good_articles bigint INTEGER |
| 346 | +ss_total_edits bigint INTEGER |
| 347 | +ss_total_pages bigint INTEGER |
| 348 | +ss_total_views bigint INTEGER |
| 349 | +ss_users bigint INTEGER |
| 350 | + |
| 351 | +## True IP - keep an eye on these, coders tend to make textual assumptions |
| 352 | +rc_ip varbinary(40) CIDR # Want to keep an eye on this |
| 353 | + |
| 354 | +## Others: |
| 355 | +tc_time int TIMESTAMPTZ |
| 356 | + |
| 357 | + |
| 358 | +}; |
| 359 | + |
| 360 | +my %colmap; |
| 361 | +for (split /\n/ => $COLMAP) { |
| 362 | + next unless /^\w/; |
| 363 | + s/(.*?)#.*/$1/; |
| 364 | + my ($col,@maps) = split / +/, $_; |
| 365 | + for (@maps) { |
| 366 | + $colmap{$col}{$_} = 1; |
| 367 | + } |
| 368 | +} |
| 369 | + |
| 370 | +my %colmapok; |
| 371 | +for (split /\n/ => $COLMAPOK) { |
| 372 | + next unless /^\w/; |
| 373 | + my ($col,$old,$new) = split / +/, $_; |
| 374 | + $colmapok{$col}{$old}{$new} = 1; |
| 375 | +} |
| 376 | + |
213 | 377 | ## Old but not new |
214 | 378 | for my $t (sort keys %{$old{$oldfile}}) { |
215 | 379 | if (!exists $new{$t} and !exists $ok{OLD}{$t}) { |
— | — | @@ -218,6 +382,7 @@ |
219 | 383 | next if exists $ok{OLD}{$t} and !$ok{OLD}{$t}; |
220 | 384 | my $newt = exists $ok{OLD}{$t} ? $ok{OLD}{$t} : $t; |
221 | 385 | my $oldcol = $old{$oldfile}{$t}{column}; |
| 386 | + my $oldcolfull = $old{$oldfile}{$t}{columnfull}; |
222 | 387 | my $newcol = $new{$newt}{column}; |
223 | 388 | for my $c (keys %$oldcol) { |
224 | 389 | if (!exists $newcol->{$c}) { |
— | — | @@ -225,11 +390,22 @@ |
226 | 391 | next; |
227 | 392 | } |
228 | 393 | } |
229 | | - for my $c (keys %$newcol) { |
| 394 | + for my $c (sort keys %$newcol) { |
230 | 395 | if (!exists $oldcol->{$c}) { |
231 | 396 | print "Column $t.$c not in $oldfile\n"; |
232 | 397 | next; |
233 | 398 | } |
| 399 | + ## Column types (roughly) match up? |
| 400 | + my $new = $newcol->{$c}; |
| 401 | + my $old = $oldcolfull->{$c}; |
| 402 | + |
| 403 | + ## Known exceptions: |
| 404 | + next if exists $colmapok{$c}{$old}{$new}; |
| 405 | + |
| 406 | + $old =~ s/ENUM.*/ENUM/; |
| 407 | + if (! exists $colmap{$old}{$new}) { |
| 408 | + print "Column types for $t.$c do not match: $old does not map to $new\n"; |
| 409 | + } |
234 | 410 | } |
235 | 411 | } |
236 | 412 | ## New but not old: |