Index: trunk/extensions/DumpHTML/dumpHTML.inc |
— | — | @@ -88,7 +88,7 @@ |
89 | 89 | # Extension version |
90 | 90 | const VERSION = '2.0'; |
91 | 91 | |
92 | | - function DumpHTML( $settings = array() ) { |
| 92 | + function __construct( $settings = array() ) { |
93 | 93 | foreach ( $settings as $var => $value ) { |
94 | 94 | $this->$var = $value; |
95 | 95 | } |
— | — | @@ -171,7 +171,7 @@ |
172 | 172 | $end = $this->endID; |
173 | 173 | } |
174 | 174 | $start = $this->startID; |
175 | | - |
| 175 | + |
176 | 176 | # Start from the checkpoint |
177 | 177 | $cp = $this->getCheckpoint( 'article' ); |
178 | 178 | if ( $cp == 'done' ) { |
— | — | @@ -204,7 +204,7 @@ |
205 | 205 | $title = Title::newFromID( $id ); |
206 | 206 | if ( $title ) { |
207 | 207 | $ns = $title->getNamespace() ; |
208 | | - if ( $ns != NS_CATEGORY && $ns != NS_MEDIAWIKI && |
| 208 | + if ( $ns != NS_CATEGORY && $ns != NS_MEDIAWIKI && |
209 | 209 | $title->getPrefixedDBkey() != $mainPage ) { |
210 | 210 | $this->doArticle( $title ); |
211 | 211 | } |
— | — | @@ -233,11 +233,11 @@ |
234 | 234 | |
235 | 235 | $title = Title::newMainPage(); |
236 | 236 | $text = $this->getArticleHTML( $title ); |
237 | | - |
| 237 | + |
238 | 238 | # Parse the XHTML to find the images |
239 | 239 | #$images = $this->findImages( $text ); |
240 | 240 | #$this->copyImages( $images ); |
241 | | - |
| 241 | + |
242 | 242 | $file = fopen( "{$this->dest}/index.html", "w" ); |
243 | 243 | if ( !$file ) { |
244 | 244 | print "\nCan't open index.html for writing\n"; |
— | — | @@ -263,7 +263,7 @@ |
264 | 264 | $chunkSize = 1000; |
265 | 265 | |
266 | 266 | $dbr = wfGetDB( DB_SLAVE ); |
267 | | - |
| 267 | + |
268 | 268 | $cp = $this->getCheckpoint( 'local image' ); |
269 | 269 | if ( $cp == 'done' ) { |
270 | 270 | print "Local image descriptions already done\n"; |
— | — | @@ -272,7 +272,7 @@ |
273 | 273 | print "Writing image description pages starting from $cp\n"; |
274 | 274 | $conds = array( 'img_name >= ' . $dbr->addQuotes( $cp ) ); |
275 | 275 | } else { |
276 | | - print "Writing image description pages for local images\n"; |
| 276 | + print "Writing image description pages for local images\n"; |
277 | 277 | $conds = false; |
278 | 278 | } |
279 | 279 | |
— | — | @@ -280,14 +280,14 @@ |
281 | 281 | $i = 0; |
282 | 282 | |
283 | 283 | do { |
284 | | - $res = $dbr->select( 'image', array( 'img_name' ), $conds, __METHOD__, |
| 284 | + $res = $dbr->select( 'image', array( 'img_name' ), $conds, __METHOD__, |
285 | 285 | array( 'ORDER BY' => 'img_name', 'LIMIT' => $chunkSize ) ); |
286 | 286 | $numRows = $dbr->numRows( $res ); |
287 | 287 | |
288 | | - while ( $row = $dbr->fetchObject( $res ) ) { |
| 288 | + foreach( $res as $row ) { |
289 | 289 | # Update conds for the next chunk query |
290 | 290 | $conds = array( 'img_name > ' . $dbr->addQuotes( $row->img_name ) ); |
291 | | - |
| 291 | + |
292 | 292 | // Slice the result set with a filter |
293 | 293 | if ( !$this->sliceFilter( $row->img_name ) ) { |
294 | 294 | continue; |
— | — | @@ -307,9 +307,8 @@ |
308 | 308 | } |
309 | 309 | $this->doArticle( $title ); |
310 | 310 | } |
311 | | - $dbr->freeResult( $res ); |
312 | 311 | } while ( $numRows ); |
313 | | - |
| 312 | + |
314 | 313 | $this->setCheckpoint( 'local image', 'done' ); |
315 | 314 | print "\n"; |
316 | 315 | } |
— | — | @@ -367,10 +366,10 @@ |
368 | 367 | |
369 | 368 | function doCategories() { |
370 | 369 | $chunkSize = 1000; |
371 | | - |
| 370 | + |
372 | 371 | $this->setupGlobals(); |
373 | 372 | $dbr = wfGetDB( DB_SLAVE ); |
374 | | - |
| 373 | + |
375 | 374 | $cp = $this->getCheckpoint( 'category' ); |
376 | 375 | if ( $cp == 'done' ) { |
377 | 376 | print "Category pages already done\n"; |
— | — | @@ -385,14 +384,14 @@ |
386 | 385 | |
387 | 386 | $i = 0; |
388 | 387 | do { |
389 | | - $res = $dbr->select( 'categorylinks', 'DISTINCT cl_to', $conds, __METHOD__, |
| 388 | + $res = $dbr->select( 'categorylinks', 'DISTINCT cl_to', $conds, __METHOD__, |
390 | 389 | array( 'ORDER BY' => 'cl_to', 'LIMIT' => $chunkSize ) ); |
391 | 390 | $numRows = $dbr->numRows( $res ); |
392 | | - |
393 | | - while ( $row = $dbr->fetchObject( $res ) ) { |
| 391 | + |
| 392 | + foreach( $res as $row ) { |
394 | 393 | // Set conditions for next chunk |
395 | 394 | $conds = array( 'cl_to > ' . $dbr->addQuotes( $row->cl_to ) ); |
396 | | - |
| 395 | + |
397 | 396 | // Filter pages from other slices |
398 | 397 | if ( !$this->sliceFilter( $row->cl_to ) ) { |
399 | 398 | continue; |
— | — | @@ -408,9 +407,8 @@ |
409 | 408 | $title = Title::makeTitle( NS_CATEGORY, $row->cl_to ); |
410 | 409 | $this->doArticle( $title ); |
411 | 410 | } |
412 | | - $dbr->freeResult( $res ); |
413 | 411 | } while ( $numRows ); |
414 | | - |
| 412 | + |
415 | 413 | $this->setCheckpoint( 'category', 'done' ); |
416 | 414 | print "\n"; |
417 | 415 | } |
— | — | @@ -437,7 +435,7 @@ |
438 | 436 | |
439 | 437 | for ( $chunkStart = $start; $chunkStart <= $end; $chunkStart += $chunkSize ) { |
440 | 438 | $chunkEnd = min( $end, $chunkStart + $chunkSize - 1 ); |
441 | | - $conds = array( |
| 439 | + $conds = array( |
442 | 440 | 'page_is_redirect' => 1, |
443 | 441 | "page_id BETWEEN $chunkStart AND $chunkEnd" |
444 | 442 | ); |
— | — | @@ -449,8 +447,8 @@ |
450 | 448 | } |
451 | 449 | $res = $dbr->select( 'page', array( 'page_id', 'page_namespace', 'page_title' ), |
452 | 450 | $conds, __METHOD__ ); |
453 | | - |
454 | | - while ( $row = $dbr->fetchObject( $res ) ) { |
| 451 | + |
| 452 | + foreach( $res as $row ) { |
455 | 453 | $title = Title::makeTitle( $row->page_namespace, $row->page_title ); |
456 | 454 | if ( !(++$i % (self::REPORTING_INTERVAL*10) ) ) { |
457 | 455 | printf( "Done %d redirects (%2.3f%%)\n", $i, $row->page_id / $end * 100 ); |
— | — | @@ -458,7 +456,6 @@ |
459 | 457 | } |
460 | 458 | $this->doArticle( $title ); |
461 | 459 | } |
462 | | - $dbr->freeResult( $res ); |
463 | 460 | } |
464 | 461 | $this->setCheckpoint( 'redirect', 'done' ); |
465 | 462 | } |
— | — | @@ -523,11 +520,11 @@ |
524 | 521 | wfProfileIn( __METHOD__ ); |
525 | 522 | $filename = $this->getHashedFilename( $title ); |
526 | 523 | |
527 | | - # Temporary hack for current dump, this should be moved to |
| 524 | + # Temporary hack for current dump, this should be moved to |
528 | 525 | # getFriendlyName() at the earliest opportunity. |
529 | 526 | # |
530 | 527 | # Limit filename length to 255 characters, so it works on ext3. |
531 | | - # Titles are in fact limited to 255 characters, but dumpHTML |
| 528 | + # Titles are in fact limited to 255 characters, but dumpHTML |
532 | 529 | # adds a suffix which may put them over the limit. |
533 | 530 | $length = strlen( $filename ); |
534 | 531 | if ( $length > 255 ) { |
— | — | @@ -535,13 +532,13 @@ |
536 | 533 | wfProfileOut( __METHOD__ ); |
537 | 534 | return; |
538 | 535 | } |
539 | | - |
| 536 | + |
540 | 537 | $fullName = "{$this->dest}/$filename"; |
541 | 538 | $fullDir = dirname( $fullName ); |
542 | 539 | |
543 | 540 | if ( $this->compress ) { |
544 | 541 | $fullName .= ".gz"; |
545 | | - $text = gzencode( $text, 9 ); |
| 542 | + $text = gzencode( $text, 9 ); |
546 | 543 | } |
547 | 544 | |
548 | 545 | if ( preg_match( '/[\x80-\xFF]/', $fullName ) && wfIsWindows() ) { |
— | — | @@ -553,7 +550,7 @@ |
554 | 551 | |
555 | 552 | $success = file_put_contents( $tempName, $text ); |
556 | 553 | if ( $success ) { |
557 | | - wfShellExec( "cscript /nologo " . wfEscapeShellArg( |
| 554 | + wfShellExec( "cscript /nologo " . wfEscapeShellArg( |
558 | 555 | dirname( __FILE__ ) . "\\rename-hack.vbs", |
559 | 556 | $this->escapeForVBScript( $tempName ), |
560 | 557 | $this->escapeForVBScript( $fullName ) ) ); |
— | — | @@ -699,7 +696,6 @@ |
700 | 697 | |
701 | 698 | $wgStylePath = "{$this->articleBaseUrl}/skins"; |
702 | 699 | |
703 | | - |
704 | 700 | if ( $this->makeSnapshot ) { |
705 | 701 | $this->destUploadUrl = "{$this->articleBaseUrl}/{$this->imageRel}"; |
706 | 702 | } else { |
— | — | @@ -822,6 +818,10 @@ |
823 | 819 | return $text; |
824 | 820 | } |
825 | 821 | |
| 822 | + /** |
| 823 | + * @param $rt Title |
| 824 | + * @return string |
| 825 | + */ |
826 | 826 | function getRedirect( $rt ) { |
827 | 827 | $url = $rt->escapeLocalURL(); |
828 | 828 | $text = $rt->getPrefixedText(); |
— | — | @@ -865,7 +865,7 @@ |
866 | 866 | |
867 | 867 | /** |
868 | 868 | * Copy a file specified by a URL to a given directory |
869 | | - * |
| 869 | + * |
870 | 870 | * @param string $srcPath The source URL |
871 | 871 | * @param string $srcPathBase The base directory of the source URL |
872 | 872 | * @param string $srcDirBase The base filesystem directory of the source URL |
— | — | @@ -905,14 +905,14 @@ |
906 | 906 | $this->relativeCopy( $newSrc, $srcPathBase, $srcDirBase, $destDirBase ); |
907 | 907 | } |
908 | 908 | } |
909 | | - |
| 909 | + |
910 | 910 | /** |
911 | 911 | * Copy images (or create symlinks) from commons to a static directory. |
912 | 912 | * This is necessary even if you intend to distribute all of commons, because |
913 | 913 | * the directory contents are used to work out which image description pages |
914 | 914 | * are needed. |
915 | 915 | * |
916 | | - * Also copies math images, and full-sized images if the makeSnapshot option |
| 916 | + * Also copies math images, and full-sized images if the makeSnapshot option |
917 | 917 | * is specified. |
918 | 918 | * |
919 | 919 | */ |
— | — | @@ -932,6 +932,12 @@ |
933 | 933 | } |
934 | 934 | } |
935 | 935 | |
| 936 | + /** |
| 937 | + * @param $title Title |
| 938 | + * @param $url |
| 939 | + * @param $query |
| 940 | + * @return bool |
| 941 | + */ |
936 | 942 | function onGetFullURL( &$title, &$url, $query ) { |
937 | 943 | global $wgContLang, $wgArticlePath; |
938 | 944 | |
— | — | @@ -950,6 +956,12 @@ |
951 | 957 | } |
952 | 958 | } |
953 | 959 | |
| 960 | + /** |
| 961 | + * @param $title Title |
| 962 | + * @param $url |
| 963 | + * @param $query |
| 964 | + * @return bool |
| 965 | + */ |
954 | 966 | function onGetLocalURL( &$title, &$url, $query ) { |
955 | 967 | global $wgArticlePath; |
956 | 968 | |
— | — | @@ -984,6 +996,11 @@ |
985 | 997 | return false; |
986 | 998 | } |
987 | 999 | |
| 1000 | + /** |
| 1001 | + * @throws MWException |
| 1002 | + * @param $title Title |
| 1003 | + * @return string |
| 1004 | + */ |
988 | 1005 | function getHashedFilename( &$title ) { |
989 | 1006 | if ( !$title ) { |
990 | 1007 | throw new MWException( 'Invalid $title parameter to '.__METHOD__ ); |
— | — | @@ -1029,6 +1046,8 @@ |
1030 | 1047 | |
1031 | 1048 | /** |
1032 | 1049 | * Get a relative directory for putting a title into |
| 1050 | + * |
| 1051 | + * @param $title Title |
1033 | 1052 | */ |
1034 | 1053 | function getHashedDirectory( &$title ) { |
1035 | 1054 | if ( '' != $title->getInterwiki() ) { |
— | — | @@ -1117,6 +1136,7 @@ |
1118 | 1137 | $text = ''; |
1119 | 1138 | return false; |
1120 | 1139 | } |
| 1140 | + |
1121 | 1141 | function onSiteNoticeAfter( &$text ) { |
1122 | 1142 | $text = ''; |
1123 | 1143 | return false; |
— | — | @@ -1157,7 +1177,7 @@ |
1158 | 1178 | function debug( $text ) { |
1159 | 1179 | print "$text\n"; |
1160 | 1180 | } |
1161 | | - |
| 1181 | + |
1162 | 1182 | function mkdir( $dir ) { |
1163 | 1183 | //if ( wfIsWindows() ) { |
1164 | 1184 | return wfMkdirParents( $dir, 0755 ); |
— | — | @@ -1206,7 +1226,7 @@ |
1207 | 1227 | } else { |
1208 | 1228 | $foreignDest = "{$dump->destUploadDirectory}/$friendlyName"; |
1209 | 1229 | } |
1210 | | - $this->foreignRepos[] = new DumpHTML_ProxyRepo( $repo, $dump, $foreignDest, |
| 1230 | + $this->foreignRepos[] = new DumpHTML_ProxyRepo( $repo, $dump, $foreignDest, |
1211 | 1231 | $dump->destUploadUrl . '/' . urlencode( $friendlyName ) ); |
1212 | 1232 | } |
1213 | 1233 | } |
— | — | @@ -1317,7 +1337,7 @@ |
1318 | 1338 | $result = call_user_func_array( array( $this->backend, $name ), $args ); |
1319 | 1339 | if ( is_string( $result ) ) { |
1320 | 1340 | $result = $this->fixURL( $result ); |
1321 | | - } elseif ( $result instanceof MediaTransformOutput ) { |
| 1341 | + } elseif ( $result instanceof MediaTransformOutput ) { |
1322 | 1342 | $result = $this->fixMTO( $result ); |
1323 | 1343 | } |
1324 | 1344 | return $result; |