r78615 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r78614‎ | r78615 | r78616 >
Date:02:42, 20 December 2010
Author:dantman
Status:deferred
Tags:
Comment:
Implement the -n and --startidfile options in SMW_refreshData.php so that refreshData can be run from cron, processing a limited number of IDs per run and keeping track of the ID to start from on the next run.
Modified paths:
  • /trunk/extensions/SemanticMediaWiki/maintenance/SMW_refreshData.php (modified) (history)

Diff [purge]

Index: trunk/extensions/SemanticMediaWiki/maintenance/SMW_refreshData.php
@@ -14,6 +14,9 @@
1515 * -d <delay> Wait for this many milliseconds after processing an article, useful for limiting server load.
1616 * -s <startid> Start refreshing at given article ID, useful for partial refreshing
1717 * -e <endid> Stop refreshing at given article ID, useful for partial refreshing
 18+ * -n <numids> Stop refreshing after processing a given number of IDs, useful for partial refreshing
 19+ * --startidfile <startidfile> Read <startid> from a file instead of the arguments and write the next id
 20+ * to the file when finished. Useful for continual partial refreshing from cron.
1821 * -b <backend> Execute the operation for the storage backend of the given name
1922 * (default is to use the current backend)
2023 * -v Be verbose about the progress.
@@ -22,7 +25,7 @@
2326 * -t Will refresh only type pages (and other explicitly named namespaces)
2427 * --page=<pagelist> will refresh only the pages of the given names, with | used as a separator.
2528 * Example: --page="Page 1|Page 2" refreshes Page 1 and Page 2
26 - * Options -s, -e, -c, -p, -t are ignored if --page is given.
 29+ * Options -s, -e, -n, --startidfile, -c, -p, -t are ignored if --page is given.
2730 * -f Fully delete all content instead of just refreshing relevant entries. This will also
2831 * rebuild the whole storage structure. May leave the wiki temporarily incomplete.
2932 * --server=<server> The protocol and server name to use as base URLs, e.g.
@@ -35,7 +38,7 @@
3639 * @ingroup SMWMaintenance
3740 */
3841
39 -$optionsWithArgs = array( 'd', 's', 'e', 'b', 'server', 'page' ); // -d <delay>, -s <startid>, -e <endid>, -b <backend>
 42+$optionsWithArgs = array( 'd', 's', 'e', 'n', 'b', 'startidfile', 'server', 'page' ); // -d <delay>, -s <startid>, -e <endid>, -n <numids>, --startidfile <startidfile> -b <backend>
4043
4144 require_once ( getenv( 'MW_INSTALL_PATH' ) !== false
4245 ? getenv( 'MW_INSTALL_PATH' ) . "/maintenance/commandLine.inc"
@@ -61,13 +64,27 @@
6265 $pages = false;
6366 }
6467
 68+$writeToStartidfile = false;
6569 if ( array_key_exists( 's', $options ) ) {
6670 $start = max( 1, intval( $options['s'] ) );
 71+} elseif ( array_key_exists( 'startidfile', $options ) ) {
 72+ if ( !is_writable( file_exists( $options['startidfile'] ) ? $options['startidfile'] : dirname( $options['startidfile'] ) ) ) {
 73+ die("Cannot use a startidfile that we can't write to.\n");
 74+ }
 75+ $writeToStartidfile = true;
 76+ if ( is_readable( $options['startidfile'] ) ) {
 77+ $start = max( 1, intval( file_get_contents( $options['startidfile'] ) ) );
 78+ } else {
 79+ $start = 1;
 80+ }
6781 } else {
6882 $start = 1;
6983 }
 84+
7085 if ( array_key_exists( 'e', $options ) ) { // Note: this might reasonably be larger than the page count
7186 $end = intval( $options['e'] );
 87+} elseif ( array_key_exists( 'n', $options ) ) {
 88+ $end = $start + intval( $options['n'] );
7289 } else {
7390 $end = false;
7491 }
@@ -158,6 +175,9 @@
159176 $num_files++;
160177 $linkCache->clear(); // avoid memory leaks
161178 }
 179+ if ( $writeToStartidfile ) {
 180+ file_put_contents( $options['startidfile'], "$id" );
 181+ }
162182 print "$num_files IDs refreshed.\n";
163183 } else {
164184 print "Refreshing specified pages!\n\n";

Status & tagging log