r61628 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r61627‎ | r61628 | r61629 >
Date:10:49, 28 January 2010
Author:aaron
Status:resolved (Comments)
Tags:
Comment:
Add a script to prune inclusion data for old flagged revisions
Modified paths:
  • /trunk/extensions/FlaggedRevs/maintenance/pruneRevData.inc (added) (history)
  • /trunk/extensions/FlaggedRevs/maintenance/pruneRevData.php (added) (history)

Diff [purge]

Index: trunk/extensions/FlaggedRevs/maintenance/pruneRevData.php
@@ -0,0 +1,37 @@
<?php
/**
 * Command-line entry point for pruning the template/image inclusion data
 * attached to old flagged revisions.
 *
 * Without --prune the script performs a dry run and only reports how many
 * rows could be deleted. The actual work is done by prune_flaggedrevs(),
 * loaded from pruneRevData.inc.
 */

// Locate the MediaWiki installation: honour MW_INSTALL_PATH when it is set,
// otherwise assume the extension sits three directories below the root.
if ( getenv( 'MW_INSTALL_PATH' ) ) {
    $IP = getenv( 'MW_INSTALL_PATH' );
} else {
    $IP = dirname(__FILE__).'/../../..';
}

$options = array( 'prune', 'help', 'start' );
// NOTE(review): --start takes a value ("--start <ID>"). commandLine.inc is
// expected to read $optionsWithArgs to learn which long options consume the
// next argument; confirm against the MediaWiki version in use.
$optionsWithArgs = array( 'start' );
require "$IP/maintenance/commandLine.inc";
require dirname(__FILE__) . '/pruneRevData.inc';

if ( isset( $options['help'] ) ) {
    echo <<<TEXT
Purpose:
    This script clears template/image data for reviewed versions
    that are 1+ month old and have 50+ newer versions in page. By
    default, it will just output how many rows can be deleted. Use
    the 'prune' option to actually delete them.
Usage:
    php pruneRevData.php --help
    php pruneRevData.php [--prune --start <ID> ]

    --help       : This help message
    --prune      : Actually do a live run
    --start <ID> : The ID of the starting rev

TEXT;
    exit(0);
}

error_reporting( E_ALL );

// null lets prune_flaggedrevs() pick the lowest fp_page_id itself.
$start = isset( $options['start'] ) ? $options['start'] : null;
// A plain boolean: true only when --prune was given (live run).
$prune = isset( $options['prune'] );

prune_flaggedrevs( $start, $prune );
Property changes on: trunk/extensions/FlaggedRevs/maintenance/pruneRevData.php
___________________________________________________________________
Name: svn:eol-style
   + native
Index: trunk/extensions/FlaggedRevs/maintenance/pruneRevData.inc
@@ -0,0 +1,119 @@
<?php

/**
 * Prune (or, in a dry run, count) the template/image inclusion rows that
 * belong to old flagged revisions.
 *
 * Pages are walked in fp_page_id chunks of $BATCH_SIZE. For each page the
 * newest $newerRevs flagged revisions are always kept; inclusion rows in
 * flaggedtemplates/flaggedimages are cleared only for flagged revisions that
 * are older than those AND whose fr_timestamp is before the cutoff.
 * Progress and the final row tallies are echoed to standard output.
 *
 * @param $start int|null fp_page_id to start from; null means the lowest
 *   fp_page_id found in flaggedpages.
 * @param $prune bool true to really delete rows; false (default) to only
 *   count the rows that would be deleted.
 */
function prune_flaggedrevs( $start = null, $prune = false ) {
    if ( $prune ) {
        echo "Pruning old flagged revision inclusion data...\n";
    } else {
        echo "Running dry-run of old flagged revision inclusion data pruning...\n";
    }

    $BATCH_SIZE = 500;

    $db = wfGetDB( DB_MASTER );

    if ( $start === null ) {
        $start = $db->selectField( 'flaggedpages', 'MIN(fp_page_id)', false, __FUNCTION__ );
    }
    $end = $db->selectField( 'flaggedpages', 'MAX(fp_page_id)', false, __FUNCTION__ );
    if ( is_null( $start ) || is_null( $end ) ) {
        echo "...flaggedpages table seems to be empty.\n";
        return;
    }
    // $start may be a raw command-line string and is interpolated into the
    // BETWEEN condition below, so force both bounds to integers.
    $start = (int)$start;
    $end = (int)$end + $BATCH_SIZE - 1; # Do remaining chunk
    $blockStart = $start;
    $blockEnd = $start + $BATCH_SIZE - 1;

    $tDeleted = $fDeleted = 0; // tallies

    // Keep the newest 50 flagged revs of every page, and never touch
    // anything that was reviewed within the last 30 days.
    $newerRevs = 50;
    $cutoff = $db->timestamp( time() - 30*24*3600 );

    while ( $blockEnd <= $end ) {
        echo "...doing fp_page_id from $blockStart to $blockEnd\n";
        $cond = "fp_page_id BETWEEN $blockStart AND $blockEnd";
        $res = $db->select( 'flaggedpages', 'fp_page_id', $cond, __FUNCTION__ );
        $batchCount = 0; // revs processed since the last slave lag check
        // Go through a chunk of flagged pages...
        while ( $row = $db->fetchObject( $res ) ) {
            // Get the newest X ($newerRevs) flagged revs for this page
            $sres = $db->select( 'flaggedrevs',
                'fr_rev_id',
                array( 'fr_page_id' => $row->fp_page_id ),
                __METHOD__,
                array( 'ORDER BY' => 'fr_rev_id DESC', 'LIMIT' => $newerRevs )
            );
            // Fewer than X flagged revs: nothing on this page is old enough
            if ( $db->numRows( $sres ) < $newerRevs ) {
                continue;
            }
            // Get the oldest of the top X revisions
            $sres->seek( $newerRevs - 1 );
            $lrow = $db->fetchObject( $sres );
            $oldestId = (int)$lrow->fr_rev_id; // oldest revision Id
            // Get revs not in the top X that were not reviewed recently
            $sres = $db->select( 'flaggedrevs',
                'fr_rev_id',
                array(
                    'fr_page_id' => $row->fp_page_id,
                    'fr_rev_id < '.$oldestId, // not in the newest X
                    'fr_timestamp < '.$db->addQuotes( $cutoff ) // not reviewed recently
                ),
                __METHOD__,
                // Sanity check (start with the oldest)
                array( 'ORDER BY' => 'fr_rev_id ASC', 'LIMIT' => 5000 )
            );
            // Build an array of these rev Ids
            $revsClearIncludes = array();
            foreach ( $sres as $srow ) {
                $revsClearIncludes[] = $srow->fr_rev_id;
            }
            // An empty ID list must never reach delete()/selectField():
            // there is nothing to clear for this page.
            if ( !count( $revsClearIncludes ) ) {
                continue;
            }
            $batchCount += count( $revsClearIncludes ); // # of revs to prune
            if ( $prune ) {
                // Write run: clear the include data for these old revs
                $db->begin();
                $db->delete( 'flaggedtemplates',
                    array( 'ft_rev_id' => $revsClearIncludes ),
                    __METHOD__
                );
                $tDeleted += $db->affectedRows();
                $db->delete( 'flaggedimages',
                    array( 'fi_rev_id' => $revsClearIncludes ),
                    __METHOD__
                );
                $fDeleted += $db->affectedRows();
                $db->commit();
            } else {
                // Dry run: say how many includes rows would have been cleared
                $tDeleted += $db->selectField( 'flaggedtemplates',
                    'COUNT(*)',
                    array( 'ft_rev_id' => $revsClearIncludes ),
                    __METHOD__
                );
                $fDeleted += $db->selectField( 'flaggedimages',
                    'COUNT(*)',
                    array( 'fi_rev_id' => $revsClearIncludes ),
                    __METHOD__
                );
            }
            // Check slave lag...
            if ( $batchCount >= $BATCH_SIZE ) {
                $batchCount = 0;
                wfWaitForSlaves( 5 );
            }
        }
        $blockStart += $BATCH_SIZE;
        $blockEnd += $BATCH_SIZE;
    }
    if ( $prune ) {
        echo "...flagged revision inclusion prunning complete ...\n";
    } else {
        echo "...flagged revision inclusion prune test complete ...\n";
    }
    echo "Rows: \tflaggedtemplates:$tDeleted\t\tflaggedimages:$fDeleted\n";
}

Follow-up revisions

Revision | Commit summary | Author | Date
r61654 | Fix svn:eol-style from r61628 | tstarling | 04:54, 29 January 2010

Comments

#Comment by Tim Starling (talk | contribs)   05:00, 29 January 2010

Note that freeResult() is not required. The result memory will be freed when the reference count on the zval goes to zero, that includes assigning another result object to $sres. The existence of mysql_free_result() and the documentation implying that it is necessary is the product of a misunderstanding of PHP's memory management by the author of the mysql extension.

#Comment by Aaron Schulz (talk | contribs)   05:23, 29 January 2010

I was wondering about that. Everything I read kept saying "freed automatically when the script is done", which would be bad.

Status & tagging log