r46727 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r46726‎ | r46727 | r46728 >
Date:19:43, 2 February 2009
Author:aaron
Status:deferred (Comments)
Tags:
Comment:
Add mean wait and pending times to stats
Modified paths:
  • /trunk/extensions/FlaggedRevs/language/ValidationStatistics.i18n.php (modified) (history)
  • /trunk/extensions/FlaggedRevs/maintenance/updateStats.inc (modified) (history)
  • /trunk/extensions/FlaggedRevs/specialpages/ValidationStatistics_body.php (modified) (history)

Diff [purge]

Index: trunk/extensions/FlaggedRevs/maintenance/updateStats.inc
@@ -13,11 +13,12 @@
1414 $dbCache->set( $keySQL, '1', $wgFlaggedRevsStatsAge );
1515
1616 $dbr = wfGetDB( DB_SLAVE );
17 - list($page,$flaggedpages,$flaggedrevs_stats) = $dbr->tableNamesN('page','flaggedpages','flaggedrevs_stats');
 17+ list($page,$flaggedpages) = $dbr->tableNamesN('page','flaggedpages');
1818 $ns = $dbr->makeList( $wgFlaggedRevsNamespaces );
19 - if( empty($ns) ) {
20 - return; // no SQL errors please :)
21 - }
 19+ if( empty($ns) ) return; // no SQL errors please :)
 20+
 21+ print( "Updating flaggedrevs stats...\n" );
 22+
2223 $ns_total = array();
2324 $ns_reviewed = array();
2425 $ns_synced = array();
@@ -50,12 +51,122 @@
5152 while( $row = $dbr->fetchObject( $ret ) ) {
5253 $ns_synced[$row->namespace] = intval($row->synced);
5354 }
 55+ // Getting mean pending edit time
 56+ $now = time(); // current time in UNIX TS
 57+ $avePET = (int)$dbr->selectField( array('flaggedpages'),
 58+ "AVG( $now - UNIX_TIMESTAMP(fp_pending_since) )",
 59+ array( 'fp_pending_since IS NOT NULL' ),
 60+ __METHOD__
 61+ );
 62+ // Get the mean edit review time
 63+ $size = 1500; // Sample size
 64+ $seconds = $aveRT = 0;
 65+ # Only go so far back...otherwise we will get garbage values due to
 66+ # the fact that FlaggedRevs wasn't enabled until after a while.
 67+ $installed = $dbr->selectField( 'logging', 'MIN(log_timestamp)', array('log_type' => 'review') );
 68+ if( !$installed ) $installed = $dbr->timestamp(); // now
 69+ # Get corresponding rev_id for better INDEX usage
 70+ $minRev = (int)$dbr->selectField( 'revision', 'rev_id',
 71+ array( 'rev_timestamp >= '.$dbr->addQuotes($installed) ),
 72+ __METHOD__,
 73+ array( 'ORDER BY' => 'rev_timestamp ASC', 'LIMIT' => 1 )
 74+ );
 75+ # Skip the most recent recent revs as they are likely to just
 76+ # be WHERE condition misses. This also gives us more data to use.
 77+ # Lastly, we want to avoid bais that would make the time too low
 78+ # since new revisions could not have "took a long time to sight".
 79+ $worstLagTS = $dbr->timestamp(); // now
 80+ $last = '0';
 81+ while( true ) { // should almost always be ~1 pass
 82+ # Get the page with the worst pending lag...
 83+ $row = $dbr->selectRow( array('flaggedpages','flaggedrevs'),
 84+ array( 'fp_page_id', 'fp_stable', 'fp_pending_since', 'fr_timestamp' ),
 85+ array(
 86+ 'fp_pending_since > '.$dbr->addQuotes($installed), // needs actual display lag
 87+ 'fr_page_id = fp_page_id AND fr_rev_id = fp_stable',
 88+ 'fp_pending_since > '.$dbr->addQuotes($last), // skip failed rows
 89+ ),
 90+ __METHOD__,
 91+ array( 'ORDER BY' => 'fp_pending_since ASC',
 92+ 'USE INDEX' => array('flaggedpages' => 'fp_pending_since') )
 93+ );
 94+ if( !$row ) break;
 95+ # Find the newest revision at the time the page was reviewed,
 96+ # this is the one that *should* have been reviewed.
 97+ $idealRev = (int)$dbr->selectField( 'revision', 'rev_id',
 98+ array( 'rev_page' => $row->fp_page_id,
 99+ 'rev_timestamp < '.$dbr->addQuotes($row->fr_timestamp) ),
 100+ __METHOD__,
 101+ array( 'ORDER BY' => 'rev_timestamp DESC', 'LIMIT' => 1 )
 102+ );
 103+ # Fudge factor to prevent deliberate reviewing of non-current revisions
 104+ # from squeezing the range. Shouldn't effect anything otherwise.
 105+ if( $row->fp_stable >= $idealRev ) {
 106+ $worstLagTS = $row->fp_pending_since;
 107+ break;
 108+ } else {
 109+ $last = $row->fp_pending_since; // next iteration
 110+ }
 111+ }
 112+ # Get corresponding rev_id for better INDEX usage
 113+ $maxRev = (int)$dbr->selectField( 'revision', 'rev_id',
 114+ array( 'rev_timestamp <= '.$dbr->addQuotes($worstLagTS) ),
 115+ __METHOD__,
 116+ array( 'ORDER BY' => 'rev_timestamp DESC', 'LIMIT' => 1 )
 117+ );
 118+ $rows = $maxRev - $minRev; # rows to scan
 119+ $mod = 1; # Modulus
 120+ if( $rows >= 15000 ) {
 121+ $mod = 3; # Skip some rows to get better coverage
 122+ $minRev = $maxRev - 15000; # Limit rows to scan
 123+ }
 124+ # For edits that started off pending, how long do they take to get reviewed?
 125+ # Get the *first* reviewed rev *after* each RC item and get the average difference.
 126+ # Only do this for revisions to pages that *were* already logged as reviewed.
 127+ $sql = $dbr->selectSQLText( array('revision','flaggedrevs','page','logging'),
 128+ array( 'rev_timestamp AS rt', 'MIN(fr_timestamp) AS ft', 'MIN(log_timestamp) AS lt' ),
 129+ array(
 130+ "rev_id BETWEEN $minRev AND $maxRev", // Only go so far back...
 131+ 'rev_user = 0', // IP edits (should start off unreviewed)
 132+ "(rev_id % $mod) = 0", // Better spread
 133+ 'rev_timestamp >= '.$dbr->addQuotes($installed), // watch for imported revs!
 134+ 'rev_timestamp <= '.$dbr->addQuotes($worstLagTS), // watch for imported revs!
 135+ 'fr_page_id = rev_page AND fr_rev_id >= rev_id AND fr_timestamp > rev_timestamp', // later reviewed
 136+ // Check that this page was stable at the time.
 137+ // Assumes that reviewed pages stay reviewed (reasonable).
 138+ 'page_id = rev_page', // get the title
 139+ 'log_namespace = page_namespace AND log_title = page_title',
 140+ 'log_type = "review" AND log_timestamp < rev_timestamp',
 141+ ),
 142+ __METHOD__,
 143+ array(
 144+ 'GROUP BY' => 'rev_id',
 145+ 'ORDER BY' => 'rev_id ASC', // slight bias avoidance, if any
 146+ 'LIMIT' => $size, // sample size
 147+ 'USE INDEX' => array('flaggedrevs' => 'PRIMARY','logging' => 'page_time')
 148+ )
 149+ );
 150+ # Actually run the query...
 151+ $res = $dbr->doQuery( $sql, __METHOD__ );
 152+ if( $count = $dbr->numRows($res) ) {
 153+ # Get the sum of elapsed times
 154+ while( $row = $dbr->fetchObject($res) ) {
 155+ $seconds += wfTimestamp(TS_UNIX,$row->ft) - wfTimestamp(TS_UNIX,$row->rt);
 156+ #echo( $row->rt . "\t" . $row->ft . "\n" );
 157+ }
 158+ $aveRT = $seconds/$count; // average
 159+ }
 160+ // Save the data
54161 $dbw = wfGetDB( DB_MASTER );
55162 $dbw->begin();
56 - // Create a small stats table if not present
 163+ // Create small stats tables if not present
 164+ list($flaggedrevs_stats,$flaggedrevs_stats2) = $dbr->tableNamesN('flaggedrevs_stats','flaggedrevs_stats2');
57165 if( !$dbw->tableExists( 'flaggedrevs_stats' ) ) {
58166 createFlaggedRevsStatsTable( $dbw, $flaggedrevs_stats );
59167 }
 168+ if( !$dbw->tableExists( 'flaggedrevs_stats2' ) ) {
 169+ createFlaggedRevsStatsTable2( $dbw, $flaggedrevs_stats2 );
 170+ }
60171 foreach( $wgFlaggedRevsNamespaces as $namespace ) {
61172 $dbw->replace( 'flaggedrevs_stats',
62173 array( 'namespace' ),
@@ -66,22 +177,35 @@
67178 __METHOD__
68179 );
69180 }
 181+ $dbw->replace( 'flaggedrevs_stats2', array('stat_id'),
 182+ array('stat_id' => 1, 'ave_review_time' => $aveRT, 'ave_pending_time' => $avePET),
 183+ __METHOD__
 184+ );
70185 $dbw->commit();
71186 // Stats are not up to date!
72187 $key = wfMemcKey( 'flaggedrevs', 'statsUpdated' );
73188 $dbCache->set( $key, '1', $wgFlaggedRevsStatsAge );
74189 $dbCache->delete( $keySQL );
75 - print( "Done updating!\n" );
 190+ print( "...done updating!\n" );
76191 }
77192
78193 function createFlaggedRevsStatsTable( $dbw, $flaggedrevs_stats ) {
79194 $dbw->query(
80195 "CREATE TABLE $flaggedrevs_stats (
81 - namespace INTEGER NOT NULL DEFAULT 0,
82 - total INTEGER NOT NULL DEFAULT 0,
83 - reviewed INTEGER NOT NULL DEFAULT 0,
84 - synced INTEGER NOT NULL DEFAULT 0,
85 - PRIMARY KEY (namespace)
 196+ namespace INTEGER UNSIGNED NOT NULL DEFAULT 0 PRIMARY KEY,
 197+ total INTEGER UNSIGNED NOT NULL DEFAULT 0,
 198+ reviewed INTEGER UNSIGNED NOT NULL DEFAULT 0,
 199+ synced INTEGER UNSIGNED NOT NULL DEFAULT 0
86200 );"
87201 );
88202 }
 203+
 204+function createFlaggedRevsStatsTable2( $dbw, $flaggedrevs_stats2 ) {
 205+ $dbw->query(
 206+ "CREATE TABLE $flaggedrevs_stats2 (
 207+ stat_id INTEGER UNSIGNED NOT NULL DEFAULT 1 PRIMARY KEY,
 208+ ave_review_time INTEGER UNSIGNED NOT NULL DEFAULT 0,
 209+ ave_pending_time INTEGER UNSIGNED NOT NULL DEFAULT 0
 210+ );"
 211+ );
 212+}
Index: trunk/extensions/FlaggedRevs/language/ValidationStatistics.i18n.php
@@ -11,9 +11,12 @@
1212 'validationstatistics' => 'Validation statistics',
1313 'validationstatistics-users' => '\'\'\'{{SITENAME}}\'\'\' currently has \'\'\'$1\'\'\' {{PLURAL:$1|user|users}} with [[{{MediaWiki:Validationpage}}|Editor]] rights
1414 and \'\'\'$2\'\'\' {{PLURAL:$2|user|users}} with [[{{MediaWiki:Validationpage}}|Reviewer]] rights.',
15 - 'validationstatistics-table' => "Statistics for each namespace are shown below, excluding redirects pages.
 15+ 'validationstatistics-time' => 'The average wait for \'\'non-user\'\' edits is \'\'\'$1\'\'\'.
 16+ The average lag for [[Special:OldReviewedPages|outdated pages]] is \'\'\'$2\'\'\'.',
 17+ 'validationstatistics-table' => "Statistics for each namespace are shown below, ''excluding'' redirects pages. ''Outdated'' pages are those
 18+ with edits newer than the stable version. If the stable version is also the lastest version, then the page is ''synced''.
1619
17 -'''Note:''' the following data is cached for several hours and may not be up to date.",
 20+''Note: the following data is cached for several hours and may not be up to date.''",
1821 'validationstatistics-ns' => 'Namespace',
1922 'validationstatistics-total' => 'Pages',
2023 'validationstatistics-stable' => 'Reviewed',
Index: trunk/extensions/FlaggedRevs/specialpages/ValidationStatistics_body.php
@@ -22,15 +22,21 @@
2323
2424 $ec = $this->getEditorCount();
2525 $rc = $this->getReviewerCount();
 26+ $mt = $this->getMeanReviewWait();
 27+ $pt = $this->getMeanPendingWait();
2628
2729 $wgOut->addWikiText( wfMsgExt( 'validationstatistics-users', array( 'parsemag' ),
28 - $wgLang->formatnum( $ec ), $wgLang->formatnum( $rc ) ) );
 30+ $wgLang->formatnum($ec), $wgLang->formatnum($rc) )
 31+ );
 32+ $wgOut->addWikiText( wfMsgExt( 'validationstatistics-time', array( 'parsemag' ),
 33+ $wgLang->formatTimePeriod($mt), $wgLang->formatTimePeriod($pt) )
 34+ );
2935
3036 if( !$this->readyForQuery() ) {
3137 return false;
3238 }
3339
34 - $wgOut->addWikiText( wfMsg('validationstatistics-table') );
 40+ $wgOut->addWikiText( '<hr/>' . wfMsg('validationstatistics-table') );
3541 $wgOut->addHTML( Xml::openElement( 'table', array( 'class' => 'wikitable flaggedrevs_stats_table' ) ) );
3642 $wgOut->addHTML( "<tr>\n" );
3743 // Headings (for a positive grep result):
@@ -138,4 +144,14 @@
139145 array( 'ug_group' => 'reviewer' ),
140146 __METHOD__ );
141147 }
 148+
 149+ protected function getMeanReviewWait() {
 150+ if( !$this->db->tableExists( 'flaggedrevs_stats2' ) ) return '-';
 151+ return $this->db->selectField( 'flaggedrevs_stats2', 'ave_review_time' );
 152+ }
 153+
 154+ protected function getMeanPendingWait() {
 155+ if( !$this->db->tableExists( 'flaggedrevs_stats2' ) ) return '-';
 156+ return $this->db->selectField( 'flaggedrevs_stats2', 'ave_pending_time' );
 157+ }
142158 }

Comments

#Comment by Tim Starling (talk | contribs)   08:36, 3 December 2009

Note that MySQL (4.0 at least, not sure about the others) is not optimised for this kind of modulo sample. It scans the whole result set and does the modulo calculation on each one. If your $mod here is large, then it would be faster to use IN(...) with long lists of matching rev_id values.

Status & tagging log