Index: trunk/extensions/FlaggedRevs/maintenance/updateStats.inc |
— | — | @@ -32,7 +32,7 @@ |
33 | 33 | $ns_total[$row->namespace] = intval($row->total); |
34 | 34 | } |
35 | 35 | // Get reviewed page count for each namespace |
36 | | - $ret = $dbr->query( |
| 36 | + $ret = $dbr->query( |
37 | 37 | "SELECT page_namespace AS namespace, COUNT(*) AS reviewed |
38 | 38 | FROM $flaggedpages, $page WHERE fp_page_id = page_id |
39 | 39 | AND page_is_redirect = 0 AND page_namespace IN($ns) |
— | — | @@ -42,7 +42,7 @@ |
43 | 43 | $ns_reviewed[$row->namespace] = intval($row->reviewed); |
44 | 44 | } |
45 | 45 | // Get synced page count for each namespace |
46 | | - $ret = $dbr->query( |
| 46 | + $ret = $dbr->query( |
47 | 47 | "SELECT page_namespace AS namespace, COUNT(*) AS synced |
48 | 48 | FROM $flaggedpages, $page WHERE fp_page_id = page_id |
49 | 49 | AND fp_reviewed = 1 AND page_is_redirect = 0 AND page_namespace IN($ns) |
— | — | @@ -57,7 +57,7 @@ |
58 | 58 | "AVG( $now - UNIX_TIMESTAMP(fp_pending_since) )", |
59 | 59 | array( 'fp_pending_since IS NOT NULL', |
60 | 60 | 'fp_page_id = page_id', |
61 | | - 'page_namespace' => $wgFlaggedRevsNamespaces // Sanity check: must still be in proper NS |
| 61 | + 'page_namespace' => $wgFlaggedRevsNamespaces // Sanity check the NS |
62 | 62 | ), |
63 | 63 | __METHOD__, |
64 | 64 | array( 'USE INDEX' => array('flaggedpages' => 'fp_pending_since') ) |
— | — | @@ -67,7 +67,8 @@ |
68 | 68 | $seconds = $aveRT = $medianRT = 0; |
69 | 69 | # Only go so far back...otherwise we will get garbage values due to |
70 | 70 | # the fact that FlaggedRevs wasn't enabled until after a while. |
71 | | - $installed = $dbr->selectField( 'logging', 'MIN(log_timestamp)', array('log_type' => 'review') ); |
| 71 | + $installed = $dbr->selectField( 'logging', 'MIN(log_timestamp)', |
| 72 | + array('log_type' => 'review') ); |
72 | 73 | if( !$installed ) $installed = $dbr->timestamp(); // now |
73 | 74 | # Skip the most recent revs as they are likely to just |
74 | 75 | # be WHERE condition misses. This also gives us more data to use. |
— | — | @@ -110,25 +111,27 @@ |
111 | 112 | } |
112 | 113 | # Add a day for good measure to avoid bias |
113 | 114 | $maxTS = $dbr->timestamp( wfTimestamp(TS_UNIX,$worstLagTS) - 86400 ); |
| 115 | + $encMaxTS = $dbr->addQuotes( $maxTS ); |
114 | 116 | # Use a one week time range |
115 | 117 | $minTS = $dbr->timestamp( wfTimestamp(TS_UNIX,$worstLagTS) - 7*86400 ); |
| 118 | + $encMinTS = $dbr->addQuotes( $minTS ); |
116 | 119 | # Approximate the number of rows to scan |
117 | 120 | $rows = $dbr->estimateRowCount( 'revision', '1', |
118 | | - 'rev_user=0 AND rev_timestamp BETWEEN '.$dbr->addQuotes($minTS).' AND '.$dbr->addQuotes($maxTS) |
| 121 | + 'rev_user=0 AND rev_timestamp BETWEEN '.$encMinTS.' AND '.$encMaxTS |
119 | 122 | ); |
120 | 123 | # If the range doesn't have many rows (like on small wikis), use 14 days |
121 | 124 | if( $rows < 500 ) { |
122 | 125 | $minTS = $dbr->timestamp( wfTimestamp(TS_UNIX,$worstLagTS) - 14*86400 ); |
123 | 126 | # Approximate rows to scan |
124 | 127 | $rows = $dbr->estimateRowCount( 'revision', '1', |
125 | | - 'rev_user=0 AND rev_timestamp BETWEEN '.$dbr->addQuotes($minTS).' AND '.$dbr->addQuotes($maxTS) |
| 128 | + 'rev_user=0 AND rev_timestamp BETWEEN '.$encMinTS.' AND '.$encMaxTS |
126 | 129 | ); |
127 | 130 | # If the range doesn't have many rows (like on really small wikis), use 30 days |
128 | 131 | if( $rows <= 500 ) { |
129 | 132 | $minTS = $dbr->timestamp( wfTimestamp(TS_UNIX,$worstLagTS) - 30*86400 ); |
130 | 133 | # Approximate rows to scan |
131 | 134 | $rows = $dbr->estimateRowCount( 'revision', '1', |
132 | | - 'rev_user=0 AND rev_timestamp BETWEEN '.$dbr->addQuotes($minTS).' AND '.$dbr->addQuotes($maxTS) |
| 135 | + 'rev_user=0 AND rev_timestamp BETWEEN '.$encMinTS.' AND '.$encMaxTS |
133 | 136 | ); |
134 | 137 | # If the range doesn't have many rows (like on really tiny wikis), use 90 days |
135 | 138 | if( $rows <= 500 ) { |
— | — | @@ -139,7 +142,7 @@ |
140 | 143 | # Sanity check the starting timestamp |
141 | 144 | $minTS = max($minTS,$installed); |
142 | 145 | # Get timestamp boundaries |
143 | | - $timeCondition = 'rev_timestamp BETWEEN '.$dbr->addQuotes($minTS).' AND '.$dbr->addQuotes($maxTS); |
| 146 | + $timeCondition = 'rev_timestamp BETWEEN '.$encMinTS.' AND '.$encMaxTS; |
144 | 147 | # Modulus (to spread out over range) |
145 | 148 | $mod = intval($rows/$size); |
146 | 149 | $mod = max($mod,1); # $mod >= 1 |
— | — | @@ -147,8 +150,12 @@ |
148 | 151 | # Get the *first* reviewed rev *after* each RC item and get the average difference. |
149 | 152 | # Only do this for revisions to pages that *were* already logged as reviewed. |
150 | 153 | $sql = $dbr->selectSQLText( array('revision','page','logging','flaggedrevs'), |
151 | | - array( 'UNIX_TIMESTAMP(rev_timestamp) AS rt', 'UNIX_TIMESTAMP(MIN(fr_timestamp)) AS ft', 'MIN(log_timestamp)' ), |
152 | 154 | array( |
| 155 | + 'UNIX_TIMESTAMP(rev_timestamp) AS rt', // edit time |
| 156 | + 'UNIX_TIMESTAMP(MIN(fr_timestamp)) AS ft', // revision flagging time |
| 157 | + 'MIN(log_timestamp)' // page flagging time |
| 158 | + ), |
| 159 | + array( |
153 | 160 | $timeCondition, // in time range |
154 | 161 | 'rev_user = 0', // IP edits (should start off unreviewed) |
155 | 162 | "(rev_id % $mod) = 0", // Better spread |
— | — | @@ -158,16 +165,20 @@ |
159 | 166 | 'GROUP BY' => 'rev_id', // stats are for each edit and ID is unique |
160 | 167 | 'ORDER BY' => 'rev_id ASC', // slight bias avoidance, if any |
161 | 168 | 'LIMIT' => $size, // sample size |
162 | | - 'USE INDEX' => array('page' => 'PRIMARY','flaggedrevs' => 'PRIMARY','logging' => 'page_time') |
| 169 | + 'USE INDEX' => array('page' => 'PRIMARY','flaggedrevs' => 'PRIMARY', |
| 170 | + 'logging' => 'page_time') |
163 | 171 | ), |
164 | 172 | array( // Assumes title unchanged (reasonable). Double-check NS though. |
165 | | - 'page' => array('INNER JOIN',array('page_id = rev_page','page_namespace' => $wgFlaggedRevsNamespaces)), |
| 173 | + 'page' => array('INNER JOIN', array('page_id = rev_page', |
| 174 | + 'page_namespace' => $wgFlaggedRevsNamespaces)), |
166 | 175 | // Check that this page was stable at the time... |
167 | 176 | // Assumes that reviewed pages stay reviewed (reasonable). |
168 | | - 'logging' => array('INNER JOIN','log_namespace = page_namespace AND log_title = page_title |
169 | | - AND log_type = "review" AND log_timestamp < rev_timestamp'), |
| 177 | + 'logging' => array('INNER JOIN','log_namespace = page_namespace |
| 178 | + AND log_title = page_title AND log_type = "review" |
| 179 | + AND log_timestamp < rev_timestamp'), |
170 | 180 | // Check *if* it was later reviewed |
171 | | - 'flaggedrevs' => array('LEFT JOIN', 'fr_page_id = page_id AND fr_rev_id >= rev_id AND fr_timestamp > rev_timestamp'), |
| 181 | + 'flaggedrevs' => array('LEFT JOIN', 'fr_page_id = page_id |
| 182 | + AND fr_rev_id >= rev_id AND fr_timestamp > rev_timestamp'), |
172 | 183 | ) |
173 | 184 | ); |
174 | 185 | # Actually run the query... |
— | — | @@ -211,25 +222,34 @@ |
212 | 223 | $dbw = wfGetDB( DB_MASTER ); |
213 | 224 | $dbw->begin(); |
214 | 225 | // Create small stats tables if not present |
215 | | - list($flaggedrevs_stats,$flaggedrevs_stats2) = $dbr->tableNamesN('flaggedrevs_stats','flaggedrevs_stats2'); |
| 226 | + $flaggedrevs_stats = $dbr->tableName('flaggedrevs_stats'); |
| 227 | + $flaggedrevs_stats2 = $dbr->tableName('flaggedrevs_stats2'); |
216 | 228 | if( !$dbw->tableExists( 'flaggedrevs_stats' ) ) { |
217 | 229 | createFlaggedRevsStatsTable( $dbw, $flaggedrevs_stats ); |
218 | 230 | } |
219 | 231 | if( !$dbw->tableExists( 'flaggedrevs_stats2' ) ) { |
220 | 232 | createFlaggedRevsStatsTable2( $dbw, $flaggedrevs_stats2 ); |
221 | 233 | } |
| 234 | + // Per-namespace stats... |
222 | 235 | foreach( $wgFlaggedRevsNamespaces as $namespace ) { |
223 | 236 | $dbw->replace( 'flaggedrevs_stats', |
224 | 237 | array( 'namespace' ), |
225 | 238 | array( 'namespace' => intval($namespace), |
226 | 239 | 'total' => isset($ns_total[$namespace]) ? $ns_total[$namespace] : 0, |
227 | 240 | 'reviewed' => isset($ns_reviewed[$namespace]) ? $ns_reviewed[$namespace] : 0, |
228 | | - 'synced' => isset($ns_synced[$namespace]) ? $ns_synced[$namespace] : 0 ), |
| 241 | + 'synced' => isset($ns_synced[$namespace]) ? $ns_synced[$namespace] : 0 |
| 242 | + ), |
229 | 243 | __METHOD__ |
230 | 244 | ); |
231 | 245 | } |
232 | | - $dbw->replace( 'flaggedrevs_stats2', array('stat_id'), |
233 | | - array('stat_id' => 1, 'ave_review_time' => $aveRT, 'med_review_time' => $medianRT, 'ave_pending_time' => $avePET), |
| 246 | + // Overall stats... |
| 247 | + $dbw->replace( 'flaggedrevs_stats2', |
| 248 | + array( 'stat_id' ), |
| 249 | + array( 'stat_id' => 1, |
| 250 | + 'ave_review_time' => $aveRT, |
| 251 | + 'med_review_time' => $medianRT, |
| 252 | + 'ave_pending_time' => $avePET |
| 253 | + ), |
234 | 254 | __METHOD__ |
235 | 255 | ); |
236 | 256 | // Update timestamp |