Index: trunk/phase3/includes/HTMLCacheUpdate.php |
— | — | @@ -25,38 +25,119 @@ |
26 | 26 | */ |
27 | 27 | class HTMLCacheUpdate |
28 | 28 | { |
29 | | - public $mTitle, $mTable, $mPrefix; |
| 29 | + public $mTitle, $mTable, $mPrefix, $mStart, $mEnd; |
30 | 30 | public $mRowsPerJob, $mRowsPerQuery; |
31 | 31 | |
32 | | - function __construct( $titleTo, $table ) { |
| 32 | + function __construct( $titleTo, $table, $start = false, $end = false ) { |
33 | 33 | global $wgUpdateRowsPerJob, $wgUpdateRowsPerQuery; |
34 | 34 | |
35 | 35 | $this->mTitle = $titleTo; |
36 | 36 | $this->mTable = $table; |
| 37 | + $this->mStart = $start; |
| 38 | + $this->mEnd = $end; |
37 | 39 | $this->mRowsPerJob = $wgUpdateRowsPerJob; |
38 | 40 | $this->mRowsPerQuery = $wgUpdateRowsPerQuery; |
39 | 41 | $this->mCache = $this->mTitle->getBacklinkCache(); |
40 | 42 | } |
41 | 43 | |
42 | 44 | public function doUpdate() { |
43 | | - # Fetch the IDs |
| 45 | + if ( $this->mStart || $this->mEnd ) { |
| 46 | + $this->doPartialUpdate(); |
| 47 | + return; |
| 48 | + } |
| 49 | + |
| 50 | + # Get an estimate of the number of rows from the BacklinkCache |
44 | 51 | $numRows = $this->mCache->getNumLinks( $this->mTable ); |
45 | | - |
46 | | - if ( $numRows != 0 ) { |
47 | | - if ( $numRows > $this->mRowsPerJob ) { |
48 | | - $this->insertJobs(); |
| 52 | + if ( $numRows > $this->mRowsPerJob * 2 ) { |
| 53 | + # Do fast cached partition |
| 54 | + $this->insertJobs(); |
| 55 | + } else { |
| 56 | + # Get the links from the DB |
| 57 | + $titleArray = $this->mCache->getLinks( $this->mTable ); |
| 58 | + # Check if the row count estimate was correct |
| 59 | + if ( $titleArray->count() > $this->mRowsPerJob * 2 ) { |
| 60 | + # Not correct, do accurate partition |
| 61 | + wfDebug( __METHOD__.": row count estimate was incorrect, repartitioning\n" ); |
| 62 | + $this->insertJobsFromTitles( $titleArray ); |
49 | 63 | } else { |
50 | | - $this->invalidate(); |
| 64 | + $this->invalidateTitles( $titleArray ); |
51 | 65 | } |
52 | 66 | } |
53 | 67 | wfRunHooks( 'HTMLCacheUpdate::doUpdate', array($this->mTitle) ); |
54 | 68 | } |
55 | 69 | |
| 70 | + /** |
| 71 | + * Update some of the backlinks, defined by a page ID range |
| 72 | + */ |
| 73 | + protected function doPartialUpdate() { |
| 74 | + $titleArray = $this->mCache->getLinks( $this->mTable, $this->mStart, $this->mEnd ); |
| 75 | + if ( $titleArray->count() <= $this->mRowsPerJob * 2 ) { |
| 76 | + # This partition is small enough, do the update |
| 77 | + $this->invalidateTitles( $titleArray ); |
| 78 | + } else { |
| 79 | + # Partitioning was excessively inaccurate. Divide the job further. |
| 80 | + # This can occur when a large number of links are added in a short |
| 81 | + # period of time, say by updating a heavily-used template. |
| 82 | + $this->insertJobsFromTitles( $titleArray ); |
| 83 | + } |
| 84 | + } |
| 85 | + |
| 86 | + /** |
| 87 | + * Partition the current range given by $this->mStart and $this->mEnd, |
| 88 | + * using a pre-calculated title array which gives the links in that range. |
| 89 | + * Queue the resulting jobs. |
| 90 | + */ |
| 91 | + protected function insertJobsFromTitles( $titleArray ) { |
| 92 | + # We make subpartitions in the sense that the start of the first job |
| 93 | + # will be the start of the parent partition, and the end of the last |
| 94 | + # job will be the end of the parent partition. |
| 95 | + $jobs = array(); |
| 96 | + $start = $this->mStart; # start of the current job |
| 97 | + $numTitles = 0; |
| 98 | + foreach ( $titleArray as $title ) { |
| 99 | + $id = $title->getArticleID(); |
| 100 | + # $numTitles is now the number of titles in the current job not |
| 101 | + # including the current ID |
| 102 | + if ( $numTitles >= $this->mRowsPerJob ) { |
| 103 | + # Add a job up to but not including the current ID |
| 104 | + $params = array( |
| 105 | + 'table' => $this->mTable, |
| 106 | + 'start' => $start, |
| 107 | + 'end' => $id - 1 |
| 108 | + ); |
| 109 | + $jobs[] = new HTMLCacheUpdateJob( $this->mTitle, $params ); |
| 110 | + $start = $id; |
| 111 | + $numTitles = 0; |
| 112 | + } |
| 113 | + $numTitles++; |
| 114 | + } |
| 115 | + # Last job |
| 116 | + $params = array( |
| 117 | + 'table' => $this->mTable, |
| 118 | + 'start' => $start, |
| 119 | + 'end' => $this->mEnd |
| 120 | + ); |
| 121 | + $jobs[] = new HTMLCacheUpdateJob( $this->mTitle, $params ); |
| 122 | + wfDebug( __METHOD__.": repartitioning into " . count( $jobs ) . " jobs\n" ); |
| 123 | + |
| 124 | + if ( count( $jobs ) < 2 ) { |
| 125 | + # I don't think this is possible at present, but handling this case |
| 126 | + # makes the code a bit more robust against future code updates and |
| 127 | + # avoids a potential infinite loop of repartitioning |
| 128 | + wfDebug( __METHOD__.": repartitioning failed!\n" ); |
| 129 | + $this->invalidateTitles( $titleArray ); |
| 130 | + return; |
| 131 | + } |
| 132 | + |
| 133 | + Job::batchInsert( $jobs ); |
| 134 | + } |
| 135 | + |
56 | 136 | protected function insertJobs() { |
57 | 137 | $batches = $this->mCache->partition( $this->mTable, $this->mRowsPerJob ); |
58 | 138 | if ( !$batches ) { |
59 | 139 | return; |
60 | 140 | } |
| 141 | + $jobs = array(); |
61 | 142 | foreach ( $batches as $batch ) { |
62 | 143 | $params = array( |
63 | 144 | 'table' => $this->mTable, |
— | — | @@ -68,18 +149,21 @@ |
69 | 150 | Job::batchInsert( $jobs ); |
70 | 151 | } |
71 | 152 | |
| 153 | + /** |
| 154 | + * Invalidate a range of pages, right now |
| 155 | + * @deprecated |
| 156 | + */ |
| 157 | + public function invalidate( $startId = false, $endId = false ) { |
| 158 | + $titleArray = $this->mCache->getLinks( $this->mTable, $startId, $endId ); |
| 159 | + $this->invalidateTitles( $titleArray ); |
| 160 | + } |
72 | 161 | |
73 | 162 | /** |
74 | | - * Invalidate a set of pages, right now |
| 163 | + * Invalidate an array (or iterator) of Title objects, right now |
75 | 164 | */ |
76 | | - public function invalidate( $startId = false, $endId = false ) { |
| 165 | + protected function invalidateTitles( $titleArray ) { |
77 | 166 | global $wgUseFileCache, $wgUseSquid; |
78 | 167 | |
79 | | - $titleArray = $this->mCache->getLinks( $this->mTable, $startId, $endId ); |
80 | | - if ( $titleArray->count() == 0 ) { |
81 | | - return; |
82 | | - } |
83 | | - |
84 | 168 | $dbw = wfGetDB( DB_MASTER ); |
85 | 169 | $timestamp = $dbw->timestamp(); |
86 | 170 | |
— | — | @@ -88,12 +172,20 @@ |
89 | 173 | foreach ( $titleArray as $title ) { |
90 | 174 | $ids[] = $title->getArticleID(); |
91 | 175 | } |
| 176 | + |
| 177 | + if ( !$ids ) { |
| 178 | + return; |
| 179 | + } |
| 180 | + |
92 | 181 | # Update page_touched |
93 | | - $dbw->update( 'page', |
94 | | - array( 'page_touched' => $timestamp ), |
95 | | - array( 'page_id IN (' . $dbw->makeList( $ids ) . ')' ), |
96 | | - __METHOD__ |
97 | | - ); |
| 182 | + $batches = array_chunk( $ids, $this->mRowsPerQuery ); |
| 183 | + foreach ( $batches as $batch ) { |
| 184 | + $dbw->update( 'page', |
| 185 | + array( 'page_touched' => $timestamp ), |
| 186 | + array( 'page_id IN (' . $dbw->makeList( $batch ) . ')' ), |
| 187 | + __METHOD__ |
| 188 | + ); |
| 189 | + } |
98 | 190 | |
99 | 191 | # Update squid |
100 | 192 | if ( $wgUseSquid ) { |
— | — | @@ -108,6 +200,7 @@ |
109 | 201 | } |
110 | 202 | } |
111 | 203 | } |
| 204 | + |
112 | 205 | } |
113 | 206 | |
114 | 207 | /** |
— | — | @@ -133,8 +226,8 @@ |
134 | 227 | } |
135 | 228 | |
136 | 229 | public function run() { |
137 | | - $update = new HTMLCacheUpdate( $this->title, $this->table ); |
138 | | - $update->invalidate( $this->start, $this->end ); |
| 230 | + $update = new HTMLCacheUpdate( $this->title, $this->table, $this->start, $this->end ); |
| 231 | + $update->doUpdate(); |
139 | 232 | return true; |
140 | 233 | } |
141 | 234 | } |
Index: trunk/phase3/includes/DefaultSettings.php |
— | — | @@ -3635,7 +3635,7 @@ |
3636 | 3636 | /** |
3637 | 3637 | * Number of rows to update per query |
3638 | 3638 | */ |
3639 | | -$wgUpdateRowsPerQuery = 10; |
| 3639 | +$wgUpdateRowsPerQuery = 100; |
3640 | 3640 | |
3641 | 3641 | /** |
3642 | 3642 | * Enable AJAX framework |