r83814 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r83813‎ | r83814 | r83815 >
Date:10:39, 13 March 2011
Author:catrope
Status:resolved (Comments)
Tags:
Comment:
(bug 27965) Paging in list=categorymembers was completely broken. It was paging by cl_from alone, while the index is on (cl_to, cl_type, cl_sortkey, cl_from) and only cl_to is constant. Fixed by paging on (type, sortkey, from), but using type|from|sortkey for clcontinue so any pipe characters in the sortkey are easier to handle. This needs the schema change in r83812 to work correctly, otherwise rows with cl_type=file will be skipped in certain cases.
Modified paths:
  • /trunk/phase3/includes/api/ApiQueryCategoryMembers.php (modified) (history)

Diff [purge]

Index: trunk/phase3/includes/api/ApiQueryCategoryMembers.php
@@ -86,17 +86,15 @@
8787 $fld_type = isset( $prop['type'] );
8888
8989 if ( is_null( $resultPageSet ) ) {
90 - $this->addFields( array( 'cl_from', 'page_namespace', 'page_title' ) );
 90+ $this->addFields( array( 'cl_from', 'cl_sortkey', 'cl_type', 'page_namespace', 'page_title' ) );
9191 $this->addFieldsIf( 'page_id', $fld_ids );
9292 $this->addFieldsIf( 'cl_sortkey_prefix', $fld_sortkeyprefix );
93 - $this->addFieldsIf( 'cl_sortkey', $fld_sortkey );
9493 } else {
9594 $this->addFields( $resultPageSet->getPageTableFields() ); // will include page_ id, ns, title
96 - $this->addFields( array( 'cl_from', 'cl_sortkey' ) );
 95+ $this->addFields( array( 'cl_from', 'cl_sortkey', 'cl_type' ) );
9796 }
9897
9998 $this->addFieldsIf( 'cl_timestamp', $fld_timestamp || $params['sort'] == 'timestamp' );
100 - $this->addFieldsIf( 'cl_type', $fld_type );
10199
102100 $this->addTables( array( 'page', 'categorylinks' ) ); // must be in this order for 'USE INDEX'
103101
@@ -123,18 +121,37 @@
124122
125123 $this->addOption( 'USE INDEX', 'cl_timestamp' );
126124 } else {
127 - // The below produces ORDER BY cl_type, cl_sortkey, cl_from, possibly with DESC added to each of them
128 - $this->addWhereRange( 'cl_type', $dir, null, null );
129 - $this->addWhereRange( 'cl_sortkey',
130 - $dir,
131 - $params['startsortkey'],
132 - $params['endsortkey'] );
133 - $this->addWhereRange( 'cl_from', $dir, null, null );
 125+ if ( $params['continue'] ) {
 126+ // type|from|sortkey
 127+ $cont = explode( '|', $params['continue'], 3 );
 128+ if ( count( $cont ) != 3 ) {
 129+ $this->dieUsage( 'Invalid continue param. You should pass the original value returned '.
 130+ 'by the previous query', '_badcontinue'
 131+ );
 132+ }
 133+ $escType = $this->getDB()->addQuotes( $cont[0] );
 134+ $from = intval( $cont[1] );
 135+ $escSortkey = $this->getDB()->addQuotes( $cont[2] );
 136+ $op = $dir == 'newer' ? '>' : '<';
 137+ $this->addWhere( "cl_type $op $escType OR " .
 138+ "(cl_type = $escType AND " .
 139+ "(cl_sortkey $op $escSortkey OR " .
 140+ "(cl_sortkey = $escSortkey AND " .
 141+ "cl_from $op= $from)))"
 142+ );
 143+
 144+ } else {
 145+ // The below produces ORDER BY cl_type, cl_sortkey, cl_from, possibly with DESC added to each of them
 146+ $this->addWhereRange( 'cl_type', $dir, null, null );
 147+ $this->addWhereRange( 'cl_sortkey',
 148+ $dir,
 149+ $params['startsortkey'],
 150+ $params['endsortkey'] );
 151+ $this->addWhereRange( 'cl_from', $dir, null, null );
 152+ }
134153 $this->addOption( 'USE INDEX', 'cl_sortkey' );
135154 }
136155
137 - $this->setContinuation( $params['continue'], $params['dir'] );
138 -
139156 $this->addWhere( 'cl_from=page_id' );
140157
141158 $limit = $params['limit'];
@@ -149,7 +166,13 @@
150167 if ( $params['sort'] == 'timestamp' ) {
151168 $this->setContinueEnumParameter( 'start', wfTimestamp( TS_ISO_8601, $row->cl_timestamp ) );
152169 } else {
153 - $this->setContinueEnumParameter( 'continue', $row->cl_from );
 170+ // Continue format is type|from|sortkey
 171+ // The order is a bit weird but it's convenient to put the sortkey at the end
 172+ // because we don't have to worry about pipes in the sortkey that way
 173+ // (and type and from can't contain pipes anyway)
 174+ $this->setContinueEnumParameter( 'continue',
 175+ "{$row->cl_type}|{$row->cl_from}|{$row->cl_sortkey}"
 176+ );
154177 }
155178 break;
156179 }
@@ -189,7 +212,9 @@
190213 if ( $params['sort'] == 'timestamp' ) {
191214 $this->setContinueEnumParameter( 'start', wfTimestamp( TS_ISO_8601, $row->cl_timestamp ) );
192215 } else {
193 - $this->setContinueEnumParameter( 'continue', $row->cl_from );
 216+ $this->setContinueEnumParameter( 'continue',
 217+ "{$row->cl_type}|{$row->cl_from}|{$row->cl_sortkey}"
 218+ );
194219 }
195220 break;
196221 }
@@ -204,21 +229,6 @@
205230 }
206231 }
207232
208 - /**
209 - * Add DB WHERE clause to continue previous query based on 'continue' parameter
210 - */
211 - private function setContinuation( $continue, $dir ) {
212 - if ( is_null( $continue ) ) {
213 - return; // This is not a continuation request
214 - }
215 -
216 - $encFrom = $this->getDB()->addQuotes( intval( $continue ) );
217 -
218 - $op = ( $dir == 'desc' ? '<=' : '>=' );
219 -
220 - $this->addWhere( "cl_from $op $encFrom" );
221 - }
222 -
223233 public function getAllowedParams() {
224234 return array(
225235 'title' => array(
@@ -316,7 +326,8 @@
317327 $desc['namespace'] = array(
318328 $desc['namespace'],
319329 'NOTE: Due to $wgMiserMode, using this may result in fewer than "limit" results',
320 - 'returned before continuing; in extreme cases, zero results may be returned',
 330+ 'returned before continuing; in extreme cases, zero results may be returned.',
 331+ 'Note that you can use cmtype=subcat or cmtype=file instead of cmnamespace=14 or 6.',
321332 );
322333 }
323334 return $desc;

Follow-up revisions

RevisionCommit summaryAuthorDate
r83835Minor followup to r83814, also fix some other existances...reedy17:26, 13 March 2011
r83993r83812, r83814: Don't use cl_type at all when paging categorylinks...saper02:53, 15 March 2011
r846131.17wmf1: MFT r81692, r82468, r83814, r83885, r83891, r83897, r83902, r83903,...catrope17:42, 23 March 2011
r85354MFT r82518, r82530, r82538, r82547, r82550, r82565, r82572, r82608, r82696, r...demon18:25, 4 April 2011
r98997Fix bug in r83814 reported on IRC: categorymembers did not set an ORDER BY wh...catrope13:15, 5 October 2011

Past revisions this follows-up on

RevisionCommit summaryAuthorDate
r83812Schema change: change cl_type from ENUM('page', 'subcat', 'file') to varchar(...catrope10:35, 13 March 2011

Comments

#Comment by Reedy (talk | contribs)   12:29, 13 March 2011

That's a thought, it gives a semi efficient way to do limited me filtering...

#Comment by Catrope (talk | contribs)   15:37, 13 March 2011

???

#Comment by Reedy (talk | contribs)   15:39, 13 March 2011

"That's a thought, it gives a semi efficient way to do limited namespace filtering..."

#Comment by Catrope (talk | contribs)   15:57, 13 March 2011

A very efficient way actually :)

Status & tagging log