r114401 MediaWiki - Code Review archive

Repository: MediaWiki
Revision: < r114400 | r114401 | r114402 >
Date: 20:27, 21 March 2012
Author: catrope
Status: ok
Tags:
Comment:
Revert r113365, r113372, r113395, r113474, r113475: unreviewed revisions in OpenSearchXml.

All of these revisions are tagged with 'gerritmigration' and will be resubmitted into Gerrit after the Gerrit switchover. See also http://lists.wikimedia.org/pipermail/wikitech-l/2012-March/059124.html
Modified paths:
  • /trunk/extensions/OpenSearchXml/ApiOpenSearchXml.php (modified) (history)
  • /trunk/extensions/OpenSearchXml/OpenSearchXml.php (modified) (history)

Diff [purge]

Index: trunk/extensions/OpenSearchXml/ApiOpenSearchXml.php
@@ -85,10 +85,9 @@
8686 // Open search results may be stored for a very long time
8787 $this->getMain()->setCacheMaxAge( 1200 );
8888
89 - $data = $this->search( $search, $limit, $namespaces );
90 - wfRunHooks( 'OpenSearchXml', array( &$data ) );
 89+ $srchres = PrefixSearch::titleSearch( $search, $limit, $namespaces );
9190
92 - $items = array_map( array( $this, 'formatItem' ), $data );
 91+ $items = array_filter( array_map( array( $this, 'formatItem' ), $srchres ) );
9392
9493 $result = $this->getResult();
9594 $result->addValue( null, 'version', '2.0' );
@@ -98,28 +97,6 @@
9998 $result->addValue( null, 'Section', $items );
10099 }
101100
102 - private function search( $search, $limit, $namespaces ) {
103 - $srchres = PrefixSearch::titleSearch( $search, $limit, $namespaces );
104 - $titles = array_filter( array_map( 'Title::newFromText', $srchres ) );
105 - $lb = new LinkBatch( $titles );
106 - $lb->setCaller( __METHOD__ );
107 - $lb->execute();
108 -
109 - $results = array();
110 - foreach ( $titles as $title ) {
111 - $title = $this->checkRedirect( $title );
112 - if( $this->seen( $title ) ) {
113 - continue;
114 - }
115 - $results[$title->getArticleID()] = array(
116 - 'title' => $title,
117 - 'extract' => false,
118 - 'image' => false,
119 - );
120 - }
121 - return $results;
122 - }
123 -
124101 public function getAllowedParams() {
125102 $params = parent::getAllowedParams();
126103 $params['format'] = array(
@@ -141,54 +118,47 @@
142119 }
143120
144121 /**
145 - * @param $result array
 122+ * @param $name string
146123 * @return array|bool
147124 */
148 - protected function formatItem( $result ) {
149 - $title = $result['title'];
 125+ protected function formatItem( $name ) {
 126+ $title = Title::newFromText( $name );
 127+ if( $title ) {
 128+ $title = $this->_checkRedirect( $title );
 129+ if( $this->_seen( $title ) ) {
 130+ return false;
 131+ }
150132
151 - $item = array();
152 - if ( $result['extract'] === false || $result['image'] === false ) {
153133 list( $extract, $badge ) = $this->getExtract( $title );
154 - if ( $result['image'] === false ) {
155 - $image = $this->getBadge( $title, $badge );
156 - if( $image ) {
157 - $thumb = $image->transform( array( 'width' => 50, 'height' => 50 ), 0 );
158 - if( $thumb ) {
159 - $item['Image'] = array(
160 - 'source' => wfExpandUrl( $thumb->getUrl(), PROTO_CURRENT ),
161 - //alt
162 - 'width' => $thumb->getWidth(),
163 - 'height' => $thumb->getHeight()
164 - );
165 - }
 134+ $image = $this->getBadge( $title, $badge );
 135+
 136+ $item = array();
 137+ $item['Text']['*'] = $title->getPrefixedText();
 138+ $item['Description']['*'] = $extract;
 139+ $item['Url']['*'] = wfExpandUrl( $title->getFullUrl(), PROTO_CURRENT );
 140+ if( $image ) {
 141+ $thumb = $image->transform( array( 'width' => 50, 'height' => 50 ), 0 );
 142+ if( $thumb ) {
 143+ $item['Image'] = array(
 144+ 'source' => wfExpandUrl( $thumb->getUrl(), PROTO_CURRENT ),
 145+ //alt
 146+ 'width' => $thumb->getWidth(),
 147+ 'height' => $thumb->getHeight()
 148+ );
166149 }
167150 }
 151+ } else {
 152+ $item = array( 'Text' => array( '*' => $name ) );
168153 }
169 -
170 - if ( is_string( $result['extract'] ) ) {
171 - $extract = $result['extract'];
172 - if ( !isset( $result['extract trimmed'] ) || !$result['extract trimmed'] ) {
173 - $extract = $this->extractStart( $extract );
174 - }
175 - }
176 - if ( is_array( $result['image'] ) ) {
177 - $item['Image'] = $result['image'];
178 - }
179 -
180 - $item['Text']['*'] = $title->getPrefixedText();
181 - $item['Description']['*'] = $extract;
182 - $item['Url']['*'] = wfExpandUrl( $title->getFullUrl(), PROTO_CURRENT );
183 -
184154 return $item;
185155 }
186156
187157 /**
188158 * @param $title Title
189159 *
190 - * @return Title
 160+ * @return
191161 */
192 - protected function checkRedirect( $title ) {
 162+ protected function _checkRedirect( $title ) {
193163 $art = new Article( $title );
194164 $target = $art->getRedirectTarget();
195165 if( $target ) {
@@ -202,7 +172,7 @@
203173 * @param $title Title
204174 * @return bool
205175 */
206 - protected function seen( $title ) {
 176+ protected function _seen( $title ) {
207177 $name = $title->getPrefixedText();
208178 if( isset( $this->mSeen[$name] ) ) {
209179 return true;
@@ -216,7 +186,7 @@
217187 * @param string $text
218188 * @return string
219189 */
220 - function stripMarkup( $text ) {
 190+ function _stripMarkup( $text ) {
221191 $text = substr( $text, 0, 4096 ); // don't bother with long text...
222192
223193 $text = str_replace( "'''", "", $text );
@@ -233,7 +203,7 @@
234204 (?:\|($pipeContents))?
235205 (?:\|$pipeContents)*
236206 \]\]
237 - #six", array( $this, 'stripLink' ), $text );
 207+ #six", array( $this, '_stripLink' ), $text );
238208
239209 $text = preg_replace( '#\\[(?:$protocols).*? (.*?)\\]#s', '$1', $text ); // URL links
240210 $text = preg_replace( '#</?[a-z0-9]+.*?>#s', '', $text ); // HTML-style tags
@@ -248,7 +218,7 @@
249219 * @param $matches array
250220 * @return string
251221 */
252 - function stripLink( $matches ) {
 222+ function _stripLink( $matches ) {
253223 $target = trim( $matches[1] );
254224 if( isset( $matches[2] ) ) {
255225 $text = trim( $matches[2] );
@@ -274,8 +244,7 @@
275245 * @return string
276246 * @access private
277247 */
278 - function extractStart( $text ) {
279 - global $wgOpenSearchDescriptionLength;
 248+ function _extractStart( $text ) {
280249 $endchars = array(
281250 '([^\d])\.\s', '\!\s', '\?\s', // regular ASCII
282251 '。', // full-width ideographic full-stop
@@ -285,7 +254,7 @@
286255
287256 $endgroup = implode( '|', $endchars );
288257 $end = "(?:$endgroup)";
289 - $sentence = ".{{$wgOpenSearchDescriptionLength},}?$end+";
 258+ $sentence = ".*?$end+";
290259 $firstone = "/^($sentence)/u";
291260 $matches = array();
292261 if( preg_match( $firstone, $text, $matches ) ) {
@@ -303,7 +272,7 @@
304273 * @param $text string
305274 * @return string|bool
306275 */
307 - function extractBadge( $text ) {
 276+ function _extractBadge( $text ) {
308277 global $wgContLang;
309278 $image = preg_quote( $wgContLang->getNsText( NS_IMAGE ), '#' );
310279 $matches = array();
@@ -318,10 +287,10 @@
319288 * @param $arg string
320289 * @return bool|String
321290 */
322 - function validateBadge( $arg ) {
 291+ function _validateBadge( $arg ) {
323292 // Some templates want an entire [[Image:Foo.jpg|250px]]
324293 if( substr( $arg, 0, 2 ) == '[[' ) {
325 - return $this->extractBadge( $arg );
 294+ return $this->_extractBadge( $arg );
326295 }
327296
328297 // Others will take Image:Foo.jpg or Foo.jpg
@@ -376,7 +345,7 @@
377346 //var_dump( $arg );
378347 $argName = trim( $frame->expand( $arg["name"], PPFrame::RECOVER_ORIG ) );
379348 if( in_array( $argName, $imageArgs ) ) {
380 - $badge = $this->validateBadge(
 349+ $badge = $this->_validateBadge(
381350 trim(
382351 $frame->expand( $arg["value"], PPFrame::RECOVER_ORIG ) ) );
383352 if( $badge ) {
@@ -393,18 +362,18 @@
394363 if( !$badge ) {
395364 // Look for the first image in the body text if there wasn't
396365 // one in an infobox.
397 - $badge = $this->extractBadge( $out );
 366+ $badge = $this->_extractBadge( $out );
398367 }
399368
400369 // The remaining text may still contain wiki and HTML markup.
401370 // We'll use our shitty hand parser to strip most of those from
402371 // the beginning of the text.
403 - $stripped = $this->stripMarkup( $out );
 372+ $stripped = $this->_stripMarkup( $out );
404373
405374 // And now, we'll grab just the first sentence as text, and
406375 // also try to rip out a badge image.
407376 return array(
408 - $this->extractStart( $stripped ),
 377+ $this->_extractStart( $stripped ),
409378 $badge );
410379 }
411380 return '';
Index: trunk/extensions/OpenSearchXml/OpenSearchXml.php
@@ -40,11 +40,6 @@
4141 $wgOpenSearchAdvertiseXml = true;
4242
4343 /**
44 - * Minimum length of extract in <Description>. Actual extracts will last until the end of sentence.
45 - */
46 -$wgOpenSearchDescriptionLength = 100;
47 -
48 -/**
4944 * @param $urls array
5045 * @return bool
5146 */

Past revisions this follows-up on

Revision | Commit summary | Author | Date
r113365 | Extended OpenSearchXml to use extracts and images from other extensions such ... | maxsem | 13:57, 8 March 2012
r113372 | Remove weird underscore prefixes from function names | maxsem | 18:14, 8 March 2012
r113395 | Follow-up r113372: missed one use | maxsem | 20:07, 8 March 2012
r113474 | Plugged MobileFrontend's excerpts into OpenSearchXml | maxsem | 15:49, 9 March 2012
r113475 | Bug 35083 - OpenSearchXml first sentences extraction produces bad results. Ma... | maxsem | 15:54, 9 March 2012

Status & tagging log