Index: trunk/extensions/OpenSearchXml/ApiOpenSearchXml.php |
— | — | @@ -85,10 +85,9 @@ |
86 | 86 | // Open search results may be stored for a very long time |
87 | 87 | $this->getMain()->setCacheMaxAge( 1200 ); |
88 | 88 | |
89 | | - $data = $this->search( $search, $limit, $namespaces ); |
90 | | - wfRunHooks( 'OpenSearchXml', array( &$data ) ); |
| 89 | + $srchres = PrefixSearch::titleSearch( $search, $limit, $namespaces ); |
91 | 90 | |
92 | | - $items = array_map( array( $this, 'formatItem' ), $data ); |
| 91 | + $items = array_filter( array_map( array( $this, 'formatItem' ), $srchres ) ); |
93 | 92 | |
94 | 93 | $result = $this->getResult(); |
95 | 94 | $result->addValue( null, 'version', '2.0' ); |
— | — | @@ -98,28 +97,6 @@ |
99 | 98 | $result->addValue( null, 'Section', $items ); |
100 | 99 | } |
101 | 100 | |
102 | | - private function search( $search, $limit, $namespaces ) { |
103 | | - $srchres = PrefixSearch::titleSearch( $search, $limit, $namespaces ); |
104 | | - $titles = array_filter( array_map( 'Title::newFromText', $srchres ) ); |
105 | | - $lb = new LinkBatch( $titles ); |
106 | | - $lb->setCaller( __METHOD__ ); |
107 | | - $lb->execute(); |
108 | | - |
109 | | - $results = array(); |
110 | | - foreach ( $titles as $title ) { |
111 | | - $title = $this->checkRedirect( $title ); |
112 | | - if( $this->seen( $title ) ) { |
113 | | - continue; |
114 | | - } |
115 | | - $results[$title->getArticleID()] = array( |
116 | | - 'title' => $title, |
117 | | - 'extract' => false, |
118 | | - 'image' => false, |
119 | | - ); |
120 | | - } |
121 | | - return $results; |
122 | | - } |
123 | | - |
124 | 101 | public function getAllowedParams() { |
125 | 102 | $params = parent::getAllowedParams(); |
126 | 103 | $params['format'] = array( |
— | — | @@ -141,54 +118,47 @@ |
142 | 119 | } |
143 | 120 | |
144 | 121 | /** |
145 | | - * @param $result array |
| 122 | + * @param $name string |
146 | 123 | * @return array|bool |
147 | 124 | */ |
148 | | - protected function formatItem( $result ) { |
149 | | - $title = $result['title']; |
| 125 | + protected function formatItem( $name ) { |
| 126 | + $title = Title::newFromText( $name ); |
| 127 | + if( $title ) { |
| 128 | + $title = $this->_checkRedirect( $title ); |
| 129 | + if( $this->_seen( $title ) ) { |
| 130 | + return false; |
| 131 | + } |
150 | 132 | |
151 | | - $item = array(); |
152 | | - if ( $result['extract'] === false || $result['image'] === false ) { |
153 | 133 | list( $extract, $badge ) = $this->getExtract( $title ); |
154 | | - if ( $result['image'] === false ) { |
155 | | - $image = $this->getBadge( $title, $badge ); |
156 | | - if( $image ) { |
157 | | - $thumb = $image->transform( array( 'width' => 50, 'height' => 50 ), 0 ); |
158 | | - if( $thumb ) { |
159 | | - $item['Image'] = array( |
160 | | - 'source' => wfExpandUrl( $thumb->getUrl(), PROTO_CURRENT ), |
161 | | - //alt |
162 | | - 'width' => $thumb->getWidth(), |
163 | | - 'height' => $thumb->getHeight() |
164 | | - ); |
165 | | - } |
| 134 | + $image = $this->getBadge( $title, $badge ); |
| 135 | + |
| 136 | + $item = array(); |
| 137 | + $item['Text']['*'] = $title->getPrefixedText(); |
| 138 | + $item['Description']['*'] = $extract; |
| 139 | + $item['Url']['*'] = wfExpandUrl( $title->getFullUrl(), PROTO_CURRENT ); |
| 140 | + if( $image ) { |
| 141 | + $thumb = $image->transform( array( 'width' => 50, 'height' => 50 ), 0 ); |
| 142 | + if( $thumb ) { |
| 143 | + $item['Image'] = array( |
| 144 | + 'source' => wfExpandUrl( $thumb->getUrl(), PROTO_CURRENT ), |
| 145 | + //alt |
| 146 | + 'width' => $thumb->getWidth(), |
| 147 | + 'height' => $thumb->getHeight() |
| 148 | + ); |
166 | 149 | } |
167 | 150 | } |
| 151 | + } else { |
| 152 | + $item = array( 'Text' => array( '*' => $name ) ); |
168 | 153 | } |
169 | | - |
170 | | - if ( is_string( $result['extract'] ) ) { |
171 | | - $extract = $result['extract']; |
172 | | - if ( !isset( $result['extract trimmed'] ) || !$result['extract trimmed'] ) { |
173 | | - $extract = $this->extractStart( $extract ); |
174 | | - } |
175 | | - } |
176 | | - if ( is_array( $result['image'] ) ) { |
177 | | - $item['Image'] = $result['image']; |
178 | | - } |
179 | | - |
180 | | - $item['Text']['*'] = $title->getPrefixedText(); |
181 | | - $item['Description']['*'] = $extract; |
182 | | - $item['Url']['*'] = wfExpandUrl( $title->getFullUrl(), PROTO_CURRENT ); |
183 | | - |
184 | 154 | return $item; |
185 | 155 | } |
186 | 156 | |
187 | 157 | /** |
188 | 158 | * @param $title Title |
189 | 159 | * |
190 | | - * @return Title |
| 160 | + * @return Title |
191 | 161 | */ |
192 | | - protected function checkRedirect( $title ) { |
| 162 | + protected function _checkRedirect( $title ) { |
193 | 163 | $art = new Article( $title ); |
194 | 164 | $target = $art->getRedirectTarget(); |
195 | 165 | if( $target ) { |
— | — | @@ -202,7 +172,7 @@ |
203 | 173 | * @param $title Title |
204 | 174 | * @return bool |
205 | 175 | */ |
206 | | - protected function seen( $title ) { |
| 176 | + protected function _seen( $title ) { |
207 | 177 | $name = $title->getPrefixedText(); |
208 | 178 | if( isset( $this->mSeen[$name] ) ) { |
209 | 179 | return true; |
— | — | @@ -216,7 +186,7 @@ |
217 | 187 | * @param string $text |
218 | 188 | * @return string |
219 | 189 | */ |
220 | | - function stripMarkup( $text ) { |
| 190 | + function _stripMarkup( $text ) { |
221 | 191 | $text = substr( $text, 0, 4096 ); // don't bother with long text... |
222 | 192 | |
223 | 193 | $text = str_replace( "'''", "", $text ); |
— | — | @@ -233,7 +203,7 @@ |
234 | 204 | (?:\|($pipeContents))? |
235 | 205 | (?:\|$pipeContents)* |
236 | 206 | \]\] |
237 | | - #six", array( $this, 'stripLink' ), $text ); |
| 207 | + #six", array( $this, '_stripLink' ), $text ); |
238 | 208 | |
239 | 209 | $text = preg_replace( '#\\[(?:$protocols).*? (.*?)\\]#s', '$1', $text ); // URL links |
240 | 210 | $text = preg_replace( '#</?[a-z0-9]+.*?>#s', '', $text ); // HTML-style tags |
— | — | @@ -248,7 +218,7 @@ |
249 | 219 | * @param $matches array |
250 | 220 | * @return string |
251 | 221 | */ |
252 | | - function stripLink( $matches ) { |
| 222 | + function _stripLink( $matches ) { |
253 | 223 | $target = trim( $matches[1] ); |
254 | 224 | if( isset( $matches[2] ) ) { |
255 | 225 | $text = trim( $matches[2] ); |
— | — | @@ -274,8 +244,7 @@ |
275 | 245 | * @return string |
276 | 246 | * @access private |
277 | 247 | */ |
278 | | - function extractStart( $text ) { |
279 | | - global $wgOpenSearchDescriptionLength; |
| 248 | + function _extractStart( $text ) { |
280 | 249 | $endchars = array( |
281 | 250 | '([^\d])\.\s', '\!\s', '\?\s', // regular ASCII |
282 | 251 | '。', // full-width ideographic full-stop |
— | — | @@ -285,7 +254,7 @@ |
286 | 255 | |
287 | 256 | $endgroup = implode( '|', $endchars ); |
288 | 257 | $end = "(?:$endgroup)"; |
289 | | - $sentence = ".{{$wgOpenSearchDescriptionLength},}?$end+"; |
| 258 | + $sentence = ".*?$end+"; |
290 | 259 | $firstone = "/^($sentence)/u"; |
291 | 260 | $matches = array(); |
292 | 261 | if( preg_match( $firstone, $text, $matches ) ) { |
— | — | @@ -303,7 +272,7 @@ |
304 | 273 | * @param $text string |
305 | 274 | * @return string|bool |
306 | 275 | */ |
307 | | - function extractBadge( $text ) { |
| 276 | + function _extractBadge( $text ) { |
308 | 277 | global $wgContLang; |
309 | 278 | $image = preg_quote( $wgContLang->getNsText( NS_IMAGE ), '#' ); |
310 | 279 | $matches = array(); |
— | — | @@ -318,10 +287,10 @@ |
319 | 288 | * @param $arg string |
320 | 289 | * @return bool|String |
321 | 290 | */ |
322 | | - function validateBadge( $arg ) { |
| 291 | + function _validateBadge( $arg ) { |
323 | 292 | // Some templates want an entire [[Image:Foo.jpg|250px]] |
324 | 293 | if( substr( $arg, 0, 2 ) == '[[' ) { |
325 | | - return $this->extractBadge( $arg ); |
| 294 | + return $this->_extractBadge( $arg ); |
326 | 295 | } |
327 | 296 | |
328 | 297 | // Others will take Image:Foo.jpg or Foo.jpg |
— | — | @@ -376,7 +345,7 @@ |
377 | 346 | //var_dump( $arg ); |
378 | 347 | $argName = trim( $frame->expand( $arg["name"], PPFrame::RECOVER_ORIG ) ); |
379 | 348 | if( in_array( $argName, $imageArgs ) ) { |
380 | | - $badge = $this->validateBadge( |
| 349 | + $badge = $this->_validateBadge( |
381 | 350 | trim( |
382 | 351 | $frame->expand( $arg["value"], PPFrame::RECOVER_ORIG ) ) ); |
383 | 352 | if( $badge ) { |
— | — | @@ -393,18 +362,18 @@ |
394 | 363 | if( !$badge ) { |
395 | 364 | // Look for the first image in the body text if there wasn't |
396 | 365 | // one in an infobox. |
397 | | - $badge = $this->extractBadge( $out ); |
| 366 | + $badge = $this->_extractBadge( $out ); |
398 | 367 | } |
399 | 368 | |
400 | 369 | // The remaining text may still contain wiki and HTML markup. |
401 | 370 | // We'll use our shitty hand parser to strip most of those from |
402 | 371 | // the beginning of the text. |
403 | | - $stripped = $this->stripMarkup( $out ); |
| 372 | + $stripped = $this->_stripMarkup( $out ); |
404 | 373 | |
405 | 374 | // And now, we'll grab just the first sentence as text, and |
406 | 375 | // also try to rip out a badge image. |
407 | 376 | return array( |
408 | | - $this->extractStart( $stripped ), |
| 377 | + $this->_extractStart( $stripped ), |
409 | 378 | $badge ); |
410 | 379 | } |
411 | 380 | return ''; |
Index: trunk/extensions/OpenSearchXml/OpenSearchXml.php |
— | — | @@ -40,11 +40,6 @@ |
41 | 41 | $wgOpenSearchAdvertiseXml = true; |
42 | 42 | |
43 | 43 | /** |
44 | | - * Minimum length of extract in <Description>. Actual extracts will last until the end of sentence. |
45 | | - */ |
46 | | -$wgOpenSearchDescriptionLength = 100; |
47 | | - |
48 | | -/** |
49 | 44 | * @param $urls array |
50 | 45 | * @return bool |
51 | 46 | */ |