Index: trunk/extensions/MobileFrontend/api/ApiQueryExtracts.php |
— | — | @@ -21,9 +21,10 @@ |
22 | 22 | wfProfileOut( __METHOD__ ); |
23 | 23 | return; |
24 | 24 | } |
25 | | - $isXml = $this->getMain()->getPrinter()->getFormat() == 'XML'; |
| 25 | + $isXml = $this->getMain()->isInternalMode() || $this->getMain()->getPrinter()->getFormat() == 'XML'; |
26 | 26 | $result = $this->getResult(); |
27 | 27 | $params = $this->params = $this->extractRequestParams(); |
| 28 | + $this->requireMaxOneParameter( $params, 'chars', 'sentences' ); |
28 | 29 | $continue = 0; |
29 | 30 | $limit = intval( $params['limit'] ); |
30 | 31 | if ( $limit > 1 && !$params['intro'] ) { |
— | — | @@ -45,9 +46,8 @@ |
46 | 47 | break; |
47 | 48 | } |
48 | 49 | $text = $this->getExtract( $t ); |
49 | | - if ( isset( $params['length'] ) ) { |
50 | | - $text = $this->trimText( $text ); |
51 | | - } |
| 50 | + $text = $this->truncate( $text ); |
| 51 | + |
52 | 52 | if ( $isXml ) { |
53 | 53 | $fit = $result->addValue( array( 'query', 'pages', $id ), 'extract', array( '*' => $text ) ); |
54 | 54 | } else { |
— | — | @@ -74,7 +74,7 @@ |
75 | 75 | $api = new ApiMain( new FauxRequest( |
76 | 76 | array( |
77 | 77 | 'action' => 'query', |
78 | | - 'prop' => 'excerpts', |
| 78 | + 'prop' => 'extracts', |
79 | 79 | 'explaintext' => true, |
80 | 80 | 'exlimit' => count( $results ), |
81 | 81 | 'pageids' => implode( '|', $pageIds ), |
— | — | @@ -83,8 +83,8 @@ |
84 | 84 | $api->execute(); |
85 | 85 | $data = $api->getResultData(); |
86 | 86 | foreach ( $pageIds as $id ) { |
87 | | - if ( isset( $data['query']['pages'][$id]['excerpts'][0] ) ) { |
88 | | - $results[$id]['extract'] = $data['query']['pages'][$id]['extract'][0]; |
| 87 | + if ( isset( $data['query']['pages'][$id]['extract']['*'] ) ) { |
| 88 | + $results[$id]['extract'] = $data['query']['pages'][$id]['extract']['*']; |
89 | 89 | $results[$id]['extract trimmed'] = false; |
90 | 90 | } |
91 | 91 | } |
— | — | @@ -92,7 +92,7 @@ |
93 | 93 | } |
94 | 94 | |
95 | 95 | /** |
96 | | - * Returns a processed, but not trimmed excerpt |
| 96 | + * Returns a processed, but not trimmed extract |
97 | 97 | * @param Title $title |
98 | 98 | * @return string |
99 | 99 | */ |
— | — | @@ -188,10 +188,8 @@ |
189 | 189 | } |
190 | 190 | |
191 | 191 | /** |
192 | | - * Converts page HTML into an excerpt |
| 192 | + * Converts page HTML into an extract |
193 | 193 | * @param string $text |
194 | | - * @param Title $title |
195 | | - * @param bool $plainText |
196 | 194 | * @return string |
197 | 195 | */ |
198 | 196 | private function convertText( $text ) { |
— | — | @@ -203,16 +201,22 @@ |
204 | 202 | return trim( $text ); |
205 | 203 | } |
206 | 204 | |
/**
 * Shortens an extract according to the request parameters.
 *
 * The 'chars' parameter is consulted first; 'sentences' is only looked at
 * when 'chars' is not set. When neither is set the text is returned as is.
 *
 * @param string $text Extract to shorten
 * @return string
 */
private function truncate( $text ) {
	$charLimit = $this->params['chars'];
	if ( $charLimit ) {
		// Character-based truncation
		return $this->getFirstChars( $text, $charLimit );
	}

	$sentenceLimit = $this->params['sentences'];
	if ( $sentenceLimit ) {
		// Sentence-based truncation
		return $this->getFirstSentences( $text, $sentenceLimit );
	}

	// No limit requested
	return $text;
}
| 213 | + |
207 | 214 | /** |
208 | 215 | * |
209 | 216 | * @param string $text |
210 | 217 | * @param int $requestedLength |
211 | | - * @param bool $plainText |
212 | 218 | * @return string |
213 | 219 | */ |
214 | | - private function trimText( $text, $requestedLength, $plainText ) { |
215 | | - global $wgUseTidy; |
216 | | - |
| 220 | + private function getFirstChars( $text, $requestedLength ) { |
217 | 221 | wfProfileIn( __METHOD__ ); |
218 | 222 | $length = mb_strlen( $text ); |
219 | 223 | if ( $length <= $requestedLength ) { |
— | — | @@ -223,20 +227,69 @@ |
224 | 228 | preg_match( $pattern, $text, $m ); |
225 | 229 | $text = $m[0]; |
226 | 230 | // Fix possibly unclosed tags |
227 | | - if ( $wgUseTidy && !$plainText ) { |
| 231 | + $text = $this->tidy( $text ); |
| 232 | + $text .= wfMessage( 'ellipsis' )->inContentLanguage()->text(); |
| 233 | + wfProfileOut( __METHOD__ ); |
| 234 | + return $text; |
| 235 | + } |
| 236 | + |
/**
 * Returns the leading sentences of an extract.
 *
 * Sentence ends are located with a regular expression that recognises
 * ASCII terminators followed by whitespace as well as full-width and
 * half-width CJK terminators. If the text does not contain the requested
 * number of sentences, the first line of the text is used instead.
 * The result is passed through tidy() before it is returned.
 *
 * @param string $text
 * @param int $requestedSentenceCount
 * @return string
 */
private function getFirstSentences( $text, $requestedSentenceCount ) {
	wfProfileIn( __METHOD__ );
	// Based on code from OpenSearchXml by Brion Vibber
	// Non-ASCII terminators are written as PCRE code point escapes so that
	// they cannot be confused with the regex metacharacters '.', '!' and '?'
	// and do not depend on the encoding of this file.
	$endchars = array(
		'([^\d])\.\s', '\!\s', '\?\s', // regular ASCII
		'\x{3002}', // full-width ideographic full-stop
		'\x{FF0E}', '\x{FF01}', '\x{FF1F}', // double-width roman forms
		'\x{FF61}', // half-width ideographic full stop
	);

	$endgroup = implode( '|', $endchars );
	$end = "(?:$endgroup)";
	$sentence = ".+?$end+";
	// Force an integer so nothing but a number can end up in the quantifier
	$count = intval( $requestedSentenceCount );
	$regexp = "/^($sentence){{$count}}/u";
	$matches = array();
	if ( preg_match( $regexp, $text, $matches ) ) {
		$text = $matches[0];
	} else {
		// Just return the first line
		$lines = explode( "\n", $text );
		$text = trim( $lines[0] );
	}
	// Single exit point: the cut may have left tags unclosed, and
	// wfProfileOut() has to balance the wfProfileIn() above on every path.
	$text = $this->tidy( $text );
	wfProfileOut( __METHOD__ );
	return $text;
}
| 268 | + |
/**
 * Runs the text through MWTidy when that is applicable.
 *
 * Tidy is only invoked when $wgUseTidy is enabled and the request did not
 * ask for plaintext output; otherwise the text is returned unchanged.
 *
 * @param string $text
 * @return string
 */
private function tidy( $text ) {
	global $wgUseTidy;

	wfProfileIn( __METHOD__ );
	$cleaned = $text;
	if ( $wgUseTidy ) {
		// The plaintext flag is only consulted when tidy is enabled
		if ( !$this->params['plaintext'] ) {
			$cleaned = trim( MWTidy::tidy( $text ) );
		}
	}
	wfProfileOut( __METHOD__ );
	return $cleaned;
}
234 | 283 | |
235 | 284 | public function getAllowedParams() { |
236 | 285 | return array( |
237 | | - 'length' => array( |
| 286 | + 'chars' => array( |
238 | 287 | ApiBase::PARAM_TYPE => 'integer', |
239 | 288 | ApiBase::PARAM_MIN => 1, |
240 | 289 | ), |
| 290 | + 'sentences' => array( |
| 291 | + ApiBase::PARAM_TYPE => 'integer', |
| 292 | + ApiBase::PARAM_MIN => 1, |
| 293 | + ), |
241 | 294 | 'limit' => array( |
242 | 295 | ApiBase::PARAM_DFLT => 1, |
243 | 296 | ApiBase::PARAM_TYPE => 'limit', |
— | — | @@ -258,7 +311,8 @@ |
259 | 312 | |
260 | 313 | public function getParamDescription() { |
261 | 314 | return array( |
262 | | - 'length' => 'How many characters to return, actual text returned might be slightly longer.', |
| 315 | + 'chars' => 'How many characters to return, actual text returned might be slightly longer.', |
| 316 | + 'sentences' => 'How many sentences to return', |
263 | 317 | 'limit' => 'How many extracts to return. ', |
264 | 318 | 'intro' => 'Return only content before the first section', |
265 | 319 | 'plaintext' => 'Return extracts as plaintext instead of limited HTML', |
— | — | @@ -284,7 +338,7 @@ |
285 | 339 | |
/**
 * Returns example request URLs for this module, keyed by URL with a
 * short description as the value.
 *
 * @return array
 */
public function getExamples() {
	$examples = array();
	$examples['api.php?action=query&prop=extracts&exchars=175&titles=Therion'] = 'Get a 175-character extract';
	return $examples;
}
291 | 345 | |
— | — | @@ -329,8 +383,8 @@ |
330 | 384 | $text = parent::getText(); |
331 | 385 | if ( $this->plainText ) { |
332 | 386 | $text = html_entity_decode( $text ); |
333 | | - $text = str_replace( "\r", "\n", $text ); |
334 | | - $text = preg_replace( "/\n{3,}/", "\n\n", $text ); |
| 387 | + $text = str_replace( "\r", "\n", $text ); // for Windows |
| 388 | + $text = preg_replace( "/\n{3,}/", "\n\n", $text ); // normalise newlines |
335 | 389 | $text = preg_replace_callback( |
336 | 390 | "/" . ApiQueryExtracts::SECTION_MARKER_START . '(\d)'. ApiQueryExtracts::SECTION_MARKER_END . "(.*?)$/m", |
337 | 391 | array( $this, 'sectionCallback' ), |
Index: trunk/extensions/MobileFrontend/MobileFrontend.php |
— | — | @@ -132,7 +132,7 @@ |
133 | 133 | $wgHooks['APIAfterExecute'][] = 'ApiParseExtender::onAPIAfterExecute'; |
134 | 134 | $wgHooks['APIGetParamDescription'][] = 'ApiParseExtender::onAPIGetParamDescription'; |
135 | 135 | $wgHooks['APIGetDescription'][] = 'ApiParseExtender::onAPIGetDescription'; |
136 | | -$wgHooks['OpenSearchXml'][] = 'ApiQueryExcerpts::onOpenSearchXml'; |
| 136 | +$wgHooks['OpenSearchXml'][] = 'ApiQueryExtracts::onOpenSearchXml'; |
137 | 137 | |
138 | 138 | function efMobileFrontend_Setup() { |
139 | 139 | global $wgExtMobileFrontend, $wgHooks; |
— | — | @@ -174,6 +174,6 @@ |
175 | 175 | } |
176 | 176 | |
177 | 177 | /** |
178 | | - * Whether this extension should provide its excerpts to OpenSearchXml extension |
| 178 | + * Whether this extension should provide its extracts to OpenSearchXml extension |
179 | 179 | */ |
180 | 180 | $wgMFExtendOpenSearchXml = false; |