Index: trunk/extensions/MobileFrontend/api/ApiQueryExcerpts.php |
— | — | @@ -1,369 +0,0 @@ |
2 | | -<?php |
3 | | - |
4 | | -class ApiQueryExtracts extends ApiQueryBase { |
5 | | - const SECTION_MARKER_START = "\1\2"; |
6 | | - const SECTION_MARKER_END = "\2\1"; |
7 | | - |
8 | | - /** |
9 | | - * @var ParserOptions |
10 | | - */ |
11 | | - private $parserOptions; |
12 | | - private $params; |
13 | | - |
14 | | - public function __construct( $query, $moduleName ) { |
15 | | - parent::__construct( $query, $moduleName, 'ex' ); |
16 | | - } |
17 | | - |
18 | | - public function execute() { |
19 | | - wfProfileIn( __METHOD__ ); |
20 | | - $titles = $this->getPageSet()->getGoodTitles(); |
21 | | - if ( count( $titles ) == 0 ) { |
22 | | - wfProfileOut( __METHOD__ ); |
23 | | - return; |
24 | | - } |
25 | | - $isXml = $this->getMain()->getPrinter()->getFormat() == 'XML'; |
26 | | - $result = $this->getResult(); |
27 | | - $params = $this->params = $this->extractRequestParams(); |
28 | | - $continue = 0; |
29 | | - $limit = intval( $params['limit'] ); |
30 | | - if ( $limit > 1 && !$params['intro'] ) { |
31 | | - $limit = 1; |
32 | | - ///@todo: |
33 | | - //$result->setWarning( "Provided limit was too large for requests for whole article extracts, lowered to $limit" ); |
34 | | - } |
35 | | - if ( isset( $params['continue'] ) ) { |
36 | | - $continue = intval( $params['continue'] ); |
37 | | - if ( $continue < 0 || $continue > count( $titles ) ) { |
38 | | - $this->dieUsageMsg( '_badcontinue' ); |
39 | | - } |
40 | | - $titles = array_slice( $titles, $continue, null, true ); |
41 | | - } |
42 | | - $count = 0; |
43 | | - foreach ( $titles as $id => $t ) { |
44 | | - if ( ++$count > $limit ) { |
45 | | - $this->setContinueEnumParameter( 'continue', $continue + $count - 1 ); |
46 | | - break; |
47 | | - } |
48 | | - $text = $this->getExtract( $t ); |
49 | | - if ( isset( $params['length'] ) ) { |
50 | | - $text = $this->trimText( $text ); |
51 | | - } |
52 | | - if ( $isXml ) { |
53 | | - $fit = $result->addValue( array( 'query', 'pages', $id ), 'extract', array( '*' => $text ) ); |
54 | | - } else { |
55 | | - $fit = $result->addValue( array( 'query', 'pages', $id ), 'extract', $text ); |
56 | | - } |
57 | | - if ( !$fit ) { |
58 | | - $this->setContinueEnumParameter( 'continue', $continue + $count - 1 ); |
59 | | - break; |
60 | | - } |
61 | | - } |
62 | | - wfProfileOut( __METHOD__ ); |
63 | | - } |
64 | | - |
65 | | - /** |
66 | | - * OpenSearchXml hook handler |
67 | | - * @param array $results |
68 | | - */ |
69 | | - public static function onOpenSearchXml( &$results ) { |
70 | | - global $wgMFExtendOpenSearchXml; |
71 | | - if ( !$wgMFExtendOpenSearchXml || !count( $results ) ) { |
72 | | - return true; |
73 | | - } |
74 | | - $pageIds = array_keys( $results ); |
75 | | - $api = new ApiMain( new FauxRequest( |
76 | | - array( |
77 | | - 'action' => 'query', |
78 | | - 'prop' => 'excerpts', |
79 | | - 'explaintext' => true, |
80 | | - 'exlimit' => count( $results ), |
81 | | - 'pageids' => implode( '|', $pageIds ), |
82 | | - ) ) |
83 | | - ); |
84 | | - $api->execute(); |
85 | | - $data = $api->getResultData(); |
86 | | - foreach ( $pageIds as $id ) { |
87 | | - if ( isset( $data['query']['pages'][$id]['excerpts'][0] ) ) { |
88 | | - $results[$id]['extract'] = $data['query']['pages'][$id]['extract'][0]; |
89 | | - $results[$id]['extract trimmed'] = false; |
90 | | - } |
91 | | - } |
92 | | - return true; |
93 | | - } |
94 | | - |
95 | | - /** |
96 | | - * Returns a processed, but not trimmed excerpt |
97 | | - * @param Title $title |
98 | | - * @return string |
99 | | - */ |
100 | | - private function getExtract( Title $title ) { |
101 | | - wfProfileIn( __METHOD__ ); |
102 | | - $page = WikiPage::factory( $title ); |
103 | | - |
104 | | - $introOnly = $this->params['intro']; |
105 | | - $text = $this->getFromCache( $page, $introOnly ); |
106 | | - // if we need just first section, try retrieving full page and getting first section out of it |
107 | | - if ( $text === false && $introOnly ) { |
108 | | - $text = $this->getFromCache( $page, false ); |
109 | | - if ( $text !== false ) { |
110 | | - $text = $this->getFirstSection( $text, $this->params['plaintext'] ); |
111 | | - } |
112 | | - } |
113 | | - if ( $text === false ) { |
114 | | - $text = $this->parse( $page ); |
115 | | - $text = $this->convertText( $text, $title, $this->params['plaintext'] ); |
116 | | - $this->setCache( $page, $text ); |
117 | | - } |
118 | | - wfProfileOut( __METHOD__ ); |
119 | | - return $text; |
120 | | - } |
121 | | - |
122 | | - private function cacheKey( WikiPage $page, $introOnly ) { |
123 | | - return wfMemcKey( 'mf', 'extract', $page->getLatest(), $this->params['plaintext'], $introOnly ); |
124 | | - } |
125 | | - |
126 | | - private function getFromCache( WikiPage $page, $introOnly ) { |
127 | | - global $wgMemc; |
128 | | - |
129 | | - $key = $this->cacheKey( $page, $introOnly ); |
130 | | - return $wgMemc->get( $key ); |
131 | | - } |
132 | | - |
133 | | - private function setCache( WikiPage $page, $text ) { |
134 | | - global $wgMemc; |
135 | | - |
136 | | - $key = $this->cacheKey( $page, $this->params['intro'] ); |
137 | | - $wgMemc->set( $key, $text ); |
138 | | - } |
139 | | - |
140 | | - private function getFirstSection( $text, $plainText ) { |
141 | | - if ( $plainText ) { |
142 | | - $regexp = '/^(.*?)(?=' . self::SECTION_MARKER_START . ')/s'; |
143 | | - } else { |
144 | | - $regexp = '/^(.*?)(?=<h[1-6]\b)/s'; |
145 | | - } |
146 | | - if ( preg_match( $regexp, $text, $matches ) ) { |
147 | | - wfDebugDieBacktrace(); |
148 | | - $text = $matches[0]; |
149 | | - } |
150 | | - return $text; |
151 | | - } |
152 | | - |
153 | | - /** |
154 | | - * Returns page HTML |
155 | | - * @param WikiPage $page |
156 | | - * @return string |
157 | | - */ |
158 | | - private function parse( WikiPage $page ) { |
159 | | - wfProfileIn( __METHOD__ ); |
160 | | - if ( !$this->parserOptions ) { |
161 | | - $this->parserOptions = new ParserOptions( new User( '127.0.0.1' ) ); |
162 | | - } |
163 | | - // first try finding full page in parser cache |
164 | | - if ( $page->isParserCacheUsed( $this->parserOptions, 0 ) ) { |
165 | | - $pout = ParserCache::singleton()->get( $page, $this->parserOptions ); |
166 | | - if ( $pout ) { |
167 | | - $text = $pout->getText(); |
168 | | - if ( $this->params['intro'] ) { |
169 | | - $text = $this->getFirstSection( $text, false ); |
170 | | - } |
171 | | - wfProfileOut( __METHOD__ ); |
172 | | - return $text; |
173 | | - } |
174 | | - } |
175 | | - $request = array( |
176 | | - 'action' => 'parse', |
177 | | - 'page' => $page->getTitle()->getPrefixedText(), |
178 | | - 'prop' => 'text' |
179 | | - ); |
180 | | - if ( $this->params['intro'] ) { |
181 | | - $request['section'] = 0; |
182 | | - } |
183 | | - // in case of cache miss, render just the needed section |
184 | | - $api = new ApiMain( new FauxRequest( $request ) ); |
185 | | - $api->execute(); |
186 | | - $data = $api->getResultData(); |
187 | | - wfProfileOut( __METHOD__ ); |
188 | | - return $data['parse']['text']['*']; |
189 | | - } |
190 | | - |
191 | | - /** |
192 | | - * Converts page HTML into an excerpt |
193 | | - * @param string $text |
194 | | - * @param Title $title |
195 | | - * @param bool $plainText |
196 | | - * @return string |
197 | | - */ |
198 | | - private function convertText( $text ) { |
199 | | - wfProfileIn( __METHOD__ ); |
200 | | - $fmt = new ExtractFormatter( $text, $this->params['plaintext'], $this->params['sectionformat'] ); |
201 | | - $text = $fmt->getText(); |
202 | | - |
203 | | - wfProfileOut( __METHOD__ ); |
204 | | - return trim( $text ); |
205 | | - } |
206 | | - |
207 | | - /** |
208 | | - * |
209 | | - * @param string $text |
210 | | - * @param int $requestedLength |
211 | | - * @param bool $plainText |
212 | | - * @return string |
213 | | - */ |
214 | | - private function trimText( $text, $requestedLength, $plainText ) { |
215 | | - global $wgUseTidy; |
216 | | - |
217 | | - wfProfileIn( __METHOD__ ); |
218 | | - $length = mb_strlen( $text ); |
219 | | - if ( $length <= $requestedLength ) { |
220 | | - wfProfileOut( __METHOD__ ); |
221 | | - return $text; |
222 | | - } |
223 | | - $pattern = "#^.{{$requestedLength}}[\\w/]*>?#su"; |
224 | | - preg_match( $pattern, $text, $m ); |
225 | | - $text = $m[0]; |
226 | | - // Fix possibly unclosed tags |
227 | | - if ( $wgUseTidy && !$plainText ) { |
228 | | - $text = trim ( MWTidy::tidy( $text ) ); |
229 | | - } |
230 | | - $text .= wfMessage( 'ellipsis' )->inContentLanguage()->text(); |
231 | | - wfProfileOut( __METHOD__ ); |
232 | | - return $text; |
233 | | - } |
234 | | - |
235 | | - public function getAllowedParams() { |
236 | | - return array( |
237 | | - 'length' => array( |
238 | | - ApiBase::PARAM_TYPE => 'integer', |
239 | | - ApiBase::PARAM_MIN => 1, |
240 | | - ), |
241 | | - 'limit' => array( |
242 | | - ApiBase::PARAM_DFLT => 1, |
243 | | - ApiBase::PARAM_TYPE => 'limit', |
244 | | - ApiBase::PARAM_MIN => 1, |
245 | | - ApiBase::PARAM_MAX => 20, |
246 | | - ApiBase::PARAM_MAX2 => 20, |
247 | | - ), |
248 | | - 'intro' => false, |
249 | | - 'plaintext' => false, |
250 | | - 'sectionformat' => array( |
251 | | - ApiBase::PARAM_TYPE => ExtractFormatter::$sectionFormats, |
252 | | - ApiBase::PARAM_DFLT => 'wiki', |
253 | | - ), |
254 | | - 'continue' => array( |
255 | | - ApiBase::PARAM_TYPE => 'integer', |
256 | | - ), |
257 | | - ); |
258 | | - } |
259 | | - |
260 | | - public function getParamDescription() { |
261 | | - return array( |
262 | | - 'length' => 'How many characters to return, actual text returned might be slightly longer.', |
263 | | - 'limit' => 'How many extracts to return. ', |
264 | | - 'intro' => 'Return only content before the first section', |
265 | | - 'plaintext' => 'Return extracts as plaintext instead of limited HTML', |
266 | | - 'sectionformat' => array( |
267 | | - 'How to format sections in plaintext mode:', |
268 | | - ' none - No formatting', |
269 | | - ' wiki - Wikitext-style formatting == like this ==', |
270 | | - " raw - Return in this module's internal representation (secton titles prefixed with <ASCII 1><ASCII 2><section level><ASCII 2><ASCII 1>", |
271 | | - ), |
272 | | - 'continue' => 'When more results are available, use this to continue', |
273 | | - ); |
274 | | - } |
275 | | - |
276 | | - public function getDescription() { |
277 | | - return 'Returns plain-text or limited HTML extracts of the given page(s)'; |
278 | | - } |
279 | | - |
280 | | - public function getPossibleErrors() { |
281 | | - return array_merge( parent::getPossibleErrors(), array( |
282 | | - array( 'code' => '_badcontinue', 'info' => 'Invalid continue param. You should pass the original value returned by the previous query' ), |
283 | | - ) ); |
284 | | - } |
285 | | - |
286 | | - public function getExamples() { |
287 | | - return array( |
288 | | - 'api.php?action=query&prop=extracts&exlength=175&titles=Therion' => 'Get a 175-character extract', |
289 | | - ); |
290 | | - } |
291 | | - |
292 | | - |
293 | | - public function getHelpUrls() { |
294 | | - return 'https://www.mediawiki.org/wiki/Extension:MobileFrontend#New_API'; |
295 | | - } |
296 | | - |
297 | | - public function getVersion() { |
298 | | - return __CLASS__ . ': $Id$'; |
299 | | - } |
300 | | -} |
301 | | - |
302 | | -class ExtractFormatter extends HtmlFormatter { |
303 | | - private $plainText; |
304 | | - private $sectionFormat; |
305 | | - |
306 | | - public static $sectionFormats = array( |
307 | | - 'none', |
308 | | - 'wiki', |
309 | | - 'raw', |
310 | | - ); |
311 | | - |
312 | | - public function __construct( $text, $plainText, $sectionFormat ) { |
313 | | - parent::__construct( HtmlFormatter::wrapHTML( $text ) ); |
314 | | - $this->plainText = $plainText; |
315 | | - $this->sectionFormat = $sectionFormat; |
316 | | - |
317 | | - $this->removeImages(); |
318 | | - $this->remove( array( 'table', 'div', '.editsection', 'sup.reference', 'span.coordinates', |
319 | | - 'span.geo-multi-punct', 'span.geo-nondefault', '.noexcerpt', '.error' ) |
320 | | - ); |
321 | | - if ( $plainText ) { |
322 | | - $this->flattenAllTags(); |
323 | | - } else { |
324 | | - $this->flatten( array( 'span', 'a' ) ); |
325 | | - } |
326 | | - } |
327 | | - |
328 | | - public function getText( $dummy = null ) { |
329 | | - $this->filterContent(); |
330 | | - $text = parent::getText(); |
331 | | - if ( $this->plainText ) { |
332 | | - $text = html_entity_decode( $text ); |
333 | | - $text = str_replace( "\r", "\n", $text ); |
334 | | - $text = preg_replace( "/\n{3,}/", "\n\n", $text ); |
335 | | - $text = preg_replace_callback( |
336 | | - "/" . ApiQueryExtracts::SECTION_MARKER_START . '(\d)'. ApiQueryExtracts::SECTION_MARKER_END . "(.*?)$/m", |
337 | | - array( $this, 'sectionCallback' ), |
338 | | - $text |
339 | | - ); |
340 | | - } |
341 | | - return $text; |
342 | | - } |
343 | | - |
344 | | - public function onHtmlReady( $html ) { |
345 | | - if ( $this->plainText ) { |
346 | | - $html = preg_replace( '/\s*(<h([1-6])\b)/i', |
347 | | - ApiQueryExtracts::SECTION_MARKER_START . '$2' . ApiQueryExtracts::SECTION_MARKER_END . '$1' , |
348 | | - $html |
349 | | - ); |
350 | | - } |
351 | | - return $html; |
352 | | - } |
353 | | - |
354 | | - private function sectionCallback( $matches ) { |
355 | | - if ( $this->sectionFormat == 'raw' ) { |
356 | | - return $matches[0]; |
357 | | - } |
358 | | - $func = "ExtractFormatter::doSection_{$this->sectionFormat}"; |
359 | | - return call_user_func( $func, $matches[1], trim( $matches[2] ) ); |
360 | | - } |
361 | | - |
362 | | - private static function doSection_wiki( $level, $text ) { |
363 | | - $bars = str_repeat( '=', $level ); |
364 | | - return "\n$bars $text $bars"; |
365 | | - } |
366 | | - |
367 | | - private static function doSection_none( $level, $text ) { |
368 | | - return "\n$text"; |
369 | | - } |
370 | | -} |
\ No newline at end of file |
Index: trunk/extensions/MobileFrontend/api/ApiQueryExtracts.php |
— | — | @@ -0,0 +1,369 @@ |
| 2 | +<?php |
| 3 | + |
/**
 * API query module that returns plain-text or limited-HTML extracts of pages.
 *
 * Parameters use the "ex" prefix (see the constructor); the module's own
 * example invokes it as prop=extracts.
 */
class ApiQueryExtracts extends ApiQueryBase {
	/** Control characters emitted before the section level digit in plain-text mode */
	const SECTION_MARKER_START = "\1\2";
	/** Control characters emitted after the section level digit in plain-text mode */
	const SECTION_MARKER_END = "\2\1";

	/**
	 * Lazily created in parse() and reused for every page of the request
	 * @var ParserOptions
	 */
	private $parserOptions;

	/**
	 * Request parameters as returned by extractRequestParams(), set in execute()
	 * @var array
	 */
	private $params;

	/**
	 * @param $query Passed through to ApiQueryBase
	 * @param $moduleName string Passed through to ApiQueryBase
	 */
	public function __construct( $query, $moduleName ) {
		parent::__construct( $query, $moduleName, 'ex' );
	}

	/**
	 * Adds an 'extract' value to every good title of the page set, honouring
	 * the limit/continue parameters and stopping early when the result is full.
	 */
	public function execute() {
		wfProfileIn( __METHOD__ );
		$titles = $this->getPageSet()->getGoodTitles();
		if ( count( $titles ) == 0 ) {
			wfProfileOut( __METHOD__ );
			return;
		}
		// Guard against a missing printer before asking for its format
		// NOTE(review): presumably the case for internal (FauxRequest) calls - confirm
		$printer = $this->getMain()->getPrinter();
		$isXml = $printer && $printer->getFormat() == 'XML';
		$result = $this->getResult();
		$params = $this->params = $this->extractRequestParams();
		$continue = 0;
		$limit = intval( $params['limit'] );
		// Whole-article extracts are only served one page per request
		if ( $limit > 1 && !$params['intro'] ) {
			$limit = 1;
			///@todo:
			//$result->setWarning( "Provided limit was too large for requests for whole article extracts, lowered to $limit" );
		}
		if ( isset( $params['continue'] ) ) {
			$continue = intval( $params['continue'] );
			if ( $continue < 0 || $continue > count( $titles ) ) {
				$this->dieUsageMsg( '_badcontinue' );
			}
			// Keep the page ID keys, they are used as result paths below
			$titles = array_slice( $titles, $continue, null, true );
		}
		$count = 0;
		foreach ( $titles as $id => $t ) {
			if ( ++$count > $limit ) {
				$this->setContinueEnumParameter( 'continue', $continue + $count - 1 );
				break;
			}
			$text = $this->getExtract( $t );
			if ( isset( $params['length'] ) ) {
				// trimText() needs the requested length and the output mode
				$text = $this->trimText( $text, $params['length'], $params['plaintext'] );
			}
			if ( $isXml ) {
				$fit = $result->addValue( array( 'query', 'pages', $id ), 'extract', array( '*' => $text ) );
			} else {
				$fit = $result->addValue( array( 'query', 'pages', $id ), 'extract', $text );
			}
			if ( !$fit ) {
				// Result is full: the current page has to be retried by the client
				$this->setContinueEnumParameter( 'continue', $continue + $count - 1 );
				break;
			}
		}
		wfProfileOut( __METHOD__ );
	}

	/**
	 * OpenSearchXml hook handler: fills in 'extract' for every result via an
	 * internal API request to this module.
	 *
	 * @param array $results Search results keyed by page ID, modified in place
	 * @return bool Always true
	 */
	public static function onOpenSearchXml( &$results ) {
		global $wgMFExtendOpenSearchXml;
		if ( !$wgMFExtendOpenSearchXml || !count( $results ) ) {
			return true;
		}
		$pageIds = array_keys( $results );
		// NOTE(review): without 'exintro' execute() lowers the limit to 1, so only
		// the first page receives an extract - confirm whether intro mode is wanted here
		$api = new ApiMain( new FauxRequest(
			array(
				'action' => 'query',
				// Same module name as in getExamples()
				'prop' => 'extracts',
				'explaintext' => true,
				'exlimit' => count( $results ),
				'pageids' => implode( '|', $pageIds ),
			) )
		);
		$api->execute();
		$data = $api->getResultData();
		foreach ( $pageIds as $id ) {
			if ( !isset( $data['query']['pages'][$id]['extract'] ) ) {
				continue;
			}
			$extract = $data['query']['pages'][$id]['extract'];
			// execute() stores either a plain string or array( '*' => text )
			if ( is_array( $extract ) ) {
				if ( !isset( $extract['*'] ) ) {
					continue;
				}
				$extract = $extract['*'];
			}
			$results[$id]['extract'] = $extract;
			$results[$id]['extract trimmed'] = false;
		}
		return true;
	}

	/**
	 * Returns a processed, but not trimmed extract, using memcached when possible
	 * @param Title $title
	 * @return string
	 */
	private function getExtract( Title $title ) {
		wfProfileIn( __METHOD__ );
		$page = WikiPage::factory( $title );

		$introOnly = $this->params['intro'];
		$plainText = $this->params['plaintext'];
		$text = $this->getFromCache( $page, $introOnly );
		// A cached full-page extract can only be cut down to its intro if the
		// section boundaries are still detectable: HTML keeps its <hN> tags, but
		// plain text keeps the section markers only with sectionformat=raw
		$canCutCachedPage = !$plainText || $this->params['sectionformat'] == 'raw';
		if ( $text === false && $introOnly && $canCutCachedPage ) {
			$text = $this->getFromCache( $page, false );
			if ( $text !== false ) {
				$text = $this->getFirstSection( $text, $plainText );
			}
		}
		if ( $text === false ) {
			$text = $this->parse( $page );
			$text = $this->convertText( $text );
			$this->setCache( $page, $text );
		}
		wfProfileOut( __METHOD__ );
		return $text;
	}

	/**
	 * Builds the memcached key for an extract of the page's latest revision
	 * @param WikiPage $page
	 * @param bool $introOnly
	 * @return string
	 */
	private function cacheKey( WikiPage $page, $introOnly ) {
		// Section formatting only affects plain-text output, so HTML extracts
		// share one key regardless of the sectionformat parameter
		$format = $this->params['plaintext'] ? $this->params['sectionformat'] : 'html';
		return wfMemcKey( 'mf', 'extract', $page->getLatest(), $this->params['plaintext'], $introOnly, $format );
	}

	/**
	 * @param WikiPage $page
	 * @param bool $introOnly
	 * @return string|bool Cached extract, or whatever $wgMemc->get() returns on a miss
	 *  (callers compare against false)
	 */
	private function getFromCache( WikiPage $page, $introOnly ) {
		global $wgMemc;

		$key = $this->cacheKey( $page, $introOnly );
		return $wgMemc->get( $key );
	}

	/**
	 * Stores an extract under the key matching the current 'intro' parameter
	 * @param WikiPage $page
	 * @param string $text
	 */
	private function setCache( WikiPage $page, $text ) {
		global $wgMemc;

		$key = $this->cacheKey( $page, $this->params['intro'] );
		$wgMemc->set( $key, $text );
	}

	/**
	 * Returns everything before the first section heading, or the whole text
	 * if no heading is found.
	 * @param string $text
	 * @param bool $plainText Whether headings are marked with SECTION_MARKER_START
	 *  (true) or with <h1>..<h6> tags (false)
	 * @return string
	 */
	private function getFirstSection( $text, $plainText ) {
		if ( $plainText ) {
			$regexp = '/^(.*?)(?=' . self::SECTION_MARKER_START . ')/s';
		} else {
			$regexp = '/^(.*?)(?=<h[1-6]\b)/s';
		}
		// The lookahead is zero-width, so the whole match is just the intro
		if ( preg_match( $regexp, $text, $matches ) ) {
			$text = $matches[0];
		}
		return $text;
	}

	/**
	 * Returns page HTML, taken from the parser cache if possible and from an
	 * internal action=parse request otherwise
	 * @param WikiPage $page
	 * @return string
	 */
	private function parse( WikiPage $page ) {
		wfProfileIn( __METHOD__ );
		if ( !$this->parserOptions ) {
			$this->parserOptions = new ParserOptions( new User( '127.0.0.1' ) );
		}
		// first try finding full page in parser cache
		if ( $page->isParserCacheUsed( $this->parserOptions, 0 ) ) {
			$pout = ParserCache::singleton()->get( $page, $this->parserOptions );
			if ( $pout ) {
				$text = $pout->getText();
				if ( $this->params['intro'] ) {
					$text = $this->getFirstSection( $text, false );
				}
				wfProfileOut( __METHOD__ );
				return $text;
			}
		}
		$request = array(
			'action' => 'parse',
			'page' => $page->getTitle()->getPrefixedText(),
			'prop' => 'text'
		);
		if ( $this->params['intro'] ) {
			$request['section'] = 0;
		}
		// in case of cache miss, render just the needed section
		$api = new ApiMain( new FauxRequest( $request ) );
		$api->execute();
		$data = $api->getResultData();
		wfProfileOut( __METHOD__ );
		return $data['parse']['text']['*'];
	}

	/**
	 * Converts page HTML into an extract, using the 'plaintext' and
	 * 'sectionformat' request parameters
	 * @param string $text
	 * @return string
	 */
	private function convertText( $text ) {
		wfProfileIn( __METHOD__ );
		$fmt = new ExtractFormatter( $text, $this->params['plaintext'], $this->params['sectionformat'] );
		$text = $fmt->getText();

		wfProfileOut( __METHOD__ );
		return trim( $text );
	}

	/**
	 * Shortens an extract to roughly the requested number of characters and
	 * appends the 'ellipsis' message. Text that is already short enough is
	 * returned unchanged.
	 * @param string $text
	 * @param int $requestedLength
	 * @param bool $plainText
	 * @return string
	 */
	private function trimText( $text, $requestedLength, $plainText ) {
		global $wgUseTidy;

		wfProfileIn( __METHOD__ );
		$length = mb_strlen( $text );
		if ( $length <= $requestedLength ) {
			wfProfileOut( __METHOD__ );
			return $text;
		}
		// Take the requested number of characters, then run on to the end of
		// the current word or tag so neither is cut in the middle
		$pattern = "#^.{{$requestedLength}}[\\w/]*>?#su";
		if ( !preg_match( $pattern, $text, $m ) ) {
			// No match or a PCRE error (e.g. invalid UTF-8): return the text untrimmed
			wfProfileOut( __METHOD__ );
			return $text;
		}
		$text = $m[0];
		// Fix possibly unclosed tags
		if ( $wgUseTidy && !$plainText ) {
			$text = trim ( MWTidy::tidy( $text ) );
		}
		$text .= wfMessage( 'ellipsis' )->inContentLanguage()->text();
		wfProfileOut( __METHOD__ );
		return $text;
	}

	/**
	 * @return array Parameter definitions (names are given without the "ex" prefix)
	 */
	public function getAllowedParams() {
		return array(
			'length' => array(
				ApiBase::PARAM_TYPE => 'integer',
				ApiBase::PARAM_MIN => 1,
			),
			'limit' => array(
				ApiBase::PARAM_DFLT => 1,
				ApiBase::PARAM_TYPE => 'limit',
				ApiBase::PARAM_MIN => 1,
				ApiBase::PARAM_MAX => 20,
				ApiBase::PARAM_MAX2 => 20,
			),
			'intro' => false,
			'plaintext' => false,
			'sectionformat' => array(
				ApiBase::PARAM_TYPE => ExtractFormatter::$sectionFormats,
				ApiBase::PARAM_DFLT => 'wiki',
			),
			'continue' => array(
				ApiBase::PARAM_TYPE => 'integer',
			),
		);
	}

	/**
	 * @return array Help text for each parameter of getAllowedParams()
	 */
	public function getParamDescription() {
		return array(
			'length' => 'How many characters to return, actual text returned might be slightly longer.',
			'limit' => 'How many extracts to return. ',
			'intro' => 'Return only content before the first section',
			'plaintext' => 'Return extracts as plaintext instead of limited HTML',
			'sectionformat' => array(
				'How to format sections in plaintext mode:',
				' none - No formatting',
				' wiki - Wikitext-style formatting == like this ==',
				" raw - Return in this module's internal representation (secton titles prefixed with <ASCII 1><ASCII 2><section level><ASCII 2><ASCII 1>",
			),
			'continue' => 'When more results are available, use this to continue',
		);
	}

	/**
	 * @return string
	 */
	public function getDescription() {
		return 'Returns plain-text or limited HTML extracts of the given page(s)';
	}

	/**
	 * @return array Parent errors plus the _badcontinue error raised by execute()
	 */
	public function getPossibleErrors() {
		return array_merge( parent::getPossibleErrors(), array(
			array( 'code' => '_badcontinue', 'info' => 'Invalid continue param. You should pass the original value returned by the previous query' ),
		) );
	}

	/**
	 * @return array Example request => description
	 */
	public function getExamples() {
		return array(
			'api.php?action=query&prop=extracts&exlength=175&titles=Therion' => 'Get a 175-character extract',
		);
	}

	/**
	 * @return string
	 */
	public function getHelpUrls() {
		return 'https://www.mediawiki.org/wiki/Extension:MobileFrontend#New_API';
	}

	/**
	 * @return string
	 */
	public function getVersion() {
		return __CLASS__ . ': $Id$';
	}
}
| 301 | + |
/**
 * HtmlFormatter subclass that turns page HTML into an extract, either as
 * limited HTML or as plain text with optional section heading formatting.
 */
class ExtractFormatter extends HtmlFormatter {
	/** @var bool Whether plain text (true) or limited HTML (false) is produced */
	private $plainText;
	/** @var string One of self::$sectionFormats */
	private $sectionFormat;

	/**
	 * Allowed section formats; every value except 'raw' needs a matching
	 * doSection_<format>() method (see sectionCallback())
	 */
	public static $sectionFormats = array(
		'none',
		'wiki',
		'raw',
	);

	/**
	 * Registers which elements the parent formatter should remove or flatten.
	 * @param string $text Page HTML
	 * @param bool $plainText
	 * @param string $sectionFormat
	 */
	public function __construct( $text, $plainText, $sectionFormat ) {
		parent::__construct( HtmlFormatter::wrapHTML( $text ) );
		$this->plainText = $plainText;
		$this->sectionFormat = $sectionFormat;

		$this->removeImages();
		$this->remove( array( 'table', 'div', '.editsection', 'sup.reference', 'span.coordinates',
			'span.geo-multi-punct', 'span.geo-nondefault', '.noexcerpt', '.error' )
		);
		if ( $plainText ) {
			$this->flattenAllTags();
		} else {
			$this->flatten( array( 'span', 'a' ) );
		}
	}

	/**
	 * Returns the filtered text; in plain-text mode entities are decoded,
	 * newlines normalised and section markers formatted.
	 * @param $dummy Unused
	 * @return string
	 */
	public function getText( $dummy = null ) {
		$this->filterContent();
		$text = parent::getText();
		if ( $this->plainText ) {
			// Name the charset explicitly: the default depends on the PHP
			// version/configuration, while the text handled here is UTF-8
			$text = html_entity_decode( $text, ENT_COMPAT, 'UTF-8' );
			$text = str_replace( "\r", "\n", $text );
			// Collapse runs of blank lines to a single blank line
			$text = preg_replace( "/\n{3,}/", "\n\n", $text );
			// Marker, level digit, marker, then the heading text up to the end of line
			$text = preg_replace_callback(
				"/" . ApiQueryExtracts::SECTION_MARKER_START . '(\d)'. ApiQueryExtracts::SECTION_MARKER_END . "(.*?)$/m",
				array( $this, 'sectionCallback' ),
				$text
			);
		}
		return $text;
	}

	/**
	 * In plain-text mode, puts a section marker carrying the heading level in
	 * front of every <h1>..<h6> tag (swallowing preceding whitespace).
	 * NOTE(review): presumably called by HtmlFormatter on the serialized HTML - confirm
	 * @param string $html
	 * @return string
	 */
	public function onHtmlReady( $html ) {
		if ( $this->plainText ) {
			$html = preg_replace( '/\s*(<h([1-6])\b)/i',
				ApiQueryExtracts::SECTION_MARKER_START . '$2' . ApiQueryExtracts::SECTION_MARKER_END . '$1' ,
				$html
			);
		}
		return $html;
	}

	/**
	 * preg_replace_callback() handler for marked section headings.
	 * @param array $matches [0] whole match, [1] section level, [2] heading text
	 * @return string
	 */
	private function sectionCallback( $matches ) {
		if ( $this->sectionFormat == 'raw' ) {
			// Raw mode leaves the internal markers in place
			return $matches[0];
		}
		$func = "ExtractFormatter::doSection_{$this->sectionFormat}";
		return call_user_func( $func, $matches[1], trim( $matches[2] ) );
	}

	/**
	 * Formats a heading wikitext-style, e.g. "== Title ==" for level 2
	 * @param int|string $level
	 * @param string $text
	 * @return string
	 */
	private static function doSection_wiki( $level, $text ) {
		$bars = str_repeat( '=', $level );
		return "\n$bars $text $bars";
	}

	/**
	 * Outputs the heading text on its own line without decoration
	 * @param int|string $level Unused
	 * @param string $text
	 * @return string
	 */
	private static function doSection_none( $level, $text ) {
		return "\n$text";
	}
}
\ No newline at end of file |
Property changes on: trunk/extensions/MobileFrontend/api/ApiQueryExtracts.php |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 371 | + native |
Added: svn:keywords |
2 | 372 | + Id |
Index: trunk/extensions/MobileFrontend/MobileFrontend.php |
— | — | @@ -52,7 +52,7 @@ |
53 | 53 | |
54 | 54 | 'ApiMobileView' => 'api/ApiMobileView', |
55 | 55 | 'ApiParseExtender' => 'api/ApiParseExtender', |
56 | | - 'ApiQueryExtracts' => 'api/ApiQueryExcerpts', |
| 56 | + 'ApiQueryExtracts' => 'api/ApiQueryExtracts', |
57 | 57 | |
58 | 58 | 'MobileFrontendTemplate' => 'templates/MobileFrontendTemplate', |
59 | 59 | 'ApplicationTemplate' => 'templates/ApplicationTemplate', |