r114130 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r114129 | r114130 | r114131 >
Date:12:24, 19 March 2012
Author:maxsem
Status:ok
Tags:
Comment:
Follow-up r114129: rename file
Modified paths:
  • /trunk/extensions/MobileFrontend/MobileFrontend.php (modified) (history)
  • /trunk/extensions/MobileFrontend/api/ApiQueryExcerpts.php (deleted) (history)
  • /trunk/extensions/MobileFrontend/api/ApiQueryExtracts.php (added) (history)

Diff [purge]

Index: trunk/extensions/MobileFrontend/api/ApiQueryExcerpts.php
@@ -1,369 +0,0 @@
2 -<?php
3 -
4 -class ApiQueryExtracts extends ApiQueryBase {
5 - const SECTION_MARKER_START = "\1\2";
6 - const SECTION_MARKER_END = "\2\1";
7 -
8 - /**
9 - * @var ParserOptions
10 - */
11 - private $parserOptions;
12 - private $params;
13 -
14 - public function __construct( $query, $moduleName ) {
15 - parent::__construct( $query, $moduleName, 'ex' );
16 - }
17 -
18 - public function execute() {
19 - wfProfileIn( __METHOD__ );
20 - $titles = $this->getPageSet()->getGoodTitles();
21 - if ( count( $titles ) == 0 ) {
22 - wfProfileOut( __METHOD__ );
23 - return;
24 - }
25 - $isXml = $this->getMain()->getPrinter()->getFormat() == 'XML';
26 - $result = $this->getResult();
27 - $params = $this->params = $this->extractRequestParams();
28 - $continue = 0;
29 - $limit = intval( $params['limit'] );
30 - if ( $limit > 1 && !$params['intro'] ) {
31 - $limit = 1;
32 - ///@todo:
33 - //$result->setWarning( "Provided limit was too large for requests for whole article extracts, lowered to $limit" );
34 - }
35 - if ( isset( $params['continue'] ) ) {
36 - $continue = intval( $params['continue'] );
37 - if ( $continue < 0 || $continue > count( $titles ) ) {
38 - $this->dieUsageMsg( '_badcontinue' );
39 - }
40 - $titles = array_slice( $titles, $continue, null, true );
41 - }
42 - $count = 0;
43 - foreach ( $titles as $id => $t ) {
44 - if ( ++$count > $limit ) {
45 - $this->setContinueEnumParameter( 'continue', $continue + $count - 1 );
46 - break;
47 - }
48 - $text = $this->getExtract( $t );
49 - if ( isset( $params['length'] ) ) {
50 - $text = $this->trimText( $text );
51 - }
52 - if ( $isXml ) {
53 - $fit = $result->addValue( array( 'query', 'pages', $id ), 'extract', array( '*' => $text ) );
54 - } else {
55 - $fit = $result->addValue( array( 'query', 'pages', $id ), 'extract', $text );
56 - }
57 - if ( !$fit ) {
58 - $this->setContinueEnumParameter( 'continue', $continue + $count - 1 );
59 - break;
60 - }
61 - }
62 - wfProfileOut( __METHOD__ );
63 - }
64 -
65 - /**
66 - * OpenSearchXml hook handler
67 - * @param array $results
68 - */
69 - public static function onOpenSearchXml( &$results ) {
70 - global $wgMFExtendOpenSearchXml;
71 - if ( !$wgMFExtendOpenSearchXml || !count( $results ) ) {
72 - return true;
73 - }
74 - $pageIds = array_keys( $results );
75 - $api = new ApiMain( new FauxRequest(
76 - array(
77 - 'action' => 'query',
78 - 'prop' => 'excerpts',
79 - 'explaintext' => true,
80 - 'exlimit' => count( $results ),
81 - 'pageids' => implode( '|', $pageIds ),
82 - ) )
83 - );
84 - $api->execute();
85 - $data = $api->getResultData();
86 - foreach ( $pageIds as $id ) {
87 - if ( isset( $data['query']['pages'][$id]['excerpts'][0] ) ) {
88 - $results[$id]['extract'] = $data['query']['pages'][$id]['extract'][0];
89 - $results[$id]['extract trimmed'] = false;
90 - }
91 - }
92 - return true;
93 - }
94 -
95 - /**
96 - * Returns a processed, but not trimmed excerpt
97 - * @param Title $title
98 - * @return string
99 - */
100 - private function getExtract( Title $title ) {
101 - wfProfileIn( __METHOD__ );
102 - $page = WikiPage::factory( $title );
103 -
104 - $introOnly = $this->params['intro'];
105 - $text = $this->getFromCache( $page, $introOnly );
106 - // if we need just first section, try retrieving full page and getting first section out of it
107 - if ( $text === false && $introOnly ) {
108 - $text = $this->getFromCache( $page, false );
109 - if ( $text !== false ) {
110 - $text = $this->getFirstSection( $text, $this->params['plaintext'] );
111 - }
112 - }
113 - if ( $text === false ) {
114 - $text = $this->parse( $page );
115 - $text = $this->convertText( $text, $title, $this->params['plaintext'] );
116 - $this->setCache( $page, $text );
117 - }
118 - wfProfileOut( __METHOD__ );
119 - return $text;
120 - }
121 -
122 - private function cacheKey( WikiPage $page, $introOnly ) {
123 - return wfMemcKey( 'mf', 'extract', $page->getLatest(), $this->params['plaintext'], $introOnly );
124 - }
125 -
126 - private function getFromCache( WikiPage $page, $introOnly ) {
127 - global $wgMemc;
128 -
129 - $key = $this->cacheKey( $page, $introOnly );
130 - return $wgMemc->get( $key );
131 - }
132 -
133 - private function setCache( WikiPage $page, $text ) {
134 - global $wgMemc;
135 -
136 - $key = $this->cacheKey( $page, $this->params['intro'] );
137 - $wgMemc->set( $key, $text );
138 - }
139 -
140 - private function getFirstSection( $text, $plainText ) {
141 - if ( $plainText ) {
142 - $regexp = '/^(.*?)(?=' . self::SECTION_MARKER_START . ')/s';
143 - } else {
144 - $regexp = '/^(.*?)(?=<h[1-6]\b)/s';
145 - }
146 - if ( preg_match( $regexp, $text, $matches ) ) {
147 - wfDebugDieBacktrace();
148 - $text = $matches[0];
149 - }
150 - return $text;
151 - }
152 -
153 - /**
154 - * Returns page HTML
155 - * @param WikiPage $page
156 - * @return string
157 - */
158 - private function parse( WikiPage $page ) {
159 - wfProfileIn( __METHOD__ );
160 - if ( !$this->parserOptions ) {
161 - $this->parserOptions = new ParserOptions( new User( '127.0.0.1' ) );
162 - }
163 - // first try finding full page in parser cache
164 - if ( $page->isParserCacheUsed( $this->parserOptions, 0 ) ) {
165 - $pout = ParserCache::singleton()->get( $page, $this->parserOptions );
166 - if ( $pout ) {
167 - $text = $pout->getText();
168 - if ( $this->params['intro'] ) {
169 - $text = $this->getFirstSection( $text, false );
170 - }
171 - wfProfileOut( __METHOD__ );
172 - return $text;
173 - }
174 - }
175 - $request = array(
176 - 'action' => 'parse',
177 - 'page' => $page->getTitle()->getPrefixedText(),
178 - 'prop' => 'text'
179 - );
180 - if ( $this->params['intro'] ) {
181 - $request['section'] = 0;
182 - }
183 - // in case of cache miss, render just the needed section
184 - $api = new ApiMain( new FauxRequest( $request ) );
185 - $api->execute();
186 - $data = $api->getResultData();
187 - wfProfileOut( __METHOD__ );
188 - return $data['parse']['text']['*'];
189 - }
190 -
191 - /**
192 - * Converts page HTML into an excerpt
193 - * @param string $text
194 - * @param Title $title
195 - * @param bool $plainText
196 - * @return string
197 - */
198 - private function convertText( $text ) {
199 - wfProfileIn( __METHOD__ );
200 - $fmt = new ExtractFormatter( $text, $this->params['plaintext'], $this->params['sectionformat'] );
201 - $text = $fmt->getText();
202 -
203 - wfProfileOut( __METHOD__ );
204 - return trim( $text );
205 - }
206 -
207 - /**
208 - *
209 - * @param string $text
210 - * @param int $requestedLength
211 - * @param bool $plainText
212 - * @return string
213 - */
214 - private function trimText( $text, $requestedLength, $plainText ) {
215 - global $wgUseTidy;
216 -
217 - wfProfileIn( __METHOD__ );
218 - $length = mb_strlen( $text );
219 - if ( $length <= $requestedLength ) {
220 - wfProfileOut( __METHOD__ );
221 - return $text;
222 - }
223 - $pattern = "#^.{{$requestedLength}}[\\w/]*>?#su";
224 - preg_match( $pattern, $text, $m );
225 - $text = $m[0];
226 - // Fix possibly unclosed tags
227 - if ( $wgUseTidy && !$plainText ) {
228 - $text = trim ( MWTidy::tidy( $text ) );
229 - }
230 - $text .= wfMessage( 'ellipsis' )->inContentLanguage()->text();
231 - wfProfileOut( __METHOD__ );
232 - return $text;
233 - }
234 -
235 - public function getAllowedParams() {
236 - return array(
237 - 'length' => array(
238 - ApiBase::PARAM_TYPE => 'integer',
239 - ApiBase::PARAM_MIN => 1,
240 - ),
241 - 'limit' => array(
242 - ApiBase::PARAM_DFLT => 1,
243 - ApiBase::PARAM_TYPE => 'limit',
244 - ApiBase::PARAM_MIN => 1,
245 - ApiBase::PARAM_MAX => 20,
246 - ApiBase::PARAM_MAX2 => 20,
247 - ),
248 - 'intro' => false,
249 - 'plaintext' => false,
250 - 'sectionformat' => array(
251 - ApiBase::PARAM_TYPE => ExtractFormatter::$sectionFormats,
252 - ApiBase::PARAM_DFLT => 'wiki',
253 - ),
254 - 'continue' => array(
255 - ApiBase::PARAM_TYPE => 'integer',
256 - ),
257 - );
258 - }
259 -
260 - public function getParamDescription() {
261 - return array(
262 - 'length' => 'How many characters to return, actual text returned might be slightly longer.',
263 - 'limit' => 'How many extracts to return. ',
264 - 'intro' => 'Return only content before the first section',
265 - 'plaintext' => 'Return extracts as plaintext instead of limited HTML',
266 - 'sectionformat' => array(
267 - 'How to format sections in plaintext mode:',
268 - ' none - No formatting',
269 - ' wiki - Wikitext-style formatting == like this ==',
270 - " raw - Return in this module's internal representation (secton titles prefixed with <ASCII 1><ASCII 2><section level><ASCII 2><ASCII 1>",
271 - ),
272 - 'continue' => 'When more results are available, use this to continue',
273 - );
274 - }
275 -
276 - public function getDescription() {
277 - return 'Returns plain-text or limited HTML extracts of the given page(s)';
278 - }
279 -
280 - public function getPossibleErrors() {
281 - return array_merge( parent::getPossibleErrors(), array(
282 - array( 'code' => '_badcontinue', 'info' => 'Invalid continue param. You should pass the original value returned by the previous query' ),
283 - ) );
284 - }
285 -
286 - public function getExamples() {
287 - return array(
288 - 'api.php?action=query&prop=extracts&exlength=175&titles=Therion' => 'Get a 175-character extract',
289 - );
290 - }
291 -
292 -
293 - public function getHelpUrls() {
294 - return 'https://www.mediawiki.org/wiki/Extension:MobileFrontend#New_API';
295 - }
296 -
297 - public function getVersion() {
298 - return __CLASS__ . ': $Id$';
299 - }
300 -}
301 -
302 -class ExtractFormatter extends HtmlFormatter {
303 - private $plainText;
304 - private $sectionFormat;
305 -
306 - public static $sectionFormats = array(
307 - 'none',
308 - 'wiki',
309 - 'raw',
310 - );
311 -
312 - public function __construct( $text, $plainText, $sectionFormat ) {
313 - parent::__construct( HtmlFormatter::wrapHTML( $text ) );
314 - $this->plainText = $plainText;
315 - $this->sectionFormat = $sectionFormat;
316 -
317 - $this->removeImages();
318 - $this->remove( array( 'table', 'div', '.editsection', 'sup.reference', 'span.coordinates',
319 - 'span.geo-multi-punct', 'span.geo-nondefault', '.noexcerpt', '.error' )
320 - );
321 - if ( $plainText ) {
322 - $this->flattenAllTags();
323 - } else {
324 - $this->flatten( array( 'span', 'a' ) );
325 - }
326 - }
327 -
328 - public function getText( $dummy = null ) {
329 - $this->filterContent();
330 - $text = parent::getText();
331 - if ( $this->plainText ) {
332 - $text = html_entity_decode( $text );
333 - $text = str_replace( "\r", "\n", $text );
334 - $text = preg_replace( "/\n{3,}/", "\n\n", $text );
335 - $text = preg_replace_callback(
336 - "/" . ApiQueryExtracts::SECTION_MARKER_START . '(\d)'. ApiQueryExtracts::SECTION_MARKER_END . "(.*?)$/m",
337 - array( $this, 'sectionCallback' ),
338 - $text
339 - );
340 - }
341 - return $text;
342 - }
343 -
344 - public function onHtmlReady( $html ) {
345 - if ( $this->plainText ) {
346 - $html = preg_replace( '/\s*(<h([1-6])\b)/i',
347 - ApiQueryExtracts::SECTION_MARKER_START . '$2' . ApiQueryExtracts::SECTION_MARKER_END . '$1' ,
348 - $html
349 - );
350 - }
351 - return $html;
352 - }
353 -
354 - private function sectionCallback( $matches ) {
355 - if ( $this->sectionFormat == 'raw' ) {
356 - return $matches[0];
357 - }
358 - $func = "ExtractFormatter::doSection_{$this->sectionFormat}";
359 - return call_user_func( $func, $matches[1], trim( $matches[2] ) );
360 - }
361 -
362 - private static function doSection_wiki( $level, $text ) {
363 - $bars = str_repeat( '=', $level );
364 - return "\n$bars $text $bars";
365 - }
366 -
367 - private static function doSection_none( $level, $text ) {
368 - return "\n$text";
369 - }
370 -}
\ No newline at end of file
Index: trunk/extensions/MobileFrontend/api/ApiQueryExtracts.php
@@ -0,0 +1,369 @@
 2+<?php
 3+
 4+class ApiQueryExtracts extends ApiQueryBase {
 5+ const SECTION_MARKER_START = "\1\2";
 6+ const SECTION_MARKER_END = "\2\1";
 7+
 8+ /**
 9+ * @var ParserOptions
 10+ */
 11+ private $parserOptions;
 12+ private $params;
 13+
 14+ public function __construct( $query, $moduleName ) {
 15+ parent::__construct( $query, $moduleName, 'ex' );
 16+ }
 17+
 18+ public function execute() {
 19+ wfProfileIn( __METHOD__ );
 20+ $titles = $this->getPageSet()->getGoodTitles();
 21+ if ( count( $titles ) == 0 ) {
 22+ wfProfileOut( __METHOD__ );
 23+ return;
 24+ }
 25+ $isXml = $this->getMain()->getPrinter()->getFormat() == 'XML';
 26+ $result = $this->getResult();
 27+ $params = $this->params = $this->extractRequestParams();
 28+ $continue = 0;
 29+ $limit = intval( $params['limit'] );
 30+ if ( $limit > 1 && !$params['intro'] ) {
 31+ $limit = 1;
 32+ ///@todo:
 33+ //$result->setWarning( "Provided limit was too large for requests for whole article extracts, lowered to $limit" );
 34+ }
 35+ if ( isset( $params['continue'] ) ) {
 36+ $continue = intval( $params['continue'] );
 37+ if ( $continue < 0 || $continue > count( $titles ) ) {
 38+ $this->dieUsageMsg( '_badcontinue' );
 39+ }
 40+ $titles = array_slice( $titles, $continue, null, true );
 41+ }
 42+ $count = 0;
 43+ foreach ( $titles as $id => $t ) {
 44+ if ( ++$count > $limit ) {
 45+ $this->setContinueEnumParameter( 'continue', $continue + $count - 1 );
 46+ break;
 47+ }
 48+ $text = $this->getExtract( $t );
 49+ if ( isset( $params['length'] ) ) {
 50+ $text = $this->trimText( $text );
 51+ }
 52+ if ( $isXml ) {
 53+ $fit = $result->addValue( array( 'query', 'pages', $id ), 'extract', array( '*' => $text ) );
 54+ } else {
 55+ $fit = $result->addValue( array( 'query', 'pages', $id ), 'extract', $text );
 56+ }
 57+ if ( !$fit ) {
 58+ $this->setContinueEnumParameter( 'continue', $continue + $count - 1 );
 59+ break;
 60+ }
 61+ }
 62+ wfProfileOut( __METHOD__ );
 63+ }
 64+
 65+ /**
 66+ * OpenSearchXml hook handler
 67+ * @param array $results
 68+ */
 69+ public static function onOpenSearchXml( &$results ) {
 70+ global $wgMFExtendOpenSearchXml;
 71+ if ( !$wgMFExtendOpenSearchXml || !count( $results ) ) {
 72+ return true;
 73+ }
 74+ $pageIds = array_keys( $results );
 75+ $api = new ApiMain( new FauxRequest(
 76+ array(
 77+ 'action' => 'query',
 78+ 'prop' => 'excerpts',
 79+ 'explaintext' => true,
 80+ 'exlimit' => count( $results ),
 81+ 'pageids' => implode( '|', $pageIds ),
 82+ ) )
 83+ );
 84+ $api->execute();
 85+ $data = $api->getResultData();
 86+ foreach ( $pageIds as $id ) {
 87+ if ( isset( $data['query']['pages'][$id]['excerpts'][0] ) ) {
 88+ $results[$id]['extract'] = $data['query']['pages'][$id]['extract'][0];
 89+ $results[$id]['extract trimmed'] = false;
 90+ }
 91+ }
 92+ return true;
 93+ }
 94+
 95+ /**
 96+ * Returns a processed, but not trimmed excerpt
 97+ * @param Title $title
 98+ * @return string
 99+ */
 100+ private function getExtract( Title $title ) {
 101+ wfProfileIn( __METHOD__ );
 102+ $page = WikiPage::factory( $title );
 103+
 104+ $introOnly = $this->params['intro'];
 105+ $text = $this->getFromCache( $page, $introOnly );
 106+ // if we need just first section, try retrieving full page and getting first section out of it
 107+ if ( $text === false && $introOnly ) {
 108+ $text = $this->getFromCache( $page, false );
 109+ if ( $text !== false ) {
 110+ $text = $this->getFirstSection( $text, $this->params['plaintext'] );
 111+ }
 112+ }
 113+ if ( $text === false ) {
 114+ $text = $this->parse( $page );
 115+ $text = $this->convertText( $text, $title, $this->params['plaintext'] );
 116+ $this->setCache( $page, $text );
 117+ }
 118+ wfProfileOut( __METHOD__ );
 119+ return $text;
 120+ }
 121+
 122+ private function cacheKey( WikiPage $page, $introOnly ) {
 123+ return wfMemcKey( 'mf', 'extract', $page->getLatest(), $this->params['plaintext'], $introOnly );
 124+ }
 125+
 126+ private function getFromCache( WikiPage $page, $introOnly ) {
 127+ global $wgMemc;
 128+
 129+ $key = $this->cacheKey( $page, $introOnly );
 130+ return $wgMemc->get( $key );
 131+ }
 132+
 133+ private function setCache( WikiPage $page, $text ) {
 134+ global $wgMemc;
 135+
 136+ $key = $this->cacheKey( $page, $this->params['intro'] );
 137+ $wgMemc->set( $key, $text );
 138+ }
 139+
 140+ private function getFirstSection( $text, $plainText ) {
 141+ if ( $plainText ) {
 142+ $regexp = '/^(.*?)(?=' . self::SECTION_MARKER_START . ')/s';
 143+ } else {
 144+ $regexp = '/^(.*?)(?=<h[1-6]\b)/s';
 145+ }
 146+ if ( preg_match( $regexp, $text, $matches ) ) {
 147+ wfDebugDieBacktrace();
 148+ $text = $matches[0];
 149+ }
 150+ return $text;
 151+ }
 152+
 153+ /**
 154+ * Returns page HTML
 155+ * @param WikiPage $page
 156+ * @return string
 157+ */
 158+ private function parse( WikiPage $page ) {
 159+ wfProfileIn( __METHOD__ );
 160+ if ( !$this->parserOptions ) {
 161+ $this->parserOptions = new ParserOptions( new User( '127.0.0.1' ) );
 162+ }
 163+ // first try finding full page in parser cache
 164+ if ( $page->isParserCacheUsed( $this->parserOptions, 0 ) ) {
 165+ $pout = ParserCache::singleton()->get( $page, $this->parserOptions );
 166+ if ( $pout ) {
 167+ $text = $pout->getText();
 168+ if ( $this->params['intro'] ) {
 169+ $text = $this->getFirstSection( $text, false );
 170+ }
 171+ wfProfileOut( __METHOD__ );
 172+ return $text;
 173+ }
 174+ }
 175+ $request = array(
 176+ 'action' => 'parse',
 177+ 'page' => $page->getTitle()->getPrefixedText(),
 178+ 'prop' => 'text'
 179+ );
 180+ if ( $this->params['intro'] ) {
 181+ $request['section'] = 0;
 182+ }
 183+ // in case of cache miss, render just the needed section
 184+ $api = new ApiMain( new FauxRequest( $request ) );
 185+ $api->execute();
 186+ $data = $api->getResultData();
 187+ wfProfileOut( __METHOD__ );
 188+ return $data['parse']['text']['*'];
 189+ }
 190+
 191+ /**
 192+ * Converts page HTML into an excerpt
 193+ * @param string $text
 194+ * @param Title $title
 195+ * @param bool $plainText
 196+ * @return string
 197+ */
 198+ private function convertText( $text ) {
 199+ wfProfileIn( __METHOD__ );
 200+ $fmt = new ExtractFormatter( $text, $this->params['plaintext'], $this->params['sectionformat'] );
 201+ $text = $fmt->getText();
 202+
 203+ wfProfileOut( __METHOD__ );
 204+ return trim( $text );
 205+ }
 206+
 207+ /**
 208+ *
 209+ * @param string $text
 210+ * @param int $requestedLength
 211+ * @param bool $plainText
 212+ * @return string
 213+ */
 214+ private function trimText( $text, $requestedLength, $plainText ) {
 215+ global $wgUseTidy;
 216+
 217+ wfProfileIn( __METHOD__ );
 218+ $length = mb_strlen( $text );
 219+ if ( $length <= $requestedLength ) {
 220+ wfProfileOut( __METHOD__ );
 221+ return $text;
 222+ }
 223+ $pattern = "#^.{{$requestedLength}}[\\w/]*>?#su";
 224+ preg_match( $pattern, $text, $m );
 225+ $text = $m[0];
 226+ // Fix possibly unclosed tags
 227+ if ( $wgUseTidy && !$plainText ) {
 228+ $text = trim ( MWTidy::tidy( $text ) );
 229+ }
 230+ $text .= wfMessage( 'ellipsis' )->inContentLanguage()->text();
 231+ wfProfileOut( __METHOD__ );
 232+ return $text;
 233+ }
 234+
 235+ public function getAllowedParams() {
 236+ return array(
 237+ 'length' => array(
 238+ ApiBase::PARAM_TYPE => 'integer',
 239+ ApiBase::PARAM_MIN => 1,
 240+ ),
 241+ 'limit' => array(
 242+ ApiBase::PARAM_DFLT => 1,
 243+ ApiBase::PARAM_TYPE => 'limit',
 244+ ApiBase::PARAM_MIN => 1,
 245+ ApiBase::PARAM_MAX => 20,
 246+ ApiBase::PARAM_MAX2 => 20,
 247+ ),
 248+ 'intro' => false,
 249+ 'plaintext' => false,
 250+ 'sectionformat' => array(
 251+ ApiBase::PARAM_TYPE => ExtractFormatter::$sectionFormats,
 252+ ApiBase::PARAM_DFLT => 'wiki',
 253+ ),
 254+ 'continue' => array(
 255+ ApiBase::PARAM_TYPE => 'integer',
 256+ ),
 257+ );
 258+ }
 259+
 260+ public function getParamDescription() {
 261+ return array(
 262+ 'length' => 'How many characters to return, actual text returned might be slightly longer.',
 263+ 'limit' => 'How many extracts to return. ',
 264+ 'intro' => 'Return only content before the first section',
 265+ 'plaintext' => 'Return extracts as plaintext instead of limited HTML',
 266+ 'sectionformat' => array(
 267+ 'How to format sections in plaintext mode:',
 268+ ' none - No formatting',
 269+ ' wiki - Wikitext-style formatting == like this ==',
 270+ " raw - Return in this module's internal representation (secton titles prefixed with <ASCII 1><ASCII 2><section level><ASCII 2><ASCII 1>",
 271+ ),
 272+ 'continue' => 'When more results are available, use this to continue',
 273+ );
 274+ }
 275+
 276+ public function getDescription() {
 277+ return 'Returns plain-text or limited HTML extracts of the given page(s)';
 278+ }
 279+
 280+ public function getPossibleErrors() {
 281+ return array_merge( parent::getPossibleErrors(), array(
 282+ array( 'code' => '_badcontinue', 'info' => 'Invalid continue param. You should pass the original value returned by the previous query' ),
 283+ ) );
 284+ }
 285+
 286+ public function getExamples() {
 287+ return array(
 288+ 'api.php?action=query&prop=extracts&exlength=175&titles=Therion' => 'Get a 175-character extract',
 289+ );
 290+ }
 291+
 292+
 293+ public function getHelpUrls() {
 294+ return 'https://www.mediawiki.org/wiki/Extension:MobileFrontend#New_API';
 295+ }
 296+
 297+ public function getVersion() {
 298+ return __CLASS__ . ': $Id$';
 299+ }
 300+}
 301+
 302+class ExtractFormatter extends HtmlFormatter {
 303+ private $plainText;
 304+ private $sectionFormat;
 305+
 306+ public static $sectionFormats = array(
 307+ 'none',
 308+ 'wiki',
 309+ 'raw',
 310+ );
 311+
 312+ public function __construct( $text, $plainText, $sectionFormat ) {
 313+ parent::__construct( HtmlFormatter::wrapHTML( $text ) );
 314+ $this->plainText = $plainText;
 315+ $this->sectionFormat = $sectionFormat;
 316+
 317+ $this->removeImages();
 318+ $this->remove( array( 'table', 'div', '.editsection', 'sup.reference', 'span.coordinates',
 319+ 'span.geo-multi-punct', 'span.geo-nondefault', '.noexcerpt', '.error' )
 320+ );
 321+ if ( $plainText ) {
 322+ $this->flattenAllTags();
 323+ } else {
 324+ $this->flatten( array( 'span', 'a' ) );
 325+ }
 326+ }
 327+
 328+ public function getText( $dummy = null ) {
 329+ $this->filterContent();
 330+ $text = parent::getText();
 331+ if ( $this->plainText ) {
 332+ $text = html_entity_decode( $text );
 333+ $text = str_replace( "\r", "\n", $text );
 334+ $text = preg_replace( "/\n{3,}/", "\n\n", $text );
 335+ $text = preg_replace_callback(
 336+ "/" . ApiQueryExtracts::SECTION_MARKER_START . '(\d)'. ApiQueryExtracts::SECTION_MARKER_END . "(.*?)$/m",
 337+ array( $this, 'sectionCallback' ),
 338+ $text
 339+ );
 340+ }
 341+ return $text;
 342+ }
 343+
 344+ public function onHtmlReady( $html ) {
 345+ if ( $this->plainText ) {
 346+ $html = preg_replace( '/\s*(<h([1-6])\b)/i',
 347+ ApiQueryExtracts::SECTION_MARKER_START . '$2' . ApiQueryExtracts::SECTION_MARKER_END . '$1' ,
 348+ $html
 349+ );
 350+ }
 351+ return $html;
 352+ }
 353+
 354+ private function sectionCallback( $matches ) {
 355+ if ( $this->sectionFormat == 'raw' ) {
 356+ return $matches[0];
 357+ }
 358+ $func = "ExtractFormatter::doSection_{$this->sectionFormat}";
 359+ return call_user_func( $func, $matches[1], trim( $matches[2] ) );
 360+ }
 361+
 362+ private static function doSection_wiki( $level, $text ) {
 363+ $bars = str_repeat( '=', $level );
 364+ return "\n$bars $text $bars";
 365+ }
 366+
 367+ private static function doSection_none( $level, $text ) {
 368+ return "\n$text";
 369+ }
 370+}
\ No newline at end of file
Property changes on: trunk/extensions/MobileFrontend/api/ApiQueryExtracts.php
___________________________________________________________________
Added: svn:eol-style
1371 + native
Added: svn:keywords
2372 + Id
Index: trunk/extensions/MobileFrontend/MobileFrontend.php
@@ -52,7 +52,7 @@
53 53
54 54 'ApiMobileView' => 'api/ApiMobileView',
55 55 'ApiParseExtender' => 'api/ApiParseExtender',
56 - 'ApiQueryExtracts' => 'api/ApiQueryExcerpts',
 56+ 'ApiQueryExtracts' => 'api/ApiQueryExtracts',
57 57
58 58 'MobileFrontendTemplate' => 'templates/MobileFrontendTemplate',
59 59 'ApplicationTemplate' => 'templates/ApplicationTemplate',

Past revisions this follows-up on

Revision | Commit summary | Author | Date
r114129 | Text extraction rewrite:... | maxsem | 12:19, 19 March 2012

Status & tagging log