r112190 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r112189 | r112190 | r112191 >
Date:07:49, 23 February 2012
Author:maxsem
Status:ok
Tags:
Comment:
Optional plaintext excerpts
Modified paths:
  • /trunk/extensions/MobileFrontend/ApiQueryExcerpt.php (modified) (history)

Diff [purge]

Index: trunk/extensions/MobileFrontend/ApiQueryExcerpt.php
@@ -14,7 +14,7 @@
15 15 }
16 16 $params = $this->extractRequestParams();
17 17 foreach ( $titles as $id => $t ) {
18 - $text = $this->getExcerpt( $t );
 18+ $text = $this->getExcerpt( $t, $params['plaintext'] );
19 19 if ( isset( $params['length'] ) ) {
20 20 $text = $this->trimText( $text, $params['length'] );
21 21 }
@@ -27,7 +27,7 @@
28 28 * @param Title $title
29 29 * @return string
30 30 */
31 - private function getExcerpt( Title $title ) {
 31+ private function getExcerpt( Title $title, $plainText ) {
32 32 global $wgMemc;
33 33
34 34 $key = wfMemcKey( 'mf', 'excerpt', $title->getPrefixedDBkey(), $title->getArticleID() );
@@ -40,7 +40,7 @@
41 41 }
42 42 $wp = WikiPage::factory( $title );
43 43 $pout = $wp->getParserOutput( $this->parserOptions );
44 - $text = $this->processText( $pout->getText(), $title );
 44+ $text = $this->processText( $pout->getText(), $title, $plainText );
45 45 $wgMemc->set( $key, $text );
46 46 return $text;
47 47 }
@@ -49,17 +49,25 @@
50 50 * Converts page HTML into an excerpt
51 51 * @param string $text
52 52 * @param Title $title
 53+ * @param bool $plainText
53 54 * @return string
54 55 */
55 - private function processText( $text, Title $title ) {
 56+ private function processText( $text, Title $title, $plainText ) {
56 57 $text = preg_replace( '/<h[1-6].*$/s', '', $text );
57 58 $mf = new MobileFormatter( $text, $title, 'XHTML' );
58 59 $mf->removeImages();
59 60 $mf->remove( array( 'table', 'div', 'sup.reference', 'span.coordinates', 'span.geo-multi-punct', 'span.geo-nondefault' ) );
60 - $mf->flatten( array( 'span', 'a' ) );
 61+ if ( $plainText ) {
 62+ $mf->flatten( '[?!]?[a-z0-9]+' );
 63+ } else {
 64+ $mf->flatten( array( 'span', 'a' ) );
 65+ }
61 66 $mf->filterContent();
62 67 $text = $mf->getText();
63 68 $text = preg_replace( '/<!--.*?-->|^.*?<body>|<\/body>.*$/s', '', $text );
 69+ if ( $plainText ) {
 70+ $text = html_entity_decode( $text );
 71+ }
64 72 return trim( $text );
65 73 }
66 74
@@ -77,7 +85,7 @@
78 86 if ( $wgUseTidy ) {
79 87 $text = trim ( MWTidy::tidy( $text ) );
80 88 }
81 - $text .= wfMessage( 'ellipsis' )->text();
 89+ $text .= wfMessage( 'ellipsis' )->inContentLanguage()->text();
82 90 return $text;
83 91 }
84 92
@@ -94,6 +102,7 @@
95 103 ApiBase::PARAM_MAX => 10,
96 104 ApiBase::PARAM_MAX2 => 20,
97 105 ),
 106+ 'plaintext' => false,
98 107 'continue' => array(
99 108 ApiBase::PARAM_TYPE => 'string',
100 109 ),
@@ -104,6 +113,7 @@
105 114 return array(
106 115 'length' => 'How many characters to return, actual text returned might be slightly longer.',
107 116 'limit' => 'How many excerpts to return',
 117+ 'plaintext' => 'Return excerpts as plaintext instead of limited HTML',
108 118 'continue' => 'When more results are available, use this to continue',
109 119 );
110 120 }

Status & tagging log