Index: trunk/extensions/MobileFrontend/ApiQueryExcerpt.php |
— | — | @@ -14,7 +14,7 @@ |
15 | 15 | } |
16 | 16 | $params = $this->extractRequestParams(); |
17 | 17 | foreach ( $titles as $id => $t ) { |
18 | | - $text = $this->getExcerpt( $t ); |
| 18 | + $text = $this->getExcerpt( $t, $params['plaintext'] ); |
19 | 19 | if ( isset( $params['length'] ) ) { |
20 | 20 | $text = $this->trimText( $text, $params['length'] ); |
21 | 21 | } |
— | — | @@ -27,7 +27,7 @@ |
28 | 28 | * @param Title $title |
29 | 29 | * @return string |
30 | 30 | */ |
31 | | - private function getExcerpt( Title $title ) { |
| 31 | + private function getExcerpt( Title $title, $plainText ) { |
32 | 32 | global $wgMemc; |
33 | 33 | |
34 | 34 | $key = wfMemcKey( 'mf', 'excerpt', $title->getPrefixedDBkey(), $title->getArticleID() ); |
— | — | @@ -40,7 +40,7 @@ |
41 | 41 | } |
42 | 42 | $wp = WikiPage::factory( $title ); |
43 | 43 | $pout = $wp->getParserOutput( $this->parserOptions ); |
44 | | - $text = $this->processText( $pout->getText(), $title ); |
| 44 | + $text = $this->processText( $pout->getText(), $title, $plainText ); |
45 | 45 | $wgMemc->set( $key, $text ); |
46 | 46 | return $text; |
47 | 47 | } |
— | — | @@ -49,17 +49,25 @@ |
50 | 50 | * Converts page HTML into an excerpt |
51 | 51 | * @param string $text |
52 | 52 | * @param Title $title |
| 53 | + * @param bool $plainText |
53 | 54 | * @return string |
54 | 55 | */ |
55 | | - private function processText( $text, Title $title ) { |
| 56 | + private function processText( $text, Title $title, $plainText ) { |
56 | 57 | $text = preg_replace( '/<h[1-6].*$/s', '', $text ); |
57 | 58 | $mf = new MobileFormatter( $text, $title, 'XHTML' ); |
58 | 59 | $mf->removeImages(); |
59 | 60 | $mf->remove( array( 'table', 'div', 'sup.reference', 'span.coordinates', 'span.geo-multi-punct', 'span.geo-nondefault' ) ); |
60 | | - $mf->flatten( array( 'span', 'a' ) ); |
| 61 | + if ( $plainText ) { |
| 62 | + $mf->flatten( '[?!]?[a-z0-9]+' ); |
| 63 | + } else { |
| 64 | + $mf->flatten( array( 'span', 'a' ) ); |
| 65 | + } |
61 | 66 | $mf->filterContent(); |
62 | 67 | $text = $mf->getText(); |
63 | 68 | $text = preg_replace( '/<!--.*?-->|^.*?<body>|<\/body>.*$/s', '', $text ); |
| 69 | + if ( $plainText ) { |
| 70 | + $text = html_entity_decode( $text ); |
| 71 | + } |
64 | 72 | return trim( $text ); |
65 | 73 | } |
66 | 74 | |
— | — | @@ -77,7 +85,7 @@ |
78 | 86 | if ( $wgUseTidy ) { |
79 | 87 | $text = trim ( MWTidy::tidy( $text ) ); |
80 | 88 | } |
81 | | - $text .= wfMessage( 'ellipsis' )->text(); |
| 89 | + $text .= wfMessage( 'ellipsis' )->inContentLanguage()->text(); |
82 | 90 | return $text; |
83 | 91 | } |
84 | 92 | |
— | — | @@ -94,6 +102,7 @@ |
95 | 103 | ApiBase::PARAM_MAX => 10, |
96 | 104 | ApiBase::PARAM_MAX2 => 20, |
97 | 105 | ), |
| 106 | + 'plaintext' => false, |
98 | 107 | 'continue' => array( |
99 | 108 | ApiBase::PARAM_TYPE => 'string', |
100 | 109 | ), |
— | — | @@ -104,6 +113,7 @@ |
105 | 114 | return array( |
106 | 115 | 'length' => 'How many characters to return, actual text returned might be slightly longer.', |
107 | 116 | 'limit' => 'How many excerpts to return', |
| 117 | + 'plaintext' => 'Return excerpts as plaintext instead of limited HTML', |
108 | 118 | 'continue' => 'When more results are available, use this to continue', |
109 | 119 | ); |
110 | 120 | } |