Index: trunk/extensions/MobileFrontend/MobileFormatter.php |
— | — | @@ -131,10 +131,10 @@ |
132 | 132 | |
133 | 133 | * @return string: Processed HTML |
134 | 134 | */ |
135 | | - public function getText( $element = false ) { |
| 135 | + public function getText( $element = null ) { |
136 | 136 | wfProfileIn( __METHOD__ ); |
137 | 137 | if ( $this->mainPage ) { |
138 | | - $element = $this->parseMainPage( $this->doc ); |
| 138 | + $element = $this->parseMainPage( $this->getDoc() ); |
139 | 139 | } |
140 | 140 | $html = parent::getText( $element ); |
141 | 141 | wfProfileOut( __METHOD__ ); |
Index: trunk/extensions/MobileFrontend/HtmlFormatter.php |
— | — | @@ -7,8 +7,9 @@ |
8 | 8 | /** |
9 | 9 | * @var DOMDocument |
10 | 10 | */ |
11 | | - protected $doc; |
| 11 | + private $doc; |
12 | 12 | |
| 13 | + private $html; |
13 | 14 | private $itemsToRemove = array(); |
14 | 15 | private $elementsToFlatten = array(); |
15 | 16 | private $removeImages = false; |
— | — | @@ -23,14 +24,7 @@ |
24 | 25 | public function __construct( $html ) { |
25 | 26 | wfProfileIn( __METHOD__ ); |
26 | 27 | |
27 | | - $html = mb_convert_encoding( $html, 'HTML-ENTITIES', "UTF-8" ); |
28 | | - libxml_use_internal_errors( true ); |
29 | | - $this->doc = new DOMDocument(); |
30 | | - $this->doc->loadHTML( '<?xml encoding="UTF-8">' . $html ); |
31 | | - libxml_use_internal_errors( false ); |
32 | | - $this->doc->preserveWhiteSpace = false; |
33 | | - $this->doc->strictErrorChecking = false; |
34 | | - $this->doc->encoding = 'UTF-8'; |
| 28 | + $this->html = $html; |
35 | 29 | |
36 | 30 | wfProfileOut( __METHOD__ ); |
37 | 31 | } |
— | — | @@ -57,6 +51,16 @@ |
58 | 52 | * @return DOMDocument: DOM to manipulate |
59 | 53 | */ |
60 | 54 | public function getDoc() { |
| 55 | + if ( !$this->doc ) { |
| 56 | + $html = mb_convert_encoding( $this->html, 'HTML-ENTITIES', "UTF-8" ); |
| 57 | + libxml_use_internal_errors( true ); |
| 58 | + $this->doc = new DOMDocument(); |
| 59 | + $this->doc->loadHTML( '<?xml encoding="UTF-8">' . $html ); |
| 60 | + libxml_use_internal_errors( false ); |
| 61 | + $this->doc->preserveWhiteSpace = false; |
| 62 | + $this->doc->strictErrorChecking = false; |
| 63 | + $this->doc->encoding = 'UTF-8'; |
| 64 | + } |
61 | 65 | return $this->doc; |
62 | 66 | } |
63 | 67 | |
— | — | @@ -112,6 +116,12 @@ |
113 | 117 | public function filterContent() { |
114 | 118 | $removals = $this->parseItemsToRemove(); |
115 | 119 | |
| 120 | + if ( !$removals ) { |
| 121 | + return; |
| 122 | + } |
| 123 | + |
| 124 | + $doc = $this->getDoc(); |
| 125 | + |
116 | 126 | // Remove tags |
117 | 127 | |
118 | 128 | // You can't remove DOMNodes from a DOMNodeList as you're iterating |
— | — | @@ -122,7 +132,7 @@ |
123 | 133 | |
124 | 134 | $domElemsToRemove = array(); |
125 | 135 | foreach ( $removals['TAG'] as $tagToRemove ) { |
126 | | - $tagToRemoveNodes = $this->doc->getElementsByTagName( $tagToRemove ); |
| 136 | + $tagToRemoveNodes = $doc->getElementsByTagName( $tagToRemove ); |
127 | 137 | foreach ( $tagToRemoveNodes as $tagToRemoveNode ) { |
128 | 138 | $tagToRemoveNodeIdAttributeValue = ''; |
129 | 139 | if ( $tagToRemoveNode ) { |
— | — | @@ -143,14 +153,14 @@ |
144 | 154 | |
145 | 155 | // Elements with named IDs |
146 | 156 | foreach ( $removals['ID'] as $itemToRemove ) { |
147 | | - $itemToRemoveNode = $this->doc->getElementById( $itemToRemove ); |
| 157 | + $itemToRemoveNode = $doc->getElementById( $itemToRemove ); |
148 | 158 | if ( $itemToRemoveNode ) { |
149 | 159 | $itemToRemoveNode->parentNode->removeChild( $itemToRemoveNode ); |
150 | 160 | } |
151 | 161 | } |
152 | 162 | |
153 | 163 | // CSS Classes |
154 | | - $xpath = new DOMXpath( $this->doc ); |
| 164 | + $xpath = new DOMXpath( $doc ); |
155 | 165 | foreach ( $removals['CLASS'] as $classToRemove ) { |
156 | 166 | $elements = $xpath->query( '//*[@class="' . $classToRemove . '"]' ); |
157 | 167 | |
— | — | @@ -177,7 +187,7 @@ |
178 | 188 | $redLinks = $xpath->query( '//a[@class="new"]' ); |
179 | 189 | foreach ( $redLinks as $redLink ) { |
180 | 190 | // PHP Bug #36795 — Inappropriate "unterminated entity reference" |
181 | | - $spanNode = $this->doc->createElement( "span", str_replace( "&", "&amp;", $redLink->nodeValue ) ); |
| 191 | + $spanNode = $doc->createElement( "span", str_replace( "&", "&amp;", $redLink->nodeValue ) ); |
182 | 192 | |
183 | 193 | if ( $redLink->hasAttributes() ) { |
184 | 194 | $attributes = $redLink->attributes; |
— | — | @@ -203,10 +213,14 @@ |
204 | 214 | public function getText( $element = null ) { |
205 | 215 | wfProfileIn( __METHOD__ ); |
206 | 216 | |
207 | | - if ( $element !== null && !( $element instanceof DOMElement ) ) { |
208 | | - $element = $this->doc->getElementById( $element ); |
| 217 | + if ( $this->doc ) { |
| 218 | + if ( $element !== null && !( $element instanceof DOMElement ) ) { |
| 219 | + $element = $this->doc->getElementById( $element ); |
| 220 | + } |
| 221 | + $html = $this->doc->saveXML( $element, LIBXML_NOEMPTYTAG ); |
| 222 | + } else { |
| 223 | + $html = $this->html; |
209 | 224 | } |
210 | | - $html = $this->doc->saveXML( $element, LIBXML_NOEMPTYTAG ); |
211 | 225 | if ( !$element ) { |
212 | 226 | $html = preg_replace( '/<!--.*?-->|^.*?<body>|<\/body>.*$/s', '', $html ); |
213 | 227 | } |