r113937 MediaWiki - Code Review archive

Repository: MediaWiki
Revision: < r113936 | r113937 | r113938 >
Date: 17:36, 15 March 2012
Author: maxsem
Status: ok
Tags:
Comment:
Avoid a DOM parse when it's not needed
Modified paths:
  • /trunk/extensions/MobileFrontend/HtmlFormatter.php (modified) (history)
  • /trunk/extensions/MobileFrontend/MobileFormatter.php (modified) (history)

Diff [purge]

Index: trunk/extensions/MobileFrontend/MobileFormatter.php
@@ -131,10 +131,10 @@
132132
133133 * @return string: Processed HTML
134134 */
135 - public function getText( $element = false ) {
 135+ public function getText( $element = null ) {
136136 wfProfileIn( __METHOD__ );
137137 if ( $this->mainPage ) {
138 - $element = $this->parseMainPage( $this->doc );
 138+ $element = $this->parseMainPage( $this->getDoc() );
139139 }
140140 $html = parent::getText( $element );
141141 wfProfileOut( __METHOD__ );
Index: trunk/extensions/MobileFrontend/HtmlFormatter.php
@@ -7,8 +7,9 @@
88 /**
99 * @var DOMDocument
1010 */
11 - protected $doc;
 11+ private $doc;
1212
 13+ private $html;
1314 private $itemsToRemove = array();
1415 private $elementsToFlatten = array();
1516 private $removeImages = false;
@@ -23,14 +24,7 @@
2425 public function __construct( $html ) {
2526 wfProfileIn( __METHOD__ );
2627
27 - $html = mb_convert_encoding( $html, 'HTML-ENTITIES', "UTF-8" );
28 - libxml_use_internal_errors( true );
29 - $this->doc = new DOMDocument();
30 - $this->doc->loadHTML( '<?xml encoding="UTF-8">' . $html );
31 - libxml_use_internal_errors( false );
32 - $this->doc->preserveWhiteSpace = false;
33 - $this->doc->strictErrorChecking = false;
34 - $this->doc->encoding = 'UTF-8';
 28+ $this->html = $html;
3529
3630 wfProfileOut( __METHOD__ );
3731 }
@@ -57,6 +51,16 @@
5852 * @return DOMDocument: DOM to manipulate
5953 */
6054 public function getDoc() {
 55+ if ( !$this->doc ) {
 56+ $html = mb_convert_encoding( $this->html, 'HTML-ENTITIES', "UTF-8" );
 57+ libxml_use_internal_errors( true );
 58+ $this->doc = new DOMDocument();
 59+ $this->doc->loadHTML( '<?xml encoding="UTF-8">' . $html );
 60+ libxml_use_internal_errors( false );
 61+ $this->doc->preserveWhiteSpace = false;
 62+ $this->doc->strictErrorChecking = false;
 63+ $this->doc->encoding = 'UTF-8';
 64+ }
6165 return $this->doc;
6266 }
6367
@@ -112,6 +116,12 @@
113117 public function filterContent() {
114118 $removals = $this->parseItemsToRemove();
115119
 120+ if ( !$removals ) {
 121+ return;
 122+ }
 123+
 124+ $doc = $this->getDoc();
 125+
116126 // Remove tags
117127
118128 // You can't remove DOMNodes from a DOMNodeList as you're iterating
@@ -122,7 +132,7 @@
123133
124134 $domElemsToRemove = array();
125135 foreach ( $removals['TAG'] as $tagToRemove ) {
126 - $tagToRemoveNodes = $this->doc->getElementsByTagName( $tagToRemove );
 136+ $tagToRemoveNodes = $doc->getElementsByTagName( $tagToRemove );
127137 foreach ( $tagToRemoveNodes as $tagToRemoveNode ) {
128138 $tagToRemoveNodeIdAttributeValue = '';
129139 if ( $tagToRemoveNode ) {
@@ -143,14 +153,14 @@
144154
145155 // Elements with named IDs
146156 foreach ( $removals['ID'] as $itemToRemove ) {
147 - $itemToRemoveNode = $this->doc->getElementById( $itemToRemove );
 157+ $itemToRemoveNode = $doc->getElementById( $itemToRemove );
148158 if ( $itemToRemoveNode ) {
149159 $itemToRemoveNode->parentNode->removeChild( $itemToRemoveNode );
150160 }
151161 }
152162
153163 // CSS Classes
154 - $xpath = new DOMXpath( $this->doc );
 164+ $xpath = new DOMXpath( $doc );
155165 foreach ( $removals['CLASS'] as $classToRemove ) {
156166 $elements = $xpath->query( '//*[@class="' . $classToRemove . '"]' );
157167
@@ -177,7 +187,7 @@
178188 $redLinks = $xpath->query( '//a[@class="new"]' );
179189 foreach ( $redLinks as $redLink ) {
180190 // PHP Bug #36795 — Inappropriate "unterminated entity reference"
181 - $spanNode = $this->doc->createElement( "span", str_replace( "&", "&amp;", $redLink->nodeValue ) );
 191+ $spanNode = $doc->createElement( "span", str_replace( "&", "&amp;", $redLink->nodeValue ) );
182192
183193 if ( $redLink->hasAttributes() ) {
184194 $attributes = $redLink->attributes;
@@ -203,10 +213,14 @@
204214 public function getText( $element = null ) {
205215 wfProfileIn( __METHOD__ );
206216
207 - if ( $element !== null && !( $element instanceof DOMElement ) ) {
208 - $element = $this->doc->getElementById( $element );
 217+ if ( $this->doc ) {
 218+ if ( $element !== null && !( $element instanceof DOMElement ) ) {
 219+ $element = $this->doc->getElementById( $element );
 220+ }
 221+ $html = $this->doc->saveXML( $element, LIBXML_NOEMPTYTAG );
 222+ } else {
 223+ $html = $this->html;
209224 }
210 - $html = $this->doc->saveXML( $element, LIBXML_NOEMPTYTAG );
211225 if ( !$element ) {
212226 $html = preg_replace( '/<!--.*?-->|^.*?<body>|<\/body>.*$/s', '', $html );
213227 }

Status & tagging log