r113781 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r113780‎ | r113781 | r113782 >
Date:23:02, 13 March 2012
Author:maxsem
Status:ok
Tags:
Comment:
Moved all the basic HTML reformatting routines to a base class
Modified paths:
  • /trunk/extensions/MobileFrontend/HtmlFormatter.php (added) (history)
  • /trunk/extensions/MobileFrontend/MobileFormatter.php (modified) (history)
  • /trunk/extensions/MobileFrontend/MobileFrontend.body.php (modified) (history)
  • /trunk/extensions/MobileFrontend/MobileFrontend.php (modified) (history)

Diff [purge]

Index: trunk/extensions/MobileFrontend/MobileFormatter.php
@@ -3,16 +3,10 @@
44 /**
55 * Converts HTML into a mobile-friendly version
66 */
7 -class MobileFormatter {
 7+class MobileFormatter extends HtmlFormatter {
88 const WML_SECTION_SEPARATOR = '***************************************************************************';
99
10 - /**
11 - * @var DOMDocument
12 - */
13 - protected $doc;
1410 protected $format;
15 - protected $removeImages = false;
16 - protected $idWhitelist = array();
1711
1812 /**
1913 * @var Title
@@ -69,9 +63,6 @@
7064 '.nomobile',
7165 );
7266
73 - private $itemsToRemove = array();
74 - private $elementsToFlatten = array();
75 -
7667 /**
7768 * Constructor
7869 *
@@ -81,7 +72,7 @@
8273 * @param WmlContext $wmlContext: Context for creation of WML cards, can be omitted if $format == 'XHTML'
8374 */
8475 public function __construct( $html, $title, $format, WmlContext $wmlContext = null ) {
85 - wfProfileIn( __METHOD__ );
 76+ parent::__construct( $html );
8677
8778 $this->title = $title;
8879 $this->format = $format;
@@ -89,27 +80,10 @@
9081 throw new MWException( __METHOD__ . '(): WML context not set' );
9182 }
9283 $this->wmlContext = $wmlContext;
93 -
94 - $html = mb_convert_encoding( $html, 'HTML-ENTITIES', "UTF-8" );
95 - libxml_use_internal_errors( true );
96 - $this->doc = new DOMDocument();
97 - $this->doc->loadHTML( '<?xml encoding="UTF-8">' . $html );
98 - libxml_use_internal_errors( false );
99 - $this->doc->preserveWhiteSpace = false;
100 - $this->doc->strictErrorChecking = false;
101 - $this->doc->encoding = 'UTF-8';
 84+ $this->flattenRedLinks();
10285 }
10386
10487 /**
105 - * Turns a chunk of HTML into a proper document
106 - * @param string $html
107 - * @return string
108 - */
109 - public static function wrapHTML( $html ) {
110 - return '<!doctype html><html><head></head><body>' . $html . '</body></html>';
111 - }
112 -
113 - /**
11488 * Use the given message cache
11589 * @param Array $messages
11690 */
@@ -118,13 +92,6 @@
11993 }
12094
12195 /**
122 - * @return DOMDocument: DOM to manipulate
123 - */
124 - public function getDoc() {
125 - return $this->doc;
126 - }
127 -
128 - /**
12996 * @return string: Output format
13097 */
13198 public function getFormat() {
@@ -144,148 +111,37 @@
145112 }
146113
147114 /**
148 - * Sets whether images should be removed from output
149 - * @param bool $flag
150 - */
151 - public function removeImages( $flag = true ) {
152 - $this->removeImages = $flag;
153 - }
154 -
155 - /**
156 - * Adds one or more selector of content to remove
157 - * @param Array|string $selectors: Selector(s) of stuff to remove
158 - */
159 - public function remove( $selectors ) {
160 - $this->itemsToRemove = array_merge( $this->itemsToRemove, (array)$selectors );
161 - }
162 -
163 - /**
164 - * Adds one or more element name to the list to flatten (remove tag, but not its content)
165 - * @param Array|string $elements: Name(s) of tag(s) to flatten
166 - */
167 - public function flatten( $elements ) {
168 - $this->elementsToFlatten = array_merge( $this->elementsToFlatten, (array)$elements );
169 - }
170 -
171 - /**
172 - * @param Array|string $ids: Id(s) of content to keep
173 - */
174 - public function whitelistIds( $ids ) {
175 - $this->idWhitelist = array_merge( $this->idWhitelist, array_flip( (array)$ids ) );
176 - }
177 -
178 - /**
179115 * Removes content inappropriate for mobile devices
180116 * @param bool $removeDefaults: Whether default settings at self::$defaultItemsToRemove should be used
181117 */
182118 public function filterContent( $removeDefaults = true ) {
183119 global $wgMFRemovableClasses;
184120
185 - wfProfileIn(__METHOD__ );
186121 if ( $removeDefaults ) {
187 - $this->itemsToRemove = array_merge( $this->itemsToRemove,
188 - self::$defaultItemsToRemove, $wgMFRemovableClasses
189 - );
 122+ $this->remove( self::$defaultItemsToRemove );
 123+ $this->remove( $wgMFRemovableClasses );
190124 }
191 - $removals = $this->parseItemsToRemove();
192 -
193 - // Remove tags
194 -
195 - // You can't remove DOMNodes from a DOMNodeList as you're iterating
196 - // over them in a foreach loop. It will seemingly leave the internal
197 - // iterator on the foreach out of wack and results will be quite
198 - // strange. Though, making a queue of items to remove seems to work.
199 - // For example:
200 -
201 - $domElemsToRemove = array();
202 - foreach ( $removals['TAG'] as $tagToRemove ) {
203 - $tagToRemoveNodes = $this->doc->getElementsByTagName( $tagToRemove );
204 - foreach ( $tagToRemoveNodes as $tagToRemoveNode ) {
205 - $tagToRemoveNodeIdAttributeValue = '';
206 - if ( $tagToRemoveNode ) {
207 - $tagToRemoveNodeIdAttribute = $tagToRemoveNode->getAttributeNode( 'id' );
208 - if ( $tagToRemoveNodeIdAttribute ) {
209 - $tagToRemoveNodeIdAttributeValue = $tagToRemoveNodeIdAttribute->value;
210 - }
211 - if ( !isset( $this->idWhitelist[$tagToRemoveNodeIdAttributeValue] ) ) {
212 - $domElemsToRemove[] = $tagToRemoveNode;
213 - }
214 - }
215 - }
216 - }
217 -
218 - foreach ( $domElemsToRemove as $domElement ) {
219 - $domElement->parentNode->removeChild( $domElement );
220 - }
221 -
222 - // Elements with named IDs
223 - foreach ( $removals['ID'] as $itemToRemove ) {
224 - $itemToRemoveNode = $this->doc->getElementById( $itemToRemove );
225 - if ( $itemToRemoveNode ) {
226 - $itemToRemoveNode->parentNode->removeChild( $itemToRemoveNode );
227 - }
228 - }
229 -
230 - // CSS Classes
231 - $xpath = new DOMXpath( $this->doc );
232 - foreach ( $removals['CLASS'] as $classToRemove ) {
233 - $elements = $xpath->query( '//*[@class="' . $classToRemove . '"]' );
234 -
235 - foreach ( $elements as $element ) {
236 - $element->parentNode->removeChild( $element );
237 - }
238 - }
239 -
240 - // Tags with CSS Classes
241 - foreach ( $removals['TAG_CLASS'] as $classToRemove ) {
242 - $parts = explode( '.', $classToRemove );
243 -
244 - $elements = $xpath->query(
245 - '//' . $parts[0] . '[@class="' . $parts[1] . '"]'
246 - );
247 -
248 - foreach ( $elements as $element ) {
249 - $removedElement = $element->parentNode->removeChild( $element );
250 - }
251 - }
252 -
253 - // Handle red links with action equal to edit
254 - $redLinks = $xpath->query( '//a[@class="new"]' );
255 - foreach ( $redLinks as $redLink ) {
256 - // PHP Bug #36795 — Inappropriate "unterminated entity reference"
257 - $spanNode = $this->doc->createElement( "span", str_replace( "&", "&amp;", $redLink->nodeValue ) );
258 -
259 - if ( $redLink->hasAttributes() ) {
260 - $attributes = $redLink->attributes;
261 - foreach ( $attributes as $i => $attribute ) {
262 - if ( $attribute->name != 'href' ) {
263 - $spanNode->setAttribute( $attribute->name, $attribute->value );
264 - }
265 - }
266 - }
267 -
268 - $redLink->parentNode->replaceChild( $spanNode, $redLink );
269 - }
270 - wfProfileOut( __METHOD__ );
 125+ parent::filterContent();
271126 }
272127
273128 /**
274129 * Performs final transformations to mobile format and returns resulting HTML/WML
275130 *
276 - * @param string|bool $id: ID of element to get HTML from or false to get it from the whole tree
277 - * @param string $prependHtml: HTML to be prepended to result before final transformations
278 - * @param string $appendHtml: HTML to be appended to result before final transformations
 131+ * @param DOMElement|string|bool|null $element: Element, or ID of the element, to get HTML from; false or null to get it from the whole tree
 132+ *
279133 * @return string: Processed HTML
280134 */
281 - public function getText( $id = false, $prependHtml = '', $appendHtml = '' ) {
 135+ public function getText( $element = false ) {
282136 wfProfileIn( __METHOD__ );
283137 if ( $this->mainPage ) {
284138 $element = $this->parseMainPage( $this->doc );
285 - } else {
286 - $element = $id ? $this->doc->getElementById( $id ) : null;
287139 }
288 - $html = $prependHtml . $this->doc->saveXML( $element, LIBXML_NOEMPTYTAG ) . $appendHtml;
289 -
 140+ $html = parent::getText( $element );
 141+ wfProfileOut( __METHOD__ );
 142+ return $html;
 143+ }
 144+
 145+ protected function onHtmlReady( $html ) {
290146 switch ( $this->format ) {
291147 case 'XHTML':
292148 if ( $this->expandableSections && !$this->mainPage && strlen( $html ) > 4000 ) {
@@ -300,15 +156,6 @@
301157 $html = $this->createWMLCard( $html );
302158 break;
303159 }
304 - if ( $this->elementsToFlatten ) {
305 - $elements = implode( '|', $this->elementsToFlatten );
306 - $html = preg_replace( "#</?($elements)[^>]*>#is", '', $html );
307 - }
308 - if ( !$element ) {
309 - $html = preg_replace( '/<!--.*?-->|^.*?<body>|<\/body>.*$/s', '', $html );
310 - }
311 -
312 - wfProfileOut( __METHOD__ );
313160 return $html;
314161 }
315162
@@ -496,40 +343,6 @@
497344 }
498345
499346 /**
500 - * Transforms CSS selectors into an internal representation suitable for processing
501 - * @return array
502 - */
503 - private function parseItemsToRemove() {
504 - wfProfileIn( __METHOD__ );
505 - $removals = array(
506 - 'ID' => array(),
507 - 'TAG' => array(),
508 - 'CLASS' => array(),
509 - 'TAG_CLASS' => array(),
510 - );
511 -
512 - foreach ( $this->itemsToRemove as $itemToRemove ) {
513 - $type = '';
514 - $rawName = '';
515 - CssDetection::detectIdCssOrTag( $itemToRemove, $type, $rawName );
516 - $removals[$type][] = $rawName;
517 - }
518 -
519 - if ( $this->removeImages ) {
520 - $removals['TAG'][] = "img";
521 - $removals['TAG'][] = "audio";
522 - $removals['TAG'][] = "video";
523 - $removals['CLASS'][] = "thumb tright";
524 - $removals['CLASS'][] = "thumb tleft";
525 - $removals['CLASS'][] = "thumbcaption";
526 - $removals['CLASS'][] = "gallery";
527 - }
528 -
529 - wfProfileOut( __METHOD__ );
530 - return $removals;
531 - }
532 -
533 - /**
534347 * Performs transformations specific to main page
535348 * @param DOMDocument $mainPage: Tree to process
536349 * @return DOMElement
Index: trunk/extensions/MobileFrontend/MobileFrontend.body.php
@@ -1068,24 +1068,25 @@
10691069
10701070 $formatter->setIsMainPage( self::$isMainPage );
10711071 $prepend = '';
1072 - if ( $this->contentFormat == 'WML' ) {
1073 - // Wml for searching
1074 - $prepend = '<p><input emptyok="true" format="*M" type="text" name="search" value="" size="16" />' .
1075 - '<do type="accept" label="' . self::$messages['mobile-frontend-search-submit'] . '">' .
1076 - '<go href="' . $wgScript . '?title=Special%3ASearch&amp;search=$(search)"></go></do></p>';
1077 - } elseif ( $this->contentFormat == 'XHTML'
 1072+ if ( $this->contentFormat == 'XHTML'
10781073 && self::$device['supports_javascript'] === true
10791074 && empty( self::$search ) )
10801075 {
10811076 $formatter->enableExpandableSections();
10821077 }
1083 - $contentHtml = $formatter->getText( 'content', $prepend );
 1078+ $contentHtml = $formatter->getText( 'content' );
10841079
10851080 $htmlTitle = htmlspecialchars( self::$htmlTitle );
10861081
10871082 if ( $this->contentFormat == 'WML' ) {
10881083 header( 'Content-Type: text/vnd.wap.wml' );
10891084
 1085+ // Wml for searching
 1086+ $prepend = '<p><input emptyok="true" format="*M" type="text" name="search" value="" size="16" />' .
 1087+ '<do type="accept" label="' . self::$messages['mobile-frontend-search-submit'] . '">' .
 1088+ '<go href="' . $wgScript . '?title=Special%3ASearch&amp;search=$(search)"></go></do></p>';
 1089+ $html = $prepend . $html;
 1090+
10901091 $applicationWmlTemplate = new ApplicationWmlTemplate();
10911092 $options = array(
10921093 'mainPageUrl' => self::$mainPageUrl,
Index: trunk/extensions/MobileFrontend/MobileFrontend.php
@@ -46,6 +46,7 @@
4747
4848 'CssDetection' => 'CssDetection',
4949 'DeviceDetection' => 'DeviceDetection',
 50+ 'HtmlFormatter' => 'HtmlFormatter',
5051 'MobileFormatter' => 'MobileFormatter',
5152 'WmlContext' => 'WmlContext',
5253
Index: trunk/extensions/MobileFrontend/HtmlFormatter.php
@@ -0,0 +1,250 @@
 2+<?php
 3+
 4+/**
 5+ * Performs transformations of HTML
 6+ */
 7+class HtmlFormatter {
 8+ /**
 9+ * @var DOMDocument
 10+ */
 11+ protected $doc;
 12+
 13+ private $itemsToRemove = array();
 14+ private $elementsToFlatten = array();
 15+ private $removeImages = false;
 16+ private $idWhitelist = array();
 17+ private $flattenRedLinks = false;
 18+
 19+ /**
 20+ * Constructor
 21+ *
 22+ * @param string $html: Text to process
 23+ */
 24+ public function __construct( $html ) {
 25+ wfProfileIn( __METHOD__ );
 26+
 27+ $html = mb_convert_encoding( $html, 'HTML-ENTITIES', "UTF-8" );
 28+ libxml_use_internal_errors( true );
 29+ $this->doc = new DOMDocument();
 30+ $this->doc->loadHTML( '<?xml encoding="UTF-8">' . $html );
 31+ libxml_use_internal_errors( false );
 32+ $this->doc->preserveWhiteSpace = false;
 33+ $this->doc->strictErrorChecking = false;
 34+ $this->doc->encoding = 'UTF-8';
 35+
 36+ wfProfileOut( __METHOD__ );
 37+ }
 38+
 39+ /**
 40+ * Turns a chunk of HTML into a proper document
 41+ * @param string $html
 42+ * @return string
 43+ */
 44+ public static function wrapHTML( $html ) {
 45+ return '<!doctype html><html><head></head><body>' . $html . '</body></html>';
 46+ }
 47+
 48+ /**
 49+ * Override this in descendant class to modify HTML after it has been converted from DOM tree
 50+ * @param string $html: HTML to process
 51+ * @return string: Processed HTML
 52+ */
 53+ protected function onHtmlReady( $html ) {
 54+ return $html;
 55+ }
 56+
 57+ /**
 58+ * @return DOMDocument: DOM to manipulate
 59+ */
 60+ public function getDoc() {
 61+ return $this->doc;
 62+ }
 63+
 64+ /**
 65+ * Sets whether images should be removed from output
 66+ * @param bool $flag
 67+ */
 68+ public function removeImages( $flag = true ) {
 69+ $this->removeImages = $flag;
 70+ }
 71+
 72+ /**
 73+ * Adds one or more selector of content to remove
 74+ * @param Array|string $selectors: Selector(s) of stuff to remove
 75+ */
 76+ public function remove( $selectors ) {
 77+ $this->itemsToRemove = array_merge( $this->itemsToRemove, (array)$selectors );
 78+ }
 79+
 80+ /**
 81+ * Adds one or more element name to the list to flatten (remove tag, but not its content)
 82+ * @param Array|string $elements: Name(s) of tag(s) to flatten
 83+ */
 84+ public function flatten( $elements ) {
 85+ $this->elementsToFlatten = array_merge( $this->elementsToFlatten, (array)$elements );
 86+ }
 87+
 88+ /**
 89+ * Sets whether red links should be flattened
 90+ * @param bool $flag
 91+ */
 92+ public function flattenRedLinks( $flag = true ) {
 93+ $this->flattenRedLinks = $flag;
 94+ }
 95+
 96+ /**
 97+ * @param Array|string $ids: Id(s) of content to keep
 98+ */
 99+ public function whitelistIds( $ids ) {
 100+ $this->idWhitelist = array_merge( $this->idWhitelist, array_flip( (array)$ids ) );
 101+ }
 102+
 103+ /**
 104+ * Removes the content selected via remove() and removeImages() and, if enabled, flattens red links into spans
 105+ */
 106+ public function filterContent() {
 107+ $removals = $this->parseItemsToRemove();
 108+
 109+ // Remove tags
 110+
 111+ // You can't remove DOMNodes from a DOMNodeList as you're iterating
 112+ // over them in a foreach loop. It will seemingly leave the internal
 113+ // iterator on the foreach out of wack and results will be quite
 114+ // strange. Though, making a queue of items to remove seems to work.
 115+ // For example:
 116+
 117+ $domElemsToRemove = array();
 118+ foreach ( $removals['TAG'] as $tagToRemove ) {
 119+ $tagToRemoveNodes = $this->doc->getElementsByTagName( $tagToRemove );
 120+ foreach ( $tagToRemoveNodes as $tagToRemoveNode ) {
 121+ $tagToRemoveNodeIdAttributeValue = '';
 122+ if ( $tagToRemoveNode ) {
 123+ $tagToRemoveNodeIdAttribute = $tagToRemoveNode->getAttributeNode( 'id' );
 124+ if ( $tagToRemoveNodeIdAttribute ) {
 125+ $tagToRemoveNodeIdAttributeValue = $tagToRemoveNodeIdAttribute->value;
 126+ }
 127+ if ( !isset( $this->idWhitelist[$tagToRemoveNodeIdAttributeValue] ) ) {
 128+ $domElemsToRemove[] = $tagToRemoveNode;
 129+ }
 130+ }
 131+ }
 132+ }
 133+
 134+ foreach ( $domElemsToRemove as $domElement ) {
 135+ $domElement->parentNode->removeChild( $domElement );
 136+ }
 137+
 138+ // Elements with named IDs
 139+ foreach ( $removals['ID'] as $itemToRemove ) {
 140+ $itemToRemoveNode = $this->doc->getElementById( $itemToRemove );
 141+ if ( $itemToRemoveNode ) {
 142+ $itemToRemoveNode->parentNode->removeChild( $itemToRemoveNode );
 143+ }
 144+ }
 145+
 146+ // CSS Classes
 147+ $xpath = new DOMXpath( $this->doc );
 148+ foreach ( $removals['CLASS'] as $classToRemove ) {
 149+ $elements = $xpath->query( '//*[@class="' . $classToRemove . '"]' );
 150+
 151+ foreach ( $elements as $element ) {
 152+ $element->parentNode->removeChild( $element );
 153+ }
 154+ }
 155+
 156+ // Tags with CSS Classes
 157+ foreach ( $removals['TAG_CLASS'] as $classToRemove ) {
 158+ $parts = explode( '.', $classToRemove );
 159+
 160+ $elements = $xpath->query(
 161+ '//' . $parts[0] . '[@class="' . $parts[1] . '"]'
 162+ );
 163+
 164+ foreach ( $elements as $element ) {
 165+ $removedElement = $element->parentNode->removeChild( $element );
 166+ }
 167+ }
 168+
 169+ // Handle red links with action equal to edit
 170+ if ( $this->flattenRedLinks ) {
 171+ $redLinks = $xpath->query( '//a[@class="new"]' );
 172+ foreach ( $redLinks as $redLink ) {
 173+ // PHP Bug #36795 — Inappropriate "unterminated entity reference"
 174+ $spanNode = $this->doc->createElement( "span", str_replace( "&", "&amp;", $redLink->nodeValue ) );
 175+
 176+ if ( $redLink->hasAttributes() ) {
 177+ $attributes = $redLink->attributes;
 178+ foreach ( $attributes as $i => $attribute ) {
 179+ if ( $attribute->name != 'href' ) {
 180+ $spanNode->setAttribute( $attribute->name, $attribute->value );
 181+ }
 182+ }
 183+ }
 184+
 185+ $redLink->parentNode->replaceChild( $spanNode, $redLink );
 186+ }
 187+ }
 188+ wfProfileOut( __METHOD__ );
 189+ }
 190+
 191+ /**
 192+ * Performs final transformations and returns resulting HTML
 193+ *
 194+ * @param DOMElement|string|null $element: Element, or ID of the element, to get HTML from; null to get it from the whole tree
 195+ * @return string: Processed HTML
 196+ */
 197+ public function getText( $element = null ) {
 198+ wfProfileIn( __METHOD__ );
 199+
 200+ if ( $element !== null && !( $element instanceof DOMElement ) ) {
 201+ $element = $this->doc->getElementById( $element );
 202+ }
 203+ $html = $this->doc->saveXML( $element, LIBXML_NOEMPTYTAG );
 204+ if ( !$element ) {
 205+ $html = preg_replace( '/<!--.*?-->|^.*?<body>|<\/body>.*$/s', '', $html );
 206+ }
 207+ $html = $this->onHtmlReady( $html );
 208+
 209+ if ( $this->elementsToFlatten ) {
 210+ $elements = implode( '|', $this->elementsToFlatten );
 211+ $html = preg_replace( "#</?($elements)\\b[^>]*>#is", '', $html );
 212+ }
 213+
 214+ wfProfileOut( __METHOD__ );
 215+ return $html;
 216+ }
 217+
 218+ /**
 219+ * Transforms CSS selectors into an internal representation suitable for processing
 220+ * @return array
 221+ */
 222+ protected function parseItemsToRemove() {
 223+ wfProfileIn( __METHOD__ );
 224+ $removals = array(
 225+ 'ID' => array(),
 226+ 'TAG' => array(),
 227+ 'CLASS' => array(),
 228+ 'TAG_CLASS' => array(),
 229+ );
 230+
 231+ foreach ( $this->itemsToRemove as $itemToRemove ) {
 232+ $type = '';
 233+ $rawName = '';
 234+ CssDetection::detectIdCssOrTag( $itemToRemove, $type, $rawName );
 235+ $removals[$type][] = $rawName;
 236+ }
 237+
 238+ if ( $this->removeImages ) {
 239+ $removals['TAG'][] = "img";
 240+ $removals['TAG'][] = "audio";
 241+ $removals['TAG'][] = "video";
 242+ $removals['CLASS'][] = "thumb tright";
 243+ $removals['CLASS'][] = "thumb tleft";
 244+ $removals['CLASS'][] = "thumbcaption";
 245+ $removals['CLASS'][] = "gallery";
 246+ }
 247+
 248+ wfProfileOut( __METHOD__ );
 249+ return $removals;
 250+ }
 251+}
Property changes on: trunk/extensions/MobileFrontend/HtmlFormatter.php
___________________________________________________________________
Added: svn:eol-style
1252 + native

Sign-offs

User | Flag | Date
Laberkiste | inspected | 23:03, 13 March 2012

Status & tagging log