r96799 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r96798‎ | r96799 | r96800 >
Date:18:09, 11 September 2011
Author:maxsem
Status:deferred (Comments)
Tags:
Comment:
Rewrote tokenizer for better readability and less copypasta
Modified paths:
  • /trunk/extensions/wikihiero/wikihiero.body.php (modified) (history)

Diff [purge]

Index: trunk/extensions/wikihiero/wikihiero.body.php
@@ -35,12 +35,6 @@
3636 define( "WH_MODE_IMAGE", 3 ); // picture (PNG) // not supported
3737 define( "WH_MODE_RAW", 4 ); // MdC test as it
3838
39 -define( "WH_TYPE_NONE", 0 );
40 -define( "WH_TYPE_GLYPH", 1 ); // rendered items
41 -define( "WH_TYPE_CODE", 2 ); // single code as ':', '*', '!', '(' or ')'
42 -define( "WH_TYPE_SPECIAL", 3 ); // advanced code (more than 1 caracter)
43 -define( "WH_TYPE_END", 4 ); // end of line '!'
44 -
4539 define( "WH_SCALE_DEFAULT", -1 ); // use default scale
4640
4741 global $wgExtensionAssetsPath;
@@ -278,79 +272,11 @@
279273 $html .= "<hr />\n";
280274 }
281275
282 - // ------------------------------------------------------------------------
283 - // Split text into blocks, then split blocks into items
284 - $blocks = array();
285 - $blocks[0] = array();
286 - $blocks[0][0] = "";
287 - $blocks_id = 0;
288 - $item_id = 0;
289 - $parenthesis = 0;
290 - $type = WH_TYPE_NONE;
 276+ $tokenizer = new HieroTokenizer( $hiero );
 277+ $blocks = $tokenizer->tokenize();
 278+ $contentHtml = $tableHtml = $tableContentHtml = "";
291279 $is_cartouche = false;
292280
293 - for ( $char = 0; $char < strlen( $hiero ); $char++ ) {
294 -
295 - if ( $hiero[$char] == '(' ) {
296 - $parenthesis++;
297 - } elseif ( $hiero[$char] == ')' ) {
298 - $parenthesis--;
299 - }
300 -
301 - if ( $parenthesis == 0 ) {
302 - if ( $hiero[$char] == '-' || $hiero[$char] == ' ' ) {
303 - if ( $type != WH_TYPE_NONE ) {
304 - $blocks_id++;
305 - $blocks[$blocks_id] = array();
306 - $item_id = 0;
307 - $blocks[$blocks_id][$item_id] = "";
308 - $type = WH_TYPE_NONE;
309 - }
310 - }
311 - } else {// don't split block if inside parenthesis
312 - if ( $hiero[$char] == '-' ) {
313 - $item_id++;
314 - $blocks[$blocks_id][$item_id] = '-';
315 - $type = WH_TYPE_CODE;
316 - }
317 - }
318 -
319 - if ( $hiero[$char] == '!' ) {
320 - if ( $item_id > 0 ) {
321 - $blocks_id++;
322 - $blocks[$blocks_id] = array();
323 - $item_id = 0;
324 - }
325 - $blocks[$blocks_id][$item_id] = $hiero[$char];
326 - $type = WH_TYPE_END;
327 -
328 - } elseif ( preg_match( "/[*:()]/", $hiero[$char] ) ) {
329 -
330 - if ( $type == WH_TYPE_GLYPH || $type == WH_TYPE_CODE ) {
331 - $item_id++;
332 - $blocks[$blocks_id][$item_id] = "";
333 - }
334 - $blocks[$blocks_id][$item_id] = $hiero[$char];
335 - $type = WH_TYPE_CODE;
336 -
337 - } elseif ( ctype_alnum( $hiero[$char] ) || $hiero[$char] == '.' || $hiero[$char] == '<'
338 - || $hiero[$char] == '>' || $hiero[$char] == '\\' ) {
339 - if ( $type == WH_TYPE_END ) {
340 - $blocks_id++;
341 - $blocks[$blocks_id] = array();
342 - $item_id = 0;
343 - $blocks[$blocks_id][$item_id] = "";
344 - } elseif ( $type == WH_TYPE_CODE ) {
345 - $item_id++;
346 - $blocks[$blocks_id][$item_id] = "";
347 - }
348 - $blocks[$blocks_id][$item_id] .= $hiero[$char];
349 - $type = WH_TYPE_GLYPH;
350 - }
351 - }
352 -
353 - $contentHtml = $tableHtml = $tableContentHtml = "";
354 -
355281 // ------------------------------------------------------------------------
356282 // Loop into all blocks
357283 foreach ( $blocks as $code ) {
@@ -499,3 +425,106 @@
500426 return substr( $file, strlen( self::IMAGE_PREFIX ), -( 1 + strlen( self::IMAGE_EXT ) ) );
501427 }
502428 }
 429+
/**
 * Hieroglyphs tokenizer class
 *
 * Splits hieroglyphic markup into "blocks" (separated by '-' or ' ' outside
 * of parentheses, or by the end-of-line marker '!') and splits every block
 * into "items" (glyph codes and single-character operators).
 */
/*private*/ class HieroTokenizer {
	const TYPE_NONE = 0; // nothing has been put into the current block yet
	const TYPE_GLYPH = 1; // rendered items
	const TYPE_CODE = 2; // single code as ':', '*', '!', '(' or ')'
	const TYPE_SPECIAL = 3; // advanced code (more than 1 character)
	const TYPE_END = 4; // end of line '!'

	/** Characters that form a one-character operator item */
	const CODE_CHARS = '*:()';

	/** Non-alphanumeric characters that may be part of a glyph code */
	const GLYPH_EXTRA_CHARS = '.<>\\';

	/** @var string Text to tokenize */
	private $text;
	/** @var array|bool Cached result of tokenize(), false until it has run */
	private $blocks = false;
	/** @var int Index of the block currently being filled */
	private $blocks_id = 0;
	/** @var int Index of the item currently being filled within that block */
	private $item_id = 0;

	/**
	 * Constructor
	 *
	 * @param $text string: hieroglyphic markup to tokenize
	 */
	public function __construct( $text ) {
		$this->text = $text;
	}

	/**
	 * Split text into blocks, then split blocks into items
	 *
	 * The result is computed once and cached; later calls return the cached
	 * value.
	 *
	 * @return array: tokenized text as a list of blocks, each block being
	 *   a list of item strings
	 */
	public function tokenize() {
		if ( $this->blocks !== false ) {
			return $this->blocks;
		}
		$this->blocks = array( array( '' ) );
		$this->blocks_id = 0;
		$this->item_id = 0;
		$parentheses = 0;
		$type = self::TYPE_NONE;
		// The text never changes inside the loop, so measure it only once
		$length = strlen( $this->text );

		for ( $i = 0; $i < $length; $i++ ) {
			$char = $this->text[$i];

			if ( $char == '(' ) {
				$parentheses++;
			} elseif ( $char == ')' && $parentheses > 0 ) {
				// Never let an unmatched ')' push the depth below zero,
				// otherwise every following '-' would be treated as if it
				// were inside parentheses and blocks would stop being split.
				$parentheses--;
			}

			if ( $parentheses == 0 ) {
				if ( $char == '-' || $char == ' ' ) {
					if ( $type != self::TYPE_NONE ) {
						$this->addBlock( '' );
						$type = self::TYPE_NONE;
					}
				}
			} else {// don't split block if inside parentheses
				if ( $char == '-' ) {
					$this->addItem( '-' );
					$type = self::TYPE_CODE;
				}
			}

			if ( $char == '!' ) {
				// The end-of-line marker lives in a block of its own. Open
				// a new block whenever the current one already holds a
				// glyph or a code, so that this content is not overwritten.
				if ( $type == self::TYPE_GLYPH || $type == self::TYPE_CODE ) {
					$this->addBlock();
				}
				$this->blocks[$this->blocks_id][$this->item_id] = $char;
				$type = self::TYPE_END;

			} elseif ( strpos( self::CODE_CHARS, $char ) !== false ) {
				if ( $type == self::TYPE_END ) {
					// Keep the '!' block intact instead of overwriting it
					$this->addBlock();
				} elseif ( $type == self::TYPE_GLYPH || $type == self::TYPE_CODE ) {
					$this->addItem( '' );
				}
				$this->blocks[$this->blocks_id][$this->item_id] = $char;
				$type = self::TYPE_CODE;

			} elseif ( ctype_alnum( $char ) || strpos( self::GLYPH_EXTRA_CHARS, $char ) !== false ) {
				if ( $type == self::TYPE_END ) {
					$this->addBlock( '' );
				} elseif ( $type == self::TYPE_CODE ) {
					$this->addItem( '' );
				}
				// Glyph codes span several characters: keep appending
				$this->blocks[$this->blocks_id][$this->item_id] .= $char;
				$type = self::TYPE_GLYPH;
			}
		}
		return $this->blocks;
	}

	/**
	 * Start a new block and make it the current one
	 *
	 * @param $newItem string|bool: initial value of the first item of the
	 *   block, or false to leave the block empty
	 */
	private function addBlock( $newItem = false ) {
		$this->blocks_id++;
		$this->blocks[$this->blocks_id] = array();
		$this->item_id = 0;
		if ( $newItem !== false ) {
			$this->blocks[$this->blocks_id][$this->item_id] = $newItem;
		}
	}

	/**
	 * Append a new item to the current block and make it the current item
	 *
	 * @param $item string: initial value of the item
	 */
	private function addItem( $item ) {
		$this->item_id++;
		$this->blocks[$this->blocks_id][$this->item_id] = $item;
	}
}

Comments

#Comment by Tim Starling (talk | contribs)   05:09, 25 November 2011

Superseded by r97542.

Status & tagging log