Index: trunk/extensions/wikihiero/tests.txt |
— | — | @@ -12,7 +12,7 @@ |
13 | 13 | !! input |
14 | 14 | <hiero><script>alert("FAIL")</script></hiero> |
15 | 15 | !! result |
16 | | -<table class='mw-hiero-table mw-hiero-outer' dir='ltr'><tr><td> <table class="mw-hiero-table"><tr> <td><script>alert(FAIL)<script></td> </tr></table> </td></tr></table> |
| 16 | +<table class='mw-hiero-table mw-hiero-outer' dir='ltr'><tr><td> <table class="mw-hiero-table"><tr> <td><script>alert("FAIL")</script></td> </tr></table> </td></tr></table> |
17 | 17 | |
18 | 18 | !! end |
19 | 19 | |
— | — | @@ -44,6 +44,15 @@ |
45 | 45 | !! end |
46 | 46 | |
47 | 47 | !! test |
| 48 | +WikiHiero - EOL |
| 49 | +!! input |
| 50 | +<hiero>A1!B1 ! C1</hiero> |
| 51 | +!! result |
| 52 | +<table class='mw-hiero-table mw-hiero-outer' dir='ltr'><tr><td> <table class="mw-hiero-table"><tr> <td><img style='margin:1px;' height='38' src='/extensions/wikihiero/img/hiero_A1.png' title='A1' alt='A1' /></td></tr></table><table class="mw-hiero-table"><tr> <td><img style='margin:1px;' height='38' src='/extensions/wikihiero/img/hiero_B1.png' title='B1' alt='B1' /></td></tr></table><table class="mw-hiero-table"><tr> <td><img style='margin:1px;' height='38' src='/extensions/wikihiero/img/hiero_C1.png' title='C1' alt='C1' /></td></tr></table> </td></tr></table> |
| 53 | + |
| 54 | +!! end |
| 55 | + |
| 56 | +!! test |
48 | 57 | WikiHiero - complex text with EOL |
49 | 58 | !! input |
50 | 59 | <hiero>M23-X1:R4-X8-Q2:D4-W17-R14-G4-R8-O29:V30-U23 ! |
— | — | @@ -61,3 +70,14 @@ |
62 | 71 | <table class='mw-hiero-table mw-hiero-outer' dir='ltr'><tr><td> <table class="mw-hiero-table"><tr> <td><img class="mw-mirrored" style='margin:1px;' height='38' src='/extensions/wikihiero/img/hiero_A1.png' title='A1' alt='A1' /></td><td><img style='margin:1px;' height='38' src='/extensions/wikihiero/img/hiero_A1.png' title='A1' alt='A1' /></td></tr></table> </td></tr></table> |
63 | 72 | |
64 | 73 | !! end |
| 74 | + |
| 75 | +!! test |
| 76 | +WikiHiero - void blocks |
| 77 | +!! input |
| 78 | +<hiero>A1..B1.C1</hiero> |
| 79 | +<hiero>A1 .. B1 . C1</hiero> |
| 80 | +!!result |
| 81 | +<table class='mw-hiero-table mw-hiero-outer' dir='ltr'><tr><td> <table class="mw-hiero-table"><tr> <td><img style='margin:1px;' height='38' src='/extensions/wikihiero/img/hiero_A1.png' title='A1' alt='A1' /></td><td><table class="mw-hiero-table" style="width: 44px;"><tr><td> </td></tr></table></td><td><img style='margin:1px;' height='38' src='/extensions/wikihiero/img/hiero_B1.png' title='B1' alt='B1' /></td><td><table class="mw-hiero-table" style="width: 22px;"><tr><td> </td></tr></table></td><td><img style='margin:1px;' height='38' src='/extensions/wikihiero/img/hiero_C1.png' title='C1' alt='C1' /></td></tr></table> </td></tr></table> |
| 82 | +<table class='mw-hiero-table mw-hiero-outer' dir='ltr'><tr><td> <table class="mw-hiero-table"><tr> <td><img style='margin:1px;' height='38' src='/extensions/wikihiero/img/hiero_A1.png' title='A1' alt='A1' /></td><td><table class="mw-hiero-table" style="width: 44px;"><tr><td> </td></tr></table></td><td><img style='margin:1px;' height='38' src='/extensions/wikihiero/img/hiero_B1.png' title='B1' alt='B1' /></td><td><table class="mw-hiero-table" style="width: 22px;"><tr><td> </td></tr></table></td><td><img style='margin:1px;' height='38' src='/extensions/wikihiero/img/hiero_C1.png' title='C1' alt='C1' /></td></tr></table> </td></tr></table> |
| 83 | + |
| 84 | +!! end |
Index: trunk/extensions/wikihiero/wikihiero.body.php |
— | — | @@ -430,16 +430,14 @@ |
431 | 431 | * Hieroglyphs tokenizer class |
432 | 432 | */ |
433 | 433 | /*private*/ class HieroTokenizer { |
434 | | - const TYPE_NONE = 0; |
435 | | - const TYPE_GLYPH = 1; // rendered items |
436 | | - const TYPE_CODE = 2; // single code as ':', '*', '!', '(' or ')' |
437 | | - const TYPE_SPECIAL = 3; // advanced code (more than 1 caracter) |
438 | | - const TYPE_END = 4; // end of line '!' |
| 434 | + private static $delimiters = false; |
| 435 | + private static $tokenDelimiters; |
| 436 | + private static $singleChars; |
439 | 437 | |
440 | 438 | private $text; |
441 | 439 | private $blocks = false; |
442 | | - private $blocks_id = 0; |
443 | | - private $item_id = 0; |
| 440 | + private $currentBlock; |
| 441 | + private $token; |
444 | 442 | |
445 | 443 | /** |
446 | 444 | * Constructor |
— | — | @@ -448,8 +446,19 @@ |
449 | 447 | */ |
450 | 448 | public function __construct( $text ) { |
451 | 449 | $this->text = $text; |
| 450 | + self::initStatic(); |
452 | 451 | } |
453 | 452 | |
| 453 | + private static function initStatic() { |
| 454 | + if ( self::$delimiters ) { |
| 455 | + return; |
| 456 | + } |
| 457 | + |
| 458 | + self::$delimiters = array_flip( array( ' ', '-', "\t", "\n" ) ); |
| 459 | + self::$tokenDelimiters = array_flip( array( '*', ':', '(', ')' ) ); |
| 460 | + self::$singleChars = array_flip( array( '!' ) ); |
| 461 | + } |
| 462 | + |
454 | 463 | /** |
455 | 464 | * Split text into blocks, then split blocks into items |
456 | 465 | * |
— | — | @@ -459,72 +468,93 @@ |
460 | 469 | if ( $this->blocks !== false ) { |
461 | 470 | return $this->blocks; |
462 | 471 | } |
463 | | - $this->blocks = array( array( '' ) ); |
464 | | - $parentheses = 0; |
465 | | - $type = self::TYPE_NONE; |
| 472 | + $this->blocks = array(); |
| 473 | + $this->currentBlock = array(); |
| 474 | + $this->token = ''; |
| 475 | + |
| 476 | + $text = preg_replace( '/<!--.*?-->/', '', $this->text ); // remove HTML comments |
466 | 477 | |
467 | | - for ( $i = 0; $i < strlen( $this->text ); $i++ ) { |
| 478 | + for ( $i = 0; $i < strlen( $text ); $i++ ) { |
468 | 479 | $char = $this->text[$i]; |
469 | 480 | |
470 | | - if ( $char == '(' ) { |
471 | | - $parentheses++; |
472 | | - } elseif ( $char == ')' ) { |
473 | | - $parentheses--; |
| 481 | + if ( isset( self::$delimiters[$char] ) ) { |
| 482 | + $this->newBlock(); |
| 483 | + } elseif ( isset( self::$singleChars[$char] ) ) { |
| 484 | + $this->singleCharBlock( $char ); |
| 485 | + } elseif ( $char == '.' ) { |
| 486 | + $this->dot(); |
| 487 | + } elseif ( isset( self::$tokenDelimiters[$char] ) ) { |
| 488 | + $this->newToken( $char ); |
| 489 | + } else { |
| 490 | + $this->char( $char ); |
474 | 491 | } |
| 492 | + } |
475 | 493 | |
476 | | - if ( $parentheses == 0 ) { |
477 | | - if ( $char == '-' || $char == ' ' ) { |
478 | | - if ( $type != self::TYPE_NONE ) { |
479 | | - $this->addBlock( '' ); |
480 | | - $type = self::TYPE_NONE; |
481 | | - } |
482 | | - } |
483 | | - } else {// don't split block if inside parentheses |
484 | | - if ( $char == '-' ) { |
485 | | - $this->addItem( '-' ); |
486 | | - $type = self::TYPE_CODE; |
487 | | - } |
488 | | - } |
| 494 | + $this->newBlock(); // flush stuff being processed |
489 | 495 | |
490 | | - if ( $char == '!' ) { |
491 | | - if ( $this->item_id > 0 ) { |
492 | | - $this->addBlock(); |
493 | | - } |
494 | | - $this->blocks[$this->blocks_id][$this->item_id] = $char; |
495 | | - $type = self::TYPE_END; |
| 496 | + return $this->blocks; |
| 497 | + } |
496 | 498 | |
497 | | - } elseif ( preg_match( '/[*:()]/', $char ) ) { |
498 | | - if ( $type == self::TYPE_GLYPH || $type == self::TYPE_CODE ) { |
499 | | - $this->addItem( '' ); |
500 | | - } |
501 | | - $this->blocks[$this->blocks_id][$this->item_id] = $char; |
502 | | - $type = self::TYPE_CODE; |
| 499 | + /** |
| 500 | + * Handles a block delimiter |
| 501 | + */ |
| 502 | + private function newBlock() { |
| 503 | + $this->newToken(); |
| 504 | + if( $this->currentBlock ) { |
| 505 | + $this->blocks[] = $this->currentBlock; |
| 506 | + $this->currentBlock = array(); |
| 507 | + } |
| 508 | + } |
503 | 509 | |
504 | | - } elseif ( ctype_alnum( $char ) || $char == '.' || $char == '<' |
505 | | - || $char == '>' || $char == '\\' ) { |
506 | | - if ( $type == self::TYPE_END ) { |
507 | | - $this->addBlock( '' ); |
508 | | - } elseif ( $type == self::TYPE_CODE ) { |
509 | | - $this->addItem( '' ); |
510 | | - } |
511 | | - $this->blocks[$this->blocks_id][$this->item_id] .= $char; |
512 | | - $type = self::TYPE_GLYPH; |
513 | | - } |
| 510 | + /** |
| 511 | + * Flushes current token, optionally adds another one |
| 512 | + * |
| 513 | + * @param $token Mixed: token to add or false |
| 514 | + */ |
| 515 | + private function newToken( $token = false ) { |
| 516 | + if ( $this->token !== '' ) { |
| 517 | + $this->currentBlock[] = $this->token; |
| 518 | + $this->token = ''; |
514 | 519 | } |
515 | | - return $this->blocks; |
| 520 | + if ( $token !== false ) { |
| 521 | + $this->currentBlock[] = $token; |
| 522 | + } |
516 | 523 | } |
517 | 524 | |
518 | | - private function addBlock( $newItem = false ) { |
519 | | - $this->blocks_id++; |
520 | | - $this->blocks[$this->blocks_id] = array(); |
521 | | - $this->item_id = 0; |
522 | | - if ( $newItem !== false ) { |
523 | | - $this->blocks[$this->blocks_id][$this->item_id] = $newItem; |
| 525 | + /** |
| 526 | + * Adds a block consisting of one character |
| 527 | + * |
| 528 | + * @param $char string: block character |
| 529 | + */ |
| 530 | + private function singleCharBlock( $char ) { |
| 531 | + $this->newBlock(); |
| 532 | + $this->blocks[] = array( $char ); |
| 533 | + } |
| 534 | + |
| 535 | + /** |
| 536 | + * Handles void blocks represented by dots |
| 537 | + */ |
| 538 | + private function dot() { |
| 539 | + if ( $this->token == '.' ) { |
| 540 | + $this->token = '..'; |
| 541 | + $this->newBlock(); |
| 542 | + } else { |
| 543 | + $this->newBlock(); |
| 544 | + $this->token = '.'; |
524 | 545 | } |
525 | 546 | } |
526 | 547 | |
527 | | - private function addItem( $item ) { |
528 | | - $this->item_id++; |
529 | | - $this->blocks[$this->blocks_id][$this->item_id] = $item; |
| 548 | + /** |
| 549 | + * Adds a miscellaneous character to current token |
| 550 | + * |
| 551 | + * @param $char string: character to add |
| 552 | + */ |
| 553 | + private function char( $char ) { |
| 554 | + if ( $this->token == '.' ) { |
| 555 | + $this->newBlock(); |
| 556 | + $this->token = $char; |
| 557 | + } else { |
| 558 | + $this->token .= $char; |
| 559 | + } |
530 | 560 | } |
531 | | -} |
\ No newline at end of file |
| 561 | +} |