Index: trunk/extensions/wikihiero/wikihiero.body.php |
— | — | @@ -35,12 +35,6 @@ |
36 | 36 | define( "WH_MODE_IMAGE", 3 ); // picture (PNG) // not supported |
37 | 37 | define( "WH_MODE_RAW", 4 ); // MdC text as is |
38 | 38 | |
39 | | -define( "WH_TYPE_NONE", 0 ); |
40 | | -define( "WH_TYPE_GLYPH", 1 ); // rendered items |
41 | | -define( "WH_TYPE_CODE", 2 ); // single code as ':', '*', '!', '(' or ')' |
42 | | -define( "WH_TYPE_SPECIAL", 3 ); // advanced code (more than 1 caracter) |
43 | | -define( "WH_TYPE_END", 4 ); // end of line '!' |
44 | | - |
45 | 39 | define( "WH_SCALE_DEFAULT", -1 ); // use default scale |
46 | 40 | |
47 | 41 | global $wgExtensionAssetsPath; |
— | — | @@ -278,79 +272,11 @@ |
279 | 273 | $html .= "<hr />\n"; |
280 | 274 | } |
281 | 275 | |
282 | | - // ------------------------------------------------------------------------ |
283 | | - // Split text into blocks, then split blocks into items |
284 | | - $blocks = array(); |
285 | | - $blocks[0] = array(); |
286 | | - $blocks[0][0] = ""; |
287 | | - $blocks_id = 0; |
288 | | - $item_id = 0; |
289 | | - $parenthesis = 0; |
290 | | - $type = WH_TYPE_NONE; |
| 276 | + $tokenizer = new HieroTokenizer( $hiero ); |
| 277 | + $blocks = $tokenizer->tokenize(); |
| 278 | + $contentHtml = $tableHtml = $tableContentHtml = ""; |
291 | 279 | $is_cartouche = false; |
292 | 280 | |
293 | | - for ( $char = 0; $char < strlen( $hiero ); $char++ ) { |
294 | | - |
295 | | - if ( $hiero[$char] == '(' ) { |
296 | | - $parenthesis++; |
297 | | - } elseif ( $hiero[$char] == ')' ) { |
298 | | - $parenthesis--; |
299 | | - } |
300 | | - |
301 | | - if ( $parenthesis == 0 ) { |
302 | | - if ( $hiero[$char] == '-' || $hiero[$char] == ' ' ) { |
303 | | - if ( $type != WH_TYPE_NONE ) { |
304 | | - $blocks_id++; |
305 | | - $blocks[$blocks_id] = array(); |
306 | | - $item_id = 0; |
307 | | - $blocks[$blocks_id][$item_id] = ""; |
308 | | - $type = WH_TYPE_NONE; |
309 | | - } |
310 | | - } |
311 | | - } else {// don't split block if inside parenthesis |
312 | | - if ( $hiero[$char] == '-' ) { |
313 | | - $item_id++; |
314 | | - $blocks[$blocks_id][$item_id] = '-'; |
315 | | - $type = WH_TYPE_CODE; |
316 | | - } |
317 | | - } |
318 | | - |
319 | | - if ( $hiero[$char] == '!' ) { |
320 | | - if ( $item_id > 0 ) { |
321 | | - $blocks_id++; |
322 | | - $blocks[$blocks_id] = array(); |
323 | | - $item_id = 0; |
324 | | - } |
325 | | - $blocks[$blocks_id][$item_id] = $hiero[$char]; |
326 | | - $type = WH_TYPE_END; |
327 | | - |
328 | | - } elseif ( preg_match( "/[*:()]/", $hiero[$char] ) ) { |
329 | | - |
330 | | - if ( $type == WH_TYPE_GLYPH || $type == WH_TYPE_CODE ) { |
331 | | - $item_id++; |
332 | | - $blocks[$blocks_id][$item_id] = ""; |
333 | | - } |
334 | | - $blocks[$blocks_id][$item_id] = $hiero[$char]; |
335 | | - $type = WH_TYPE_CODE; |
336 | | - |
337 | | - } elseif ( ctype_alnum( $hiero[$char] ) || $hiero[$char] == '.' || $hiero[$char] == '<' |
338 | | - || $hiero[$char] == '>' || $hiero[$char] == '\\' ) { |
339 | | - if ( $type == WH_TYPE_END ) { |
340 | | - $blocks_id++; |
341 | | - $blocks[$blocks_id] = array(); |
342 | | - $item_id = 0; |
343 | | - $blocks[$blocks_id][$item_id] = ""; |
344 | | - } elseif ( $type == WH_TYPE_CODE ) { |
345 | | - $item_id++; |
346 | | - $blocks[$blocks_id][$item_id] = ""; |
347 | | - } |
348 | | - $blocks[$blocks_id][$item_id] .= $hiero[$char]; |
349 | | - $type = WH_TYPE_GLYPH; |
350 | | - } |
351 | | - } |
352 | | - |
353 | | - $contentHtml = $tableHtml = $tableContentHtml = ""; |
354 | | - |
355 | 281 | // ------------------------------------------------------------------------ |
356 | 282 | // Loop into all blocks |
357 | 283 | foreach ( $blocks as $code ) { |
— | — | @@ -499,3 +425,106 @@ |
500 | 426 | return substr( $file, strlen( self::IMAGE_PREFIX ), -( 1 + strlen( self::IMAGE_EXT ) ) ); |
501 | 427 | } |
502 | 428 | } |
| 429 | + |
| 430 | +/** |
| 431 | + * Hieroglyphs tokenizer class |
| 432 | + */ |
| 433 | +/*private*/ class HieroTokenizer { |
| 434 | + const TYPE_NONE = 0; |
| 435 | + const TYPE_GLYPH = 1; // rendered items |
| 436 | + const TYPE_CODE = 2; // single-character code such as ':', '*', '(' or ')' (also '-' inside parentheses) |
| 437 | + const TYPE_SPECIAL = 3; // advanced code (more than 1 character) |
| 438 | + const TYPE_END = 4; // end of line '!' |
| 439 | + |
| 440 | + private $text; |
| 441 | + private $blocks = false; |
| 442 | + private $blocks_id = 0; |
| 443 | + private $item_id = 0; |
| 444 | + |
| 445 | + /** |
| 446 | + * Constructor |
| 447 | + * |
| 448 | + * @param $text string: text to tokenize |
| 449 | + */ |
| 450 | + public function __construct( $text ) { |
| 451 | + $this->text = $text; |
| 452 | + } |
| 453 | + |
| 454 | + /** |
| 455 | + * Split text into blocks, then split blocks into items |
| 456 | + * |
| 457 | + * @return array: list of blocks, each block being a list of item strings |
| 458 | + */ |
| 459 | + public function tokenize() { |
| 460 | + if ( $this->blocks !== false ) { |
| 461 | + return $this->blocks; |
| 462 | + } |
| 463 | + $this->blocks = array( array( '' ) ); |
| 464 | + $parentheses = 0; |
| 465 | + $type = self::TYPE_NONE; |
| 466 | + |
| 467 | + for ( $i = 0; $i < strlen( $this->text ); $i++ ) { |
| 468 | + $char = $this->text[$i]; |
| 469 | + |
| 470 | + if ( $char == '(' ) { |
| 471 | + $parentheses++; |
| 472 | + } elseif ( $char == ')' ) { |
| 473 | + $parentheses--; |
| 474 | + } |
| 475 | + |
| 476 | + if ( $parentheses == 0 ) { |
| 477 | + if ( $char == '-' || $char == ' ' ) { |
| 478 | + if ( $type != self::TYPE_NONE ) { |
| 479 | + $this->addBlock( '' ); |
| 480 | + $type = self::TYPE_NONE; |
| 481 | + } |
| 482 | + } |
| 483 | + } else {// don't split block if inside parentheses |
| 484 | + if ( $char == '-' ) { |
| 485 | + $this->addItem( '-' ); |
| 486 | + $type = self::TYPE_CODE; |
| 487 | + } |
| 488 | + } |
| 489 | + |
| 490 | + if ( $char == '!' ) { |
| 491 | + if ( $this->item_id > 0 ) { |
| 492 | + $this->addBlock(); |
| 493 | + } |
| 494 | + $this->blocks[$this->blocks_id][$this->item_id] = $char; |
| 495 | + $type = self::TYPE_END; |
| 496 | + |
| 497 | + } elseif ( preg_match( '/[*:()]/', $char ) ) { |
| 498 | + if ( $type == self::TYPE_GLYPH || $type == self::TYPE_CODE ) { |
| 499 | + $this->addItem( '' ); |
| 500 | + } |
| 501 | + $this->blocks[$this->blocks_id][$this->item_id] = $char; |
| 502 | + $type = self::TYPE_CODE; |
| 503 | + |
| 504 | + } elseif ( ctype_alnum( $char ) || $char == '.' || $char == '<' |
| 505 | + || $char == '>' || $char == '\\' ) { |
| 506 | + if ( $type == self::TYPE_END ) { |
| 507 | + $this->addBlock( '' ); |
| 508 | + } elseif ( $type == self::TYPE_CODE ) { |
| 509 | + $this->addItem( '' ); |
| 510 | + } |
| 511 | + $this->blocks[$this->blocks_id][$this->item_id] .= $char; |
| 512 | + $type = self::TYPE_GLYPH; |
| 513 | + } |
| 514 | + } |
| 515 | + return $this->blocks; |
| 516 | + } |
| 517 | + |
| 518 | + private function addBlock( $newItem = false ) { |
| 519 | + $this->blocks_id++; |
| 520 | + $this->blocks[$this->blocks_id] = array(); |
| 521 | + $this->item_id = 0; |
| 522 | + if ( $newItem !== false ) { |
| 523 | + $this->blocks[$this->blocks_id][$this->item_id] = $newItem; |
| 524 | + } |
| 525 | + } |
| 526 | + |
| 527 | + private function addItem( $item ) { |
| 528 | + $this->item_id++; |
| 529 | + $this->blocks[$this->blocks_id][$this->item_id] = $item; |
| 530 | + } |
| 531 | +} |