Index: trunk/extensions/VisualEditor/modules/parser/pegTokenizer.pegjs.txt |
— | — | @@ -292,6 +292,10 @@ |
293 | 293 | |
294 | 294 | } |
295 | 295 | |
| 296 | +/********************************************************* |
| 297 | + * The top-level production |
| 298 | + *********************************************************/ |
| 299 | + |
296 | 300 | start |
297 | 301 | = e:toplevelblock* newline* { |
298 | 302 | // end is passed inline as a token, as well as a separate event for now. |
— | — | @@ -310,241 +314,26 @@ |
311 | 315 | } |
312 | 316 | |
313 | 317 | |
314 | | -/* All chars that cannot start syntactic structures in the middle of a line |
315 | | - * XXX: ] and other end delimiters should probably only be activated inside |
316 | | - * structures to avoid unnecessarily leaving the text production on plain |
317 | | - * content. */ |
318 | | - |
319 | | -text_char = [^'<~[{\n\r:\]}|!=] |
320 | | - |
321 | | -text = t:text_char+ { return t.join(''); } |
322 | | - |
323 | | -/* Legend |
324 | | - * ' quotes (italic/bold) |
325 | | - * < start of xmlish_tag |
326 | | - * ~ signatures/dates |
327 | | - * [ start of links |
328 | | - * { start of parser functions, transclusion and template args |
329 | | - * \n all sort of block-level markup at start of line |
330 | | - * \r ditto |
331 | | - * h http(s) urls |
332 | | - * n nntp(s) urls |
333 | | - * m mailto urls |
334 | | - * |
335 | | - * ! and | table cell delimiters, might be better to specialize those |
336 | | - * = headings - also specialize those! |
337 | | - * |
338 | | - * The following chars are also included for now, but only apply in some |
339 | | - * contexts and should probably be enabled only in those: |
340 | | - * : separate definition in ; term : definition |
341 | | - * ] end of link |
342 | | - * } end of parser func/transclusion/template arg |
| 318 | +/* |
| 319 | + * A document (start production) is a sequence of toplevelblocks. Tokens are |
| 320 | + * emitted in chunks per toplevelblock to avoid buffering the full document. |
343 | 321 | */ |
344 | | - |
345 | | -urltext = ( t:[^'<~[{\n\rfghimnstw|!:\]} &=]+ { return t.join(''); } |
346 | | - / & url_chars urllink |
347 | | - / htmlentity |
348 | | - // Convert trailing space into |
349 | | - // XXX: This should be moved to a serializer |
350 | | - / ' ' & ':' { return "\u00a0"; } |
351 | | - / t:text_char )+ |
352 | | - |
353 | | - |
354 | | - |
355 | | - |
356 | | -/* |
357 | | - '//', // for protocol-relative URLs, but not in text! |
358 | | - 'ftp://', |
359 | | - 'git://', |
360 | | - 'gopher://', |
361 | | - 'http://', |
362 | | - 'https://', |
363 | | - 'irc://', |
364 | | - 'ircs://', // @bug 28503 |
365 | | - 'mailto:', |
366 | | - 'mms://', |
367 | | - 'news:', |
368 | | - 'nntp://', // @bug 3808 RFC 1738 |
369 | | - 'svn://', |
370 | | - 'telnet://', // Well if we're going to support the above.. -ævar |
371 | | - 'worldwind://', |
372 | | -*/ |
373 | | - |
374 | | -// Old version |
375 | | -//text = t:[A-Za-z0-9,._ "?!\t-]+ { return t.join('') } |
376 | | - |
377 | | -// Experimental tweaked version: avoid expensive single-char substrings |
378 | | -// This did not bring the expected performance boost, however. |
379 | | -//text = [A-Za-z0-9,._ -] { |
380 | | -// textStart = pos; |
381 | | -// |
382 | | -// var res = input.substr(textStart - 1, inputLength) |
383 | | -// .match(/[A-Za-z0-9,._ -]+/)[0]; |
384 | | -// pos = pos + (res.length - 1); |
385 | | -// return res |
386 | | -// } |
387 | | - |
388 | | -htmlentity = "&" c:[#0-9a-zA-Z]+ ";" { |
389 | | - return unentity("&" + c.join('') + ";") |
390 | | -} |
391 | | - |
392 | | -space |
393 | | - = s:[ \t]+ { return s.join(''); } |
394 | | - |
395 | | -optionalSpaceToken |
396 | | - = s:space* { |
397 | | - if ( s.length ) { |
398 | | - return [s.join('')]; |
399 | | - } else { |
400 | | - return []; |
401 | | - } |
402 | | - } |
403 | | - |
404 | | - |
405 | | -// Start of line |
406 | | -sol = nl:(newlineToken / & { return pos === 0; } { return [] }) |
407 | | - // Eat multi-line comments, so that syntax after still matches as if it |
408 | | - // was actually preceded by a newline |
409 | | - cn:( c:comment n:newline? { |
410 | | - if ( n !== '' ) { |
411 | | - return [c, n]; |
412 | | - } else { |
413 | | - return [c]; |
414 | | - } |
415 | | - } |
416 | | - )* |
417 | | - // Eat includeonly/noinclude at start of line, so that start-of-line |
418 | | - // syntax after it still matches |
419 | | - ni:(space* "<" c:"/"? t:("includeonly" / "noinclude") ">" {return [c, t]} )? |
420 | | - { |
421 | | - var niToken = []; |
422 | | - if ( ni !== '') { |
423 | | - if ( ni[0] === '/' ) { |
424 | | - niToken = [new EndTagTk( ni[1] )]; |
425 | | - } else { |
426 | | - niToken = [new TagTk( ni[1] )]; |
427 | | - } |
428 | | - } |
429 | | - |
430 | | - return nl.concat(cn, niToken); |
431 | | - } |
432 | | - |
433 | | -eof = & { return isEOF(pos); } { return true; } |
434 | | - |
435 | | - |
436 | | -newline |
437 | | - = '\n' / '\r\n' |
438 | | - |
439 | | -newlineToken = newline { return [new NlTk()] } |
440 | | - |
441 | | -eolf = newline / eof |
442 | | - |
443 | | - |
444 | | -// 'Preprocessor' directive- higher-level things that can occur in otherwise |
445 | | -// plain-text content. |
446 | | -directive |
447 | | - = comment |
448 | | - / tplarg_or_template |
449 | | - / htmlentity |
450 | | - |
451 | | -// Plain text, but can contain templates, template arguments, comments etc- |
452 | | -// all stuff that is normally handled by the preprocessor |
453 | | -// Returns either a list of tokens, or a plain string (if nothing is to be |
454 | | -// processed). |
455 | | -preprocessor_text |
456 | | - = r:( t:[^<~[{\n\r\t|!\]} &=]+ { return t.join(''); } |
457 | | - / directive |
458 | | - / !inline_breaks text_char )+ { |
459 | | - return flatten ( r ); |
460 | | - } |
461 | | - |
462 | | -spaceless_preprocessor_text |
463 | | - = r:( t:[^'<~[{\n\r|!\]}\t &=]+ { return t.join(''); } |
464 | | - / directive |
465 | | - / !inline_breaks !' ' text_char )+ { |
466 | | - return flatten_string ( r ); |
467 | | - } |
468 | | - |
469 | | - |
470 | | -wikilink_preprocessor_text |
471 | | - = r:( t:[^%<~[{\n\r\t|!\]} &=]+ { return t.join(''); } |
472 | | - / urlencoded_char |
473 | | - / directive |
474 | | - / !inline_breaks !"]]" text_char )+ { |
475 | | - return flatten_stringlist ( r ); |
476 | | - } |
477 | | - |
478 | | -extlink_preprocessor_text |
479 | | - = r:( t:[^'<~[{\n\r|!\]}\t&="' \u00A0\u1680\u180E\u2000-\u200A\u202F\u205F\u3000]+ { return t.join(''); } |
480 | | - / directive |
481 | | - / urlencoded_char |
482 | | - / !inline_breaks no_punctuation_char |
483 | | - / s:[.:,] !(space / eolf) { return s } |
484 | | - / [&%] )+ { |
485 | | - return flatten_string ( r ); |
486 | | - } |
487 | | - |
488 | | -// Attribute values with preprocessor support |
489 | | -attribute_preprocessor_text |
490 | | - = r:( ts:(!inline_breaks t:[^=<>{\n\r&'"\t ] {return t})+ { return ts.join(''); } |
491 | | - / directive |
492 | | - / !inline_breaks [&%] )+ { |
493 | | - //console.warn('prep'); |
494 | | - return flatten_string ( r ); |
495 | | - } |
496 | | -attribute_preprocessor_text_single |
497 | | - = r:( t:[^{&']+ { return t.join(''); } |
498 | | - / directive |
499 | | - / !inline_breaks [{&] )* { |
500 | | - return flatten_string ( r ); |
501 | | - } |
502 | | -attribute_preprocessor_text_double |
503 | | - = r:( t:[^{&"]+ { return t.join(''); } |
504 | | - / directive |
505 | | - / !inline_breaks [{&] )* { |
506 | | - //console.warn( 'double:' + pp(r) ); |
507 | | - return flatten_string ( r ); |
508 | | - } |
509 | | - |
510 | | -// Variants with the entire attribute on a single line |
511 | | -attribute_preprocessor_text_line |
512 | | - = r:( ts:(!inline_breaks t:[^=<>{\n\r&'"\t ] {return t})+ { return ts.join(''); } |
513 | | - / directive |
514 | | - / !inline_breaks !'\n' [&%] )+ { |
515 | | - //console.warn('prep'); |
516 | | - return flatten_string ( r ); |
517 | | - } |
518 | | -attribute_preprocessor_text_single_line |
519 | | - = r:( t:[^{&']+ { return t.join(''); } |
520 | | - / directive |
521 | | - / !inline_breaks !'\n' [{&] )* { |
522 | | - return flatten_string ( r ); |
523 | | - } |
524 | | -attribute_preprocessor_text_double_line |
525 | | - = r:( t:[^{&"]+ { return t.join(''); } |
526 | | - / directive |
527 | | - / !inline_breaks !'\n' [{&] )* { |
528 | | - //console.warn( 'double:' + pp(r) ); |
529 | | - return flatten_string ( r ); |
530 | | - } |
531 | | - |
532 | | -// A document (start production) is a sequence of toplevelblocks. Tokens are |
533 | | -// emitted in chunks per toplevelblock to avoid buffering the full document. |
534 | 322 | toplevelblock |
535 | 323 | = & { blockStart = pos; return true; } b:block { |
536 | 324 | b = flatten(b); |
| 325 | + |
| 326 | + // Add source offsets for round-tripping. XXX: Add these not just for |
| 327 | + // toplevelblocks! |
537 | 328 | if ( b.length ) { |
538 | 329 | var bs = b[0]; |
539 | 330 | if ( bs.constructor === String && bs.attribs === undefined ) { |
540 | 331 | b[0] = new String( bs ); |
541 | 332 | bs = b[0]; |
542 | 333 | } |
543 | | - //dp('toplevelblock:' + pp(b)); |
544 | 334 | if (bs.dataAttribs === undefined) { |
545 | 335 | bs.dataAttribs = {}; |
546 | 336 | } |
547 | 337 | bs.dataAttribs.sourcePos = [blockStart, pos]; |
548 | | - //console.warn( 'toplevelblock: ' + pp( bs )); |
549 | 338 | } |
550 | 339 | |
551 | 340 | // Emit tokens for this toplevelblock. This feeds a chunk to the parser |
— | — | @@ -556,24 +345,32 @@ |
557 | 346 | return true; |
558 | 347 | } |
559 | 348 | |
| 349 | +/* |
| 350 | + * The actual contents of each block. |
| 351 | + */ |
560 | 352 | block |
561 | | - = !inline_breaks |
562 | | - r:( block_lines |
563 | | - / pre |
564 | | - / comment &eolf |
565 | | - / nowiki |
566 | | - / bt:block_tag { return [bt] } // avoid a paragraph if we know that the line starts with a block tag |
567 | | - / para |
568 | | - / inlineline // includes generic tags; wrapped into paragraphs in DOM postprocessor |
569 | | - / s:sol /*{ |
570 | | - if (s) { |
571 | | - return [s, {type: 'NEWLINE'}]; |
572 | | - } else { |
573 | | - return [{type: 'NEWLINE'}]; |
574 | | - } |
575 | | - }*/ |
576 | | - ) { return r } |
| 353 | + = block_lines |
| 354 | + / & '<' r:( pre // tag variant can start anywhere |
| 355 | + / comment &eolf |
| 356 | + / nowiki |
| 357 | + // avoid a paragraph if we know that the line starts with a block tag |
| 358 | + / bt:block_tag { return [bt] } |
| 359 | + ) { return r; } |
| 360 | + / para |
| 361 | + // Inlineline includes generic tags; wrapped into paragraphs in token |
| 362 | + // transform and DOM postprocessor |
| 363 | + / inlineline |
| 364 | + / sol |
577 | 365 | |
| 366 | +/* |
| 367 | + * A block nested in other constructs. Avoid eating end delimiters for other |
| 368 | + * constructs by checking against inline_breaks first. |
| 369 | + */ |
| 370 | +nested_block = !inline_breaks b:block { return b } |
| 371 | + |
| 372 | +/* |
| 373 | + * Line-based block constructs. |
| 374 | + */ |
578 | 375 | block_lines |
579 | 376 | = s:sol |
580 | 377 | // eat an empty line before the block |
— | — | @@ -583,10 +380,11 @@ |
584 | 381 | return s.concat(s2_, bl); |
585 | 382 | } |
586 | 383 | |
587 | | -// Block structures with start-of-line wiki syntax |
| 384 | +/* |
| 385 | + * Block structures with start-of-line wiki syntax |
| 386 | + */ |
588 | 387 | block_line |
589 | 388 | = h |
590 | | - /// table |
591 | 389 | / & [{}|] tl:table_lines { return tl; } |
592 | 390 | / lists |
593 | 391 | // tag-only lines should not trigger pre |
— | — | @@ -599,9 +397,11 @@ |
600 | 398 | / pre |
601 | 399 | |
602 | 400 | |
603 | | -// A paragraph. We don't emit 'p' tokens to avoid issues with template |
604 | | -// transclusions, <p> tags in the source and the like. Instead, we perform |
605 | | -// some paragraph wrapping on the DOM. |
| 401 | +/* |
| 402 | + * A paragraph. We don't emit 'p' tokens to avoid issues with template |
| 403 | + * transclusions, <p> tags in the source and the like. Instead, we perform |
| 404 | + * some paragraph wrapping on the DOM. |
| 405 | + */ |
606 | 406 | para |
607 | 407 | = s1:sol s2:sol c:inlineline { |
608 | 408 | return s1.concat(s2, /* [new TagTk('p')],*/ c); |
— | — | @@ -609,37 +409,60 @@ |
610 | 410 | |
611 | 411 | br = space* &newline { return new SelfclosingTagTk( 'br' ) } |
612 | 412 | |
613 | | -// Syntax stops to limit inline expansion defending on syntactic context |
| 413 | +/* |
| 414 | + * Syntax stops: Avoid eating significant tokens for higher-level productions |
| 415 | + * in nested inline productions. |
| 416 | + * |
| 417 | + * XXX: Repeated testing of flags is not terribly efficient. |
| 418 | + */ |
614 | 419 | inline_breaks |
615 | | - = |
| 420 | + = & [=|!}:\r\n\]<] // don't check further if char cannot match |
| 421 | + res:( |
| 422 | + & { // Important hack: disable caching for this production, as the default |
| 423 | + // cache key does not take into account flag states! |
| 424 | + cacheKey = ''; |
| 425 | + //console.warn('ilb: ' + input.substr(pos, 5) ); |
| 426 | + return true; |
| 427 | + } |
| 428 | + |
| 429 | + & { return syntaxFlags['table']; } |
| 430 | + ( a:(newline [!|] / '||' / '!!' / '|}') { |
| 431 | + //console.warn("table break" + pp(a) + pos); |
| 432 | + return true; |
| 433 | + } |
| 434 | + / & { return syntaxFlags['tableCellArg'] } |
| 435 | + "|" { return true } |
| 436 | + ) |
| 437 | + / & { return (syntaxFlags['colon'] && |
| 438 | + ! syntaxFlags.extlink && // example: ; [[Link:Term]] : Definition |
| 439 | + ! syntaxFlags.linkdesc); } ":" { return true; } |
| 440 | + / & { return syntaxFlags['extlink']; } "]" { return true; } |
| 441 | + / & { return syntaxFlags['linkdesc']; } link_end { return true; } |
| 442 | + / & { return syntaxFlags['h']; } '='+ space* newline { return true; } |
| 443 | + / & { return syntaxFlags['template']; } ('|' / '}}' ) { |
| 444 | + //console.warn( 'template break @' + pos + input.substr(pos-1, 4) ); |
| 445 | + return true; |
| 446 | + } |
| 447 | + / & { return syntaxFlags['equal']; } '=' { |
| 448 | + //console.warn( 'equal stop @' + pos + input.substr(pos-1, 4) ); |
| 449 | + return true; |
| 450 | + } |
| 451 | + / & { return syntaxFlags['pre']; } '</pre>' { |
| 452 | + //console.warn( 'pre stop @' + pos + input.substr(pos-1, 4) ); |
| 453 | + return true; |
| 454 | + } |
| 455 | + ) { return res } |
| 456 | + |
| 457 | +inline_breaks_experiment |
| 458 | + = & [=|!}:\r\n\]<] |
616 | 459 | & { // Important hack: disable caching for this production, as the default |
617 | 460 | // cache key does not take into account flag states! |
618 | 461 | cacheKey = ''; |
| 462 | + //console.warn('ilbf: ' + input.substr(pos, 5) ); |
619 | 463 | return true; |
620 | | - } |
621 | | - & { return syntaxFlags['table']; } |
622 | | - ( a:(newline [!|] / '||' / '!!' / '|}') { dp("table break" + pp(a) + pos); return true; } |
623 | | - / & { return syntaxFlags['tableCellArg'] } |
624 | | - "|" { return true } |
625 | | - ) |
626 | | - / & { return (syntaxFlags['colon'] && |
627 | | - ! syntaxFlags.extlink && // example: ; [[Link:Term]] : Definition |
628 | | - ! syntaxFlags.linkdesc); } ":" { return true; } |
629 | | - / & { return syntaxFlags['extlink']; } "]" { return true; } |
630 | | - / & { return syntaxFlags['linkdesc']; } link_end { return true; } |
631 | | - / & { return syntaxFlags['h']; } '='+ space* newline { return true; } |
632 | | - / & { return syntaxFlags['template']; } ('|' / '}}' ) { |
633 | | - //console.warn( 'template break @' + pos + input.substr(pos-1, 4) ); |
634 | | - return true; |
635 | | - } |
636 | | - / & { return syntaxFlags['equal']; } '=' { |
637 | | - //console.warn( 'equal stop @' + pos + input.substr(pos-1, 4) ); |
638 | | - return true; |
639 | | - } |
640 | | - / & { return syntaxFlags['pre']; } '</pre>' { |
641 | | - //console.warn( 'pre stop @' + pos + input.substr(pos-1, 4) ); |
642 | | - return true; |
643 | | - } |
| 464 | + } |
| 465 | + . |
| 466 | + { return __parseArgs[3].inline_breaks( input, pos - 1, syntaxFlags ) && true || null ; } |
644 | 467 | |
645 | 468 | inline |
646 | 469 | = c:(urltext / (! inline_breaks (inline_element / . )))+ { |
— | — | @@ -703,38 +526,6 @@ |
704 | 527 | / & { dp('nomatch exit h'); clearFlag('h'); return false } { return null } |
705 | 528 | ) { return r } |
706 | 529 | |
707 | | - |
708 | | -pre_indent |
709 | | - = pre_indent_in_tags |
710 | | - / l:pre_indent_line ls:(sol pre_indent_line)* { |
711 | | - return [new TagTk( 'pre' )] |
712 | | - .concat( [l], ls |
713 | | - , [new EndTagTk( 'pre' )]); |
714 | | - } |
715 | | - |
716 | | -// An indented pre block that is surrounded with pre tags. The pre tags are |
717 | | -// used directly. |
718 | | -pre_indent_in_tags |
719 | | - = space+ // XXX: capture space for round-tripping |
720 | | - "<pre" |
721 | | - attribs:generic_attribute* |
722 | | - ">" |
723 | | - & { return setFlag('pre'); } |
724 | | - l:inlineline |
725 | | - ls:(sol pre_indent_line)* |
726 | | - "</pre>" |
727 | | - { |
728 | | - clearFlag('pre'); |
729 | | - return [ new TagTk( 'pre', attribs ) ] |
730 | | - .concat( l, flatten( ls ), [ new EndTagTk( 'pre' ) ] ); |
731 | | - } |
732 | | - / & { return clearFlag('pre'); } |
733 | | - |
734 | | -pre_indent_line = space l:inlineline { |
735 | | - return [ '\n' ].concat(l); |
736 | | -} |
737 | | - |
738 | | - |
739 | 530 | comment |
740 | 531 | = '<!--' c:comment_chars* ('-->' / eof) |
741 | 532 | cs:(space* newline space* cn:comment { return cn })* { |
— | — | @@ -746,6 +537,11 @@ |
747 | 538 | / c:'-' !'->' { return c; } |
748 | 539 | |
749 | 540 | |
| 541 | + |
| 542 | +/************************************************************** |
| 543 | + * External (bracketed and autolinked) links |
| 544 | + **************************************************************/ |
| 545 | + |
750 | 546 | urllink |
751 | 547 | = ! { return syntaxFlags['extlink'] } |
752 | 548 | target:url { |
— | — | @@ -851,6 +647,11 @@ |
852 | 648 | return flatten( a ).join(''); |
853 | 649 | } |
854 | 650 | |
| 651 | + |
| 652 | +/************************************************************** |
| 653 | + * Templates, -arguments and wikilinks |
| 654 | + **************************************************************/ |
| 655 | + |
855 | 656 | tplarg_or_template = & '{{{{{' template / tplarg / template |
856 | 657 | |
857 | 658 | template |
— | — | @@ -992,17 +793,49 @@ |
993 | 794 | return res; |
994 | 795 | } |
995 | 796 | |
996 | | -/* XXX: Extension tags can require a change in the tokenizer mode, which |
997 | | - * returns any text between extension tags verbatim. For now, we simply |
998 | | - * continue to parse the contained text and return the tokens. The original |
999 | | - * input source can be recovered from the source positions added on tag |
1000 | | - * tokens. This won't however work in all cases. For example, a comment start |
1001 | | - * (<!--) between extension tags would cause the remaining text to be consumed |
1002 | | - * as a comment. To avoid this, we might need to look ahead for the end tag |
1003 | | - * and limit the content parsing to this section. */ |
1004 | 797 | |
1005 | | -xmlish_tag = nowiki / generic_tag |
1006 | 798 | |
| 799 | +/*********************************************************** |
| 800 | + * Pre and xmlish tags |
| 801 | + ***********************************************************/ |
| 802 | + |
| 803 | +// Indented pre blocks differ from their non-indented (purely tag-based) |
| 804 | +// cousins by having their contents parsed. |
| 805 | +pre_indent |
| 806 | + = pre_indent_in_tags |
| 807 | + / l:pre_indent_line ls:(sol pre_indent_line)* { |
| 808 | + return [new TagTk( 'pre' )] |
| 809 | + .concat( [l], ls |
| 810 | + , [new EndTagTk( 'pre' )]); |
| 811 | + } |
| 812 | + |
| 813 | +// An indented pre block that is surrounded with pre tags. The pre tags are |
| 814 | +// used directly. |
| 815 | +pre_indent_in_tags |
| 816 | + = space+ // XXX: capture space for round-tripping |
| 817 | + "<pre" |
| 818 | + attribs:generic_attribute* |
| 819 | + ">" |
| 820 | + & { return setFlag('pre'); } |
| 821 | + l:inlineline |
| 822 | + ls:(sol pre_indent_line)* |
| 823 | + "</pre>" |
| 824 | + { |
| 825 | + clearFlag('pre'); |
| 826 | + return [ new TagTk( 'pre', attribs ) ] |
| 827 | + .concat( l, flatten( ls ), [ new EndTagTk( 'pre' ) ] ); |
| 828 | + } |
| 829 | + / & { return clearFlag('pre'); } |
| 830 | + |
| 831 | +pre_indent_line = space l:inlineline { |
| 832 | + return [ '\n' ].concat(l); |
| 833 | +} |
| 834 | + |
| 835 | +/* |
| 836 | + * Pre blocks defined using non-indented HTML tags only parse nowiki tags |
| 837 | + * inside them, and convert other content to verbatim text. Nowiki inside pre |
| 838 | + * is not functionally needed, but supported for backwards compatibility. |
| 839 | + */ |
1007 | 840 | pre |
1008 | 841 | = "<pre" |
1009 | 842 | attribs:generic_attribute* |
— | — | @@ -1020,6 +853,24 @@ |
1021 | 854 | } |
1022 | 855 | / "</pre>" { return "</pre>"; } |
1023 | 856 | |
| 857 | +/* XXX: Extension tags can require a change in the tokenizer mode, which |
| 858 | + * returns any text between extension tags verbatim. For now, we simply |
| 859 | + * continue to parse the contained text and return the tokens. The original |
| 860 | + * input source can be recovered from the source positions added on tag |
| 861 | + * tokens. This won't however work in all cases. For example, a comment start |
| 862 | + * (<!--) between extension tags would cause the remaining text to be consumed |
| 863 | + * as a comment. To avoid this, we might need to look ahead for the end tag |
| 864 | + * and limit the content parsing to this section. */ |
| 865 | + |
| 866 | +xmlish_tag = nowiki / generic_tag |
| 867 | + |
| 868 | +/* |
| 869 | + * Nowiki treats anything inside it as plain text. It could thus also be |
| 870 | + * defined as an extension that returns its raw input text, possibly wrapped |
| 871 | + * in a span for round-trip information. The special treatment for nowiki in |
| 872 | + * pre blocks would still remain in the grammar though, so overall handling it |
| 873 | + * all here is cleaner. |
| 874 | + */ |
1024 | 875 | nowiki |
1025 | 876 | = "<nowiki>" nc:nowiki_content "</nowiki>" { |
1026 | 877 | //console.warn( 'full nowiki return: ' + pp(nc)); |
— | — | @@ -1050,27 +901,6 @@ |
1051 | 902 | return [ts.join('')]; |
1052 | 903 | } |
1053 | 904 | |
1054 | | -// See http://dev.w3.org/html5/spec/Overview.html#syntax-tag-name and |
1055 | | -// following paragraphs |
1056 | | -block_tag |
1057 | | - = "<" end:"/"? name:(cs:[a-zA-Z]+ { return cs.join('') }) |
1058 | | - attribs:generic_attribute* |
1059 | | - selfclose:"/"? |
1060 | | - ">" { |
1061 | | - if (block_names[name.toLowerCase()] !== true) { |
1062 | | - // abort match if tag is not block-level |
1063 | | - return null; |
1064 | | - } |
1065 | | - var res; |
1066 | | - if ( end != '' ) { |
1067 | | - res = new EndTagTk( name, attribs ); |
1068 | | - } else if ( selfclose != '' ) { |
1069 | | - res = new SelfclosingTagTk( name, attribs ); |
1070 | | - } else { |
1071 | | - res = new TagTk( name, attribs ); |
1072 | | - } |
1073 | | - return [res]; |
1074 | | - } |
1075 | 905 | |
1076 | 906 | // The list of HTML5 tags, mainly used for the identification of non-html |
1077 | 907 | // tags. These terminate otherwise tag-eating productions (see list below) in |
— | — | @@ -1139,6 +969,7 @@ |
1140 | 970 | return res; |
1141 | 971 | } |
1142 | 972 | |
| 973 | +// A generic attribute that can span multiple lines. |
1143 | 974 | generic_newline_attribute |
1144 | 975 | = s:( space / newline )* |
1145 | 976 | name:generic_attribute_name |
— | — | @@ -1152,6 +983,7 @@ |
1153 | 984 | } |
1154 | 985 | } |
1155 | 986 | |
| 987 | +// A single-line attribute. |
1156 | 988 | generic_attribute |
1157 | 989 | = s:space* |
1158 | 990 | name:generic_attribute_name |
— | — | @@ -1168,12 +1000,13 @@ |
1169 | 1001 | } |
1170 | 1002 | } |
1171 | 1003 | |
1172 | | -// http://dev.w3.org/html5/spec/Overview.html#attributes-0, and we also |
1173 | | -// disallow newlines, | and {. |
1174 | | -generic_attribute_plain_name |
1175 | | - = n:[^ \t\0/"'>=\n|{]+ { |
1176 | | - return n.join(''); |
1177 | | - } |
| 1004 | +// ( Replaced by generic_attribute_name for template / parameter support. ) |
| 1005 | +//// http://dev.w3.org/html5/spec/Overview.html#attributes-0, and we also |
| 1006 | +//// disallow newlines, | and {. |
| 1007 | +//generic_attribute_plain_name |
| 1008 | +// = n:[^ \t\0/"'>=\n|{]+ { |
| 1009 | +// return n.join(''); |
| 1010 | +// } |
1178 | 1011 | |
1179 | 1012 | generic_attribute_name |
1180 | 1013 | = & { return setFlag( 'equal' ) } |
— | — | @@ -1186,34 +1019,61 @@ |
1187 | 1020 | } |
1188 | 1021 | / & { return clearFlag( 'equal' ) } |
1189 | 1022 | |
| 1023 | +// Value part of a generic attribute: spaces, tabs and newlines may follow the "=". |
1190 | 1024 | generic_attribute_newline_value |
1191 | 1025 | = "=" (space / newline )* v:xml_att_value { |
1192 | 1026 | return v; |
1193 | 1027 | } |
| 1028 | +// Value part of a generic attribute: only spaces / tabs (no newlines) may follow the "=". |
1194 | 1029 | generic_attribute_value |
1195 | 1030 | = "=" space* v:att_value { |
1196 | 1031 | return v; |
1197 | 1032 | } |
1198 | 1033 | |
1199 | | -// XXX: attributes can contain templates and template args!! |
| 1034 | +// Attribute value, quoted variants can span multiple lines. |
1200 | 1035 | xml_att_value |
1201 | 1036 | = "'" t:attribute_preprocessor_text_single "'" { return t; } |
1202 | 1037 | / '"' t:attribute_preprocessor_text_double '"' { return t; } |
1203 | 1038 | / attribute_preprocessor_text |
1204 | 1039 | |
1205 | | -// XXX: attributes can contain templates and template args!! |
| 1040 | +// Attribute value, restricted to a single line. |
1206 | 1041 | att_value |
1207 | 1042 | = "'" t:attribute_preprocessor_text_single_line "'" { return t; } |
1208 | 1043 | / '"' t:attribute_preprocessor_text_double_line '"' { return t; } |
1209 | 1044 | / attribute_preprocessor_text_line |
1210 | | -// = t:(!inline_breaks c:[^ \t'"<>='\n] { return c } )+ { |
1211 | | -// return t.join(''); |
1212 | | -// } |
1213 | | -// // XXX: is "\"" also valid html? or just Wikitext? |
1214 | | -// / "'" t:[^'>]* "'" { return unquote("'", t.join('')); } |
1215 | | -// / '"' t:[^">]* '"' { return unquote('"', t.join('')); } |
1216 | 1045 | |
1217 | | -/* Lists */ |
| 1046 | +/* |
| 1047 | + * A variant of generic_tag, but also checks if the tag name is a block-level |
| 1048 | + * tag as defined in |
| 1049 | + * http://dev.w3.org/html5/spec/Overview.html#syntax-tag-name and following |
| 1050 | + * paragraphs. |
| 1051 | + */ |
| 1052 | +block_tag |
| 1053 | + = "<" end:"/"? |
| 1054 | + name:(cs:[a-zA-Z]+ { return cs.join('') }) |
| 1055 | + attribs:generic_newline_attribute* |
| 1056 | + ( space / newline ) * |
| 1057 | + selfclose:"/"? |
| 1058 | + ">" { |
| 1059 | + if (block_names[name.toLowerCase()] !== true) { |
| 1060 | + // abort match if tag is not block-level |
| 1061 | + return null; |
| 1062 | + } |
| 1063 | + var res; |
| 1064 | + if ( end !== '' ) { |
| 1065 | + res = new EndTagTk( name, attribs ); |
| 1066 | + } else if ( selfclose !== '' ) { |
| 1067 | + res = new SelfclosingTagTk( name, attribs ); |
| 1068 | + } else { |
| 1069 | + res = new TagTk( name, attribs ); |
| 1070 | + } |
| 1071 | + return [res]; |
| 1072 | + } |
| 1073 | + |
| 1074 | + |
| 1075 | +/********************************************************* |
| 1076 | + * Lists |
| 1077 | + *********************************************************/ |
1218 | 1078 | lists = e:(dtdd / li) es:(sol (dtdd / li))* |
1219 | 1079 | { |
1220 | 1080 | return annotateList( [ new TagTk( 'list' ) ] |
— | — | @@ -1265,7 +1125,9 @@ |
1266 | 1126 | |
1267 | 1127 | list_char = [*#:;] |
1268 | 1128 | |
1269 | | -/** |
| 1129 | + |
| 1130 | + |
| 1131 | +/********************************************************************* |
1270 | 1132 | * Tables |
1271 | 1133 | * |
1272 | 1134 | * Table productions are geared to support independent parsing of fragments in |
— | — | @@ -1276,7 +1138,7 @@ |
1277 | 1139 | * |
1278 | 1140 | * The separate table_lines production is faster than moving those productions |
1279 | 1141 | * directly to block_lines. |
1280 | | - * */ |
| 1142 | + *********************************************************************/ |
1281 | 1143 | |
1282 | 1144 | table_lines |
1283 | 1145 | = & { return setFlag('table'); } |
— | — | @@ -1351,9 +1213,9 @@ |
1352 | 1214 | a:table_cell_args? |
1353 | 1215 | //& { console.warn("past attrib, pos=" + pos + input.substr(pos,10)); return true; } |
1354 | 1216 | // use inline_breaks to break on tr etc |
1355 | | - td:( !inline_breaks |
1356 | | - //& { dp("table_data 2, pos=" + pos + input.substr(pos,10)); return true; } |
1357 | | - b:block { return b } )* |
| 1217 | + td:( //& { dp("table_data 2, pos=" + pos + input.substr(pos,10)); return true; } |
| 1218 | + b:nested_block { return b } |
| 1219 | + )* |
1358 | 1220 | { |
1359 | 1221 | if ( a == '' ) { |
1360 | 1222 | a = []; |
— | — | @@ -1477,9 +1339,10 @@ |
1478 | 1340 | a:(as:generic_attribute+ space* pipe !pipe { return as } )? |
1479 | 1341 | //& { dp('past attrib, pos=' + pos); return true; } |
1480 | 1342 | // use inline_breaks to break on tr etc |
1481 | | - td:(!inline_breaks |
| 1343 | + td:( |
1482 | 1344 | //& { dp("table_data 2, pos=" + pos + input.substr(pos,10)); return true; } |
1483 | | - b:block { return b })* { |
| 1345 | + b:nested_block { return b } |
| 1346 | + )* { |
1484 | 1347 | if ( a == '' ) { |
1485 | 1348 | a = []; |
1486 | 1349 | } |
— | — | @@ -1516,6 +1379,226 @@ |
1517 | 1380 | } |
1518 | 1381 | |
1519 | 1382 | |
| 1383 | + |
| 1384 | +/******************************************************************* |
| 1385 | + * Text variants and other general productions |
| 1386 | + *******************************************************************/ |
| 1387 | + |
| 1388 | +/* All chars that cannot start syntactic structures in the middle of a line |
| 1389 | + * XXX: ] and other end delimiters should probably only be activated inside |
| 1390 | + * structures to avoid unnecessarily leaving the text production on plain |
| 1391 | + * content. */ |
| 1392 | + |
| 1393 | +text_char = [^'<~[{\n\r:\]}|!=] |
| 1394 | + |
| 1395 | +text = t:text_char+ { return t.join(''); } |
| 1396 | + |
| 1397 | +/* Legend |
| 1398 | + * ' quotes (italic/bold) |
| 1399 | + * < start of xmlish_tag |
| 1400 | + * ~ signatures/dates |
| 1401 | + * [ start of links |
| 1402 | + * { start of parser functions, transclusion and template args |
| 1403 | + * \n all sort of block-level markup at start of line |
| 1404 | + * \r ditto |
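| 1405 | + * f g h i urltext only: urls starting with ftp, git / gopher, http(s), irc(s) |
| 1406 | + * m n s urltext only: urls starting with mailto / mms, news / nntp, svn |
| 1407 | + * t w urltext only: urls starting with telnet, worldwind |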
| 1408 | + * |
| 1409 | + * ! and | table cell delimiters, might be better to specialize those |
| 1410 | + * = headings - also specialize those! |
| 1411 | + * |
| 1412 | + * The following chars are also included for now, but only apply in some |
| 1413 | + * contexts and should probably be enabled only in those: |
| 1414 | + * : separate definition in ; term : definition |
| 1415 | + * ] end of link |
| 1416 | + * } end of parser func/transclusion/template arg |
| 1417 | + */ |
| 1418 | + |
| 1419 | +urltext = ( t:[^'<~[{\n\rfghimnstw|!:\]} &=]+ { return t.join(''); } |
| 1420 | + / & url_chars urllink |
| 1421 | + / htmlentity |
| 1422 | + // Convert a space directly before a colon into a non-breaking space (\u00a0) |
| 1423 | + // XXX: This should be moved to a serializer |
| 1424 | + / ' ' & ':' { return "\u00a0"; } |
| 1425 | + / t:text_char )+ |
| 1426 | + |
| 1427 | +/* |
| 1428 | + '//', // for protocol-relative URLs, but not in text! |
| 1429 | + 'ftp://', |
| 1430 | + 'git://', |
| 1431 | + 'gopher://', |
| 1432 | + 'http://', |
| 1433 | + 'https://', |
| 1434 | + 'irc://', |
| 1435 | + 'ircs://', // @bug 28503 |
| 1436 | + 'mailto:', |
| 1437 | + 'mms://', |
| 1438 | + 'news:', |
| 1439 | + 'nntp://', // @bug 3808 RFC 1738 |
| 1440 | + 'svn://', |
| 1441 | + 'telnet://', // Well if we're going to support the above.. -ævar |
| 1442 | + 'worldwind://', |
| 1443 | +*/ |
| 1444 | + |
| 1445 | +// Old version |
| 1446 | +//text = t:[A-Za-z0-9,._ "?!\t-]+ { return t.join('') } |
| 1447 | + |
| 1448 | +// Experimental tweaked version: avoid expensive single-char substrings |
| 1449 | +// This did not bring the expected performance boost, however. |
| 1450 | +//text = [A-Za-z0-9,._ -] { |
| 1451 | +// textStart = pos; |
| 1452 | +// |
| 1453 | +// var res = input.substr(textStart - 1, inputLength) |
| 1454 | +// .match(/[A-Za-z0-9,._ -]+/)[0]; |
| 1455 | +// pos = pos + (res.length - 1); |
| 1456 | +// return res |
| 1457 | +// } |
| 1458 | + |
| 1459 | +htmlentity = "&" c:[#0-9a-zA-Z]+ ";" { |
| 1460 | + return unentity("&" + c.join('') + ";") |
| 1461 | +} |
| 1462 | + |
| 1463 | +space |
| 1464 | + = s:[ \t]+ { return s.join(''); } |
| 1465 | + |
| 1466 | +optionalSpaceToken |
| 1467 | + = s:space* { |
| 1468 | + if ( s.length ) { |
| 1469 | + return [s.join('')]; |
| 1470 | + } else { |
| 1471 | + return []; |
| 1472 | + } |
| 1473 | + } |
| 1474 | + |
| 1475 | + |
| 1476 | +// Start of line: matches a newline, or the very start of the input (pos === 0) |
| 1477 | +sol = nl:(newlineToken / & { return pos === 0; } { return [] }) |
| 1478 | + // Eat comments (each optionally followed by a newline), so that syntax after |
| 1479 | + // them still matches as if it was directly preceded by a newline |
| 1480 | + cn:( c:comment n:newline? { |
| 1481 | + if ( n !== '' ) { |
| 1482 | + return [c, n]; |
| 1483 | + } else { |
| 1484 | + return [c]; |
| 1485 | + } |
| 1486 | + } |
| 1487 | + )* |
| 1488 | + // Eat an includeonly/noinclude tag at start of line, so that start-of-line |
| 1489 | + // syntax after it still matches |
| 1490 | + ni:(space* "<" c:"/"? t:("includeonly" / "noinclude") ">" {return [c, t]} )? |
| 1491 | + { |
| 1492 | + var niToken = []; |
| 1493 | + if ( ni !== '') { |
| 1494 | + if ( ni[0] === '/' ) { |
| 1495 | + niToken = [new EndTagTk( ni[1] )]; |
| 1496 | + } else { |
| 1497 | + niToken = [new TagTk( ni[1] )]; |
| 1498 | + } |
| 1499 | + } |
| 1500 | + |
| 1501 | + return nl.concat(cn, niToken); |
| 1502 | + } |
| 1503 | + |
| 1504 | +eof = & { return isEOF(pos); } { return true; } |
| 1505 | + |
| 1506 | + |
| 1507 | +newline |
| 1508 | + = '\n' / '\r\n' |
| 1509 | + |
| 1510 | +newlineToken = newline { return [new NlTk()] } |
| 1511 | + |
| 1512 | +eolf = newline / eof |
| 1513 | + |
| 1514 | + |
| 1515 | +// 'Preprocessor' directives: higher-level constructs that can occur in |
| 1516 | +// otherwise plain-text content. |
| 1517 | +directive |
| 1518 | + = comment |
| 1519 | + / tplarg_or_template |
| 1520 | + / htmlentity |
| 1521 | + |
| 1522 | +// Plain text, but can contain templates, template arguments, comments etc. - |
| 1523 | +// everything that is normally handled by the preprocessor. |
| 1524 | +// Returns the flatten()ed matches: either a list of tokens, or a plain string |
| 1525 | +// (if nothing is to be processed). |
| 1526 | +preprocessor_text |
| 1527 | + = r:( t:[^<~[{\n\r\t|!\]} &=]+ { return t.join(''); } |
| 1528 | + / directive |
| 1529 | + / !inline_breaks text_char )+ { |
| 1530 | + return flatten ( r ); |
| 1531 | + } |
| 1532 | + |
| 1533 | +spaceless_preprocessor_text |
| 1534 | + = r:( t:[^'<~[{\n\r|!\]}\t &=]+ { return t.join(''); } |
| 1535 | + / directive |
| 1536 | + / !inline_breaks !' ' text_char )+ { |
| 1537 | + return flatten_string ( r ); |
| 1538 | + } |
| 1539 | + |
| 1540 | + |
| 1541 | +wikilink_preprocessor_text |
| 1542 | + = r:( t:[^%<~[{\n\r\t|!\]} &=]+ { return t.join(''); } |
| 1543 | + / urlencoded_char |
| 1544 | + / directive |
| 1545 | + / !inline_breaks !"]]" text_char )+ { |
| 1546 | + return flatten_stringlist ( r ); |
| 1547 | + } |
| 1548 | + |
| 1549 | +extlink_preprocessor_text |
| 1550 | + = r:( t:[^'<~[{\n\r|!\]}\t&="' \u00A0\u1680\u180E\u2000-\u200A\u202F\u205F\u3000]+ { return t.join(''); } |
| 1551 | + / directive |
| 1552 | + / urlencoded_char |
| 1553 | + / !inline_breaks no_punctuation_char |
| 1554 | + / s:[.:,] !(space / eolf) { return s } |
| 1555 | + / [&%] )+ { |
| 1556 | + return flatten_string ( r ); |
| 1557 | + } |
| 1558 | + |
| 1559 | +// Attribute values with preprocessor support |
| 1560 | +attribute_preprocessor_text |
| 1561 | + = r:( ts:(!inline_breaks t:[^=<>{\n\r&'"\t ] {return t})+ { return ts.join(''); } |
| 1562 | + / directive |
| 1563 | + / !inline_breaks [&%] )+ { |
| 1564 | + //console.warn('prep'); |
| 1565 | + return flatten_string ( r ); |
| 1566 | + } |
| 1567 | +attribute_preprocessor_text_single |
| 1568 | + = r:( t:[^{&']+ { return t.join(''); } |
| 1569 | + / directive |
| 1570 | + / !inline_breaks [{&] )* { |
| 1571 | + return flatten_string ( r ); |
| 1572 | + } |
| 1573 | +attribute_preprocessor_text_double |
| 1574 | + = r:( t:[^{&"]+ { return t.join(''); } |
| 1575 | + / directive |
| 1576 | + / !inline_breaks [{&] )* { |
| 1577 | + //console.warn( 'double:' + pp(r) ); |
| 1578 | + return flatten_string ( r ); |
| 1579 | + } |
| 1580 | + |
| 1581 | +// Variants with the entire attribute on a single line |
| 1582 | +attribute_preprocessor_text_line |
| 1583 | + = r:( ts:(!inline_breaks t:[^=<>{\n\r&'"\t ] {return t})+ { return ts.join(''); } |
| 1584 | + / directive |
| 1585 | + / !inline_breaks !'\n' [&%] )+ { |
| 1586 | + //console.warn('prep'); |
| 1587 | + return flatten_string ( r ); |
| 1588 | + } |
| 1589 | +attribute_preprocessor_text_single_line |
| 1590 | + = r:( t:[^{&']+ { return t.join(''); } |
| 1591 | + / directive |
| 1592 | + / !inline_breaks !'\n' [{&] )* { |
| 1593 | + return flatten_string ( r ); |
| 1594 | + } |
| 1595 | +attribute_preprocessor_text_double_line |
| 1596 | + = r:( t:[^{&"]+ { return t.join(''); } |
| 1597 | + / directive |
| 1598 | + / !inline_breaks !'\n' [{&] )* { |
| 1599 | + //console.warn( 'double:' + pp(r) ); |
| 1600 | + return flatten_string ( r ); |
| 1601 | + } |
| 1602 | + |
1520 | 1603 | // A pipe: either a literal "|" or the special-cased "{{!}}" pipe template |
1521 | 1604 | pipe = "|" / "{{!}}" |
1522 | 1605 | |
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.tokenizer.peg.js |
— | — | @@ -54,7 +54,12 @@ |
55 | 55 | // reasonable traces. Calling a trace on the extension does not really cut |
56 | 56 | // it. |
57 | 57 | //try { |
58 | | - this.parser.parse(text, 'start', this.emit.bind( this, 'chunk' )); |
| 58 | + this.parser.parse(text, 'start', |
| 59 | + // callback |
| 60 | + this.emit.bind( this, 'chunk' ), |
| 61 | + // inline break test |
| 62 | + this |
| 63 | + ); |
59 | 64 | // emit tokens here until we get that to work per toplevelblock in the |
60 | 65 | // actual tokenizer |
61 | 66 | //this.emit('chunk', out.concat( [{ type: 'END' }] ) ); |
— | — | @@ -67,6 +72,61 @@ |
68 | 73 | //} |
69 | 74 | }; |
70 | 75 | |
| 76 | +PegTokenizer.prototype.breakMap = { |
| 77 | + '=': function(input, pos, syntaxFlags) { |
| 78 | + return syntaxFlags.equal || |
| 79 | + ( syntaxFlags.h && |
| 80 | + input.substr( pos + 1, 200) |
| 81 | + .match(/[ \t]*[\r\n]/) !== null ) || null; |
| 82 | + }, |
| 83 | + '|': function ( input, pos, syntaxFlags ) { |
| 84 | + return syntaxFlags.template || |
| 85 | + ( syntaxFlags.table && |
| 86 | + ( input[pos + 1].match(/[|}]/) !== null || |
| 87 | + syntaxFlags.tableCellArg |
| 88 | + ) |
| 89 | + ) || null; |
| 90 | + }, |
| 91 | + "!": function ( input, pos, syntaxFlags ) { |
| 92 | + return syntaxFlags.table && input[pos + 1] === "!" || |
| 93 | + null; |
| 94 | + }, |
| 95 | + "}": function ( input, pos, syntaxFlags ) { |
| 96 | + return syntaxFlags.template && input[pos + 1] === "}" || null; |
| 97 | + }, |
| 98 | + ":": function ( input, pos, syntaxFlags ) { |
| 99 | + return syntaxFlags.colon && |
| 100 | + ! syntaxFlags.extlink && |
| 101 | + ! syntaxFlags.linkdesc || null; |
| 102 | + }, |
| 103 | + "\r": function ( input, pos, syntaxFlags ) { |
| 104 | + return syntaxFlags.table && |
| 105 | + input[pos + 1] !== '!' && |
| 106 | + input[pos + 1] !== '|' || |
| 107 | + null; |
| 108 | + }, |
| 109 | + "\n": function ( input, pos, syntaxFlags ) { |
| 110 | + return syntaxFlags.table && |
| 111 | + input[pos + 1] !== '!' && |
| 112 | + input[pos + 1] !== '|' || |
| 113 | + null; |
| 114 | + }, |
| 115 | + "]": function ( input, pos, syntaxFlags ) { |
| 116 | + return syntaxFlags.extlink || |
| 117 | + ( syntaxFlags.linkdesc && input[pos + 1] === ']' ) || |
| 118 | + null; |
| 119 | + }, |
| 120 | + "<": function ( input, pos, syntaxFlags ) { |
| 121 | + return syntaxFlags.pre && input.substr( pos, 6 ) === '</pre>' || null; |
| 122 | + } |
| 123 | +}; |
| 124 | + |
| 125 | +PegTokenizer.prototype.inline_breaks = function (input, pos, syntaxFlags ) { |
| 126 | + var res = this.breakMap[ input[pos] ]( input, pos, syntaxFlags); |
| 127 | + console.warn( 'ilb res: ' + JSON.stringify( [ res, input.substr( pos, 4 ) ] ) ); |
| 128 | + return res; |
| 129 | +}; |
| 130 | + |
71 | 131 | /***************************************************************************** |
72 | 132 | * LEGACY stuff |
73 | 133 | * |
— | — | @@ -173,6 +233,7 @@ |
174 | 234 | } |
175 | 235 | }; |
176 | 236 | |
| 237 | + |
177 | 238 | if (typeof module == "object") { |
178 | 239 | module.exports.PegTokenizer = PegTokenizer; |
179 | 240 | } |