Index: trunk/lucene-search-3/src/main/antlr3/org/wikimedia/anrlrSpecs/wikiTable.g |
— | — | @@ -0,0 +1,93 @@ |
| 2 | +grammar wikiTable;
|
| 3 | +
|
| 4 | +// ANTLR accepts only one parser-scope header action per grammar, so the
|
| 5 | +// placeholder header that declared "package p" is folded into the one below.
|
| 6 | +// NOTE(review): org.antlr.test does not look like an importable class; confirm or drop it.
|
| 7 | +
|
| 8 | +
|
| 9 | +@header {
|
| 10 | +package org.wikimedia.antlrSpec;
|
| 11 | +import org.antlr.test;} // not auto-copied to lexer
|
| 12 | +@lexer::header{
|
| 13 | +package org.wikimedia.antlrSpec;
|
| 14 | +
|
| 15 | +// package declaration for the generated lexer class; no extra imports are declared here
|
| 16 | +}
|
| 17 | +
|
| 18 | +@lexer::members {
|
| 19 | +// Table nesting depth: 0 outside a table; TBL_START increments it and TBL_END decrements it.
|
| 20 | +int inTable = 0;
|
| 21 | +List<Token> tokens = new ArrayList<Token>(); // FIFO of emitted tokens not yet returned by nextToken
|
| 22 | +public void emit(Token token) { // record the token in the lexer state and append it to the queue
|
| 23 | + state.token = token;
|
| 24 | + tokens.add(token);
|
| 25 | +}
|
| 26 | +public Token nextToken() { // run one lexing step, then hand out the oldest queued token
|
| 27 | + super.nextToken(); // result discarded on purpose; tokens are collected through emit above
|
| 28 | + if ( tokens.isEmpty() ) {
|
| 29 | + return Token.EOF_TOKEN; // queue is empty: report end of input
|
| 30 | + }
|
| 31 | + return tokens.remove(0);
|
| 32 | +}
|
| 33 | +}
|
| 34 | +
|
| 35 | +@members{ // parser-side members; every declaration below is commented out
|
| 36 | +//int inTable=0;
|
| 37 | +//public void foo(){};
|
| 38 | +//int rows=0;
|
| 39 | +}
|
| 40 | +
|
| 41 | +//Parser Rules
|
| 42 | +
|
| 43 | +wikiTable // one table: start marker, optional attributes, optional caption, optional header, rows, end marker
|
| 44 | +scope{boolean triedHeader;} // per-invocation flag that the head rule reads in its predicate and sets afterwards
|
| 45 | +@init{$wikiTable::triedHeader=false;} // reset each time this rule is entered
|
| 46 | + : TBL_START xml_attributes? caption? head? rows TBL_END
|
| 47 | + ;
|
| 48 | +caption // caption line: marker, optional attributes, then one or more TEXT tokens
|
| 49 | + : CAPTION_START xml_attributes? captionText=TEXT+ // HS is routed to the HIDDEN channel by the lexer, so requiring it here could never match
|
| 50 | + ;
|
| 51 | +fragment // NOTE(review): fragment is normally a lexer-rule modifier; confirm it is intended on this parser rule
|
| 52 | +head // header cells; the gated predicate only allows this rule while triedHeader is still false
|
| 53 | + : {!$wikiTable::triedHeader}?=>(hCell hCellInLine*)+{$wikiTable::triedHeader=true;}
|
| 54 | + ;
|
| 55 | +rows // table body: the first row may omit ROW_START, every following row must begin with it
|
| 56 | + : (firstRow|row) row*
|
| 57 | + ;
|
| 58 | +
|
| 59 | +firstRow : cells ; // a row without a leading ROW_START marker
|
| 60 | +row : ROW_START xml_attributes? cells; // row marker, optional attributes, then the cells of the row
|
| 61 | +cells :((cell|hCell) (cellInline|hCellInLine)*)+; // one or more groups: a line-start cell followed by any inline cells
|
| 62 | +cell : CELL_START xml_attributes? text=TEXT*; // data cell opened by CELL_START; content may be empty
|
| 63 | +cellInline : CELL_INLINE_STRT xml_attributes? text=TEXT*; // data cell opened by CELL_INLINE_STRT
|
| 64 | +hCell : HEAD_START xml_attributes? text=TEXT*; // header cell opened by HEAD_START
|
| 65 | +hCellInLine : HEAD_INLINE_STRT xml_attributes? text=TEXT*; // header cell opened by HEAD_INLINE_STRT
|
| 66 | +
|
| 67 | +
|
| 68 | +//this is the recursive definition allowing table nesting
|
| 69 | +//cells :( {input.LT(0)==CELL_START||input.LT(0)==HEAD_START}?=>(HEAD_START | CELL_START) XHTML_ATTRIBUTES? (TEXT|wikiTable)+ (CELL_INLINE_STRT XHTML_ATTRIBUTES? (TEXT|wikiTable)+)* )+ ;
|
| 70 | +
|
| 71 | +//this needs to be in the parser so that lookahead 2 means the second parser token; the token type is tested because getText() can be null (e.g. on the EOF token)
|
| 72 | +xml_attributes: {input.LA(2)==EQ}? xml_attribute+ PIPE? ;
|
| 73 | +xml_attribute: name=TEXT EQ DQUOTE value=TEXT* DQUOTE ; // name="value"; the quoted value may be empty or span several TEXT tokens
|
| 74 | +//Lexer Rules
|
| 75 | +TBL_START : {getCharPositionInLine()==0}?=> '{|'{inTable++; } ; // table opener, only at column 0; enters one nesting level
|
| 76 | +TBL_END : {getCharPositionInLine()==0&&inTable>0}?=> '|}'{inTable--;} ; // table closer, only at column 0 inside a table; leaves one nesting level
|
| 77 | +HEAD_START : {getCharPositionInLine()==0&&inTable>0}?=> '!'; // header cell marker, only at column 0 inside a table
|
| 78 | +HEAD_INLINE_STRT: {inTable>0}?=> '!!'; // header cell separator, any column inside a table
|
| 79 | +
|
| 80 | +CELL_START : {getCharPositionInLine()==0&&inTable>0}?=> '|'; //this should only be recognized within a table, at column 0
|
| 81 | +PIPE : {getCharPositionInLine()>0||inTable==0}?=> '|'; //outside a table or not at the start of a line
|
| 82 | +
|
| 83 | +CELL_INLINE_STRT: {inTable>0}?=> '||'; //this should only be recognized within a table
|
| 84 | +ROW_START : {getCharPositionInLine()==0&&inTable>0}?=> '|-' ; // row marker, only at column 0 inside a table
|
| 85 | +CAPTION_START : {getCharPositionInLine()==0&&inTable>0}?=> '|+' ; // caption marker, only at column 0 inside a table
|
| 86 | +
|
| 87 | +
|
| 88 | +TEXT : ('a'..'z'|'A'..'Z'|'0'..'9'|'.'|'-'|';'|':'|',')+; //simplified: only ASCII letters, digits and . - ; : , are accepted
|
| 89 | +
|
| 90 | +DQUOTE : '"'; // delimiter around attribute values
|
| 91 | +//WS : (HS | VS) ; //{ $channel = HIDDEN; } ;
|
| 92 | +HS : ( ' ' | '\t' )+ { $channel = HIDDEN; } ; // spaces and tabs, routed to the HIDDEN channel
|
| 93 | +VS : ( '\r' | '\n' )+ { $channel = HIDDEN; } ; // line breaks, routed to the HIDDEN channel
|
| 94 | +EQ : '='; // separator between attribute name and value in xml_attribute
|