Index: trunk/lucene-search-3/src/main/antlr3/org/wikimedia/antlrSpecs/wikiTable.g |
— | — | @@ -0,0 +1,91 @@ |
| 2 | +grammar wikiTable;
|
| 3 | +
|
| 4 | +@header {
|
| 5 | +package org.wikimedia.antlrSpecs;
|
| 6 | +//import org.antlr.test;
|
| 7 | +} // not auto-copied to lexer
|
| 8 | +
|
| 9 | +
|
| 10 | +@lexer::header{
|
| 11 | +package org.wikimedia.antlrSpecs;
|
| 12 | +
|
| 13 | +
|
| 14 | +}
|
| 15 | +
|
| 16 | +@lexer::members {
|
| 17 | +//table nesting depth: incremented by TBL_START, decremented by TBL_END; 0 means we are outside any table
|
| 18 | +int inTable=0;
|
| 19 | +List<Token> tokens = new ArrayList<Token>(); //queue of tokens handed to emit(Token) and not yet returned by nextToken()
|
| 20 | +public void emit(Token token) { //record the token as current and also append it to the queue
|
| 21 | + state.token = token;
|
| 22 | + tokens.add(token);
|
| 23 | +}
|
| 24 | +public Token nextToken() { //returns queued tokens in the order they were emitted
|
| 25 | + super.nextToken(); //return value deliberately ignored; tokens are collected through emit(Token)
|
| 26 | + if ( tokens.isEmpty() ) { //nothing was queued by the call above
|
| 27 | + return Token.EOF_TOKEN;
|
| 28 | + }
|
| 29 | + return tokens.remove(0); //oldest queued token first
|
| 30 | +}
|
| 31 | +}
|
| 32 | +
|
| 33 | +@members{
|
| 34 | +//int inTable=0;
|
| 35 | +//public void foo(){};
|
| 36 | +//int rows=0;
|
| 37 | +}
|
| 38 | +
|
| 39 | +//Parser Rules
|
| 40 | +
|
| 41 | +wikiTable //entry rule: one complete table from TBL_START to TBL_END
|
| 42 | +scope{boolean triedHeader;} //per-invocation flag, read and set by the head rule
|
| 43 | +@init{$wikiTable::triedHeader=false;} //reset the flag each time a table is entered
|
| 44 | + : TBL_START xml_attributes? caption? head? rows TBL_END //attributes, caption and header are each optional; rows are required
|
| 45 | + ;
|
| 46 | +caption //table caption: CAPTION_START, optional attributes, then one or more TEXT tokens
|
| 47 | + : CAPTION_START xml_attributes? captionText=TEXT+ //no HS reference: the lexer puts HS on the hidden channel, so a channel-filtering token stream never delivers it to the parser
|
| 48 | + ;
|
| 49 | +fragment //NOTE(review): fragment is normally a lexer-rule modifier; confirm it is intended on this parser rule
|
| 50 | +head //header section: one or more groups of a header cell followed by inline header cells
|
| 51 | + : {!$wikiTable::triedHeader}?=>(hCell hCellInLine*)+{$wikiTable::triedHeader=true;} //gated on the wikiTable-scoped flag being false; the flag is set to true after the groups have matched
|
| 52 | + ;
|
| 53 | +rows //one leading row followed by any number of further rows
|
| 54 | + : (firstRow|row) row* //the leading row may be a firstRow (cells only) or a regular row starting with ROW_START
|
| 55 | + ;
|
| 56 | +
|
| 57 | +firstRow : cells ; //a row written without a ROW_START token
|
| 58 | +row : ROW_START xml_attributes? cells; //ROW_START, optional attributes, then the row's cells
|
| 59 | +cells :((cell|hCell) (cellInline|hCellInLine)*)+; //one or more groups: a cell or header cell, then any number of inline cells of either kind
|
| 60 | +cell : CELL_START xml_attributes? text=TEXT*; //data cell opened by CELL_START
|
| 61 | +cellInline : CELL_INLINE_STRT xml_attributes? text=TEXT*; //data cell opened by CELL_INLINE_STRT
|
| 62 | +hCell : HEAD_START xml_attributes? text=TEXT*; //header cell opened by HEAD_START
|
| 63 | +hCellInLine : HEAD_INLINE_STRT xml_attributes? text=TEXT*; //header cell opened by HEAD_INLINE_STRT
|
| 64 | +
|
| 65 | +
|
| 66 | +//this is the recursive definition allowing table nesting
|
| 67 | +//cells :( {input.LT(0)==CELL_START||input.LT(0)==HEAD_START}?=>(HEAD_START | CELL_START) XHTML_ATTRIBUTES? (TEXT|wikiTable)+ (CELL_INLINE_STRT XHTML_ATTRIBUTES? (TEXT|wikiTable)+)* )+ ;
|
| 68 | +
|
| 69 | +//the predicate below has to live in a parser rule so that input.LT(2) refers to the second parser token of lookahead
|
| 70 | +xml_attributes: {input.LT(2).getText().equals("=")}? xml_attribute+ PIPE? ; //one or more attributes, optionally closed by PIPE; predicate checks that the second lookahead token's text is "="
|
| 71 | +xml_attribute: name=TEXT EQ DQUOTE value=TEXT* DQUOTE ; //name, EQ, then zero or more TEXT tokens between double quotes
|
| 72 | +//Lexer Rules (each predicate gates a token on the column position and/or the inTable nesting counter)
|
| 73 | +TBL_START : {getCharPositionInLine()==0}?=> '{|'{inTable++; } ; //only at column 0; increments the nesting counter
|
| 74 | +TBL_END : {getCharPositionInLine()==0&&inTable>0}?=> '|}'{inTable--;} ; //only at column 0 inside a table; decrements the nesting counter
|
| 75 | +HEAD_START : {getCharPositionInLine()==0&&inTable>0}?=> '!'; //only at column 0 inside a table
|
| 76 | +HEAD_INLINE_STRT: {inTable>0}?=> '!!'; //any column, but only inside a table
|
| 77 | +
|
| 78 | +CELL_START : {getCharPositionInLine()==0&&inTable>0}?=> '|'; //only recognized at column 0 within a table
|
| 79 | +PIPE : {getCharPositionInLine()>0||inTable==0}?=> '|'; //outside a table, or not at the start of a line
|
| 80 | +
|
| 81 | +CELL_INLINE_STRT: {inTable>0}?=> '||'; //this should only be recognized within a table
|
| 82 | +ROW_START : {getCharPositionInLine()==0&&inTable>0}?=> '|-' ; //only at column 0 inside a table
|
| 83 | +CAPTION_START : {getCharPositionInLine()==0&&inTable>0}?=> '|+' ; //only at column 0 inside a table
|
| 84 | +
|
| 85 | +
|
| 86 | +TEXT : ('a'..'z'|'A'..'Z'|'0'..'9'|'.'|'-'|';'|':'|',')+; //simplified: runs of ASCII letters, digits and the characters . - ; : , only
|
| 87 | +
|
| 88 | +DQUOTE : '"';
|
| 89 | +//WS : (HS | VS) ; //{ $channel = HIDDEN; } ;
|
| 90 | +HS : ( ' ' | '\t' )+ { $channel = HIDDEN; } ; //horizontal whitespace (spaces and tabs), routed to the hidden channel
|
| 91 | +VS : ( '\r' | '\n' )+ { $channel = HIDDEN; } ; //vertical whitespace (CR and LF), routed to the hidden channel
|
| 92 | +EQ : '=';
|