Index: trunk/phase3/includes/Tokenizer.php |
— | — | @@ -8,8 +8,11 @@ |
9 | 9 | |
10 | 10 | /* private */ function Tokenizer() |
11 | 11 | { |
| 12 | + global $wgLang; |
| 13 | + |
12 | 14 | $this->mPos=0; |
13 | 15 | $this->mTokenQueue=array(); |
| 16 | + $this->linkPrefixExtension = $wgLang->linkPrefixExtension(); |
14 | 17 | } |
15 | 18 | |
16 | 19 | # factory function |
— | — | @@ -54,8 +57,7 @@ |
55 | 58 | // proceeds character by character through the text, looking for characters needing |
56 | 59 | // special attention. Those are currently: I, R, ', [, ], newline |
57 | 60 | // |
58 | | - // TODO: prefixed links for Arabic wikipedia not implemented yet |
59 | | - // handling of French blanks not yet implemented |
| 61 | + // TODO: handling of French blanks not yet implemented |
60 | 62 | function nextToken() |
61 | 63 | { |
62 | 64 | $fname = "Tokenizer::nextToken"; |
— | — | @@ -64,8 +66,8 @@ |
65 | 67 | if ( count( $this->mQueuedToken ) != 0 ) { |
66 | 68 | // still one token from the last round around. Return that one first. |
67 | 69 | $token = array_shift( $this->mQueuedToken ); |
68 | | - } else if ( $this->mPos > $this->mTextLength ) |
69 | | - { // If no text is left, return "false". |
| 70 | + } else if ( $this->mPos > $this->mTextLength ) { |
| 71 | + // If no text is left, return boolean false. |
70 | 72 | $token = false; |
71 | 73 | } else { |
72 | 74 | |
— | — | @@ -98,8 +100,21 @@ |
99 | 101 | $this->mPos += 3; |
100 | 102 | break 2; // switch + while |
101 | 103 | } else if ( $this->continues("[") ) { |
102 | | - $queueToken["type"] = "[["; |
| 104 | + $queueToken["type"] = "[["; |
103 | 105 | $queueToken["text"] = ""; |
| 106 | + // Check for a "prefixed link", e.g. Al[[Khazar]] |
| 107 | + // Mostly for the Arabic Wikipedia |
| 108 | + if ( $this->linkPrefixExtension ) { |
| 109 | + while ( $this->linkPrefixExtension |
| 110 | + && ($len = strlen( $token["text"] ) ) > 0 |
| 111 | + && !ctype_space( $token["text"][$len-1] ) ) |
| 112 | + { |
| 113 | + //prepend the character to the link's open tag |
| 114 | + $queueToken["text"] = $token["text"][$len-1] . $queueToken["text"]; |
| 115 | + //remove character from the end of the text token |
| 116 | + $token["text"] = substr( $token["text"], 0, -1); |
| 117 | + } |
| 118 | + } |
104 | 119 | $this->mQueuedToken[] = $queueToken; |
105 | 120 | $this->mPos += 2; |
106 | 121 | break 2; // switch + while |
— | — | @@ -158,7 +173,7 @@ |
159 | 174 | |
160 | 175 | // function continues |
161 | 176 | // checks whether the mText continues with $cont from mPos+1 |
162 | | - function continues( $cont ) |
| 177 | + /* private */ function continues( $cont ) |
163 | 178 | { |
164 | 179 | // If string is not long enough to contain $cont, return false |
165 | 180 | if ( $this->mTextLength < $this->mPos + strlen( $cont ) ) |