Index: trunk/phase3/includes/Defines.php |
— | — | @@ -202,6 +202,9 @@ |
203 | 203 | define( 'SFH_NO_HASH', 1 ); |
204 | 204 | define( 'SFH_OBJECT_ARGS', 2 ); |
205 | 205 | |
| 206 | +# Flags for Parser::setLinkHook |
| 207 | +define( 'SLH_PATTERN', 1 ); |
| 208 | + |
206 | 209 | # Flags for Parser::replaceLinkHolders |
207 | 210 | define( 'RLH_FOR_UPDATE', 1 ); |
208 | 211 | |
Index: trunk/phase3/includes/parser/Parser_LinkHooks.php |
— | — | @@ -12,10 +12,9 @@ |
13 | 13 | */ |
14 | 14 | const VERSION = '1.6.4'; |
15 | 15 | |
16 | | - # Flags for Parser::setFunctionHook |
| 16 | + # Flags for Parser::setLinkHook |
17 | 17 | # Also available as global constants from Defines.php |
18 | | - const SFH_NO_HASH = 1; |
19 | | - const SFH_OBJECT_ARGS = 2; |
| 18 | + const SLH_PATTERN = 1; |
20 | 19 | |
21 | 20 | # Constants needed for external link processing |
22 | 21 | # Everything except bracket, space, or control characters |
— | — | @@ -23,162 +22,86 @@ |
24 | 23 | const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)([^][<>"\\x00-\\x20\\x7F]+) |
25 | 24 | \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sx'; |
26 | 25 | |
27 | | - // State constants for the definition list colon extraction |
28 | | - const COLON_STATE_TEXT = 0; |
29 | | - const COLON_STATE_TAG = 1; |
30 | | - const COLON_STATE_TAGSTART = 2; |
31 | | - const COLON_STATE_CLOSETAG = 3; |
32 | | - const COLON_STATE_TAGSLASH = 4; |
33 | | - const COLON_STATE_COMMENT = 5; |
34 | | - const COLON_STATE_COMMENTDASH = 6; |
35 | | - const COLON_STATE_COMMENTDASHDASH = 7; |
| 26 | + /**#@+ |
| 27 | + * @private |
| 28 | + */ |
| 29 | + # Persistent: |
| 30 | + var $mLinkHooks; |
36 | 31 | |
37 | | - // Flags for preprocessToDom |
38 | | - const PTD_FOR_INCLUSION = 1; |
| 32 | + /**#@-*/ |
39 | 33 | |
40 | | - // Allowed values for $this->mOutputType |
41 | | - // Parameter to startExternalParse(). |
42 | | - const OT_HTML = 1; |
43 | | - const OT_WIKI = 2; |
44 | | - const OT_PREPROCESS = 3; |
45 | | - const OT_MSG = 3; |
46 | | - |
47 | | - // Marker Suffix needs to be accessible staticly. |
48 | | - const MARKER_SUFFIX = "-QINU\x7f"; |
49 | | - |
50 | 34 | /** |
51 | | - * Replace unusual URL escape codes with their equivalent characters |
52 | | - * @param string |
53 | | - * @return string |
54 | | - * @static |
55 | | - * @todo This can merge genuinely required bits in the path or query string, |
56 | | - * breaking legit URLs. A proper fix would treat the various parts of |
57 | | - * the URL differently; as a workaround, just use the output for |
58 | | - * statistical records, not for actual linking/output. |
| 35 | + * Constructor |
| 36 | + * |
| 37 | + * @public |
59 | 38 | */ |
60 | | - static function replaceUnusualEscapes( $url ) { |
61 | | - return preg_replace_callback( '/%[0-9A-Fa-f]{2}/', |
62 | | - array( __CLASS__, 'replaceUnusualEscapesCallback' ), $url ); |
| 39 | + function __construct( $conf = array() ) { |
| 40 | + parent::__construct( $conf ); |
| 41 | + $this->mLinkHooks = array(); |
63 | 42 | } |
64 | | - |
| 43 | + |
65 | 44 | /** |
66 | | - * Callback function used in replaceUnusualEscapes(). |
67 | | - * Replaces unusual URL escape codes with their equivalent character |
68 | | - * @static |
69 | | - * @private |
| 45 | + * Do various kinds of initialisation on the first call of the parser |
70 | 46 | */ |
71 | | - private static function replaceUnusualEscapesCallback( $matches ) { |
72 | | - $char = urldecode( $matches[0] ); |
73 | | - $ord = ord( $char ); |
74 | | - // Is it an unsafe or HTTP reserved character according to RFC 1738? |
75 | | - if ( $ord > 32 && $ord < 127 && strpos( '<>"#{}|\^~[]`;/?', $char ) === false ) { |
76 | | - // No, shouldn't be escaped |
77 | | - return $char; |
78 | | - } else { |
79 | | - // Yes, leave it escaped |
80 | | - return $matches[0]; |
| 47 | + function firstCallInit() { |
| 48 | + parent::__construct(); /* NOTE(review): runs the parent constructor (without $conf) on every call, before the mFirstCall guard below - confirm this is intended */ |
| 49 | + if ( !$this->mFirstCall ) { |
| 50 | + return; |
81 | 51 | } |
| 52 | + $this->mFirstCall = false; |
| 53 | + |
| 54 | + wfProfileIn( __METHOD__ ); |
| 55 | + |
| 56 | + $this->setHook( 'pre', array( $this, 'renderPreTag' ) ); |
| 57 | + CoreParserFunctions::register( $this ); |
| 58 | + CoreLinkFunctions::register( $this ); /* registers the core link hooks through setLinkHook() */ |
| 59 | + $this->initialiseVariables(); |
| 60 | + |
| 61 | + wfRunHooks( 'ParserFirstCallInit', array( &$this ) ); |
| 62 | + wfProfileOut( __METHOD__ ); |
82 | 63 | } |
83 | | - |
84 | | - /* |
85 | | - * Return a three-element array: leading whitespace, string contents, trailing whitespace |
| 64 | + |
| 65 | + /** |
| 66 | + * Create a link hook, e.g. [[Namespace:...|display]] |
| 67 | + * The callback function should have the form: |
| 68 | + * function myLinkCallback( $parser, $holders, $markers, |
| 69 | + * Title $title, $titleText, &$sortText = null, &$leadingColon = false ) { ... } |
| 70 | + * |
| 71 | + * Or with SLH_PATTERN: |
| 72 | + * function myLinkCallback( $parser, $holders, $markers, |
| 73 | + * &$titleText, &$sortText = null, &$leadingColon = false ) { ... } |
| 74 | + * |
| 75 | + * The callback may return one of the following values: |
| 76 | + * String) Text result of the link |
| 77 | + * True) (Treat as link) Parse the link according to normal link rules |
| 78 | + * False) (Bad link) Just output the raw wikitext (You may modify the text first) |
| 79 | + * |
| 80 | + * @public |
| 81 | + * |
| 82 | + * @param integer|string $ns The Namespace ID or regex pattern if SLH_PATTERN is set |
| 83 | + * @param mixed $callback The callback function (and object) to use |
| 84 | + * @param integer $flags a combination of the following flags: |
| 85 | + * SLH_PATTERN Use a regex link pattern rather than a namespace |
| 86 | + * |
| 87 | + * @return The old callback function for this namespace or pattern, if any |
86 | 88 | */ |
87 | | - public static function splitWhitespace( $s ) { |
88 | | - $ltrimmed = ltrim( $s ); |
89 | | - $w1 = substr( $s, 0, strlen( $s ) - strlen( $ltrimmed ) ); |
90 | | - $trimmed = rtrim( $ltrimmed ); |
91 | | - $diff = strlen( $ltrimmed ) - strlen( $trimmed ); |
92 | | - if ( $diff > 0 ) { |
93 | | - $w2 = substr( $ltrimmed, -$diff ); |
94 | | - } else { |
95 | | - $w2 = ''; |
96 | | - } |
97 | | - return array( $w1, $trimmed, $w2 ); |
| 89 | + function setLinkHook( $ns, $callback, $flags = 0 ) { |
| 90 | + if( ( $flags & SLH_PATTERN ) && !is_string($ns) ) /* pattern hooks are keyed by a regex string */ |
| 91 | + throw new MWException( __METHOD__.'() expecting a regex string pattern.' ); |
| 92 | + elseif( !( $flags & SLH_PATTERN ) && !is_int($ns) ) /* the previous bitwise-OR test with ~SLH_PATTERN was never zero, so every pattern hook was rejected here */ |
| 93 | + throw new MWException( __METHOD__.'() expecting a namespace index.' ); |
| 94 | + $oldVal = isset( $this->mLinkHooks[$ns] ) ? $this->mLinkHooks[$ns][0] : null; /* previous callback stored under this key, or null */ |
| 95 | + $this->mLinkHooks[$ns] = array( $callback, $flags ); |
| 96 | + return $oldVal; |
98 | 97 | } |
99 | 98 | |
100 | | - /// Clean up argument array - refactored in 1.9 so parserfunctions can use it, too. |
101 | | - static function createAssocArgs( $args ) { |
102 | | - $assocArgs = array(); |
103 | | - $index = 1; |
104 | | - foreach( $args as $arg ) { |
105 | | - $eqpos = strpos( $arg, '=' ); |
106 | | - if ( $eqpos === false ) { |
107 | | - $assocArgs[$index++] = $arg; |
108 | | - } else { |
109 | | - $name = trim( substr( $arg, 0, $eqpos ) ); |
110 | | - $value = trim( substr( $arg, $eqpos+1 ) ); |
111 | | - if ( $value === false ) { |
112 | | - $value = ''; |
113 | | - } |
114 | | - if ( $name !== false ) { |
115 | | - $assocArgs[$name] = $value; |
116 | | - } |
117 | | - } |
118 | | - } |
119 | | - |
120 | | - return $assocArgs; |
121 | | - } |
122 | | - |
123 | 99 | /** |
124 | | - * Static function to get a template |
125 | | - * Can be overridden via ParserOptions::setTemplateCallback(). |
| 100 | + * Get all registered link hook identifiers |
| 101 | + * |
| 102 | + * @return array |
126 | 103 | */ |
127 | | - static function statelessFetchTemplate( $title, $parser=false ) { |
128 | | - $text = $skip = false; |
129 | | - $finalTitle = $title; |
130 | | - $deps = array(); |
131 | | - |
132 | | - // Loop to fetch the article, with up to 1 redirect |
133 | | - for ( $i = 0; $i < 2 && is_object( $title ); $i++ ) { |
134 | | - # Give extensions a chance to select the revision instead |
135 | | - $id = false; // Assume current |
136 | | - wfRunHooks( 'BeforeParserFetchTemplateAndtitle', array( $parser, &$title, &$skip, &$id ) ); |
137 | | - |
138 | | - if( $skip ) { |
139 | | - $text = false; |
140 | | - $deps[] = array( |
141 | | - 'title' => $title, |
142 | | - 'page_id' => $title->getArticleID(), |
143 | | - 'rev_id' => null ); |
144 | | - break; |
145 | | - } |
146 | | - $rev = $id ? Revision::newFromId( $id ) : Revision::newFromTitle( $title ); |
147 | | - $rev_id = $rev ? $rev->getId() : 0; |
148 | | - // If there is no current revision, there is no page |
149 | | - if( $id === false && !$rev ) { |
150 | | - $linkCache = LinkCache::singleton(); |
151 | | - $linkCache->addBadLinkObj( $title ); |
152 | | - } |
153 | | - |
154 | | - $deps[] = array( |
155 | | - 'title' => $title, |
156 | | - 'page_id' => $title->getArticleID(), |
157 | | - 'rev_id' => $rev_id ); |
158 | | - |
159 | | - if( $rev ) { |
160 | | - $text = $rev->getText(); |
161 | | - } elseif( $title->getNamespace() == NS_MEDIAWIKI ) { |
162 | | - global $wgLang; |
163 | | - $message = $wgLang->lcfirst( $title->getText() ); |
164 | | - $text = wfMsgForContentNoTrans( $message ); |
165 | | - if( wfEmptyMsg( $message, $text ) ) { |
166 | | - $text = false; |
167 | | - break; |
168 | | - } |
169 | | - } else { |
170 | | - break; |
171 | | - } |
172 | | - if ( $text === false ) { |
173 | | - break; |
174 | | - } |
175 | | - // Redirect? |
176 | | - $finalTitle = $title; |
177 | | - $title = Title::newFromRedirect( $text ); |
178 | | - } |
179 | | - return array( |
180 | | - 'text' => $text, |
181 | | - 'finalTitle' => $finalTitle, |
182 | | - 'deps' => $deps ); |
| 104 | + function getLinkHooks() { |
| 105 | + return array_keys( $this->mLinkHooks ); |
183 | 106 | } |
184 | 107 | |
185 | 108 | /** |
— | — | @@ -224,8 +147,7 @@ |
225 | 148 | |
226 | 149 | $offset = 0; |
227 | 150 | $offsetStack = array(); |
228 | | - $markerReplacer = new LinkMarkerReplacer( array( &$this, 'replaceInternalLinksCallback' ) ); |
229 | | - $markerReplacer->holders( $holders ); |
| 151 | + $markers = new LinkMarkerReplacer( $this, $holders, array( &$this, 'replaceInternalLinksCallback' ) ); |
230 | 152 | while( true ) { |
231 | 153 | $startBracketOffset = strpos( $s, '[[', $offset ); |
232 | 154 | $endBracketOffset = strpos( $s, ']]', $offset ); |
— | — | @@ -257,7 +179,7 @@ |
258 | 180 | # Create markers only for valid links |
259 | 181 | if( preg_match( $titleRegex, $titleText ) ) { |
260 | 182 | # Store the text for the marker |
261 | | - $marker = $markerReplacer->addMarker($titleText, $paramText); |
| 183 | + $marker = $markers->addMarker($titleText, $paramText); |
262 | 184 | # Replace the current link with the marker |
263 | 185 | $s = substr($s,0,$startBracketOffset). |
264 | 186 | $marker. |
— | — | @@ -282,14 +204,14 @@ |
283 | 205 | |
284 | 206 | # Now expand our tree |
285 | 207 | wfProfileIn( __METHOD__.'-expand' ); |
286 | | - $s = $markerReplacer->expand( $s ); |
| 208 | + $s = $markers->expand( $s ); |
287 | 209 | wfProfileOut( __METHOD__.'-expand' ); |
288 | 210 | |
289 | 211 | wfProfileOut( __METHOD__ ); |
290 | 212 | return $holders; |
291 | 213 | } |
292 | 214 | |
293 | | - function replaceInternalLinksCallback( $markerReplacer, $titleText, $paramText ) { |
| 215 | + function replaceInternalLinksCallback( $parser, $holders, $markers, $titleText, $paramText ) { |
294 | 216 | wfProfileIn( __METHOD__ ); |
295 | 217 | $wt = isset($paramText) ? "[[$titleText|$paramText]]" : "[[$titleText]]"; |
296 | 218 | wfProfileIn( __METHOD__."-misc" ); |
— | — | @@ -322,17 +244,31 @@ |
323 | 245 | $ns = $title->getNamespace(); |
324 | 246 | wfProfileOut( __METHOD__."-title" ); |
325 | 247 | |
326 | | - $callback = array( 'CoreLinkFunctions', 'defaultLinkHook' ); |
327 | | - $args = array( $markerReplacer, $title, $titleText, &$paramText, &$leadingColon ); |
328 | | - $return = call_user_func_array( $callback, $args ); |
| 248 | + # Default for Namespaces is a default link |
| 249 | + # ToDo: Default for patterns is plain wikitext |
| 250 | + $return = true; |
| 251 | + if( isset($this->mLinkHooks[$ns]) ) { |
| 252 | + list( $callback, $flags ) = $this->mLinkHooks[$ns]; |
| 253 | + if( $flags & SLH_PATTERN ) { |
| 254 | + $args = array( $parser, $holders, $markers, $titleText, &$paramText, &$leadingColon ); |
| 255 | + } else { |
| 256 | + $args = array( $parser, $holders, $markers, $title, $titleText, &$paramText, &$leadingColon ); |
| 257 | + } |
| 258 | + # Workaround for PHP bug 35229 and similar |
| 259 | + if ( !is_callable( $callback ) ) { |
| 260 | + throw new MWException( "Link hook for $ns is not callable\n" ); /* report the key that was looked up; the old message interpolated a variable not set in this code */ |
| 261 | + } |
| 262 | + $return = call_user_func_array( $callback, $args ); |
| 263 | + } |
| 264 | + if( $return === true ) { |
| 265 | + # True (treat as plain link) was returned, call the defaultLinkHook |
| 266 | + $args = array( $parser, $holders, $markers, $title, $titleText, &$paramText, &$leadingColon ); |
| 267 | + $return = call_user_func_array( array( 'CoreLinkFunctions', 'defaultLinkHook' ), $args ); |
| 268 | + } |
329 | 269 | if( $return === false ) { |
330 | 270 | # False (no link) was returned, output plain wikitext |
331 | 271 | # Build it again as the hook is allowed to modify $paramText |
332 | 272 | return isset($paramText) ? "[[$titleText|$paramText]]" : "[[$titleText]]"; |
333 | | - } elseif( $return === true ) { |
334 | | - # True (treat as plain link) was returned, call the defaultLinkHook |
335 | | - $args = array( $markerReplacer, $title, $titleText, &$paramText, &$leadingColon ); |
336 | | - $return = call_user_func_array( array( &$this, 'defaultLinkHook' ), $args ); |
337 | 273 | } |
338 | 274 | # Content was returned, return it |
339 | 275 | return $return; |
— | — | @@ -342,20 +278,16 @@ |
343 | 279 | |
344 | 280 | class LinkMarkerReplacer { |
345 | 281 | |
346 | | - protected $markers, $nextId, $holders; |
| 282 | + protected $markers, $nextId, $parser, $holders, $callback; |
347 | 283 | |
348 | | - function __construct( $callback ) { |
349 | | - $this->nextId = 0; |
350 | | - $this->markers = array(); |
| 284 | + function __construct( $parser, $holders, $callback ) { |
| 285 | + $this->nextId = 0; |
| 286 | + $this->markers = array(); |
| 287 | + $this->parser = $parser; |
| 288 | + $this->holders = $holders; |
351 | 289 | $this->callback = $callback; |
352 | | - $this->holders = null; |
353 | 290 | } |
354 | 291 | |
355 | | - # Note: This is a bit of an ugly way to do this. It works for now, but before |
356 | | - # this feature becomes usable we should come up with a better arg list. |
357 | | - # $parser, $holders, and $linkMarkers appear to be 3 needed ones |
358 | | - function holders( $holders = null ) { return wfSetVar( $this->holders, $holders ); } |
359 | | - |
360 | 292 | function addMarker($titleText, $paramText) { |
361 | 293 | $id = $this->nextId++; |
362 | 294 | $this->markers[$id] = array( $titleText, $paramText ); |
— | — | @@ -375,6 +307,8 @@ |
376 | 308 | if( !array_key_exists($id, $this->markers) ) return $m[0]; |
377 | 309 | $args = $this->markers[$id]; |
378 | 310 | array_unshift( $args, $this ); |
| 311 | + array_unshift( $args, $this->holders ); |
| 312 | + array_unshift( $args, $this->parser ); |
379 | 313 | return call_user_func_array( $this->callback, $args ); |
380 | 314 | } |
381 | 315 | |
Index: trunk/phase3/includes/parser/CoreLinkFunctions.php |
— | — | @@ -6,14 +6,13 @@ |
7 | 7 | */ |
8 | 8 | class CoreLinkFunctions { |
9 | 9 | static function register( $parser ) { |
10 | | - |
11 | | - |
| 10 | + $parser->setLinkHook( NS_CATEGORY, array( __CLASS__, 'categoryLinkHook' ) ); |
| 11 | + return true; |
12 | 12 | } |
13 | 13 | |
14 | | - static function defaultLinkHook( $markers, Title $title, $titleText, &$displayText = null, &$leadingColon = false ) { |
15 | | - # Warning: This hook should NEVER return true as it is the fallback |
16 | | - # default for when other hooks return true |
17 | | - if( $markers->findMarker( $displayText ) ) { |
| 14 | + static function defaultLinkHook( $parser, $holders, $markers, |
| 15 | + Title $title, $titleText, &$displayText = null, &$leadingColon = false ) { |
| 16 | + if( isset($displayText) && $markers->findMarker( $displayText ) ) { |
18 | 17 | # There are links inside of the displayText |
19 | 18 | # For backwards compatibility the deepest links are dominant so this |
20 | 19 | # link should not be handled |
— | — | @@ -21,7 +20,28 @@ |
22 | 21 | # Return false so that this link is reverted back to WikiText |
23 | 22 | return false; |
24 | 23 | } |
25 | | - return $markers->holders()->makeHolder( $title, isset($displayText) ? $displayText : $titleText, '', '', '' ); |
| 24 | + return $holders->makeHolder( $title, isset($displayText) ? $displayText : $titleText, '', '', '' ); |
26 | 25 | } |
27 | 26 | |
| 27 | + static function categoryLinkHook( $parser, $holders, $markers, |
| 28 | + Title $title, $titleText, &$sortText = null, &$leadingColon = false ) { |
| 29 | + global $wgContLang; |
| 30 | + # When a category link starts with a : treat it as a normal link |
| 31 | + if( $leadingColon ) return true; |
| 32 | + if( isset($sortText) && $markers->findMarker( $sortText ) ) { |
| 33 | + # There are links inside of the sortText |
| 34 | + # For backwards compatibility the deepest links are dominant so this |
| 35 | + # link should not be handled |
| 36 | + $sortText = $markers->expand($sortText); |
| 37 | + # Return false so that this link is reverted back to WikiText |
| 38 | + return false; |
| 39 | + } |
| 40 | + if( !isset($sortText) ) $sortText = $parser->getDefaultSort(); |
| 41 | + $sortText = Sanitizer::decodeCharReferences( $sortText ); |
| 42 | + $sortText = str_replace( "\n", '', $sortText ); |
| 43 | + $sortText = $wgContLang->convertCategoryKey( $sortText ); |
| 44 | + $parser->mOutput->addCategory( $title->getDBkey(), $sortText ); |
| 45 | + return ''; |
| 46 | + } |
| 47 | + |
28 | 48 | } |