Index: trunk/phase3/maintenance/parserTests.txt |
— | — | @@ -3878,7 +3878,7 @@ |
3879 | 3879 | !! input |
3880 | 3880 | {{div style|"><script>alert(document.cookie)</script>}} |
3881 | 3881 | !! result |
3882 | | -<div>Magic div</div> |
| 3882 | +<div style="float: right;"><script>alert(document.cookie)</script>">Magic div</div> |
3883 | 3883 | |
3884 | 3884 | !! end |
3885 | 3885 | |
— | — | @@ -3887,7 +3887,7 @@ |
3888 | 3888 | !! input |
3889 | 3889 | {{div style|" ><script>alert(document.cookie)</script>}} |
3890 | 3890 | !! result |
3891 | | -<div style="float: right;">Magic div</div> |
| 3891 | +<div style="float: right;"><script>alert(document.cookie)</script>">Magic div</div> |
3892 | 3892 | |
3893 | 3893 | !! end |
3894 | 3894 | |
— | — | @@ -4151,7 +4151,7 @@ |
4152 | 4152 | |
4153 | 4153 | |
4154 | 4154 | !! test |
4155 | | -Parser hook: case insensetive |
| 4155 | +Parser hook: case insensitive |
4156 | 4156 | !! input |
4157 | 4157 | <TAG>input</TAG> |
4158 | 4158 | !! result |
— | — | @@ -4165,7 +4165,7 @@ |
4166 | 4166 | |
4167 | 4167 | |
4168 | 4168 | !! test |
4169 | | -Parser hook: case insensetive, redux |
| 4169 | +Parser hook: case insensitive, redux |
4170 | 4170 | !! input |
4171 | 4171 | <TaG>input</TAg> |
4172 | 4172 | !! result |
— | — | @@ -4724,8 +4724,8 @@ |
4725 | 4725 | | |
4726 | 4726 | !! result |
4727 | 4727 | <table> |
4728 | | - |
4729 | | -<u class="|">} > |
| 4728 | +{{{| |
| 4729 | +<u class="|">}}}} > |
4730 | 4730 | <br style="onmouseover='alert(document.cookie);'" /> |
4731 | 4731 | |
4732 | 4732 | MOVE YOUR MOUSE CURSOR OVER THIS TEXT |
— | — | @@ -4749,8 +4749,10 @@ |
4750 | 4750 | > |
4751 | 4751 | }}}blah" onmouseover="alert('hello world');" align="left"'''MOVE MOUSE CURSOR OVER HERE |
4752 | 4752 | !! result |
| 4753 | +<p>{{{| |
| 4754 | +</p> |
4753 | 4755 | <li class="||"> |
4754 | | -blah" onmouseover="alert('hello world');" align="left"<b>MOVE MOUSE CURSOR OVER HERE</b> |
| 4756 | +}}}blah" onmouseover="alert('hello world');" align="left"<b>MOVE MOUSE CURSOR OVER HERE</b> |
4755 | 4757 | |
4756 | 4758 | !! end |
4757 | 4759 | |
— | — | @@ -5251,10 +5253,11 @@ |
5252 | 5254 | section=1 |
5253 | 5255 | !! input |
5254 | 5256 | ==a== |
5255 | | -==legal== <!-- a legal section --> |
| 5257 | +==unmarked== <!-- an unmarked section --> |
5256 | 5258 | ==b== |
5257 | 5259 | !! result |
5258 | 5260 | ==a== |
| 5261 | +==unmarked== <!-- an unmarked section --> |
5259 | 5262 | !! end |
5260 | 5263 | |
5261 | 5264 | !! test |
— | — | @@ -5263,10 +5266,10 @@ |
5264 | 5267 | section=2 |
5265 | 5268 | !! input |
5266 | 5269 | ==a== |
5267 | | -==legal== <!-- a legal section --> |
| 5270 | +==unmarked== <!-- an unmarked section --> |
5268 | 5271 | ==b== |
5269 | 5272 | !! result |
5270 | | -==legal== <!-- a legal section --> |
| 5273 | +==b== |
5271 | 5274 | !! end |
5272 | 5275 | |
5273 | 5276 | !! test |
— | — | @@ -5295,103 +5298,80 @@ |
5296 | 5299 | !! end |
5297 | 5300 | |
5298 | 5301 | |
| 5302 | +# Formerly testing for bug 2587, now resolved by the use of unmarked sections |
| 5303 | +# instead of respecting commented sections |
5299 | 5304 | !! test |
5300 | | -Section extraction prefixed by comment (section 1) (bug 2587) |
| 5305 | +Section extraction prefixed by comment (section 1) |
5301 | 5306 | !! options |
5302 | 5307 | section=1 |
5303 | 5308 | !! input |
5304 | 5309 | <!-- -->==sec1== |
5305 | 5310 | ==sec2== |
5306 | 5311 | !!result |
5307 | | -<!-- -->==sec1== |
| 5312 | +==sec2== |
5308 | 5313 | !!end |
5309 | 5314 | |
5310 | 5315 | !! test |
5311 | | -Section extraction prefixed by comment (section 2) (bug 2587) |
| 5316 | +Section extraction prefixed by comment (section 2) |
5312 | 5317 | !! options |
5313 | 5318 | section=2 |
5314 | 5319 | !! input |
5315 | 5320 | <!-- -->==sec1== |
5316 | 5321 | ==sec2== |
5317 | 5322 | !!result |
5318 | | -==sec2== |
| 5323 | + |
5319 | 5324 | !!end |
5320 | 5325 | |
5321 | 5326 | |
| 5327 | +# Formerly testing for bug 2607, now resolved by the use of unmarked sections |
| 5328 | +# instead of respecting HTML-style headings |
5322 | 5329 | !! test |
5323 | | -Section extraction, mixed wiki and html (section 1) (bug 2607) |
| 5330 | +Section extraction, mixed wiki and html (section 1) |
5324 | 5331 | !! options |
5325 | 5332 | section=1 |
5326 | 5333 | !! input |
5327 | | -<h2>1</h2> |
| 5334 | +<h2>unmarked</h2> |
| 5335 | +unmarked |
| 5336 | +==1== |
5328 | 5337 | one |
5329 | 5338 | ==2== |
5330 | 5339 | two |
5331 | | -==3== |
5332 | | -three |
5333 | 5340 | !! result |
5334 | | -<h2>1</h2> |
| 5341 | +==1== |
5335 | 5342 | one |
5336 | 5343 | !! end |
5337 | 5344 | |
5338 | 5345 | !! test |
5339 | | -Section extraction, mixed wiki and html (section 2) (bug 2607) |
| 5346 | +Section extraction, mixed wiki and html (section 2) |
5340 | 5347 | !! options |
5341 | 5348 | section=2 |
5342 | 5349 | !! input |
5343 | | -<h2>1</h2> |
| 5350 | +<h2>unmarked</h2> |
| 5351 | +unmarked |
| 5352 | +==1== |
5344 | 5353 | one |
5345 | 5354 | ==2== |
5346 | 5355 | two |
5347 | | -==3== |
5348 | | -three |
5349 | 5356 | !! result |
5350 | 5357 | ==2== |
5351 | 5358 | two |
5352 | 5359 | !! end |
5353 | 5360 | |
5354 | 5361 | |
| 5362 | +# Formerly testing for bug 3342 |
5355 | 5363 | !! test |
5356 | | -Section extraction, heading surrounded by <noinclude> (bug 3342) |
| 5364 | +Section extraction, heading surrounded by <noinclude> |
5357 | 5365 | !! options |
5358 | 5366 | section=1 |
5359 | 5367 | !! input |
5360 | | -<noinclude>==a==</noinclude> |
5361 | | -text |
| 5368 | +<noinclude>==unmarked==</noinclude> |
| 5369 | +==marked== |
5362 | 5370 | !! result |
5363 | | -<noinclude>==a==</noinclude> |
5364 | | -text |
| 5371 | +==marked== |
5365 | 5372 | !!end |
5366 | 5373 | |
5367 | 5374 | |
5368 | 5375 | !! test |
5369 | | -Section extraction, HTML heading subsections (bug 5272) |
5370 | | -!! options |
5371 | | -section=1 |
5372 | | -!! input |
5373 | | -<h2>a</h2> |
5374 | | -<h3>aa</h3> |
5375 | | -<h2>b</h2> |
5376 | | -!! result |
5377 | | -<h2>a</h2> |
5378 | | -<h3>aa</h3> |
5379 | | -!! end |
5380 | | - |
5381 | | -!! test |
5382 | | -Section extraction, HTML headings should be ignored in extensions (bug 3476) |
5383 | | -!! options |
5384 | | -section=2 |
5385 | | -!! input |
5386 | | -<h2>a</h2> |
5387 | | -<tag> |
5388 | | -<h2>not b</h2> |
5389 | | -</tag> |
5390 | | -<h2>b</h2> |
5391 | | -!! result |
5392 | | -<h2>b</h2> |
5393 | | -!! end |
5394 | | - |
5395 | | -!! test |
5396 | 5376 | Section replacement test (section 0) |
5397 | 5377 | !! options |
5398 | 5378 | replace=0,"xxx" |
— | — | @@ -5723,94 +5703,6 @@ |
5724 | 5704 | |
5725 | 5705 | |
5726 | 5706 | !! test |
5727 | | -Section extraction, HTML headings not at line boundaries (section 0) |
5728 | | -!! options |
5729 | | -section=0 |
5730 | | -!! input |
5731 | | -<h2>Evil</h2><i>blah blah blah</i> |
5732 | | - |
5733 | | -evil blah |
5734 | | - |
5735 | | -<h2>Nice</h2> |
5736 | | - |
5737 | | -nice blah |
5738 | | - |
5739 | | -<i>extra evil</i><h2>Extra nasty</h2> |
5740 | | - |
5741 | | -extra nasty |
5742 | | -!! result |
5743 | | -!! end |
5744 | | - |
5745 | | -!! test |
5746 | | -Section extraction, HTML headings not at line boundaries (section 1) |
5747 | | -!! options |
5748 | | -section=1 |
5749 | | -!! input |
5750 | | -<h2>Evil</h2><i>blah blah blah</i> |
5751 | | - |
5752 | | -evil blah |
5753 | | - |
5754 | | -<h2>Nice</h2> |
5755 | | - |
5756 | | -nice blah |
5757 | | - |
5758 | | -<i>extra evil</i><h2>Extra nasty</h2> |
5759 | | - |
5760 | | -extra nasty |
5761 | | -!! result |
5762 | | -<h2>Evil</h2><i>blah blah blah</i> |
5763 | | - |
5764 | | -evil blah |
5765 | | -!! end |
5766 | | - |
5767 | | -!! test |
5768 | | -Section extraction, HTML headings not at line boundaries (section 2) |
5769 | | -!! options |
5770 | | -section=2 |
5771 | | -!! input |
5772 | | -<h2>Evil</h2><i>blah blah blah</i> |
5773 | | - |
5774 | | -evil blah |
5775 | | - |
5776 | | -<h2>Nice</h2> |
5777 | | - |
5778 | | -nice blah |
5779 | | - |
5780 | | -<i>extra evil</i><h2>Extra nasty</h2> |
5781 | | - |
5782 | | -extra nasty |
5783 | | -!! result |
5784 | | -<h2>Nice</h2> |
5785 | | - |
5786 | | -nice blah |
5787 | | - |
5788 | | -<i>extra evil</i> |
5789 | | -!! end |
5790 | | - |
5791 | | -!! test |
5792 | | -Section extraction, HTML headings not at line boundaries (section 3) |
5793 | | -!! options |
5794 | | -section=3 |
5795 | | -!! input |
5796 | | -<h2>Evil</h2><i>blah blah blah</i> |
5797 | | - |
5798 | | -evil blah |
5799 | | - |
5800 | | -<h2>Nice</h2> |
5801 | | - |
5802 | | -nice blah |
5803 | | - |
5804 | | -<i>extra evil</i><h2>Extra nasty</h2> |
5805 | | - |
5806 | | -extra nasty |
5807 | | -!! result |
5808 | | -<h2>Extra nasty</h2> |
5809 | | - |
5810 | | -extra nasty |
5811 | | -!! end |
5812 | | - |
5813 | | - |
5814 | | -!! test |
5815 | 5707 | Section extraction, heading followed by pre with 20 spaces (bug 6398) |
5816 | 5708 | !! options |
5817 | 5709 | section=1 |
Index: trunk/phase3/includes/Defines.php |
— | — | @@ -263,4 +263,17 @@ |
264 | 264 | # Hook support constants |
265 | 265 | define( 'MW_SUPPORTS_EDITFILTERMERGED', 1 ); |
266 | 266 | |
| 267 | +# Allowed values for Parser::$mOutputType |
| 268 | +# Parameter to Parser::startExternalParse(). |
| 269 | +define( 'OT_HTML', 1 ); |
| 270 | +define( 'OT_WIKI', 2 ); |
| 271 | +define( 'OT_MSG' , 3 ); |
| 272 | +define( 'OT_PREPROCESS', 4 ); |
267 | 273 | |
| 274 | +# Flags for Parser::setFunctionHook |
| 275 | +define( 'SFH_NO_HASH', 1 ); |
| 276 | +define( 'SFH_OBJECT_ARGS', 2 ); |
| 277 | + |
| 278 | +# Flags for Parser::replaceLinkHolders |
| 279 | +define( 'RLH_FOR_UPDATE', 1 ); |
| 280 | + |
Index: trunk/phase3/includes/Parser.php |
— | — | @@ -7,56 +7,7 @@ |
8 | 8 | * @addtogroup Parser |
9 | 9 | */ |
10 | 10 | |
11 | | -/** |
12 | | - * Update this version number when the ParserOutput format |
13 | | - * changes in an incompatible way, so the parser cache |
14 | | - * can automatically discard old data. |
15 | | - */ |
16 | | -define( 'MW_PARSER_VERSION', '1.6.2' ); |
17 | 11 | |
18 | | -define( 'RLH_FOR_UPDATE', 1 ); |
19 | | - |
20 | | -# Allowed values for $mOutputType |
21 | | -define( 'OT_HTML', 1 ); |
22 | | -define( 'OT_WIKI', 2 ); |
23 | | -define( 'OT_MSG' , 3 ); |
24 | | -define( 'OT_PREPROCESS', 4 ); |
25 | | - |
26 | | -# Flags for setFunctionHook |
27 | | -define( 'SFH_NO_HASH', 1 ); |
28 | | - |
29 | | -# string parameter for extractTags which will cause it |
30 | | -# to strip HTML comments in addition to regular |
31 | | -# <XML>-style tags. This should not be anything we |
32 | | -# may want to use in wikisyntax |
33 | | -define( 'STRIP_COMMENTS', 'HTMLCommentStrip' ); |
34 | | - |
35 | | -# Constants needed for external link processing |
36 | | -define( 'HTTP_PROTOCOLS', 'http:\/\/|https:\/\/' ); |
37 | | -# Everything except bracket, space, or control characters |
38 | | -define( 'EXT_LINK_URL_CLASS', '[^][<>"\\x00-\\x20\\x7F]' ); |
39 | | -# Including space, but excluding newlines |
40 | | -define( 'EXT_LINK_TEXT_CLASS', '[^\]\\x0a\\x0d]' ); |
41 | | -define( 'EXT_IMAGE_FNAME_CLASS', '[A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]' ); |
42 | | -define( 'EXT_IMAGE_EXTENSIONS', 'gif|png|jpg|jpeg' ); |
43 | | -define( 'EXT_LINK_BRACKETED', '/\[(\b(' . wfUrlProtocols() . ')'. |
44 | | - EXT_LINK_URL_CLASS.'+) *('.EXT_LINK_TEXT_CLASS.'*?)\]/S' ); |
45 | | -define( 'EXT_IMAGE_REGEX', |
46 | | - '/^('.HTTP_PROTOCOLS.')'. # Protocol |
47 | | - '('.EXT_LINK_URL_CLASS.'+)\\/'. # Hostname and path |
48 | | - '('.EXT_IMAGE_FNAME_CLASS.'+)\\.((?i)'.EXT_IMAGE_EXTENSIONS.')$/S' # Filename |
49 | | -); |
50 | | - |
51 | | -// State constants for the definition list colon extraction |
52 | | -define( 'MW_COLON_STATE_TEXT', 0 ); |
53 | | -define( 'MW_COLON_STATE_TAG', 1 ); |
54 | | -define( 'MW_COLON_STATE_TAGSTART', 2 ); |
55 | | -define( 'MW_COLON_STATE_CLOSETAG', 3 ); |
56 | | -define( 'MW_COLON_STATE_TAGSLASH', 4 ); |
57 | | -define( 'MW_COLON_STATE_COMMENT', 5 ); |
58 | | -define( 'MW_COLON_STATE_COMMENTDASH', 6 ); |
59 | | -define( 'MW_COLON_STATE_COMMENTDASHDASH', 7 ); |
60 | | - |
61 | 12 | /** |
62 | 13 | * PHP Parser - Processes wiki markup (which uses a more user-friendly |
63 | 14 | * syntax, such as "[[link]]" for making links), and provides a one-way |
— | — | @@ -92,23 +43,51 @@ |
93 | 44 | */ |
94 | 45 | class Parser |
95 | 46 | { |
96 | | - const VERSION = MW_PARSER_VERSION; |
| 47 | + /** |
| 48 | + * Update this version number when the ParserOutput format |
| 49 | + * changes in an incompatible way, so the parser cache |
| 50 | + * can automatically discard old data. |
| 51 | + */ |
| 52 | + const VERSION = '1.6.2'; |
| 53 | + |
| 54 | + # Flags for Parser::setFunctionHook |
| 55 | + # Also available as global constants from Defines.php |
| 56 | + const SFH_NO_HASH = 1; |
| 57 | + const SFH_OBJECT_ARGS = 2; |
| 58 | + |
| 59 | + # Constants needed for external link processing |
| 60 | + # Everything except bracket, space, or control characters |
| 61 | + const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F]'; |
| 62 | + const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)([^][<>"\\x00-\\x20\\x7F]+)\ |
| 63 | + \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/S'; |
| 64 | + |
| 65 | + // State constants for the definition list colon extraction |
| 66 | + const COLON_STATE_TEXT = 0; |
| 67 | + const COLON_STATE_TAG = 1; |
| 68 | + const COLON_STATE_TAGSTART = 2; |
| 69 | + const COLON_STATE_CLOSETAG = 3; |
| 70 | + const COLON_STATE_TAGSLASH = 4; |
| 71 | + const COLON_STATE_COMMENT = 5; |
| 72 | + const COLON_STATE_COMMENTDASH = 6; |
| 73 | + const COLON_STATE_COMMENTDASHDASH = 7; |
| 74 | + |
97 | 75 | /**#@+ |
98 | 76 | * @private |
99 | 77 | */ |
100 | 78 | # Persistent: |
101 | 79 | var $mTagHooks, $mTransparentTagHooks, $mFunctionHooks, $mFunctionSynonyms, $mVariables, |
102 | | - $mImageParams, $mImageParamsMagicArray; |
| 80 | + $mImageParams, $mImageParamsMagicArray, $mStripList, $mMarkerSuffix, |
| 81 | + $mExtLinkBracketedRegex; |
103 | 82 | |
104 | 83 | # Cleared with clearState(): |
105 | 84 | var $mOutput, $mAutonumber, $mDTopen, $mStripState; |
106 | 85 | var $mIncludeCount, $mArgStack, $mLastSection, $mInPre; |
107 | | - var $mInterwikiLinkHolders, $mLinkHolders, $mUniqPrefix; |
108 | | - var $mIncludeSizes, $mDefaultSort; |
109 | | - var $mTemplates, // cache of already loaded templates, avoids |
110 | | - // multiple SQL queries for the same string |
| 86 | + var $mInterwikiLinkHolders, $mLinkHolders; |
| 87 | + var $mIncludeSizes, $mPPNodeCount, $mDefaultSort; |
| 88 | + var $mTplExpandCache,// empty-frame expansion cache |
111 | 89 | $mTemplatePath; // stores an unsorted hash of all the templates already loaded |
112 | 90 | // in this path. Used for loop detection. |
| 91 | + var $mTplRedirCache, $mTplDomCache, $mHeadings; |
113 | 92 | |
114 | 93 | # Temporary |
115 | 94 | # These are variables reset at least once per parse regardless of $clearState |
— | — | @@ -127,11 +106,15 @@ |
128 | 107 | * |
129 | 108 | * @public |
130 | 109 | */ |
131 | | - function Parser() { |
| 110 | + function __construct( $conf = array() ) { |
132 | 111 | $this->mTagHooks = array(); |
133 | 112 | $this->mTransparentTagHooks = array(); |
134 | 113 | $this->mFunctionHooks = array(); |
135 | 114 | $this->mFunctionSynonyms = array( 0 => array(), 1 => array() ); |
| 115 | + $this->mStripList = array( 'nowiki', 'gallery' ); |
| 116 | + $this->mMarkerSuffix = "-QINU\x7f"; |
| 117 | + $this->mExtLinkBracketedRegex = '/\[(\b(' . wfUrlProtocols() . ')'. |
| 118 | + '[^][<>"\\x00-\\x20\\x7F]+) *([^\]\\x0a\\x0d]*?)\]/S'; |
136 | 119 | $this->mFirstCall = true; |
137 | 120 | } |
138 | 121 | |
— | — | @@ -145,7 +128,7 @@ |
146 | 129 | |
147 | 130 | wfProfileIn( __METHOD__ ); |
148 | 131 | global $wgAllowDisplayTitle, $wgAllowSlowParserFunctions; |
149 | | - |
| 132 | + |
150 | 133 | $this->setHook( 'pre', array( $this, 'renderPreTag' ) ); |
151 | 134 | |
152 | 135 | $this->setFunctionHook( 'int', array( 'CoreParserFunctions', 'intFunction' ), SFH_NO_HASH ); |
— | — | @@ -204,7 +187,7 @@ |
205 | 188 | $this->mDTopen = false; |
206 | 189 | $this->mIncludeCount = array(); |
207 | 190 | $this->mStripState = new StripState; |
208 | | - $this->mArgStack = array(); |
| 191 | + $this->mArgStack = false; |
209 | 192 | $this->mInPre = false; |
210 | 193 | $this->mInterwikiLinkHolders = array( |
211 | 194 | 'texts' => array(), |
— | — | @@ -225,21 +208,26 @@ |
226 | 209 | * Using it at the front also gives us a little extra robustness |
227 | 210 | * since it shouldn't match when butted up against identifier-like |
228 | 211 | * string constructs. |
| 212 | + * |
| 213 | + * Must not consist of all title characters, or else it will change |
| 214 | + * the behaviour of <nowiki> in a link. |
229 | 215 | */ |
230 | | - $this->mUniqPrefix = "\x07UNIQ" . Parser::getRandomString(); |
| 216 | + #$this->mUniqPrefix = "\x07UNIQ" . Parser::getRandomString(); |
| 217 | + $this->mUniqPrefix = "\x7fUNIQ" . Parser::getRandomString(); |
231 | 218 | |
232 | 219 | # Clear these on every parse, bug 4549 |
233 | | - $this->mTemplates = array(); |
234 | 220 | $this->mTemplatePath = array(); |
| 221 | + $this->mTplExpandCache = $this->mTplRedirCache = $this->mTplDomCache = array(); |
235 | 222 | |
236 | 223 | $this->mShowToc = true; |
237 | 224 | $this->mForceTocPosition = false; |
238 | 225 | $this->mIncludeSizes = array( |
239 | | - 'pre-expand' => 0, |
240 | 226 | 'post-expand' => 0, |
241 | | - 'arg' => 0 |
| 227 | + 'arg' => 0, |
242 | 228 | ); |
| 229 | + $this->mPPNodeCount = 0; |
243 | 230 | $this->mDefaultSort = false; |
| 231 | + $this->mHeadings = array(); |
244 | 232 | |
245 | 233 | wfRunHooks( 'ParserClearState', array( &$this ) ); |
246 | 234 | wfProfileOut( __METHOD__ ); |
— | — | @@ -302,7 +290,7 @@ |
303 | 291 | } |
304 | 292 | $this->setOutputType( OT_HTML ); |
305 | 293 | wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) ); |
306 | | - $text = $this->strip( $text, $this->mStripState ); |
| 294 | + # No more strip! |
307 | 295 | wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) ); |
308 | 296 | $text = $this->internalParse( $text ); |
309 | 297 | $text = $this->mStripState->unstripGeneral( $text ); |
— | — | @@ -335,17 +323,17 @@ |
336 | 324 | //!JF Move to its own function |
337 | 325 | |
338 | 326 | $uniq_prefix = $this->mUniqPrefix; |
339 | | - $matches = array(); |
| 327 | + $matches = array(); |
340 | 328 | $elements = array_keys( $this->mTransparentTagHooks ); |
341 | | - $text = Parser::extractTagsAndParams( $elements, $text, $matches, $uniq_prefix ); |
| 329 | + $text = Parser::extractTagsAndParams( $elements, $text, $matches, $uniq_prefix ); |
342 | 330 | |
343 | | - foreach( $matches as $marker => $data ) { |
344 | | - list( $element, $content, $params, $tag ) = $data; |
345 | | - $tagName = strtolower( $element ); |
346 | | - if( isset( $this->mTransparentTagHooks[$tagName] ) ) { |
347 | | - $output = call_user_func_array( $this->mTransparentTagHooks[$tagName], |
348 | | - array( $content, $params, $this ) ); |
349 | | - } else { |
| 331 | + foreach( $matches as $marker => $data ) { |
| 332 | + list( $element, $content, $params, $tag ) = $data; |
| 333 | + $tagName = strtolower( $element ); |
| 334 | + if( isset( $this->mTransparentTagHooks[$tagName] ) ) { |
| 335 | + $output = call_user_func_array( $this->mTransparentTagHooks[$tagName], |
| 336 | + array( $content, $params, $this ) ); |
| 337 | + } else { |
350 | 338 | $output = $tag; |
351 | 339 | } |
352 | 340 | $this->mStripState->general->setPair( $marker, $output ); |
— | — | @@ -390,7 +378,7 @@ |
391 | 379 | if ( max( $this->mIncludeSizes ) > 1000 ) { |
392 | 380 | $max = $this->mOptions->getMaxIncludeSize(); |
393 | 381 | $text .= "<!-- \n" . |
394 | | - "Pre-expand include size: {$this->mIncludeSizes['pre-expand']} bytes\n" . |
| 382 | + "Preprocessor node count: {$this->mPPNodeCount}\n" . |
395 | 383 | "Post-expand include size: {$this->mIncludeSizes['post-expand']} bytes\n" . |
396 | 384 | "Template argument size: {$this->mIncludeSizes['arg']} bytes\n" . |
397 | 385 | "Maximum: $max bytes\n" . |
— | — | @@ -412,7 +400,6 @@ |
413 | 401 | function recursiveTagParse( $text ) { |
414 | 402 | wfProfileIn( __METHOD__ ); |
415 | 403 | wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) ); |
416 | | - $text = $this->strip( $text, $this->mStripState ); |
417 | 404 | wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) ); |
418 | 405 | $text = $this->internalParse( $text ); |
419 | 406 | wfProfileOut( __METHOD__ ); |
— | — | @@ -433,12 +420,11 @@ |
434 | 421 | $this->mRevisionId = $revid; |
435 | 422 | } |
436 | 423 | wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) ); |
437 | | - $text = $this->strip( $text, $this->mStripState ); |
438 | 424 | wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) ); |
| 425 | + $text = $this->replaceVariables( $text ); |
439 | 426 | if ( $this->mOptions->getRemoveComments() ) { |
440 | 427 | $text = Sanitizer::removeHTMLcomments( $text ); |
441 | 428 | } |
442 | | - $text = $this->replaceVariables( $text ); |
443 | 429 | $text = $this->mStripState->unstripBoth( $text ); |
444 | 430 | wfProfileOut( __METHOD__ ); |
445 | 431 | return $text; |
— | — | @@ -508,7 +494,7 @@ |
509 | 495 | $inside = $p[4]; |
510 | 496 | } |
511 | 497 | |
512 | | - $marker = "$uniq_prefix-$element-" . sprintf('%08X', $n++) . "-QINU\x07"; |
| 498 | + $marker = "$uniq_prefix-$element-" . sprintf('%08X', $n++) . $this->mMarkerSuffix; |
513 | 499 | $stripped .= $marker; |
514 | 500 | |
515 | 501 | if ( $close === '/>' ) { |
— | — | @@ -543,125 +529,24 @@ |
544 | 530 | } |
545 | 531 | |
546 | 532 | /** |
547 | | - * Strips and renders nowiki, pre, math, hiero |
548 | | - * If $render is set, performs necessary rendering operations on plugins |
549 | | - * Returns the text, and fills an array with data needed in unstrip() |
550 | | - * |
551 | | - * @param StripState $state |
552 | | - * |
553 | | - * @param bool $stripcomments when set, HTML comments <!-- like this --> |
554 | | - * will be stripped in addition to other tags. This is important |
555 | | - * for section editing, where these comments cause confusion when |
556 | | - * counting the sections in the wikisource |
557 | | - * |
558 | | - * @param array dontstrip contains tags which should not be stripped; |
559 | | - * used to prevent stipping of <gallery> when saving (fixes bug 2700) |
560 | | - * |
561 | | - * @private |
| 533 | + * Get a list of strippable XML-like elements |
562 | 534 | */ |
563 | | - function strip( $text, $state, $stripcomments = false , $dontstrip = array () ) { |
564 | | - global $wgContLang; |
565 | | - wfProfileIn( __METHOD__ ); |
566 | | - $render = ($this->mOutputType == OT_HTML); |
567 | | - |
568 | | - $uniq_prefix = $this->mUniqPrefix; |
569 | | - $commentState = new ReplacementArray; |
570 | | - $nowikiItems = array(); |
571 | | - $generalItems = array(); |
572 | | - |
573 | | - $elements = array_merge( |
574 | | - array( 'nowiki', 'gallery' ), |
575 | | - array_keys( $this->mTagHooks ) ); |
| 535 | + function getStripList() { |
576 | 536 | global $wgRawHtml; |
| 537 | + $elements = $this->mStripList; |
577 | 538 | if( $wgRawHtml ) { |
578 | 539 | $elements[] = 'html'; |
579 | 540 | } |
580 | 541 | if( $this->mOptions->getUseTeX() ) { |
581 | 542 | $elements[] = 'math'; |
582 | 543 | } |
| 544 | + return $elements; |
| 545 | + } |
583 | 546 | |
584 | | - # Removing $dontstrip tags from $elements list (currently only 'gallery', fixing bug 2700) |
585 | | - foreach ( $elements AS $k => $v ) { |
586 | | - if ( !in_array ( $v , $dontstrip ) ) continue; |
587 | | - unset ( $elements[$k] ); |
588 | | - } |
589 | | - |
590 | | - $matches = array(); |
591 | | - $text = Parser::extractTagsAndParams( $elements, $text, $matches, $uniq_prefix ); |
592 | | - |
593 | | - foreach( $matches as $marker => $data ) { |
594 | | - list( $element, $content, $params, $tag ) = $data; |
595 | | - if( $render ) { |
596 | | - $tagName = strtolower( $element ); |
597 | | - wfProfileIn( __METHOD__."-render-$tagName" ); |
598 | | - switch( $tagName ) { |
599 | | - case '!--': |
600 | | - // Comment |
601 | | - if( substr( $tag, -3 ) == '-->' ) { |
602 | | - $output = $tag; |
603 | | - } else { |
604 | | - // Unclosed comment in input. |
605 | | - // Close it so later stripping can remove it |
606 | | - $output = "$tag-->"; |
607 | | - } |
608 | | - break; |
609 | | - case 'html': |
610 | | - if( $wgRawHtml ) { |
611 | | - $output = $content; |
612 | | - break; |
613 | | - } |
614 | | - // Shouldn't happen otherwise. :) |
615 | | - case 'nowiki': |
616 | | - $output = Xml::escapeTagsOnly( $content ); |
617 | | - break; |
618 | | - case 'math': |
619 | | - $output = $wgContLang->armourMath( |
620 | | - MathRenderer::renderMath( $content, $params ) ); |
621 | | - break; |
622 | | - case 'gallery': |
623 | | - $output = $this->renderImageGallery( $content, $params ); |
624 | | - break; |
625 | | - default: |
626 | | - if( isset( $this->mTagHooks[$tagName] ) ) { |
627 | | - $output = call_user_func_array( $this->mTagHooks[$tagName], |
628 | | - array( $content, $params, $this ) ); |
629 | | - } else { |
630 | | - throw new MWException( "Invalid call hook $element" ); |
631 | | - } |
632 | | - } |
633 | | - wfProfileOut( __METHOD__."-render-$tagName" ); |
634 | | - } else { |
635 | | - // Just stripping tags; keep the source |
636 | | - $output = $tag; |
637 | | - } |
638 | | - |
639 | | - // Unstrip the output, to support recursive strip() calls |
640 | | - $output = $state->unstripBoth( $output ); |
641 | | - |
642 | | - if( !$stripcomments && $element == '!--' ) { |
643 | | - $commentState->setPair( $marker, $output ); |
644 | | - } elseif ( $element == 'html' || $element == 'nowiki' ) { |
645 | | - $nowikiItems[$marker] = $output; |
646 | | - } else { |
647 | | - $generalItems[$marker] = $output; |
648 | | - } |
649 | | - } |
650 | | - # Add the new items to the state |
651 | | - # We do this after the loop instead of during it to avoid slowing |
652 | | - # down the recursive unstrip |
653 | | - $state->nowiki->mergeArray( $nowikiItems ); |
654 | | - $state->general->mergeArray( $generalItems ); |
655 | | - |
656 | | - # Unstrip comments unless explicitly told otherwise. |
657 | | - # (The comments are always stripped prior to this point, so as to |
658 | | - # not invoke any extension tags / parser hooks contained within |
659 | | - # a comment.) |
660 | | - if ( !$stripcomments ) { |
661 | | - // Put them all back and forget them |
662 | | - $text = $commentState->replace( $text ); |
663 | | - } |
664 | | - |
665 | | - wfProfileOut( __METHOD__ ); |
| 547 | + /** |
| 548 | + * @deprecated use replaceVariables |
| 549 | + */ |
| 550 | + function strip( $text, $state, $stripcomments = false , $dontstrip = array () ) { |
666 | 551 | return $text; |
667 | 552 | } |
668 | 553 | |
— | — | @@ -700,9 +585,10 @@ |
701 | 586 | * |
702 | 587 | * @private |
703 | 588 | */ |
704 | | - function insertStripItem( $text, &$state ) { |
705 | | - $rnd = $this->mUniqPrefix . '-item' . Parser::getRandomString(); |
706 | | - $state->general->setPair( $rnd, $text ); |
| 589 | + function insertStripItem( $text ) { |
| 590 | + static $n = 0; |
| 591 | + $rnd = "{$this->mUniqPrefix}-item-$n-{$this->mMarkerSuffix}"; |
| 592 | + $this->mStripState->general->setPair( $rnd, $text ); |
707 | 593 | return $rnd; |
708 | 594 | } |
709 | 595 | |
— | — | @@ -786,8 +672,7 @@ |
787 | 673 | |
788 | 674 | /** |
789 | 675 | * Use the HTML tidy PECL extension to use the tidy library in-process, |
790 | | - * saving the overhead of spawning a new process. Currently written to |
791 | | - * the PHP 4.3.x version of the extension, may not work on PHP 5. |
| 676 | + * saving the overhead of spawning a new process. |
792 | 677 | * |
793 | 678 | * 'pear install tidy' should be able to compile the extension module. |
794 | 679 | * |
— | — | @@ -795,20 +680,19 @@ |
796 | 681 | * @static |
797 | 682 | */ |
798 | 683 | function internalTidy( $text ) { |
799 | | - global $wgTidyConf; |
| 684 | + global $wgTidyConf, $IP; |
800 | 685 | $fname = 'Parser::internalTidy'; |
801 | 686 | wfProfileIn( $fname ); |
802 | 687 | |
803 | | - tidy_load_config( $wgTidyConf ); |
804 | | - tidy_set_encoding( 'utf8' ); |
805 | | - tidy_parse_string( $text ); |
806 | | - tidy_clean_repair(); |
807 | | - if( tidy_get_status() == 2 ) { |
| 688 | + $tidy = new tidy; |
| 689 | + $tidy->parseString( $text, $wgTidyConf, 'utf8' ); |
| 690 | + $tidy->cleanRepair(); |
| 691 | + if( $tidy->getStatus() == 2 ) { |
808 | 692 | // 2 is magic number for fatal error |
809 | 693 | // http://www.php.net/manual/en/function.tidy-get-status.php |
810 | 694 | $cleansource = null; |
811 | 695 | } else { |
812 | | - $cleansource = tidy_get_output(); |
| 696 | + $cleansource = tidy_get_output( $tidy ); |
813 | 697 | } |
814 | 698 | wfProfileOut( $fname ); |
815 | 699 | return $cleansource; |
— | — | @@ -1013,7 +897,6 @@ |
1014 | 898 | * @private |
1015 | 899 | */ |
1016 | 900 | function internalParse( $text ) { |
1017 | | - $args = array(); |
1018 | 901 | $isMain = true; |
1019 | 902 | $fname = 'Parser::internalParse'; |
1020 | 903 | wfProfileIn( $fname ); |
— | — | @@ -1029,9 +912,8 @@ |
1030 | 913 | $text = strtr( $text, array( '<noinclude>' => '', '</noinclude>' => '') ); |
1031 | 914 | $text = StringUtils::delimiterReplace( '<includeonly>', '</includeonly>', '', $text ); |
1032 | 915 | |
1033 | | - $text = Sanitizer::removeHTMLtags( $text, array( &$this, 'attributeStripCallback' ), array(), array_keys( $this->mTransparentTagHooks ) ); |
1034 | | - |
1035 | | - $text = $this->replaceVariables( $text, $args ); |
| 916 | + $text = $this->replaceVariables( $text ); |
| 917 | + $text = Sanitizer::removeHTMLtags( $text, array( &$this, 'attributeStripCallback' ), false, array_keys( $this->mTransparentTagHooks ) ); |
1036 | 918 | wfRunHooks( 'InternalParseBeforeLinks', array( &$this, &$text, &$this->mStripState ) ); |
1037 | 919 | |
1038 | 920 | // Tables need to come after variable replacement for things to work |
— | — | @@ -1070,7 +952,7 @@ |
1071 | 953 | * |
1072 | 954 | * @private |
1073 | 955 | */ |
1074 | | - function &doMagicLinks( &$text ) { |
| 956 | + function doMagicLinks( $text ) { |
1075 | 957 | wfProfileIn( __METHOD__ ); |
1076 | 958 | $text = preg_replace_callback( |
1077 | 959 | '!(?: # Start cases |
— | — | @@ -1134,8 +1016,8 @@ |
1135 | 1017 | wfProfileIn( $fname ); |
1136 | 1018 | for ( $i = 6; $i >= 1; --$i ) { |
1137 | 1019 | $h = str_repeat( '=', $i ); |
1138 | | - $text = preg_replace( "/^{$h}(.+){$h}\\s*$/m", |
1139 | | - "<h{$i}>\\1</h{$i}>\\2", $text ); |
| 1020 | + $text = preg_replace( "/^$h(.+)$h\\s*$/m", |
| 1021 | + "<h$i>\\1</h$i>", $text ); |
1140 | 1022 | } |
1141 | 1023 | wfProfileOut( $fname ); |
1142 | 1024 | return $text; |
— | — | @@ -1339,7 +1221,7 @@ |
1340 | 1222 | |
1341 | 1223 | $sk = $this->mOptions->getSkin(); |
1342 | 1224 | |
1343 | | - $bits = preg_split( EXT_LINK_BRACKETED, $text, -1, PREG_SPLIT_DELIM_CAPTURE ); |
| 1225 | + $bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE ); |
1344 | 1226 | |
1345 | 1227 | $s = $this->replaceFreeExternalLinks( array_shift( $bits ) ); |
1346 | 1228 | |
— | — | @@ -1433,7 +1315,7 @@ |
1434 | 1316 | $remainder = $bits[$i++]; |
1435 | 1317 | |
1436 | 1318 | $m = array(); |
1437 | | - if ( preg_match( '/^('.EXT_LINK_URL_CLASS.'+)(.*)$/s', $remainder, $m ) ) { |
| 1319 | + if ( preg_match( '/^('.self::EXT_LINK_URL_CLASS.'+)(.*)$/s', $remainder, $m ) ) { |
1438 | 1320 | # Found some characters after the protocol that look promising |
1439 | 1321 | $url = $protocol . $m[1]; |
1440 | 1322 | $trail = $m[2]; |
— | — | @@ -1443,7 +1325,7 @@ |
1444 | 1326 | if(strlen($trail) == 0 && |
1445 | 1327 | isset($bits[$i]) && |
1446 | 1328 | preg_match('/^'. wfUrlProtocols() . '$/S', $bits[$i]) && |
1447 | | - preg_match( '/^('.EXT_LINK_URL_CLASS.'+)(.*)$/s', $bits[$i + 1], $m )) |
| 1329 | + preg_match( '/^('.self::EXT_LINK_URL_CLASS.'+)(.*)$/s', $bits[$i + 1], $m )) |
1448 | 1330 | { |
1449 | 1331 | # add protocol, arg |
1450 | 1332 | $url .= $bits[$i] . $m[1]; # protocol, url as arg to previous link |
— | — | @@ -1540,7 +1422,7 @@ |
1541 | 1423 | $text = false; |
1542 | 1424 | if ( $this->mOptions->getAllowExternalImages() |
1543 | 1425 | || ( $imagesexception && strpos( $url, $imagesfrom ) === 0 ) ) { |
1544 | | - if ( preg_match( EXT_IMAGE_REGEX, $url ) ) { |
| 1426 | + if ( preg_match( self::EXT_IMAGE_REGEX, $url ) ) { |
1545 | 1427 | # Image found |
1546 | 1428 | $text = $sk->makeExternalImage( htmlspecialchars( $url ) ); |
1547 | 1429 | } |
— | — | @@ -2287,7 +2169,7 @@ |
2288 | 2170 | } |
2289 | 2171 | |
2290 | 2172 | // Ugly state machine to walk through avoiding tags. |
2291 | | - $state = MW_COLON_STATE_TEXT; |
| 2173 | + $state = self::COLON_STATE_TEXT; |
2292 | 2174 | $stack = 0; |
2293 | 2175 | $len = strlen( $str ); |
2294 | 2176 | for( $i = 0; $i < $len; $i++ ) { |
— | — | @@ -2295,11 +2177,11 @@ |
2296 | 2178 | |
2297 | 2179 | switch( $state ) { |
2298 | 2180 | // (Using the number is a performance hack for common cases) |
2299 | | - case 0: // MW_COLON_STATE_TEXT: |
| 2181 | + case 0: // self::COLON_STATE_TEXT: |
2300 | 2182 | switch( $c ) { |
2301 | 2183 | case "<": |
2302 | 2184 | // Could be either a <start> tag or an </end> tag |
2303 | | - $state = MW_COLON_STATE_TAGSTART; |
| 2185 | + $state = self::COLON_STATE_TAGSTART; |
2304 | 2186 | break; |
2305 | 2187 | case ":": |
2306 | 2188 | if( $stack == 0 ) { |
— | — | @@ -2336,41 +2218,41 @@ |
2337 | 2219 | } |
2338 | 2220 | // Skip ahead to next tag start |
2339 | 2221 | $i = $lt; |
2340 | | - $state = MW_COLON_STATE_TAGSTART; |
| 2222 | + $state = self::COLON_STATE_TAGSTART; |
2341 | 2223 | } |
2342 | 2224 | break; |
2343 | | - case 1: // MW_COLON_STATE_TAG: |
| 2225 | + case 1: // self::COLON_STATE_TAG: |
2344 | 2226 | // In a <tag> |
2345 | 2227 | switch( $c ) { |
2346 | 2228 | case ">": |
2347 | 2229 | $stack++; |
2348 | | - $state = MW_COLON_STATE_TEXT; |
| 2230 | + $state = self::COLON_STATE_TEXT; |
2349 | 2231 | break; |
2350 | 2232 | case "/": |
2351 | 2233 | // Slash may be followed by >? |
2352 | | - $state = MW_COLON_STATE_TAGSLASH; |
| 2234 | + $state = self::COLON_STATE_TAGSLASH; |
2353 | 2235 | break; |
2354 | 2236 | default: |
2355 | 2237 | // ignore |
2356 | 2238 | } |
2357 | 2239 | break; |
2358 | | - case 2: // MW_COLON_STATE_TAGSTART: |
| 2240 | + case 2: // self::COLON_STATE_TAGSTART: |
2359 | 2241 | switch( $c ) { |
2360 | 2242 | case "/": |
2361 | | - $state = MW_COLON_STATE_CLOSETAG; |
| 2243 | + $state = self::COLON_STATE_CLOSETAG; |
2362 | 2244 | break; |
2363 | 2245 | case "!": |
2364 | | - $state = MW_COLON_STATE_COMMENT; |
| 2246 | + $state = self::COLON_STATE_COMMENT; |
2365 | 2247 | break; |
2366 | 2248 | case ">": |
2367 | 2249 | // Illegal early close? This shouldn't happen D: |
2368 | | - $state = MW_COLON_STATE_TEXT; |
| 2250 | + $state = self::COLON_STATE_TEXT; |
2369 | 2251 | break; |
2370 | 2252 | default: |
2371 | | - $state = MW_COLON_STATE_TAG; |
| 2253 | + $state = self::COLON_STATE_TAG; |
2372 | 2254 | } |
2373 | 2255 | break; |
2374 | | - case 3: // MW_COLON_STATE_CLOSETAG: |
| 2256 | + case 3: // self::COLON_STATE_CLOSETAG: |
2375 | 2257 | // In a </tag> |
2376 | 2258 | if( $c == ">" ) { |
2377 | 2259 | $stack--; |
— | — | @@ -2379,35 +2261,35 @@ |
2380 | 2262 | wfProfileOut( $fname ); |
2381 | 2263 | return false; |
2382 | 2264 | } |
2383 | | - $state = MW_COLON_STATE_TEXT; |
| 2265 | + $state = self::COLON_STATE_TEXT; |
2384 | 2266 | } |
2385 | 2267 | break; |
2386 | | - case MW_COLON_STATE_TAGSLASH: |
| 2268 | + case self::COLON_STATE_TAGSLASH: |
2387 | 2269 | if( $c == ">" ) { |
2388 | 2270 | // Yes, a self-closed tag <blah/> |
2389 | | - $state = MW_COLON_STATE_TEXT; |
| 2271 | + $state = self::COLON_STATE_TEXT; |
2390 | 2272 | } else { |
2391 | 2273 | // Probably we're jumping the gun, and this is an attribute |
2392 | | - $state = MW_COLON_STATE_TAG; |
| 2274 | + $state = self::COLON_STATE_TAG; |
2393 | 2275 | } |
2394 | 2276 | break; |
2395 | | - case 5: // MW_COLON_STATE_COMMENT: |
| 2277 | + case 5: // self::COLON_STATE_COMMENT: |
2396 | 2278 | if( $c == "-" ) { |
2397 | | - $state = MW_COLON_STATE_COMMENTDASH; |
| 2279 | + $state = self::COLON_STATE_COMMENTDASH; |
2398 | 2280 | } |
2399 | 2281 | break; |
2400 | | - case MW_COLON_STATE_COMMENTDASH: |
| 2282 | + case self::COLON_STATE_COMMENTDASH: |
2401 | 2283 | if( $c == "-" ) { |
2402 | | - $state = MW_COLON_STATE_COMMENTDASHDASH; |
| 2284 | + $state = self::COLON_STATE_COMMENTDASHDASH; |
2403 | 2285 | } else { |
2404 | | - $state = MW_COLON_STATE_COMMENT; |
| 2286 | + $state = self::COLON_STATE_COMMENT; |
2405 | 2287 | } |
2406 | 2288 | break; |
2407 | | - case MW_COLON_STATE_COMMENTDASHDASH: |
| 2289 | + case self::COLON_STATE_COMMENTDASHDASH: |
2408 | 2290 | if( $c == ">" ) { |
2409 | | - $state = MW_COLON_STATE_TEXT; |
| 2291 | + $state = self::COLON_STATE_TEXT; |
2410 | 2292 | } else { |
2411 | | - $state = MW_COLON_STATE_COMMENT; |
| 2293 | + $state = self::COLON_STATE_COMMENT; |
2412 | 2294 | } |
2413 | 2295 | break; |
2414 | 2296 | default: |
— | — | @@ -2592,9 +2474,9 @@ |
2593 | 2475 | case 'numberofpages': |
2594 | 2476 | return $varCache[$index] = $wgContLang->formatNum( SiteStats::pages() ); |
2595 | 2477 | case 'numberofadmins': |
2596 | | - return $varCache[$index] = $wgContLang->formatNum( SiteStats::admins() ); |
| 2478 | + return $varCache[$index] = $wgContLang->formatNum( SiteStats::admins() ); |
2597 | 2479 | case 'numberofedits': |
2598 | | - return $varCache[$index] = $wgContLang->formatNum( SiteStats::edits() ); |
| 2480 | + return $varCache[$index] = $wgContLang->formatNum( SiteStats::edits() ); |
2599 | 2481 | case 'currenttimestamp': |
2600 | 2482 | return $varCache[$index] = wfTimestampNow(); |
2601 | 2483 | case 'localtimestamp': |
— | — | @@ -2633,189 +2515,479 @@ |
2634 | 2516 | wfProfileIn( $fname ); |
2635 | 2517 | $variableIDs = MagicWord::getVariableIDs(); |
2636 | 2518 | |
2637 | | - $this->mVariables = array(); |
2638 | | - foreach ( $variableIDs as $id ) { |
2639 | | - $mw =& MagicWord::get( $id ); |
2640 | | - $mw->addToArray( $this->mVariables, $id ); |
2641 | | - } |
| 2519 | + $this->mVariables = new MagicWordArray( $variableIDs ); |
2642 | 2520 | wfProfileOut( $fname ); |
2643 | 2521 | } |
2644 | 2522 | |
2645 | 2523 | /** |
2646 | | - * parse any parentheses in format ((title|part|part)) |
2647 | | - * and call callbacks to get a replacement text for any found piece |
| 2524 | + * Parse any parentheses in format ((title|part|part)) and return the document tree |
| 2525 | + * This is the ghost of replace_variables(). |
2648 | 2526 | * |
2649 | 2527 | * @param string $text The text to parse |
2650 | | - * @param array $callbacks rules in form: |
2651 | | - * '{' => array( # opening parentheses |
2652 | | - * 'end' => '}', # closing parentheses |
2653 | | - * 'cb' => array(2 => callback, # replacement callback to call if {{..}} is found |
2654 | | - * 3 => callback # replacement callback to call if {{{..}}} is found |
2655 | | - * ) |
2656 | | - * ) |
2657 | | - * 'min' => 2, # Minimum parenthesis count in cb |
2658 | | - * 'max' => 3, # Maximum parenthesis count in cb |
2659 | 2528 | * @private |
2660 | 2529 | */ |
2661 | | - function replace_callback ($text, $callbacks) { |
| 2530 | + function preprocessToDom ( $text ) { |
2662 | 2531 | wfProfileIn( __METHOD__ ); |
2663 | | - $openingBraceStack = array(); # this array will hold a stack of parentheses which are not closed yet |
2664 | | - $lastOpeningBrace = -1; # last not closed parentheses |
| 2532 | + wfProfileIn( __METHOD__.'-makexml' ); |
2665 | 2533 | |
2666 | | - $validOpeningBraces = implode( '', array_keys( $callbacks ) ); |
| 2534 | + static $msgRules, $normalRules; |
| 2535 | + if ( !$msgRules ) { |
| 2536 | + $msgRules = array( |
| 2537 | + '{' => array( |
| 2538 | + 'end' => '}', |
| 2539 | + 'names' => array( |
| 2540 | + 2 => 'template', |
| 2541 | + ), |
| 2542 | + 'min' => 2, |
| 2543 | + 'max' => 2, |
| 2544 | + ), |
| 2545 | + '[' => array( |
| 2546 | + 'end' => ']', |
| 2547 | + 'names' => array( 2 => null ), |
| 2548 | + 'min' => 2, |
| 2549 | + 'max' => 2, |
| 2550 | + ) |
| 2551 | + ); |
| 2552 | + $normalRules = array( |
| 2553 | + '{' => array( |
| 2554 | + 'end' => '}', |
| 2555 | + 'names' => array( |
| 2556 | + 2 => 'template', |
| 2557 | + 3 => 'tplarg', |
| 2558 | + ), |
| 2559 | + 'min' => 2, |
| 2560 | + 'max' => 3, |
| 2561 | + ), |
| 2562 | + '[' => array( |
| 2563 | + 'end' => ']', |
| 2564 | + 'names' => array( 2 => null ), |
| 2565 | + 'min' => 2, |
| 2566 | + 'max' => 2, |
| 2567 | + ) |
| 2568 | + ); |
| 2569 | + } |
| 2570 | + if ( $this->ot['msg'] ) { |
| 2571 | + $rules = $msgRules; |
| 2572 | + } else { |
| 2573 | + $rules = $normalRules; |
| 2574 | + } |
2667 | 2575 | |
2668 | | - $i = 0; |
| 2576 | + $extElements = implode( '|', $this->getStripList() ); |
| 2577 | + // Use "A" modifier (anchored) instead of "^", because ^ doesn't work with an offset |
| 2578 | + $extElementsRegex = "/($extElements)(?:\s|\/>|>)|(!--)/iA"; |
| 2579 | + |
| 2580 | + $stack = array(); # Stack of unclosed parentheses |
| 2581 | + $stackIndex = -1; # Stack read pointer |
| 2582 | + |
| 2583 | + $searchBase = implode( '', array_keys( $rules ) ) . '<'; |
| 2584 | + |
| 2585 | + $i = -1; # Input pointer, starts out pointing to a pseudo-newline before the start |
| 2586 | + $topAccum = '<root>'; # Top level text accumulator |
| 2587 | + $accum =& $topAccum; # Current text accumulator |
| 2588 | + $findEquals = false; # True to find equals signs in arguments |
| 2589 | + $findHeading = false; # True to look at LF characters for possible headings |
| 2590 | + $findPipe = false; # True to take notice of pipe characters |
| 2591 | + $headingIndex = 1; |
| 2592 | + $noMoreGT = false; # True if there are no more greater-than (>) signs right of $i |
| 2593 | + |
2669 | 2594 | while ( $i < strlen( $text ) ) { |
2670 | | - # Find next opening brace, closing brace or pipe |
2671 | | - if ( $lastOpeningBrace == -1 ) { |
2672 | | - $currentClosing = ''; |
2673 | | - $search = $validOpeningBraces; |
| 2595 | + if ( $i == -1 ) { |
| 2596 | + $found = 'line-start'; |
| 2597 | + $curChar = ''; |
2674 | 2598 | } else { |
2675 | | - $currentClosing = $openingBraceStack[$lastOpeningBrace]['braceEnd']; |
2676 | | - $search = $validOpeningBraces . '|' . $currentClosing; |
| 2599 | + # Find next opening brace, closing brace or pipe |
| 2600 | + $search = $searchBase; |
| 2601 | + if ( $stackIndex == -1 ) { |
| 2602 | + $currentClosing = ''; |
| 2603 | + // Look for headings only at the top stack level |
| 2604 | + // Among other things, this resolves the ambiguity between = |
| 2605 | + // for headings and = for template arguments |
| 2606 | + $search .= "\n"; |
| 2607 | + } else { |
| 2608 | + $currentClosing = $stack[$stackIndex]['close']; |
| 2609 | + $search .= $currentClosing; |
| 2610 | + } |
| 2611 | + if ( $findPipe ) { |
| 2612 | + $search .= '|'; |
| 2613 | + } |
| 2614 | + if ( $findEquals ) { |
| 2615 | + $search .= '='; |
| 2616 | + } |
| 2617 | + $rule = null; |
| 2618 | + # Output literal section, advance input counter |
| 2619 | + $literalLength = strcspn( $text, $search, $i ); |
| 2620 | + if ( $literalLength > 0 ) { |
| 2621 | + $accum .= htmlspecialchars( substr( $text, $i, $literalLength ) ); |
| 2622 | + $i += $literalLength; |
| 2623 | + } |
| 2624 | + if ( $i >= strlen( $text ) ) { |
| 2625 | + if ( $currentClosing == "\n" ) { |
| 2626 | + // Do a past-the-end run to finish off the heading |
| 2627 | + $curChar = ''; |
| 2628 | + $found = 'line-end'; |
| 2629 | + } else { |
| 2630 | + # All done |
| 2631 | + break; |
| 2632 | + } |
| 2633 | + } else { |
| 2634 | + $curChar = $text[$i]; |
| 2635 | + if ( $curChar == '|' ) { |
| 2636 | + $found = 'pipe'; |
| 2637 | + } elseif ( $curChar == '=' ) { |
| 2638 | + $found = 'equals'; |
| 2639 | + } elseif ( $curChar == '<' ) { |
| 2640 | + $found = 'angle'; |
| 2641 | + } elseif ( $curChar == "\n" ) { |
| 2642 | + if ( $stackIndex == -1 ) { |
| 2643 | + $found = 'line-start'; |
| 2644 | + } else { |
| 2645 | + $found = 'line-end'; |
| 2646 | + } |
| 2647 | + } elseif ( $curChar == $currentClosing ) { |
| 2648 | + $found = 'close'; |
| 2649 | + } elseif ( isset( $rules[$curChar] ) ) { |
| 2650 | + $found = 'open'; |
| 2651 | + $rule = $rules[$curChar]; |
| 2652 | + } else { |
| 2653 | + # Some versions of PHP have a strcspn which stops on null characters |
| 2654 | + # Ignore and continue |
| 2655 | + ++$i; |
| 2656 | + continue; |
| 2657 | + } |
| 2658 | + } |
2677 | 2659 | } |
2678 | | - $rule = null; |
2679 | | - $i += strcspn( $text, $search, $i ); |
2680 | | - if ( $i < strlen( $text ) ) { |
2681 | | - if ( $text[$i] == '|' ) { |
2682 | | - $found = 'pipe'; |
2683 | | - } elseif ( $text[$i] == $currentClosing ) { |
2684 | | - $found = 'close'; |
2685 | | - } elseif ( isset( $callbacks[$text[$i]] ) ) { |
2686 | | - $found = 'open'; |
2687 | | - $rule = $callbacks[$text[$i]]; |
2688 | | - } else { |
2689 | | - # Some versions of PHP have a strcspn which stops on null characters |
2690 | | - # Ignore and continue |
| 2660 | + |
| 2661 | + if ( $found == 'angle' ) { |
| 2662 | + $matches = false; |
| 2663 | + // Determine element name |
| 2664 | + if ( !preg_match( $extElementsRegex, $text, $matches, 0, $i + 1 ) ) { |
| 2665 | + // Element name missing or not listed |
| 2666 | + $accum .= '<'; |
2691 | 2667 | ++$i; |
2692 | 2668 | continue; |
2693 | 2669 | } |
2694 | | - } else { |
2695 | | - # All done |
2696 | | - break; |
| 2670 | + // Handle comments |
| 2671 | + if ( isset( $matches[2] ) && $matches[2] == '!--' ) { |
| 2672 | + // HTML comment, scan to end |
| 2673 | + $endpos = strpos( $text, '-->', $i + 4 ); |
| 2674 | + if ( $endpos === false ) { |
| 2675 | + // Unclosed comment in input, runs to end |
| 2676 | + $accum .= htmlspecialchars( substr( $text, $i ) ); |
| 2677 | + if ( $this->ot['html'] ) { |
| 2678 | + // Close it so later stripping can remove it |
| 2679 | + $accum .= htmlspecialchars( '-->' ); |
| 2680 | + } |
| 2681 | + $i = strlen( $text ); |
| 2682 | + continue; |
| 2683 | + } |
| 2684 | + $accum .= htmlspecialchars( substr( $text, $i, $endpos - $i + 3 ) ); |
| 2685 | + #$inner = substr( $text, $i + 4, $endpos - $i - 4 ); |
| 2686 | + #$accum .= '<ext><name>!--</name><inner>' . htmlspecialchars( $inner ) . '</inner></ext>'; |
| 2687 | + $i = $endpos + 3; |
| 2688 | + continue; |
| 2689 | + } |
| 2690 | + $name = $matches[1]; |
| 2691 | + $attrStart = $i + strlen( $name ) + 1; |
| 2692 | + |
| 2693 | + // Find end of tag |
| 2694 | + $tagEndPos = $noMoreGT ? false : strpos( $text, '>', $attrStart ); |
| 2695 | + if ( $tagEndPos === false ) { |
| 2696 | + // Infinite backtrack |
| 2697 | + // Disable tag search to prevent worst-case O(N^2) performance |
| 2698 | + $noMoreGT = true; |
| 2699 | + $accum .= '<'; |
| 2700 | + ++$i; |
| 2701 | + continue; |
| 2702 | + } |
| 2703 | + if ( $text[$tagEndPos-1] == '/' ) { |
| 2704 | + $attrEnd = $tagEndPos - 1; |
| 2705 | + $inner = null; |
| 2706 | + $i = $tagEndPos + 1; |
| 2707 | + $close = ''; |
| 2708 | + } else { |
| 2709 | + $attrEnd = $tagEndPos; |
| 2710 | + // Find closing tag |
| 2711 | + if ( preg_match( "/<\/$name\s*>/i", $text, $matches, PREG_OFFSET_CAPTURE, $tagEndPos + 1 ) ) { |
| 2712 | + $inner = substr( $text, $tagEndPos + 1, $matches[0][1] - $tagEndPos - 1 ); |
| 2713 | + $i = $matches[0][1] + strlen( $matches[0][0] ); |
| 2714 | + $close = '<close>' . htmlspecialchars( $matches[0][0] ) . '</close>'; |
| 2715 | + } else { |
| 2716 | + // No end tag -- let it run out to the end of the text. |
| 2717 | + $inner = substr( $text, $tagEndPos + 1 ); |
| 2718 | + $i = strlen( $text ); |
| 2719 | + $close = ''; |
| 2720 | + } |
| 2721 | + } |
| 2722 | + $accum .= '<ext>'; |
| 2723 | + if ( $attrEnd <= $attrStart ) { |
| 2724 | + $attr = ''; |
| 2725 | + } else { |
| 2726 | + $attr = substr( $text, $attrStart, $attrEnd - $attrStart ); |
| 2727 | + } |
| 2728 | + $accum .= '<name>' . htmlspecialchars( $name ) . '</name>' . |
| 2729 | + // Note that the attr element contains the whitespace between name and attribute, |
| 2730 | + // this is necessary for precise reconstruction during pre-save transform. |
| 2731 | + '<attr>' . htmlspecialchars( $attr ) . '</attr>'; |
| 2732 | + if ( $inner !== null ) { |
| 2733 | + $accum .= '<inner>' . htmlspecialchars( $inner ) . '</inner>'; |
| 2734 | + } |
| 2735 | + $accum .= $close . '</ext>'; |
2697 | 2736 | } |
2698 | 2737 | |
2699 | | - if ( $found == 'open' ) { |
2700 | | - # found opening brace, let's add it to parentheses stack |
2701 | | - $piece = array('brace' => $text[$i], |
2702 | | - 'braceEnd' => $rule['end'], |
2703 | | - 'title' => '', |
2704 | | - 'parts' => null); |
| 2738 | + elseif ( $found == 'line-start' ) { |
| 2739 | + // Is this the start of a heading? |
| 2740 | + // Line break belongs before the heading element in any case |
| 2741 | + $accum .= $curChar; |
| 2742 | + $i++; |
| 2743 | + |
| 2744 | + $count = strspn( $text, '=', $i, 6 ); |
| 2745 | + if ( $count > 0 ) { |
| 2746 | + $piece = array( |
| 2747 | + 'open' => "\n", |
| 2748 | + 'close' => "\n", |
| 2749 | + 'parts' => array( str_repeat( '=', $count ) ), |
| 2750 | + 'count' => $count ); |
| 2751 | + $stack[++$stackIndex] = $piece; |
| 2752 | + $i += $count; |
| 2753 | + $accum =& $stack[$stackIndex]['parts'][0]; |
| 2754 | + $findPipe = false; |
| 2755 | + } |
| 2756 | + } |
2705 | 2757 | |
| 2758 | + elseif ( $found == 'line-end' ) { |
| 2759 | + $piece = $stack[$stackIndex]; |
| 2760 | + // A heading must be open, otherwise \n wouldn't have been in the search list |
| 2761 | + assert( $piece['open'] == "\n" ); |
| 2762 | + assert( $stackIndex == 0 ); |
| 2763 | + // Search back through the accumulator to see if it has a proper close |
| 2764 | + // No efficient way to do this in PHP AFAICT: strrev, PCRE search with $ anchor |
| 2765 | + // and rtrim are all O(N) in total size. Optimal would be O(N) in trailing |
| 2766 | + // whitespace size only. |
| 2767 | + $m = false; |
| 2768 | + $count = $piece['count']; |
| 2769 | + if ( preg_match( "/(={{$count}})\s*$/", $accum, $m, 0, $count ) ) { |
| 2770 | + // Found match, output <h> |
| 2771 | + $count = min( strlen( $m[1] ), $count ); |
| 2772 | + $element = "<h level=\"$count\" i=\"$headingIndex\">$accum</h>"; |
| 2773 | + $headingIndex++; |
| 2774 | + } else { |
| 2775 | + // No match, no <h>, just pass down the inner text |
| 2776 | + $element = $accum; |
| 2777 | + } |
| 2778 | + // Unwind the stack |
| 2779 | + // Headings can only occur on the top level, so this is a bit simpler than the |
| 2780 | + // generic stack unwind operation in the close case |
| 2781 | + unset( $stack[$stackIndex--] ); |
| 2782 | + $accum =& $topAccum; |
| 2783 | + $findEquals = false; |
| 2784 | + $findPipe = false; |
| 2785 | + |
| 2786 | + // Append the result to the enclosing accumulator |
| 2787 | + $accum .= $element; |
| 2788 | + // Note that we do NOT increment the input pointer. |
| 2789 | + // This is because the closing linebreak could be the opening linebreak of |
| 2790 | + // another heading. Infinite loops are avoided because the next iteration MUST |
| 2791 | + // hit the heading open case above, which unconditionally increments the |
| 2792 | + // input pointer. |
| 2793 | + } |
| 2794 | + |
| 2795 | + elseif ( $found == 'open' ) { |
2706 | 2796 | # count opening brace characters |
2707 | | - $piece['count'] = strspn( $text, $piece['brace'], $i ); |
2708 | | - $piece['startAt'] = $piece['partStart'] = $i + $piece['count']; |
2709 | | - $i += $piece['count']; |
| 2797 | + $count = strspn( $text, $curChar, $i ); |
2710 | 2798 | |
2711 | 2799 | # we need to add to stack only if opening brace count is enough for one of the rules |
2712 | | - if ( $piece['count'] >= $rule['min'] ) { |
2713 | | - $lastOpeningBrace ++; |
2714 | | - $openingBraceStack[$lastOpeningBrace] = $piece; |
| 2800 | + if ( $count >= $rule['min'] ) { |
| 2801 | + # Add it to the stack |
| 2802 | + $piece = array( |
| 2803 | + 'open' => $curChar, |
| 2804 | + 'close' => $rule['end'], |
| 2805 | + 'count' => $count, |
| 2806 | + 'parts' => array( '' ), |
| 2807 | + 'eqpos' => array(), |
| 2808 | + 'lineStart' => ($i > 0 && $text[$i-1] == "\n"), |
| 2809 | + ); |
| 2810 | + |
| 2811 | + $stackIndex ++; |
| 2812 | + $stack[$stackIndex] = $piece; |
| 2813 | + $accum =& $stack[$stackIndex]['parts'][0]; |
| 2814 | + $findEquals = false; |
| 2815 | + $findPipe = true; |
| 2816 | + } else { |
| 2817 | + # Add literal brace(s) |
| 2818 | + $accum .= htmlspecialchars( str_repeat( $curChar, $count ) ); |
2715 | 2819 | } |
2716 | | - } elseif ( $found == 'close' ) { |
2717 | | - # lets check if it is enough characters for closing brace |
2718 | | - $maxCount = $openingBraceStack[$lastOpeningBrace]['count']; |
2719 | | - $count = strspn( $text, $text[$i], $i, $maxCount ); |
| 2820 | + $i += $count; |
| 2821 | + } |
2720 | 2822 | |
| 2823 | + elseif ( $found == 'close' ) { |
| 2824 | + $piece = $stack[$stackIndex]; |
| 2825 | + # lets check if there are enough characters for closing brace |
| 2826 | + $maxCount = $piece['count']; |
| 2827 | + $count = strspn( $text, $curChar, $i, $maxCount ); |
| 2828 | + |
2721 | 2829 | # check for maximum matching characters (if there are 5 closing |
2722 | 2830 | # characters, we will probably need only 3 - depending on the rules) |
2723 | 2831 | $matchingCount = 0; |
2724 | | - $matchingCallback = null; |
2725 | | - $cbType = $callbacks[$openingBraceStack[$lastOpeningBrace]['brace']]; |
2726 | | - if ( $count > $cbType['max'] ) { |
| 2832 | + $rule = $rules[$piece['open']]; |
| 2833 | + if ( $count > $rule['max'] ) { |
2727 | 2834 | # The specified maximum exists in the callback array, unless the caller |
2728 | 2835 | # has made an error |
2729 | | - $matchingCount = $cbType['max']; |
| 2836 | + $matchingCount = $rule['max']; |
2730 | 2837 | } else { |
2731 | 2838 | # Count is less than the maximum |
2732 | 2839 | # Skip any gaps in the callback array to find the true largest match |
2733 | 2840 | # Need to use array_key_exists not isset because the callback can be null |
2734 | 2841 | $matchingCount = $count; |
2735 | | - while ( $matchingCount > 0 && !array_key_exists( $matchingCount, $cbType['cb'] ) ) { |
| 2842 | + while ( $matchingCount > 0 && !array_key_exists( $matchingCount, $rule['names'] ) ) { |
2736 | 2843 | --$matchingCount; |
2737 | 2844 | } |
2738 | 2845 | } |
2739 | 2846 | |
2740 | 2847 | if ($matchingCount <= 0) { |
| 2848 | + # No matching element found in callback array |
| 2849 | + # Output a literal closing brace and continue |
| 2850 | + $accum .= htmlspecialchars( str_repeat( $curChar, $count ) ); |
2741 | 2851 | $i += $count; |
2742 | 2852 | continue; |
2743 | 2853 | } |
2744 | | - $matchingCallback = $cbType['cb'][$matchingCount]; |
| 2854 | + $name = $rule['names'][$matchingCount]; |
| 2855 | + if ( $name === null ) { |
| 2856 | + // No element, just literal text |
| 2857 | + $element = str_repeat( $piece['open'], $matchingCount ) . |
| 2858 | + implode( '|', $piece['parts'] ) . |
| 2859 | + str_repeat( $rule['end'], $matchingCount ); |
| 2860 | + } else { |
| 2861 | + # Create XML element |
| 2862 | + # Note: $parts is already XML, does not need to be encoded further |
| 2863 | + $parts = $piece['parts']; |
| 2864 | + $title = $parts[0]; |
| 2865 | + unset( $parts[0] ); |
2745 | 2866 | |
2746 | | - # let's set a title or last part (if '|' was found) |
2747 | | - if (null === $openingBraceStack[$lastOpeningBrace]['parts']) { |
2748 | | - $openingBraceStack[$lastOpeningBrace]['title'] = |
2749 | | - substr($text, $openingBraceStack[$lastOpeningBrace]['partStart'], |
2750 | | - $i - $openingBraceStack[$lastOpeningBrace]['partStart']); |
2751 | | - } else { |
2752 | | - $openingBraceStack[$lastOpeningBrace]['parts'][] = |
2753 | | - substr($text, $openingBraceStack[$lastOpeningBrace]['partStart'], |
2754 | | - $i - $openingBraceStack[$lastOpeningBrace]['partStart']); |
| 2867 | + # The invocation is at the start of the line if lineStart is set in |
| 2868 | + # the stack, and all opening brackets are used up. |
| 2869 | + if ( $maxCount == $matchingCount && !empty( $piece['lineStart'] ) ) { |
| 2870 | + $attr = ' lineStart="1"'; |
| 2871 | + } else { |
| 2872 | + $attr = ''; |
| 2873 | + } |
| 2874 | + |
| 2875 | + $element = "<$name$attr>"; |
| 2876 | + $element .= "<title>$title</title>"; |
| 2877 | + $argIndex = 1; |
| 2878 | + foreach ( $parts as $partIndex => $part ) { |
| 2879 | + if ( isset( $piece['eqpos'][$partIndex] ) ) { |
| 2880 | + $eqpos = $piece['eqpos'][$partIndex]; |
| 2881 | + list( $ws1, $argName, $ws2 ) = self::splitWhitespace( substr( $part, 0, $eqpos ) ); |
| 2882 | + list( $ws3, $argValue, $ws4 ) = self::splitWhitespace( substr( $part, $eqpos + 1 ) ); |
| 2883 | + $element .= "<part>$ws1<name>$argName</name>$ws2=$ws3<value>$argValue</value>$ws4</part>"; |
| 2884 | + } else { |
| 2885 | + list( $ws1, $value, $ws2 ) = self::splitWhitespace( $part ); |
| 2886 | + $element .= "<part>$ws1<name index=\"$argIndex\" /><value>$value</value>$ws2</part>"; |
| 2887 | + $argIndex++; |
| 2888 | + } |
| 2889 | + } |
| 2890 | + $element .= "</$name>"; |
2755 | 2891 | } |
2756 | 2892 | |
2757 | | - $pieceStart = $openingBraceStack[$lastOpeningBrace]['startAt'] - $matchingCount; |
2758 | | - $pieceEnd = $i + $matchingCount; |
| 2893 | + # Advance input pointer |
| 2894 | + $i += $matchingCount; |
2759 | 2895 | |
2760 | | - if( is_callable( $matchingCallback ) ) { |
2761 | | - $cbArgs = array ( |
2762 | | - 'text' => substr($text, $pieceStart, $pieceEnd - $pieceStart), |
2763 | | - 'title' => trim($openingBraceStack[$lastOpeningBrace]['title']), |
2764 | | - 'parts' => $openingBraceStack[$lastOpeningBrace]['parts'], |
2765 | | - 'lineStart' => (($pieceStart > 0) && ($text[$pieceStart-1] == "\n")), |
2766 | | - ); |
2767 | | - # finally we can call a user callback and replace piece of text |
2768 | | - $replaceWith = call_user_func( $matchingCallback, $cbArgs ); |
2769 | | - $text = substr($text, 0, $pieceStart) . $replaceWith . substr($text, $pieceEnd); |
2770 | | - $i = $pieceStart + strlen($replaceWith); |
| 2896 | + # Unwind the stack |
| 2897 | + unset( $stack[$stackIndex--] ); |
| 2898 | + if ( $stackIndex == -1 ) { |
| 2899 | + $accum =& $topAccum; |
| 2900 | + $findEquals = false; |
| 2901 | + $findPipe = false; |
2771 | 2902 | } else { |
2772 | | - # null value for callback means that parentheses should be parsed, but not replaced |
2773 | | - $i += $matchingCount; |
| 2903 | + $partCount = count( $stack[$stackIndex]['parts'] ); |
| 2904 | + $accum =& $stack[$stackIndex]['parts'][$partCount - 1]; |
| 2905 | + $findPipe = $stack[$stackIndex]['open'] != "\n"; |
| 2906 | + $findEquals = $findPipe && $partCount > 1 |
| 2907 | + && !isset( $stack[$stackIndex]['eqpos'][$partCount - 1] ); |
2774 | 2908 | } |
2775 | 2909 | |
2776 | | - # reset last opening parentheses, but keep it in case there are unused characters |
2777 | | - $piece = array('brace' => $openingBraceStack[$lastOpeningBrace]['brace'], |
2778 | | - 'braceEnd' => $openingBraceStack[$lastOpeningBrace]['braceEnd'], |
2779 | | - 'count' => $openingBraceStack[$lastOpeningBrace]['count'], |
2780 | | - 'title' => '', |
2781 | | - 'parts' => null, |
2782 | | - 'startAt' => $openingBraceStack[$lastOpeningBrace]['startAt']); |
2783 | | - $openingBraceStack[$lastOpeningBrace--] = null; |
2784 | | - |
| 2910 | + # Re-add the old stack element if it still has unmatched opening characters remaining |
2785 | 2911 | if ($matchingCount < $piece['count']) { |
| 2912 | + $piece['parts'] = array( '' ); |
2786 | 2913 | $piece['count'] -= $matchingCount; |
2787 | | - $piece['startAt'] -= $matchingCount; |
2788 | | - $piece['partStart'] = $piece['startAt']; |
| 2914 | + $piece['eqpos'] = array(); |
2789 | 2915 | # do we still qualify for any callback with remaining count? |
2790 | | - $currentCbList = $callbacks[$piece['brace']]['cb']; |
| 2916 | + $names = $rules[$piece['open']]['names']; |
| 2917 | + $skippedBraces = 0; |
| 2918 | + $enclosingAccum =& $accum; |
2791 | 2919 | while ( $piece['count'] ) { |
2792 | | - if ( array_key_exists( $piece['count'], $currentCbList ) ) { |
2793 | | - $lastOpeningBrace++; |
2794 | | - $openingBraceStack[$lastOpeningBrace] = $piece; |
| 2920 | + if ( array_key_exists( $piece['count'], $names ) ) { |
| 2921 | + $stackIndex++; |
| 2922 | + $stack[$stackIndex] = $piece; |
| 2923 | + $accum =& $stack[$stackIndex]['parts'][0]; |
| 2924 | + $findEquals = true; |
| 2925 | + $findPipe = true; |
2795 | 2926 | break; |
2796 | 2927 | } |
2797 | 2928 | --$piece['count']; |
| 2929 | + $skippedBraces ++; |
2798 | 2930 | } |
| 2931 | + $enclosingAccum .= str_repeat( $piece['open'], $skippedBraces ); |
2799 | 2932 | } |
2800 | | - } elseif ( $found == 'pipe' ) { |
2801 | | - # lets set a title if it is a first separator, or next part otherwise |
2802 | | - if (null === $openingBraceStack[$lastOpeningBrace]['parts']) { |
2803 | | - $openingBraceStack[$lastOpeningBrace]['title'] = |
2804 | | - substr($text, $openingBraceStack[$lastOpeningBrace]['partStart'], |
2805 | | - $i - $openingBraceStack[$lastOpeningBrace]['partStart']); |
2806 | | - $openingBraceStack[$lastOpeningBrace]['parts'] = array(); |
2807 | | - } else { |
2808 | | - $openingBraceStack[$lastOpeningBrace]['parts'][] = |
2809 | | - substr($text, $openingBraceStack[$lastOpeningBrace]['partStart'], |
2810 | | - $i - $openingBraceStack[$lastOpeningBrace]['partStart']); |
2811 | | - } |
2812 | | - $openingBraceStack[$lastOpeningBrace]['partStart'] = ++$i; |
| 2933 | + |
| 2934 | + # Add XML element to the enclosing accumulator |
| 2935 | + $accum .= $element; |
2813 | 2936 | } |
| 2937 | + |
| 2938 | + elseif ( $found == 'pipe' ) { |
| 2939 | + $stack[$stackIndex]['parts'][] = ''; |
| 2940 | + $partsCount = count( $stack[$stackIndex]['parts'] ); |
| 2941 | + $accum =& $stack[$stackIndex]['parts'][$partsCount - 1]; |
| 2942 | + $findEquals = true; |
| 2943 | + ++$i; |
| 2944 | + } |
| 2945 | + |
| 2946 | + elseif ( $found == 'equals' ) { |
| 2947 | + $findEquals = false; |
| 2948 | + $partsCount = count( $stack[$stackIndex]['parts'] ); |
| 2949 | + $stack[$stackIndex]['eqpos'][$partsCount - 1] = strlen( $accum ); |
| 2950 | + $accum .= '='; |
| 2951 | + ++$i; |
| 2952 | + } |
2814 | 2953 | } |
2815 | 2954 | |
| 2955 | + # Output any remaining unclosed brackets |
| 2956 | + foreach ( $stack as $piece ) { |
| 2957 | + if ( $piece['open'] == "\n" ) { |
| 2958 | + $topAccum .= $piece['parts'][0]; |
| 2959 | + } else { |
| 2960 | + $topAccum .= str_repeat( $piece['open'], $piece['count'] ) . implode( '|', $piece['parts'] ); |
| 2961 | + } |
| 2962 | + } |
| 2963 | + $topAccum .= '</root>'; |
| 2964 | + |
| 2965 | + wfProfileOut( __METHOD__.'-makexml' ); |
| 2966 | + wfProfileIn( __METHOD__.'-loadXML' ); |
| 2967 | + $dom = new DOMDocument; |
| 2968 | + if ( !$dom->loadXML( $topAccum ) ) { |
| 2969 | + throw new MWException( __METHOD__.' generated invalid XML' ); |
| 2970 | + } |
| 2971 | + wfProfileOut( __METHOD__.'-loadXML' ); |
2816 | 2972 | wfProfileOut( __METHOD__ ); |
2817 | | - return $text; |
| 2973 | + return $dom; |
2818 | 2974 | } |
2819 | 2975 | |
| 2976 | + /** |
| 2977 | + * Split $s into a three-element array: leading whitespace, trimmed contents, trailing whitespace (as stripped by ltrim()/rtrim()); concatenating the three elements reproduces $s |
| 2978 | + */ |
| 2979 | + public static function splitWhitespace( $s ) { |
| 2980 | + $ltrimmed = ltrim( $s ); |
| 2981 | + $w1 = substr( $s, 0, strlen( $s ) - strlen( $ltrimmed ) ); |
| 2982 | + $trimmed = rtrim( $ltrimmed ); |
| 2983 | + $diff = strlen( $ltrimmed ) - strlen( $trimmed ); |
| 2984 | + if ( $diff > 0 ) { |
| 2985 | + $w2 = substr( $ltrimmed, -$diff ); |
| 2986 | + } else { |
| 2987 | + $w2 = ''; |
| 2988 | + } |
| 2989 | + return array( $w1, $trimmed, $w2 ); |
| 2990 | + } |
| 2991 | + |
2820 | 2992 | /** |
2821 | 2993 | * Replace magic variables, templates, and template arguments |
2822 | 2994 | * with the appropriate text. Templates are substituted recursively, |
— | — | @@ -2827,89 +2999,33 @@ |
2828 | 3000 | * OT_HTML: all templates and magic variables |
2829 | 3001 | * |
2830 | 3002 | * @param string $text The text to transform |
2831 | | - * @param array $args Key-value pairs representing template parameters to substitute |
| 3003 | + * @param PPFrame $frame Object describing the arguments passed to the template |
2832 | 3004 | * @param bool $argsOnly Only do argument (triple-brace) expansion, not double-brace expansion |
2833 | 3005 | * @private |
2834 | 3006 | */ |
2835 | | - function replaceVariables( $text, $args = array(), $argsOnly = false ) { |
| 3007 | + function replaceVariables( $text, $frame = false, $argsOnly = false ) { |
2836 | 3008 | # Prevent too big inclusions |
2837 | 3009 | if( strlen( $text ) > $this->mOptions->getMaxIncludeSize() ) { |
2838 | 3010 | return $text; |
2839 | 3011 | } |
2840 | 3012 | |
2841 | | - $fname = __METHOD__ /*. '-L' . count( $this->mArgStack )*/; |
| 3013 | + $fname = __METHOD__; |
2842 | 3014 | wfProfileIn( $fname ); |
2843 | 3015 | |
2844 | | - # This function is called recursively. To keep track of arguments we need a stack: |
2845 | | - array_push( $this->mArgStack, $args ); |
2846 | | - |
2847 | | - $braceCallbacks = array(); |
2848 | | - if ( !$argsOnly ) { |
2849 | | - $braceCallbacks[2] = array( &$this, 'braceSubstitution' ); |
| 3016 | + if ( $frame === false ) { |
| 3017 | + $frame = new PPFrame( $this ); |
| 3018 | + } elseif ( !( $frame instanceof PPFrame ) ) { |
| 3019 | + throw new MWException( __METHOD__ . ' called using the old argument format' ); |
2850 | 3020 | } |
2851 | | - if ( $this->mOutputType != OT_MSG ) { |
2852 | | - $braceCallbacks[3] = array( &$this, 'argSubstitution' ); |
2853 | | - } |
2854 | | - if ( $braceCallbacks ) { |
2855 | | - $callbacks = array( |
2856 | | - '{' => array( |
2857 | | - 'end' => '}', |
2858 | | - 'cb' => $braceCallbacks, |
2859 | | - 'min' => $argsOnly ? 3 : 2, |
2860 | | - 'max' => isset( $braceCallbacks[3] ) ? 3 : 2, |
2861 | | - ), |
2862 | | - '[' => array( |
2863 | | - 'end' => ']', |
2864 | | - 'cb' => array(2=>null), |
2865 | | - 'min' => 2, |
2866 | | - 'max' => 2, |
2867 | | - ) |
2868 | | - ); |
2869 | | - $text = $this->replace_callback ($text, $callbacks); |
2870 | 3021 | |
2871 | | - array_pop( $this->mArgStack ); |
2872 | | - } |
2873 | | - wfProfileOut( $fname ); |
2874 | | - return $text; |
2875 | | - } |
| 3022 | + $dom = $this->preprocessToDom( $text ); |
| 3023 | + $flags = $argsOnly ? PPFrame::NO_TEMPLATES : 0; |
| 3024 | + $text = $frame->expand( $dom, 0, $flags ); |
2876 | 3025 | |
2877 | | - /** |
2878 | | - * Replace magic variables |
2879 | | - * @private |
2880 | | - */ |
2881 | | - function variableSubstitution( $matches ) { |
2882 | | - global $wgContLang; |
2883 | | - $fname = 'Parser::variableSubstitution'; |
2884 | | - $varname = $wgContLang->lc($matches[1]); |
2885 | | - wfProfileIn( $fname ); |
2886 | | - $skip = false; |
2887 | | - if ( $this->mOutputType == OT_WIKI ) { |
2888 | | - # Do only magic variables prefixed by SUBST |
2889 | | - $mwSubst =& MagicWord::get( 'subst' ); |
2890 | | - if (!$mwSubst->matchStartAndRemove( $varname )) |
2891 | | - $skip = true; |
2892 | | - # Note that if we don't substitute the variable below, |
2893 | | - # we don't remove the {{subst:}} magic word, in case |
2894 | | - # it is a template rather than a magic variable. |
2895 | | - } |
2896 | | - if ( !$skip && array_key_exists( $varname, $this->mVariables ) ) { |
2897 | | - $id = $this->mVariables[$varname]; |
2898 | | - # Now check if we did really match, case sensitive or not |
2899 | | - $mw =& MagicWord::get( $id ); |
2900 | | - if ($mw->match($matches[1])) { |
2901 | | - $text = $this->getVariableValue( $id ); |
2902 | | - $this->mOutput->mContainsOldMagic = true; |
2903 | | - } else { |
2904 | | - $text = $matches[0]; |
2905 | | - } |
2906 | | - } else { |
2907 | | - $text = $matches[0]; |
2908 | | - } |
2909 | 3026 | wfProfileOut( $fname ); |
2910 | 3027 | return $text; |
2911 | 3028 | } |
2912 | 3029 | |
2913 | | - |
2914 | 3030 | /// Clean up argument array - refactored in 1.9 so parserfunctions can use it, too. |
2915 | 3031 | static function createAssocArgs( $args ) { |
2916 | 3032 | $assocArgs = array(); |
— | — | @@ -2941,12 +3057,13 @@ |
2942 | 3058 | * $piece['text']: matched text |
2943 | 3059 | * $piece['title']: the title, i.e. the part before the | |
2944 | 3060 | * $piece['parts']: the parameter array |
| 3061 | + * @param PPFrame $frame The current frame, contains template arguments |
2945 | 3062 | * @return string the text of the template |
2946 | 3063 | * @private |
2947 | 3064 | */ |
2948 | | - function braceSubstitution( $piece ) { |
| 3065 | + function braceSubstitution( $piece, $frame ) { |
2949 | 3066 | global $wgContLang, $wgLang, $wgAllowDisplayTitle, $wgNonincludableNamespaces; |
2950 | | - $fname = __METHOD__ /*. '-L' . count( $this->mArgStack )*/; |
| 3067 | + $fname = __METHOD__; |
2951 | 3068 | wfProfileIn( $fname ); |
2952 | 3069 | wfProfileIn( __METHOD__.'-setup' ); |
2953 | 3070 | |
— | — | @@ -2955,33 +3072,23 @@ |
2956 | 3073 | $nowiki = false; # wiki markup in $text should be escaped |
2957 | 3074 | $noparse = false; # Unsafe HTML tags should not be stripped, etc. |
2958 | 3075 | $noargs = false; # Don't replace triple-brace arguments in $text |
2959 | | - $replaceHeadings = false; # Make the edit section links go to the template not the article |
2960 | | - $headingOffset = 0; # Skip headings when number, to account for those that weren't transcluded. |
2961 | 3076 | $isHTML = false; # $text is HTML, armour it against wikitext transformation |
2962 | 3077 | $forceRawInterwiki = false; # Force interwiki transclusion to be done in raw mode not rendered |
| 3078 | + $isDOM = false; # $text is a DOM node needing expansion |
2963 | 3079 | |
2964 | 3080 | # Title object, where $text came from |
2965 | 3081 | $title = NULL; |
2966 | 3082 | |
2967 | | - $linestart = ''; |
| 3083 | + # $part1 is the bit before the first |, and must contain only title characters. |
| 3084 | + # Various prefixes will be stripped from it later. |
| 3085 | + $titleWithSpaces = $frame->expand( $piece['title'] ); |
| 3086 | + $part1 = trim( $titleWithSpaces ); |
| 3087 | + $titleText = false; |
2968 | 3088 | |
| 3089 | + # Original title text preserved for various purposes |
| 3090 | + $originalTitle = $part1; |
2969 | 3091 | |
2970 | | - # $part1 is the bit before the first |, and must contain only title characters |
2971 | | - # $args is a list of arguments, starting from index 0, not including $part1 |
2972 | | - |
2973 | | - $titleText = $part1 = $piece['title']; |
2974 | | - # If the third subpattern matched anything, it will start with | |
2975 | | - |
2976 | | - if (null == $piece['parts']) { |
2977 | | - $replaceWith = $this->variableSubstitution (array ($piece['text'], $piece['title'])); |
2978 | | - if ($replaceWith != $piece['text']) { |
2979 | | - $text = $replaceWith; |
2980 | | - $found = true; |
2981 | | - $noparse = true; |
2982 | | - $noargs = true; |
2983 | | - } |
2984 | | - } |
2985 | | - |
| 3092 | + # $args is a list of argument nodes, starting from index 0, not including $part1 |
2986 | 3093 | $args = (null == $piece['parts']) ? array() : $piece['parts']; |
2987 | 3094 | wfProfileOut( __METHOD__.'-setup' ); |
2988 | 3095 | |
— | — | @@ -2994,13 +3101,25 @@ |
2995 | 3102 | # 1) Found SUBST but not in the PST phase |
2996 | 3103 | # 2) Didn't find SUBST and in the PST phase |
2997 | 3104 | # In either case, return without further processing |
2998 | | - $text = $piece['text']; |
| 3105 | + $text = '{{' . $frame->implode( '|', $titleWithSpaces, $args ) . '}}'; |
2999 | 3106 | $found = true; |
3000 | 3107 | $noparse = true; |
3001 | 3108 | $noargs = true; |
3002 | 3109 | } |
3003 | 3110 | } |
3004 | 3111 | |
| 3112 | + # Variables |
| 3113 | + if ( !$found && $args->length == 0 ) { |
| 3114 | + $id = $this->mVariables->matchStartToEnd( $part1 ); |
| 3115 | + if ( $id !== false ) { |
| 3116 | + $text = $this->getVariableValue( $id ); |
| 3117 | + $this->mOutput->mContainsOldMagic = true; |
| 3118 | + $found = true; |
| 3119 | + $noparse = true; |
| 3120 | + $noargs = true; |
| 3121 | + } |
| 3122 | + } |
| 3123 | + |
3005 | 3124 | # MSG, MSGNW and RAW |
3006 | 3125 | if ( !$found ) { |
3007 | 3126 | # Check for MSGNW: |
— | — | @@ -3021,7 +3140,7 @@ |
3022 | 3141 | } |
3023 | 3142 | wfProfileOut( __METHOD__.'-modifiers' ); |
3024 | 3143 | |
3025 | | - //save path level before recursing into functions & templates. |
| 3144 | + # Save path level before recursing into functions & templates. |
3026 | 3145 | $lastPathLevel = $this->mTemplatePath; |
3027 | 3146 | |
3028 | 3147 | # Parser functions |
— | — | @@ -3044,18 +3163,35 @@ |
3045 | 3164 | } |
3046 | 3165 | } |
3047 | 3166 | if ( $function ) { |
3048 | | - $funcArgs = array_map( 'trim', $args ); |
3049 | | - $funcArgs = array_merge( array( &$this, trim( substr( $part1, $colonPos + 1 ) ) ), $funcArgs ); |
3050 | | - $result = call_user_func_array( $this->mFunctionHooks[$function], $funcArgs ); |
| 3167 | + list( $callback, $flags ) = $this->mFunctionHooks[$function]; |
| 3168 | + $initialArgs = array( &$this ); |
| 3169 | + $funcArgs = array( trim( substr( $part1, $colonPos + 1 ) ) ); |
| 3170 | + if ( $flags & SFH_OBJECT_ARGS ) { |
| 3171 | + # Add a frame parameter, and pass the arguments as an array |
| 3172 | + $allArgs = $initialArgs; |
| 3173 | + $allArgs[] = $frame; |
| 3174 | + foreach ( $args as $arg ) { |
| 3175 | + $funcArgs[] = $arg; |
| 3176 | + } |
| 3177 | + $allArgs[] = $funcArgs; |
| 3178 | + } else { |
| 3179 | + # Convert arguments to plain text |
| 3180 | + foreach ( $args as $arg ) { |
| 3181 | + $funcArgs[] = trim( $frame->expand( $arg ) ); |
| 3182 | + } |
| 3183 | + $allArgs = array_merge( $initialArgs, $funcArgs ); |
| 3184 | + } |
| 3185 | + |
| 3186 | + $result = call_user_func_array( $callback, $allArgs ); |
3051 | 3187 | $found = true; |
3052 | 3188 | |
3053 | 3189 | // The text is usually already parsed, doesn't need triple-brace tags expanded, etc. |
3054 | | - //$noargs = true; |
3055 | | - //$noparse = true; |
| 3190 | + $noargs = true; |
| 3191 | + $noparse = true; |
3056 | 3192 | |
3057 | 3193 | if ( is_array( $result ) ) { |
3058 | 3194 | if ( isset( $result[0] ) ) { |
3059 | | - $text = $linestart . $result[0]; |
| 3195 | + $text = $result[0]; |
3060 | 3196 | unset( $result[0] ); |
3061 | 3197 | } |
3062 | 3198 | |
— | — | @@ -3063,171 +3199,123 @@ |
3064 | 3200 | // This allows callers to set flags such as nowiki, noparse, found, etc. |
3065 | 3201 | extract( $result ); |
3066 | 3202 | } else { |
3067 | | - $text = $linestart . $result; |
| 3203 | + $text = $result; |
3068 | 3204 | } |
3069 | 3205 | } |
3070 | 3206 | } |
3071 | 3207 | wfProfileOut( __METHOD__ . '-pfunc' ); |
3072 | 3208 | } |
3073 | 3209 | |
3074 | | - # Template table test |
3075 | | - |
3076 | | - # Did we encounter this template already? If yes, it is in the cache |
3077 | | - # and we need to check for loops. |
3078 | | - if ( !$found && isset( $this->mTemplates[$piece['title']] ) ) { |
3079 | | - $found = true; |
3080 | | - |
3081 | | - # Infinite loop test |
3082 | | - if ( isset( $this->mTemplatePath[$part1] ) ) { |
3083 | | - $noparse = true; |
3084 | | - $noargs = true; |
3085 | | - $found = true; |
3086 | | - $text = $linestart . |
3087 | | - "[[$part1]]<!-- WARNING: template loop detected -->"; |
3088 | | - wfDebug( __METHOD__.": template loop broken at '$part1'\n" ); |
3089 | | - } else { |
3090 | | - # set $text to cached message. |
3091 | | - $text = $linestart . $this->mTemplates[$piece['title']]; |
3092 | | - #treat title for cached page the same as others |
3093 | | - $ns = NS_TEMPLATE; |
3094 | | - $subpage = ''; |
3095 | | - $part1 = $this->maybeDoSubpageLink( $part1, $subpage ); |
3096 | | - if ($subpage !== '') { |
3097 | | - $ns = $this->mTitle->getNamespace(); |
3098 | | - } |
3099 | | - $title = Title::newFromText( $part1, $ns ); |
3100 | | - //used by include size checking |
3101 | | - $titleText = $title->getPrefixedText(); |
3102 | | - //used by edit section links |
3103 | | - $replaceHeadings = true; |
3104 | | - |
3105 | | - } |
3106 | | - } |
3107 | | - |
3108 | | - # Load from database |
| 3210 | + # Finish mangling title and then check for loops. |
| 3211 | + # Set $title to a Title object and $titleText to its prefixed text |
3109 | 3212 | if ( !$found ) { |
3110 | | - wfProfileIn( __METHOD__ . '-loadtpl' ); |
3111 | 3213 | $ns = NS_TEMPLATE; |
3112 | | - # declaring $subpage directly in the function call |
3113 | | - # does not work correctly with references and breaks |
3114 | | - # {{/subpage}}-style inclusions |
| 3214 | + # Split the title into page and subpage |
3115 | 3215 | $subpage = ''; |
3116 | 3216 | $part1 = $this->maybeDoSubpageLink( $part1, $subpage ); |
3117 | 3217 | if ($subpage !== '') { |
3118 | 3218 | $ns = $this->mTitle->getNamespace(); |
3119 | 3219 | } |
3120 | 3220 | $title = Title::newFromText( $part1, $ns ); |
3121 | | - |
3122 | | - |
3123 | | - if ( !is_null( $title ) ) { |
| 3221 | + if ( $title ) { |
3124 | 3222 | $titleText = $title->getPrefixedText(); |
3125 | 3223 | # Check for language variants if the template is not found |
3126 | 3224 | if($wgContLang->hasVariants() && $title->getArticleID() == 0){ |
3127 | 3225 | $wgContLang->findVariantLink($part1, $title); |
3128 | 3226 | } |
| 3227 | + # Do infinite loop check |
| 3228 | + if ( isset( $this->mTemplatePath[$titleText] ) ) { |
| 3229 | + $noparse = true; |
| 3230 | + $noargs = true; |
| 3231 | + $found = true; |
| 3232 | + $text = "[[$part1]]" . $this->insertStripItem( '<!-- WARNING: template loop detected -->' ); |
| 3233 | + wfDebug( __METHOD__.": template loop broken at '$titleText'\n" ); |
| 3234 | + } |
| 3235 | + } |
| 3236 | + } |
3129 | 3237 | |
3130 | | - if ( !$title->isExternal() ) { |
3131 | | - if ( $title->getNamespace() == NS_SPECIAL && $this->mOptions->getAllowSpecialInclusion() && $this->ot['html'] ) { |
3132 | | - $text = SpecialPage::capturePath( $title ); |
3133 | | - if ( is_string( $text ) ) { |
3134 | | - $found = true; |
3135 | | - $noparse = true; |
3136 | | - $noargs = true; |
3137 | | - $isHTML = true; |
3138 | | - $this->disableCache(); |
3139 | | - } |
3140 | | - } else if ( $wgNonincludableNamespaces && in_array( $title->getNamespace(), $wgNonincludableNamespaces ) ) { |
3141 | | - $found = false; //access denied |
3142 | | - wfDebug( "$fname: template inclusion denied for " . $title->getPrefixedDBkey() ); |
3143 | | - } else { |
3144 | | - list($articleContent,$title) = $this->fetchTemplateAndtitle( $title ); |
3145 | | - if ( $articleContent !== false ) { |
3146 | | - $found = true; |
3147 | | - $text = $articleContent; |
3148 | | - $replaceHeadings = true; |
3149 | | - } |
| 3238 | + # Load from database |
| 3239 | + if ( !$found && $title ) { |
| 3240 | + wfProfileIn( __METHOD__ . '-loadtpl' ); |
| 3241 | + if ( !$title->isExternal() ) { |
| 3242 | + if ( $title->getNamespace() == NS_SPECIAL && $this->mOptions->getAllowSpecialInclusion() && $this->ot['html'] ) { |
| 3243 | + $text = SpecialPage::capturePath( $title ); |
| 3244 | + if ( is_string( $text ) ) { |
| 3245 | + $found = true; |
| 3246 | + $noparse = true; |
| 3247 | + $noargs = true; |
| 3248 | + $isHTML = true; |
| 3249 | + $this->disableCache(); |
3150 | 3250 | } |
3151 | | - |
3152 | | - # If the title is valid but undisplayable, make a link to it |
3153 | | - if ( !$found && ( $this->ot['html'] || $this->ot['pre'] ) ) { |
3154 | | - $text = "[[:$titleText]]"; |
| 3251 | + } else if ( $wgNonincludableNamespaces && in_array( $title->getNamespace(), $wgNonincludableNamespaces ) ) { |
| 3252 | + $found = false; //access denied |
| 3253 | + wfDebug( "$fname: template inclusion denied for " . $title->getPrefixedDBkey() ); |
| 3254 | + } else { |
| 3255 | + list( $text, $title ) = $this->getTemplateDom( $title ); |
| 3256 | + if ( $text !== false ) { |
3155 | 3257 | $found = true; |
| 3258 | + $isDOM = true; |
3156 | 3259 | } |
3157 | | - } elseif ( $title->isTrans() ) { |
3158 | | - // Interwiki transclusion |
3159 | | - if ( $this->ot['html'] && !$forceRawInterwiki ) { |
3160 | | - $text = $this->interwikiTransclude( $title, 'render' ); |
3161 | | - $isHTML = true; |
3162 | | - $noparse = true; |
3163 | | - } else { |
3164 | | - $text = $this->interwikiTransclude( $title, 'raw' ); |
3165 | | - $replaceHeadings = true; |
3166 | | - } |
3167 | | - $found = true; |
3168 | 3260 | } |
3169 | 3261 | |
3170 | | - # Template cache array insertion |
3171 | | - # Use the original $piece['title'] not the mangled $part1, so that |
3172 | | - # modifiers such as RAW: produce separate cache entries |
3173 | | - if( $found ) { |
3174 | | - if( $isHTML ) { |
3175 | | - // A special page; don't store it in the template cache. |
3176 | | - } else { |
3177 | | - $this->mTemplates[$piece['title']] = $text; |
3178 | | - } |
3179 | | - $text = $linestart . $text; |
| 3262 | + # If the title is valid but undisplayable, make a link to it |
| 3263 | + if ( !$found && ( $this->ot['html'] || $this->ot['pre'] ) ) { |
| 3264 | + $text = "[[:$titleText]]"; |
| 3265 | + $found = true; |
3180 | 3266 | } |
| 3267 | + } elseif ( $title->isTrans() ) { |
| 3268 | + // Interwiki transclusion |
| 3269 | + if ( $this->ot['html'] && !$forceRawInterwiki ) { |
| 3270 | + $text = $this->interwikiTransclude( $title, 'render' ); |
| 3271 | + $isHTML = true; |
| 3272 | + $noparse = true; |
| 3273 | + } else { |
| 3274 | + $text = $this->interwikiTransclude( $title, 'raw' ); |
| 3275 | + } |
| 3276 | + $found = true; |
3181 | 3277 | } |
3182 | 3278 | wfProfileOut( __METHOD__ . '-loadtpl' ); |
3183 | 3279 | } |
3184 | 3280 | |
3185 | | - if ( $found && !$this->incrementIncludeSize( 'pre-expand', strlen( $text ) ) ) { |
3186 | | - # Error, oversize inclusion |
3187 | | - $text = $linestart . |
3188 | | - "[[$titleText]]<!-- WARNING: template omitted, pre-expand include size too large -->"; |
3189 | | - $noparse = true; |
3190 | | - $noargs = true; |
3191 | | - } |
3192 | | - |
3193 | 3281 | # Recursive parsing, escaping and link table handling |
3194 | 3282 | # Only for HTML output |
3195 | 3283 | if ( $nowiki && $found && ( $this->ot['html'] || $this->ot['pre'] ) ) { |
| 3284 | + if ( $isDOM ) { |
| 3285 | + $text = $frame->expand( $text ); |
| 3286 | + } |
3196 | 3287 | $text = wfEscapeWikiText( $text ); |
3197 | 3288 | } elseif ( !$this->ot['msg'] && $found ) { |
3198 | 3289 | if ( $noargs ) { |
3199 | | - $assocArgs = array(); |
| 3290 | + $newFrame = $frame->newChild(); |
3200 | 3291 | } else { |
3201 | 3292 | # Clean up argument array |
3202 | | - $assocArgs = self::createAssocArgs($args); |
| 3293 | + $newFrame = $frame->newChild( $args, $title ); |
3203 | 3294 | # Add a new element to the template recursion path |
3204 | | - $this->mTemplatePath[$part1] = 1; |
| 3295 | + $this->mTemplatePath[$titleText] = 1; |
3205 | 3296 | } |
3206 | 3297 | |
3207 | 3298 | if ( !$noparse ) { |
3208 | | - # If there are any <onlyinclude> tags, only include them |
3209 | | - if ( in_string( '<onlyinclude>', $text ) && in_string( '</onlyinclude>', $text ) ) { |
3210 | | - $replacer = new OnlyIncludeReplacer; |
3211 | | - StringUtils::delimiterReplaceCallback( '<onlyinclude>', '</onlyinclude>', |
3212 | | - array( &$replacer, 'replace' ), $text ); |
3213 | | - $text = $replacer->output; |
3214 | | - } |
3215 | | - # Remove <noinclude> sections and <includeonly> tags |
3216 | | - $text = StringUtils::delimiterReplace( '<noinclude>', '</noinclude>', '', $text ); |
3217 | | - $text = strtr( $text, array( '<includeonly>' => '' , '</includeonly>' => '' ) ); |
3218 | | - |
3219 | | - if( $this->ot['html'] || $this->ot['pre'] ) { |
3220 | | - # Strip <nowiki>, <pre>, etc. |
3221 | | - $text = $this->strip( $text, $this->mStripState ); |
3222 | | - if ( $this->ot['html'] ) { |
3223 | | - $text = Sanitizer::removeHTMLtags( $text, array( &$this, 'replaceVariables' ), $assocArgs ); |
3224 | | - } elseif ( $this->ot['pre'] && $this->mOptions->getRemoveComments() ) { |
3225 | | - $text = Sanitizer::removeHTMLcomments( $text ); |
| 3299 | + if ( $isDOM ) { |
| 3300 | + if ( $titleText !== false && count( $newFrame->args ) == 0 ) { |
| 3301 | + # Expansion is eligible for the empty-frame cache |
| 3302 | + if ( isset( $this->mTplExpandCache[$titleText] ) ) { |
| 3303 | + $text = $this->mTplExpandCache[$titleText]; |
| 3304 | + } else { |
| 3305 | + $text = $newFrame->expand( $text ); |
| 3306 | + $this->mTplExpandCache[$titleText] = $text; |
| 3307 | + } |
| 3308 | + } else { |
| 3309 | + $text = $newFrame->expand( $text ); |
3226 | 3310 | } |
| 3311 | + } else { |
| 3312 | + $text = $this->replaceVariables( $text, $newFrame ); |
3227 | 3313 | } |
3228 | | - $text = $this->replaceVariables( $text, $assocArgs ); |
3229 | 3314 | |
3230 | | - # If the template begins with a table or block-level |
| 3315 | + # strip woz 'ere 2004-07 |
| 3316 | + |
| 3317 | + # Bug 529: if the template begins with a table or block-level |
3231 | 3318 | # element, it should be treated as beginning a new line. |
| 3319 | + # This behaviour is somewhat controversial. |
3232 | 3320 | if (!$piece['lineStart'] && preg_match('/^(?:{\\||:|;|#|\*)/', $text)) /*}*/{ |
3233 | 3321 | $text = "\n" . $text; |
3234 | 3322 | } |
— | — | @@ -3235,59 +3323,39 @@ |
3236 | 3324 | # $noparse and !$noargs |
3237 | 3325 | # Just replace the arguments, not any double-brace items |
3238 | 3326 | # This is used for rendered interwiki transclusion |
3239 | | - $text = $this->replaceVariables( $text, $assocArgs, true ); |
| 3327 | + if ( $isDOM ) { |
| 3328 | + $text = $newFrame->expand( $text, 0, PPFrame::NO_TEMPLATES ); |
| 3329 | + } else { |
| 3330 | + $text = $this->replaceVariables( $text, $newFrame, true ); |
| 3331 | + } |
| 3332 | + } elseif ( $isDOM ) { |
| 3333 | + $text = $frame->expand( $text ); |
3240 | 3334 | } |
| 3335 | + } elseif ( $isDOM ) { |
| 3336 | + $text = $frame->expand( $text, 0, PPFrame::NO_TEMPLATES | PPFrame::NO_ARGS ); |
3241 | 3337 | } |
| 3338 | + |
3242 | 3339 | # Prune lower levels off the recursion check path |
3243 | 3340 | $this->mTemplatePath = $lastPathLevel; |
3244 | 3341 | |
3245 | 3342 | if ( $found && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) { |
3246 | 3343 | # Error, oversize inclusion |
3247 | | - $text = $linestart . |
3248 | | - "[[$titleText]]<!-- WARNING: template omitted, post-expand include size too large -->"; |
| 3344 | + $text = "[[$originalTitle]]" . |
| 3345 | + $this->insertStripItem( '<!-- WARNING: template omitted, post-expand include size too large -->' ); |
3249 | 3346 | $noparse = true; |
3250 | 3347 | $noargs = true; |
3251 | 3348 | } |
3252 | 3349 | |
3253 | 3350 | if ( !$found ) { |
3254 | 3351 | wfProfileOut( $fname ); |
3255 | | - return $piece['text']; |
| 3352 | + return '{{' . $frame->implode( '|', $titleWithSpaces, $args ) . '}}'; |
3256 | 3353 | } else { |
3257 | 3354 | wfProfileIn( __METHOD__ . '-placeholders' ); |
3258 | 3355 | if ( $isHTML ) { |
3259 | 3356 | # Replace raw HTML by a placeholder |
3260 | 3357 | # Add a blank line preceding, to prevent it from mucking up |
3261 | 3358 | # immediately preceding headings |
3262 | | - $text = "\n\n" . $this->insertStripItem( $text, $this->mStripState ); |
3263 | | - } else { |
3264 | | - # replace ==section headers== |
3265 | | - # XXX this needs to go away once we have a better parser. |
3266 | | - if ( !$this->ot['wiki'] && !$this->ot['pre'] && $replaceHeadings ) { |
3267 | | - if( !is_null( $title ) ) |
3268 | | - $encodedname = base64_encode($title->getPrefixedDBkey()); |
3269 | | - else |
3270 | | - $encodedname = base64_encode(""); |
3271 | | - $m = preg_split('/(^={1,6}.*?={1,6}\s*?$)/m', $text, -1, |
3272 | | - PREG_SPLIT_DELIM_CAPTURE); |
3273 | | - $text = ''; |
3274 | | - $nsec = $headingOffset; |
3275 | | - |
3276 | | - for( $i = 0; $i < count($m); $i += 2 ) { |
3277 | | - $text .= $m[$i]; |
3278 | | - if (!isset($m[$i + 1]) || $m[$i + 1] == "") continue; |
3279 | | - $hl = $m[$i + 1]; |
3280 | | - if( strstr($hl, "<!--MWTEMPLATESECTION") ) { |
3281 | | - $text .= $hl; |
3282 | | - continue; |
3283 | | - } |
3284 | | - $m2 = array(); |
3285 | | - preg_match('/^(={1,6})(.*?)(={1,6})\s*?$/m', $hl, $m2); |
3286 | | - $text .= $m2[1] . $m2[2] . "<!--MWTEMPLATESECTION=" |
3287 | | - . $encodedname . "&" . base64_encode("$nsec") . "-->" . $m2[3]; |
3288 | | - |
3289 | | - $nsec++; |
3290 | | - } |
3291 | | - } |
| 3359 | + $text = "\n\n" . $this->insertStripItem( $text ); |
3292 | 3360 | } |
3293 | 3361 | wfProfileOut( __METHOD__ . '-placeholders' ); |
3294 | 3362 | } |
— | — | @@ -3297,7 +3365,7 @@ |
3298 | 3366 | |
3299 | 3367 | if ( !$found ) { |
3300 | 3368 | wfProfileOut( $fname ); |
3301 | | - return $piece['text']; |
| 3369 | + return '{{' . $frame->implode( '|', $titleWithSpaces, $args ) . '}}'; |
3302 | 3370 | } else { |
3303 | 3371 | wfProfileOut( $fname ); |
3304 | 3372 | return $text; |
— | — | @@ -3305,27 +3373,99 @@ |
3306 | 3374 | } |
3307 | 3375 | |
3308 | 3376 | /** |
| 3377 | + * Get the semi-parsed DOM representation of a template with a given title, |
| 3378 | + * and its redirect destination title. Cached. |
| 3379 | + */ |
| 3380 | + function getTemplateDom( $title ) { |
| 3381 | + $titleText = $title->getPrefixedDBkey(); |
| 3382 | + |
| 3383 | + if ( isset( $this->mTplRedirCache[$titleText] ) ) { |
| 3384 | + list( $ns, $dbk ) = $this->mTplRedirCache[$titleText]; |
| 3385 | + $title = Title::makeTitle( $ns, $dbk ); |
| 3386 | + $titleText = $title->getPrefixedDBkey(); |
| 3387 | + } |
| 3388 | + if ( isset( $this->mTplDomCache[$titleText] ) ) { |
| 3389 | + return array( $this->mTplDomCache[$titleText], $title ); |
| 3390 | + } |
| 3391 | + |
| 3392 | + // Cache miss, go to the database |
| 3393 | + list( $text, $title ) = $this->fetchTemplateAndTitle( $title ); |
| 3394 | + |
| 3395 | + if ( $text === false ) { |
| 3396 | + $this->mTplDomCache[$titleText] = false; |
| 3397 | + return array( false, $title ); |
| 3398 | + } |
| 3399 | + |
| 3400 | + # If there are any <onlyinclude> tags, only include them |
| 3401 | + if ( !$this->ot['msg'] ) { |
| 3402 | + if ( in_string( '<onlyinclude>', $text ) && in_string( '</onlyinclude>', $text ) ) { |
| 3403 | + $replacer = new OnlyIncludeReplacer; |
| 3404 | + StringUtils::delimiterReplaceCallback( '<onlyinclude>', '</onlyinclude>', |
| 3405 | + array( &$replacer, 'replace' ), $text ); |
| 3406 | + $text = $replacer->output; |
| 3407 | + } |
| 3408 | + # Remove <noinclude> sections and <includeonly> tags |
| 3409 | + $text = StringUtils::delimiterReplace( '<noinclude>', '</noinclude>', '', $text ); |
| 3410 | + $text = strtr( $text, array( '<includeonly>' => '' , '</includeonly>' => '' ) ); |
| 3411 | + } |
| 3412 | + |
| 3413 | + $dom = $this->preprocessToDom( $text ); |
| 3414 | + $this->mTplDomCache[$titleText] = $dom; |
| 3415 | + return array( $dom, $title ); |
| 3416 | + } |
| 3417 | + |
| 3418 | + /** |
3309 | 3419 | * Fetch the unparsed text of a template and register a reference to it. |
3310 | 3420 | */ |
3311 | | - function fetchTemplateAndtitle( $title ) { |
| 3421 | + function fetchTemplateAndTitle( $title ) { |
| 3422 | + $templateCb = $this->mOptions->getTemplateCallback(); |
| 3423 | + $stuff = call_user_func( $templateCb, $title ); |
| 3424 | + $text = $stuff['text']; |
| 3425 | + $finalTitle = isset( $stuff['finalTitle'] ) ? $stuff['finalTitle'] : $title; |
| 3426 | + if ( isset( $stuff['deps'] ) ) { |
| 3427 | + foreach ( $stuff['deps'] as $dep ) { |
| 3428 | + $this->mOutput->addTemplate( $dep['title'], $dep['page_id'], $dep['rev_id'] ); |
| 3429 | + } |
| 3430 | + } |
| 3431 | + return array($text,$finalTitle); |
| 3432 | + } |
| 3433 | + |
| 3434 | + function fetchTemplate( $title ) { |
| 3435 | + $rv = $this->fetchTemplateAndTitle($title); |
| 3436 | + return $rv[0]; |
| 3437 | + } |
| 3438 | + |
| 3439 | + /** |
| 3440 | + * Static function to get a template |
| 3441 | + * Can be overridden via ParserOptions::setTemplateCallback(). |
| 3442 | + */ |
| 3443 | + static function statelessFetchTemplate( $title ) { |
3312 | 3444 | $text = $skip = false; |
3313 | 3445 | $finalTitle = $title; |
| 3446 | + $deps = array(); |
| 3447 | + |
3314 | 3448 | // Loop to fetch the article, with up to 1 redirect |
3315 | 3449 | for ( $i = 0; $i < 2 && is_object( $title ); $i++ ) { |
3316 | 3450 | # Give extensions a chance to select the revision instead |
3317 | 3451 | $id = false; // Assume current |
3318 | | - wfRunHooks( 'BeforeParserFetchTemplateAndtitle', array( &$this, &$title, &$skip, &$id ) ); |
| 3452 | + wfRunHooks( 'BeforeParserFetchTemplateAndtitle', array( false, &$title, &$skip, &$id ) ); |
3319 | 3453 | |
3320 | 3454 | if( $skip ) { |
3321 | 3455 | $text = false; |
3322 | | - $this->mOutput->addTemplate( $title, $title->getArticleID(), null ); |
| 3456 | + $deps[] = array( |
| 3457 | + 'title' => $title, |
| 3458 | + 'page_id' => $title->getArticleID(), |
| 3459 | + 'rev_id' => null ); |
3323 | 3460 | break; |
3324 | 3461 | } |
3325 | 3462 | $rev = $id ? Revision::newFromId( $id ) : Revision::newFromTitle( $title ); |
3326 | 3463 | $rev_id = $rev ? $rev->getId() : 0; |
3327 | | - |
3328 | | - $this->mOutput->addTemplate( $title, $title->getArticleID(), $rev_id ); |
3329 | | - |
| 3464 | + |
| 3465 | + $deps[] = array( |
| 3466 | + 'title' => $title, |
| 3467 | + 'page_id' => $title->getArticleID(), |
| 3468 | + 'rev_id' => $rev_id ); |
| 3469 | + |
3330 | 3470 | if( $rev ) { |
3331 | 3471 | $text = $rev->getText(); |
3332 | 3472 | } elseif( $title->getNamespace() == NS_MEDIAWIKI ) { |
— | — | @@ -3346,14 +3486,12 @@ |
3347 | 3487 | $finalTitle = $title; |
3348 | 3488 | $title = Title::newFromRedirect( $text ); |
3349 | 3489 | } |
3350 | | - return array($text,$finalTitle); |
| 3490 | + return array( |
| 3491 | + 'text' => $text, |
| 3492 | + 'finalTitle' => $finalTitle, |
| 3493 | + 'deps' => $deps ); |
3351 | 3494 | } |
3352 | 3495 | |
3353 | | - function fetchTemplate( $title ) { |
3354 | | - $rv = $this->fetchTemplateAndtitle($title); |
3355 | | - return $rv[0]; |
3356 | | - } |
3357 | | - |
3358 | 3496 | /** |
3359 | 3497 | * Transclude an interwiki link. |
3360 | 3498 | */ |
— | — | @@ -3400,26 +3538,111 @@ |
3401 | 3539 | * Triple brace replacement -- used for template arguments |
3402 | 3540 | * @private |
3403 | 3541 | */ |
3404 | | - function argSubstitution( $matches ) { |
3405 | | - $arg = trim( $matches['title'] ); |
3406 | | - $text = $matches['text']; |
3407 | | - $inputArgs = end( $this->mArgStack ); |
| 3542 | + function argSubstitution( $piece, $frame ) { |
| 3543 | + wfProfileIn( __METHOD__ ); |
3408 | 3544 | |
3409 | | - if ( array_key_exists( $arg, $inputArgs ) ) { |
3410 | | - $text = $inputArgs[$arg]; |
3411 | | - } else if (($this->mOutputType == OT_HTML || $this->mOutputType == OT_PREPROCESS ) && |
3412 | | - null != $matches['parts'] && count($matches['parts']) > 0) { |
3413 | | - $text = $matches['parts'][0]; |
| 3545 | + $text = false; |
| 3546 | + $error = false; |
| 3547 | + $parts = $piece['parts']; |
| 3548 | + $argWithSpaces = $frame->expand( $piece['title'] ); |
| 3549 | + $arg = trim( $argWithSpaces ); |
| 3550 | + |
| 3551 | + if ( isset( $frame->args[$arg] ) ) { |
| 3552 | + $text = $frame->parent->expand( $frame->args[$arg] ); |
| 3553 | + } else if ( ( $this->ot['html'] || $this->ot['pre'] ) && $parts->length > 0 ) { |
| 3554 | + $text = $frame->expand( $parts->item( 0 ) ); |
3414 | 3555 | } |
3415 | 3556 | if ( !$this->incrementIncludeSize( 'arg', strlen( $text ) ) ) { |
3416 | | - $text = $matches['text'] . |
3417 | | - '<!-- WARNING: argument omitted, expansion size too large -->'; |
| 3557 | + $error = '<!-- WARNING: argument omitted, expansion size too large -->'; |
3418 | 3558 | } |
3419 | 3559 | |
| 3560 | + if ( $text === false ) { |
| 3561 | + $text = '{{{' . $frame->implode( '|', $argWithSpaces, $parts ) . '}}}'; |
| 3562 | + } |
| 3563 | + if ( $error !== false ) { |
| 3564 | + $text .= $error; |
| 3565 | + } |
| 3566 | + |
| 3567 | + wfProfileOut( __METHOD__ ); |
3420 | 3568 | return $text; |
3421 | 3569 | } |
3422 | 3570 | |
3423 | 3571 | /** |
| 3572 | + * Return the text to be used for a given extension tag. |
| 3573 | + * This is the ghost of strip(). |
| 3574 | + * |
| 3575 | + * @param array $params Associative array of parameters: |
| 3576 | + * name DOMNode for the tag name |
| 3577 | + * attr DOMNode for unparsed text where tag attributes are thought to be, or null |
| 3578 | + * inner DOMNode for the contents of the extension element, or null |
| 3579 | + * close DOMNode for the close tag text, or null if the original text did not have a close tag |
| 3580 | + * @param PPFrame $frame |
| 3581 | + */ |
| 3582 | + function extensionSubstitution( $params, $frame ) { |
| 3583 | + global $wgRawHtml, $wgContLang; |
| 3584 | + static $n = 1; |
| 3585 | + |
| 3586 | + $name = $frame->expand( $params['name'] ); |
| 3587 | + $attrText = is_null( $params['attr'] ) ? null : $frame->expand( $params['attr'] ); |
| 3588 | + $content = is_null( $params['inner'] ) ? null : $frame->expand( $params['inner'] ); |
| 3589 | + |
| 3590 | + $marker = "{$this->mUniqPrefix}-$name-" . sprintf('%08X', $n++) . $this->mMarkerSuffix; |
| 3591 | + |
| 3592 | + if ( $this->ot['html'] ) { |
| 3593 | + if ( $name == '!--' ) { |
| 3594 | + return ''; |
| 3595 | + } |
| 3596 | + $name = strtolower( $name ); |
| 3597 | + |
| 3598 | + $params = Sanitizer::decodeTagAttributes( $attrText ); |
| 3599 | + switch ( $name ) { |
| 3600 | + case 'html': |
| 3601 | + if( $wgRawHtml ) { |
| 3602 | + $output = $content; |
| 3603 | + break; |
| 3604 | + } else { |
| 3605 | + throw new MWException( '<html> extension tag encountered unexpectedly' ); |
| 3606 | + } |
| 3607 | + case 'nowiki': |
| 3608 | + $output = Xml::escapeTagsOnly( $content ); |
| 3609 | + break; |
| 3610 | + case 'math': |
| 3611 | + $output = $wgContLang->armourMath( |
| 3612 | + MathRenderer::renderMath( $content, $params ) ); |
| 3613 | + break; |
| 3614 | + case 'gallery': |
| 3615 | + $output = $this->renderImageGallery( $content, $params ); |
| 3616 | + break; |
| 3617 | + default: |
| 3618 | + if( isset( $this->mTagHooks[$name] ) ) { |
| 3619 | + $output = call_user_func_array( $this->mTagHooks[$name], |
| 3620 | + array( $content, $params, $this ) ); |
| 3621 | + } else { |
| 3622 | + throw new MWException( "Invalid call hook $name" ); |
| 3623 | + } |
| 3624 | + } |
| 3625 | + } else { |
| 3626 | + if ( $name == '!--' ) { |
| 3627 | + $output = '<!--' . $content . '-->'; |
| 3628 | + } else { |
| 3629 | + if ( $content === null ) { |
| 3630 | + $output = "<$name$attrText/>"; |
| 3631 | + } else { |
| 3632 | + $close = is_null( $params['close'] ) ? '' : $frame->expand( $params['close'] ); |
| 3633 | + $output = "<$name$attrText>$content$close"; |
| 3634 | + } |
| 3635 | + } |
| 3636 | + } |
| 3637 | + |
| 3638 | + if ( $name == 'html' || $name == 'nowiki' ) { |
| 3639 | + $this->mStripState->nowiki->setPair( $marker, $output ); |
| 3640 | + } else { |
| 3641 | + $this->mStripState->general->setPair( $marker, $output ); |
| 3642 | + } |
| 3643 | + return $marker; |
| 3644 | + } |
| 3645 | + |
| 3646 | + /** |
3424 | 3647 | * Increment an include size counter |
3425 | 3648 | * |
3426 | 3649 | * @param string $type The type of expansion |
— | — | @@ -3427,7 +3650,7 @@ |
3428 | 3651 | * @return boolean False if this inclusion would take it over the maximum, true otherwise |
3429 | 3652 | */ |
3430 | 3653 | function incrementIncludeSize( $type, $size ) { |
3431 | | - if ( $this->mIncludeSizes[$type] + $size > $this->mOptions->getMaxIncludeSize() ) { |
| 3654 | + if ( $this->mIncludeSizes[$type] + $size > $this->mOptions->getMaxIncludeSize( $type ) ) { |
3432 | 3655 | return false; |
3433 | 3656 | } else { |
3434 | 3657 | $this->mIncludeSizes[$type] += $size; |
— | — | @@ -3535,7 +3758,6 @@ |
3536 | 3759 | |
3537 | 3760 | # headline counter |
3538 | 3761 | $headlineCount = 0; |
3539 | | - $sectionCount = 0; # headlineCount excluding template sections |
3540 | 3762 | $numVisible = 0; |
3541 | 3763 | |
3542 | 3764 | # Ugh .. the TOC should have neat indentation levels which can be |
— | — | @@ -3550,18 +3772,20 @@ |
3551 | 3773 | $prevlevel = 0; |
3552 | 3774 | $toclevel = 0; |
3553 | 3775 | $prevtoclevel = 0; |
| 3776 | + $markerRegex = "{$this->mUniqPrefix}-h-(\d+)-{$this->mMarkerSuffix}"; |
| 3777 | + $baseTitleText = $this->mTitle->getPrefixedDBkey(); |
3554 | 3778 | |
3555 | 3779 | foreach( $matches[3] as $headline ) { |
3556 | | - $istemplate = 0; |
3557 | | - $templatetitle = ''; |
3558 | | - $templatesection = 0; |
| 3780 | + $isTemplate = false; |
| 3781 | + $titleText = false; |
| 3782 | + $sectionIndex = false; |
3559 | 3783 | $numbering = ''; |
3560 | | - $mat = array(); |
3561 | | - if (preg_match("/<!--MWTEMPLATESECTION=([^&]+)&([^_]+)-->/", $headline, $mat)) { |
3562 | | - $istemplate = 1; |
3563 | | - $templatetitle = base64_decode($mat[1]); |
3564 | | - $templatesection = 1 + (int)base64_decode($mat[2]); |
3565 | | - $headline = preg_replace("/<!--MWTEMPLATESECTION=([^&]+)&([^_]+)-->/", "", $headline); |
| 3784 | + $markerMatches = array(); |
| 3785 | + if (preg_match("/^$markerRegex/", $headline, $markerMatches)) { |
| 3786 | + $serial = $markerMatches[1]; |
| 3787 | + list( $titleText, $sectionIndex ) = $this->mHeadings[$serial]; |
| 3788 | + $isTemplate = ($titleText != $baseTitleText); |
| 3789 | + $headline = preg_replace("/^$markerRegex/", "", $headline); |
3566 | 3790 | } |
3567 | 3791 | |
3568 | 3792 | if( $toclevel ) { |
— | — | @@ -3634,41 +3858,41 @@ |
3635 | 3859 | } |
3636 | 3860 | } |
3637 | 3861 | |
3638 | | - # The canonized header is a version of the header text safe to use for links |
| 3862 | + # The safe header is a version of the header text safe to use for links |
3639 | 3863 | # Avoid insertion of weird stuff like <math> by expanding the relevant sections |
3640 | | - $canonized_headline = $this->mStripState->unstripBoth( $headline ); |
| 3864 | + $safeHeadline = $this->mStripState->unstripBoth( $headline ); |
3641 | 3865 | |
3642 | 3866 | # Remove link placeholders by the link text. |
3643 | 3867 | # <!--LINK number--> |
3644 | 3868 | # turns into |
3645 | 3869 | # link text with suffix |
3646 | | - $canonized_headline = preg_replace( '/<!--LINK ([0-9]*)-->/e', |
| 3870 | + $safeHeadline = preg_replace( '/<!--LINK ([0-9]*)-->/e', |
3647 | 3871 | "\$this->mLinkHolders['texts'][\$1]", |
3648 | | - $canonized_headline ); |
3649 | | - $canonized_headline = preg_replace( '/<!--IWLINK ([0-9]*)-->/e', |
| 3872 | + $safeHeadline ); |
| 3873 | + $safeHeadline = preg_replace( '/<!--IWLINK ([0-9]*)-->/e', |
3650 | 3874 | "\$this->mInterwikiLinkHolders['texts'][\$1]", |
3651 | | - $canonized_headline ); |
| 3875 | + $safeHeadline ); |
3652 | 3876 | |
3653 | 3877 | # Strip out HTML (other than plain <sup> and <sub>: bug 8393) |
3654 | 3878 | $tocline = preg_replace( |
3655 | 3879 | array( '#<(?!/?(sup|sub)).*?'.'>#', '#<(/?(sup|sub)).*?'.'>#' ), |
3656 | 3880 | array( '', '<$1>'), |
3657 | | - $canonized_headline |
| 3881 | + $safeHeadline |
3658 | 3882 | ); |
3659 | 3883 | $tocline = trim( $tocline ); |
3660 | 3884 | |
3661 | 3885 | # For the anchor, strip out HTML-y stuff period |
3662 | | - $canonized_headline = preg_replace( '/<.*?'.'>/', '', $canonized_headline ); |
3663 | | - $canonized_headline = trim( $canonized_headline ); |
| 3886 | + $safeHeadline = preg_replace( '/<.*?'.'>/', '', $safeHeadline ); |
| 3887 | + $safeHeadline = trim( $safeHeadline ); |
3664 | 3888 | |
3665 | 3889 | # Save headline for section edit hint before it's escaped |
3666 | | - $headline_hint = $canonized_headline; |
3667 | | - $canonized_headline = Sanitizer::escapeId( $canonized_headline ); |
3668 | | - $refers[$headlineCount] = $canonized_headline; |
| 3890 | + $headlineHint = $safeHeadline; |
| 3891 | + $safeHeadline = Sanitizer::escapeId( $safeHeadline ); |
| 3892 | + $refers[$headlineCount] = $safeHeadline; |
3669 | 3893 | |
3670 | 3894 | # count how many in assoc. array so we can track dupes in anchors |
3671 | | - isset( $refers[$canonized_headline] ) ? $refers[$canonized_headline]++ : $refers[$canonized_headline] = 1; |
3672 | | - $refcount[$headlineCount]=$refers[$canonized_headline]; |
| 3895 | + isset( $refers[$safeHeadline] ) ? $refers[$safeHeadline]++ : $refers[$safeHeadline] = 1; |
| 3896 | + $refcount[$headlineCount] = $refers[$safeHeadline]; |
3673 | 3897 | |
3674 | 3898 | # Don't number the heading if it is the only one (looks silly) |
3675 | 3899 | if( $doNumberHeadings && count( $matches[3] ) > 1) { |
— | — | @@ -3677,7 +3901,7 @@ |
3678 | 3902 | } |
3679 | 3903 | |
3680 | 3904 | # Create the anchor for linking from the TOC to the section |
3681 | | - $anchor = $canonized_headline; |
| 3905 | + $anchor = $safeHeadline; |
3682 | 3906 | if($refcount[$headlineCount] > 1 ) { |
3683 | 3907 | $anchor .= '_' . $refcount[$headlineCount]; |
3684 | 3908 | } |
— | — | @@ -3685,19 +3909,17 @@ |
3686 | 3910 | $toc .= $sk->tocLine($anchor, $tocline, $numbering, $toclevel); |
3687 | 3911 | } |
3688 | 3912 | # give headline the correct <h#> tag |
3689 | | - if( $showEditLink && ( !$istemplate || $templatetitle !== "" ) ) { |
3690 | | - if( $istemplate ) |
3691 | | - $editlink = $sk->editSectionLinkForOther($templatetitle, $templatesection); |
| 3913 | + if( $showEditLink && $sectionIndex !== false ) { |
| 3914 | + if( $isTemplate ) |
| 3915 | + $editlink = $sk->editSectionLinkForOther($titleText, $sectionIndex); |
3692 | 3916 | else |
3693 | | - $editlink = $sk->editSectionLink($this->mTitle, $sectionCount+1, $headline_hint); |
| 3917 | + $editlink = $sk->editSectionLink($this->mTitle, $sectionIndex, $headlineHint); |
3694 | 3918 | } else { |
3695 | 3919 | $editlink = ''; |
3696 | 3920 | } |
3697 | 3921 | $head[$headlineCount] = $sk->makeHeadline( $level, $matches['attrib'][$headlineCount], $anchor, $headline, $editlink ); |
3698 | 3922 | |
3699 | 3923 | $headlineCount++; |
3700 | | - if( !$istemplate ) |
3701 | | - $sectionCount++; |
3702 | 3924 | } |
3703 | 3925 | |
3704 | 3926 | # Never ever show TOC if no headers |
— | — | @@ -3765,14 +3987,12 @@ |
3766 | 3988 | $this->clearState(); |
3767 | 3989 | } |
3768 | 3990 | |
3769 | | - $stripState = new StripState; |
3770 | 3991 | $pairs = array( |
3771 | 3992 | "\r\n" => "\n", |
3772 | 3993 | ); |
3773 | 3994 | $text = str_replace( array_keys( $pairs ), array_values( $pairs ), $text ); |
3774 | | - $text = $this->strip( $text, $stripState, true, array( 'gallery' ) ); |
3775 | | - $text = $this->pstPass2( $text, $stripState, $user ); |
3776 | | - $text = $stripState->unstripBoth( $text ); |
| 3995 | + $text = $this->pstPass2( $text, $user ); |
| 3996 | + $text = $this->mStripState->unstripBoth( $text ); |
3777 | 3997 | return $text; |
3778 | 3998 | } |
3779 | 3999 | |
— | — | @@ -3780,7 +4000,7 @@ |
3781 | 4001 | * Pre-save transform helper function |
3782 | 4002 | * @private |
3783 | 4003 | */ |
3784 | | - function pstPass2( $text, &$stripState, $user ) { |
| 4004 | + function pstPass2( $text, $user ) { |
3785 | 4005 | global $wgContLang, $wgLocaltimezone; |
3786 | 4006 | |
3787 | 4007 | /* Note: This is the timestamp saved as hardcoded wikitext to |
— | — | @@ -3803,7 +4023,7 @@ |
3804 | 4024 | $text = $this->replaceVariables( $text ); |
3805 | 4025 | |
3806 | 4026 | # Strip out <nowiki> etc. added via replaceVariables |
3807 | | - $text = $this->strip( $text, $stripState, false, array( 'gallery' ) ); |
| 4027 | + #$text = $this->strip( $text, $this->mStripState, false, array( 'gallery' ) ); |
3808 | 4028 | |
3809 | 4029 | # Signatures |
3810 | 4030 | $sigText = $this->getUserSig( $user ); |
— | — | @@ -3950,6 +4170,11 @@ |
3951 | 4171 | /** |
3952 | 4172 | * Transform a MediaWiki message by replacing magic variables. |
3953 | 4173 | * |
| 4174 | + * For some unknown reason, it also expands templates, but only to the |
| 4175 | + * first recursion level. This is wrong and broken, probably introduced |
| 4176 | + * accidentally during refactoring, but probably relied upon by thousands |
| 4177 | + * of users. |
| 4178 | + * |
3954 | 4179 | * @param string $text the text to transform |
3955 | 4180 | * @param ParserOptions $options options |
3956 | 4181 | * @return string the text with variables substituted |
— | — | @@ -4003,6 +4228,7 @@ |
4004 | 4229 | $tag = strtolower( $tag ); |
4005 | 4230 | $oldVal = isset( $this->mTagHooks[$tag] ) ? $this->mTagHooks[$tag] : null; |
4006 | 4231 | $this->mTagHooks[$tag] = $callback; |
| 4232 | + $this->mStripList[] = $tag; |
4007 | 4233 | |
4008 | 4234 | return $oldVal; |
4009 | 4235 | } |
— | — | @@ -4040,8 +4266,8 @@ |
4041 | 4267 | * @return The old callback function for this name, if any |
4042 | 4268 | */ |
4043 | 4269 | function setFunctionHook( $id, $callback, $flags = 0 ) { |
4044 | | - $oldVal = isset( $this->mFunctionHooks[$id] ) ? $this->mFunctionHooks[$id] : null; |
4045 | | - $this->mFunctionHooks[$id] = $callback; |
| 4270 | + $oldVal = isset( $this->mFunctionHooks[$id] ) ? $this->mFunctionHooks[$id][0] : null; |
| 4271 | + $this->mFunctionHooks[$id] = array( $callback, $flags ); |
4046 | 4272 | |
4047 | 4273 | # Add to function cache |
4048 | 4274 | $mw = MagicWord::get( $id ); |
— | — | @@ -4660,12 +4886,12 @@ |
4661 | 4887 | * Callback from the Sanitizer for expanding items found in HTML attribute |
4662 | 4888 | * values, so they can be safely tested and escaped. |
4663 | 4889 | * @param string $text |
4664 | | - * @param array $args |
| 4890 | + * @param PPFrame $frame |
4665 | 4891 | * @return string |
4666 | 4892 | * @private |
4667 | 4893 | */ |
4668 | | - function attributeStripCallback( &$text, $args ) { |
4669 | | - $text = $this->replaceVariables( $text, $args ); |
| 4894 | + function attributeStripCallback( &$text, $frame = false ) { |
| 4895 | + $text = $this->replaceVariables( $text, $frame ); |
4670 | 4896 | $text = $this->mStripState->unstripBoth( $text ); |
4671 | 4897 | return $text; |
4672 | 4898 | } |
— | — | @@ -4696,120 +4922,94 @@ |
4697 | 4923 | * @param $text Page wikitext |
4698 | 4924 | * @param $section Numbered section. 0 pulls the text before the first |
4699 | 4925 | * heading; other numbers will pull the given section |
4700 | | - * along with its lower-level subsections. |
| 4926 | + * along with its lower-level subsections. If the section is |
| 4927 | + * not found, $mode=get will return $newText, and |
| 4928 | + * $mode=replace will return $text. |
4701 | 4929 | * @param $mode One of "get" or "replace" |
4702 | | - * @param $newtext Replacement text for section data. |
| 4930 | + * @param $newText Replacement text for section data. |
4703 | 4931 | * @return string for "get", the extracted section text. |
4704 | 4932 | * for "replace", the whole page with the section replaced. |
4705 | 4933 | */ |
4706 | | - private function extractSections( $text, $section, $mode, $newtext='' ) { |
4707 | | - # I.... _hope_ this is right. |
4708 | | - # Otherwise, sometimes we don't have things initialized properly. |
| 4934 | + private function extractSections( $text, $section, $mode, $newText='' ) { |
4709 | 4935 | $this->clearState(); |
4710 | | - |
4711 | | - # strip NOWIKI etc. to avoid confusion (true-parameter causes HTML |
4712 | | - # comments to be stripped as well) |
4713 | | - $stripState = new StripState; |
4714 | | - |
4715 | | - $oldOutputType = $this->mOutputType; |
4716 | | - $oldOptions = $this->mOptions; |
4717 | | - $this->mOptions = new ParserOptions(); |
| 4936 | + $this->mOptions = new ParserOptions; |
4718 | 4937 | $this->setOutputType( OT_WIKI ); |
| 4938 | + $curIndex = 0; |
| 4939 | + $outText = ''; |
| 4940 | + $frame = new PPFrame( $this ); |
4719 | 4941 | |
4720 | | - $striptext = $this->strip( $text, $stripState, true ); |
| 4942 | + // Preprocess the text |
| 4943 | + $dom = $this->preprocessToDom( $text ); |
| 4944 | + $root = $dom->documentElement; |
4721 | 4945 | |
4722 | | - $this->setOutputType( $oldOutputType ); |
4723 | | - $this->mOptions = $oldOptions; |
| 4946 | + // <h> nodes indicate section breaks |
| 4947 | + // They can only occur at the top level, so we can find them by iterating the root's children |
| 4948 | + $node = $root->firstChild; |
4724 | 4949 | |
4725 | | - # now that we can be sure that no pseudo-sections are in the source, |
4726 | | - # split it up by section |
4727 | | - $uniq = preg_quote( $this->uniqPrefix(), '/' ); |
4728 | | - $comment = "(?:$uniq-!--.*?QINU\x07)"; |
4729 | | - $secs = preg_split( |
4730 | | - "/ |
4731 | | - ( |
4732 | | - ^ |
4733 | | - (?:$comment|<\/?noinclude>)* # Initial comments will be stripped |
4734 | | - (=+) # Should this be limited to 6? |
4735 | | - .+? # Section title... |
4736 | | - \\2 # Ending = count must match start |
4737 | | - (?:$comment|<\/?noinclude>|[ \\t]+)* # Trailing whitespace ok |
4738 | | - $ |
4739 | | - | |
4740 | | - <h([1-6])\b.*?> |
4741 | | - .*? |
4742 | | - <\/h\\3\s*> |
4743 | | - ) |
4744 | | - /mix", |
4745 | | - $striptext, -1, |
4746 | | - PREG_SPLIT_DELIM_CAPTURE); |
4747 | | - |
4748 | | - if( $mode == "get" ) { |
4749 | | - if( $section == 0 ) { |
4750 | | - // "Section 0" returns the content before any other section. |
4751 | | - $rv = $secs[0]; |
4752 | | - } else { |
4753 | | - //track missing section, will replace if found. |
4754 | | - $rv = $newtext; |
| 4950 | + // Find the target section |
| 4951 | + if ( $section == 0 ) { |
| 4952 | + // Section zero doesn't nest, level=big |
| 4953 | + $targetLevel = 1000; |
| 4954 | + } else { |
| 4955 | + while ( $node ) { |
| 4956 | + if ( $node->nodeName == 'h' ) { |
| 4957 | + if ( $curIndex + 1 == $section ) { |
| 4958 | + break; |
| 4959 | + } |
| 4960 | + $curIndex++; |
| 4961 | + } |
| 4962 | + if ( $mode == 'replace' ) { |
| 4963 | + $outText .= $frame->expand( $node ); |
| 4964 | + } |
| 4965 | + $node = $node->nextSibling; |
4755 | 4966 | } |
4756 | | - } elseif( $mode == "replace" ) { |
4757 | | - if( $section == 0 ) { |
4758 | | - $rv = $newtext . "\n\n"; |
4759 | | - $remainder = true; |
4760 | | - } else { |
4761 | | - $rv = $secs[0]; |
4762 | | - $remainder = false; |
| 4967 | + if ( $node ) { |
| 4968 | + $targetLevel = $node->getAttribute( 'level' ); |
4763 | 4969 | } |
4764 | 4970 | } |
4765 | | - $count = 0; |
4766 | | - $sectionLevel = 0; |
4767 | | - for( $index = 1; $index < count( $secs ); ) { |
4768 | | - $headerLine = $secs[$index++]; |
4769 | | - if( $secs[$index] ) { |
4770 | | - // A wiki header |
4771 | | - $headerLevel = strlen( $secs[$index++] ); |
| 4971 | + |
| 4972 | + if ( !$node ) { |
| 4973 | + // Not found |
| 4974 | + if ( $mode == 'get' ) { |
| 4975 | + return $newText; |
4772 | 4976 | } else { |
4773 | | - // An HTML header |
4774 | | - $index++; |
4775 | | - $headerLevel = intval( $secs[$index++] ); |
| 4977 | + return $text; |
4776 | 4978 | } |
4777 | | - $content = $secs[$index++]; |
| 4979 | + } |
4778 | 4980 | |
4779 | | - $count++; |
4780 | | - if( $mode == "get" ) { |
4781 | | - if( $count == $section ) { |
4782 | | - $rv = $headerLine . $content; |
4783 | | - $sectionLevel = $headerLevel; |
4784 | | - } elseif( $count > $section ) { |
4785 | | - if( $sectionLevel && $headerLevel > $sectionLevel ) { |
4786 | | - $rv .= $headerLine . $content; |
4787 | | - } else { |
4788 | | - // Broke out to a higher-level section |
4789 | | - break; |
4790 | | - } |
| 4981 | + // Find the end of the section, including nested sections |
| 4982 | + do { |
| 4983 | + if ( $node->nodeName == 'h' ) { |
| 4984 | + $curIndex++; |
| 4985 | + $curLevel = $node->getAttribute( 'level' ); |
| 4986 | + if ( $curIndex != $section && $curLevel <= $targetLevel ) { |
| 4987 | + break; |
4791 | 4988 | } |
4792 | | - } elseif( $mode == "replace" ) { |
4793 | | - if( $count < $section ) { |
4794 | | - $rv .= $headerLine . $content; |
4795 | | - } elseif( $count == $section ) { |
4796 | | - $rv .= $newtext . "\n\n"; |
4797 | | - $sectionLevel = $headerLevel; |
4798 | | - } elseif( $count > $section ) { |
4799 | | - if( $headerLevel <= $sectionLevel ) { |
4800 | | - // Passed the section's sub-parts. |
4801 | | - $remainder = true; |
4802 | | - } |
4803 | | - if( $remainder ) { |
4804 | | - $rv .= $headerLine . $content; |
4805 | | - } |
4806 | | - } |
4807 | 4989 | } |
| 4990 | + if ( $mode == 'get' ) { |
| 4991 | + $outText .= $frame->expand( $node ); |
| 4992 | + } |
| 4993 | + $node = $node->nextSibling; |
| 4994 | + } while ( $node ); |
| 4995 | + |
| 4996 | + // Write out the remainder (in replace mode only) |
| 4997 | + if ( $mode == 'replace' ) { |
| 4998 | + // Output the replacement text |
| 4999 | + // Add two newlines on -- trailing whitespace in $newText is conventionally |
| 5000 | + // stripped by the editor, so we need both newlines to restore the paragraph gap |
| 5001 | + $outText .= $newText . "\n\n"; |
| 5002 | + while ( $node ) { |
| 5003 | + $outText .= $frame->expand( $node ); |
| 5004 | + $node = $node->nextSibling; |
| 5005 | + } |
4808 | 5006 | } |
4809 | | - if (is_string($rv)) |
4810 | | - # reinsert stripped tags |
4811 | | - $rv = trim( $stripState->unstripBoth( $rv ) ); |
4812 | 5007 | |
4813 | | - return $rv; |
| 5008 | + if ( is_string( $outText ) ) { |
| 5009 | + // Re-insert stripped tags |
| 5010 | + $outText = trim( $this->mStripState->unstripBoth( $outText ) ); |
| 5011 | + } |
| 5012 | + |
| 5013 | + return $outText; |
4814 | 5014 | } |
4815 | 5015 | |
4816 | 5016 | /** |
— | — | @@ -4942,6 +5142,15 @@ |
4943 | 5143 | $text = StringUtils::delimiterReplace( '<', '>', '', $text ); |
4944 | 5144 | return $text; |
4945 | 5145 | } |
| 5146 | + |
| 5147 | + /** |
| 5148 | + * strip/replaceVariables/unstrip for preprocessor regression testing |
| 5149 | + */ |
| 5150 | + function srvus( $text ) { |
| 5151 | + $text = $this->replaceVariables( $text ); |
| 5152 | + $text = $this->mStripState->unstripBoth( $text ); |
| 5153 | + return $text; |
| 5154 | + } |
4946 | 5155 | } |
4947 | 5156 | |
4948 | 5157 | /** |
— | — | @@ -4974,23 +5183,265 @@ |
4975 | 5184 | |
4976 | 5185 | function unstripGeneral( $text ) { |
4977 | 5186 | wfProfileIn( __METHOD__ ); |
4978 | | - $text = $this->general->replace( $text ); |
| 5187 | + do { |
| 5188 | + $oldText = $text; |
| 5189 | + $text = $this->general->replace( $text ); |
| 5190 | + } while ( $text != $oldText ); |
4979 | 5191 | wfProfileOut( __METHOD__ ); |
4980 | 5192 | return $text; |
4981 | 5193 | } |
4982 | 5194 | |
4983 | 5195 | function unstripNoWiki( $text ) { |
4984 | 5196 | wfProfileIn( __METHOD__ ); |
4985 | | - $text = $this->nowiki->replace( $text ); |
| 5197 | + do { |
| 5198 | + $oldText = $text; |
| 5199 | + $text = $this->nowiki->replace( $text ); |
| 5200 | + } while ( $text != $oldText ); |
4986 | 5201 | wfProfileOut( __METHOD__ ); |
4987 | 5202 | return $text; |
4988 | 5203 | } |
4989 | 5204 | |
4990 | 5205 | function unstripBoth( $text ) { |
4991 | 5206 | wfProfileIn( __METHOD__ ); |
4992 | | - $text = $this->general->replace( $text ); |
4993 | | - $text = $this->nowiki->replace( $text ); |
| 5207 | + do { |
| 5208 | + $oldText = $text; |
| 5209 | + $text = $this->general->replace( $text ); |
| 5210 | + $text = $this->nowiki->replace( $text ); |
| 5211 | + } while ( $text != $oldText ); |
4994 | 5212 | wfProfileOut( __METHOD__ ); |
4995 | 5213 | return $text; |
4996 | 5214 | } |
4997 | 5215 | } |
| 5216 | + |
| 5217 | +/** |
| 5218 | + * An expansion frame, used as a context to expand the result of preprocessToDom() |
| 5219 | + */ |
| 5220 | +class PPFrame { |
| 5221 | + var $parser, $title; |
| 5222 | + |
| 5223 | + const NO_ARGS = 1; |
| 5224 | + const NO_TEMPLATES = 2; |
| 5225 | + |
| 5226 | + /** |
| 5227 | + * Construct a new preprocessor frame. |
| 5228 | + * @param Parser $parser The parent parser |
| 5229 | + * The context title is not a parameter; it is taken from $parser->mTitle |
| 5230 | + */ |
| 5231 | + function __construct( $parser ) { |
| 5232 | + $this->parser = $parser; |
| 5233 | + $this->title = $parser->mTitle; |
| 5234 | + } |
| 5235 | + |
| 5236 | + /** |
| 5237 | + * Create a new child frame |
| 5238 | + * $args is optionally a DOMNodeList containing the template arguments |
| 5239 | + */ |
| 5240 | + function newChild( $args = false, $title = false ) { |
| 5241 | + $assocArgs = array(); |
| 5242 | + if ( $title === false ) { |
| 5243 | + $title = $this->title; |
| 5244 | + } |
| 5245 | + if ( $args !== false ) { |
| 5246 | + $xpath = false; |
| 5247 | + foreach ( $args as $arg ) { |
| 5248 | + if ( !$xpath ) { |
| 5249 | + $xpath = new DOMXPath( $arg->ownerDocument ); |
| 5250 | + } |
| 5251 | + |
| 5252 | + $nameNodes = $xpath->query( 'name', $arg ); |
| 5253 | + if ( $nameNodes->item( 0 )->hasAttributes() ) { |
| 5254 | + // Numbered parameter |
| 5255 | + $name = $nameNodes->item( 0 )->attributes->getNamedItem( 'index' )->textContent; |
| 5256 | + } else { |
| 5257 | + // Named parameter |
| 5258 | + $name = $this->expand( $nameNodes->item( 0 ) ); |
| 5259 | + } |
| 5260 | + |
| 5261 | + $value = $xpath->query( 'value', $arg ); |
| 5262 | + $assocArgs[$name] = $value->item( 0 ); |
| 5263 | + } |
| 5264 | + } |
| 5265 | + return new PPTemplateFrame( $this->parser, $this, $assocArgs, $title ); |
| 5266 | + } |
| 5267 | + |
| 5268 | + /** |
| 5269 | + * Expand a DOMNode describing a preprocessed document into plain wikitext, |
| 5270 | + * using the current context |
| 5271 | + * @param $root the node |
| 5272 | + */ |
| 5273 | + function expand( $root, $shallowFlags = 0, $deepFlags = 0 ) { |
| 5274 | + if ( is_string( $root ) ) { |
| 5275 | + return $root; |
| 5276 | + } |
| 5277 | + |
| 5278 | + if ( $this->parser->ot['html'] |
| 5279 | + && ++$this->parser->mPPNodeCount > $this->parser->mOptions->mMaxPPNodeCount ) |
| 5280 | + { |
| 5281 | + return $this->parser->insertStripItem( '<!-- node-count limit exceeded -->' ); |
| 5282 | + } |
| 5283 | + $flags = $shallowFlags | $deepFlags; |
| 5284 | + |
| 5285 | + if ( is_array( $root ) ) { |
| 5286 | + $s = ''; |
| 5287 | + foreach ( $root as $node ) { |
| 5288 | + $s .= $this->expand( $node, 0, $deepFlags ); |
| 5289 | + } |
| 5290 | + } elseif ( $root instanceof DOMNodeList ) { |
| 5291 | + $s = ''; |
| 5292 | + foreach ( $root as $node ) { |
| 5293 | + $s .= $this->expand( $node, 0, $deepFlags ); |
| 5294 | + } |
| 5295 | + } elseif ( $root instanceof DOMNode ) { |
| 5296 | + if ( $root->nodeType == XML_TEXT_NODE ) { |
| 5297 | + $s = $root->nodeValue; |
| 5298 | + } elseif ( $root->nodeName == 'template' ) { |
| 5299 | + # Double-brace expansion |
| 5300 | + $xpath = new DOMXPath( $root->ownerDocument ); |
| 5301 | + $titles = $xpath->query( 'title', $root ); |
| 5302 | + $title = $titles->item( 0 ); |
| 5303 | + $parts = $xpath->query( 'part', $root ); |
| 5304 | + if ( $flags & self::NO_TEMPLATES ) { |
| 5305 | + $s = '{{' . $this->implodeWithFlags( '|', 0, $deepFlags, $title, $parts ) . '}}'; |
| 5306 | + } else { |
| 5307 | + $lineStart = $root->getAttribute( 'lineStart' ); |
| 5308 | + $params = array( |
| 5309 | + 'title' => $title, |
| 5310 | + 'parts' => $parts, |
| 5311 | + 'lineStart' => $lineStart, |
| 5312 | + 'text' => 'FIXME' ); |
| 5313 | + $s = $this->parser->braceSubstitution( $params, $this ); |
| 5314 | + } |
| 5315 | + } elseif ( $root->nodeName == 'tplarg' ) { |
| 5316 | + # Triple-brace expansion |
| 5317 | + $xpath = new DOMXPath( $root->ownerDocument ); |
| 5318 | + $titles = $xpath->query( 'title', $root ); |
| 5319 | + $title = $titles->item( 0 ); |
| 5320 | + $parts = $xpath->query( 'part', $root ); |
| 5321 | + if ( $flags & self::NO_ARGS || $this->parser->ot['msg'] ) { |
| 5322 | + $s = '{{{' . $this->implode( '|', 0, $deepFlags, $title, $parts ) . '}}}'; |
| 5323 | + } else { |
| 5324 | + $params = array( 'title' => $title, 'parts' => $parts, 'text' => 'FIXME' ); |
| 5325 | + $s = $this->parser->argSubstitution( $params, $this ); |
| 5326 | + } |
| 5327 | + } elseif ( $root->nodeName == 'ext' ) { |
| 5328 | + # Extension tag |
| 5329 | + $xpath = new DOMXPath( $root->ownerDocument ); |
| 5330 | + $names = $xpath->query( 'name', $root ); |
| 5331 | + $attrs = $xpath->query( 'attr', $root ); |
| 5332 | + $inners = $xpath->query( 'inner', $root ); |
| 5333 | + $closes = $xpath->query( 'close', $root ); |
| 5334 | + $params = array( |
| 5335 | + 'name' => $names->item( 0 ), |
| 5336 | + 'attr' => $attrs->length > 0 ? $attrs->item( 0 ) : null, |
| 5337 | + 'inner' => $inners->length > 0 ? $inners->item( 0 ) : null, |
| 5338 | + 'close' => $closes->length > 0 ? $closes->item( 0 ) : null, |
| 5339 | + ); |
| 5340 | + $s = $this->parser->extensionSubstitution( $params, $this ); |
| 5341 | + } elseif ( $root->nodeName == 'h' ) { |
| 5342 | + # Heading |
| 5343 | + $s = $this->expand( $root->childNodes, 0, $deepFlags ); |
| 5344 | + |
| 5345 | + if ( $this->parser->ot['html'] ) { |
| 5346 | + # Insert heading index marker |
| 5347 | + $headingIndex = $root->getAttribute( 'i' ); |
| 5348 | + $titleText = $this->title->getPrefixedDBkey(); |
| 5349 | + $this->parser->mHeadings[] = array( $titleText, $headingIndex ); |
| 5350 | + $serial = count( $this->parser->mHeadings ) - 1; |
| 5351 | + $marker = "{$this->parser->mUniqPrefix}-h-$serial-{$this->parser->mMarkerSuffix}"; |
| 5352 | + $count = $root->getAttribute( 'level' ); |
| 5353 | + |
| 5354 | + // FIXME: bug-for-bug with old parser |
| 5355 | + // Lose whitespace for no apparent reason |
| 5356 | + // Remove this after differential testing is done |
| 5357 | + if ( true ) { |
| 5358 | + // Good version |
| 5359 | + $s = substr( $s, 0, $count ) . $marker . substr( $s, $count ); |
| 5360 | + } else { |
| 5361 | + // Bad version |
| 5362 | + if ( preg_match( '/^(={1,6})(.*?)(={1,6})\s*?$/', $s, $m ) ) { |
| 5363 | + if ( $m[2] != '' ) { |
| 5364 | + $s = $m[1] . $marker . $m[2] . $m[3]; |
| 5365 | + } |
| 5366 | + } |
| 5367 | + } |
| 5368 | + $this->parser->mStripState->general->setPair( $marker, '' ); |
| 5369 | + } |
| 5370 | + } else { |
| 5371 | + # Generic recursive expansion |
| 5372 | + $s = ''; |
| 5373 | + for ( $node = $root->firstChild; $node; $node = $node->nextSibling ) { |
| 5374 | + if ( $node->nodeType == XML_TEXT_NODE ) { |
| 5375 | + $s .= $node->nodeValue; |
| 5376 | + } elseif ( $node->nodeType == XML_ELEMENT_NODE ) { |
| 5377 | + $s .= $this->expand( $node, 0, $deepFlags ); |
| 5378 | + } |
| 5379 | + } |
| 5380 | + } |
| 5381 | + } else { |
| 5382 | + throw new MWException( __METHOD__.': Invalid parameter type' ); |
| 5383 | + } |
| 5384 | + return $s; |
| 5385 | + } |
| 5386 | + |
| 5387 | + function implodeWithFlags( $sep, $shallowFlags, $deepFlags /*, ... */ ) { |
| 5388 | + $args = array_slice( func_get_args(), 3 ); |
| 5389 | + |
| 5390 | + $first = true; |
| 5391 | + $s = ''; |
| 5392 | + foreach ( $args as $root ) { |
| 5393 | + if ( !is_array( $root ) && !( $root instanceof DOMNodeList ) ) { |
| 5394 | + $root = array( $root ); |
| 5395 | + } |
| 5396 | + foreach ( $root as $node ) { |
| 5397 | + if ( $first ) { |
| 5398 | + $first = false; |
| 5399 | + } else { |
| 5400 | + $s .= $sep; |
| 5401 | + } |
| 5402 | + $s .= $this->expand( $node, $shallowFlags, $deepFlags ); |
| 5403 | + } |
| 5404 | + } |
| 5405 | + return $s; |
| 5406 | + } |
| 5407 | + |
| 5408 | + function implode( $sep /*, ... */ ) { |
| 5409 | + $args = func_get_args(); |
| 5410 | + $args = array_merge( array_slice( $args, 0, 1 ), array( 0, 0 ), array_slice( $args, 1 ) ); |
| 5411 | + return call_user_func_array( array( $this, 'implodeWithFlags' ), $args ); |
| 5412 | + } |
| 5413 | + |
| 5414 | + function __toString() { |
| 5415 | + return 'frame{}'; |
| 5416 | + } |
| 5417 | +} |
| 5418 | + |
| 5419 | +/** |
| 5420 | + * Expansion frame with template arguments: stores the parser, parent frame, argument array and title passed to the constructor. __toString() lists every argument as "name":"XML of the argument node" (double quotes backslash-escaped) inside tplframe{...}. |
| 5421 | + */ |
| 5422 | +class PPTemplateFrame extends PPFrame { |
| 5423 | + public $parser, $args, $parent, $serial; |
| 5424 | + |
| 5425 | + function __construct( $parser, $parent = false, $args = array(), $title = false ) { |
| 5426 | + $this->parser = $parser; |
| 5427 | + $this->parent = $parent; |
| 5428 | + $this->args = $args; |
| 5429 | + $this->title = $title; |
| 5430 | + } |
| 5431 | + |
| 5432 | + function __toString() { |
| 5433 | + $s = 'tplframe{'; |
| 5434 | + $first = true; |
| 5435 | + foreach ( $this->args as $name => $value ) { |
| 5436 | + if ( $first ) { |
| 5437 | + $first = false; |
| 5438 | + } else { |
| 5439 | + $s .= ', '; |
| 5440 | + } |
| 5441 | + $s .= "\"$name\":\"" . |
| 5442 | + str_replace( '"', '\\"', $value->ownerDocument->saveXML( $value ) ) . '"'; |
| 5443 | + } |
| 5444 | + $s .= '}'; |
| 5445 | + return $s; |
| 5446 | + } |
| 5447 | +} |
| 5448 | + |
Index: trunk/phase3/includes/Setup.php |
— | — | @@ -235,7 +235,8 @@ |
236 | 236 | $wgUser = new StubUser; |
237 | 237 | $wgLang = new StubUserLang; |
238 | 238 | $wgOut = new StubObject( 'wgOut', 'OutputPage' ); |
239 | | -$wgParser = new StubObject( 'wgParser', 'Parser' ); |
| 239 | +$wgParser = new StubObject( 'wgParser', $wgParserConf['class'], array( $wgParserConf ) ); |
| 240 | + |
240 | 241 | $wgMessageCache = new StubObject( 'wgMessageCache', 'MessageCache', |
241 | 242 | array( $parserMemc, $wgUseDatabaseMessages, $wgMsgCacheExpiry, wfWikiID() ) ); |
242 | 243 | |
Index: trunk/phase3/includes/Parser_DiffTest.php |
— | — | @@ -0,0 +1,62 @@ |
| 2 | +<?php |
| 3 | + |
| 4 | +class Parser_DiffTest // Forwards each call to every configured parser and throws MWException if their results differ |
| 5 | +{ |
| 6 | + var $parsers, $conf; // $parsers stays null until init() instantiates the classes listed in $conf['parsers'] |
| 7 | + |
| 8 | + function __construct( $conf ) { // $conf['parsers'] is required: a list of class names or conf arrays with a 'class' key |
| 9 | + if ( !isset( $conf['parsers'] ) ) { |
| 10 | + throw new MWException( __METHOD__ . ': no parsers specified' ); |
| 11 | + } |
| 12 | + $this->conf = $conf; |
| 13 | + } |
| 14 | + |
| 15 | + function init() { // Lazily builds the parser instances; returns immediately once $this->parsers is set |
| 16 | + if ( !is_null( $this->parsers ) ) { |
| 17 | + return; |
| 18 | + } |
| 19 | + foreach ( $this->conf['parsers'] as $i => $parserConf ) { |
| 20 | + if ( !is_array( $parserConf ) ) { |
| 21 | + $class = $parserConf; |
| 22 | + $parserConf = array( 'class' => $parserConf ); // fixed: was $parserconf, so the constructor still received the bare class-name string |
| 23 | + } else { |
| 24 | + $class = $parserConf['class']; |
| 25 | + } |
| 26 | + $this->parsers[$i] = new $class( $parserConf ); |
| 27 | + } |
| 28 | + } |
| 29 | + |
| 30 | + function __call( $name, $args ) { // Runs $name on every parser, compares consecutive results, returns the last parser's result |
| 31 | + $this->init(); |
| 32 | + $results = array(); |
| 33 | + $mismatch = false; |
| 34 | + $lastResult = null; |
| 35 | + $first = true; |
| 36 | + foreach ( $this->parsers as $i => $parser ) { |
| 37 | + $currentResult = call_user_func_array( array( &$this->parsers[$i], $name ), $args ); |
| 38 | + if ( $first ) { |
| 39 | + $first = false; |
| 40 | + } else { |
| 41 | + if ( ( is_object( $lastResult ) && is_object( $currentResult ) ) ? $lastResult != $currentResult : $lastResult !== $currentResult ) { // fixed: two objects are compared by value, since !== on objects tests identity and separate parsers never return the same instance |
| 42 | + $mismatch = true; |
| 43 | + } |
| 44 | + } |
| 45 | + $results[$i] = $currentResult; |
| 46 | + $lastResult = $currentResult; |
| 47 | + } |
| 48 | + if ( $mismatch ) { |
| 49 | + throw new MWException( "Parser_DiffTest: results mismatch on call to $name\n" . |
| 50 | + 'Arguments: ' . var_export( $args, true ) . "\n" . |
| 51 | + 'Results: ' . var_export( $results, true ) . "\n" ); |
| 52 | + } |
| 53 | + return $lastResult; |
| 54 | + } |
| 55 | + |
| 56 | + function setFunctionHook( $id, $callback, $flags = 0 ) { // Defined explicitly, so it bypasses __call: registers the hook on every parser without comparing results |
| 57 | + $this->init(); |
| 58 | + foreach ( $this->parsers as $i => $parser ) { |
| 59 | + $parser->setFunctionHook( $id, $callback, $flags ); |
| 60 | + } |
| 61 | + } |
| 62 | +} |
| 63 | + |
Property changes on: trunk/phase3/includes/Parser_DiffTest.php |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 64 | + native |
Index: trunk/phase3/includes/Parser_OldPP.php |
— | — | @@ -0,0 +1,4918 @@ |
| 2 | +<?php |
| 3 | +/** |
| 4 | + * Parser with old preprocessor |
| 5 | + */ |
| 6 | +class Parser_OldPP |
| 7 | +{ |
| 8 | + /** |
| 9 | + * Update this version number when the ParserOutput format |
| 10 | + * changes in an incompatible way, so the parser cache |
| 11 | + * can automatically discard old data. |
| 12 | + */ |
| 13 | + const VERSION = '1.6.2'; |
| 14 | + |
| 15 | + # Flags for Parser::setFunctionHook |
| 16 | + # Also available as global constants from Defines.php |
| 17 | + const SFH_NO_HASH = 1; |
| 18 | + const SFH_OBJECT_ARGS = 2; |
| 19 | + |
| 20 | + # Constants needed for external link processing |
| 21 | + # Everything except bracket, space, or control characters |
| 22 | + const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F]'; |
| 23 | + const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)([^][<>"\\x00-\\x20\\x7F]+)\\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/S'; |
| 24 | + |
| 25 | + // State constants for the definition list colon extraction |
| 26 | + const COLON_STATE_TEXT = 0; |
| 27 | + const COLON_STATE_TAG = 1; |
| 28 | + const COLON_STATE_TAGSTART = 2; |
| 29 | + const COLON_STATE_CLOSETAG = 3; |
| 30 | + const COLON_STATE_TAGSLASH = 4; |
| 31 | + const COLON_STATE_COMMENT = 5; |
| 32 | + const COLON_STATE_COMMENTDASH = 6; |
| 33 | + const COLON_STATE_COMMENTDASHDASH = 7; |
| 34 | + |
| 35 | + /**#@+ |
| 36 | + * @private |
| 37 | + */ |
| 38 | + # Persistent: |
| 39 | + var $mTagHooks, $mTransparentTagHooks, $mFunctionHooks, $mFunctionSynonyms, $mVariables, |
| 40 | + $mImageParams, $mImageParamsMagicArray, $mExtLinkBracketedRegex; |
| 41 | + |
| 42 | + # Cleared with clearState(): |
| 43 | + var $mOutput, $mAutonumber, $mDTopen, $mStripState; |
| 44 | + var $mIncludeCount, $mArgStack, $mLastSection, $mInPre; |
| 45 | + var $mInterwikiLinkHolders, $mLinkHolders, $mUniqPrefix; |
| 46 | + var $mIncludeSizes, $mDefaultSort; |
| 47 | + var $mTemplates, // cache of already loaded templates, avoids |
| 48 | + // multiple SQL queries for the same string |
| 49 | + $mTemplatePath; // stores an unsorted hash of all the templates already loaded |
| 50 | + // in this path. Used for loop detection. |
| 51 | + |
| 52 | + # Temporary |
| 53 | + # These are variables reset at least once per parse regardless of $clearState |
| 54 | + var $mOptions, // ParserOptions object |
| 55 | + $mTitle, // Title context, used for self-link rendering and similar things |
| 56 | + $mOutputType, // Output type, one of the OT_xxx constants |
| 57 | + $ot, // Shortcut alias, see setOutputType() |
| 58 | + $mRevisionId, // ID to display in {{REVISIONID}} tags |
| 59 | + $mRevisionTimestamp, // The timestamp of the specified revision ID |
| 60 | + $mRevIdForTs; // The revision ID which was used to fetch the timestamp |
| 61 | + |
| 62 | + /**#@-*/ |
| 63 | + |
| 64 | + /** |
| 65 | + * Constructor |
| 66 | + * |
| 67 | + * @public |
| 68 | + */ |
| 69 | + function __construct( $conf = array() ) { |
| 70 | + $this->mTagHooks = array(); |
| 71 | + $this->mTransparentTagHooks = array(); |
| 72 | + $this->mFunctionHooks = array(); |
| 73 | + $this->mFunctionSynonyms = array( 0 => array(), 1 => array() ); |
| 74 | + $this->mFirstCall = true; |
| 75 | + $this->mExtLinkBracketedRegex = '/\[(\b(' . wfUrlProtocols() . ')'. |
| 76 | + '[^][<>"\\x00-\\x20\\x7F]+) *([^\]\\x0a\\x0d]*?)\]/S'; |
| 77 | + } |
| 78 | + |
| 79 | + /** |
| 80 | + * Do various kinds of initialisation on the first call of the parser |
| 81 | + */ |
| 82 | + function firstCallInit() { |
| 83 | + if ( !$this->mFirstCall ) { |
| 84 | + return; |
| 85 | + } |
| 86 | + |
| 87 | + wfProfileIn( __METHOD__ ); |
| 88 | + global $wgAllowDisplayTitle, $wgAllowSlowParserFunctions; |
| 89 | + |
| 90 | + $this->setHook( 'pre', array( $this, 'renderPreTag' ) ); |
| 91 | + |
| 92 | + $this->setFunctionHook( 'int', array( 'CoreParserFunctions', 'intFunction' ), SFH_NO_HASH ); |
| 93 | + $this->setFunctionHook( 'ns', array( 'CoreParserFunctions', 'ns' ), SFH_NO_HASH ); |
| 94 | + $this->setFunctionHook( 'urlencode', array( 'CoreParserFunctions', 'urlencode' ), SFH_NO_HASH ); |
| 95 | + $this->setFunctionHook( 'lcfirst', array( 'CoreParserFunctions', 'lcfirst' ), SFH_NO_HASH ); |
| 96 | + $this->setFunctionHook( 'ucfirst', array( 'CoreParserFunctions', 'ucfirst' ), SFH_NO_HASH ); |
| 97 | + $this->setFunctionHook( 'lc', array( 'CoreParserFunctions', 'lc' ), SFH_NO_HASH ); |
| 98 | + $this->setFunctionHook( 'uc', array( 'CoreParserFunctions', 'uc' ), SFH_NO_HASH ); |
| 99 | + $this->setFunctionHook( 'localurl', array( 'CoreParserFunctions', 'localurl' ), SFH_NO_HASH ); |
| 100 | + $this->setFunctionHook( 'localurle', array( 'CoreParserFunctions', 'localurle' ), SFH_NO_HASH ); |
| 101 | + $this->setFunctionHook( 'fullurl', array( 'CoreParserFunctions', 'fullurl' ), SFH_NO_HASH ); |
| 102 | + $this->setFunctionHook( 'fullurle', array( 'CoreParserFunctions', 'fullurle' ), SFH_NO_HASH ); |
| 103 | + $this->setFunctionHook( 'formatnum', array( 'CoreParserFunctions', 'formatnum' ), SFH_NO_HASH ); |
| 104 | + $this->setFunctionHook( 'grammar', array( 'CoreParserFunctions', 'grammar' ), SFH_NO_HASH ); |
| 105 | + $this->setFunctionHook( 'plural', array( 'CoreParserFunctions', 'plural' ), SFH_NO_HASH ); |
| 106 | + $this->setFunctionHook( 'numberofpages', array( 'CoreParserFunctions', 'numberofpages' ), SFH_NO_HASH ); |
| 107 | + $this->setFunctionHook( 'numberofusers', array( 'CoreParserFunctions', 'numberofusers' ), SFH_NO_HASH ); |
| 108 | + $this->setFunctionHook( 'numberofarticles', array( 'CoreParserFunctions', 'numberofarticles' ), SFH_NO_HASH ); |
| 109 | + $this->setFunctionHook( 'numberoffiles', array( 'CoreParserFunctions', 'numberoffiles' ), SFH_NO_HASH ); |
| 110 | + $this->setFunctionHook( 'numberofadmins', array( 'CoreParserFunctions', 'numberofadmins' ), SFH_NO_HASH ); |
| 111 | + $this->setFunctionHook( 'numberofedits', array( 'CoreParserFunctions', 'numberofedits' ), SFH_NO_HASH ); |
| 112 | + $this->setFunctionHook( 'language', array( 'CoreParserFunctions', 'language' ), SFH_NO_HASH ); |
| 113 | + $this->setFunctionHook( 'padleft', array( 'CoreParserFunctions', 'padleft' ), SFH_NO_HASH ); |
| 114 | + $this->setFunctionHook( 'padright', array( 'CoreParserFunctions', 'padright' ), SFH_NO_HASH ); |
| 115 | + $this->setFunctionHook( 'anchorencode', array( 'CoreParserFunctions', 'anchorencode' ), SFH_NO_HASH ); |
| 116 | + $this->setFunctionHook( 'special', array( 'CoreParserFunctions', 'special' ) ); |
| 117 | + $this->setFunctionHook( 'defaultsort', array( 'CoreParserFunctions', 'defaultsort' ), SFH_NO_HASH ); |
| 118 | + $this->setFunctionHook( 'filepath', array( 'CoreParserFunctions', 'filepath' ), SFH_NO_HASH ); |
| 119 | + |
| 120 | + if ( $wgAllowDisplayTitle ) { |
| 121 | + $this->setFunctionHook( 'displaytitle', array( 'CoreParserFunctions', 'displaytitle' ), SFH_NO_HASH ); |
| 122 | + } |
| 123 | + if ( $wgAllowSlowParserFunctions ) { |
| 124 | + $this->setFunctionHook( 'pagesinnamespace', array( 'CoreParserFunctions', 'pagesinnamespace' ), SFH_NO_HASH ); |
| 125 | + } |
| 126 | + |
| 127 | + $this->initialiseVariables(); |
| 128 | + $this->mFirstCall = false; |
| 129 | + wfProfileOut( __METHOD__ ); |
| 130 | + } |
| 131 | + |
| 132 | + /** |
| 133 | + * Clear Parser state |
| 134 | + * |
| 135 | + * @private |
| 136 | + */ |
| 137 | + function clearState() { |
| 138 | + wfProfileIn( __METHOD__ ); |
| 139 | + if ( $this->mFirstCall ) { |
| 140 | + $this->firstCallInit(); |
| 141 | + } |
| 142 | + $this->mOutput = new ParserOutput; |
| 143 | + $this->mAutonumber = 0; |
| 144 | + $this->mLastSection = ''; |
| 145 | + $this->mDTopen = false; |
| 146 | + $this->mIncludeCount = array(); |
| 147 | + $this->mStripState = new StripState; |
| 148 | + $this->mArgStack = array(); |
| 149 | + $this->mInPre = false; |
| 150 | + $this->mInterwikiLinkHolders = array( |
| 151 | + 'texts' => array(), |
| 152 | + 'titles' => array() |
| 153 | + ); |
| 154 | + $this->mLinkHolders = array( |
| 155 | + 'namespaces' => array(), |
| 156 | + 'dbkeys' => array(), |
| 157 | + 'queries' => array(), |
| 158 | + 'texts' => array(), |
| 159 | + 'titles' => array() |
| 160 | + ); |
| 161 | + $this->mRevisionTimestamp = $this->mRevisionId = null; |
| 162 | + |
| 163 | + /** |
| 164 | + * Prefix for temporary replacement strings for the multipass parser. |
| 165 | + * \x07 should never appear in input as it's disallowed in XML. |
| 166 | + * Using it at the front also gives us a little extra robustness |
| 167 | + * since it shouldn't match when butted up against identifier-like |
| 168 | + * string constructs. |
| 169 | + */ |
| 170 | + $this->mUniqPrefix = "\x07UNIQ" . self::getRandomString(); |
| 171 | + |
| 172 | + # Clear these on every parse, bug 4549 |
| 173 | + $this->mTemplates = array(); |
| 174 | + $this->mTemplatePath = array(); |
| 175 | + |
| 176 | + $this->mShowToc = true; |
| 177 | + $this->mForceTocPosition = false; |
| 178 | + $this->mIncludeSizes = array( |
| 179 | + 'pre-expand' => 0, |
| 180 | + 'post-expand' => 0, |
| 181 | + 'arg' => 0 |
| 182 | + ); |
| 183 | + $this->mDefaultSort = false; |
| 184 | + |
| 185 | + wfRunHooks( 'ParserClearState', array( &$this ) ); |
| 186 | + wfProfileOut( __METHOD__ ); |
| 187 | + } |
| 188 | + |
| 189 | + function setOutputType( $ot ) { |
| 190 | + $this->mOutputType = $ot; |
| 191 | + // Shortcut alias |
| 192 | + $this->ot = array( |
| 193 | + 'html' => $ot == OT_HTML, |
| 194 | + 'wiki' => $ot == OT_WIKI, |
| 195 | + 'msg' => $ot == OT_MSG, |
| 196 | + 'pre' => $ot == OT_PREPROCESS, |
| 197 | + ); |
| 198 | + } |
| 199 | + |
| 200 | + /** |
| 201 | + * Accessor for mUniqPrefix. |
| 202 | + * |
| 203 | + * @public |
| 204 | + */ |
| 205 | + function uniqPrefix() { |
| 206 | + return $this->mUniqPrefix; |
| 207 | + } |
| 208 | + |
| 209 | + /** |
| 210 | + * Convert wikitext to HTML |
| 211 | + * Do not call this function recursively. |
| 212 | + * |
| 213 | + * @param string $text Text we want to parse |
| 214 | + * @param Title &$title A title object |
| 215 | + * @param array $options |
| 216 | + * @param boolean $linestart |
| 217 | + * @param boolean $clearState |
| 218 | + * @param int $revid number to pass in {{REVISIONID}} |
| 219 | + * @return ParserOutput a ParserOutput |
| 220 | + */ |
| 221 | + public function parse( $text, &$title, $options, $linestart = true, $clearState = true, $revid = null ) { |
| 222 | + /** |
| 223 | + * First pass--just handle <nowiki> sections, pass the rest off |
| 224 | + * to internalParse() which does all the real work. |
| 225 | + */ |
| 226 | + |
| 227 | + global $wgUseTidy, $wgAlwaysUseTidy, $wgContLang; |
| 228 | + $fname = 'Parser::parse-' . wfGetCaller(); |
| 229 | + wfProfileIn( __METHOD__ ); |
| 230 | + wfProfileIn( $fname ); |
| 231 | + |
| 232 | + if ( $clearState ) { |
| 233 | + $this->clearState(); |
| 234 | + } |
| 235 | + |
| 236 | + $this->mOptions = $options; |
| 237 | + $this->mTitle =& $title; |
| 238 | + $oldRevisionId = $this->mRevisionId; |
| 239 | + $oldRevisionTimestamp = $this->mRevisionTimestamp; |
| 240 | + if( $revid !== null ) { |
| 241 | + $this->mRevisionId = $revid; |
| 242 | + $this->mRevisionTimestamp = null; |
| 243 | + } |
| 244 | + $this->setOutputType( OT_HTML ); |
| 245 | + wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) ); |
| 246 | + $text = $this->strip( $text, $this->mStripState ); |
| 247 | + wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) ); |
| 248 | + $text = $this->internalParse( $text ); |
| 249 | + $text = $this->mStripState->unstripGeneral( $text ); |
| 250 | + |
| 251 | + # Clean up special characters, only run once, next-to-last before doBlockLevels |
| 252 | + $fixtags = array( |
| 253 | + # french spaces, last one Guillemet-left |
| 254 | + # only if there is something before the space |
| 255 | + '/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1 \\2', |
| 256 | + # french spaces, Guillemet-right |
| 257 | + '/(\\302\\253) /' => '\\1 ', |
| 258 | + ); |
| 259 | + $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text ); |
| 260 | + |
| 261 | + # only once and last |
| 262 | + $text = $this->doBlockLevels( $text, $linestart ); |
| 263 | + |
| 264 | + $this->replaceLinkHolders( $text ); |
| 265 | + |
| 266 | + # the position of the parserConvert() call should not be changed. it |
| 267 | + # assumes that the links are all replaced and the only thing left |
| 268 | + # is the <nowiki> mark. |
| 269 | + # Side-effects: this calls $this->mOutput->setTitleText() |
| 270 | + $text = $wgContLang->parserConvert( $text, $this ); |
| 271 | + |
| 272 | + $text = $this->mStripState->unstripNoWiki( $text ); |
| 273 | + |
| 274 | + wfRunHooks( 'ParserBeforeTidy', array( &$this, &$text ) ); |
| 275 | + |
| 276 | +//!JF Move to its own function |
| 277 | + |
| 278 | + $uniq_prefix = $this->mUniqPrefix; |
| 279 | + $matches = array(); |
| 280 | + $elements = array_keys( $this->mTransparentTagHooks ); |
| 281 | + $text = self::extractTagsAndParams( $elements, $text, $matches, $uniq_prefix ); |
| 282 | + |
| 283 | + foreach( $matches as $marker => $data ) { |
| 284 | + list( $element, $content, $params, $tag ) = $data; |
| 285 | + $tagName = strtolower( $element ); |
| 286 | + if( isset( $this->mTransparentTagHooks[$tagName] ) ) { |
| 287 | + $output = call_user_func_array( $this->mTransparentTagHooks[$tagName], |
| 288 | + array( $content, $params, $this ) ); |
| 289 | + } else { |
| 290 | + $output = $tag; |
| 291 | + } |
| 292 | + $this->mStripState->general->setPair( $marker, $output ); |
| 293 | + } |
| 294 | + $text = $this->mStripState->unstripGeneral( $text ); |
| 295 | + |
| 296 | + $text = Sanitizer::normalizeCharReferences( $text ); |
| 297 | + |
| 298 | + if (($wgUseTidy and $this->mOptions->mTidy) or $wgAlwaysUseTidy) { |
| 299 | + $text = self::tidy($text); |
| 300 | + } else { |
| 301 | + # attempt to sanitize at least some nesting problems |
| 302 | + # (bug #2702 and quite a few others) |
| 303 | + $tidyregs = array( |
| 304 | + # ''Something [http://www.cool.com cool''] --> |
| 305 | + # <i>Something</i><a href="http://www.cool.com"..><i>cool></i></a> |
| 306 | + '/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' => |
| 307 | + '\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9', |
| 308 | + # fix up an anchor inside another anchor, only |
| 309 | + # at least for a single nested link (bug 3695) |
| 310 | + '/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' => |
| 311 | + '\\1\\2</a>\\3</a>\\1\\4</a>', |
| 312 | + # fix div inside inline elements- doBlockLevels won't wrap a line which |
| 313 | + # contains a div, so fix it up here; replace |
| 314 | + # div with escaped text |
| 315 | + '/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' => |
| 316 | + '\\1\\3<div\\5>\\6</div>\\8\\9', |
| 317 | + # remove empty italic or bold tag pairs, some |
| 318 | + # introduced by rules above |
| 319 | + '/<([bi])><\/\\1>/' => '', |
| 320 | + ); |
| 321 | + |
| 322 | + $text = preg_replace( |
| 323 | + array_keys( $tidyregs ), |
| 324 | + array_values( $tidyregs ), |
| 325 | + $text ); |
| 326 | + } |
| 327 | + |
| 328 | + wfRunHooks( 'ParserAfterTidy', array( &$this, &$text ) ); |
| 329 | + |
| 330 | + # Information on include size limits, for the benefit of users who try to skirt them |
| 331 | + if ( max( $this->mIncludeSizes ) > 1000 ) { |
| 332 | + $max = $this->mOptions->getMaxIncludeSize(); |
| 333 | + $text .= "<!-- \n" . |
| 334 | + "Pre-expand include size: {$this->mIncludeSizes['pre-expand']} bytes\n" . |
| 335 | + "Post-expand include size: {$this->mIncludeSizes['post-expand']} bytes\n" . |
| 336 | + "Template argument size: {$this->mIncludeSizes['arg']} bytes\n" . |
| 337 | + "Maximum: $max bytes\n" . |
| 338 | + "-->\n"; |
| 339 | + } |
| 340 | + $this->mOutput->setText( $text ); |
| 341 | + $this->mRevisionId = $oldRevisionId; |
| 342 | + $this->mRevisionTimestamp = $oldRevisionTimestamp; |
| 343 | + wfProfileOut( $fname ); |
| 344 | + wfProfileOut( __METHOD__ ); |
| 345 | + |
| 346 | + return $this->mOutput; |
| 347 | + } |
| 348 | + |
| 349 | + /** |
| 350 | + * Recursive parser entry point that can be called from an extension tag |
| 351 | + * hook. |
| 352 | + */ |
| 353 | + function recursiveTagParse( $text ) { |
| 354 | + wfProfileIn( __METHOD__ ); |
| 355 | + wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) ); |
| 356 | + $text = $this->strip( $text, $this->mStripState ); |
| 357 | + wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) ); |
| 358 | + $text = $this->internalParse( $text ); |
| 359 | + wfProfileOut( __METHOD__ ); |
| 360 | + return $text; |
| 361 | + } |
| 362 | + |
| 363 | + /** |
| 364 | + * Expand templates and variables in the text, producing valid, static wikitext. |
| 365 | + * Also removes HTML comments when the parser options' getRemoveComments() is true. |
| 366 | + */ |
| 367 | + function preprocess( $text, $title, $options, $revid = null ) { |
| 368 | + wfProfileIn( __METHOD__ ); |
| 369 | + $this->clearState(); |
| 370 | + $this->setOutputType( OT_PREPROCESS ); |
| 371 | + $this->mOptions = $options; |
| 372 | + $this->mTitle = $title; |
| 373 | + if( $revid !== null ) { |
| 374 | + $this->mRevisionId = $revid; |
| 375 | + } |
| 376 | + wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) ); |
| 377 | + $text = $this->strip( $text, $this->mStripState ); |
| 378 | + wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) ); |
| 379 | + if ( $this->mOptions->getRemoveComments() ) { |
| 380 | + $text = Sanitizer::removeHTMLcomments( $text ); |
| 381 | + } |
| 382 | + $text = $this->replaceVariables( $text ); |
| 383 | + $text = $this->mStripState->unstripBoth( $text ); |
| 384 | + wfProfileOut( __METHOD__ ); |
| 385 | + return $text; |
| 386 | + } |
| 387 | + |
| 388 | + /** |
| 389 | + * Get a random string |
| 390 | + * |
| 391 | + * @private |
| 392 | + * @static |
| 393 | + */ |
| 394 | + function getRandomString() { |
| 395 | + return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff)); |
| 396 | + } |
| 397 | + |
| 398 | + function &getTitle() { return $this->mTitle; } |
| 399 | + function getOptions() { return $this->mOptions; } |
| 400 | + |
| 401 | + function getFunctionLang() { |
| 402 | + global $wgLang, $wgContLang; |
| 403 | + return $this->mOptions->getInterfaceMessage() ? $wgLang : $wgContLang; |
| 404 | + } |
| 405 | + |
| 406 | + /** |
| 407 | + * Replaces all occurrences of HTML-style comments and the given tags |
| 408 | + * in the text with a unique marker and returns the new text. The output |
| 409 | + * parameter $matches will be an associative array filled with data in |
| 410 | + * the form: |
| 411 | + * 'UNIQ-xxxxx' => array( |
| 412 | + * 'element', |
| 413 | + * 'tag content', |
| 414 | + * array( 'param' => 'x' ), |
| 415 | + * '<element param="x">tag content</element>' ) ) |
| 416 | + * |
| 417 | + * @param $elements list of element names. Comments are always extracted. |
| 418 | + * @param $text Source text string. |
| 419 | + * @param $uniq_prefix |
| 420 | + * |
| 421 | + * @public |
| 422 | + * @static |
| 423 | + */ |
| 424 | + function extractTagsAndParams($elements, $text, &$matches, $uniq_prefix = ''){ |
| 425 | + static $n = 1; |
| 426 | + $stripped = ''; |
| 427 | + $matches = array(); |
| 428 | + |
| 429 | + $taglist = implode( '|', $elements ); |
| 430 | + $start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?>)|<(!--)/i"; |
| 431 | + |
| 432 | + while ( '' != $text ) { |
| 433 | + $p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE ); |
| 434 | + $stripped .= $p[0]; |
| 435 | + if( count( $p ) < 5 ) { |
| 436 | + break; |
| 437 | + } |
| 438 | + if( count( $p ) > 5 ) { |
| 439 | + // comment |
| 440 | + $element = $p[4]; |
| 441 | + $attributes = ''; |
| 442 | + $close = ''; |
| 443 | + $inside = $p[5]; |
| 444 | + } else { |
| 445 | + // tag |
| 446 | + $element = $p[1]; |
| 447 | + $attributes = $p[2]; |
| 448 | + $close = $p[3]; |
| 449 | + $inside = $p[4]; |
| 450 | + } |
| 451 | + |
| 452 | + $marker = "$uniq_prefix-$element-" . sprintf('%08X', $n++) . "-QINU\x07"; |
| 453 | + $stripped .= $marker; |
| 454 | + |
| 455 | + if ( $close === '/>' ) { |
| 456 | + // Empty element tag, <tag /> |
| 457 | + $content = null; |
| 458 | + $text = $inside; |
| 459 | + $tail = null; |
| 460 | + } else { |
| 461 | + if( $element == '!--' ) { |
| 462 | + $end = '/(-->)/'; |
| 463 | + } else { |
| 464 | + $end = "/(<\\/$element\\s*>)/i"; |
| 465 | + } |
| 466 | + $q = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE ); |
| 467 | + $content = $q[0]; |
| 468 | + if( count( $q ) < 3 ) { |
| 469 | + # No end tag -- let it run out to the end of the text. |
| 470 | + $tail = ''; |
| 471 | + $text = ''; |
| 472 | + } else { |
| 473 | + $tail = $q[1]; |
| 474 | + $text = $q[2]; |
| 475 | + } |
| 476 | + } |
| 477 | + |
| 478 | + $matches[$marker] = array( $element, |
| 479 | + $content, |
| 480 | + Sanitizer::decodeTagAttributes( $attributes ), |
| 481 | + "<$element$attributes$close$content$tail" ); |
| 482 | + } |
| 483 | + return $stripped; |
| 484 | + } |
| 485 | + |
| 486 | + /** |
| 487 | + * Strips and renders nowiki, pre, math, hiero |
| 488 | + * If the output type is OT_HTML, performs necessary rendering operations on plugins; otherwise the tag source is kept |
| 489 | + * Returns the text, and adds the stripped items to $state for later unstripping |
| 490 | + * |
| 491 | + * @param StripState $state |
| 492 | + * |
| 493 | + * @param bool $stripcomments when set, HTML comments <!-- like this --> |
| 494 | + * will be stripped in addition to other tags. This is important |
| 495 | + * for section editing, where these comments cause confusion when |
| 496 | + * counting the sections in the wikisource |
| 497 | + * |
| 498 | + * @param array $dontstrip contains tags which should not be stripped; |
| 499 | + * used to prevent stripping of <gallery> when saving (fixes bug 2700) |
| 500 | + * |
| 501 | + * @private |
| 502 | + */ |
| 503 | + function strip( $text, $state, $stripcomments = false , $dontstrip = array () ) { |
| 504 | + global $wgContLang; |
| 505 | + wfProfileIn( __METHOD__ ); |
| 506 | + $render = ($this->mOutputType == OT_HTML); |
| 507 | + |
| 508 | + $uniq_prefix = $this->mUniqPrefix; |
| 509 | + $commentState = new ReplacementArray; |
| 510 | + $nowikiItems = array(); |
| 511 | + $generalItems = array(); |
| 512 | + |
| 513 | + $elements = array_merge( |
| 514 | + array( 'nowiki', 'gallery' ), |
| 515 | + array_keys( $this->mTagHooks ) ); |
| 516 | + global $wgRawHtml; |
| 517 | + if( $wgRawHtml ) { |
| 518 | + $elements[] = 'html'; |
| 519 | + } |
| 520 | + if( $this->mOptions->getUseTeX() ) { |
| 521 | + $elements[] = 'math'; |
| 522 | + } |
| 523 | + |
| 524 | + # Removing $dontstrip tags from $elements list (currently only 'gallery', fixing bug 2700) |
| 525 | + foreach ( $elements AS $k => $v ) { |
| 526 | + if ( !in_array ( $v , $dontstrip ) ) continue; |
| 527 | + unset ( $elements[$k] ); |
| 528 | + } |
| 529 | + |
| 530 | + $matches = array(); |
| 531 | + $text = self::extractTagsAndParams( $elements, $text, $matches, $uniq_prefix ); |
| 532 | + |
| 533 | + foreach( $matches as $marker => $data ) { |
| 534 | + list( $element, $content, $params, $tag ) = $data; |
| 535 | + if( $render ) { |
| 536 | + $tagName = strtolower( $element ); |
| 537 | + wfProfileIn( __METHOD__."-render-$tagName" ); |
| 538 | + switch( $tagName ) { |
| 539 | + case '!--': |
| 540 | + // Comment |
| 541 | + if( substr( $tag, -3 ) == '-->' ) { |
| 542 | + $output = $tag; |
| 543 | + } else { |
| 544 | + // Unclosed comment in input. |
| 545 | + // Close it so later stripping can remove it |
| 546 | + $output = "$tag-->"; |
| 547 | + } |
| 548 | + break; |
| 549 | + case 'html': |
| 550 | + if( $wgRawHtml ) { |
| 551 | + $output = $content; |
| 552 | + break; |
| 553 | + } |
| 554 | + // Shouldn't happen otherwise. :) |
| 555 | + case 'nowiki': |
| 556 | + $output = Xml::escapeTagsOnly( $content ); |
| 557 | + break; |
| 558 | + case 'math': |
| 559 | + $output = $wgContLang->armourMath( |
| 560 | + MathRenderer::renderMath( $content, $params ) ); |
| 561 | + break; |
| 562 | + case 'gallery': |
| 563 | + $output = $this->renderImageGallery( $content, $params ); |
| 564 | + break; |
| 565 | + default: |
| 566 | + if( isset( $this->mTagHooks[$tagName] ) ) { |
| 567 | + $output = call_user_func_array( $this->mTagHooks[$tagName], |
| 568 | + array( $content, $params, $this ) ); |
| 569 | + } else { |
| 570 | + throw new MWException( "Invalid call hook $element" ); |
| 571 | + } |
| 572 | + } |
| 573 | + wfProfileOut( __METHOD__."-render-$tagName" ); |
| 574 | + } else { |
| 575 | + // Just stripping tags; keep the source |
| 576 | + $output = $tag; |
| 577 | + } |
| 578 | + |
| 579 | + // Unstrip the output, to support recursive strip() calls |
| 580 | + $output = $state->unstripBoth( $output ); |
| 581 | + |
| 582 | + if( !$stripcomments && $element == '!--' ) { |
| 583 | + $commentState->setPair( $marker, $output ); |
| 584 | + } elseif ( $element == 'html' || $element == 'nowiki' ) { |
| 585 | + $nowikiItems[$marker] = $output; |
| 586 | + } else { |
| 587 | + $generalItems[$marker] = $output; |
| 588 | + } |
| 589 | + } |
| 590 | + # Add the new items to the state |
| 591 | + # We do this after the loop instead of during it to avoid slowing |
| 592 | + # down the recursive unstrip |
| 593 | + $state->nowiki->mergeArray( $nowikiItems ); |
| 594 | + $state->general->mergeArray( $generalItems ); |
| 595 | + |
| 596 | + # Unstrip comments unless explicitly told otherwise. |
| 597 | + # (The comments are always stripped prior to this point, so as to |
| 598 | + # not invoke any extension tags / parser hooks contained within |
| 599 | + # a comment.) |
| 600 | + if ( !$stripcomments ) { |
| 601 | + // Put them all back and forget them |
| 602 | + $text = $commentState->replace( $text ); |
| 603 | + } |
| 604 | + |
| 605 | + wfProfileOut( __METHOD__ ); |
| 606 | + return $text; |
| 607 | + } |
| 608 | + |
| 609 | + /** |
| 610 | + * Restores pre, math, and other extensions removed by strip() |
| 611 | + * |
| 612 | + * always call unstripNoWiki() after this one |
| 613 | + * @private |
| 614 | + * @deprecated use $this->mStripState->unstripGeneral() |
| 615 | + */ |
| 616 | + function unstrip( $text, $state ) { |
| 617 | + return $state->unstripGeneral( $text ); |
| 618 | + } |
| 619 | + |
| 620 | + /** |
| 621 | + * Always call this after unstrip() to preserve the order |
| 622 | + * |
| 623 | + * @private |
| 624 | + * @deprecated use $this->mStripState->unstripNoWiki() |
| 625 | + */ |
| 626 | + function unstripNoWiki( $text, $state ) { |
| 627 | + return $state->unstripNoWiki( $text ); |
| 628 | + } |
| 629 | + |
| 630 | + /** |
| 631 | + * @deprecated use $this->mStripState->unstripBoth() |
| 632 | + */ |
| 633 | + function unstripForHTML( $text ) { |
| 634 | + return $this->mStripState->unstripBoth( $text ); |
| 635 | + } |
| 636 | + |
| 637 | + /** |
| 638 | + * Add an item to the strip state |
| 639 | + * Returns the unique tag which must be inserted into the stripped text |
| 640 | + * The tag will be replaced with the original text in unstrip() |
| 641 | + * |
| 642 | + * @private |
| 643 | + */ |
| 644 | + function insertStripItem( $text, &$state ) { |
| 645 | + $rnd = $this->mUniqPrefix . '-item' . self::getRandomString(); |
| 646 | + $state->general->setPair( $rnd, $text ); |
| 647 | + return $rnd; |
| 648 | + } |
| 649 | + |
/**
 * Interface with html tidy, used if $wgUseTidy = true.
 *
 * The text is wrapped in a minimal XHTML 1.0 Transitional document and
 * handed to the in-process tidy when $wgTidyInternal is set, otherwise to
 * the external tidy program. When the chosen backend reports failure by
 * returning null, the original text is returned with a warning comment
 * appended.
 *
 * @param string $text Hideous HTML input
 * @return string Corrected HTML output
 * @public
 * @static
 */
function tidy( $text ) {
	global $wgTidyInternal;

	$doctype = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"' .
		' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">';
	$document = $doctype . '<html><head><title>test</title></head><body>' .
		$text . '</body></html>';

	$tidied = $wgTidyInternal
		? self::internalTidy( $document )
		: self::externalTidy( $document );

	if ( !is_null( $tidied ) ) {
		return $tidied;
	}

	# Backend gave up: fall back to the untouched input plus a marker comment
	wfDebug( "Tidy error detected!\n" );
	return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
}
| 680 | + |
/**
 * Run the external HTML tidy binary on $text and collect what it prints.
 *
 * The command line is built from $wgTidyBin, $wgTidyConf and $wgTidyOpts
 * with ' -utf8' appended. The text is written to the child's stdin, the
 * result is read from its stdout, and stderr goes to wfGetNull().
 *
 * @param $text Markup to clean
 * @return The collected output, or null when nothing came back although
 *         the input was not empty
 * @private
 * @static
 */
function externalTidy( $text ) {
	global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
	$fname = 'Parser::externalTidy';
	wfProfileIn( $fname );

	$tidied = '';
	$extraOpts = ' -utf8';
	$command = "$wgTidyBin -config $wgTidyConf $wgTidyOpts$extraOpts";

	$streams = array(
		0 => array( 'pipe', 'r' ),
		1 => array( 'pipe', 'w' ),
		2 => array( 'file', wfGetNull(), 'a' )
	);
	$pipes = array();
	$proc = proc_open( $command, $streams, $pipes );
	if ( is_resource( $proc ) ) {
		// Theoretically, this style of communication could cause a deadlock
		// here. If the stdout buffer fills up, then writes to stdin could
		// block. This doesn't appear to happen with tidy, because tidy only
		// writes to stdout after it's finished reading from stdin. Search
		// for tidyParseStdin and tidySaveStdout in console/tidy.c
		fwrite( $pipes[0], $text );
		fclose( $pipes[0] );
		while ( !feof( $pipes[1] ) ) {
			$tidied .= fgets( $pipes[1], 1024 );
		}
		fclose( $pipes[1] );
		proc_close( $proc );
	}

	wfProfileOut( $fname );

	// Empty output for non-empty input means some kind of error happened;
	// give up so the caller can use the source text and append a warning.
	$failed = ( $tidied == '' && $text != '' );
	return $failed ? null : $tidied;
}
| 727 | + |
/**
 * Use the HTML tidy PECL extension to use the tidy library in-process,
 * saving the overhead of spawning a new process.
 *
 * 'pear install tidy' should be able to compile the extension module.
 *
 * The text is parsed with the $wgTidyConf configuration and 'utf8'
 * encoding, then repaired.
 *
 * @param $text Markup to clean
 * @return The repaired output, or null when tidy reports status 2
 * @private
 * @static
 */
function internalTidy( $text ) {
	// $IP used to be imported here as well but was never read.
	global $wgTidyConf;
	$fname = 'Parser::internalTidy';
	wfProfileIn( $fname );

	$tidy = new tidy;
	$tidy->parseString( $text, $wgTidyConf, 'utf8' );
	$tidy->cleanRepair();
	if( $tidy->getStatus() == 2 ) {
		// 2 is magic number for fatal error
		// http://www.php.net/manual/en/function.tidy-get-status.php
		$cleansource = null;
	} else {
		$cleansource = tidy_get_output( $tidy );
	}
	wfProfileOut( $fname );
	return $cleansource;
}
| 755 | + |
/**
 * parse the wiki syntax used to render tables
 *
 * The text is processed line by line. Lines are recognised by their
 * leading markup: "{|" (optionally preceded by ":" for indentation) opens
 * a table, "|}" closes it, "|-" starts a row, "|+" is a caption, and a
 * leading "|" or "!" introduces td / th cells ("||" or "!!" separate
 * several cells on one line). Lines outside any table are left untouched.
 * Per-table state is kept on parallel stacks so that tables can nest.
 * Tables still open at the end of the text are closed automatically.
 *
 * @param $text Wikitext to transform
 * @return The transformed lines joined with "\n"; an empty string when the
 *         whole output is just one empty table
 * @private
 */
function doTableStuff ( $text ) {
	$fname = 'Parser::doTableStuff';
	wfProfileIn( $fname );

	$lines = explode ( "\n" , $text );
	$td_history = array (); // Is currently a td tag open?
	$last_tag_history = array (); // Save history of last tag activated (td, th or caption)
	$tr_history = array (); // Is currently a tr tag open?
	$tr_attributes = array (); // history of tr attributes
	$has_opened_tr = array(); // Did this table open a <tr> element?
	$indent_level = 0; // indent level of the table
	foreach ( $lines as $key => $line )
	{
		$line = trim ( $line );

		if( $line == '' ) { // empty line, go to next line
			continue;
		}
		// Square-bracket offset: the curly-brace form ($line{0}) is
		// deprecated in PHP 7.4 and removed in PHP 8.
		$first_character = $line[0];
		$matches = array();

		if ( preg_match( '/^(:*)\{\|(.*)$/' , $line , $matches ) ) {
			// First check if we are starting a new table
			$indent_level = strlen( $matches[1] );

			$attributes = $this->mStripState->unstripBoth( $matches[2] );
			$attributes = Sanitizer::fixTagAttributes ( $attributes , 'table' );

			$lines[$key] = str_repeat( '<dl><dd>' , $indent_level ) . "<table{$attributes}>";
			array_push ( $td_history , false );
			array_push ( $last_tag_history , '' );
			array_push ( $tr_history , false );
			array_push ( $tr_attributes , '' );
			array_push ( $has_opened_tr , false );
		} else if ( count ( $td_history ) == 0 ) {
			// Not inside a table: don't do any of the following
			continue;
		} else if ( substr ( $line , 0 , 2 ) == '|}' ) {
			// We are ending a table
			$line = '</table>' . substr ( $line , 2 );
			$last_tag = array_pop ( $last_tag_history );

			// A table that never opened a row gets one empty row
			if ( !array_pop ( $has_opened_tr ) ) {
				$line = "<tr><td></td></tr>{$line}";
			}

			if ( array_pop ( $tr_history ) ) {
				$line = "</tr>{$line}";
			}

			if ( array_pop ( $td_history ) ) {
				$line = "</{$last_tag}>{$line}";
			}
			array_pop ( $tr_attributes );
			$lines[$key] = $line . str_repeat( '</dd></dl>' , $indent_level );
		} else if ( substr ( $line , 0 , 2 ) == '|-' ) {
			// Now we have a table row
			$line = preg_replace( '#^\|-+#', '', $line );

			// Whats after the tag is now only attributes
			$attributes = $this->mStripState->unstripBoth( $line );
			$attributes = Sanitizer::fixTagAttributes ( $attributes , 'tr' );
			// Remember the attributes; the <tr> itself is emitted with the first cell
			array_pop ( $tr_attributes );
			array_push ( $tr_attributes , $attributes );

			$line = '';
			$last_tag = array_pop ( $last_tag_history );
			array_pop ( $has_opened_tr );
			array_push ( $has_opened_tr , true );

			if ( array_pop ( $tr_history ) ) {
				$line = '</tr>';
			}

			if ( array_pop ( $td_history ) ) {
				$line = "</{$last_tag}>{$line}";
			}

			$lines[$key] = $line;
			array_push ( $tr_history , false );
			array_push ( $td_history , false );
			array_push ( $last_tag_history , '' );
		}
		else if ( $first_character == '|' || $first_character == '!' || substr ( $line , 0 , 2 ) == '|+' ) {
			// This might be cell elements, td, th or captions
			if ( substr ( $line , 0 , 2 ) == '|+' ) {
				$first_character = '+';
				$line = substr ( $line , 1 );
			}

			$line = substr ( $line , 1 );

			if ( $first_character == '!' ) {
				$line = str_replace ( '!!' , '||' , $line );
			}

			// Split up multiple cells on the same line.
			// FIXME : This can result in improper nesting of tags processed
			// by earlier parser steps, but should avoid splitting up eg
			// attribute values containing literal "||".
			$cells = StringUtils::explodeMarkup( '||' , $line );

			$lines[$key] = '';

			// Loop through each table cell
			foreach ( $cells as $cell )
			{
				$previous = '';
				if ( $first_character != '+' )
				{
					// Cells (not captions) need an open row
					$tr_after = array_pop ( $tr_attributes );
					if ( !array_pop ( $tr_history ) ) {
						$previous = "<tr{$tr_after}>\n";
					}
					array_push ( $tr_history , true );
					array_push ( $tr_attributes , '' );
					array_pop ( $has_opened_tr );
					array_push ( $has_opened_tr , true );
				}

				$last_tag = array_pop ( $last_tag_history );

				// Close the previously open cell, if any
				if ( array_pop ( $td_history ) ) {
					$previous = "</{$last_tag}>{$previous}";
				}

				if ( $first_character == '|' ) {
					$last_tag = 'td';
				} else if ( $first_character == '!' ) {
					$last_tag = 'th';
				} else if ( $first_character == '+' ) {
					$last_tag = 'caption';
				} else {
					$last_tag = '';
				}

				array_push ( $last_tag_history , $last_tag );

				// A cell could contain both parameters and data
				$cell_data = explode ( '|' , $cell , 2 );

				// Bug 553: Note that a '|' inside an invalid link should not
				// be mistaken as delimiting cell parameters
				if ( strpos( $cell_data[0], '[[' ) !== false ) {
					$cell = "{$previous}<{$last_tag}>{$cell}";
				} else if ( count ( $cell_data ) == 1 )
					$cell = "{$previous}<{$last_tag}>{$cell_data[0]}";
				else {
					$attributes = $this->mStripState->unstripBoth( $cell_data[0] );
					$attributes = Sanitizer::fixTagAttributes( $attributes , $last_tag );
					$cell = "{$previous}<{$last_tag}{$attributes}>{$cell_data[1]}";
				}

				$lines[$key] .= $cell;
				array_push ( $td_history , true );
			}
		}
	}

	// Closing open td, tr && table
	while ( count ( $td_history ) > 0 )
	{
		// Close the cell with the tag that actually opened it (td, th or
		// caption) instead of always emitting </td>.
		$last_tag = array_pop ( $last_tag_history );
		if ( array_pop ( $td_history ) ) {
			$lines[] = "</{$last_tag}>" ;
		}
		if ( array_pop ( $tr_history ) ) {
			$lines[] = '</tr>' ;
		}
		if ( !array_pop ( $has_opened_tr ) ) {
			$lines[] = "<tr><td></td></tr>" ;
		}

		$lines[] = '</table>' ;
	}

	$output = implode ( "\n" , $lines ) ;

	// special case: don't return empty table
	if( $output == "<table>\n<tr><td></td></tr>\n</table>" ) {
		$output = '';
	}

	wfProfileOut( $fname );

	return $output;
}
| 947 | + |
/**
 * Helper function for parse() that transforms wiki markup into
 * HTML. Only called for $mOutputType == OT_HTML.
 *
 * The individual passes run in a fixed order; do not reorder them.
 *
 * @param $text Wikitext to transform
 * @return The transformed text, or the unchanged input when the
 *         ParserBeforeInternalParse hook returns false
 * @private
 */
function internalParse( $text ) {
	$templateArgs = array();
	$mainParse = true;
	$fname = 'Parser::internalParse';
	wfProfileIn( $fname );

	# Hook to suspend the parser in this state
	$proceed = wfRunHooks( 'ParserBeforeInternalParse', array( &$this, &$text, &$this->mStripState ) );
	if ( !$proceed ) {
		wfProfileOut( $fname );
		return $text ;
	}

	# Drop the <onlyinclude> and <noinclude> tags themselves (content stays),
	# and drop <includeonly> sections entirely
	$onlyIncludeTags = array( '<onlyinclude>' => '' , '</onlyinclude>' => '' );
	$text = strtr( $text, $onlyIncludeTags );
	$noIncludeTags = array( '<noinclude>' => '', '</noinclude>' => '');
	$text = strtr( $text, $noIncludeTags );
	$text = StringUtils::delimiterReplace( '<includeonly>', '</includeonly>', '', $text );

	$transparentTags = array_keys( $this->mTransparentTagHooks );
	$text = Sanitizer::removeHTMLtags( $text, array( &$this, 'attributeStripCallback' ), array(), $transparentTags );

	$text = $this->replaceVariables( $text, $templateArgs );
	wfRunHooks( 'InternalParseBeforeLinks', array( &$this, &$text, &$this->mStripState ) );

	// Tables need to come after variable replacement for things to work
	// properly; putting them before other transformations should keep
	// exciting things like link expansions from showing up in surprising
	// places.
	$text = $this->doTableStuff( $text );

	# Four or more dashes at the start of a line become a horizontal rule
	$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

	$text = $this->stripToc( $text );
	$this->stripNoGallery( $text );
	$text = $this->doHeadings( $text );
	if ( $this->mOptions->getUseDynamicDates() ) {
		$df =& DateFormatter::getInstance();
		$text = $df->reformat( $this->mOptions->getDateFormat(), $text );
	}
	$text = $this->doAllQuotes( $text );
	$text = $this->replaceInternalLinks( $text );
	$text = $this->replaceExternalLinks( $text );

	# replaceInternalLinks may sometimes leave behind
	# absolute URLs, which have to be masked to hide them from replaceExternalLinks
	$noParseMarker = $this->mUniqPrefix."NOPARSE";
	$text = str_replace( $noParseMarker, "", $text );

	$text = $this->doMagicLinks( $text );
	$text = $this->formatHeadings( $text, $mainParse );

	wfProfileOut( $fname );
	return $text;
}
| 1005 | + |
/**
 * Replace special strings like "ISBN xxx" and "RFC xxx" with
 * magic external links.
 *
 * Existing links and anything inside HTML tags are matched too, so that
 * magicLinkCallback() can pass them through untouched. $text is taken by
 * reference and overwritten with the result, which is also returned.
 *
 * @param $text Text to scan (modified in place)
 * @return The modified text
 * @private
 */
function &doMagicLinks( &$text ) {
	wfProfileIn( __METHOD__ );
	# Extended-mode regex: whitespace and #-comments inside it are ignored
	$magicLinkRegex =
		'!(?:                           # Start cases
			<a.*?</a> |                 # Skip link text
			<.*?> |                     # Skip stuff inside HTML elements
			(?:RFC|PMID)\s+([0-9]+) |   # RFC or PMID, capture number as m[1]
			ISBN\s+(\b                  # ISBN, capture number as m[2]
				(?: 97[89] [\ \-]? )?   # optional 13-digit ISBN prefix
				(?: [0-9] [\ \-]? ){9}  # 9 digits with opt. delimiters
				[0-9Xx]                 # check digit
			\b)
		)!x';
	$handler = array( &$this, 'magicLinkCallback' );
	$text = preg_replace_callback( $magicLinkRegex, $handler, $text );
	wfProfileOut( __METHOD__ );
	return $text;
}
| 1028 | + |
/**
 * preg_replace_callback() handler for doMagicLinks().
 *
 * Matches beginning with "<" are returned unchanged. ISBN matches become
 * a link to Special:Booksources; RFC and PMID matches become external
 * links whose URL comes from the 'rfcurl' / 'pubmedurl' messages.
 *
 * @param $m Match array: $m[0] whole match, $m[1] RFC/PMID number,
 *           $m[2] ISBN as written
 * @return Replacement text for the match
 * @throws MWException when the match starts with none of the known prefixes
 */
function magicLinkCallback( $m ) {
	$whole = $m[0];

	if ( substr( $whole, 0, 1 ) == '<' ) {
		# Skip HTML element
		return $whole;
	}

	if ( substr( $whole, 0, 4 ) == 'ISBN' ) {
		$isbn = $m[2];
		# Normalise for the query: no separators, upper-case check digit
		$normalise = array(
			'-' => '',
			' ' => '',
			'x' => 'X',
		);
		$num = strtr( $isbn, $normalise );
		$titleObj = SpecialPage::getTitleFor( 'Booksources' );
		$href = $titleObj->escapeLocalUrl( "isbn=$num" );
		return '<a href="' . $href . "\" class=\"internal\">ISBN $isbn</a>";
	}

	if ( substr( $whole, 0, 3 ) == 'RFC' ) {
		$keyword = 'RFC';
		$urlmsg = 'rfcurl';
	} elseif ( substr( $whole, 0, 4 ) == 'PMID' ) {
		$keyword = 'PMID';
		$urlmsg = 'pubmedurl';
	} else {
		throw new MWException( __METHOD__.': unrecognised match type "' .
			substr($m[0], 0, 20 ) . '"' );
	}
	$id = $m[1];

	$url = wfMsg( $urlmsg, $id);
	$sk = $this->mOptions->getSkin();
	$la = $sk->getExternalLinkAttributes( $url, $keyword.$id );
	return "<a href=\"{$url}\"{$la}>{$keyword} {$id}</a>";
}
| 1065 | + |
/**
 * Parse headers and return html
 *
 * A line of the form "==Title==" (one to six equals signs on each side,
 * optional trailing whitespace) becomes the matching <h1>..<h6> element.
 * Levels are tried from 6 down to 1 so the longest run of equals signs
 * is consumed first.
 *
 * @param $text Wikitext to transform
 * @return The text with heading lines converted
 * @private
 */
function doHeadings( $text ) {
	$fname = 'Parser::doHeadings';
	wfProfileIn( $fname );
	for ( $i = 6; $i >= 1; --$i ) {
		$h = str_repeat( '=', $i );
		// The pattern has a single capture group; the replacement used to
		// carry a stale "\\2" reference that always expanded to nothing.
		$text = preg_replace( "/^{$h}(.+){$h}\\s*$/m",
			"<h{$i}>\\1</h{$i}>", $text );
	}
	wfProfileOut( $fname );
	return $text;
}
| 1082 | + |
/**
 * Replace single quotes with HTML markup
 *
 * The text is split on newlines and each line is passed through
 * doQuotes() independently; the results are joined with newlines again.
 *
 * @param $text Wikitext to transform
 * @private
 * @return string the altered text
 */
function doAllQuotes( $text ) {
	$fname = 'Parser::doAllQuotes';
	wfProfileIn( $fname );
	$converted = array_map( array( $this, 'doQuotes' ), explode( "\n", $text ) );
	$outtext = implode( "\n", $converted );
	wfProfileOut( $fname );
	return $outtext;
}
| 1100 | + |
/**
 * Helper function for doAllQuotes()
 *
 * Converts the apostrophe runs in one line of text to <i> / <b> markup:
 * two apostrophes toggle italics, three toggle bold, five toggle both.
 * Runs of four, or of more than five, give their surplus apostrophes back
 * to the preceding text. Tags still open at the end of the line are closed.
 *
 * @param $text A single line of text
 * @return The line with apostrophe markup converted; the input itself when
 *         it contains no run of two or more apostrophes
 */
public function doQuotes( $text ) {
	# Even indexes hold plain text, odd indexes hold the apostrophe runs
	$arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	if ( count( $arr ) == 1 ) {
		return $text;
	}
	$total = count( $arr );

	# First, do some preliminary work. This may shift some apostrophes from
	# being mark-up to being text. It also counts the number of occurrences
	# of bold and italics mark-ups.
	$numbold = 0;
	$numitalics = 0;
	for ( $i = 1; $i < $total; $i += 2 ) {
		$len = strlen( $arr[$i] );
		if ( $len == 4 ) {
			# If there are ever four apostrophes, assume the first is supposed to
			# be text, and the remaining three constitute mark-up for bold text.
			$arr[$i-1] .= "'";
			$arr[$i] = "'''";
		} else if ( $len > 5 ) {
			# If there are more than 5 apostrophes in a row, assume they're all
			# text except for the last 5.
			$arr[$i-1] .= str_repeat( "'", $len - 5 );
			$arr[$i] = "'''''";
		}
		# Count the number of occurrences of bold and italics mark-ups.
		# We are not counting sequences of five apostrophes.
		$len = strlen( $arr[$i] );
		if ( $len == 2 ) {
			$numitalics++;
		} else if ( $len == 3 ) {
			$numbold++;
		} else if ( $len == 5 ) {
			$numitalics++;
			$numbold++;
		}
	}

	# If there is an odd number of both bold and italics, it is likely
	# that one of the bold ones was meant to be an apostrophe followed
	# by italics. Which one we cannot know for certain, but it is more
	# likely to be one that has a single-letter word before it.
	if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) {
		$firstsingleletterword = -1;
		$firstmultiletterword = -1;
		$firstspace = -1;
		for ( $i = 1; $i < $total; $i += 2 ) {
			if ( strlen( $arr[$i] ) != 3 ) {
				continue;
			}
			# Look at the last two characters of the text before this run
			$x1 = substr( $arr[$i-1], -1 );
			$x2 = substr( $arr[$i-1], -2, 1 );
			if ( $x1 == ' ' ) {
				if ( $firstspace == -1 ) {
					$firstspace = $i;
				}
			} else if ( $x2 == ' ' ) {
				if ( $firstsingleletterword == -1 ) {
					$firstsingleletterword = $i;
				}
			} else {
				if ( $firstmultiletterword == -1 ) {
					$firstmultiletterword = $i;
				}
			}
		}

		# If there is a single-letter word, use it!
		if ( $firstsingleletterword > -1 ) {
			$arr[$firstsingleletterword] = "''";
			$arr[$firstsingleletterword - 1] .= "'";
		}
		# If not, but there's a multi-letter word, use that one.
		else if ( $firstmultiletterword > -1 ) {
			$arr[$firstmultiletterword] = "''";
			$arr[$firstmultiletterword - 1] .= "'";
		}
		# ... otherwise use the first one that has neither.
		# (notice that it is possible for all three to be -1 if, for example,
		# there is only one pentuple-apostrophe in the line)
		else if ( $firstspace > -1 ) {
			$arr[$firstspace] = "''";
			$arr[$firstspace - 1] .= "'";
		}
	}

	# Now let's actually convert our apostrophic mush to HTML!
	# $state names the open tags in opening order ('', 'i', 'b', 'bi', 'ib');
	# 'both' means a ''''' was seen and the nesting order is not yet decided,
	# so text is held back in $buffer until the next run settles it.
	$output = '';
	$buffer = '';
	$state = '';
	foreach ( $arr as $i => $r ) {
		if ( ( $i % 2 ) == 0 ) {
			if ( $state == 'both' ) {
				$buffer .= $r;
			} else {
				$output .= $r;
			}
			continue;
		}

		$len = strlen( $r );
		if ( $len == 2 ) {
			if ( $state == 'i' ) {
				$output .= '</i>'; $state = '';
			} else if ( $state == 'bi' ) {
				$output .= '</i>'; $state = 'b';
			} else if ( $state == 'ib' ) {
				$output .= '</b></i><b>'; $state = 'b';
			} else if ( $state == 'both' ) {
				$output .= '<b><i>'.$buffer.'</i>'; $state = 'b';
			} else { # $state can be 'b' or ''
				$output .= '<i>'; $state .= 'i';
			}
		} else if ( $len == 3 ) {
			if ( $state == 'b' ) {
				$output .= '</b>'; $state = '';
			} else if ( $state == 'bi' ) {
				$output .= '</i></b><i>'; $state = 'i';
			} else if ( $state == 'ib' ) {
				$output .= '</b>'; $state = 'i';
			} else if ( $state == 'both' ) {
				$output .= '<i><b>'.$buffer.'</b>'; $state = 'i';
			} else { # $state can be 'i' or ''
				$output .= '<b>'; $state .= 'b';
			}
		} else if ( $len == 5 ) {
			if ( $state == 'b' ) {
				$output .= '</b><i>'; $state = 'i';
			} else if ( $state == 'i' ) {
				$output .= '</i><b>'; $state = 'b';
			} else if ( $state == 'bi' ) {
				$output .= '</i></b>'; $state = '';
			} else if ( $state == 'ib' ) {
				$output .= '</b></i>'; $state = '';
			} else if ( $state == 'both' ) {
				$output .= '<i><b>'.$buffer.'</b></i>'; $state = '';
			} else { # ($state == '')
				$buffer = ''; $state = 'both';
			}
		}
	}

	# Now close all remaining tags. Notice that the order is important.
	if ( $state == 'b' || $state == 'ib' ) {
		$output .= '</b>';
	}
	if ( $state == 'i' || $state == 'bi' || $state == 'ib' ) {
		$output .= '</i>';
	}
	if ( $state == 'bi' ) {
		$output .= '</b>';
	}
	# There might be lonely ''''', so make sure we have a buffer.
	# Compare against '' explicitly: a plain truthiness test would treat
	# the text "0" as empty and silently drop it.
	if ( $state == 'both' && $buffer !== '' ) {
		$output .= '<b><i>'.$buffer.'</i></b>';
	}
	return $output;
}
| 1265 | + |
/**
 * Replace external links
 *
 * The text is split on $this->mExtLinkBracketedRegex; each bracketed link
 * contributes four pieces (URL, protocol, link text, trailing text). The
 * text before the first bracketed link, and every trail, is additionally
 * passed through replaceFreeExternalLinks().
 *
 * Note: this is all very hackish and the order of execution matters a lot.
 * Make sure to run maintenance/parserTests.php if you change this code.
 *
 * @param $text Text to process
 * @return The text with external links converted
 * @private
 */
function replaceExternalLinks( $text ) {
	global $wgContLang;
	$fname = 'Parser::replaceExternalLinks';
	wfProfileIn( $fname );

	$sk = $this->mOptions->getSkin();

	$bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );

	$s = $this->replaceFreeExternalLinks( array_shift( $bits ) );

	for ( $i = 0; $i < count( $bits ); $i += 4 ) {
		$url = $bits[$i];
		$protocol = $bits[$i + 1];
		$text = $bits[$i + 2];
		$trail = $bits[$i + 3];

		# The characters '<' and '>' (which were escaped by
		# removeHTMLtags()) should not be included in
		# URLs, per RFC 2396.
		$m2 = array();
		if ( preg_match( '/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE ) ) {
			$cut = $m2[0][1];
			$text = substr( $url, $cut ) . ' ' . $text;
			$url = substr( $url, 0, $cut );
		}

		# If the link text is an image URL, replace it with an <img> tag
		# This happened by accident in the original parser, but some people used it extensively
		$img = $this->maybeMakeExternalImage( $text );
		if ( $img !== false ) {
			$text = $img;
		}

		$dtrail = '';

		# Set linktype for CSS - if URL==text, link is essentially free
		if ( $text == $url ) {
			$linktype = 'free';
		} else {
			$linktype = 'text';
		}

		if ( $text == '' ) {
			# No link text, e.g. [http://domain.tld/some.link]
			# Autonumber if allowed. See bug #5918
			$scheme = substr( $protocol, 0, strpos( $protocol, ':' ) );
			if ( strpos( wfUrlProtocols(), $scheme ) !== false ) {
				$text = '[' . ++$this->mAutonumber . ']';
				$linktype = 'autonumber';
			} else {
				# Otherwise just use the URL
				$text = htmlspecialchars( $url );
				$linktype = 'free';
			}
		} else {
			# Have link text, e.g. [http://domain.tld/some.link text]s
			# Check for trail
			list( $dtrail, $trail ) = Linker::splitTrail( $trail );
		}

		$text = $wgContLang->markNoConversion($text);

		$url = Sanitizer::cleanUrl( $url );

		# Process the trail (i.e. everything after this link up until start of the next link),
		# replacing any non-bracketed links
		$trail = $this->replaceFreeExternalLinks( $trail );

		# Use the encoded URL
		# This means that users can paste URLs directly into the text
		# Funny characters like ö aren't valid in URLs anyway
		# This was changed in August 2004
		$link = $sk->makeExternalLink( $url, $text, false, $linktype, $this->mTitle->getNamespace() );
		$s .= $link . $dtrail . $trail;

		# Register link in the output object.
		# Replace unnecessary URL escape codes with the referenced character
		# This prevents spammers from hiding links from the filters
		$this->mOutput->addExternalLink( self::replaceUnusualEscapes( $url ) );
	}

	wfProfileOut( $fname );
	return $s;
}
| 1354 | + |
/**
 * Replace anything that looks like a URL with a link
 *
 * The text is split on the protocols returned by wfUrlProtocols(), giving
 * alternating (protocol, remainder) pieces. When the remainder starts with
 * URL characters the pair becomes a link, or an image when
 * maybeMakeExternalImage() accepts it; otherwise the pieces are copied
 * through unchanged.
 *
 * @param $text Text to process
 * @return The text with free URLs converted
 * @private
 */
function replaceFreeExternalLinks( $text ) {
	global $wgContLang;
	$fname = 'Parser::replaceFreeExternalLinks';
	wfProfileIn( $fname );

	$bits = preg_split( '/(\b(?:' . wfUrlProtocols() . '))/S', $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	$s = array_shift( $bits );
	$i = 0;

	$sk = $this->mOptions->getSkin();

	while ( $i < count( $bits ) ){
		$protocol = $bits[$i++];
		$remainder = $bits[$i++];

		$m = array();
		if ( preg_match( '/^('.self::EXT_LINK_URL_CLASS.'+)(.*)$/s', $remainder, $m ) ) {
			# Found some characters after the protocol that look promising
			$url = $protocol . $m[1];
			$trail = $m[2];

			# special case: handle urls as url args:
			# http://www.example.com/foo?=http://www.example.com/bar
			# The protocol alternation is wrapped in (?: ) so that ^ and $
			# anchor every alternative, not just the first and the last;
			# this matches the grouping used by the preg_split above.
			if(strlen($trail) == 0 &&
				isset($bits[$i]) &&
				preg_match('/^(?:'. wfUrlProtocols() . ')$/S', $bits[$i]) &&
				preg_match( '/^('.self::EXT_LINK_URL_CLASS.'+)(.*)$/s', $bits[$i + 1], $m ))
			{
				# add protocol, arg
				$url .= $bits[$i] . $m[1]; # protocol, url as arg to previous link
				$i += 2;
				$trail = $m[2];
			}

			# The characters '<' and '>' (which were escaped by
			# removeHTMLtags()) should not be included in
			# URLs, per RFC 2396.
			$m2 = array();
			if (preg_match('/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE)) {
				$trail = substr($url, $m2[0][1]) . $trail;
				$url = substr($url, 0, $m2[0][1]);
			}

			# Move trailing punctuation to $trail
			$sep = ',;\.:!?';
			# If there is no left bracket, then consider right brackets fair game too
			if ( strpos( $url, '(' ) === false ) {
				$sep .= ')';
			}

			# Count how many separator characters the URL ends with
			$numSepChars = strspn( strrev( $url ), $sep );
			if ( $numSepChars ) {
				$trail = substr( $url, -$numSepChars ) . $trail;
				$url = substr( $url, 0, -$numSepChars );
			}

			$url = Sanitizer::cleanUrl( $url );

			# Is this an external image?
			$text = $this->maybeMakeExternalImage( $url );
			if ( $text === false ) {
				# Not an image, make a link
				$text = $sk->makeExternalLink( $url, $wgContLang->markNoConversion($url), true, 'free', $this->mTitle->getNamespace() );
				# Register it in the output object...
				# Replace unnecessary URL escape codes with their equivalent characters
				$pasteurized = self::replaceUnusualEscapes( $url );
				$this->mOutput->addExternalLink( $pasteurized );
			}
			$s .= $text . $trail;
		} else {
			# Nothing URL-like follows the protocol: leave the text as it was
			$s .= $protocol . $remainder;
		}
	}
	wfProfileOut( $fname );
	return $s;
}
| 1435 | + |
/**
 * Replace unusual URL escape codes with their equivalent characters
 *
 * Every %XX sequence (two hex digits) in the URL is handed to
 * replaceUnusualEscapesCallback(), which decides whether to decode it.
 *
 * @param string
 * @return string
 * @static
 * @todo This can merge genuinely required bits in the path or query string,
 *       breaking legit URLs. A proper fix would treat the various parts of
 *       the URL differently; as a workaround, just use the output for
 *       statistical records, not for actual linking/output.
 */
static function replaceUnusualEscapes( $url ) {
	$escapeRegex = '/%[0-9A-Fa-f]{2}/';
	$decoder = array( __CLASS__, 'replaceUnusualEscapesCallback' );
	return preg_replace_callback( $escapeRegex, $decoder, $url );
}
| 1450 | + |
/**
 * Callback function used in replaceUnusualEscapes().
 * Replaces unusual URL escape codes with their equivalent character
 *
 * An escape is decoded only when the decoded byte has a code above 32 and
 * below 127 and is not one of the characters listed in the strpos() call
 * below; every other escape is returned as it was.
 *
 * @param $matches Match array; $matches[0] is the %XX sequence
 * @return The decoded character or the original escape sequence
 * @static
 * @private
 */
private static function replaceUnusualEscapesCallback( $matches ) {
	$escaped = $matches[0];
	$decoded = urldecode( $escaped );
	$code = ord( $decoded );

	// Outside the accepted range: leave it escaped
	if ( $code <= 32 || $code >= 127 ) {
		return $escaped;
	}
	// Is it an unsafe or HTTP reserved character according to RFC 1738?
	if ( strpos( '<>"#{}|\^~[]`;/?', $decoded ) !== false ) {
		// Yes, leave it escaped
		return $escaped;
	}
	// No, shouldn't be escaped
	return $decoded;
}
| 1469 | + |
/**
 * Render a URL as an inline external image when that is permitted.
 *
 * Images are permitted either globally (getAllowExternalImages()) or, as an
 * exception, when the URL starts with the configured
 * getAllowExternalImagesFrom() prefix. The URL must additionally match
 * self::EXT_IMAGE_REGEX.
 *
 * @param string $url the candidate URL
 * @return mixed image HTML from the skin, or false if no image was made
 * @private
 */
function maybeMakeExternalImage( $url ) {
	$skin = $this->mOptions->getSkin();
	$allowedPrefix = $this->mOptions->getAllowExternalImagesFrom();

	$permitted = $this->mOptions->getAllowExternalImages();
	if ( !$permitted ) {
		# Fall back to the "images from this prefix only" exception
		$permitted = !empty( $allowedPrefix ) && strpos( $url, $allowedPrefix ) === 0;
	}

	if ( !$permitted || !preg_match( self::EXT_IMAGE_REGEX, $url ) ) {
		return false;
	}

	# Image found
	return $skin->makeExternalImage( htmlspecialchars( $url ) );
}
| 1489 | + |
/**
 * Process [[ ]] wikilinks
 *
 * The text is split on '[[' and each chunk is matched against the link
 * syntax. Depending on the target, a chunk is turned into an inline image,
 * a category or interlanguage registration on the output object, a
 * self-link, a media link, a known link, or a link placeholder created by
 * makeLinkHolder(). Chunks that are not valid links are emitted unchanged.
 *
 * @param string $s text containing wikilinks
 * @return string the text with the links replaced
 * @private
 */
function replaceInternalLinks( $s ) {
	global $wgContLang;
	static $fname = 'Parser::replaceInternalLinks' ;

	wfProfileIn( $fname );

	wfProfileIn( $fname.'-setup' );
	static $tc = FALSE;
	# the % is needed to support urlencoded titles as well
	if ( !$tc ) { $tc = Title::legalChars() . '#%'; }

	$sk = $this->mOptions->getSkin();

	#split the entire text string on occurrences of [[
	$a = explode( '[[', ' ' . $s );
	#get the first element (all text up to first [[), and remove the space we added
	$s = array_shift( $a );
	$s = substr( $s, 1 );

	# Match a link having the form [[namespace:link|alternate]]trail
	static $e1 = FALSE;
	if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD"; }
	# Match cases where there is no "]]", which might still be images
	static $e1_img = FALSE;
	if ( !$e1_img ) { $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD"; }
	# Match the end of a line for a word that's not followed by whitespace,
	# e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
	$e2 = wfMsgForContent( 'linkprefix' );

	$useLinkPrefixExtension = $wgContLang->linkPrefixExtension();
	if( is_null( $this->mTitle ) ) {
		throw new MWException( __METHOD__.": \$this->mTitle is null\n" );
	}
	$nottalk = !$this->mTitle->isTalkPage();

	if ( $useLinkPrefixExtension ) {
		$m = array();
		if ( preg_match( $e2, $s, $m ) ) {
			$first_prefix = $m[2];
		} else {
			$first_prefix = false;
		}
	} else {
		$prefix = '';
	}

	if($wgContLang->hasVariants()) {
		$selflink = $wgContLang->convertLinkToAllVariants($this->mTitle->getPrefixedText());
	} else {
		$selflink = array($this->mTitle->getPrefixedText());
	}
	$useSubpages = $this->areSubpagesAllowed();
	wfProfileOut( $fname.'-setup' );

	# Loop for each link
	for ($k = 0; isset( $a[$k] ); $k++) {
		$line = $a[$k];
		if ( $useLinkPrefixExtension ) {
			wfProfileIn( $fname.'-prefixhandling' );
			if ( preg_match( $e2, $s, $m ) ) {
				$prefix = $m[2];
				$s = $m[1];
			} else {
				$prefix='';
			}
			# first link
			if($first_prefix) {
				$prefix = $first_prefix;
				$first_prefix = false;
			}
			wfProfileOut( $fname.'-prefixhandling' );
		}

		$might_be_img = false;

		wfProfileIn( "$fname-e1" );
		if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
			$text = $m[2];
			# If we get a ] at the beginning of $m[3] that means we have a link that's something like:
			# [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row breaks things,
			# the real problem is with the $e1 regex
			# See bug 1300.
			#
			# Still some problems for cases where the ] is meant to be outside punctuation,
			# and no image is in sight. See bug 2095.
			#
			if( $text !== '' &&
				substr( $m[3], 0, 1 ) === ']' &&
				strpos($text, '[') !== false
			)
			{
				$text .= ']'; # so that replaceExternalLinks($text) works later
				$m[3] = substr( $m[3], 1 );
			}
			# fix up urlencoded title texts
			if( strpos( $m[1], '%' ) !== false ) {
				# Should anchors '#' also be rejected?
				# Re-escape angle brackets produced by the decoding so that
				# they cannot end up as raw markup in the title text.
				$m[1] = str_replace( array('<', '>'), array('&lt;', '&gt;'), urldecode($m[1]) );
			}
			$trail = $m[3];
		} elseif( preg_match($e1_img, $line, $m) ) { # Invalid, but might be an image with a link in its caption
			$might_be_img = true;
			$text = $m[2];
			if ( strpos( $m[1], '%' ) !== false ) {
				$m[1] = urldecode($m[1]);
			}
			$trail = "";
		} else { # Invalid form; output directly
			$s .= $prefix . '[[' . $line ;
			wfProfileOut( "$fname-e1" );
			continue;
		}
		wfProfileOut( "$fname-e1" );
		wfProfileIn( "$fname-misc" );

		# Don't allow internal links to pages containing
		# PROTO: where PROTO is a valid URL protocol; these
		# should be external links.
		if (preg_match('/^\b(?:' . wfUrlProtocols() . ')/', $m[1])) {
			$s .= $prefix . '[[' . $line ;
			# Close the profiling section opened above before bailing out
			wfProfileOut( "$fname-misc" );
			continue;
		}

		# Make subpage if necessary
		if( $useSubpages ) {
			$link = $this->maybeDoSubpageLink( $m[1], $text );
		} else {
			$link = $m[1];
		}

		# A leading ':' forces a plain link (no image/category/interwiki magic)
		$noforce = (substr($m[1], 0, 1) != ':');
		if (!$noforce) {
			# Strip off leading ':'
			$link = substr($link, 1);
		}

		wfProfileOut( "$fname-misc" );
		wfProfileIn( "$fname-title" );
		$nt = Title::newFromText( $this->mStripState->unstripNoWiki($link) );
		if( !$nt ) {
			$s .= $prefix . '[[' . $line;
			wfProfileOut( "$fname-title" );
			continue;
		}

		$ns = $nt->getNamespace();
		$iw = $nt->getInterWiki();
		wfProfileOut( "$fname-title" );

		if ($might_be_img) { # if this is actually an invalid link
			wfProfileIn( "$fname-might_be_img" );
			if ($ns == NS_IMAGE && $noforce) { #but might be an image
				$found = false;
				while (isset ($a[$k+1]) ) {
					#look at the next 'line' to see if we can close it there
					$spliced = array_splice( $a, $k + 1, 1 );
					$next_line = array_shift( $spliced );
					$m = explode( ']]', $next_line, 3 );
					if ( count( $m ) == 3 ) {
						# the first ]] closes the inner link, the second the image
						$found = true;
						$text .= "[[{$m[0]}]]{$m[1]}";
						$trail = $m[2];
						break;
					} elseif ( count( $m ) == 2 ) {
						#if there's exactly one ]] that's fine, we'll keep looking
						$text .= "[[{$m[0]}]]{$m[1]}";
					} else {
						#if $next_line is invalid too, we need look no further
						$text .= '[[' . $next_line;
						break;
					}
				}
				if ( !$found ) {
					# we couldn't find the end of this imageLink, so output it raw
					#but don't ignore what might be perfectly normal links in the text we've examined
					$text = $this->replaceInternalLinks($text);
					$s .= "{$prefix}[[$link|$text";
					# note: no $trail, because without an end, there *is* no trail
					wfProfileOut( "$fname-might_be_img" );
					continue;
				}
			} else { #it's not an image, so output it raw
				$s .= "{$prefix}[[$link|$text";
				# note: no $trail, because without an end, there *is* no trail
				wfProfileOut( "$fname-might_be_img" );
				continue;
			}
			wfProfileOut( "$fname-might_be_img" );
		}

		$wasblank = ( '' == $text );
		if( $wasblank ) $text = $link;

		# Link not escaped by : , create the various objects
		if( $noforce ) {

			# Interwikis
			wfProfileIn( "$fname-interwiki" );
			if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgContLang->getLanguageName( $iw ) ) {
				$this->mOutput->addLanguageLink( $nt->getFullText() );
				$s = rtrim($s . $prefix);
				$s .= trim($trail, "\n") == '' ? '': $prefix . $trail;
				wfProfileOut( "$fname-interwiki" );
				continue;
			}
			wfProfileOut( "$fname-interwiki" );

			if ( $ns == NS_IMAGE ) {
				wfProfileIn( "$fname-image" );
				if ( !wfIsBadImage( $nt->getDBkey(), $this->mTitle ) ) {
					# recursively parse links inside the image caption
					# actually, this will parse them in any other parameters, too,
					# but it might be hard to fix that, and it doesn't matter ATM
					$text = $this->replaceExternalLinks($text);
					$text = $this->replaceInternalLinks($text);

					# cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
					$s .= $prefix . $this->armorLinks( $this->makeImage( $nt, $text ) ) . $trail;
					$this->mOutput->addImage( $nt->getDBkey() );

					wfProfileOut( "$fname-image" );
					continue;
				} else {
					# We still need to record the image's presence on the page
					$this->mOutput->addImage( $nt->getDBkey() );
				}
				wfProfileOut( "$fname-image" );

			}

			if ( $ns == NS_CATEGORY ) {
				wfProfileIn( "$fname-category" );
				$s = rtrim($s . "\n"); # bug 87

				if ( $wasblank ) {
					$sortkey = $this->getDefaultSort();
				} else {
					$sortkey = $text;
				}
				$sortkey = Sanitizer::decodeCharReferences( $sortkey );
				$sortkey = str_replace( "\n", '', $sortkey );
				$sortkey = $wgContLang->convertCategoryKey( $sortkey );
				$this->mOutput->addCategory( $nt->getDBkey(), $sortkey );

				/**
				 * Strip the whitespace Category links produce, see bug 87
				 * @todo We might want to use trim($tmp, "\n") here.
				 */
				$s .= trim($prefix . $trail, "\n") == '' ? '': $prefix . $trail;

				wfProfileOut( "$fname-category" );
				continue;
			}
		}

		# Self-link checking
		if( $nt->getFragment() === '' ) {
			if( in_array( $nt->getPrefixedText(), $selflink, true ) ) {
				$s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, '', $trail );
				continue;
			}
		}

		# Special and Media are pseudo-namespaces; no pages actually exist in them
		if( $ns == NS_MEDIA ) {
			$link = $sk->makeMediaLinkObj( $nt, $text );
			# Cloak with NOPARSE to avoid replacement in replaceExternalLinks
			$s .= $prefix . $this->armorLinks( $link ) . $trail;
			$this->mOutput->addImage( $nt->getDBkey() );
			continue;
		} elseif( $ns == NS_SPECIAL ) {
			if( SpecialPage::exists( $nt->getDBkey() ) ) {
				$s .= $this->makeKnownLinkHolder( $nt, $text, '', $trail, $prefix );
			} else {
				$s .= $this->makeLinkHolder( $nt, $text, '', $trail, $prefix );
			}
			continue;
		} elseif( $ns == NS_IMAGE ) {
			$img = wfFindFile( $nt );
			if( $img ) {
				// Force a blue link if the file exists; may be a remote
				// upload on the shared repository, and we want to see its
				// auto-generated page.
				$s .= $this->makeKnownLinkHolder( $nt, $text, '', $trail, $prefix );
				$this->mOutput->addLink( $nt );
				continue;
			}
		}
		$s .= $this->makeLinkHolder( $nt, $text, '', $trail, $prefix );
	}
	wfProfileOut( $fname );
	return $s;
}
| 1789 | + |
/**
 * Make a link placeholder. The text returned can be later resolved to a real link with
 * replaceLinkHolders(). This is done for two reasons: firstly to avoid further
 * parsing of interwiki links, and secondly to allow all existence checks and
 * article length checks (for stub links) to be bundled into a single query.
 *
 * The link data is appended to $this->mInterwikiLinkHolders (when
 * $nt->isExternal() is true) or to $this->mLinkHolders (otherwise); the
 * returned comment carries the index of the new entry.
 *
 * @param Title $nt link target; anything that is not an object yields an error marker
 * @param string $text link text
 * @param string $query query string stored with the placeholder
 * @param string $trail text following the link
 * @param string $prefix text preceding the link
 * @return string placeholder comment followed by the unused part of the trail
 */
function makeLinkHolder( &$nt, $text = '', $query = '', $trail = '', $prefix = '' ) {
	wfProfileIn( __METHOD__ );
	if ( is_object( $nt ) ) {
		# Separate the link trail from the rest of the link
		list( $inside, $trail ) = Linker::splitTrail( $trail );
		$label = $prefix . $text . $inside;

		if ( $nt->isExternal() ) {
			$count = array_push( $this->mInterwikiLinkHolders['texts'], $label );
			$this->mInterwikiLinkHolders['titles'][] = $nt;
			$marker = 'IWLINK';
		} else {
			$count = array_push( $this->mLinkHolders['namespaces'], $nt->getNamespace() );
			$this->mLinkHolders['dbkeys'][] = $nt->getDBkey();
			$this->mLinkHolders['queries'][] = $query;
			$this->mLinkHolders['texts'][] = $label;
			$this->mLinkHolders['titles'][] = $nt;
			$marker = 'LINK';
		}

		# array_push() returns the new element count, so the index is one less
		$retVal = '<!--' . $marker . ' ' . ( $count - 1 ) . '-->' . $trail;
	} else {
		# Fail gracefully
		$retVal = '<!-- ERROR -->' . $prefix . $text . $trail;
	}
	wfProfileOut( __METHOD__ );
	return $retVal;
}
| 1823 | + |
/**
 * Render a forced-blue link inline; protect against double expansion of
 * URLs if we're in a mode that prepends full URL prefixes to internal links.
 * The trail text is split off first, so that only the link itself is
 * armored and URLs in the following text are left alone.
 *
 * @param Title $nt
 * @param string $text
 * @param string $query
 * @param string $trail
 * @param string $prefix
 * @return string HTML-wikitext mix
 */
function makeKnownLinkHolder( $nt, $text = '', $query = '', $trail = '', $prefix = '' ) {
	list( $linkTrail, $remainder ) = Linker::splitTrail( $trail );
	$html = $this->mOptions->getSkin()->makeKnownLinkObj( $nt, $text, $query, $linkTrail, $prefix );
	return $this->armorLinks( $html ) . $remainder;
}
| 1844 | + |
/**
 * Insert a NOPARSE marker into any inline links in a chunk that's
 * going to go through further parsing steps before inline URL expansion.
 *
 * Every URL protocol matched by wfUrlProtocols() at a word boundary gets
 * "{mUniqPrefix}NOPARSE" put in front of it.
 *
 * In particular this is important when using action=render, which causes
 * full URLs to be included.
 *
 * @param string $text more-or-less HTML
 * @return string the same HTML with NOPARSE bits added
 */
function armorLinks( $text ) {
	$protocolPattern = '/\b(' . wfUrlProtocols() . ')/';
	# '$1' is the regex backreference to the matched protocol
	$armored = $this->mUniqPrefix . 'NOPARSE$1';
	return preg_replace( $protocolPattern, $armored, $text );
}
| 1861 | + |
/**
 * Tell whether subpage links should be expanded on this page, i.e. whether
 * $wgNamespacesWithSubpages has a non-empty entry for the namespace of
 * the current title.
 *
 * @return bool
 */
function areSubpagesAllowed() {
	global $wgNamespacesWithSubpages;
	$currentNs = $this->mTitle->getNamespace();
	# Some namespaces don't allow subpages
	return !empty( $wgNamespacesWithSubpages[$currentNs] );
}
| 1871 | + |
/**
 * Handle link to subpage if necessary
 *
 * Does nothing (returns $target unchanged) when areSubpagesAllowed() is
 * false for the current page.
 *
 * @param string $target the source of the link
 * @param string &$text the link text, modified as necessary: it is only
 *        filled in when it was passed in as the empty string
 * @return string the full name of the link
 * @private
 */
function maybeDoSubpageLink($target, &$text) {
	# Valid link forms:
	# Foobar -- normal
	# :Foobar -- override special treatment of prefix (images, language links)
	# /Foobar -- convert to CurrentPage/Foobar
	# /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
	# ../ -- convert to CurrentPage, from CurrentPage/CurrentSubPage
	# ../Foobar -- convert to CurrentPage/Foobar, from CurrentPage/CurrentSubPage

	$fname = 'Parser::maybeDoSubpageLink';
	wfProfileIn( $fname );
	$ret = $target; # default return value is no change

	# Some namespaces don't allow subpages,
	# so only perform processing if subpages are allowed
	if( $this->areSubpagesAllowed() ) {
		# Split off the '#fragment' so that it can be re-attached at the end
		$hash = strpos( $target, '#' );
		if( $hash !== false ) {
			$suffix = substr( $target, $hash );
			$target = substr( $target, 0, $hash );
		} else {
			$suffix = '';
		}
		# bug 7425
		$target = trim( $target );
		# Look at the first character
		# (square-bracket offset: the curly-brace form was removed in PHP 8)
		if( $target != '' && $target[0] == '/' ) {
			# / at end means we don't want the slash to be shown
			$m = array();
			$trailingSlashes = preg_match_all( '%(/+)$%', $target, $m );
			if( $trailingSlashes ) {
				# Drop the leading slash and the whole run of trailing slashes
				$noslash = $target = substr( $target, 1, -strlen($m[0][0]) );
			} else {
				$noslash = substr( $target, 1 );
			}

			$ret = $this->mTitle->getPrefixedText(). '/' . trim($noslash) . $suffix;
			if( '' === $text ) {
				$text = $target . $suffix;
			} # this might be changed for ugliness reasons
		} else {
			# check for .. subpage backlinks
			$dotdotcount = 0;
			$nodotdot = $target;
			while( strncmp( $nodotdot, "../", 3 ) == 0 ) {
				++$dotdotcount;
				$nodotdot = substr( $nodotdot, 3 );
			}
			if($dotdotcount > 0) {
				$exploded = explode( '/', $this->mTitle->getPrefixedText() );
				if( count( $exploded ) > $dotdotcount ) { # not allowed to go below top level page
					$ret = implode( '/', array_slice( $exploded, 0, -$dotdotcount ) );
					# / at the end means don't show full path
					if( substr( $nodotdot, -1, 1 ) == '/' ) {
						$nodotdot = substr( $nodotdot, 0, -1 );
						if( '' === $text ) {
							$text = $nodotdot . $suffix;
						}
					}
					$nodotdot = trim( $nodotdot );
					if( $nodotdot != '' ) {
						$ret .= '/' . $nodotdot;
					}
					$ret .= $suffix;
				}
			}
		}
	}

	wfProfileOut( $fname );
	return $ret;
}
| 1951 | + |
| 1952 | + /**#@+ |
| 1953 | + * Used by doBlockLevels() |
| 1954 | + * @private |
| 1955 | + */ |
# closeParagraph() returns the closing tag (plus a newline) for the section
# named in mLastSection, or '' when no section is open, and resets the
# mInPre / mLastSection state.
/* private */ function closeParagraph() {
	$openSection = $this->mLastSection;
	$this->mInPre = false;
	$this->mLastSection = '';
	return ( '' != $openSection ) ? '</' . $openSection . ">\n" : '';
}
# getCommon() returns the length of the longest common prefix of both
# arguments, i.e. the number of leading characters that are identical
# in $st1 and $st2 (0 if either string is empty).
#
/* private */ function getCommon( $st1, $st2 ) {
	# No need to look beyond the end of the shorter string
	$shorter = min( strlen( $st1 ), strlen( $st2 ) );

	# Square-bracket offsets: the curly-brace form was removed in PHP 8
	for ( $i = 0; $i < $shorter; ++$i ) {
		if ( $st1[$i] !== $st2[$i] ) { break; }
	}
	return $i;
}
# These next three functions open, continue, and close the list
# element appropriate to the prefix character passed into them.
#
# openList() first closes any open paragraph, then emits the opening tags
# for the list type and its first item. Opening a ';' list also sets mDTopen.
/* private */ function openList( $char ) {
	$result = $this->closeParagraph();

	switch ( $char ) {
		case '*':
			$result .= '<ul><li>';
			break;
		case '#':
			$result .= '<ol><li>';
			break;
		case ':':
			$result .= '<dl><dd>';
			break;
		case ';':
			$result .= '<dl><dt>';
			$this->mDTopen = true;
			break;
		default:
			# Unknown prefix: the marker replaces (not extends) the output so far
			$result = '<!-- ERR 1 -->';
	}

	return $result;
}
| 1995 | + |
# nextItem() closes the current list item and opens the next one. For
# definition lists the closing tag depends on mDTopen, which is then updated
# to say whether the newly opened element is a <dt>.
/* private */ function nextItem( $char ) {
	switch ( $char ) {
		case '*':
		case '#':
			return '</li><li>';
		case ':':
		case ';':
			$close = $this->mDTopen ? '</dt>' : '</dd>';
			$opensTerm = ( ';' == $char );
			$this->mDTopen = $opensTerm;
			return $close . ( $opensTerm ? '<dt>' : '<dd>' );
	}
	return '<!-- ERR 2 -->';
}
| 2011 | + |
# closeList() emits the closing tags for the innermost item and its list,
# followed by a newline. Only '*', '#' and ':' are recognised; any other
# character yields an error marker without a newline.
/* private */ function closeList( $char ) {
	switch ( $char ) {
		case '*':
			$text = '</li></ul>';
			break;
		case '#':
			$text = '</li></ol>';
			break;
		case ':':
			if ( $this->mDTopen ) {
				$this->mDTopen = false;
				$text = '</dt></dl>';
			} else {
				$text = '</dd></dl>';
			}
			break;
		default:
			return '<!-- ERR 3 -->';
	}
	return $text."\n";
}
| 2026 | + /**#@-*/ |
| 2027 | + |
/**
 * Make lists from lines starting with ':', '*', '#', etc., and wrap the
 * remaining lines in <p> / <pre> blocks as appropriate.
 *
 * @private
 * @param string $text the text to process, handled line by line
 * @param bool $linestart whether $text starts at the beginning of a line;
 *        if false, the first line is copied to the output unprocessed
 * @return string the lists rendered as HTML
 */
function doBlockLevels( $text, $linestart ) {
	$fname = 'Parser::doBlockLevels';
	wfProfileIn( $fname );

	# Parsing through the text line by line. The main thing
	# happening here is handling of block-level elements p, pre,
	# and making lists from lines starting with * # : etc.
	#
	$textLines = explode( "\n", $text );

	$lastPrefix = $output = '';
	$this->mDTopen = $inBlockElem = false;
	$prefixLength = 0;
	# Holds a pending paragraph tag string while deciding what a blank
	# line means; false when nothing is pending
	$paragraphStack = false;

	if ( !$linestart ) {
		$output .= array_shift( $textLines );
	}
	foreach ( $textLines as $oLine ) {
		$lastPrefixLength = strlen( $lastPrefix );
		$preCloseMatch = preg_match('/<\\/pre/i', $oLine );
		$preOpenMatch = preg_match('/<pre/i', $oLine );
		if ( !$this->mInPre ) {
			# Multiple prefixes may abut each other for nested lists.
			$prefixLength = strspn( $oLine, '*#:;' );
			$pref = substr( $oLine, 0, $prefixLength );

			# $pref2 is the prefix with ';' normalised to ':', so that term
			# and definition lines compare as the same list
			$pref2 = str_replace( ';', ':', $pref );
			$t = substr( $oLine, $prefixLength );
			$this->mInPre = !empty($preOpenMatch);
		} else {
			# Don't interpret any other prefixes in preformatted text
			$prefixLength = 0;
			$pref = $pref2 = '';
			$t = $oLine;
		}

		# List generation
		if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
			# Same as the last item, so no need to deal with nesting or opening stuff
			$output .= $this->nextItem( substr( $pref, -1 ) );
			$paragraphStack = false;

			if ( substr( $pref, -1 ) == ';') {
				# The one nasty exception: definition lists work like this:
				# ; title : definition text
				# So we check for : in the remainder text to split up the
				# title and definition, without b0rking links.
				$term = $t2 = '';
				if ($this->findColonNoLinks($t, $term, $t2) !== false) {
					$t = $t2;
					$output .= $term . $this->nextItem( ':' );
				}
			}
		} elseif( $prefixLength || $lastPrefixLength ) {
			# Either open or close a level...
			$commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
			$paragraphStack = false;

			while( $commonPrefixLength < $lastPrefixLength ) {
				$output .= $this->closeList( $lastPrefix[$lastPrefixLength-1] );
				--$lastPrefixLength;
			}
			if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
				$output .= $this->nextItem( $pref[$commonPrefixLength-1] );
			}
			while ( $prefixLength > $commonPrefixLength ) {
				$char = substr( $pref, $commonPrefixLength, 1 );
				$output .= $this->openList( $char );

				if ( ';' == $char ) {
					# FIXME: This is dupe of code above
					$term = $t2 = '';
					if ($this->findColonNoLinks($t, $term, $t2) !== false) {
						$t = $t2;
						$output .= $term . $this->nextItem( ':' );
					}
				}
				++$commonPrefixLength;
			}
			$lastPrefix = $pref2;
		}
		if( 0 == $prefixLength ) {
			wfProfileIn( "$fname-paragraph" );
			# No prefix (not in list)--go to paragraph mode
			// XXX: use a stack for nestable elements like span, table and div
			$openmatch = preg_match('/(?:<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<ol|<li|<\\/tr|<\\/td|<\\/th)/iS', $t );
			$closematch = preg_match(
				'/(?:<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'.
				'<td|<th|<\\/?div|<hr|<\\/pre|<\\/p|'.$this->mUniqPrefix.'-pre|<\\/li|<\\/ul|<\\/ol|<\\/?center)/iS', $t );
			if ( $openmatch or $closematch ) {
				$paragraphStack = false;
				# TODO bug 5718: paragraph closed
				$output .= $this->closeParagraph();
				if ( $preOpenMatch and !$preCloseMatch ) {
					$this->mInPre = true;
				}
				if ( $closematch ) {
					$inBlockElem = false;
				} else {
					$inBlockElem = true;
				}
			} else if ( !$inBlockElem && !$this->mInPre ) {
				# substr() instead of a direct offset: $t is empty for blank
				# lines, and reading offset 0 of an empty string raises a notice
				if ( ' ' === substr( $t, 0, 1 ) and ( $this->mLastSection == 'pre' or trim($t) != '' ) ) {
					// pre
					if ($this->mLastSection != 'pre') {
						$paragraphStack = false;
						$output .= $this->closeParagraph().'<pre>';
						$this->mLastSection = 'pre';
					}
					$t = substr( $t, 1 );
				} else {
					// paragraph
					if ( '' == trim($t) ) {
						if ( $paragraphStack ) {
							$output .= $paragraphStack.'<br />';
							$paragraphStack = false;
							$this->mLastSection = 'p';
						} else {
							if ($this->mLastSection != 'p' ) {
								$output .= $this->closeParagraph();
								$this->mLastSection = '';
								$paragraphStack = '<p>';
							} else {
								$paragraphStack = '</p><p>';
							}
						}
					} else {
						if ( $paragraphStack ) {
							$output .= $paragraphStack;
							$paragraphStack = false;
							$this->mLastSection = 'p';
						} else if ($this->mLastSection != 'p') {
							$output .= $this->closeParagraph().'<p>';
							$this->mLastSection = 'p';
						}
					}
				}
			}
			wfProfileOut( "$fname-paragraph" );
		}
		// somewhere above we forget to get out of pre block (bug 785)
		if($preCloseMatch && $this->mInPre) {
			$this->mInPre = false;
		}
		# While a paragraph tag is pending, the (blank) line itself is not output
		if ($paragraphStack === false) {
			$output .= $t."\n";
		}
	}
	# Close any lists still open after the last line
	while ( $prefixLength ) {
		$output .= $this->closeList( $pref2[$prefixLength-1] );
		--$prefixLength;
	}
	if ( '' != $this->mLastSection ) {
		$output .= '</' . $this->mLastSection . '>';
		$this->mLastSection = '';
	}

	wfProfileOut( $fname );
	return $output;
}
| 2195 | + |
| 2196 | + /** |
| 2197 | + * Split up a string on ':', ignoring any occurrences inside tags |
| 2198 | + * to prevent illegal overlapping. |
| 2199 | + * @param string $str the string to split |
| 2200 | + * @param string &$before set to everything before the ':' |
| 2201 | + * @param string &$after set to everything after the ':' |
| 2202 | + * @return mixed the position of the ':' (int), or false if none found |
| 2203 | + */ |
| 2204 | + function findColonNoLinks($str, &$before, &$after) { |
| 2205 | + $fname = 'Parser::findColonNoLinks'; |
| 2206 | + wfProfileIn( $fname ); |
| 2207 | + |
| 2208 | + $pos = strpos( $str, ':' ); |
| 2209 | + if( $pos === false ) { |
| 2210 | + // Nothing to find! |
| 2211 | + wfProfileOut( $fname ); |
| 2212 | + return false; |
| 2213 | + } |
| 2214 | + |
| 2215 | + $lt = strpos( $str, '<' ); |
| 2216 | + if( $lt === false || $lt > $pos ) { |
| 2217 | + // Easy; no tag nesting to worry about |
| 2218 | + $before = substr( $str, 0, $pos ); |
| 2219 | + $after = substr( $str, $pos+1 ); |
| 2220 | + wfProfileOut( $fname ); |
| 2221 | + return $pos; |
| 2222 | + } |
| 2223 | + |
| 2224 | + // Ugly state machine to walk through avoiding tags. |
| 2225 | + $state = self::COLON_STATE_TEXT; |
| 2226 | + $stack = 0; |
| 2227 | + $len = strlen( $str ); |
| 2228 | + for( $i = 0; $i < $len; $i++ ) { |
| 2229 | + $c = $str{$i}; |
| 2230 | + |
| 2231 | + switch( $state ) { |
| 2232 | + // (Using the number is a performance hack for common cases) |
| 2233 | + case 0: // self::COLON_STATE_TEXT: |
| 2234 | + switch( $c ) { |
| 2235 | + case "<": |
| 2236 | + // Could be either a <start> tag or an </end> tag |
| 2237 | + $state = self::COLON_STATE_TAGSTART; |
| 2238 | + break; |
| 2239 | + case ":": |
| 2240 | + if( $stack == 0 ) { |
| 2241 | + // We found it! |
| 2242 | + $before = substr( $str, 0, $i ); |
| 2243 | + $after = substr( $str, $i + 1 ); |
| 2244 | + wfProfileOut( $fname ); |
| 2245 | + return $i; |
| 2246 | + } |
| 2247 | + // Embedded in a tag; don't break it. |
| 2248 | + break; |
| 2249 | + default: |
| 2250 | + // Skip ahead looking for something interesting |
| 2251 | + $colon = strpos( $str, ':', $i ); |
| 2252 | + if( $colon === false ) { |
| 2253 | + // Nothing else interesting |
| 2254 | + wfProfileOut( $fname ); |
| 2255 | + return false; |
| 2256 | + } |
| 2257 | + $lt = strpos( $str, '<', $i ); |
| 2258 | + if( $stack === 0 ) { |
| 2259 | + if( $lt === false || $colon < $lt ) { |
| 2260 | + // We found it! |
| 2261 | + $before = substr( $str, 0, $colon ); |
| 2262 | + $after = substr( $str, $colon + 1 ); |
| 2263 | + wfProfileOut( $fname ); |
| 2264 | + return $i; |
| 2265 | + } |
| 2266 | + } |
| 2267 | + if( $lt === false ) { |
| 2268 | + // Nothing else interesting to find; abort! |
| 2269 | + // We're nested, but there's no close tags left. Abort! |
| 2270 | + break 2; |
| 2271 | + } |
| 2272 | + // Skip ahead to next tag start |
| 2273 | + $i = $lt; |
| 2274 | + $state = self::COLON_STATE_TAGSTART; |
| 2275 | + } |
| 2276 | + break; |
| 2277 | + case 1: // self::COLON_STATE_TAG: |
| 2278 | + // In a <tag> |
| 2279 | + switch( $c ) { |
| 2280 | + case ">": |
| 2281 | + $stack++; |
| 2282 | + $state = self::COLON_STATE_TEXT; |
| 2283 | + break; |
| 2284 | + case "/": |
| 2285 | + // Slash may be followed by >? |
| 2286 | + $state = self::COLON_STATE_TAGSLASH; |
| 2287 | + break; |
| 2288 | + default: |
| 2289 | + // ignore |
| 2290 | + } |
| 2291 | + break; |
| 2292 | + case 2: // self::COLON_STATE_TAGSTART: |
| 2293 | + switch( $c ) { |
| 2294 | + case "/": |
| 2295 | + $state = self::COLON_STATE_CLOSETAG; |
| 2296 | + break; |
| 2297 | + case "!": |
| 2298 | + $state = self::COLON_STATE_COMMENT; |
| 2299 | + break; |
| 2300 | + case ">": |
| 2301 | + // Illegal early close? This shouldn't happen D: |
| 2302 | + $state = self::COLON_STATE_TEXT; |
| 2303 | + break; |
| 2304 | + default: |
| 2305 | + $state = self::COLON_STATE_TAG; |
| 2306 | + } |
| 2307 | + break; |
| 2308 | + case 3: // self::COLON_STATE_CLOSETAG: |
| 2309 | + // In a </tag> |
| 2310 | + if( $c == ">" ) { |
| 2311 | + $stack--; |
| 2312 | + if( $stack < 0 ) { |
| 2313 | + wfDebug( "Invalid input in $fname; too many close tags\n" ); |
| 2314 | + wfProfileOut( $fname ); |
| 2315 | + return false; |
| 2316 | + } |
| 2317 | + $state = self::COLON_STATE_TEXT; |
| 2318 | + } |
| 2319 | + break; |
| 2320 | + case self::COLON_STATE_TAGSLASH: |
| 2321 | + if( $c == ">" ) { |
| 2322 | + // Yes, a self-closed tag <blah/> |
| 2323 | + $state = self::COLON_STATE_TEXT; |
| 2324 | + } else { |
| 2325 | + // Probably we're jumping the gun, and this is an attribute |
| 2326 | + $state = self::COLON_STATE_TAG; |
| 2327 | + } |
| 2328 | + break; |
| 2329 | + case 5: // self::COLON_STATE_COMMENT: |
| 2330 | + if( $c == "-" ) { |
| 2331 | + $state = self::COLON_STATE_COMMENTDASH; |
| 2332 | + } |
| 2333 | + break; |
| 2334 | + case self::COLON_STATE_COMMENTDASH: |
| 2335 | + if( $c == "-" ) { |
| 2336 | + $state = self::COLON_STATE_COMMENTDASHDASH; |
| 2337 | + } else { |
| 2338 | + $state = self::COLON_STATE_COMMENT; |
| 2339 | + } |
| 2340 | + break; |
| 2341 | + case self::COLON_STATE_COMMENTDASHDASH: |
| 2342 | + if( $c == ">" ) { |
| 2343 | + $state = self::COLON_STATE_TEXT; |
| 2344 | + } else { |
| 2345 | + $state = self::COLON_STATE_COMMENT; |
| 2346 | + } |
| 2347 | + break; |
| 2348 | + default: |
| 2349 | + throw new MWException( "State machine error in $fname" ); |
| 2350 | + } |
| 2351 | + } |
| 2352 | + if( $stack > 0 ) { |
| 2353 | + wfDebug( "Invalid input in $fname; not enough close tags (stack $stack, state $state)\n" ); |
| 2354 | + return false; |
| 2355 | + } |
| 2356 | + wfProfileOut( $fname ); |
| 2357 | + return false; |
| 2358 | + } |
| 2359 | + |
| 2360 | + /** |
| 2361 | + * Return the value of a magic variable (like PAGENAME). |
| 2362 | + * @param string $index magic variable ID such as 'pagename'; IDs without a case below are offered to the ParserGetVariableValueSwitch hook and yield null unless the hook supplies a value |
| 2363 | + * @private |
| 2364 | + */ |
| 2365 | + function getVariableValue( $index ) { |
| 2366 | + global $wgContLang, $wgSitename, $wgServer, $wgServerName, $wgScriptPath; |
| 2367 | + |
| 2368 | + /** |
| 2369 | + * Some of these require message or data lookups and can be |
| 2370 | + * expensive to check many times, so computed values are kept in a static cache. |
| 2371 | + */ |
| 2372 | + static $varCache = array(); |
| 2373 | + if ( wfRunHooks( 'ParserGetVariableValueVarCache', array( &$this, &$varCache ) ) ) { |
| 2374 | + if ( isset( $varCache[$index] ) ) { |
| 2375 | + return $varCache[$index]; |
| 2376 | + } |
| 2377 | + } |
| 2378 | + |
| 2379 | + $ts = time(); |
| 2380 | + wfRunHooks( 'ParserGetVariableValueTs', array( &$this, &$ts ) ); |
| 2381 | + |
| 2382 | + # Switch TZ to $wgLocaltimezone (when set) while the local* values are computed |
| 2383 | + global $wgLocaltimezone; |
| 2384 | + if ( isset( $wgLocaltimezone ) ) { |
| 2385 | + $oldtz = getenv( 'TZ' ); |
| 2386 | + putenv( 'TZ='.$wgLocaltimezone ); |
| 2387 | + } |
| 2388 | + |
| 2389 | + wfSuppressWarnings(); // silence E_STRICT complaints about the system time settings |
| 2390 | + $localTimestamp = date( 'YmdHis', $ts ); |
| 2391 | + $localMonth = date( 'm', $ts ); |
| 2392 | + $localMonthName = date( 'n', $ts ); |
| 2393 | + $localDay = date( 'j', $ts ); |
| 2394 | + $localDay2 = date( 'd', $ts ); |
| 2395 | + $localDayOfWeek = date( 'w', $ts ); |
| 2396 | + $localWeek = date( 'W', $ts ); |
| 2397 | + $localYear = date( 'Y', $ts ); |
| 2398 | + $localHour = date( 'H', $ts ); |
| 2399 | + if ( isset( $wgLocaltimezone ) ) { |
| 2400 | + putenv( 'TZ='.$oldtz ); |
| 2401 | + } |
| 2402 | + wfRestoreWarnings(); |
| 2403 | + |
| 2404 | + switch ( $index ) { |
| 2405 | + case 'currentmonth': |
| 2406 | + return $varCache[$index] = $wgContLang->formatNum( gmdate( 'm', $ts ) ); |
| 2407 | + case 'currentmonthname': |
| 2408 | + return $varCache[$index] = $wgContLang->getMonthName( gmdate( 'n', $ts ) ); |
| 2409 | + case 'currentmonthnamegen': |
| 2410 | + return $varCache[$index] = $wgContLang->getMonthNameGen( gmdate( 'n', $ts ) ); |
| 2411 | + case 'currentmonthabbrev': |
| 2412 | + return $varCache[$index] = $wgContLang->getMonthAbbreviation( gmdate( 'n', $ts ) ); |
| 2413 | + case 'currentday': |
| 2414 | + return $varCache[$index] = $wgContLang->formatNum( gmdate( 'j', $ts ) ); |
| 2415 | + case 'currentday2': |
| 2416 | + return $varCache[$index] = $wgContLang->formatNum( gmdate( 'd', $ts ) ); |
| 2417 | + case 'localmonth': |
| 2418 | + return $varCache[$index] = $wgContLang->formatNum( $localMonth ); |
| 2419 | + case 'localmonthname': |
| 2420 | + return $varCache[$index] = $wgContLang->getMonthName( $localMonthName ); |
| 2421 | + case 'localmonthnamegen': |
| 2422 | + return $varCache[$index] = $wgContLang->getMonthNameGen( $localMonthName ); |
| 2423 | + case 'localmonthabbrev': |
| 2424 | + return $varCache[$index] = $wgContLang->getMonthAbbreviation( $localMonthName ); |
| 2425 | + case 'localday': |
| 2426 | + return $varCache[$index] = $wgContLang->formatNum( $localDay ); |
| 2427 | + case 'localday2': |
| 2428 | + return $varCache[$index] = $wgContLang->formatNum( $localDay2 ); |
| 2429 | + case 'pagename': |
| 2430 | + return wfEscapeWikiText( $this->mTitle->getText() ); |
| 2431 | + case 'pagenamee': |
| 2432 | + return $this->mTitle->getPartialURL(); |
| 2433 | + case 'fullpagename': |
| 2434 | + return wfEscapeWikiText( $this->mTitle->getPrefixedText() ); |
| 2435 | + case 'fullpagenamee': |
| 2436 | + return $this->mTitle->getPrefixedURL(); |
| 2437 | + case 'subpagename': |
| 2438 | + return wfEscapeWikiText( $this->mTitle->getSubpageText() ); |
| 2439 | + case 'subpagenamee': |
| 2440 | + return $this->mTitle->getSubpageUrlForm(); |
| 2441 | + case 'basepagename': |
| 2442 | + return wfEscapeWikiText( $this->mTitle->getBaseText() ); |
| 2443 | + case 'basepagenamee': |
| 2444 | + return wfUrlEncode( str_replace( ' ', '_', $this->mTitle->getBaseText() ) ); |
| 2445 | + case 'talkpagename': |
| 2446 | + if( $this->mTitle->canTalk() ) { |
| 2447 | + $talkPage = $this->mTitle->getTalkPage(); |
| 2448 | + return wfEscapeWikiText( $talkPage->getPrefixedText() ); |
| 2449 | + } else { |
| 2450 | + return ''; |
| 2451 | + } |
| 2452 | + case 'talkpagenamee': |
| 2453 | + if( $this->mTitle->canTalk() ) { |
| 2454 | + $talkPage = $this->mTitle->getTalkPage(); |
| 2455 | + return $talkPage->getPrefixedUrl(); |
| 2456 | + } else { |
| 2457 | + return ''; |
| 2458 | + } |
| 2459 | + case 'subjectpagename': |
| 2460 | + $subjPage = $this->mTitle->getSubjectPage(); |
| 2461 | + return wfEscapeWikiText( $subjPage->getPrefixedText() ); |
| 2462 | + case 'subjectpagenamee': |
| 2463 | + $subjPage = $this->mTitle->getSubjectPage(); |
| 2464 | + return $subjPage->getPrefixedUrl(); |
| 2465 | + case 'revisionid': |
| 2466 | + return $this->mRevisionId; |
| 2467 | + case 'revisionday': |
| 2468 | + return intval( substr( $this->getRevisionTimestamp(), 6, 2 ) ); |
| 2469 | + case 'revisionday2': |
| 2470 | + return substr( $this->getRevisionTimestamp(), 6, 2 ); |
| 2471 | + case 'revisionmonth': |
| 2472 | + return intval( substr( $this->getRevisionTimestamp(), 4, 2 ) ); |
| 2473 | + case 'revisionyear': |
| 2474 | + return substr( $this->getRevisionTimestamp(), 0, 4 ); |
| 2475 | + case 'revisiontimestamp': |
| 2476 | + return $this->getRevisionTimestamp(); |
| 2477 | + case 'namespace': |
| 2478 | + return str_replace('_',' ',$wgContLang->getNsText( $this->mTitle->getNamespace() ) ); |
| 2479 | + case 'namespacee': |
| 2480 | + return wfUrlencode( $wgContLang->getNsText( $this->mTitle->getNamespace() ) ); |
| 2481 | + case 'talkspace': |
| 2482 | + return $this->mTitle->canTalk() ? str_replace('_',' ',$this->mTitle->getTalkNsText()) : ''; |
| 2483 | + case 'talkspacee': |
| 2484 | + return $this->mTitle->canTalk() ? wfUrlencode( $this->mTitle->getTalkNsText() ) : ''; |
| 2485 | + case 'subjectspace': |
| 2486 | + return $this->mTitle->getSubjectNsText(); |
| 2487 | + case 'subjectspacee': |
| 2488 | + return( wfUrlencode( $this->mTitle->getSubjectNsText() ) ); |
| 2489 | + case 'currentdayname': |
| 2490 | + return $varCache[$index] = $wgContLang->getWeekdayName( gmdate( 'w', $ts ) + 1 ); |
| 2491 | + case 'currentyear': |
| 2492 | + return $varCache[$index] = $wgContLang->formatNum( gmdate( 'Y', $ts ), true ); |
| 2493 | + case 'currenttime': |
| 2494 | + return $varCache[$index] = $wgContLang->time( wfTimestamp( TS_MW, $ts ), false, false ); |
| 2495 | + case 'currenthour': |
| 2496 | + return $varCache[$index] = $wgContLang->formatNum( gmdate( 'H', $ts ), true ); |
| 2497 | + case 'currentweek': |
| 2498 | + // @bug 4594 PHP5 zero-pads the week number, PHP4 does not; cast to |
| 2499 | + // int to remove the padding |
| 2500 | + return $varCache[$index] = $wgContLang->formatNum( (int)gmdate( 'W', $ts ) ); |
| 2501 | + case 'currentdow': |
| 2502 | + return $varCache[$index] = $wgContLang->formatNum( gmdate( 'w', $ts ) ); |
| 2503 | + case 'localdayname': |
| 2504 | + return $varCache[$index] = $wgContLang->getWeekdayName( $localDayOfWeek + 1 ); |
| 2505 | + case 'localyear': |
| 2506 | + return $varCache[$index] = $wgContLang->formatNum( $localYear, true ); |
| 2507 | + case 'localtime': |
| 2508 | + return $varCache[$index] = $wgContLang->time( $localTimestamp, false, false ); |
| 2509 | + case 'localhour': |
| 2510 | + return $varCache[$index] = $wgContLang->formatNum( $localHour, true ); |
| 2511 | + case 'localweek': |
| 2512 | + // @bug 4594 PHP5 zero-pads the week number, PHP4 does not; cast to |
| 2513 | + // int to remove the padding |
| 2514 | + return $varCache[$index] = $wgContLang->formatNum( (int)$localWeek ); |
| 2515 | + case 'localdow': |
| 2516 | + return $varCache[$index] = $wgContLang->formatNum( $localDayOfWeek ); |
| 2517 | + case 'numberofarticles': |
| 2518 | + return $varCache[$index] = $wgContLang->formatNum( SiteStats::articles() ); |
| 2519 | + case 'numberoffiles': |
| 2520 | + return $varCache[$index] = $wgContLang->formatNum( SiteStats::images() ); |
| 2521 | + case 'numberofusers': |
| 2522 | + return $varCache[$index] = $wgContLang->formatNum( SiteStats::users() ); |
| 2523 | + case 'numberofpages': |
| 2524 | + return $varCache[$index] = $wgContLang->formatNum( SiteStats::pages() ); |
| 2525 | + case 'numberofadmins': |
| 2526 | + return $varCache[$index] = $wgContLang->formatNum( SiteStats::admins() ); |
| 2527 | + case 'numberofedits': |
| 2528 | + return $varCache[$index] = $wgContLang->formatNum( SiteStats::edits() ); |
| 2529 | + case 'currenttimestamp': |
| 2530 | + return $varCache[$index] = wfTimestampNow(); |
| 2531 | + case 'localtimestamp': |
| 2532 | + return $varCache[$index] = $localTimestamp; |
| 2533 | + case 'currentversion': |
| 2534 | + return $varCache[$index] = SpecialVersion::getVersion(); |
| 2535 | + case 'sitename': |
| 2536 | + return $wgSitename; |
| 2537 | + case 'server': |
| 2538 | + return $wgServer; |
| 2539 | + case 'servername': |
| 2540 | + return $wgServerName; |
| 2541 | + case 'scriptpath': |
| 2542 | + return $wgScriptPath; |
| 2543 | + case 'directionmark': |
| 2544 | + return $wgContLang->getDirMark(); |
| 2545 | + case 'contentlanguage': |
| 2546 | + global $wgContLanguageCode; |
| 2547 | + return $wgContLanguageCode; |
| 2548 | + default: |
| 2549 | + $ret = null; |
| 2550 | + if ( wfRunHooks( 'ParserGetVariableValueSwitch', array( &$this, &$varCache, &$index, &$ret ) ) ) |
| 2551 | + return $ret; |
| 2552 | + else |
| 2553 | + return null; |
| 2554 | + } |
| 2555 | + } |
| 2556 | + |
| 2557 | + /** |
| 2558 | + * Build the lookup table of magic variables (like CURRENTMONTHNAME) |
| 2559 | + * in $this->mVariables, starting from an empty array. |
| 2560 | + * @private |
| 2561 | + */ |
| 2562 | + function initialiseVariables() { |
| 2563 | + $profileName = 'Parser::initialiseVariables'; |
| 2564 | + wfProfileIn( $profileName ); |
| 2565 | + $ids = MagicWord::getVariableIDs(); |
| 2566 | + $this->mVariables = array(); |
| 2567 | + # Each magic word is asked to add itself to the table via addToArray() |
| 2568 | + foreach ( $ids as $variableId ) { |
| 2569 | + $magicWord =& MagicWord::get( $variableId ); |
| 2570 | + $magicWord->addToArray( $this->mVariables, $variableId ); |
| 2571 | + } |
| 2572 | + wfProfileOut( $profileName ); |
| 2573 | + } |
| 2574 | + |
| 2575 | + /** |
| 2576 | + * Parse runs of braces in a format like {{title|part|part}} |
| 2577 | + * and call callbacks to get a replacement text for each piece found; returns the resulting text |
| 2578 | + * |
| 2579 | + * @param string $text The text to parse |
| 2580 | + * @param array $callbacks rules in form: |
| 2581 | + * '{' => array( # opening brace character |
| 2582 | + * 'end' => '}', # closing brace character |
| 2583 | + * 'cb' => array(2 => callback, # replacement callback to call if {{..}} is found |
| 2584 | + * 3 => callback # replacement callback to call if {{{..}}} is found |
| 2585 | + * ), |
| 2586 | + * 'min' => 2, # Minimum brace count in cb |
| 2587 | + * 'max' => 3, # Maximum brace count in cb |
| 2588 | + * ) |
| 2589 | + * @private |
| 2590 | + */ |
| 2591 | + function replace_callback ($text, $callbacks) { |
| 2592 | + wfProfileIn( __METHOD__ ); |
| 2593 | + $openingBraceStack = array(); # stack of opened brace runs which are not closed yet |
| 2594 | + $lastOpeningBrace = -1; # index of the innermost unclosed entry, -1 when the stack is empty |
| 2595 | + |
| 2596 | + $validOpeningBraces = implode( '', array_keys( $callbacks ) ); |
| 2597 | + |
| 2598 | + $i = 0; |
| 2599 | + while ( $i < strlen( $text ) ) { |
| 2600 | + # Find next opening brace, closing brace or pipe (the latter two only inside an open run) |
| 2601 | + if ( $lastOpeningBrace == -1 ) { |
| 2602 | + $currentClosing = ''; |
| 2603 | + $search = $validOpeningBraces; |
| 2604 | + } else { |
| 2605 | + $currentClosing = $openingBraceStack[$lastOpeningBrace]['braceEnd']; |
| 2606 | + $search = $validOpeningBraces . '|' . $currentClosing; |
| 2607 | + } |
| 2608 | + $rule = null; |
| 2609 | + $i += strcspn( $text, $search, $i ); |
| 2610 | + if ( $i < strlen( $text ) ) { |
| 2611 | + if ( $text[$i] == '|' ) { |
| 2612 | + $found = 'pipe'; |
| 2613 | + } elseif ( $text[$i] == $currentClosing ) { |
| 2614 | + $found = 'close'; |
| 2615 | + } elseif ( isset( $callbacks[$text[$i]] ) ) { |
| 2616 | + $found = 'open'; |
| 2617 | + $rule = $callbacks[$text[$i]]; |
| 2618 | + } else { |
| 2619 | + # Some versions of PHP have a strcspn which stops on null characters |
| 2620 | + # Ignore and continue |
| 2621 | + ++$i; |
| 2622 | + continue; |
| 2623 | + } |
| 2624 | + } else { |
| 2625 | + # All done |
| 2626 | + break; |
| 2627 | + } |
| 2628 | + |
| 2629 | + if ( $found == 'open' ) { |
| 2630 | + # found an opening brace; build the stack entry for it |
| 2631 | + $piece = array('brace' => $text[$i], |
| 2632 | + 'braceEnd' => $rule['end'], |
| 2633 | + 'title' => '', |
| 2634 | + 'parts' => null); |
| 2635 | + |
| 2636 | + # count opening brace characters |
| 2637 | + $piece['count'] = strspn( $text, $piece['brace'], $i ); |
| 2638 | + $piece['startAt'] = $piece['partStart'] = $i + $piece['count']; |
| 2639 | + $i += $piece['count']; |
| 2640 | + |
| 2641 | + # push onto the stack only if the run has at least the rule's minimum number of braces |
| 2642 | + if ( $piece['count'] >= $rule['min'] ) { |
| 2643 | + $lastOpeningBrace ++; |
| 2644 | + $openingBraceStack[$lastOpeningBrace] = $piece; |
| 2645 | + } |
| 2646 | + } elseif ( $found == 'close' ) { |
| 2647 | + # count the closing characters, looking at no more than the number of opening ones |
| 2648 | + $maxCount = $openingBraceStack[$lastOpeningBrace]['count']; |
| 2649 | + $count = strspn( $text, $text[$i], $i, $maxCount ); |
| 2650 | + |
| 2651 | + # check for maximum matching characters (if there are 5 closing |
| 2652 | + # characters, we will probably need only 3 - depending on the rules) |
| 2653 | + $matchingCount = 0; |
| 2654 | + $matchingCallback = null; |
| 2655 | + $cbType = $callbacks[$openingBraceStack[$lastOpeningBrace]['brace']]; |
| 2656 | + if ( $count > $cbType['max'] ) { |
| 2657 | + # The specified maximum exists in the callback array, unless the caller |
| 2658 | + # has made an error |
| 2659 | + $matchingCount = $cbType['max']; |
| 2660 | + } else { |
| 2661 | + # Count is less than or equal to the maximum |
| 2662 | + # Skip any gaps in the callback array to find the true largest match |
| 2663 | + # Need to use array_key_exists not isset because the callback can be null |
| 2664 | + $matchingCount = $count; |
| 2665 | + while ( $matchingCount > 0 && !array_key_exists( $matchingCount, $cbType['cb'] ) ) { |
| 2666 | + --$matchingCount; |
| 2667 | + } |
| 2668 | + } |
| 2669 | + |
| 2670 | + if ($matchingCount <= 0) { |
| 2671 | + $i += $count; |
| 2672 | + continue; |
| 2673 | + } |
| 2674 | + $matchingCallback = $cbType['cb'][$matchingCount]; |
| 2675 | + |
| 2676 | + # store the text since the last separator as the title, or as the last part if a '|' was found |
| 2677 | + if (null === $openingBraceStack[$lastOpeningBrace]['parts']) { |
| 2678 | + $openingBraceStack[$lastOpeningBrace]['title'] = |
| 2679 | + substr($text, $openingBraceStack[$lastOpeningBrace]['partStart'], |
| 2680 | + $i - $openingBraceStack[$lastOpeningBrace]['partStart']); |
| 2681 | + } else { |
| 2682 | + $openingBraceStack[$lastOpeningBrace]['parts'][] = |
| 2683 | + substr($text, $openingBraceStack[$lastOpeningBrace]['partStart'], |
| 2684 | + $i - $openingBraceStack[$lastOpeningBrace]['partStart']); |
| 2685 | + } |
| 2686 | + |
| 2687 | + $pieceStart = $openingBraceStack[$lastOpeningBrace]['startAt'] - $matchingCount; |
| 2688 | + $pieceEnd = $i + $matchingCount; |
| 2689 | + |
| 2690 | + if( is_callable( $matchingCallback ) ) { |
| 2691 | + $cbArgs = array ( |
| 2692 | + 'text' => substr($text, $pieceStart, $pieceEnd - $pieceStart), |
| 2693 | + 'title' => trim($openingBraceStack[$lastOpeningBrace]['title']), |
| 2694 | + 'parts' => $openingBraceStack[$lastOpeningBrace]['parts'], |
| 2695 | + 'lineStart' => (($pieceStart > 0) && ($text[$pieceStart-1] == "\n")), |
| 2696 | + ); |
| 2697 | + # finally call the user callback, splice its result into the text and resume right after it |
| 2698 | + $replaceWith = call_user_func( $matchingCallback, $cbArgs ); |
| 2699 | + $text = substr($text, 0, $pieceStart) . $replaceWith . substr($text, $pieceEnd); |
| 2700 | + $i = $pieceStart + strlen($replaceWith); |
| 2701 | + } else { |
| 2702 | + # a non-callable (e.g. null) callback means the braces are parsed, but the text is not replaced |
| 2703 | + $i += $matchingCount; |
| 2704 | + } |
| 2705 | + |
| 2706 | + # pop the closed entry, but keep a reset copy in case some opening characters are still unused |
| 2707 | + $piece = array('brace' => $openingBraceStack[$lastOpeningBrace]['brace'], |
| 2708 | + 'braceEnd' => $openingBraceStack[$lastOpeningBrace]['braceEnd'], |
| 2709 | + 'count' => $openingBraceStack[$lastOpeningBrace]['count'], |
| 2710 | + 'title' => '', |
| 2711 | + 'parts' => null, |
| 2712 | + 'startAt' => $openingBraceStack[$lastOpeningBrace]['startAt']); |
| 2713 | + $openingBraceStack[$lastOpeningBrace--] = null; |
| 2714 | + |
| 2715 | + if ($matchingCount < $piece['count']) { |
| 2716 | + $piece['count'] -= $matchingCount; |
| 2717 | + $piece['startAt'] -= $matchingCount; |
| 2718 | + $piece['partStart'] = $piece['startAt']; |
| 2719 | + # do we still qualify for any callback with the remaining count? if so, push the entry back |
| 2720 | + $currentCbList = $callbacks[$piece['brace']]['cb']; |
| 2721 | + while ( $piece['count'] ) { |
| 2722 | + if ( array_key_exists( $piece['count'], $currentCbList ) ) { |
| 2723 | + $lastOpeningBrace++; |
| 2724 | + $openingBraceStack[$lastOpeningBrace] = $piece; |
| 2725 | + break; |
| 2726 | + } |
| 2727 | + --$piece['count']; |
| 2728 | + } |
| 2729 | + } |
| 2730 | + } elseif ( $found == 'pipe' ) { |
| 2731 | + # the text before the first separator is the title; before any later one it is the next part |
| 2732 | + if (null === $openingBraceStack[$lastOpeningBrace]['parts']) { |
| 2733 | + $openingBraceStack[$lastOpeningBrace]['title'] = |
| 2734 | + substr($text, $openingBraceStack[$lastOpeningBrace]['partStart'], |
| 2735 | + $i - $openingBraceStack[$lastOpeningBrace]['partStart']); |
| 2736 | + $openingBraceStack[$lastOpeningBrace]['parts'] = array(); |
| 2737 | + } else { |
| 2738 | + $openingBraceStack[$lastOpeningBrace]['parts'][] = |
| 2739 | + substr($text, $openingBraceStack[$lastOpeningBrace]['partStart'], |
| 2740 | + $i - $openingBraceStack[$lastOpeningBrace]['partStart']); |
| 2741 | + } |
| 2742 | + $openingBraceStack[$lastOpeningBrace]['partStart'] = ++$i; |
| 2743 | + } |
| 2744 | + } |
| 2745 | + |
| 2746 | + wfProfileOut( __METHOD__ ); |
| 2747 | + return $text; |
| 2748 | + } |
| 2749 | + |
| 2750 | + /** |
| 2751 | + * Replace magic variables, templates, and template arguments |
| 2752 | + * with the appropriate text. Templates are substituted recursively, |
| 2753 | + * taking care to avoid infinite loops. |
| 2754 | + * |
| 2755 | + * Note that the substitution depends on value of $mOutputType: |
| 2756 | + * OT_WIKI: only {{subst:}} templates |
| 2757 | + * OT_MSG: only magic variables |
| 2758 | + * OT_HTML: all templates and magic variables |
| 2759 | + * |
| 2760 | + * @param string $text The text to transform |
| 2761 | + * @param array $args Key-value pairs representing template parameters to substitute |
| 2762 | + * @param bool $argsOnly Only do argument (triple-brace) expansion, not double-brace expansion |
| 2763 | + * @return string The transformed text; returned untouched if it exceeds the maximum include size |
| 2764 | + * @private |
| 2765 | + */ |
| 2766 | + function replaceVariables( $text, $args = array(), $argsOnly = false ) { |
| 2767 | + # Prevent too big inclusions |
| 2768 | + if( strlen( $text ) > $this->mOptions->getMaxIncludeSize() ) { |
| 2769 | + return $text; |
| 2770 | + } |
| 2771 | + $fname = __METHOD__ /*. '-L' . count( $this->mArgStack )*/; |
| 2772 | + wfProfileIn( $fname ); |
| 2773 | + |
| 2774 | + # This function is called recursively. To keep track of arguments we need a stack: |
| 2775 | + array_push( $this->mArgStack, $args ); |
| 2776 | + |
| 2777 | + $braceCallbacks = array(); |
| 2778 | + if ( !$argsOnly ) { |
| 2779 | + $braceCallbacks[2] = array( &$this, 'braceSubstitution' ); |
| 2780 | + } |
| 2781 | + if ( $this->mOutputType != OT_MSG ) { |
| 2782 | + $braceCallbacks[3] = array( &$this, 'argSubstitution' ); |
| 2783 | + } |
| 2784 | + if ( $braceCallbacks ) { |
| 2785 | + $callbacks = array( |
| 2786 | + '{' => array( |
| 2787 | + 'end' => '}', |
| 2788 | + 'cb' => $braceCallbacks, |
| 2789 | + 'min' => $argsOnly ? 3 : 2, |
| 2790 | + 'max' => isset( $braceCallbacks[3] ) ? 3 : 2, |
| 2791 | + ), |
| 2792 | + '[' => array( |
| 2793 | + 'end' => ']', |
| 2794 | + 'cb' => array(2=>null), |
| 2795 | + 'min' => 2, |
| 2796 | + 'max' => 2, |
| 2797 | + ) |
| 2798 | + ); |
| 2799 | + $text = $this->replace_callback ($text, $callbacks); |
| 2800 | + } |
| 2801 | + # Pop unconditionally so the push above stays balanced even when no callback applies |
| 2802 | + array_pop( $this->mArgStack ); |
| 2803 | + wfProfileOut( $fname ); |
| 2804 | + return $text; |
| 2805 | + } |
| 2806 | + |
| 2807 | + /** |
| 2808 | + * Substitute a single magic variable. |
| 2809 | + * @param array $matches [0] is the full matched text, [1] the variable name |
| 2810 | + * @return mixed the variable's value, or $matches[0] unchanged when nothing is substituted |
| 2811 | + * @private |
| 2812 | + */ |
| 2813 | + function variableSubstitution( $matches ) { |
| 2814 | + global $wgContLang; |
| 2815 | + $profileName = 'Parser::variableSubstitution'; |
| 2816 | + $key = $wgContLang->lc($matches[1]); |
| 2817 | + wfProfileIn( $profileName ); |
| 2818 | + # Unless a variable is substituted below, the matched text is returned as is |
| 2819 | + $result = $matches[0]; |
| 2820 | + $eligible = true; |
| 2821 | + if ( $this->mOutputType == OT_WIKI ) { |
| 2822 | + # Only magic variables prefixed by SUBST are handled in this mode. |
| 2823 | + # If no variable is substituted below, the {{subst:}} magic word |
| 2824 | + # is left in place, in case it is a template rather than a magic variable. |
| 2825 | + $substWord =& MagicWord::get( 'subst' ); |
| 2826 | + if ( !$substWord->matchStartAndRemove( $key ) ) { |
| 2827 | + $eligible = false; |
| 2828 | + } |
| 2829 | + } |
| 2830 | + if ( $eligible && array_key_exists( $key, $this->mVariables ) ) { |
| 2831 | + $variableId = $this->mVariables[$key]; |
| 2832 | + # Confirm the match against the original spelling, case sensitive or not |
| 2833 | + $word =& MagicWord::get( $variableId ); |
| 2834 | + if ( $word->match( $matches[1] ) ) { |
| 2835 | + $result = $this->getVariableValue( $variableId ); |
| 2836 | + $this->mOutput->mContainsOldMagic = true; |
| 2837 | + } |
| 2838 | + } |
| 2839 | + wfProfileOut( $profileName ); |
| 2840 | + return $result; |
| 2841 | + } |
| 2842 | + |
| 2843 | + |
| 2844 | + /** |
| 2845 | + * Turn a list of raw template arguments into an associative array (refactored in 1.9 so parserfunctions can use it, too). |
| 2846 | + * Arguments without '=' get the numeric keys 1, 2, 3... and are stored untrimmed; |
| 2847 | + * "name=value" arguments are split on the first '=' with both sides trimmed. |
| 2848 | + * @param array $args list of argument strings |
| 2849 | + * @return array the arguments keyed by position or by name |
| 2850 | + */ |
| 2851 | + static function createAssocArgs( $args ) { |
| 2852 | + $assocArgs = array(); |
| 2853 | + $index = 1; |
| 2854 | + foreach( $args as $arg ) { |
| 2855 | + $eqpos = strpos( $arg, '=' ); |
| 2856 | + if ( $eqpos === false ) { |
| 2857 | + $assocArgs[$index++] = $arg; |
| 2858 | + } else { |
| 2859 | + // trim() always returns a string, so an empty name or value is simply '' |
| 2860 | + $name = trim( substr( $arg, 0, $eqpos ) ); |
| 2861 | + $assocArgs[$name] = trim( substr( $arg, $eqpos+1 ) ); |
| 2862 | + } |
| 2863 | + } |
| 2864 | + return $assocArgs; |
| 2865 | + } |
| 2866 | + |
| 2867 | + /** |
| 2868 | + * Return the text of a template, after recursively |
| 2869 | + * replacing any variables or templates within the template. |
| 2870 | + * |
| 2871 | + * @param array $piece The parts of the template |
| 2872 | + * $piece['text']: matched text |
| 2873 | + * $piece['title']: the title, i.e. the part before the | |
| 2874 | + * $piece['parts']: the parameter array |
| 2875 | + * @return string the text of the template |
| 2876 | + * @private |
| 2877 | + */ |
| 2878 | + function braceSubstitution( $piece ) { |
| 2879 | + global $wgContLang, $wgLang, $wgAllowDisplayTitle, $wgNonincludableNamespaces; |
| 2880 | + $fname = __METHOD__ /*. '-L' . count( $this->mArgStack )*/; |
| 2881 | + wfProfileIn( $fname ); |
| 2882 | + wfProfileIn( __METHOD__.'-setup' ); |
| 2883 | + |
| 2884 | + # Flags |
| 2885 | + $found = false; # $text has been filled |
| 2886 | + $nowiki = false; # wiki markup in $text should be escaped |
| 2887 | + $noparse = false; # Unsafe HTML tags should not be stripped, etc. |
| 2888 | + $noargs = false; # Don't replace triple-brace arguments in $text |
| 2889 | + $replaceHeadings = false; # Make the edit section links go to the template not the article |
| 2890 | + $headingOffset = 0; # Skip headings when number, to account for those that weren't transcluded. |
| 2891 | + $isHTML = false; # $text is HTML, armour it against wikitext transformation |
| 2892 | + $forceRawInterwiki = false; # Force interwiki transclusion to be done in raw mode not rendered |
| 2893 | + |
| 2894 | + # Title object, where $text came from |
| 2895 | + $title = NULL; |
| 2896 | + |
| 2897 | + $linestart = ''; |
| 2898 | + |
| 2899 | + |
| 2900 | + # $part1 is the bit before the first |, and must contain only title characters |
| 2901 | + # $args is a list of arguments, starting from index 0, not including $part1 |
| 2902 | + |
| 2903 | + $titleText = $part1 = $piece['title']; |
| 2904 | + # If the third subpattern matched anything, it will start with | |
| 2905 | + |
| 2906 | + if (null == $piece['parts']) { |
| 2907 | + $replaceWith = $this->variableSubstitution (array ($piece['text'], $piece['title'])); |
| 2908 | + if ($replaceWith != $piece['text']) { |
| 2909 | + $text = $replaceWith; |
| 2910 | + $found = true; |
| 2911 | + $noparse = true; |
| 2912 | + $noargs = true; |
| 2913 | + } |
| 2914 | + } |
| 2915 | + |
| 2916 | + $args = (null == $piece['parts']) ? array() : $piece['parts']; |
| 2917 | + wfProfileOut( __METHOD__.'-setup' ); |
| 2918 | + |
| 2919 | + # SUBST |
| 2920 | + wfProfileIn( __METHOD__.'-modifiers' ); |
| 2921 | + if ( !$found ) { |
| 2922 | + $mwSubst =& MagicWord::get( 'subst' ); |
| 2923 | + if ( $mwSubst->matchStartAndRemove( $part1 ) xor $this->ot['wiki'] ) { |
| 2924 | + # One of two possibilities is true: |
| 2925 | + # 1) Found SUBST but not in the PST phase |
| 2926 | + # 2) Didn't find SUBST and in the PST phase |
| 2927 | + # In either case, return without further processing |
| 2928 | + $text = $piece['text']; |
| 2929 | + $found = true; |
| 2930 | + $noparse = true; |
| 2931 | + $noargs = true; |
| 2932 | + } |
| 2933 | + } |
| 2934 | + |
| 2935 | + # MSG, MSGNW and RAW |
| 2936 | + if ( !$found ) { |
| 2937 | + # Check for MSGNW: |
| 2938 | + $mwMsgnw =& MagicWord::get( 'msgnw' ); |
| 2939 | + if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) { |
| 2940 | + $nowiki = true; |
| 2941 | + } else { |
| 2942 | + # Remove obsolete MSG: |
| 2943 | + $mwMsg =& MagicWord::get( 'msg' ); |
| 2944 | + $mwMsg->matchStartAndRemove( $part1 ); |
| 2945 | + } |
| 2946 | + |
| 2947 | + # Check for RAW: |
| 2948 | + $mwRaw =& MagicWord::get( 'raw' ); |
| 2949 | + if ( $mwRaw->matchStartAndRemove( $part1 ) ) { |
| 2950 | + $forceRawInterwiki = true; |
| 2951 | + } |
| 2952 | + } |
| 2953 | + wfProfileOut( __METHOD__.'-modifiers' ); |
| 2954 | + |
| 2955 | + //save path level before recursing into functions & templates. |
| 2956 | + $lastPathLevel = $this->mTemplatePath; |
| 2957 | + |
| 2958 | + # Parser functions |
| 2959 | + if ( !$found ) { |
| 2960 | + wfProfileIn( __METHOD__ . '-pfunc' ); |
| 2961 | + |
| 2962 | + $colonPos = strpos( $part1, ':' ); |
| 2963 | + if ( $colonPos !== false ) { |
| 2964 | + # Case sensitive functions |
| 2965 | + $function = substr( $part1, 0, $colonPos ); |
| 2966 | + if ( isset( $this->mFunctionSynonyms[1][$function] ) ) { |
| 2967 | + $function = $this->mFunctionSynonyms[1][$function]; |
| 2968 | + } else { |
| 2969 | + # Case insensitive functions |
| 2970 | + $function = strtolower( $function ); |
| 2971 | + if ( isset( $this->mFunctionSynonyms[0][$function] ) ) { |
| 2972 | + $function = $this->mFunctionSynonyms[0][$function]; |
| 2973 | + } else { |
| 2974 | + $function = false; |
| 2975 | + } |
| 2976 | + } |
| 2977 | + if ( $function ) { |
| 2978 | + $funcArgs = array_map( 'trim', $args ); |
| 2979 | + $funcArgs = array_merge( array( &$this, trim( substr( $part1, $colonPos + 1 ) ) ), $funcArgs ); |
| 2980 | + $result = call_user_func_array( $this->mFunctionHooks[$function], $funcArgs ); |
| 2981 | + $found = true; |
| 2982 | + |
| 2983 | + // The text is usually already parsed, doesn't need triple-brace tags expanded, etc. |
| 2984 | + //$noargs = true; |
| 2985 | + //$noparse = true; |
| 2986 | + |
| 2987 | + if ( is_array( $result ) ) { |
| 2988 | + if ( isset( $result[0] ) ) { |
| 2989 | + $text = $linestart . $result[0]; |
| 2990 | + unset( $result[0] ); |
| 2991 | + } |
| 2992 | + |
| 2993 | + // Extract flags into the local scope |
| 2994 | + // This allows callers to set flags such as nowiki, noparse, found, etc. |
| 2995 | + extract( $result ); |
| 2996 | + } else { |
| 2997 | + $text = $linestart . $result; |
| 2998 | + } |
| 2999 | + } |
| 3000 | + } |
| 3001 | + wfProfileOut( __METHOD__ . '-pfunc' ); |
| 3002 | + } |
| 3003 | + |
| 3004 | + # Template table test |
| 3005 | + |
| 3006 | + # Did we encounter this template already? If yes, it is in the cache |
| 3007 | + # and we need to check for loops. |
| 3008 | + if ( !$found && isset( $this->mTemplates[$piece['title']] ) ) { |
| 3009 | + $found = true; |
| 3010 | + |
| 3011 | + # Infinite loop test |
| 3012 | + if ( isset( $this->mTemplatePath[$part1] ) ) { |
| 3013 | + $noparse = true; |
| 3014 | + $noargs = true; |
| 3015 | + $found = true; |
| 3016 | + $text = $linestart . |
| 3017 | + "[[$part1]]<!-- WARNING: template loop detected -->"; |
| 3018 | + wfDebug( __METHOD__.": template loop broken at '$part1'\n" ); |
| 3019 | + } else { |
| 3020 | + # set $text to cached message. |
| 3021 | + $text = $linestart . $this->mTemplates[$piece['title']]; |
| 3022 | + #treat title for cached page the same as others |
| 3023 | + $ns = NS_TEMPLATE; |
| 3024 | + $subpage = ''; |
| 3025 | + $part1 = $this->maybeDoSubpageLink( $part1, $subpage ); |
| 3026 | + if ($subpage !== '') { |
| 3027 | + $ns = $this->mTitle->getNamespace(); |
| 3028 | + } |
| 3029 | + $title = Title::newFromText( $part1, $ns ); |
| 3030 | + //used by include size checking |
| 3031 | + $titleText = $title->getPrefixedText(); |
| 3032 | + //used by edit section links |
| 3033 | + $replaceHeadings = true; |
| 3034 | + |
| 3035 | + } |
| 3036 | + } |
| 3037 | + |
| 3038 | + # Load from database |
| 3039 | + if ( !$found ) { |
| 3040 | + wfProfileIn( __METHOD__ . '-loadtpl' ); |
| 3041 | + $ns = NS_TEMPLATE; |
| 3042 | + # declaring $subpage directly in the function call |
| 3043 | + # does not work correctly with references and breaks |
| 3044 | + # {{/subpage}}-style inclusions |
| 3045 | + $subpage = ''; |
| 3046 | + $part1 = $this->maybeDoSubpageLink( $part1, $subpage ); |
| 3047 | + if ($subpage !== '') { |
| 3048 | + $ns = $this->mTitle->getNamespace(); |
| 3049 | + } |
| 3050 | + $title = Title::newFromText( $part1, $ns ); |
| 3051 | + |
| 3052 | + |
| 3053 | + if ( !is_null( $title ) ) { |
| 3054 | + $titleText = $title->getPrefixedText(); |
| 3055 | + # Check for language variants if the template is not found |
| 3056 | + if($wgContLang->hasVariants() && $title->getArticleID() == 0){ |
| 3057 | + $wgContLang->findVariantLink($part1, $title); |
| 3058 | + } |
| 3059 | + |
| 3060 | + if ( !$title->isExternal() ) { |
| 3061 | + if ( $title->getNamespace() == NS_SPECIAL && $this->mOptions->getAllowSpecialInclusion() && $this->ot['html'] ) { |
| 3062 | + $text = SpecialPage::capturePath( $title ); |
| 3063 | + if ( is_string( $text ) ) { |
| 3064 | + $found = true; |
| 3065 | + $noparse = true; |
| 3066 | + $noargs = true; |
| 3067 | + $isHTML = true; |
| 3068 | + $this->disableCache(); |
| 3069 | + } |
| 3070 | + } else if ( $wgNonincludableNamespaces && in_array( $title->getNamespace(), $wgNonincludableNamespaces ) ) { |
| 3071 | + $found = false; //access denied |
| 3072 | + wfDebug( "$fname: template inclusion denied for " . $title->getPrefixedDBkey() ); |
| 3073 | + } else { |
| 3074 | + list($articleContent,$title) = $this->fetchTemplateAndtitle( $title ); |
| 3075 | + if ( $articleContent !== false ) { |
| 3076 | + $found = true; |
| 3077 | + $text = $articleContent; |
| 3078 | + $replaceHeadings = true; |
| 3079 | + } |
| 3080 | + } |
| 3081 | + |
| 3082 | + # If the title is valid but undisplayable, make a link to it |
| 3083 | + if ( !$found && ( $this->ot['html'] || $this->ot['pre'] ) ) { |
| 3084 | + $text = "[[:$titleText]]"; |
| 3085 | + $found = true; |
| 3086 | + } |
| 3087 | + } elseif ( $title->isTrans() ) { |
| 3088 | + // Interwiki transclusion |
| 3089 | + if ( $this->ot['html'] && !$forceRawInterwiki ) { |
| 3090 | + $text = $this->interwikiTransclude( $title, 'render' ); |
| 3091 | + $isHTML = true; |
| 3092 | + $noparse = true; |
| 3093 | + } else { |
| 3094 | + $text = $this->interwikiTransclude( $title, 'raw' ); |
| 3095 | + $replaceHeadings = true; |
| 3096 | + } |
| 3097 | + $found = true; |
| 3098 | + } |
| 3099 | + |
| 3100 | + # Template cache array insertion |
| 3101 | + # Use the original $piece['title'] not the mangled $part1, so that |
| 3102 | + # modifiers such as RAW: produce separate cache entries |
| 3103 | + if( $found ) { |
| 3104 | + if( $isHTML ) { |
| 3105 | + // A special page; don't store it in the template cache. |
| 3106 | + } else { |
| 3107 | + $this->mTemplates[$piece['title']] = $text; |
| 3108 | + } |
| 3109 | + $text = $linestart . $text; |
| 3110 | + } |
| 3111 | + } |
| 3112 | + wfProfileOut( __METHOD__ . '-loadtpl' ); |
| 3113 | + } |
| 3114 | + |
| 3115 | + if ( $found && !$this->incrementIncludeSize( 'pre-expand', strlen( $text ) ) ) { |
| 3116 | + # Error, oversize inclusion |
| 3117 | + $text = $linestart . |
| 3118 | + "[[$titleText]]<!-- WARNING: template omitted, pre-expand include size too large -->"; |
| 3119 | + $noparse = true; |
| 3120 | + $noargs = true; |
| 3121 | + } |
| 3122 | + |
| 3123 | + # Recursive parsing, escaping and link table handling |
| 3124 | + # Only for HTML output |
| 3125 | + if ( $nowiki && $found && ( $this->ot['html'] || $this->ot['pre'] ) ) { |
| 3126 | + $text = wfEscapeWikiText( $text ); |
| 3127 | + } elseif ( !$this->ot['msg'] && $found ) { |
| 3128 | + if ( $noargs ) { |
| 3129 | + $assocArgs = array(); |
| 3130 | + } else { |
| 3131 | + # Clean up argument array |
| 3132 | + $assocArgs = self::createAssocArgs($args); |
| 3133 | + # Add a new element to the templace recursion path |
| 3134 | + $this->mTemplatePath[$part1] = 1; |
| 3135 | + } |
| 3136 | + |
| 3137 | + if ( !$noparse ) { |
| 3138 | + # If there are any <onlyinclude> tags, only include them |
| 3139 | + if ( in_string( '<onlyinclude>', $text ) && in_string( '</onlyinclude>', $text ) ) { |
| 3140 | + $replacer = new OnlyIncludeReplacer; |
| 3141 | + StringUtils::delimiterReplaceCallback( '<onlyinclude>', '</onlyinclude>', |
| 3142 | + array( &$replacer, 'replace' ), $text ); |
| 3143 | + $text = $replacer->output; |
| 3144 | + } |
| 3145 | + # Remove <noinclude> sections and <includeonly> tags |
| 3146 | + $text = StringUtils::delimiterReplace( '<noinclude>', '</noinclude>', '', $text ); |
| 3147 | + $text = strtr( $text, array( '<includeonly>' => '' , '</includeonly>' => '' ) ); |
| 3148 | + |
| 3149 | + if( $this->ot['html'] || $this->ot['pre'] ) { |
| 3150 | + # Strip <nowiki>, <pre>, etc. |
| 3151 | + $text = $this->strip( $text, $this->mStripState ); |
| 3152 | + if ( $this->ot['html'] ) { |
| 3153 | + $text = Sanitizer::removeHTMLtags( $text, array( &$this, 'replaceVariables' ), $assocArgs ); |
| 3154 | + } elseif ( $this->ot['pre'] && $this->mOptions->getRemoveComments() ) { |
| 3155 | + $text = Sanitizer::removeHTMLcomments( $text ); |
| 3156 | + } |
| 3157 | + } |
| 3158 | + $text = $this->replaceVariables( $text, $assocArgs ); |
| 3159 | + |
| 3160 | + # If the template begins with a table or block-level |
| 3161 | + # element, it should be treated as beginning a new line. |
| 3162 | + if (!$piece['lineStart'] && preg_match('/^(?:{\\||:|;|#|\*)/', $text)) /*}*/{ |
| 3163 | + $text = "\n" . $text; |
| 3164 | + } |
| 3165 | + } elseif ( !$noargs ) { |
| 3166 | + # $noparse and !$noargs |
| 3167 | + # Just replace the arguments, not any double-brace items |
| 3168 | + # This is used for rendered interwiki transclusion |
| 3169 | + $text = $this->replaceVariables( $text, $assocArgs, true ); |
| 3170 | + } |
| 3171 | + } |
| 3172 | + # Prune lower levels off the recursion check path |
| 3173 | + $this->mTemplatePath = $lastPathLevel; |
| 3174 | + |
| 3175 | + if ( $found && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) { |
| 3176 | + # Error, oversize inclusion |
| 3177 | + $text = $linestart . |
| 3178 | + "[[$titleText]]<!-- WARNING: template omitted, post-expand include size too large -->"; |
| 3179 | + $noparse = true; |
| 3180 | + $noargs = true; |
| 3181 | + } |
| 3182 | + |
| 3183 | + if ( !$found ) { |
| 3184 | + wfProfileOut( $fname ); |
| 3185 | + return $piece['text']; |
| 3186 | + } else { |
| 3187 | + wfProfileIn( __METHOD__ . '-placeholders' ); |
| 3188 | + if ( $isHTML ) { |
| 3189 | + # Replace raw HTML by a placeholder |
| 3190 | + # Add a blank line preceding, to prevent it from mucking up |
| 3191 | + # immediately preceding headings |
| 3192 | + $text = "\n\n" . $this->insertStripItem( $text, $this->mStripState ); |
| 3193 | + } else { |
| 3194 | + # replace ==section headers== |
| 3195 | + # XXX this needs to go away once we have a better parser. |
| 3196 | + if ( !$this->ot['wiki'] && !$this->ot['pre'] && $replaceHeadings ) { |
| 3197 | + if( !is_null( $title ) ) |
| 3198 | + $encodedname = base64_encode($title->getPrefixedDBkey()); |
| 3199 | + else |
| 3200 | + $encodedname = base64_encode(""); |
| 3201 | + $m = preg_split('/(^={1,6}.*?={1,6}\s*?$)/m', $text, -1, |
| 3202 | + PREG_SPLIT_DELIM_CAPTURE); |
| 3203 | + $text = ''; |
| 3204 | + $nsec = $headingOffset; |
| 3205 | + |
| 3206 | + for( $i = 0; $i < count($m); $i += 2 ) { |
| 3207 | + $text .= $m[$i]; |
| 3208 | + if (!isset($m[$i + 1]) || $m[$i + 1] == "") continue; |
| 3209 | + $hl = $m[$i + 1]; |
| 3210 | + if( strstr($hl, "<!--MWTEMPLATESECTION") ) { |
| 3211 | + $text .= $hl; |
| 3212 | + continue; |
| 3213 | + } |
| 3214 | + $m2 = array(); |
| 3215 | + preg_match('/^(={1,6})(.*?)(={1,6}\s*?)$/m', $hl, $m2); |
| 3216 | + $text .= $m2[1] . $m2[2] . "<!--MWTEMPLATESECTION=" |
| 3217 | + . $encodedname . "&" . base64_encode("$nsec") . "-->" . $m2[3]; |
| 3218 | + |
| 3219 | + $nsec++; |
| 3220 | + } |
| 3221 | + } |
| 3222 | + } |
| 3223 | + wfProfileOut( __METHOD__ . '-placeholders' ); |
| 3224 | + } |
| 3225 | + |
| 3226 | + # Prune lower levels off the recursion check path |
| 3227 | + $this->mTemplatePath = $lastPathLevel; |
| 3228 | + |
| 3229 | + if ( !$found ) { |
| 3230 | + wfProfileOut( $fname ); |
| 3231 | + return $piece['text']; |
| 3232 | + } else { |
| 3233 | + wfProfileOut( $fname ); |
| 3234 | + return $text; |
| 3235 | + } |
| 3236 | + } |
| 3237 | + |
| 3238 | + /** |
| 3239 | + * Fetch the unparsed text of a template and register a reference to it. |
| 3240 | + */ |
| 3241 | + function fetchTemplateAndTitle( $title ) { |
| 3242 | + # Delegate the lookup to the callback configured in the parser options |
| 3243 | + $fetched = call_user_func( $this->mOptions->getTemplateCallback(), $title ); |
| 3244 | + $templateText = $fetched['text']; |
| 3245 | + # The callback may omit finalTitle; fall back to the requested title |
| 3246 | + $resolvedTitle = isset( $fetched['finalTitle'] ) ? $fetched['finalTitle'] : $title; |
| 3247 | + $dependencies = isset( $fetched['deps'] ) ? $fetched['deps'] : array(); |
| 3248 | + foreach ( $dependencies as $dependency ) { |
| 3249 | + $this->mOutput->addTemplate( $dependency['title'], $dependency['page_id'], $dependency['rev_id'] ); |
| 3250 | + } |
| 3251 | + return array( $templateText, $resolvedTitle ); |
| 3252 | + } |
| 3253 | + |
| 3254 | + function fetchTemplate( $title ) { |
| 3255 | + # Convenience wrapper: keep only element 0 (the text) of the array( text, final title ) result |
| 3256 | + list( $text ) = $this->fetchTemplateAndTitle( $title ); |
| 3257 | + return $text; |
| 3258 | + } |
| 3258 | + |
| 3259 | + /** |
| 3260 | + * Static function to get a template |
| 3261 | + * Can be overridden via ParserOptions::setTemplateCallback(). |
| 3262 | + * |
| 3263 | + * Returns an associative array: |
| 3264 | + * text The unparsed template text |
| 3265 | + * finalTitle (Optional) The title after following redirects |
| 3266 | + * deps (Optional) An array of associative array dependencies: |
| 3267 | + * title: The dependency title, to be registered in templatelinks |
| 3268 | + * page_id: The page_id of the title |
| 3269 | + * rev_id: The revision ID loaded |
| 3270 | + */ |
| 3271 | + static function statelessFetchTemplate( $title ) { |
| 3272 | + $text = $skip = false; # $skip can be switched on by the hook below |
| 3273 | + $finalTitle = $title; |
| 3274 | + $deps = array(); # one entry per title looked at, returned to the caller |
| 3275 | + |
| 3276 | + // Loop to fetch the article, with up to 1 redirect |
| 3277 | + for ( $i = 0; $i < 2 && is_object( $title ); $i++ ) { |
| 3278 | + # Give extensions a chance to select the revision instead |
| 3279 | + $id = false; // Assume current |
| 3280 | + wfRunHooks( 'BeforeParserFetchTemplateAndtitle', array( false, &$title, &$skip, &$id ) ); |
| 3281 | + |
| 3282 | + if( $skip ) { |
| 3283 | + $text = false; |
| 3284 | + $deps[] = array( # still record the dependency, but without a revision |
| 3285 | + 'title' => $title, |
| 3286 | + 'page_id' => $title->getArticleID(), |
| 3287 | + 'rev_id' => null ); |
| 3288 | + break; |
| 3289 | + } |
| 3290 | + $rev = $id ? Revision::newFromId( $id ) : Revision::newFromTitle( $title ); |
| 3291 | + $rev_id = $rev ? $rev->getId() : 0; # 0 when no revision could be loaded |
| 3292 | + |
| 3293 | + $deps[] = array( |
| 3294 | + 'title' => $title, |
| 3295 | + 'page_id' => $title->getArticleID(), |
| 3296 | + 'rev_id' => $rev_id ); |
| 3297 | + |
| 3298 | + if( $rev ) { |
| 3299 | + $text = $rev->getText(); |
| 3300 | + } elseif( $title->getNamespace() == NS_MEDIAWIKI ) { |
| 3301 | + global $wgContLang; # content language: the key must not depend on the viewing user's language |
| 3302 | + $message = $wgContLang->lcfirst( $title->getText() ); |
| 3303 | + $text = wfMsgForContentNoTrans( $message ); |
| 3304 | + if( wfEmptyMsg( $message, $text ) ) { |
| 3305 | + $text = false; |
| 3306 | + break; |
| 3307 | + } |
| 3308 | + } else { |
| 3309 | + break; # no revision and not a MediaWiki-namespace message: $text keeps its current value |
| 3310 | + } |
| 3311 | + if ( $text === false ) { |
| 3312 | + break; |
| 3313 | + } |
| 3314 | + // Redirect? newFromRedirect() must yield an object for the loop to run a second time |
| 3315 | + $finalTitle = $title; |
| 3316 | + $title = Title::newFromRedirect( $text ); |
| 3317 | + } |
| 3318 | + return array( |
| 3319 | + 'text' => $text, |
| 3320 | + 'finalTitle' => $finalTitle, |
| 3321 | + 'deps' => $deps ); |
| 3322 | + } |
| 3323 | + |
| 3324 | + /** |
| 3325 | + * Transclude an interwiki link. |
| 3326 | + */ |
| 3327 | + function interwikiTransclude( $title, $action ) { |
| 3328 | + global $wgEnableScaryTranscluding; |
| 3329 | + # Interwiki ("scary") transclusion has to be enabled explicitly |
| 3330 | + if ( !$wgEnableScaryTranscluding ) { |
| 3331 | + return wfMsg( 'scarytranscludedisabled' ); |
| 3332 | + } |
| 3333 | + $remoteUrl = $title->getFullUrl( "action=$action" ); |
| 3334 | + if ( strlen( $remoteUrl ) > 255 ) { |
| 3335 | + return wfMsg( 'scarytranscludetoolong' ); |
| 3336 | + } |
| 3337 | + return $this->fetchScaryTemplateMaybeFromCache( $remoteUrl ); |
| 3338 | + } |
| 3339 | + |
| 3340 | + function fetchScaryTemplateMaybeFromCache($url) { |
| 3341 | + global $wgTranscludeCacheExpiry; |
| 3342 | + # Look for an earlier copy of this URL in the transcache table |
| 3343 | + $cachedRow = wfGetDB( DB_SLAVE )->selectRow( 'transcache', |
| 3344 | + array( 'tc_time', 'tc_contents' ), array( 'tc_url' => $url ) ); |
| 3345 | + # Reuse it only while it is younger than $wgTranscludeCacheExpiry |
| 3346 | + $isFresh = $cachedRow && $cachedRow->tc_time |
| 3347 | + && time() < $cachedRow->tc_time + $wgTranscludeCacheExpiry; |
| 3348 | + if ( $isFresh ) { |
| 3349 | + return $cachedRow->tc_contents; |
| 3350 | + } |
| 3351 | + |
| 3352 | + # No usable cache entry: fetch the page over HTTP |
| 3353 | + $fetched = Http::get( $url ); |
| 3354 | + if ( !$fetched ) { |
| 3355 | + return wfMsg( 'scarytranscludefailed', $url ); |
| 3356 | + } |
| 3357 | + |
| 3358 | + # Store the new copy; replace() is keyed on tc_url |
| 3359 | + $masterDb = wfGetDB( DB_MASTER ); |
| 3360 | + $masterDb->replace( 'transcache', array( 'tc_url' ), |
| 3361 | + array( 'tc_url' => $url, 'tc_time' => time(), 'tc_contents' => $fetched ) ); |
| 3362 | + return $fetched; |
| 3363 | + } |
| 3364 | + |
| 3365 | + |
| 3366 | + /** |
| 3367 | + * Triple brace replacement -- used for template arguments |
| 3368 | + * @private |
| 3369 | + */ |
| 3370 | + function argSubstitution( $matches ) { |
| 3371 | + $argName = trim( $matches['title'] ); |
| 3372 | + # Unless replaced below, the matched text is returned unchanged |
| 3373 | + $replacement = $matches['text']; |
| 3374 | + $currentArgs = end( $this->mArgStack ); |
| 3375 | + if ( array_key_exists( $argName, $currentArgs ) ) { |
| 3376 | + $replacement = $currentArgs[$argName]; |
| 3377 | + } elseif ( $this->mOutputType == OT_HTML || $this->mOutputType == OT_PREPROCESS ) { |
| 3378 | + # Argument not supplied: fall back to the first extra part, when there is one |
| 3379 | + if ( null != $matches['parts'] && count( $matches['parts'] ) > 0 ) { |
| 3380 | + $replacement = $matches['parts'][0]; |
| 3381 | + } |
| 3382 | + } |
| 3383 | + if ( !$this->incrementIncludeSize( 'arg', strlen( $replacement ) ) ) { |
| 3384 | + $replacement = $matches['text'] . '<!-- WARNING: argument omitted, expansion size too large -->'; |
| 3385 | + } |
| 3386 | + return $replacement; |
| 3387 | + } |
| 3388 | + |
| 3389 | + /** |
| 3390 | + * Increment an include size counter |
| 3391 | + * |
| 3392 | + * @param string $type The type of expansion |
| 3393 | + * @param integer $size The size of the text |
| 3394 | + * @return boolean False if this inclusion would take it over the maximum, true otherwise |
| 3395 | + */ |
| 3396 | + function incrementIncludeSize( $type, $size ) { |
| 3397 | + # The inclusion only counts when the running total stays within the limit |
| 3398 | + $fits = $this->mIncludeSizes[$type] + $size <= $this->mOptions->getMaxIncludeSize(); |
| 3399 | + if ( $fits ) { |
| 3400 | + $this->mIncludeSizes[$type] += $size; |
| 3401 | + } |
| 3402 | + return $fits; |
| 3403 | + } |
| 3404 | + |
| 3405 | + /** |
| 3406 | + * Detect __NOGALLERY__ magic word and set a placeholder |
| 3407 | + */ |
| 3408 | + function stripNoGallery( &$text ) { |
| 3409 | + # Run the 'nogallery' magic word's matchAndRemove() over $text and |
| 3410 | + # store its result on the parser output as mNoGallery |
| 3411 | + $noGalleryWord = MagicWord::get( 'nogallery' ); |
| 3412 | + $this->mOutput->mNoGallery = $noGalleryWord->matchAndRemove( $text ); |
| 3413 | + } |
| 3414 | + |
| 3415 | + /** |
| 3416 | + * Find the first __TOC__ magic word and set a <!--MWTOC--> |
| 3417 | + * placeholder that will then be replaced by the real TOC in |
| 3418 | + * ->formatHeadings, this works because at this point real |
| 3419 | + * comments will have already been discarded by the sanitizer. |
| 3420 | + * |
| 3421 | + * Any additional __TOC__ magic words left over will be discarded |
| 3422 | + * as there can only be one TOC on the page. |
| 3423 | + */ |
| 3424 | + function stripToc( $text ) { |
| 3425 | + # __NOTOC__ anywhere in the text switches the table of contents off... |
| 3426 | + $noTocWord = MagicWord::get( 'notoc' ); |
| 3427 | + if ( $noTocWord->matchAndRemove( $text ) ) { |
| 3428 | + $this->mShowToc = false; |
| 3429 | + } |
| 3430 | + |
| 3431 | + # ...but an explicit __TOC__ switches it back on and pins its position |
| 3432 | + $tocWord = MagicWord::get( 'toc' ); |
| 3433 | + if ( !$tocWord->match( $text ) ) { |
| 3434 | + return $text; |
| 3435 | + } |
| 3436 | + $this->mShowToc = true; |
| 3437 | + $this->mForceTocPosition = true; |
| 3438 | + |
| 3439 | + # The first __TOC__ becomes the placeholder that is filled in with the TOC at the end |
| 3440 | + $withPlaceholder = $tocWord->replace( '<!--MWTOC-->', $text, 1 ); |
| 3441 | + |
| 3442 | + # Any remaining __TOC__ occurrences are dropped |
| 3443 | + return $tocWord->replace( '', $withPlaceholder ); |
| 3444 | + } |
| 3445 | + |
| 3446 | + /** |
| 3447 | + * This function accomplishes several tasks: |
| 3448 | + * 1) Auto-number headings if that option is enabled |
| 3449 | + * 2) Add an [edit] link to sections for users who have enabled the option and can edit the page |
| 3450 | + * 3) Add a Table of contents on the top for users who have enabled the option |
| 3451 | + * 4) Auto-anchor headings |
| 3452 | + * |
| 3453 | + * It loops through all headlines, collects the necessary data, then splits up the |
| 3454 | + * string and re-inserts the newly formatted headlines. |
| 3455 | + * |
| 3456 | + * @param string $text |
| 3457 | + * @param boolean $isMain |
| 3458 | + * @private |
| 3459 | + */ |
| 3460 | + function formatHeadings( $text, $isMain=true ) { |
| 3461 | + global $wgMaxTocLevel, $wgContLang; |
| 3462 | + # Rebuilds every <h1>-<h6> in $text with an anchor (plus optional numbering and edit link) and assembles the TOC |
| 3463 | + $doNumberHeadings = $this->mOptions->getNumberHeadings(); |
| 3464 | + if( !$this->mTitle->quickUserCan( 'edit' ) ) { |
| 3465 | + $showEditLink = 0; |
| 3466 | + } else { |
| 3467 | + $showEditLink = $this->mOptions->getEditSection(); |
| 3468 | + } |
| 3469 | + |
| 3470 | + # Inhibit editsection links if requested in the page |
| 3471 | + $esw =& MagicWord::get( 'noeditsection' ); |
| 3472 | + if( $esw->matchAndRemove( $text ) ) { |
| 3473 | + $showEditLink = 0; |
| 3474 | + } |
| 3475 | + |
| 3476 | + # Get all headlines for numbering them and adding funky stuff like [edit] |
| 3477 | + # links - this is for later, but we need the number of headlines right now |
| 3478 | + $matches = array(); |
| 3479 | + $numMatches = preg_match_all( '/<H(?P<level>[1-6])(?P<attrib>.*?'.'>)(?P<header>.*?)<\/H[1-6] *>/i', $text, $matches ); |
| 3480 | + |
| 3481 | + # if there are fewer than 4 headlines in the article, do not show TOC |
| 3482 | + # unless it's been explicitly enabled. |
| 3483 | + $enoughToc = $this->mShowToc && |
| 3484 | + (($numMatches >= 4) || $this->mForceTocPosition); |
| 3485 | + |
| 3486 | + # Allow user to stipulate that a page should have a "new section" |
| 3487 | + # link added via __NEWSECTIONLINK__ |
| 3488 | + $mw =& MagicWord::get( 'newsectionlink' ); |
| 3489 | + if( $mw->matchAndRemove( $text ) ) |
| 3490 | + $this->mOutput->setNewSection( true ); |
| 3491 | + |
| 3492 | + # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML, |
| 3493 | + # override above conditions and always show TOC above first header |
| 3494 | + $mw =& MagicWord::get( 'forcetoc' ); |
| 3495 | + if ($mw->matchAndRemove( $text ) ) { |
| 3496 | + $this->mShowToc = true; |
| 3497 | + $enoughToc = true; |
| 3498 | + } |
| 3499 | + |
| 3500 | + # We need this to perform operations on the HTML |
| 3501 | + $sk = $this->mOptions->getSkin(); |
| 3502 | + |
| 3503 | + # headline counter |
| 3504 | + $headlineCount = 0; |
| 3505 | + $sectionCount = 0; # headlineCount excluding template sections |
| 3506 | + $numVisible = 0; |
| 3507 | + |
| 3508 | + # Ugh .. the TOC should have neat indentation levels which can be |
| 3509 | + # passed to the skin functions. These are determined here |
| 3510 | + $toc = ''; |
| 3511 | + $full = ''; |
| 3512 | + $head = array(); |
| 3513 | + $sublevelCount = array(); |
| 3514 | + $levelCount = array(); |
| 3515 | + $toclevel = 0; |
| 3516 | + $level = 0; |
| 3517 | + $prevlevel = 0; |
| 3518 | + $refers = $refcount = array(); # $refers: anchor text => times seen so far; $refcount: heading index => its occurrence number |
| 3519 | + $prevtoclevel = 0; |
| 3520 | + |
| 3521 | + foreach( $matches[3] as $headline ) { |
| 3522 | + $istemplate = 0; |
| 3523 | + $templatetitle = ''; |
| 3524 | + $templatesection = 0; |
| 3525 | + $numbering = ''; |
| 3526 | + $mat = array(); |
| 3527 | + if (preg_match("/<!--MWTEMPLATESECTION=([^&]+)&([^_]+)-->/", $headline, $mat)) { |
| 3528 | + $istemplate = 1; |
| 3529 | + $templatetitle = base64_decode($mat[1]); |
| 3530 | + $templatesection = 1 + (int)base64_decode($mat[2]); |
| 3531 | + $headline = preg_replace("/<!--MWTEMPLATESECTION=([^&]+)&([^_]+)-->/", "", $headline); |
| 3532 | + } |
| 3533 | + |
| 3534 | + if( $toclevel ) { |
| 3535 | + $prevlevel = $level; |
| 3536 | + $prevtoclevel = $toclevel; |
| 3537 | + } |
| 3538 | + $level = $matches[1][$headlineCount]; |
| 3539 | + |
| 3540 | + if( $doNumberHeadings || $enoughToc ) { |
| 3541 | + |
| 3542 | + if ( $level > $prevlevel ) { |
| 3543 | + # Increase TOC level |
| 3544 | + $toclevel++; |
| 3545 | + $sublevelCount[$toclevel] = 0; |
| 3546 | + if( $toclevel<$wgMaxTocLevel ) { |
| 3547 | + $prevtoclevel = $toclevel; |
| 3548 | + $toc .= $sk->tocIndent(); |
| 3549 | + $numVisible++; |
| 3550 | + } |
| 3551 | + } |
| 3552 | + elseif ( $level < $prevlevel && $toclevel > 1 ) { |
| 3553 | + # Decrease TOC level, find level to jump to |
| 3554 | + |
| 3555 | + if ( $toclevel == 2 && $level <= $levelCount[1] ) { |
| 3556 | + # Can only go down to level 1 |
| 3557 | + $toclevel = 1; |
| 3558 | + } else { |
| 3559 | + for ($i = $toclevel; $i > 0; $i--) { |
| 3560 | + if ( $levelCount[$i] == $level ) { |
| 3561 | + # Found last matching level |
| 3562 | + $toclevel = $i; |
| 3563 | + break; |
| 3564 | + } |
| 3565 | + elseif ( $levelCount[$i] < $level ) { |
| 3566 | + # Found first matching level below current level |
| 3567 | + $toclevel = $i + 1; |
| 3568 | + break; |
| 3569 | + } |
| 3570 | + } |
| 3571 | + } |
| 3572 | + if( $toclevel<$wgMaxTocLevel ) { |
| 3573 | + if($prevtoclevel < $wgMaxTocLevel) { |
| 3574 | + # Unindent only if the previous toc level was shown :p |
| 3575 | + $toc .= $sk->tocUnindent( $prevtoclevel - $toclevel ); |
| 3576 | + } else { |
| 3577 | + $toc .= $sk->tocLineEnd(); |
| 3578 | + } |
| 3579 | + } |
| 3580 | + } |
| 3581 | + else { |
| 3582 | + # No change in level, end TOC line |
| 3583 | + if( $toclevel<$wgMaxTocLevel ) { |
| 3584 | + $toc .= $sk->tocLineEnd(); |
| 3585 | + } |
| 3586 | + } |
| 3587 | + |
| 3588 | + $levelCount[$toclevel] = $level; |
| 3589 | + |
| 3590 | + # count number of headlines for each level |
| 3591 | + @$sublevelCount[$toclevel]++; |
| 3592 | + $dot = 0; |
| 3593 | + for( $i = 1; $i <= $toclevel; $i++ ) { |
| 3594 | + if( !empty( $sublevelCount[$i] ) ) { |
| 3595 | + if( $dot ) { |
| 3596 | + $numbering .= '.'; |
| 3597 | + } |
| 3598 | + $numbering .= $wgContLang->formatNum( $sublevelCount[$i] ); |
| 3599 | + $dot = 1; |
| 3600 | + } |
| 3601 | + } |
| 3602 | + } |
| 3603 | + |
| 3604 | + # The canonized header is a version of the header text safe to use for links |
| 3605 | + # Avoid insertion of weird stuff like <math> by expanding the relevant sections |
| 3606 | + $canonized_headline = $this->mStripState->unstripBoth( $headline ); |
| 3607 | + |
| 3608 | + # Remove link placeholders by the link text. |
| 3609 | + # <!--LINK number--> |
| 3610 | + # turns into |
| 3611 | + # link text with suffix |
| 3612 | + $canonized_headline = preg_replace( '/<!--LINK ([0-9]*)-->/e', |
| 3613 | + "\$this->mLinkHolders['texts'][\$1]", |
| 3614 | + $canonized_headline ); |
| 3615 | + $canonized_headline = preg_replace( '/<!--IWLINK ([0-9]*)-->/e', |
| 3616 | + "\$this->mInterwikiLinkHolders['texts'][\$1]", |
| 3617 | + $canonized_headline ); |
| 3618 | + |
| 3619 | + # Strip out HTML (other than plain <sup> and <sub>: bug 8393) |
| 3620 | + $tocline = preg_replace( |
| 3621 | + array( '#<(?!/?(sup|sub)).*?'.'>#', '#<(/?(sup|sub)).*?'.'>#' ), |
| 3622 | + array( '', '<$1>'), |
| 3623 | + $canonized_headline |
| 3624 | + ); |
| 3625 | + $tocline = trim( $tocline ); |
| 3626 | + |
| 3627 | + # For the anchor, strip out HTML-y stuff period |
| 3628 | + $canonized_headline = preg_replace( '/<.*?'.'>/', '', $canonized_headline ); |
| 3629 | + $canonized_headline = trim( $canonized_headline ); |
| 3630 | + |
| 3631 | + # Save headline for section edit hint before it's escaped |
| 3632 | + $headline_hint = $canonized_headline; |
| 3633 | + $canonized_headline = Sanitizer::escapeId( $canonized_headline ); |
| 3634 | + # $refers is keyed by anchor text only: a positional entry would collide with numeric headings such as "1" |
| 3635 | + |
| 3636 | + # count how many in assoc. array so we can track dupes in anchors |
| 3637 | + isset( $refers[$canonized_headline] ) ? $refers[$canonized_headline]++ : $refers[$canonized_headline] = 1; |
| 3638 | + $refcount[$headlineCount]=$refers[$canonized_headline]; |
| 3639 | + |
| 3640 | + # Don't number the heading if it is the only one (looks silly) |
| 3641 | + if( $doNumberHeadings && count( $matches[3] ) > 1) { |
| 3642 | + # the two are different if the line contains a link |
| 3643 | + $headline=$numbering . ' ' . $headline; |
| 3644 | + } |
| 3645 | + |
| 3646 | + # Create the anchor for linking from the TOC to the section |
| 3647 | + $anchor = $canonized_headline; |
| 3648 | + if($refcount[$headlineCount] > 1 ) { |
| 3649 | + $anchor .= '_' . $refcount[$headlineCount]; |
| 3650 | + } |
| 3651 | + if( $enoughToc && ( !isset($wgMaxTocLevel) || $toclevel<$wgMaxTocLevel ) ) { |
| 3652 | + $toc .= $sk->tocLine($anchor, $tocline, $numbering, $toclevel); |
| 3653 | + } |
| 3654 | + # give headline the correct <h#> tag |
| 3655 | + if( $showEditLink && ( !$istemplate || $templatetitle !== "" ) ) { |
| 3656 | + if( $istemplate ) |
| 3657 | + $editlink = $sk->editSectionLinkForOther($templatetitle, $templatesection); |
| 3658 | + else |
| 3659 | + $editlink = $sk->editSectionLink($this->mTitle, $sectionCount+1, $headline_hint); |
| 3660 | + } else { |
| 3661 | + $editlink = ''; |
| 3662 | + } |
| 3663 | + $head[$headlineCount] = $sk->makeHeadline( $level, $matches['attrib'][$headlineCount], $anchor, $headline, $editlink ); |
| 3664 | + |
| 3665 | + $headlineCount++; |
| 3666 | + if( !$istemplate ) |
| 3667 | + $sectionCount++; |
| 3668 | + } |
| 3669 | + |
| 3670 | + # Never ever show TOC if no headers |
| 3671 | + if( $numVisible < 1 ) { |
| 3672 | + $enoughToc = false; |
| 3673 | + } |
| 3674 | + |
| 3675 | + if( $enoughToc ) { |
| 3676 | + if( $prevtoclevel > 0 && $prevtoclevel < $wgMaxTocLevel ) { |
| 3677 | + $toc .= $sk->tocUnindent( $prevtoclevel - 1 ); |
| 3678 | + } |
| 3679 | + $toc = $sk->tocList( $toc ); |
| 3680 | + } |
| 3681 | + |
| 3682 | + # split up and insert constructed headlines |
| 3683 | + # (the pattern must cover the same spans as the preg_match_all() above, so that $blocks lines up with $head) |
| 3684 | + $blocks = preg_split( '/<H[1-6].*?' . '>.*?<\/H[1-6] *>/i', $text ); |
| 3685 | + $i = 0; |
| 3686 | + |
| 3687 | + foreach( $blocks as $block ) { |
| 3688 | + if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) { |
| 3689 | + # This is the [edit] link that appears for the top block of text when |
| 3690 | + # section editing is enabled |
| 3691 | + |
| 3692 | + # Disabled because it broke block formatting |
| 3693 | + # For example, a bullet point in the top line |
| 3694 | + # $full .= $sk->editSectionLink(0); |
| 3695 | + } |
| 3696 | + $full .= $block; |
| 3697 | + if( $enoughToc && !$i && $isMain && !$this->mForceTocPosition ) { |
| 3698 | + # Top anchor now in skin |
| 3699 | + $full = $full.$toc; |
| 3700 | + } |
| 3701 | + |
| 3702 | + if( !empty( $head[$i] ) ) { |
| 3703 | + $full .= $head[$i]; |
| 3704 | + } |
| 3705 | + $i++; |
| 3706 | + } |
| 3707 | + if( $this->mForceTocPosition ) { |
| 3708 | + return str_replace( '<!--MWTOC-->', $toc, $full ); |
| 3709 | + } else { |
| 3710 | + return $full; |
| 3711 | + } |
| 3712 | + } |
| 3713 | + |
| 3714 | + /** |
| 3715 | + * Transform wiki markup when saving a page by doing \r\n -> \n |
| 3716 | + * conversion, substituting signatures, {{subst:}} templates, etc. |
| 3717 | + * |
| 3718 | + * @param string $text the text to transform |
| 3719 | + * @param Title &$title the Title object for the current article |
| 3720 | + * @param User $user the User object describing the current user |
| 3721 | + * @param ParserOptions $options parsing options |
| 3722 | + * @param bool $clearState whether to clear the parser state first |
| 3723 | + * @return string the altered wiki markup |
| 3724 | + * @public |
| 3725 | + */ |
| 3726 | + function preSaveTransform( $text, &$title, $user, $options, $clearState = true ) { |
| 3727 | + # Put the parser into pre-save (OT_WIKI) mode for the given page |
| 3728 | + $this->mOptions = $options; |
| 3729 | + $this->mTitle =& $title; |
| 3730 | + $this->setOutputType( OT_WIKI ); |
| 3731 | + |
| 3732 | + if ( $clearState ) { |
| 3733 | + $this->clearState(); |
| 3734 | + } |
| 3735 | + |
| 3736 | + $stripState = new StripState; |
| 3737 | + # Normalise Windows line endings before anything else looks at the text |
| 3738 | + $text = str_replace( "\r\n", "\n", $text ); |
| 3739 | + |
| 3740 | + # Strip protected sections, run the save-time pass, then put the stripped parts back |
| 3741 | + $stripped = $this->strip( $text, $stripState, true, array( 'gallery' ) ); |
| 3742 | + $transformed = $this->pstPass2( $stripped, $stripState, $user ); |
| 3743 | + return $stripState->unstripBoth( $transformed ); |
| 3744 | + } |
| 3745 | + |
| 3746 | + /** |
| 3747 | + * Pre-save transform helper function: expands {{subst:}} variables, replaces ~~~ / ~~~~ / ~~~~~ signatures, completes [[|name]] and [[name (context)|]] links and trims trailing whitespace; returns the transformed text |
| 3748 | + * @private |
| 3749 | + */ |
| 3750 | + function pstPass2( $text, &$stripState, $user ) { |
| 3751 | + global $wgContLang, $wgLocaltimezone; |
| 3752 | + |
| 3753 | + /* Note: This is the timestamp saved as hardcoded wikitext to |
| 3754 | + * the database, we use $wgContLang here in order to give |
| 3755 | + * everyone the same signature and use the default one rather |
| 3756 | + * than the one selected in each user's preferences. |
| 3757 | + */ |
| 3758 | + if ( isset( $wgLocaltimezone ) ) { |
| 3759 | + $oldtz = getenv( 'TZ' ); |
| 3760 | + putenv( 'TZ='.$wgLocaltimezone ); |
| 3761 | + } |
| 3762 | + $d = $wgContLang->timeanddate( date( 'YmdHis' ), false, false) . |
| 3763 | + ' (' . date( 'T' ) . ')'; |
| 3764 | + if ( isset( $wgLocaltimezone ) ) { |
| 3765 | + putenv( 'TZ='.$oldtz ); |
| 3766 | + } |
| 3767 | + # NOTE(review): getenv() returns false when TZ is unset, so the restore above then sets an empty TZ - confirm this is intended |
| 3768 | + # Variable replacement |
| 3769 | + # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags |
| 3770 | + $text = $this->replaceVariables( $text ); |
| 3771 | + |
| 3772 | + # Strip out <nowiki> etc. added via replaceVariables |
| 3773 | + $text = $this->strip( $text, $stripState, false, array( 'gallery' ) ); |
| 3774 | + |
| 3775 | + # Signatures: ~~~~~ becomes the timestamp, ~~~~ the signature plus timestamp, ~~~ the signature alone |
| 3776 | + $sigText = $this->getUserSig( $user ); |
| 3777 | + $text = strtr( $text, array( |
| 3778 | + '~~~~~' => $d, |
| 3779 | + '~~~~' => "$sigText $d", |
| 3780 | + '~~~' => $sigText |
| 3781 | + ) ); |
| 3782 | + |
| 3783 | + # Context links: [[|name]] and [[name (context)|]] |
| 3784 | + # |
| 3785 | + global $wgLegalTitleChars; |
| 3786 | + $tc = "[$wgLegalTitleChars]"; |
| 3787 | + $nc = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii! |
| 3788 | + |
| 3789 | + $p1 = "/\[\[(:?$nc+:|:|)($tc+?)( \\($tc+\\))\\|]]/"; # [[ns:page (context)|]] |
| 3790 | + $p3 = "/\[\[(:?$nc+:|:|)($tc+?)( \\($tc+\\)|)(, $tc+|)\\|]]/"; # [[ns:page (context), context|]] |
| 3791 | + $p2 = "/\[\[\\|($tc+)]]/"; # [[|page]] |
| 3792 | + |
| 3793 | + # try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]" |
| 3794 | + $text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text ); |
| 3795 | + $text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text ); |
| 3796 | + # [[|page]] takes its prefix and its " (context)" or ", context" suffix from the current page title, when the title has one |
| 3797 | + $t = $this->mTitle->getText(); |
| 3798 | + $m = array(); |
| 3799 | + if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m ) ) { |
| 3800 | + $text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text ); |
| 3801 | + } elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && '' != "$m[1]$m[2]" ) { |
| 3802 | + $text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text ); |
| 3803 | + } else { |
| 3804 | + # if there's no context, don't bother duplicating the title |
| 3805 | + $text = preg_replace( $p2, '[[\\1]]', $text ); |
| 3806 | + } |
| 3807 | + |
| 3808 | + # Trim trailing whitespace |
| 3809 | + $text = rtrim( $text ); |
| 3810 | + |
| 3811 | + return $text; |
| 3812 | + } |
| 3813 | + |
| 3814 | + /** |
| 3815 | + * Fetch the user's signature text, if any, and normalize to |
| 3816 | + * validated, ready-to-insert wikitext. |
| 3817 | + * A 'fancysig' nickname that passes validateSig() is returned via cleanSig(); in every other case the result is a link to the user page. |
| 3818 | + * @param User $user the user whose name, 'nickname' and 'fancysig' options are read |
| 3819 | + * @return string the signature as wikitext |
| 3820 | + * @private |
| 3821 | + */ |
| 3822 | + function getUserSig( &$user ) { |
| 3823 | + global $wgMaxSigChars; |
| 3824 | + |
| 3825 | + $username = $user->getName(); |
| 3826 | + $nickname = $user->getOption( 'nickname' ); |
| 3827 | + $nickname = $nickname === '' ? $username : $nickname; |
| 3828 | + |
| 3829 | + if( mb_strlen( $nickname ) > $wgMaxSigChars ) { |
| 3830 | + $nickname = $username; |
| 3831 | + wfDebug( __METHOD__ . ": $username has overlong signature.\n" ); |
| 3832 | + } elseif( $user->getBoolOption( 'fancysig' ) !== false ) { |
| 3833 | + # Sig. might contain markup; validate this |
| 3834 | + if( $this->validateSig( $nickname ) !== false ) { |
| 3835 | + # Validated; clean up (if needed) and return it |
| 3836 | + return $this->cleanSig( $nickname, true ); |
| 3837 | + } else { |
| 3838 | + # Failed to validate; fall back to the default |
| 3839 | + $nickname = $username; |
| 3840 | + wfDebug( "Parser::getUserSig: $username has bad XML tags in signature.\n" ); |
| 3841 | + } |
| 3842 | + } |
| 3843 | + |
| 3844 | + // Make sure the nickname doesn't get a sig in a sig |
| 3845 | + $nickname = $this->cleanSigInSig( $nickname ); |
| 3846 | + |
| 3847 | + # If we're still here, make it a link to the user page |
| 3848 | + $userpage = $user->getUserPage(); |
| 3849 | + return( '[[' . $userpage->getPrefixedText() . '|' . wfEscapeWikiText( $nickname ) . ']]' ); |
| 3850 | + } |
| 3851 | + |
| 3852 | + /** |
| 3853 | + * Check that the user's signature contains no bad XML |
| 3854 | + * |
| 3855 | + * @param string $text the signature text to check |
| 3856 | + * @return mixed The text unchanged if wfIsWellFormedXmlFragment() accepts it, or false if invalid. |
| 3857 | + */ |
| 3858 | + function validateSig( $text ) { |
| 3859 | + return( wfIsWellFormedXmlFragment( $text ) ? $text : false ); |
| 3860 | + } |
| 3861 | + |
| 3862 | + /** |
| 3863 | + * Clean up signature text |
| 3864 | + * |
| 3865 | + * 1) Strip ~~~, ~~~~ and ~~~~~ out of signatures @see cleanSigInSig |
| 3866 | + * 2) Substitute all transclusions (every "{{" not already followed by the subst word gets it prepended) |
| 3867 | + * The parser is re-initialised with $wgTitle and fresh ParserOptions, and its state is cleared again before returning. |
| 3868 | + * @param string $text the signature wikitext |
| 3869 | + * @param bool $parsing true when parsing (output type OT_WIKI), false when cleaning on preferences save (OT_MSG) |
| 3870 | + * @return string Signature text |
| 3871 | + */ |
| 3872 | + function cleanSig( $text, $parsing = false ) { |
| 3873 | + global $wgTitle; |
| 3874 | + $this->startExternalParse( $wgTitle, new ParserOptions(), $parsing ? OT_WIKI : OT_MSG ); |
| 3875 | + # Match "{{" unless the subst magic word follows (negative lookahead) |
| 3876 | + $substWord = MagicWord::get( 'subst' ); |
| 3877 | + $substRegex = '/\{\{(?!(?:' . $substWord->getBaseRegex() . '))/x' . $substWord->getRegexCase(); |
| 3878 | + $substText = '{{' . $substWord->getSynonym( 0 ); |
| 3879 | + |
| 3880 | + $text = preg_replace( $substRegex, $substText, $text ); |
| 3881 | + $text = $this->cleanSigInSig( $text ); |
| 3882 | + $text = $this->replaceVariables( $text ); |
| 3883 | + |
| 3884 | + $this->clearState(); |
| 3885 | + return $text; |
| 3886 | + } |
| 3887 | + |
| 3888 | + /** |
| 3889 | + * Strip ~~~, ~~~~ and ~~~~~ out of signatures, so that a signature cannot contain another signature marker |
| 3890 | + * @param string $text the signature text |
| 3891 | + * @return string Signature text with every match of /~{3,5}/ removed |
| 3892 | + */ |
| 3893 | + function cleanSigInSig( $text ) { |
| 3894 | + $text = preg_replace( '/~{3,5}/', '', $text ); |
| 3895 | + return $text; |
| 3896 | + } |
| 3897 | + |
| 3898 | + /** |
| 3899 | + * Set up some variables which are usually set up in parse() |
| 3900 | + * so that an external function can call some class members with confidence: stores $title (by reference) and $options, sets the output type to $outputType and, unless $clearState is false, clears the parser state |
| 3901 | + * @public |
| 3902 | + */ |
| 3903 | + function startExternalParse( &$title, $options, $outputType, $clearState = true ) { |
| 3904 | + $this->mTitle =& $title; |
| 3905 | + $this->mOptions = $options; |
| 3906 | + $this->setOutputType( $outputType ); |
| 3907 | + if ( $clearState ) { |
| 3908 | + $this->clearState(); |
| 3909 | + } |
| 3910 | + } |
| 3911 | + |
| 3912 | + /** |
| 3913 | + * Transform a MediaWiki message by replacing magic variables. |
| 3914 | + * A nested call made while one is already running returns $text unchanged. |
| 3915 | + * @param string $text the text to transform |
| 3916 | + * @param ParserOptions $options options |
| 3917 | + * @return string the text with variables substituted |
| 3918 | + * @public |
| 3919 | + */ |
| 3920 | + function transformMsg( $text, $options ) { |
| 3921 | + global $wgTitle; |
| 3922 | + static $executing = false; |
| 3923 | + |
| 3924 | + $fname = "Parser::transformMsg"; |
| 3925 | + |
| 3926 | + # Guard against infinite recursion |
| 3927 | + if ( $executing ) { |
| 3928 | + return $text; |
| 3929 | + } |
| 3930 | + $executing = true; |
| 3931 | + |
| 3932 | + wfProfileIn($fname); |
| 3933 | + # Use the global title unless it is missing or a FakeTitle; otherwise fall back to a title made from 'msg' |
| 3934 | + if ( $wgTitle && !( $wgTitle instanceof FakeTitle ) ) { |
| 3935 | + $this->mTitle = $wgTitle; |
| 3936 | + } else { |
| 3937 | + $this->mTitle = Title::newFromText('msg'); |
| 3938 | + } |
| 3939 | + $this->mOptions = $options; |
| 3940 | + $this->setOutputType( OT_MSG ); |
| 3941 | + $this->clearState(); |
| 3942 | + $text = $this->replaceVariables( $text ); |
| 3943 | + |
| 3944 | + $executing = false; |
| 3945 | + wfProfileOut($fname); |
| 3946 | + return $text; |
| 3947 | + } |
| 3948 | + |
| 3949 | + /** |
| 3950 | + * Create an HTML-style tag, e.g. <yourtag>special text</yourtag> |
| 3951 | + * The callback should have the following form: |
| 3952 | + * function myParserHook( $text, $params, &$parser ) { ... } |
| 3953 | + * |
| 3954 | + * Transform and return $text. Use $parser for any required context, e.g. use |
| 3955 | + * $parser->getTitle() and $parser->getOptions() not $wgTitle or $wgOut->mParserOptions |
| 3956 | + * |
| 3957 | + * @public |
| 3958 | + * |
| 3959 | + * @param mixed $tag The tag to use, e.g. 'hook' for <hook>; it is lower-cased before being stored |
| 3960 | + * @param mixed $callback The callback function (and object) to use for the tag |
| 3961 | + * |
| 3962 | + * @return mixed The callback previously stored in mTagHooks for this tag, or null if there was none |
| 3963 | + */ |
| 3964 | + function setHook( $tag, $callback ) { |
| 3965 | + $tag = strtolower( $tag ); |
| 3966 | + $oldVal = isset( $this->mTagHooks[$tag] ) ? $this->mTagHooks[$tag] : null; |
| 3967 | + $this->mTagHooks[$tag] = $callback; |
| 3968 | + |
| 3969 | + return $oldVal; |
| 3970 | + } |
| 3971 | + /** Store $callback in mTransparentTagHooks under the lower-cased $tag and return the previous callback, or null if none was set. NOTE(review): how transparent tag hooks differ from setHook() tags is not visible here - confirm against the code that reads mTransparentTagHooks. */ |
| 3972 | + function setTransparentTagHook( $tag, $callback ) { |
| 3973 | + $tag = strtolower( $tag ); |
| 3974 | + $oldVal = isset( $this->mTransparentTagHooks[$tag] ) ? $this->mTransparentTagHooks[$tag] : null; |
| 3975 | + $this->mTransparentTagHooks[$tag] = $callback; |
| 3976 | + |
| 3977 | + return $oldVal; |
| 3978 | + } |
| 3979 | + |
| 3980 | + /** |
| 3981 | + * Create a function, e.g. {{sum:1|2|3}} |
| 3982 | + * The callback function should have the form: |
| 3983 | + * function myParserFunction( &$parser, $arg1, $arg2, $arg3 ) { ... } |
| 3984 | + * |
| 3985 | + * The callback may either return the text result of the function, or an array with the text |
| 3986 | + * in element 0, and a number of flags in the other elements. The names of the flags are |
| 3987 | + * specified in the keys. Valid flags are: |
| 3988 | + * found The text returned is valid, stop processing the template. This |
| 3989 | + * is on by default. |
| 3990 | + * nowiki Wiki markup in the return value should be escaped |
| 3991 | + * noparse Unsafe HTML tags should not be stripped, etc. |
| 3992 | + * noargs Don't replace triple-brace arguments in the return value |
| 3993 | + * isHTML The returned text is HTML, armour it against wikitext transformation |
| 3994 | + * |
| 3995 | + * @public |
| 3996 | + * |
| 3997 | + * @param string $id The magic word ID |
| 3998 | + * @param mixed $callback The callback function (and object) to use |
| 3999 | + * @param integer $flags a combination of the following flags: |
| 4000 | + * SFH_NO_HASH No leading hash, i.e. {{plural:...}} instead of {{#if:...}} |
| 4001 | + * @throws MWException if MagicWord::get( $id ) returns nothing; the callback has already been stored in mFunctionHooks at that point |
| 4002 | + * @return mixed The old callback function for this name, or null if there was none |
| 4003 | + */ |
| 4004 | + function setFunctionHook( $id, $callback, $flags = 0 ) { |
| 4005 | + $oldVal = isset( $this->mFunctionHooks[$id] ) ? $this->mFunctionHooks[$id] : null; |
| 4006 | + $this->mFunctionHooks[$id] = $callback; |
| 4007 | + |
| 4008 | + # Add to function cache |
| 4009 | + $mw = MagicWord::get( $id ); |
| 4010 | + if( !$mw ) |
| 4011 | + throw new MWException( 'Parser::setFunctionHook() expecting a magic word identifier.' ); |
| 4012 | + |
| 4013 | + $synonyms = $mw->getSynonyms(); |
| 4014 | + $sensitive = intval( $mw->isCaseSensitive() ); |
| 4015 | + # Register every synonym in mFunctionSynonyms, keyed first by case sensitivity (0 or 1) |
| 4016 | + foreach ( $synonyms as $syn ) { |
| 4017 | + # Case |
| 4018 | + if ( !$sensitive ) { |
| 4019 | + $syn = strtolower( $syn ); |
| 4020 | + } |
| 4021 | + # Add leading hash |
| 4022 | + if ( !( $flags & SFH_NO_HASH ) ) { |
| 4023 | + $syn = '#' . $syn; |
| 4024 | + } |
| 4025 | + # Remove trailing colon |
| 4026 | + if ( substr( $syn, -1, 1 ) == ':' ) { |
| 4027 | + $syn = substr( $syn, 0, -1 ); |
| 4028 | + } |
| 4029 | + $this->mFunctionSynonyms[$sensitive][$syn] = $id; |
| 4030 | + } |
| 4031 | + return $oldVal; |
| 4032 | + } |
| 4033 | + |
| 4034 | + /** |
| 4035 | + * Get all registered function hook identifiers |
| 4036 | + * |
| 4037 | + * @return array the keys of mFunctionHooks, i.e. the ids passed to setFunctionHook() |
| 4038 | + */ |
| 4039 | + function getFunctionHooks() { |
| 4040 | + return array_keys( $this->mFunctionHooks ); |
| 4041 | + } |
| 4042 | + |
| 4043 | + /** |
| 4044 | + * Replace <!--LINK--> and <!--IWLINK--> link placeholders with actual links, in the buffer $text (modified by reference) |
| 4045 | + * Placeholders created in Skin::makeLinkObj() |
| 4046 | + * Returns an array of links found, indexed by PDBK (prefixed DB key), with these values: |
| 4047 | + * 0 - broken |
| 4048 | + * 1 - normal link |
| 4049 | + * 2 - stub |
| 4050 | + * $options is a bit field, RLH_FOR_UPDATE to select for update (appends FOR UPDATE to the page queries) |
| 4051 | + */ |
| 4052 | + function replaceLinkHolders( &$text, $options = 0 ) { |
| 4053 | + global $wgUser; |
| 4054 | + global $wgContLang; |
| 4055 | + |
| 4056 | + $fname = 'Parser::replaceLinkHolders'; |
| 4057 | + wfProfileIn( $fname ); |
| 4058 | + |
| 4059 | + $pdbks = array(); |
| 4060 | + $colours = array(); |
| 4061 | + $sk = $this->mOptions->getSkin(); |
| 4062 | + $linkCache =& LinkCache::singleton(); |
| 4063 | + # Internal links: classify every holder as broken (0), known (1) or stub (2), then substitute the HTML |
| 4064 | + if ( !empty( $this->mLinkHolders['namespaces'] ) ) { |
| 4065 | + wfProfileIn( $fname.'-check' ); |
| 4066 | + $dbr = wfGetDB( DB_SLAVE ); |
| 4067 | + $page = $dbr->tableName( 'page' ); |
| 4068 | + $threshold = $wgUser->getOption('stubthreshold'); |
| 4069 | + |
| 4070 | + # Sort by namespace |
| 4071 | + asort( $this->mLinkHolders['namespaces'] ); |
| 4072 | + |
| 4073 | + # Generate query |
| 4074 | + $query = false; |
| 4075 | + $current = null; |
| 4076 | + foreach ( $this->mLinkHolders['namespaces'] as $key => $ns ) { |
| 4077 | + # Make title object |
| 4078 | + $title = $this->mLinkHolders['titles'][$key]; |
| 4079 | + |
| 4080 | + # Skip invalid entries. |
| 4081 | + # Result will be ugly, but prevents crash. |
| 4082 | + if ( is_null( $title ) ) { |
| 4083 | + continue; |
| 4084 | + } |
| 4085 | + $pdbk = $pdbks[$key] = $title->getPrefixedDBkey(); |
| 4086 | + |
| 4087 | + # Check if it's a static known link, e.g. interwiki |
| 4088 | + if ( $title->isAlwaysKnown() ) { |
| 4089 | + $colours[$pdbk] = 1; |
| 4090 | + } elseif ( ( $id = $linkCache->getGoodLinkID( $pdbk ) ) != 0 ) { |
| 4091 | + $colours[$pdbk] = 1; |
| 4092 | + $this->mOutput->addLink( $title, $id ); |
| 4093 | + } elseif ( $linkCache->isBadLink( $pdbk ) ) { |
| 4094 | + $colours[$pdbk] = 0; |
| 4095 | + } elseif ( $title->getNamespace() == NS_SPECIAL && !SpecialPage::exists( $pdbk ) ) { |
| 4096 | + $colours[$pdbk] = 0; |
| 4097 | + } else { |
| 4098 | + # Not in the link cache, add it to the query |
| 4099 | + if ( !isset( $current ) ) { |
| 4100 | + $current = $ns; |
| 4101 | + $query = "SELECT page_id, page_namespace, page_title"; |
| 4102 | + if ( $threshold > 0 ) { |
| 4103 | + $query .= ', page_len, page_is_redirect'; |
| 4104 | + } |
| 4105 | + $query .= " FROM $page WHERE (page_namespace=$ns AND page_title IN("; |
| 4106 | + } elseif ( $current != $ns ) { |
| 4107 | + $current = $ns; |
| 4108 | + $query .= ")) OR (page_namespace=$ns AND page_title IN("; |
| 4109 | + } else { |
| 4110 | + $query .= ', '; |
| 4111 | + } |
| 4112 | + |
| 4113 | + $query .= $dbr->addQuotes( $this->mLinkHolders['dbkeys'][$key] ); |
| 4114 | + } |
| 4115 | + } |
| 4116 | + if ( $query ) { |
| 4117 | + $query .= '))'; |
| 4118 | + if ( $options & RLH_FOR_UPDATE ) { |
| 4119 | + $query .= ' FOR UPDATE'; |
| 4120 | + } |
| 4121 | + |
| 4122 | + $res = $dbr->query( $query, $fname ); |
| 4123 | + |
| 4124 | + # Fetch data and form into an associative array |
| 4125 | + # non-existent = broken |
| 4126 | + # 1 = known |
| 4127 | + # 2 = stub |
| 4128 | + while ( $s = $dbr->fetchObject($res) ) { |
| 4129 | + $title = Title::makeTitle( $s->page_namespace, $s->page_title ); |
| 4130 | + $pdbk = $title->getPrefixedDBkey(); |
| 4131 | + $linkCache->addGoodLinkObj( $s->page_id, $title ); |
| 4132 | + $this->mOutput->addLink( $title, $s->page_id ); |
| 4133 | + |
| 4134 | + $colours[$pdbk] = ( $threshold == 0 || ( |
| 4135 | + $s->page_len >= $threshold || # always true if $threshold <= 0 |
| 4136 | + $s->page_is_redirect || |
| 4137 | + !Namespace::isContent( $s->page_namespace ) ) |
| 4138 | + ? 1 : 2 ); |
| 4139 | + } |
| 4140 | + } |
| 4141 | + wfProfileOut( $fname.'-check' ); |
| 4142 | + |
| 4143 | + # Do a second query for different language variants of links and categories |
| 4144 | + if($wgContLang->hasVariants()){ |
| 4145 | + $linkBatch = new LinkBatch(); |
| 4146 | + $variantMap = array(); // maps $pdbkey_Variant => $keys (of link holders) |
| 4147 | + $categoryMap = array(); // maps $category_variant => $category (dbkeys) |
| 4148 | + $varCategories = array(); // category replacements oldDBkey => newDBkey |
| 4149 | + |
| 4150 | + $categories = $this->mOutput->getCategoryLinks(); |
| 4151 | + |
| 4152 | + // Add variants of links to link batch |
| 4153 | + foreach ( $this->mLinkHolders['namespaces'] as $key => $ns ) { |
| 4154 | + $title = $this->mLinkHolders['titles'][$key]; |
| 4155 | + if ( is_null( $title ) ) |
| 4156 | + continue; |
| 4157 | + |
| 4158 | + $pdbk = $title->getPrefixedDBkey(); |
| 4159 | + $titleText = $title->getText(); |
| 4160 | + |
| 4161 | + // generate all variants of the link title text |
| 4162 | + $allTextVariants = $wgContLang->convertLinkToAllVariants($titleText); |
| 4163 | + |
| 4164 | + // if link was not found (in first query), add all variants to query |
| 4165 | + if ( !isset($colours[$pdbk]) ){ |
| 4166 | + foreach($allTextVariants as $textVariant){ |
| 4167 | + if($textVariant != $titleText){ |
| 4168 | + $variantTitle = Title::makeTitle( $ns, $textVariant ); |
| 4169 | + if(is_null($variantTitle)) continue; |
| 4170 | + $linkBatch->addObj( $variantTitle ); |
| 4171 | + $variantMap[$variantTitle->getPrefixedDBkey()][] = $key; |
| 4172 | + } |
| 4173 | + } |
| 4174 | + } |
| 4175 | + } |
| 4176 | + |
| 4177 | + // process categories, check if a category exists in some variant |
| 4178 | + foreach( $categories as $category ){ |
| 4179 | + $variants = $wgContLang->convertLinkToAllVariants($category); |
| 4180 | + foreach($variants as $variant){ |
| 4181 | + if($variant != $category){ |
| 4182 | + $variantTitle = Title::newFromDBkey( Title::makeName(NS_CATEGORY,$variant) ); |
| 4183 | + if(is_null($variantTitle)) continue; |
| 4184 | + $linkBatch->addObj( $variantTitle ); |
| 4185 | + $categoryMap[$variant] = $category; |
| 4186 | + } |
| 4187 | + } |
| 4188 | + } |
| 4189 | + |
| 4190 | + |
| 4191 | + if(!$linkBatch->isEmpty()){ |
| 4192 | + // construct query |
| 4193 | + $titleClause = $linkBatch->constructSet('page', $dbr); |
| 4194 | + |
| 4195 | + $variantQuery = "SELECT page_id, page_namespace, page_title"; |
| 4196 | + if ( $threshold > 0 ) { |
| 4197 | + $variantQuery .= ', page_len, page_is_redirect'; |
| 4198 | + } |
| 4199 | + |
| 4200 | + $variantQuery .= " FROM $page WHERE $titleClause"; |
| 4201 | + if ( $options & RLH_FOR_UPDATE ) { |
| 4202 | + $variantQuery .= ' FOR UPDATE'; |
| 4203 | + } |
| 4204 | + |
| 4205 | + $varRes = $dbr->query( $variantQuery, $fname ); |
| 4206 | + |
| 4207 | + // for each found variants, figure out link holders and replace |
| 4208 | + while ( $s = $dbr->fetchObject($varRes) ) { |
| 4209 | + |
| 4210 | + $variantTitle = Title::makeTitle( $s->page_namespace, $s->page_title ); |
| 4211 | + $varPdbk = $variantTitle->getPrefixedDBkey(); |
| 4212 | + $vardbk = $variantTitle->getDBkey(); |
| 4213 | + |
| 4214 | + $holderKeys = array(); |
| 4215 | + if(isset($variantMap[$varPdbk])){ |
| 4216 | + $holderKeys = $variantMap[$varPdbk]; |
| 4217 | + $linkCache->addGoodLinkObj( $s->page_id, $variantTitle ); |
| 4218 | + $this->mOutput->addLink( $variantTitle, $s->page_id ); |
| 4219 | + } |
| 4220 | + |
| 4221 | + // loop over link holders |
| 4222 | + foreach($holderKeys as $key){ |
| 4223 | + $title = $this->mLinkHolders['titles'][$key]; |
| 4224 | + if ( is_null( $title ) ) continue; |
| 4225 | + |
| 4226 | + $pdbk = $title->getPrefixedDBkey(); |
| 4227 | + |
| 4228 | + if(!isset($colours[$pdbk])){ |
| 4229 | + // found link in some of the variants, replace the link holder data |
| 4230 | + $this->mLinkHolders['titles'][$key] = $variantTitle; |
| 4231 | + $this->mLinkHolders['dbkeys'][$key] = $variantTitle->getDBkey(); |
| 4232 | + |
| 4233 | + // set pdbk and colour; same stub rule as the first query: redirects, non-content namespaces and pages with page_len >= $threshold are never stubs |
| 4234 | + $pdbks[$key] = $varPdbk; |
| 4235 | + if ( $threshold > 0 ) { |
| 4236 | + $size = $s->page_len; |
| 4237 | + if ( $s->page_is_redirect || !Namespace::isContent( $s->page_namespace ) || $size >= $threshold ) { |
| 4238 | + $colours[$varPdbk] = 1; |
| 4239 | + } else { |
| 4240 | + $colours[$varPdbk] = 2; |
| 4241 | + } |
| 4242 | + } |
| 4243 | + else { |
| 4244 | + $colours[$varPdbk] = 1; |
| 4245 | + } |
| 4246 | + } |
| 4247 | + } |
| 4248 | + |
| 4249 | + // check if the object is a variant of a category |
| 4250 | + if(isset($categoryMap[$vardbk])){ |
| 4251 | + $oldkey = $categoryMap[$vardbk]; |
| 4252 | + if($oldkey != $vardbk) |
| 4253 | + $varCategories[$oldkey]=$vardbk; |
| 4254 | + } |
| 4255 | + } |
| 4256 | + |
| 4257 | + // rebuild the categories in original order (if there are replacements) |
| 4258 | + if(count($varCategories)>0){ |
| 4259 | + $newCats = array(); |
| 4260 | + $originalCats = $this->mOutput->getCategories(); |
| 4261 | + foreach($originalCats as $cat => $sortkey){ |
| 4262 | + // make the replacement |
| 4263 | + if( array_key_exists($cat,$varCategories) ) |
| 4264 | + $newCats[$varCategories[$cat]] = $sortkey; |
| 4265 | + else $newCats[$cat] = $sortkey; |
| 4266 | + } |
| 4267 | + $this->mOutput->setCategoryLinks($newCats); |
| 4268 | + } |
| 4269 | + } |
| 4270 | + } |
| 4271 | + |
| 4272 | + # Construct search and replace arrays |
| 4273 | + wfProfileIn( $fname.'-construct' ); |
| 4274 | + $replacePairs = array(); |
| 4275 | + foreach ( $this->mLinkHolders['namespaces'] as $key => $ns ) { |
| 4276 | + $pdbk = $pdbks[$key]; |
| 4277 | + $searchkey = "<!--LINK $key-->"; |
| 4278 | + $title = $this->mLinkHolders['titles'][$key]; |
| 4279 | + if ( empty( $colours[$pdbk] ) ) { |
| 4280 | + $linkCache->addBadLinkObj( $title ); |
| 4281 | + $colours[$pdbk] = 0; |
| 4282 | + $this->mOutput->addLink( $title, 0 ); |
| 4283 | + $replacePairs[$searchkey] = $sk->makeBrokenLinkObj( $title, |
| 4284 | + $this->mLinkHolders['texts'][$key], |
| 4285 | + $this->mLinkHolders['queries'][$key] ); |
| 4286 | + } elseif ( $colours[$pdbk] == 1 ) { |
| 4287 | + $replacePairs[$searchkey] = $sk->makeKnownLinkObj( $title, |
| 4288 | + $this->mLinkHolders['texts'][$key], |
| 4289 | + $this->mLinkHolders['queries'][$key] ); |
| 4290 | + } elseif ( $colours[$pdbk] == 2 ) { |
| 4291 | + $replacePairs[$searchkey] = $sk->makeStubLinkObj( $title, |
| 4292 | + $this->mLinkHolders['texts'][$key], |
| 4293 | + $this->mLinkHolders['queries'][$key] ); |
| 4294 | + } |
| 4295 | + } |
| 4296 | + $replacer = new HashtableReplacer( $replacePairs, 1 ); |
| 4297 | + wfProfileOut( $fname.'-construct' ); |
| 4298 | + |
| 4299 | + # Do the thing |
| 4300 | + wfProfileIn( $fname.'-replace' ); |
| 4301 | + $text = preg_replace_callback( |
| 4302 | + '/(<!--LINK .*?-->)/', |
| 4303 | + $replacer->cb(), |
| 4304 | + $text); |
| 4305 | + |
| 4306 | + wfProfileOut( $fname.'-replace' ); |
| 4307 | + } |
| 4308 | + |
| 4309 | + # Now process interwiki link holders |
| 4310 | + # This is quite a bit simpler than internal links |
| 4311 | + if ( !empty( $this->mInterwikiLinkHolders['texts'] ) ) { |
| 4312 | + wfProfileIn( $fname.'-interwiki' ); |
| 4313 | + # Make interwiki link HTML |
| 4314 | + $replacePairs = array(); |
| 4315 | + foreach( $this->mInterwikiLinkHolders['texts'] as $key => $link ) { |
| 4316 | + $title = $this->mInterwikiLinkHolders['titles'][$key]; |
| 4317 | + $replacePairs[$key] = $sk->makeLinkObj( $title, $link ); |
| 4318 | + } |
| 4319 | + $replacer = new HashtableReplacer( $replacePairs, 1 ); |
| 4320 | + |
| 4321 | + $text = preg_replace_callback( |
| 4322 | + '/<!--IWLINK (.*?)-->/', |
| 4323 | + $replacer->cb(), |
| 4324 | + $text ); |
| 4325 | + wfProfileOut( $fname.'-interwiki' ); |
| 4326 | + } |
| 4327 | + |
| 4328 | + wfProfileOut( $fname ); |
| 4329 | + return $colours; |
| 4330 | + } |
| 4331 | + |
| 4332 | + /** |
| 4333 | + * Replace <!--LINK--> and <!--IWLINK--> link placeholders with plain text of links |
| 4334 | + * (not HTML-formatted), using replaceLinkHoldersTextCallback() for each match. |
| 4335 | + * @param string $text text containing placeholders |
| 4336 | + * @return string the text with placeholders replaced |
| 4337 | + */ |
| 4338 | + function replaceLinkHoldersText( $text ) { |
| 4339 | + $fname = 'Parser::replaceLinkHoldersText'; |
| 4340 | + wfProfileIn( $fname ); |
| 4341 | + |
| 4342 | + $text = preg_replace_callback( |
| 4343 | + '/<!--(LINK|IWLINK) (.*?)-->/', |
| 4344 | + array( &$this, 'replaceLinkHoldersTextCallback' ), |
| 4345 | + $text ); |
| 4346 | + |
| 4347 | + wfProfileOut( $fname ); |
| 4348 | + return $text; |
| 4349 | + } |
| 4350 | + |
| 4351 | + /** |
| 4352 | + * @param array $matches regex match: [0] the whole placeholder, [1] 'LINK' or 'IWLINK', [2] the holder key |
| 4353 | + * @return string the text stored for that key in mLinkHolders or mInterwikiLinkHolders, or the placeholder unchanged when none is stored |
| 4354 | + * @private |
| 4355 | + */ |
| 4356 | + function replaceLinkHoldersTextCallback( $matches ) { |
| 4357 | + $type = $matches[1]; |
| 4358 | + $key = $matches[2]; |
| 4359 | + if( $type == 'LINK' ) { |
| 4360 | + if( isset( $this->mLinkHolders['texts'][$key] ) ) { |
| 4361 | + return $this->mLinkHolders['texts'][$key]; |
| 4362 | + } |
| 4363 | + } elseif( $type == 'IWLINK' ) { |
| 4364 | + if( isset( $this->mInterwikiLinkHolders['texts'][$key] ) ) { |
| 4365 | + return $this->mInterwikiLinkHolders['texts'][$key]; |
| 4366 | + } |
| 4367 | + } |
| 4368 | + return $matches[0]; |
| 4369 | + } |
| 4370 | + |
| 4371 | + /** |
| 4372 | + * Tag hook handler for 'pre': passes $text through StringUtils::delimiterReplace() for <nowiki>...</nowiki> pairs (replacement '$1', flag 'i'), validates $attribs for a 'pre' element and returns the content, run through Xml::escapeTagsOnly(), wrapped in <pre>...</pre>. |
| 4373 | + */ |
| 4374 | + function renderPreTag( $text, $attribs ) { |
| 4375 | + // Backwards-compatibility hack |
| 4376 | + $content = StringUtils::delimiterReplace( '<nowiki>', '</nowiki>', '$1', $text, 'i' ); |
| 4377 | + |
| 4378 | + $attribs = Sanitizer::validateTagAttributes( $attribs, 'pre' ); |
| 4379 | + return wfOpenElement( 'pre', $attribs ) . |
| 4380 | + Xml::escapeTagsOnly( $content ) . |
| 4381 | + '</pre>'; |
| 4382 | + } |
| 4383 | + |
| 4384 | + /** |
| 4385 | + * Renders an image gallery from a text with one line per image. |
| 4386 | + * text labels may be given by using |-style alternative text. E.g. |
| 4387 | + * Image:one.jpg|The number "1" |
| 4388 | + * Image:tree.jpg|A tree |
| 4389 | + * given as text will return the HTML of a gallery with two images, |
| 4390 | + * labeled 'The number "1"' and |
| 4391 | + * 'A tree'. $params holds the tag attributes; 'caption', 'perrow', 'widths' and 'heights' are read here. |
| 4392 | + */ |
| 4393 | + function renderImageGallery( $text, $params ) { |
| 4394 | + $ig = new ImageGallery(); |
| 4395 | + $ig->setContextTitle( $this->mTitle ); |
| 4396 | + $ig->setShowBytes( false ); |
| 4397 | + $ig->setShowFilename( false ); |
| 4398 | + $ig->setParser( $this ); |
| 4399 | + $ig->setHideBadImages(); |
| 4400 | + $ig->setAttributes( Sanitizer::validateTagAttributes( $params, 'table' ) ); |
| 4401 | + $ig->useSkin( $this->mOptions->getSkin() ); |
| 4402 | + $ig->mRevisionId = $this->mRevisionId; |
| 4403 | + |
| 4404 | + if( isset( $params['caption'] ) ) { |
| 4405 | + $caption = $params['caption']; |
| 4406 | + $caption = htmlspecialchars( $caption ); |
| 4407 | + $caption = $this->replaceInternalLinks( $caption ); |
| 4408 | + $ig->setCaptionHtml( $caption ); |
| 4409 | + } |
| 4410 | + if( isset( $params['perrow'] ) ) { |
| 4411 | + $ig->setPerRow( $params['perrow'] ); |
| 4412 | + } |
| 4413 | + if( isset( $params['widths'] ) ) { |
| 4414 | + $ig->setWidths( $params['widths'] ); |
| 4415 | + } |
| 4416 | + if( isset( $params['heights'] ) ) { |
| 4417 | + $ig->setHeights( $params['heights'] ); |
| 4418 | + } |
| 4419 | + # Let hook handlers adjust the gallery object before any image is added |
| 4420 | + wfRunHooks( 'BeforeParserrenderImageGallery', array( &$this, &$ig ) ); |
| 4421 | + |
| 4422 | + $lines = explode( "\n", $text ); |
| 4423 | + foreach ( $lines as $line ) { |
| 4424 | + # match lines like these: |
| 4425 | + # Image:someimage.jpg|This is some image |
| 4426 | + $matches = array(); |
| 4427 | + preg_match( "/^([^|]+)(\\|(.*))?$/", $line, $matches ); |
| 4428 | + # Skip empty lines |
| 4429 | + if ( count( $matches ) == 0 ) { |
| 4430 | + continue; |
| 4431 | + } |
| 4432 | + $tp = Title::newFromText( $matches[1] ); |
| 4433 | + $nt =& $tp; |
| 4434 | + if( is_null( $nt ) ) { |
| 4435 | + # Bogus title. Ignore these so we don't bomb out later. |
| 4436 | + continue; |
| 4437 | + } |
| 4438 | + if ( isset( $matches[3] ) ) { |
| 4439 | + $label = $matches[3]; |
| 4440 | + } else { |
| 4441 | + $label = ''; |
| 4442 | + } |
| 4443 | + # The label is parsed as wikitext with the current title and options |
| 4444 | + $pout = $this->parse( $label, |
| 4445 | + $this->mTitle, |
| 4446 | + $this->mOptions, |
| 4447 | + false, // Strip whitespace...? |
| 4448 | + false // Don't clear state! |
| 4449 | + ); |
| 4450 | + $html = $pout->getText(); |
| 4451 | + |
| 4452 | + $ig->add( $nt, $html ); |
| 4453 | + |
| 4454 | + # Only add real images (bug #5586) |
| 4455 | + if ( $nt->getNamespace() == NS_IMAGE ) { |
| 4456 | + $this->mOutput->addImage( $nt->getDBkey() ); |
| 4457 | + } |
| 4458 | + } |
| 4459 | + return $ig->toHTML(); |
| 4460 | + } |
| 4461 | + /** Build, and cache per handler class, the map from image-option magic word names to array( type, parameter name ); returns array( that map, a MagicWordArray of its keys ). $handler may be false, in which case only the built-in options are included. */ |
| 4462 | + function getImageParams( $handler ) { |
| 4463 | + if ( $handler ) { |
| 4464 | + $handlerClass = get_class( $handler ); |
| 4465 | + } else { |
| 4466 | + $handlerClass = ''; |
| 4467 | + } |
| 4468 | + if ( !isset( $this->mImageParams[$handlerClass] ) ) { |
| 4469 | + // Initialise static lists |
| 4470 | + static $internalParamNames = array( |
| 4471 | + 'horizAlign' => array( 'left', 'right', 'center', 'none' ), |
| 4472 | + 'vertAlign' => array( 'baseline', 'sub', 'super', 'top', 'text-top', 'middle', |
| 4473 | + 'bottom', 'text-bottom' ), |
| 4474 | + 'frame' => array( 'thumbnail', 'manualthumb', 'framed', 'frameless', |
| 4475 | + 'upright', 'border' ), |
| 4476 | + ); |
| 4477 | + static $internalParamMap; |
| 4478 | + if ( !$internalParamMap ) { |
| 4479 | + $internalParamMap = array(); |
| 4480 | + foreach ( $internalParamNames as $type => $names ) { |
| 4481 | + foreach ( $names as $name ) { |
| 4482 | + $magicName = str_replace( '-', '_', "img_$name" ); |
| 4483 | + $internalParamMap[$magicName] = array( $type, $name ); |
| 4484 | + } |
| 4485 | + } |
| 4486 | + } |
| 4487 | + |
| 4488 | + // Add handler params |
| 4489 | + $paramMap = $internalParamMap; |
| 4490 | + if ( $handler ) { |
| 4491 | + $handlerParamMap = $handler->getParamMap(); |
| 4492 | + foreach ( $handlerParamMap as $magic => $paramName ) { |
| 4493 | + $paramMap[$magic] = array( 'handler', $paramName ); |
| 4494 | + } |
| 4495 | + } |
| 4496 | + $this->mImageParams[$handlerClass] = $paramMap; |
| 4497 | + $this->mImageParamsMagicArray[$handlerClass] = new MagicWordArray( array_keys( $paramMap ) ); |
| 4498 | + } |
| 4499 | + return array( $this->mImageParams[$handlerClass], $this->mImageParamsMagicArray[$handlerClass] ); |
| 4500 | + } |
| 4501 | + |
| 4502 | + /** |
| 4503 | + * Parse image options text and use it to make an image |
| 4504 | + */ |
| 4505 | + function makeImage( $title, $options ) { |
| 4506 | + # @TODO: let the MediaHandler specify its transform parameters |
| 4507 | + # |
| 4508 | + # Check if the options text is of the form "options|alt text" |
| 4509 | + # Options are: |
| 4510 | + # * thumbnail make a thumbnail with enlarge-icon and caption, alignment depends on lang |
| 4511 | + # * left no resizing, just left align. label is used for alt= only |
| 4512 | + # * right same, but right aligned |
| 4513 | + # * none same, but not aligned |
| 4514 | + # * ___px scale to ___ pixels width, no aligning. e.g. use in taxobox |
| 4515 | + # * center center the image |
| 4516 | + # * framed Keep original image size, no magnify-button. |
| 4517 | + # * frameless like 'thumb' but without a frame. Keeps user preferences for width |
| 4518 | + # * upright reduce width for upright images, rounded to full __0 px |
| 4519 | + # * border draw a 1px border around the image |
| 4520 | + # vertical-align values (no % or length right now): |
| 4521 | + # * baseline |
| 4522 | + # * sub |
| 4523 | + # * super |
| 4524 | + # * top |
| 4525 | + # * text-top |
| 4526 | + # * middle |
| 4527 | + # * bottom |
| 4528 | + # * text-bottom |
| 4529 | + |
| 4530 | + $parts = array_map( 'trim', explode( '|', $options) ); |
| 4531 | + $sk = $this->mOptions->getSkin(); |
| 4532 | + |
| 4533 | + # Give extensions a chance to select the file revision for us |
| 4534 | + $skip = $time = false; |
| 4535 | + wfRunHooks( 'BeforeParserMakeImageLinkObj', array( &$this, &$title, &$skip, &$time ) ); |
| 4536 | + |
| 4537 | + if ( $skip ) { |
| 4538 | + return $sk->makeLinkObj( $title ); |
| 4539 | + } |
| 4540 | + |
| 4541 | + # Get parameter map |
| 4542 | + $file = wfFindFile( $title, $time ); |
| 4543 | + $handler = $file ? $file->getHandler() : false; |
| 4544 | + |
| 4545 | + list( $paramMap, $mwArray ) = $this->getImageParams( $handler ); |
| 4546 | + |
| 4547 | + # Process the input parameters |
| 4548 | + $caption = ''; |
| 4549 | + $params = array( 'frame' => array(), 'handler' => array(), |
| 4550 | + 'horizAlign' => array(), 'vertAlign' => array() ); |
| 4551 | + foreach( $parts as $part ) { |
| 4552 | + list( $magicName, $value ) = $mwArray->matchVariableStartToEnd( $part ); |
| 4553 | + if ( isset( $paramMap[$magicName] ) ) { |
| 4554 | + list( $type, $paramName ) = $paramMap[$magicName]; |
| 4555 | + $params[$type][$paramName] = $value; |
| 4556 | + |
| 4557 | + // Special case; width and height come in one variable together |
| 4558 | + if( $type == 'handler' && $paramName == 'width' ) { |
| 4559 | + $m = array(); |
| 4560 | + if ( preg_match( '/^([0-9]*)x([0-9]*)$/', $value, $m ) ) { |
| 4561 | + $params[$type]['width'] = intval( $m[1] ); |
| 4562 | + $params[$type]['height'] = intval( $m[2] ); |
| 4563 | + } else { |
| 4564 | + $params[$type]['width'] = intval( $value ); |
| 4565 | + } |
| 4566 | + } |
| 4567 | + } else { |
| 4568 | + $caption = $part; |
| 4569 | + } |
| 4570 | + } |
| 4571 | + |
| 4572 | + # Process alignment parameters |
| 4573 | + if ( $params['horizAlign'] ) { |
| 4574 | + $params['frame']['align'] = key( $params['horizAlign'] ); |
| 4575 | + } |
| 4576 | + if ( $params['vertAlign'] ) { |
| 4577 | + $params['frame']['valign'] = key( $params['vertAlign'] ); |
| 4578 | + } |
| 4579 | + |
| 4580 | + # Validate the handler parameters |
| 4581 | + if ( $handler ) { |
| 4582 | + foreach ( $params['handler'] as $name => $value ) { |
| 4583 | + if ( !$handler->validateParam( $name, $value ) ) { |
| 4584 | + unset( $params['handler'][$name] ); |
| 4585 | + } |
| 4586 | + } |
| 4587 | + } |
| 4588 | + |
| 4589 | + # Strip bad stuff out of the alt text |
| 4590 | + $alt = $this->replaceLinkHoldersText( $caption ); |
| 4591 | + |
| 4592 | + # make sure there are no placeholders in thumbnail attributes |
| 4593 | + # that are later expanded to html- so expand them now and |
| 4594 | + # remove the tags |
| 4595 | + $alt = $this->mStripState->unstripBoth( $alt ); |
| 4596 | + $alt = Sanitizer::stripAllTags( $alt ); |
| 4597 | + |
| 4598 | + $params['frame']['alt'] = $alt; |
| 4599 | + $params['frame']['caption'] = $caption; |
| 4600 | + |
| 4601 | + # Linker does the rest |
| 4602 | + $ret = $sk->makeImageLink2( $title, $file, $params['frame'], $params['handler'] ); |
| 4603 | + |
| 4604 | + # Give the handler a chance to modify the parser object |
| 4605 | + if ( $handler ) { |
| 4606 | + $handler->parserTransformHook( $this, $file ); |
| 4607 | + } |
| 4608 | + |
| 4609 | + return $ret; |
| 4610 | + } |
| 4611 | + |
| 4612 | + /** |
| 4613 | + * Set a flag in the output object indicating that the content is dynamic and |
| 4614 | + * shouldn't be cached. |
| 4615 | + */ |
| 4616 | + function disableCache() { |
| 4617 | + wfDebug( "Parser output marked as uncacheable.\n" ); |
| 4618 | + $this->mOutput->mCacheTime = -1; |
| 4619 | + } |
| 4620 | + |
| 4621 | + /**#@+ |
| 4622 | + * Callback from the Sanitizer for expanding items found in HTML attribute |
| 4623 | + * values, so they can be safely tested and escaped. |
| 4624 | + * @param string $text |
| 4625 | + * @param array $args |
| 4626 | + * @return string |
| 4627 | + * @private |
| 4628 | + */ |
| 4629 | + function attributeStripCallback( &$text, $args ) { |
| 4630 | + $text = $this->replaceVariables( $text, $args ); |
| 4631 | + $text = $this->mStripState->unstripBoth( $text ); |
| 4632 | + return $text; |
| 4633 | + } |
| 4634 | + |
| 4635 | + /**#@-*/ |
| 4636 | + |
| 4637 | + /**#@+ |
| 4638 | + * Accessor/mutator |
| 4639 | + */ |
| 4640 | + function Title( $x = NULL ) { return wfSetVar( $this->mTitle, $x ); } |
| 4641 | + function Options( $x = NULL ) { return wfSetVar( $this->mOptions, $x ); } |
| 4642 | + function OutputType( $x = NULL ) { return wfSetVar( $this->mOutputType, $x ); } |
| 4643 | + /**#@-*/ |
| 4644 | + |
| 4645 | + /**#@+ |
| 4646 | + * Accessor |
| 4647 | + */ |
| 4648 | + function getTags() { return array_merge( array_keys($this->mTransparentTagHooks), array_keys( $this->mTagHooks ) ); } |
| 4649 | + /**#@-*/ |
| 4650 | + |
| 4651 | + |
| 4652 | + /** |
| 4653 | + * Break wikitext input into sections, and either pull or replace |
| 4654 | + * some particular section's text. |
| 4655 | + * |
| 4656 | + * External callers should use the getSection and replaceSection methods. |
| 4657 | + * |
| 4658 | + * @param $text Page wikitext |
| 4659 | + * @param $section Numbered section. 0 pulls the text before the first |
| 4660 | + * heading; other numbers will pull the given section |
| 4661 | + * along with its lower-level subsections. |
| 4662 | + * @param $mode One of "get" or "replace" |
| 4663 | + * @param $newtext Replacement text for section data. |
| 4664 | + * @return string for "get", the extracted section text. |
| 4665 | + * for "replace", the whole page with the section replaced. |
| 4666 | + */ |
| 4667 | + private function extractSections( $text, $section, $mode, $newtext='' ) { |
| 4668 | + # I.... _hope_ this is right. |
| 4669 | + # Otherwise, sometimes we don't have things initialized properly. |
| 4670 | + $this->clearState(); |
| 4671 | + |
| 4672 | + # strip NOWIKI etc. to avoid confusion (true-parameter causes HTML |
| 4673 | + # comments to be stripped as well) |
| 4674 | + $stripState = new StripState; |
| 4675 | + |
| 4676 | + $oldOutputType = $this->mOutputType; |
| 4677 | + $oldOptions = $this->mOptions; |
| 4678 | + $this->mOptions = new ParserOptions(); |
| 4679 | + $this->setOutputType( OT_WIKI ); |
| 4680 | + |
| 4681 | + $striptext = $this->strip( $text, $stripState, true ); |
| 4682 | + |
| 4683 | + $this->setOutputType( $oldOutputType ); |
| 4684 | + $this->mOptions = $oldOptions; |
| 4685 | + |
| 4686 | + # now that we can be sure that no pseudo-sections are in the source, |
| 4687 | + # split it up by section |
| 4688 | + $uniq = preg_quote( $this->uniqPrefix(), '/' ); |
| 4689 | + $comment = "(?:$uniq-!--.*?QINU\x07)"; |
| 4690 | + $secs = preg_split( |
| 4691 | + "/ |
| 4692 | + ( |
| 4693 | + ^ |
| 4694 | + (?:$comment|<\/?noinclude>)* # Initial comments will be stripped |
| 4695 | + (=+) # Should this be limited to 6? |
| 4696 | + .+? # Section title... |
| 4697 | + \\2 # Ending = count must match start |
| 4698 | + (?:$comment|<\/?noinclude>|[ \\t]+)* # Trailing whitespace ok |
| 4699 | + $ |
| 4700 | + | |
| 4701 | + <h([1-6])\b.*?> |
| 4702 | + .*? |
| 4703 | + <\/h\\3\s*> |
| 4704 | + ) |
| 4705 | + /mix", |
| 4706 | + $striptext, -1, |
| 4707 | + PREG_SPLIT_DELIM_CAPTURE); |
| 4708 | + |
| 4709 | + if( $mode == "get" ) { |
| 4710 | + if( $section == 0 ) { |
| 4711 | + // "Section 0" returns the content before any other section. |
| 4712 | + $rv = $secs[0]; |
| 4713 | + } else { |
| 4714 | + //track missing section, will replace if found. |
| 4715 | + $rv = $newtext; |
| 4716 | + } |
| 4717 | + } elseif( $mode == "replace" ) { |
| 4718 | + if( $section == 0 ) { |
| 4719 | + $rv = $newtext . "\n\n"; |
| 4720 | + $remainder = true; |
| 4721 | + } else { |
| 4722 | + $rv = $secs[0]; |
| 4723 | + $remainder = false; |
| 4724 | + } |
| 4725 | + } |
| 4726 | + $count = 0; |
| 4727 | + $sectionLevel = 0; |
| 4728 | + for( $index = 1; $index < count( $secs ); ) { |
| 4729 | + $headerLine = $secs[$index++]; |
| 4730 | + if( $secs[$index] ) { |
| 4731 | + // A wiki header |
| 4732 | + $headerLevel = strlen( $secs[$index++] ); |
| 4733 | + } else { |
| 4734 | + // An HTML header |
| 4735 | + $index++; |
| 4736 | + $headerLevel = intval( $secs[$index++] ); |
| 4737 | + } |
| 4738 | + $content = $secs[$index++]; |
| 4739 | + |
| 4740 | + $count++; |
| 4741 | + if( $mode == "get" ) { |
| 4742 | + if( $count == $section ) { |
| 4743 | + $rv = $headerLine . $content; |
| 4744 | + $sectionLevel = $headerLevel; |
| 4745 | + } elseif( $count > $section ) { |
| 4746 | + if( $sectionLevel && $headerLevel > $sectionLevel ) { |
| 4747 | + $rv .= $headerLine . $content; |
| 4748 | + } else { |
| 4749 | + // Broke out to a higher-level section |
| 4750 | + break; |
| 4751 | + } |
| 4752 | + } |
| 4753 | + } elseif( $mode == "replace" ) { |
| 4754 | + if( $count < $section ) { |
| 4755 | + $rv .= $headerLine . $content; |
| 4756 | + } elseif( $count == $section ) { |
| 4757 | + $rv .= $newtext . "\n\n"; |
| 4758 | + $sectionLevel = $headerLevel; |
| 4759 | + } elseif( $count > $section ) { |
| 4760 | + if( $headerLevel <= $sectionLevel ) { |
| 4761 | + // Passed the section's sub-parts. |
| 4762 | + $remainder = true; |
| 4763 | + } |
| 4764 | + if( $remainder ) { |
| 4765 | + $rv .= $headerLine . $content; |
| 4766 | + } |
| 4767 | + } |
| 4768 | + } |
| 4769 | + } |
| 4770 | + if (is_string($rv)) |
| 4771 | + # reinsert stripped tags |
| 4772 | + $rv = trim( $stripState->unstripBoth( $rv ) ); |
| 4773 | + |
| 4774 | + return $rv; |
| 4775 | + } |
| 4776 | + |
| 4777 | + /** |
| 4778 | + * This function returns the text of a section, specified by a number ($section). |
| 4779 | + * A section is text under a heading like == Heading == or \<h1\>Heading\</h1\>, or |
| 4780 | + * the first section before any such heading (section 0). |
| 4781 | + * |
| 4782 | + * If a section contains subsections, these are also returned. |
| 4783 | + * |
| 4784 | + * @param $text String: text to look in |
| 4785 | + * @param $section Integer: section number |
| 4786 | + * @param $deftext: default to return if section is not found |
| 4787 | + * @return string text of the requested section |
| 4788 | + */ |
| 4789 | + public function getSection( $text, $section, $deftext='' ) { |
| 4790 | + return $this->extractSections( $text, $section, "get", $deftext ); |
| 4791 | + } |
| 4792 | + |
| 4793 | + public function replaceSection( $oldtext, $section, $text ) { |
| 4794 | + return $this->extractSections( $oldtext, $section, "replace", $text ); |
| 4795 | + } |
| 4796 | + |
| 4797 | + /** |
| 4798 | + * Get the timestamp associated with the current revision, adjusted for |
| 4799 | + * the default server-local timestamp |
| 4800 | + */ |
| 4801 | + function getRevisionTimestamp() { |
| 4802 | + if ( is_null( $this->mRevisionTimestamp ) ) { |
| 4803 | + wfProfileIn( __METHOD__ ); |
| 4804 | + global $wgContLang; |
| 4805 | + $dbr = wfGetDB( DB_SLAVE ); |
| 4806 | + $timestamp = $dbr->selectField( 'revision', 'rev_timestamp', |
| 4807 | + array( 'rev_id' => $this->mRevisionId ), __METHOD__ ); |
| 4808 | + |
| 4809 | + // Normalize timestamp to internal MW format for timezone processing. |
| 4810 | + // This has the added side-effect of replacing a null value with |
| 4811 | + // the current time, which gives us more sensible behavior for |
| 4812 | + // previews. |
| 4813 | + $timestamp = wfTimestamp( TS_MW, $timestamp ); |
| 4814 | + |
| 4815 | + // The cryptic '' timezone parameter tells to use the site-default |
| 4816 | + // timezone offset instead of the user settings. |
| 4817 | + // |
| 4818 | + // Since this value will be saved into the parser cache, served |
| 4819 | + // to other users, and potentially even used inside links and such, |
| 4820 | + // it needs to be consistent for all visitors. |
| 4821 | + $this->mRevisionTimestamp = $wgContLang->userAdjust( $timestamp, '' ); |
| 4822 | + |
| 4823 | + wfProfileOut( __METHOD__ ); |
| 4824 | + } |
| 4825 | + return $this->mRevisionTimestamp; |
| 4826 | + } |
| 4827 | + |
| 4828 | + /** |
| 4829 | + * Mutator for $mDefaultSort |
| 4830 | + * |
| 4831 | + * @param $sort New value |
| 4832 | + */ |
| 4833 | + public function setDefaultSort( $sort ) { |
| 4834 | + $this->mDefaultSort = $sort; |
| 4835 | + } |
| 4836 | + |
| 4837 | + /** |
| 4838 | + * Accessor for $mDefaultSort |
| 4839 | + * Will use the title/prefixed title if none is set |
| 4840 | + * |
| 4841 | + * @return string |
| 4842 | + */ |
| 4843 | + public function getDefaultSort() { |
| 4844 | + if( $this->mDefaultSort !== false ) { |
| 4845 | + return $this->mDefaultSort; |
| 4846 | + } else { |
| 4847 | + return $this->mTitle->getNamespace() == NS_CATEGORY |
| 4848 | + ? $this->mTitle->getText() |
| 4849 | + : $this->mTitle->getPrefixedText(); |
| 4850 | + } |
| 4851 | + } |
| 4852 | + |
| 4853 | + /** |
| 4854 | + * Try to guess the section anchor name based on a wikitext fragment |
| 4855 | + * presumably extracted from a heading, for example "Header" from |
| 4856 | + * "== Header ==". |
| 4857 | + */ |
| 4858 | + public function guessSectionNameFromWikiText( $text ) { |
| 4859 | + # Strip out wikitext links(they break the anchor) |
| 4860 | + $text = $this->stripSectionName( $text ); |
| 4861 | + $headline = Sanitizer::decodeCharReferences( $text ); |
| 4862 | + # strip out HTML |
| 4863 | + $headline = StringUtils::delimiterReplace( '<', '>', '', $headline ); |
| 4864 | + $headline = trim( $headline ); |
| 4865 | + $sectionanchor = '#' . urlencode( str_replace( ' ', '_', $headline ) ); |
| 4866 | + $replacearray = array( |
| 4867 | + '%3A' => ':', |
| 4868 | + '%' => '.' |
| 4869 | + ); |
| 4870 | + return str_replace( |
| 4871 | + array_keys( $replacearray ), |
| 4872 | + array_values( $replacearray ), |
| 4873 | + $sectionanchor ); |
| 4874 | + } |
| 4875 | + |
| 4876 | + /** |
| 4877 | + * Strips a text string of wikitext for use in a section anchor |
| 4878 | + * |
| 4879 | + * Accepts a text string and then removes all wikitext from the |
| 4880 | + * string and leaves only the resultant text (i.e. the result of |
| 4881 | + * [[User:WikiSysop|Sysop]] would be "Sysop" and the result of |
| 4882 | + * [[User:WikiSysop]] would be "User:WikiSysop") - this is intended |
| 4883 | + * to create valid section anchors by mimicking the output of the |
| 4884 | + * parser when headings are parsed. |
| 4885 | + * |
| 4886 | + * @param $text string Text string to be stripped of wikitext |
| 4887 | + * for use in a Section anchor |
| 4888 | + * @return Filtered text string |
| 4889 | + */ |
| 4890 | + public function stripSectionName( $text ) { |
| 4891 | + # Strip internal link markup |
| 4892 | + $text = preg_replace('/\[\[:?([^[|]+)\|([^[]+)\]\]/','$2',$text); |
| 4893 | + $text = preg_replace('/\[\[:?([^[]+)\|?\]\]/','$1',$text); |
| 4894 | + |
| 4895 | + # Strip external link markup (FIXME: not tolerant of blank link text, |
| 4896 | + # i.e. [http://www.mediawiki.org] will render as [1] or something depending |
| 4897 | + # on how many empty links there are on the page - need to figure that out). |
| 4898 | + $text = preg_replace('/\[(?:' . wfUrlProtocols() . ')([^ ]+?) ([^[]+)\]/','$2',$text); |
| 4899 | + |
| 4900 | + # Parse wikitext quotes (italics & bold) |
| 4901 | + $text = $this->doQuotes($text); |
| 4902 | + |
| 4903 | + # Strip HTML tags |
| 4904 | + $text = StringUtils::delimiterReplace( '<', '>', '', $text ); |
| 4905 | + return $text; |
| 4906 | + } |
| 4907 | + |
| 4908 | + /** |
| 4909 | + * strip/replaceVariables/unstrip for preprocessor regression testing |
| 4910 | + */ |
| 4911 | + function srvus( $text ) { |
| 4912 | + $text = $this->strip( $text, $this->mStripState ); |
| 4913 | + $text = $this->replaceVariables( $text ); |
| 4914 | + $text = preg_replace( '/<!--MWTEMPLATESECTION.*?-->/', '', $text ); |
| 4915 | + $text = $this->mStripState->unstripBoth( $text ); |
| 4916 | + return $text; |
| 4917 | + } |
| 4918 | +} |
| 4919 | + |
Property changes on: trunk/phase3/includes/Parser_OldPP.php |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 4920 | + native |
Index: trunk/phase3/includes/AutoLoader.php |
— | — | @@ -7,6 +7,8 @@ |
8 | 8 | function __autoload($className) { |
9 | 9 | global $wgAutoloadClasses; |
10 | 10 | |
| 11 | + # Locations of core classes |
| 12 | + # Extension classes are specified with $wgAutoloadClasses |
11 | 13 | static $localClasses = array( |
12 | 14 | # Includes |
13 | 15 | 'AjaxDispatcher' => 'includes/AjaxDispatcher.php', |
— | — | @@ -133,9 +135,11 @@ |
134 | 136 | 'ReverseChronologicalPager' => 'includes/Pager.php', |
135 | 137 | 'TablePager' => 'includes/Pager.php', |
136 | 138 | 'Parser' => 'includes/Parser.php', |
| 139 | + 'Parser_OldPP' => 'includes/Parser_OldPP.php', |
| 140 | + 'Parser_DiffTest' => 'includes/Parser_DiffTest.php', |
| 141 | + 'ParserCache' => 'includes/ParserCache.php', |
137 | 142 | 'ParserOutput' => 'includes/ParserOutput.php', |
138 | 143 | 'ParserOptions' => 'includes/ParserOptions.php', |
139 | | - 'ParserCache' => 'includes/ParserCache.php', |
140 | 144 | 'PatrolLog' => 'includes/PatrolLog.php', |
141 | 145 | 'ProfilerSimple' => 'includes/ProfilerSimple.php', |
142 | 146 | 'ProfilerSimpleUDP' => 'includes/ProfilerSimpleUDP.php', |
Index: trunk/phase3/includes/DefaultSettings.php |
— | — | @@ -881,6 +881,8 @@ |
882 | 882 | $wgMaxSigChars = 255; # Maximum number of Unicode characters in signature |
883 | 883 | $wgMaxArticleSize = 2048; # Maximum article size in kilobytes |
884 | 884 | |
| 885 | +$wgMaxPPNodeCount = 1000000; # A complexity limit on template expansion |
| 886 | + |
885 | 887 | $wgExtraSubtitle = ''; |
886 | 888 | $wgSiteSupportPage = ''; # A page where you users can receive donations |
887 | 889 | |
— | — | @@ -1875,7 +1877,7 @@ |
1876 | 1878 | $wgTidyBin = 'tidy'; |
1877 | 1879 | $wgTidyConf = $IP.'/includes/tidy.conf'; |
1878 | 1880 | $wgTidyOpts = ''; |
1879 | | -$wgTidyInternal = function_exists( 'tidy_load_config' ); |
| 1881 | +$wgTidyInternal = extension_loaded( 'tidy' ); |
1880 | 1882 | |
1881 | 1883 | /** See list of skins and their symbolic names in languages/Language.php */ |
1882 | 1884 | $wgDefaultSkin = 'monobook'; |
— | — | @@ -2782,3 +2784,19 @@ |
2783 | 2785 | */ |
2784 | 2786 | $wgSlaveLagWarning = 10; |
2785 | 2787 | $wgSlaveLagCritical = 30; |
| 2788 | + |
| 2789 | +/** |
| 2790 | + * Parser configuration. Associative array with the following members: |
| 2791 | + * |
| 2792 | + * class The class name |
| 2793 | + * |
| 2794 | + * The entire associative array will be passed through to the constructor as |
| 2795 | + * the first parameter. Note that only Setup.php can use this variable -- |
| 2796 | + * the configuration will change at runtime via $wgParser member functions, so |
| 2797 | + * the contents of this variable will be out-of-date. The variable can only be |
| 2798 | + * changed during LocalSettings.php, in particular, it can't be changed during |
| 2799 | + * an extension setup function. |
| 2800 | + */ |
| 2801 | +$wgParserConf = array( |
| 2802 | + 'class' => 'Parser', |
| 2803 | +); |
Index: trunk/phase3/includes/ParserOptions.php |
— | — | @@ -21,7 +21,9 @@ |
22 | 22 | var $mTidy; # Ask for tidy cleanup |
23 | 23 | var $mInterfaceMessage; # Which lang to call for PLURAL and GRAMMAR |
24 | 24 | var $mMaxIncludeSize; # Maximum size of template expansions, in bytes |
| 25 | + var $mMaxPPNodeCount; # Maximum number of nodes touched by PPFrame::expand() |
25 | 26 | var $mRemoveComments; # Remove HTML comments. ONLY APPLIES TO PREPROCESS OPERATIONS |
| 27 | + var $mTemplateCallback; # Callback for template fetching |
26 | 28 | |
27 | 29 | var $mUser; # Stored user object, just used to initialise the skin |
28 | 30 | |
— | — | @@ -36,7 +38,9 @@ |
37 | 39 | function getTidy() { return $this->mTidy; } |
38 | 40 | function getInterfaceMessage() { return $this->mInterfaceMessage; } |
39 | 41 | function getMaxIncludeSize() { return $this->mMaxIncludeSize; } |
| 42 | + function getMaxPPNodeCount() { return $this->mMaxPPNodeCount; } |
40 | 43 | function getRemoveComments() { return $this->mRemoveComments; } |
| 44 | + function getTemplateCallback() { return $this->mTemplateCallback; } |
41 | 45 | |
42 | 46 | function getSkin() { |
43 | 47 | if ( !isset( $this->mSkin ) ) { |
— | — | @@ -65,7 +69,9 @@ |
66 | 70 | function setSkin( $x ) { $this->mSkin = $x; } |
67 | 71 | function setInterfaceMessage( $x ) { return wfSetVar( $this->mInterfaceMessage, $x); } |
68 | 72 | function setMaxIncludeSize( $x ) { return wfSetVar( $this->mMaxIncludeSize, $x ); } |
| 73 | + function setMaxPPNodeCount( $x ) { return wfSetVar( $this->mMaxPPNodeCount, $x ); } |
69 | 74 | function setRemoveComments( $x ) { return wfSetVar( $this->mRemoveComments, $x ); } |
| 75 | + function setTemplateCallback( $x ) { return wfSetVar( $this->mTemplateCallback, $x ); } |
70 | 76 | |
71 | 77 | function __construct( $user = null ) { |
72 | 78 | $this->initialiseFromUser( $user ); |
— | — | @@ -83,6 +89,7 @@ |
84 | 90 | function initialiseFromUser( $userInput ) { |
85 | 91 | global $wgUseTeX, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages; |
86 | 92 | global $wgAllowExternalImagesFrom, $wgAllowSpecialInclusion, $wgMaxArticleSize; |
| 93 | + global $wgMaxPPNodeCount; |
87 | 94 | $fname = 'ParserOptions::initialiseFromUser'; |
88 | 95 | wfProfileIn( $fname ); |
89 | 96 | if ( !$userInput ) { |
— | — | @@ -111,7 +118,9 @@ |
112 | 119 | $this->mTidy = false; |
113 | 120 | $this->mInterfaceMessage = false; |
114 | 121 | $this->mMaxIncludeSize = $wgMaxArticleSize * 1024; |
| 122 | + $this->mMaxPPNodeCount = $wgMaxPPNodeCount; |
115 | 123 | $this->mRemoveComments = true; |
| 124 | + $this->mTemplateCallback = array( 'Parser', 'statelessFetchTemplate' ); |
116 | 125 | wfProfileOut( $fname ); |
117 | 126 | } |
118 | 127 | } |
Index: trunk/phase3/RELEASE-NOTES |
— | — | @@ -174,6 +174,71 @@ |
175 | 175 | * Make a better rate-limiting error message (i.e. a normal MW error, |
176 | 176 | rather than an "Internal Server Error"). |
177 | 177 | |
| 178 | +== Parser changes in 1.12 == |
| 179 | + |
| 180 | +The parser pass order has changed from |
| 181 | + |
| 182 | + * Extension tag strip and render |
| 183 | + * HTML normalisation and security |
| 184 | + * Template expansion |
| 185 | + * Main section... |
| 186 | + |
| 187 | +to |
| 188 | + |
| 189 | + * Template and extension tag parse to intermediate representation |
| 190 | + * Template expansion and extension rendering |
| 191 | + * HTML normalisation and security |
| 192 | + * Main section... |
| 193 | + |
| 194 | +The main effect of this for the user is that the rules for uncovered syntax |
| 195 | +have changed. |
| 196 | + |
| 197 | +Uncovered main-pass syntax, such as HTML tags, is now generally valid, whereas |
| 198 | +previously in some cases it was escaped. For example, you could have "<ta" in |
| 199 | +one template, and "ble>" in another template, and put them together to make a |
| 200 | +valid <table> tag. Previously the result would have been "&lt;table&gt;". |
| 201 | + |
| 202 | +Uncovered preprocessor syntax is generally not recognised. For example, if you |
| 203 | +have "{{a" in Template:A and "b}}" in Template:B, then "{{a}}{{b}}" will be |
| 204 | +converted to a literal "{{ab}}" rather than the contents of Template:Ab. This |
| 205 | +was the case previously in HTML output mode, and is now uniformly the case in |
| 206 | +the other modes as well. HTML-style comments uncovered by template expansion |
| 207 | +will not be recognised by the preprocessor and hence will not prevent template |
| 208 | +expansion within them, but they will be stripped by the following HTML security |
| 209 | +pass. |
| 210 | + |
| 211 | +The rules for template expansion during message transformation were |
| 212 | +counterintuitive, mostly accidental and buggy. There are a few small changes in |
| 213 | +this version: for example, templates with dynamic names, as in "{{ {{a}} }}", |
| 214 | +are fully expanded as they are in HTML mode, whereas previously only the inner |
| 215 | +template was expanded. I'd like to make some larger breaking changes to message |
| 216 | +transformation, after a review of typical use cases. |
| 217 | + |
| 218 | +The header identification routines for section edit and for numbering section |
| 219 | +edit links have been merged. This removes a significant failure mode and fixes a |
| 220 | +whole category of bugs (tracked by bug #4899). Wikitext headings uncovered by |
| 221 | +template expansion or comment removal will still be rendered into a heading tag, |
| 222 | +and will get an entry in the TOC, but will not have a section edit link. |
| 223 | +HTML-style headings will also not have a section edit link. Valid wikitext |
| 224 | +headings present in the template source text will get a template section edit |
| 225 | +link. This is a major break from previous behaviour, but I believe the effects |
| 226 | +are almost entirely beneficial. |
| 227 | + |
| 228 | +The main motivation for making these changes was performance. The new two-pass |
| 229 | +preprocessor can skip "dead branches" in template expansion, such as unfollowed |
| 230 | +#switch cases and unused defaults for template arguments. This provides a |
| 231 | +significant performance improvement in template-heavy test cases taken from |
| 232 | +Wikipedia. Parser function hooks can participate in this performance improvement |
| 233 | +by using the new SFH_OBJECT_ARGS flag during registration. |
| 234 | + |
| 235 | +The pre-expand include size limit has been removed, since there's no efficient |
| 236 | +way to calculate such a figure, and it would now be meaningless for performance |
| 237 | +anyway. The "preprocessor node count" takes its place, with a generous default |
| 238 | +limit. |
| 239 | + |
| 240 | +The context in which XML-style extension tags are called has changed, so |
| 241 | +extensions which make use of the parser state may need compatibility changes. |
| 242 | + |
178 | 243 | === API changes in 1.12 === |
179 | 244 | |
180 | 245 | Full API documentation is available at http://www.mediawiki.org/wiki/API |