r73595 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r73594‎ | r73595 | r73596 >
Date:06:49, 23 September 2010
Author:aj
Status:deferred (Comments)
Tags:
Comment:
added mediawiki patch
Modified paths:
  • /trunk/parsers/libmwparser/mediawiki.patch (added) (history)

Diff [purge]

Index: trunk/parsers/libmwparser/mediawiki.patch
@@ -0,0 +1,478 @@
 2+Index: includes/parser/Parser.php
 3+===================================================================
 4+--- includes/parser/Parser.php (revision 69308)
 5+@@ -7,6 +7,9 @@
 6+ * File for Parser and related classes
 7+ */
 8+
 9++include_once('MediaWiki/mwp.php');
 10++include_once('LinkResolver.php');
 11++define('USE_LIBMWPARSER', true);
 12+
 13+ /**
 14+ * PHP Parser - Processes wiki markup (which uses a more user-friendly
 15+@@ -114,6 +117,7 @@
 16+ var $mRevisionTimestamp; # The timestamp of the specified revision ID
 17+ var $mRevIdForTs; # The revision ID which was used to fetch the timestamp
 18+
 19++
 20+ /**
 21+ * Constructor
 22+ *
 23+@@ -262,13 +266,17 @@
 24+ wfProfileIn( __METHOD__ );
 25+ wfProfileIn( $fname );
 26+
 27++
 28+ if ( $clearState ) {
 29+ $this->clearState();
 30+ }
 31+
 32+ $this->mOptions = $options;
 33+ $this->setTitle( $title ); # Page title has to be set for the pre-processor
 34++ $rawTitle = $title;
 35+
 36++ $rawText = $text;
 37++
 38+ $oldRevisionId = $this->mRevisionId;
 39+ $oldRevisionTimestamp = $this->mRevisionTimestamp;
 40+ if ( $revid !== null ) {
 41+@@ -281,143 +289,156 @@
 42+ wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) );
 43+ $text = $this->internalParse( $text );
 44+
 45+- $text = $this->mStripState->unstripGeneral( $text );
 46++ if (!USE_LIBMWPARSER) {
 47++ $text = $this->mStripState->unstripGeneral( $text );
 48+
 49+- # Clean up special characters, only run once, next-to-last before doBlockLevels
 50+- $fixtags = array(
 51+- # french spaces, last one Guillemet-left
 52+- # only if there is something before the space
 53+- '/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1 \\2',
 54+- # french spaces, Guillemet-right
 55+- '/(\\302\\253) /' => '\\1 ',
 56+- '/ (!\s*important)/' => ' \\1', # Beware of CSS magic word !important, bug #11874.
 57+- );
 58+- $text = preg_replace( array_keys( $fixtags ), array_values( $fixtags ), $text );
 59++ # Clean up special characters, only run once, next-to-last before doBlockLevels
 60++ $fixtags = array(
 61++ # french spaces, last one Guillemet-left
 62++ # only if there is something before the space
 63++ '/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1 \\2',
 64++ # french spaces, Guillemet-right
 65++ '/(\\302\\253) /' => '\\1 ',
 66++ '/ (!\s*important)/' => ' \\1', # Beware of CSS magic word !important, bug #11874.
 67++ );
 68++ $text = preg_replace( array_keys( $fixtags ), array_values( $fixtags ), $text );
 69+
 70+- $text = $this->doBlockLevels( $text, $linestart );
 71++ $text = $this->doBlockLevels( $text, $linestart );
 72+
 73+- $this->replaceLinkHolders( $text );
 74++ $this->replaceLinkHolders( $text );
 75+
 76+- /**
 77+- * The page doesn't get language converted if
 78+- * a) It's disabled
 79+- * b) Content isn't converted
 80+- * c) It's a conversion table
 81+- */
 82+- if ( !( $wgDisableLangConversion
 83+- || isset( $this->mDoubleUnderscores['nocontentconvert'] )
 84+- || $this->mTitle->isConversionTable() ) ) {
 85++ /**
 86++ * The page doesn't get language converted if
 87++ * a) It's disabled
 88++ * b) Content isn't converted
 89++ * c) It's a conversion table
 90++ */
 91++ if ( !( $wgDisableLangConversion
 92++ || isset( $this->mDoubleUnderscores['nocontentconvert'] )
 93++ || $this->mTitle->isConversionTable() ) ) {
 94+
 95+ # The position of the convert() call should not be changed. it
 96+- # assumes that the links are all replaced and the only thing left
 97+- # is the <nowiki> mark.
 98++ # assumes that the links are all replaced and the only thing left
 99++ # is the <nowiki> mark.
 100+
 101+- $text = $wgContLang->convert( $text );
 102+- }
 103++ $text = $wgContLang->convert( $text );
 104++ }
 105+
 106+- /**
 107+- * A page get its title converted except:
 108+- * a) Language conversion is globally disabled
 109+- * b) Title convert is globally disabled
 110+- * c) The page is a redirect page
 111+- * d) User request with a "linkconvert" set to "no"
 112+- * e) A "nocontentconvert" magic word has been set
 113+- * f) A "notitleconvert" magic word has been set
 114+- * g) User sets "noconvertlink" in his/her preference
 115+- *
 116+- * Note that if a user tries to set a title in a conversion
 117+- * rule but content conversion was not done, then the parser
 118+- * won't pick it up. This is probably expected behavior.
 119+- */
 120+- if ( !( $wgDisableLangConversion
 121+- || $wgDisableTitleConversion
 122+- || isset( $this->mDoubleUnderscores['nocontentconvert'] )
 123+- || isset( $this->mDoubleUnderscores['notitleconvert'] )
 124+- || $this->mOutput->getDisplayTitle() !== false ) )
 125+- {
 126+- $convruletitle = $wgContLang->getConvRuleTitle();
 127+- if ( $convruletitle ) {
 128++ /**
 129++ * A page get its title converted except:
 130++ * a) Language conversion is globally disabled
 131++ * b) Title convert is globally disabled
 132++ * c) The page is a redirect page
 133++ * d) User request with a "linkconvert" set to "no"
 134++ * e) A "nocontentconvert" magic word has been set
 135++ * f) A "notitleconvert" magic word has been set
 136++ * g) User sets "noconvertlink" in his/her preference
 137++ *
 138++ * Note that if a user tries to set a title in a conversion
 139++ * rule but content conversion was not done, then the parser
 140++ * won't pick it up. This is probably expected behavior.
 141++ */
 142++ if ( !( $wgDisableLangConversion
 143++ || $wgDisableTitleConversion
 144++ || isset( $this->mDoubleUnderscores['nocontentconvert'] )
 145++ || isset( $this->mDoubleUnderscores['notitleconvert'] )
 146++ || $this->mOutput->getDisplayTitle() !== false ) )
 147++ {
 148++ $convruletitle = $wgContLang->getConvRuleTitle();
 149++ if ( $convruletitle ) {
 150+ $this->mOutput->setTitleText( $convruletitle );
 151+- } else {
 152++ } else {
 153+ $titleText = $wgContLang->convertTitle( $title );
 154+ $this->mOutput->setTitleText( $titleText );
 155+- }
 156+- }
 157++ }
 158++ }
 159+
 160+- $text = $this->mStripState->unstripNoWiki( $text );
 161++ $text = $this->mStripState->unstripNoWiki( $text );
 162+
 163+- wfRunHooks( 'ParserBeforeTidy', array( &$this, &$text ) );
 164++ wfRunHooks( 'ParserBeforeTidy', array( &$this, &$text ) );
 165+
 166+-//!JF Move to its own function
 167++ //!JF Move to its own function
 168+
 169+- $uniq_prefix = $this->mUniqPrefix;
 170+- $matches = array();
 171+- $elements = array_keys( $this->mTransparentTagHooks );
 172+- $text = self::extractTagsAndParams( $elements, $text, $matches, $uniq_prefix );
 173++ $uniq_prefix = $this->mUniqPrefix;
 174++ $matches = array();
 175++ $elements = array_keys( $this->mTransparentTagHooks );
 176++ $text = self::extractTagsAndParams( $elements, $text, $matches, $uniq_prefix );
 177+
 178+- foreach ( $matches as $marker => $data ) {
 179++ foreach ( $matches as $marker => $data ) {
 180+ list( $element, $content, $params, $tag ) = $data;
 181+ $tagName = strtolower( $element );
 182+ if ( isset( $this->mTransparentTagHooks[$tagName] ) ) {
 183+- $output = call_user_func_array( $this->mTransparentTagHooks[$tagName], array( $content, $params, $this ) );
 184++ $output = call_user_func_array( $this->mTransparentTagHooks[$tagName], array( $content, $params, $this ) );
 185+ } else {
 186+- $output = $tag;
 187++ $output = $tag;
 188+ }
 189+ $this->mStripState->general->setPair( $marker, $output );
 190+- }
 191+- $text = $this->mStripState->unstripGeneral( $text );
 192++ }
 193++ $text = $this->mStripState->unstripGeneral( $text );
 194+
 195+- $text = Sanitizer::normalizeCharReferences( $text );
 196++ $text = Sanitizer::normalizeCharReferences( $text );
 197+
 198+- if ( ( $wgUseTidy && $this->mOptions->mTidy ) || $wgAlwaysUseTidy ) {
 199++ if ( ( $wgUseTidy && $this->mOptions->mTidy ) || $wgAlwaysUseTidy ) {
 200+ $text = MWTidy::tidy( $text );
 201+- } else {
 202++ } else {
 203+ # attempt to sanitize at least some nesting problems
 204+- # (bug #2702 and quite a few others)
 205+- $tidyregs = array(
 206+- # ''Something [http://www.cool.com cool''] -->
 207+- # <i>Something</i><a href="http://www.cool.com"..><i>cool></i></a>
 208+- '/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' =>
 209+- '\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
 210+- # fix up an anchor inside another anchor, only
 211+- # at least for a single single nested link (bug 3695)
 212+- '/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' =>
 213+- '\\1\\2</a>\\3</a>\\1\\4</a>',
 214+- # fix div inside inline elements- doBlockLevels won't wrap a line which
 215+- # contains a div, so fix it up here; replace
 216+- # div with escaped text
 217+- '/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
 218+- '\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9',
 219+- # remove empty italic or bold tag pairs, some
 220+- # introduced by rules above
 221+- '/<([bi])><\/\\1>/' => '',
 222+- );
 223++ # (bug #2702 and quite a few others)
 224++ $tidyregs = array(
 225++ # ''Something [http://www.cool.com cool''] -->
 226++ # <i>Something</i><a href="http://www.cool.com"..><i>cool></i></a>
 227++ '/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' =>
 228++ '\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
 229++ # fix up an anchor inside another anchor, only
 230++ # at least for a single single nested link (bug 3695)
 231++ '/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' =>
 232++ '\\1\\2</a>\\3</a>\\1\\4</a>',
 233++ # fix div inside inline elements- doBlockLevels won't wrap a line which
 234++ # contains a div, so fix it up here; replace
 235++ # div with escaped text
 236++ '/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
 237++ '\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9',
 238++ # remove empty italic or bold tag pairs, some
 239++ # introduced by rules above
 240++ '/<([bi])><\/\\1>/' => '',
 241++ );
 242+
 243+ $text = preg_replace(
 244+- array_keys( $tidyregs ),
 245+- array_values( $tidyregs ),
 246+- $text );
 247+- }
 248+- global $wgExpensiveParserFunctionLimit;
 249+- if ( $this->mExpensiveFunctionCount > $wgExpensiveParserFunctionLimit ) {
 250++ array_keys( $tidyregs ),
 251++ array_values( $tidyregs ),
 252++ $text );
 253++ }
 254++ global $wgExpensiveParserFunctionLimit;
 255++ if ( $this->mExpensiveFunctionCount > $wgExpensiveParserFunctionLimit ) {
 256+ $this->limitationWarn( 'expensive-parserfunction', $this->mExpensiveFunctionCount, $wgExpensiveParserFunctionLimit );
 257+- }
 258++ }
 259+
 260+- wfRunHooks( 'ParserAfterTidy', array( &$this, &$text ) );
 261++ wfRunHooks( 'ParserAfterTidy', array( &$this, &$text ) );
 262+
 263+- # Information on include size limits, for the benefit of users who try to skirt them
 264+- if ( $this->mOptions->getEnableLimitReport() ) {
 265+- $max = $this->mOptions->getMaxIncludeSize();
 266+- $PFreport = "Expensive parser function count: {$this->mExpensiveFunctionCount}/$wgExpensiveParserFunctionLimit\n";
 267+- $limitReport =
 268+- "NewPP limit report\n" .
 269+- "Preprocessor node count: {$this->mPPNodeCount}/{$this->mOptions->mMaxPPNodeCount}\n" .
 270+- "Post-expand include size: {$this->mIncludeSizes['post-expand']}/$max bytes\n" .
 271+- "Template argument size: {$this->mIncludeSizes['arg']}/$max bytes\n".
 272+- $PFreport;
 273+- wfRunHooks( 'ParserLimitReport', array( $this, &$limitReport ) );
 274+- $text .= "\n<!-- \n$limitReport-->\n";
 275+- }
 276++ # Information on include size limits, for the benefit of users who try to skirt them
 277++ if ( $this->mOptions->getEnableLimitReport() ) {
 278++ $max = $this->mOptions->getMaxIncludeSize();
 279++ $PFreport = "Expensive parser function count: {$this->mExpensiveFunctionCount}/$wgExpensiveParserFunctionLimit\n";
 280++ $limitReport =
 281++ "NewPP limit report\n" .
 282++ "Preprocessor node count: {$this->mPPNodeCount}/{$this->mOptions->mMaxPPNodeCount}\n" .
 283++ "Post-expand include size: {$this->mIncludeSizes['post-expand']}/$max bytes\n" .
 284++ "Template argument size: {$this->mIncludeSizes['arg']}/$max bytes\n".
 285++ $PFreport;
 286++ wfRunHooks( 'ParserLimitReport', array( $this, &$limitReport ) );
 287++ $text .= "\n<!-- \n$limitReport-->\n";
 288++ }
 289++ } else {
 290++ # libmwparse
 291++ $istream = new_MWParserInput($text);
 292++
 293++ $parser = new_MWParser($istream);
 294++
 295++ $text = implode('', MWParseArticle($parser));
 296++ delete_MWParser($parser);
 297++ delete_MWParserInput($istream);
 298++
 299++ }
 300++
 301+ $this->mOutput->setText( $text );
 302+
 303+ $this->mRevisionId = $oldRevisionId;
 304+@@ -1051,38 +1072,42 @@
 305+ $text = $frame->expand( $dom );
 306+ } else {
 307+ # if $frame is not provided, then use old-style replaceVariables
 308+- $text = $this->replaceVariables( $text );
 309++ if (!USE_LIBMWPARSER) {
 310++ $text = $this->replaceVariables( $text );
 311++ }
 312+ }
 313+
 314+- $text = Sanitizer::removeHTMLtags( $text, array( &$this, 'attributeStripCallback' ), false, array_keys( $this->mTransparentTagHooks ) );
 315+- wfRunHooks( 'InternalParseBeforeLinks', array( &$this, &$text, &$this->mStripState ) );
 316++ if (!USE_LIBMWPARSER) {
 317++ $text = Sanitizer::removeHTMLtags( $text, array( &$this, 'attributeStripCallback' ), false, array_keys( $this->mTransparentTagHooks ) );
 318++ wfRunHooks( 'InternalParseBeforeLinks', array( &$this, &$text, &$this->mStripState ) );
 319+
 320+- # Tables need to come after variable replacement for things to work
 321+- # properly; putting them before other transformations should keep
 322+- # exciting things like link expansions from showing up in surprising
 323+- # places.
 324+- $text = $this->doTableStuff( $text );
 325++ # Tables need to come after variable replacement for things to work
 326++ # properly; putting them before other transformations should keep
 327++ # exciting things like link expansions from showing up in surprising
 328++ # places.
 329++ $text = $this->doTableStuff( $text );
 330+
 331+- $text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );
 332+-
 333++ $text = preg_replace( '/(^|\n)-----*'.'/', '\\1<hr />', $text );
 334++ }
 335+ $text = $this->doDoubleUnderscore( $text );
 336+-
 337+- $text = $this->doHeadings( $text );
 338+- if ( $this->mOptions->getUseDynamicDates() ) {
 339++ if (!USE_LIBMWPARSER) {
 340++ $text = $this->doHeadings( $text );
 341++ if ( $this->mOptions->getUseDynamicDates() ) {
 342+ $df = DateFormatter::getInstance();
 343+ $text = $df->reformat( $this->mOptions->getDateFormat(), $text );
 344+- }
 345+- $text = $this->replaceInternalLinks( $text );
 346+- $text = $this->doAllQuotes( $text );
 347+- $text = $this->replaceExternalLinks( $text );
 348++ }
 349++ $text = $this->replaceInternalLinks( $text );
 350++ $text = $this->doAllQuotes( $text );
 351++ $text = $this->replaceExternalLinks( $text );
 352+
 353+- # replaceInternalLinks may sometimes leave behind
 354+- # absolute URLs, which have to be masked to hide them from replaceExternalLinks
 355+- $text = str_replace( $this->mUniqPrefix.'NOPARSE', '', $text );
 356++ # replaceInternalLinks may sometimes leave behind
 357++ # absolute URLs, which have to be masked to hide them from replaceExternalLinks
 358++ $text = str_replace( $this->mUniqPrefix.'NOPARSE', '', $text );
 359+
 360+- $text = $this->doMagicLinks( $text );
 361+- $text = $this->formatHeadings( $text, $origText, $isMain );
 362+-
 363++ $text = $this->doMagicLinks( $text );
 364++ $text = $this->formatHeadings( $text, $origText, $isMain );
 365++
 366++ }
 367+ wfProfileOut( __METHOD__ );
 368+ return $text;
 369+ }
 370+Index: includes/parser/LinkResolver.php
 371+===================================================================
 372+--- includes/parser/LinkResolver.php (revision 0)
 373+@@ -0,0 +1,103 @@
 374++<?php
 375++
 376++include_once('MediaWiki/mwp.php');
 377++
 378++# TODO add support for GetLinkColors hook here
 379++
 380++function MWParserLinkResolverCallback($linkCollection) {
 381++ $lcKeys = MWLinkCollectionGet($linkCollection);
 382++ $linkCache = LinkCache::singleton();
 383++ $dbr = wfGetDB( DB_SLAVE );
 384++ $page = $dbr->tableName( 'page' );
 385++
 386++ $links = array();
 387++
 388++ foreach ($lcKeys as $lcKey) {
 389++ $type = MWLCKeyGetLinkType($lcKey);
 390++ if ($type == MWLT_EXTERNAL) {
 391++ continue;
 392++ }
 393++ $title = Title::newFromText(MWLCKeyGetLinkTitle($lcKey));
 394++ if ($type == MWLT_INTERNAL) {
 395++ $ns = $title->getNamespace();
 396++ $links[$title->getDBKey()] = $lcKey;
 397++
 398++ # Not in the link cache, add it to the query
 399++ if ( !isset( $current ) ) {
 400++ $current = $ns;
 401++ $query = "SELECT page_id, page_namespace, page_title, page_is_redirect, page_len, page_latest";
 402++ $query .= " FROM $page WHERE (page_namespace=$ns AND page_title IN(";
 403++ } elseif ( $current != $ns ) {
 404++ $current = $ns;
 405++ $query .= ")) OR (page_namespace=$ns AND page_title IN(";
 406++ } else {
 407++ $query .= ', ';
 408++ }
 409++
 410++ $query .= $dbr->addQuotes( $title->getDBkey() );
 411++ } elseif ($type == MWLT_MEDIA) {
 412++ $linkResolution = new_MWLINKRESOLUTION();
 413++ $time = false;
 414++ // TODO File hook
 415++ $img = wfFindFile( $title, array( 'time' => $time ) );
 416++ $alt = htmlspecialchars( $title->getText(), ENT_QUOTES );
 417++ MWLINKRESOLUTION_alt_set($linkResolution, $alt);
 418++ MWLINKRESOLUTION_url_set($linkResolution, $title->getLocalURL() );
 419++ if ($img) {
 420++ MWLINKRESOLUTION_imageUrl_set($linkResolution, htmlspecialchars($img->getURL()));
 421++ MWLINKRESOLUTION_c_class_set($linkResolution, 'internal');
 422++ if ($img->getWidth()) {
 423++ MWLINKRESOLUTION_imageWidth_set($linkResolution, $img->getWidth());
 424++ }
 425++ if ($img->getHeight()) {
 426++ MWLINKRESOLUTION_imageHeight_set($linkResolution, $img->getHeight());
 427++ }
 428++ } else {
 429++ MWLINKRESOLUTION_c_class_set($linkResolution, 'new');
 430++ }
 431++ MWLinkCollectionResolve($linkCollection, $lcKey, $linkResolution);
 432++ }
 433++ }
 434++
 435++ if ( $query ) {
 436++ $query .= '))';
 437++
 438++ wfDebugLog('linkresolver', "Link resolver callback query: '$query'");
 439++ $res = $dbr->query( $query, __METHOD__ );
 440++
 441++ # Fetch data and form into an associative array
 442++ # non-existent = broken
 443++ while ( $s = $dbr->fetchObject($res) ) {
 444++ $title = Title::makeTitle( $s->page_namespace, $s->page_title );
 445++ $lcKey = $links[$title->getDBKey()];
 446++ unset($links[$title->getDBKey()]);
 447++ wfDebugLog('linkresolver', 'Resolving ' . MWLCKeyGetLinkTitle($lcKey) . ' to ' . $title->getLocalURL());
 448++ $linkResolution = new_MWLINKRESOLUTION();
 449++ MWLINKRESOLUTION_url_set($linkResolution, $title->getLocalURL());
 450++ $linkCache->addGoodLinkObj( $s->page_id, $title, $s->page_len, $s->page_is_redirect, $s->page_latest );
 451++ MWLINKRESOLUTION_color_set($linkResolution, MWLINKCOLOR_BLUE);
 452++ MWLinkCollectionResolve($linkCollection, $lcKey, $linkResolution);
 453++ }
 454++ unset( $res );
 455++ }
 456++
 457++ foreach ($links as $lcKey) {
 458++ $title = Title::newFromText(MWLCKeyGetLinkTitle($lcKey));
 459++ $linkResolution = new_MWLINKRESOLUTION();
 460++
 461++ if ($title->isKnown()) {
 462++ MWLINKRESOLUTION_url_set($linkResolution, $title->getLocalURL());
 463++ MWLINKRESOLUTION_color_set($linkResolution, MWLINKCOLOR_BLUE);
 464++ } else {
 465++ MWLINKRESOLUTION_url_set($linkResolution, $title->getLocalURL('action=edit&redlink=1'));
 466++ MWLINKRESOLUTION_color_set($linkResolution, MWLINKCOLOR_RED);
 467++ }
 468++ MWLinkCollectionResolve($linkCollection, $lcKey, $linkResolution);
 469++ }
 470++
 471++
 472++ wfProfileOut( __METHOD__ );
 473++
 474++}
 475++
 476++?>
 477+\ No newline at end of file

Comments

#Comment by Tim Starling (talk | contribs)   06:51, 23 September 2010

Can't you do a subclass instead of a patch?

Status & tagging log