r60868 MediaWiki - Code Review archive

Repository: MediaWiki
Revision: < r60867 | r60868 | r60869 >
Date: 18:39, 9 January 2010
Author: siebrand
Status: deferred
Tags:
Comment:
(bug 21496) Make WikiArticleFeed work with MediaWiki 1.16alpha. Patch by Jools Wills.

Comment by submitter: "I have simplified many of the regular expressions, although they should be enough to match only what they are supposed to. This patch includes making the extraction of the author/date/time work, with a simple --~~~~ or ~~~~ signature."
Modified paths:
  • /trunk/extensions/WikiArticleFeeds/WikiArticleFeeds.php (modified) (history)

Diff [purge]

Index: trunk/extensions/WikiArticleFeeds/WikiArticleFeeds.php
@@ -2,7 +2,7 @@
33 /*
44 * WikiArticleFeeds.php - A MediaWiki extension for converting regular pages into feeds.
55 * @author Jim R. Wilson
6 - * @version 0.6.3
 6+ * @version 0.6.5
77 * @copyright Copyright (C) 2007 Jim R. Wilson
88 * @license The MIT License - http://www.opensource.org/licenses/mit-license.php
99 * -----------------------------------------------------------------------
@@ -35,6 +35,8 @@
3636 * {{#itemTags:dogs, cats}}
3737 * {{#itemTags:dogs|cats}}
3838 * Version Notes:
 39+ * version 0.6.5:
 40+ * Simplified many regular expression to get it working on MW 1.16
3941 * version 0.6.4:
4042 * Small fix for MW 1.14 in which section header anchors changed format.
4143 * First version to be checked into wikimedia SVN.
@@ -454,17 +456,19 @@
455457 if ( $match < $lvl ) $lvl = $match;
456458 }
457459
 460+ $sectionRegExp = '#<h' . $lvl . '>\s*<span.+?id="(.*?)">\s*(.*?)\s*</span>\s*</h' . $lvl . '>#m';
 461+
458462 # Determine the item titles and default item links
459463 preg_match_all(
460 - '/<a[^>]*\\s+name=([\'"])(.*?)\\1[^>]*><\\/a><h' . $lvl . '>\\s*(.*?)\\s*<\\/h' . $lvl . '>/m',
461 - $feedContent,
 464+ $sectionRegExp,
 465+ $feedContent,
462466 $matches
463467 );
464 - $itemLinks = $matches[2];
465 - $itemTitles = $matches[3];
 468+ $itemLinks = $matches[1];
 469+ $itemTitles = $matches[2];
466470
467471 # Split content into segments
468 - $segments = preg_split( '/<a name=([\'"]).*?\\1\\s*><\\/a><h' . $lvl . '>.*?<\\/h' . $lvl . '>/m', $feedContent );
 472+ $segments = preg_split( $sectionRegExp, $feedContent );
469473 $segDesc = trim( strip_tags( array_shift( $segments ) ) );
470474 if ( $segDesc ) {
471475 if ( !$feedDescription ) {
@@ -505,26 +509,13 @@
506510 # Determine the item author and date
507511 $author = null;
508512 $date = null;
509 -
 513+ $signatureRegExp = '#<a href=".+?User:.+?" title="User:.+?">(.*?)</a> (\d\d):(\d\d), (\d+) ([a-z]+) (\d{4}) \([A-Z]+\)#im';
510514 # Look for a regular ~~~~ sig
511 - $isAttributable = preg_match(
512 - '%<a [^>]*href=([\'"])' . preg_quote( $wgScript ) . '(/|\\?title=)User:.*?\\1[^>]*>(.*?)</a> (\\d\\d):(\\d\\d), (\\d+) ([A-z][a-z]+) (\\d{4}) \\([A-Z]+\\)%m',
513 - $seg,
514 - $matches
515 - );
 515+ $isAttributable = preg_match($signatureRegExp, $seg, $matches );
516516
517 - # As a fallback - look for a --~~~~ like sig with a user page outside the User NS
518 - if ( !$isAttributable ) {
519 - $isAttributable = preg_match(
520 - '%--<a [^>]*href=([\'"])' . preg_quote( $wgScript ) . '(/|\\?title=).*?\\1[^>]*>(.*?)</a> (\\d\\d):(\\d\\d), (\\d+) ([A-z][a-z]+) (\\d{4}) \\([A-Z]+\\)%m',
521 - $seg,
522 - $matches
523 - );
524 - }
525 -
526517 # Parse it out - if we can
527518 if ( $isAttributable ) {
528 - list( $author, $hour, $min, $day, $monthName, $year ) = array_slice( $matches, 3 );
 519+ list( $author, $hour, $min, $day, $monthName, $year ) = array_slice( $matches, 1 );
529520 $months = array(
530521 'January' => '01', 'February' => '02', 'March' => '03', 'April' => '04',
531522 'May' => '05', 'June' => '06', 'July' => '07', 'August' => '08',
@@ -541,22 +532,15 @@
542533 # Look for an alternative to the default link (unless default 'section linking' has been forced)
543534 global $wgForceArticleFeedSectionLinks;
544535 if ( !$wgForceArticleFeedSectionLinks ) {
545 - $strippedSeg = preg_replace(
546 - array(
547 - '%<a [^>]*href=([\'"])' . preg_quote( $wgScript ) . '(/|\\?title=)User:.*?\\1[^>]*>(.*?)</a> (\\d\\d:\\d\\d, \\d+ [A-z][a-z]+ \\d{4} \\([A-Z]+\\))%m',
548 - '%--<a [^>]*href=([\'"])' . preg_quote( $wgScript ) . '(/|\\?title=).*?\\1[^>]*>(.*?)</a> (\\d\\d:\\d\\d, \\d+ [A-z][a-z]+ \\d{4} \\([A-Z]+\\))%m'
549 - ),
550 - '',
551 - $seg
552 - );
 536+ $strippedSeg = preg_replace($signatureRegExp, '', $seg );
553537 preg_match(
554 - '%<a [^>]*href=([\'"])(.*?)\\1[^>]*>(.*?)</a>%m',
555 - $strippedSeg,
556 - $matches
557 - );
 538+ '#<a [^>]*href=([\'"])(.*?)\\1[^>]*>(.*?)</a>#m',
 539+ $strippedSeg,
 540+ $matches
 541+ );
558542 if ( $matches[2] ) {
559543 $url = $matches[2];
560 - if ( preg_match( '%^/%', $url ) ) {
 544+ if ( preg_match( '#^/#', $url ) ) {
561545 $url = $wgServer . $url;
562546 }
563547 }

Status & tagging log