r60868 MediaWiki - Code Review archive

Repository:	MediaWiki
Revision:	< r60867 | r60868 | r60869 >
Date:	18:39, 9 January 2010
Author:	siebrand
Status:	deferred
Tags:
Comment:	(bug 21496) Make WikiArticleFeed work with MediaWiki 1.16alpha. Patch by Jools Wills. Comment by submitter: "I have simplified many of the regular expressions, although they should be enough to match only what they are supposed to. This patch includes making the extraction of the author/date/time work, with a simple --~~~~ or ~~~~ signature."
Modified paths:	/trunk/extensions/WikiArticleFeeds/WikiArticleFeeds.php (modified) (history)

Diff [purge]

Index: trunk/extensions/WikiArticleFeeds/WikiArticleFeeds.php
—	—	@@ -2,7 +2,7 @@
3	3	/*
4	4	* WikiArticleFeeds.php - A MediaWiki extension for converting regular pages into feeds.
5	5	* @author Jim R. Wilson
6		~~- * @version 0.6.3~~
	6	+ * @version 0.6.5
7	7	* @copyright Copyright (C) 2007 Jim R. Wilson
8	8	* @license The MIT License - http://www.opensource.org/licenses/mit-license.php
9	9	* -----------------------------------------------------------------------
—	—	@@ -35,6 +35,8 @@
36	36	* {{#itemTags:dogs, cats}}
37	37	* {{#itemTags:dogs|cats}}
38	38	* Version Notes:
	39	+ * version 0.6.5:
	40	+ * Simplified many regular expression to get it working on MW 1.16
39	41	* version 0.6.4:
40	42	* Small fix for MW 1.14 in which section header anchors changed format.
41	43	* First version to be checked into wikimedia SVN.
—	—	@@ -454,17 +456,19 @@
455	457	if ( $match < $lvl ) $lvl = $match;
456	458	}
457	459
	460	+ $sectionRegExp = '#<h' . $lvl . '>\s*<span.+?id="(.*?)">\s*(.*?)\s*</span>\s*</h' . $lvl . '>#m';
	461	+
458	462	# Determine the item titles and default item links
459	463	preg_match_all(
460		~~- '/<a[^>]*\\s+name=([\'"])(.*?)\\1[^>]*><\\/a><h' . $lvl . '>\\s*(.*?)\\s*<\\/h' . $lvl . '>/m',~~
461		~~- $feedContent,~~
	464	+ $sectionRegExp,
	465	+ $feedContent,
462	466	$matches
463	467	);
464		~~- $itemLinks = $matches[2];~~
465		~~- $itemTitles = $matches[3];~~
	468	+ $itemLinks = $matches[1];
	469	+ $itemTitles = $matches[2];
466	470
467	471	# Split content into segments
468		~~- $segments = preg_split( '/<a name=([\'"]).*?\\1\\s*><\\/a><h' . $lvl . '>.*?<\\/h' . $lvl . '>/m', $feedContent );~~
	472	+ $segments = preg_split( $sectionRegExp, $feedContent );
469	473	$segDesc = trim( strip_tags( array_shift( $segments ) ) );
470	474	if ( $segDesc ) {
471	475	if ( !$feedDescription ) {
—	—	@@ -505,26 +509,13 @@
506	510	# Determine the item author and date
507	511	$author = null;
508	512	$date = null;
509		-
	513	+ $signatureRegExp = '#<a href=".+?User:.+?" title="User:.+?">(.*?)</a> (\d\d):(\d\d), (\d+) ([a-z]+) (\d{4}) \([A-Z]+\)#im';
510	514	# Look for a regular ~~~~ sig
511		~~- $isAttributable = preg_match(~~
512		~~- '%<a [^>]*href=([\'"])' . preg_quote( $wgScript ) . '(/|\\?title=)User:.*?\\1[^>]*>(.*?)</a> (\\d\\d):(\\d\\d), (\\d+) ([A-z][a-z]+) (\\d{4}) \\([A-Z]+\\)%m',~~
513		~~- $seg,~~
514		~~- $matches~~
515		~~- );~~
	515	+ $isAttributable = preg_match($signatureRegExp, $seg, $matches );
516	516
517		~~- # As a fallback - look for a --~~~~ like sig with a user page outside the User NS~~
518		~~- if ( !$isAttributable ) {~~
519		~~- $isAttributable = preg_match(~~
520		~~- '%--<a [^>]*href=([\'"])' . preg_quote( $wgScript ) . '(/|\\?title=).*?\\1[^>]*>(.*?)</a> (\\d\\d):(\\d\\d), (\\d+) ([A-z][a-z]+) (\\d{4}) \\([A-Z]+\\)%m',~~
521		~~- $seg,~~
522		~~- $matches~~
523		~~- );~~
524		~~- }~~
525		-
526	517	# Parse it out - if we can
527	518	if ( $isAttributable ) {
528		~~- list( $author, $hour, $min, $day, $monthName, $year ) = array_slice( $matches, 3 );~~
	519	+ list( $author, $hour, $min, $day, $monthName, $year ) = array_slice( $matches, 1 );
529	520	$months = array(
530	521	'January' => '01', 'February' => '02', 'March' => '03', 'April' => '04',
531	522	'May' => '05', 'June' => '06', 'July' => '07', 'August' => '08',
—	—	@@ -541,22 +532,15 @@
542	533	# Look for an alternative to the default link (unless default 'section linking' has been forced)
543	534	global $wgForceArticleFeedSectionLinks;
544	535	if ( !$wgForceArticleFeedSectionLinks ) {
545		~~- $strippedSeg = preg_replace(~~
546		~~- array(~~
547		~~- '%<a [^>]*href=([\'"])' . preg_quote( $wgScript ) . '(/|\\?title=)User:.*?\\1[^>]*>(.*?)</a> (\\d\\d:\\d\\d, \\d+ [A-z][a-z]+ \\d{4} \\([A-Z]+\\))%m',~~
548		~~- '%--<a [^>]*href=([\'"])' . preg_quote( $wgScript ) . '(/|\\?title=).*?\\1[^>]*>(.*?)</a> (\\d\\d:\\d\\d, \\d+ [A-z][a-z]+ \\d{4} \\([A-Z]+\\))%m'~~
549		~~- ),~~
550		~~- '',~~
551		~~- $seg~~
552		~~- );~~
	536	+ $strippedSeg = preg_replace($signatureRegExp, '', $seg );
553	537	preg_match(
554		~~- '%<a [^>]*href=([\'"])(.*?)\\1[^>]*>(.*?)</a>%m',~~
555		~~- $strippedSeg,~~
556		~~- $matches~~
557		~~- );~~
	538	+ '#<a [^>]*href=([\'"])(.*?)\\1[^>]*>(.*?)</a>#m',
	539	+ $strippedSeg,
	540	+ $matches
	541	+ );
558	542	if ( $matches[2] ) {
559	543	$url = $matches[2];
560		~~- if ( preg_match( '%^/%', $url ) ) {~~
	544	+ if ( preg_match( '#^/#', $url ) ) {
561	545	$url = $wgServer . $url;
562	546	}
563	547	}

Status & tagging log

03:22, 10 January 2010 😂 (talk | contribs) changed the status of r60868 [removed: new added: deferred]