r82212 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r82211‎ | r82212 | r82213 >
Date:03:11, 16 February 2011
Author:yaron
Status:resolved (Comments)
Tags:
Comment:
Added handling for regular expressions
Modified paths:
  • /trunk/extensions/ReplaceText/SpecialReplaceText.php (modified) (history)

Diff [purge]

Index: trunk/extensions/ReplaceText/SpecialReplaceText.php
@@ -1,6 +1,7 @@
2 -<?php
 2+<?php
33
44 class ReplaceText extends SpecialPage {
 5+
56 /**
67 * Constructor
78 */
@@ -42,6 +43,7 @@
4344
4445 $this->target = $wgRequest->getText( 'target' );
4546 $this->replacement = $wgRequest->getText( 'replacement' );
 47+ $this->use_regex = ( $wgRequest->getVal( 'use_regex' ) == 1 );
4648 $this->category = $wgRequest->getText( 'category' );
4749 $this->prefix = $wgRequest->getText( 'prefix' );
4850 $this->edit_pages = ( $wgRequest->getVal( 'edit_pages' ) == 1 );
@@ -60,6 +62,7 @@
6163 $replacement_params['user_id'] = $wgUser->getId();
6264 $replacement_params['target_str'] = $this->target;
6365 $replacement_params['replacement_str'] = $this->replacement;
 66+ $replacement_params['use_regex'] = $this->use_regex;
6467 $replacement_params['edit_summary'] = wfMsgForContent( 'replacetext_editsummary', $this->target, $this->replacement );
6568 $replacement_params['create_redirect'] = false;
6669 $replacement_params['watch_page'] = false;
@@ -85,7 +88,7 @@
8689 }
8790 Job::batchInsert( $jobs );
8891
89 - $count = $wgLang->formatNum( count( $jobs ) );
 92+ $count = $wgLang->formatNum( count( $jobs ) );
9093 $wgOut->addWikiMsg( 'replacetext_success', "<tt><nowiki>{$this->target}</nowiki></tt>", "<tt><nowiki>{$this->replacement}</nowiki></tt>", $count );
9194
9295 // Link back
@@ -93,7 +96,6 @@
9497 $wgOut->addHTML( $sk->makeKnownLinkObj( $this->getTitle(), wfMsgHtml( 'replacetext_return' ) ) );
9598 return;
9699 } elseif ( $wgRequest->getCheck( 'target' ) ) { // very long elseif, look for "end elseif"
97 -
98100 // first, check that at least one namespace has been
99101 // picked, and that either editing or moving pages
100102 // has been selected
@@ -113,20 +115,24 @@
114116
115117 // if user is replacing text within pages...
116118 if ( $this->edit_pages ) {
117 - $res = $this->doSearchQuery( $this->target, $this->selected_namespaces, $this->category, $this->prefix );
 119+ $res = $this->doSearchQuery( $this->target, $this->selected_namespaces, $this->category, $this->prefix , $this->use_regex );
118120 foreach ( $res as $row ) {
119121 $title = Title::makeTitleSafe( $row->page_namespace, $row->page_title );
120 - $context = $this->extractContext( $row->old_text, $this->target );
 122+ $context = $this->extractContext( $row->old_text, $this->target, $this->use_regex );
121123 $titles_for_edit[] = array( $title, $context );
122124 }
123125 }
124126 if ( $this->move_pages ) {
125 - $res = $this->getMatchingTitles( $this->target, $this->selected_namespaces, $this->category, $this->prefix );
 127+ $res = $this->getMatchingTitles( $this->target, $this->selected_namespaces, $this->category, $this->prefix, $this->use_regex );
126128 foreach ( $res as $row ) {
127129 $title = Title::makeTitleSafe( $row->page_namespace, $row->page_title );
128130 // see if this move can happen
129131 $cur_page_name = str_replace( '_', ' ', $row->page_title );
130 - $new_page_name = str_replace( $this->target, $this->replacement, $cur_page_name );
 132+ if ( $this->use_regex ) {
 133+ $new_page_name = preg_replace( "/".$this->target."/U", $this->replacement, $cur_page_name );
 134+ } else {
 135+ $new_page_name = str_replace( $this->target, $this->replacement, $cur_page_name );
 136+ }
131137 $new_title = Title::makeTitleSafe( $row->page_namespace, $new_page_name );
132138 $err = $title->isValidMoveOperation( $new_title );
133139 if ( $title->userCan( 'move', true ) && !is_array( $err ) ) {
@@ -158,16 +164,16 @@
159165 //FIXME: raw html message
160166 $wgOut->addHTML( '<p>' . $sk->makeKnownLinkObj( $this->getTitle(), wfMsg( 'replacetext_return' ) ) . '</p>' );
161167 } else {
162 - // show a warning message if the replacement string
163 - // is either blank or found elsewhere on the wiki
164 - // (since undoing the replacement would be
165 - // difficult in either case)
 168+ // Show a warning message if the replacement
 169+ // string is either blank or found elsewhere on
 170+ // the wiki (since undoing the replacement
 171+ // would be difficult in either case).
166172 $warning_msg = null;
167173
168174 if ( $this->replacement === '' ) {
169175 $warning_msg = wfMsg('replacetext_blankwarning');
170176 } elseif ( count( $titles_for_edit ) > 0 ) {
171 - $res = $this->doSearchQuery( $this->replacement, $this->selected_namespaces, $this->category, $this->prefix );
 177+ $res = $this->doSearchQuery( $this->replacement, $this->selected_namespaces, $this->category, $this->prefix, $this->use_regex );
172178 $count = $res->numRows();
173179 if ( $count > 0 ) {
174180 $warning_msg = wfMsgExt( 'replacetext_warning', 'parsemag',
@@ -176,7 +182,7 @@
177183 );
178184 }
179185 } elseif ( count( $titles_for_move ) > 0 ) {
180 - $res = $this->getMatchingTitles( $this->replacement, $this->selected_namespaces, $this->category, $this->prefix );
 186+ $res = $this->getMatchingTitles( $this->replacement, $this->selected_namespaces, $this->category, $this->prefix, $this->use_regex );
181187 $count = $res->numRows();
182188 if ( $count > 0 ) {
183189 $warning_msg = wfMsgExt( 'replacetext_warning', 'parsemag',
@@ -200,7 +206,7 @@
201207 }
202208
203209 function showForm( $warning_msg = null ) {
204 - global $wgOut;
 210+ global $wgOut;
205211 $wgOut->addHTML(
206212 Xml::openElement( 'form', array( 'id' => 'powersearch', 'action' => $this->getTitle()->getFullUrl(), 'method' => 'post' ) ) .
207213 Xml::hidden( 'title', $this->getTitle()->getPrefixedText() ) .
@@ -223,8 +229,13 @@
224230 $wgOut->addHTML( '</td><td>' );
225231 $wgOut->addHTML( Xml::textarea( 'replacement', $this->replacement, 50, 2, array( 'style' => 'width: auto;' ) ) );
226232 $wgOut->addHTML( '</td></tr></table>' );
 233+ $wgOut->addHTML( Xml::tags( 'p', null,
 234+ Xml::checkLabel( wfMsg( 'replacetext_useregex' ), 'use_regex', 'use_regex' ) ) . "\n" .
 235+ Xml::element( 'p', array( 'style' => 'font-style: italic' ),
 236+ wfMsg( 'replacetext_regexdocu' ) )
 237+ );
227238
228 - // the interface is heavily based on the one in Special:Search
 239+ // The interface is heavily based on the one in Special:Search.
229240 $search_label = wfMsg( 'powersearch-ns' );
230241 $namespaces = SearchEngine::searchableNamespaces();
231242 $tables = $this->namespaceTables( $namespaces );
@@ -233,10 +244,10 @@
234245 "<fieldset id=\"mw-searchoptions\">\n" .
235246 Xml::tags( 'h4', null, wfMsgExt( 'powersearch-ns', array( 'parseinline' ) ) )
236247 );
237 - // the ability to select/unselect groups of namespaces in the
 248+ // The ability to select/unselect groups of namespaces in the
238249 // search interface exists only in some skins, like Vector -
239250 // check for the presence of the 'powersearch-togglelabel'
240 - // message to see if we can use this functionality here
 251+ // message to see if we can use this functionality here.
241252 if ( !wfEmptyMsg( 'powersearch-togglelabel', wfMsg( 'powersearch-togglelabel' ) ) ) {
242253 $wgOut->addHTML(
243254 Xml::tags(
@@ -262,7 +273,6 @@
263274 )
264275 )
265276 )
266 -
267277 );
268278 } // end if
269279 $wgOut->addHTML(
@@ -334,7 +344,6 @@
335345 return $tables;
336346 }
337347
338 -
339348 function pageListForm( $titles_for_edit, $titles_for_move, $unmoveable_titles ) {
340349 global $wgOut, $wgLang, $wgScript;
341350
@@ -345,7 +354,8 @@
346355 Xml::openElement( 'form', $formOpts ) .
347356 Xml::hidden( 'title', $this->getTitle()->getPrefixedText() ) .
348357 Xml::hidden( 'target', $this->target ) .
349 - Xml::hidden( 'replacement', $this->replacement )
 358+ Xml::hidden( 'replacement', $this->replacement ) .
 359+ Xml::hidden( 'use_regex', $this->use_regex )
350360 );
351361
352362 $js = file_get_contents( dirname( __FILE__ ) . '/ReplaceText.js' );
@@ -389,7 +399,8 @@
390400 Xml::hidden( 'replace', 1 )
391401 );
392402
393 - // only show "invert selections" link if there are more than five pages
 403+ // Only show "invert selections" link if there are more than
 404+ // five pages.
394405 if ( count( $titles_for_edit ) + count( $titles_for_move ) > 5 ) {
395406 $buttonOpts = array(
396407 'type' => 'button',
@@ -415,18 +426,24 @@
416427 }
417428 }
418429
419 -
420430 /**
421431 * Extract context and highlights search text
 432+ *
 433+ * TODO: The bolding needs to be fixed for regular expressions.
422434 */
423 - function extractContext( $text, $target ) {
 435+ function extractContext( $text, $target, $use_regex = false ) {
424436 global $wgLang;
 437+
425438 $cw = $this->user->getOption( 'contextchars', 40 );
426439
427440 // Get all indexes
428 - $targetq = preg_quote( $target, '/' );
429 - preg_match_all( "/$targetq/", $text, $matches, PREG_OFFSET_CAPTURE );
430 -
 441+ if ( $use_regex ) {
 442+ preg_match_all( "/$target/", $text, $matches, PREG_OFFSET_CAPTURE );
 443+ } else {
 444+ $targetq = preg_quote( $target, '/' );
 445+ preg_match_all( "/$targetq/", $text, $matches, PREG_OFFSET_CAPTURE );
 446+ }
 447+
431448 $poss = array();
432449 foreach ( $matches[0] as $_ ) {
433450 $poss[] = $_[1];
@@ -454,11 +471,16 @@
455472 list( $index, $len, ) = $_;
456473 $context .= self::convertWhiteSpaceToHTML( $wgLang->truncate( substr( $text, 0, $index ), - $cw ) );
457474 $snippet = self::convertWhiteSpaceToHTML( substr( $text, $index, $len ) );
458 - $targetq = preg_quote( self::convertWhiteSpaceToHTML( $target ), '/' );
459 - $context .= preg_replace( "/$targetq/i", '<span class="searchmatch">\0</span>', $snippet );
 475+ if ( $use_regex ) {
 476+ $targetStr = "/$target/U";
 477+ } else {
 478+ $targetq = preg_quote( self::convertWhiteSpaceToHTML( $target ), '/' );
 479+ $targetStr = "/$targetq/i";
 480+ }
 481+ $context .= preg_replace( $targetStr, '<span class="searchmatch">\0</span>', $snippet );
 482+
460483 $context .= self::convertWhiteSpaceToHTML( $wgLang->truncate( substr( $text, $index + $len ), $cw ) );
461484 }
462 -
463485 return $context;
464486 }
465487
@@ -471,27 +493,28 @@
472494 return $msg;
473495 }
474496
475 - function getMatchingTitles( $str, $namespaces, $category, $prefix ) {
 497+ function getMatchingTitles( $str, $namespaces, $category, $prefix, $use_regex = false ) {
476498 $dbr = wfGetDB( DB_SLAVE );
477499
478500 $str = Title::newFromText( $str )->getDbKey();
479501
480502 $tables = array( 'page' );
481503 $vars = array( 'page_title', 'page_namespace' );
482 - // anyString() method was added in MW 1.16
483 - if ( method_exists( $dbr, 'anyString' ) ) {
484 - $any = $dbr->anyString();
485 - $conds = array(
486 - 'page_title ' . $dbr->buildLike( $any, $str, $any ),
487 - "page_namespace IN ({$dbr->makeList( $namespaces )})",
488 - );
 504+ if ( $use_regex ) {
 505+ $comparisonCond = "page_title REGEXP '$str'";
489506 } else {
490 - $include_ns = $dbr->makeList( $namespaces );
491 - $conds = array(
492 - "page_title LIKE '%$str%'",
493 - "page_namespace IN ($include_ns)",
494 - );
 507+ // anyString() method was added in MW 1.16
 508+ if ( method_exists( $dbr, 'anyString' ) ) {
 509+ $any = $dbr->anyString();
 510+ $comparisonCond = 'page_title ' . $dbr->buildLike( $any, $str, $any );
 511+ } else {
 512+ $comparisonCond = "page_title LIKE '%$str%'";
 513+ }
495514 }
 515+ $conds = array(
 516+ $comparisonCond,
 517+ "page_namespace IN ({$dbr->makeList( $namespaces )})",
 518+ );
496519
497520 $this->categoryCondition( $category, $tables, $conds );
498521 $this->prefixCondition( $prefix, $conds );
@@ -500,30 +523,27 @@
501524 return $dbr->select( $tables, $vars, $conds, __METHOD__ , $sort );
502525 }
503526
504 - function doSearchQuery( $search, $namespaces, $category, $prefix ) {
 527+ function doSearchQuery( $search, $namespaces, $category, $prefix, $use_regex = false ) {
505528 $dbr = wfGetDB( DB_SLAVE );
506 -
507529 $tables = array( 'page', 'revision', 'text' );
508530 $vars = array( 'page_id', 'page_namespace', 'page_title', 'old_text' );
509 - // anyString() method was added in MW 1.16
510 - if ( method_exists( $dbr, 'anyString' ) ) {
511 - $any = $dbr->anyString();
512 - $conds = array(
513 - 'old_text ' . $dbr->buildLike( $any, $search, $any ),
514 - "page_namespace IN ({$dbr->makeList( $namespaces )})",
515 - 'rev_id = page_latest',
516 - 'rev_text_id = old_id'
517 - );
 531+ if ( $use_regex ) {
 532+ $comparisonCond = "old_text REGEXP '$search'";
518533 } else {
519 - $any = $dbr->anyString();
520 - $include_ns = $dbr->makeList( $namespaces );
521 - $conds = array(
522 - "old_text " . $dbr->buildLike( $any, $search, $any ),
523 - "page_namespace IN ($include_ns)",
524 - 'rev_id = page_latest',
525 - 'rev_text_id = old_id'
526 - );
 534+ // anyString() method was added in MW 1.16
 535+ if ( method_exists( $dbr, 'anyString' ) ) {
 536+ $any = $dbr->anyString();
 537+ $comparisonCond = 'old_text ' . $dbr->buildLike( $any, $search, $any );
 538+ } else {
 539+ $comparisonCond = "old_text LIKE '%$search%'";
 540+ }
527541 }
 542+ $conds = array(
 543+ $comparisonCond,
 544+ "page_namespace IN ({$dbr->makeList( $namespaces )})",
 545+ 'rev_id = page_latest',
 546+ 'rev_text_id = old_id'
 547+ );
528548
529549 $this->categoryCondition( $category, $tables, $conds );
530550 $this->prefixCondition( $prefix, $conds );

Follow-up revisions

RevisionCommit summaryAuthorDate
r82240Partly revert r82212: Remove spurios characterraymond13:22, 16 February 2011
r82276Follow-up to r82212 - simplified and improved namespace check within SQLyaron19:04, 16 February 2011
r83707Made some improvements to SQL, including use of addQuotes() - follow-up to r8...yaron18:39, 11 March 2011
r84054Added escaping so that a search string containing a space can be used on page...yaron20:30, 15 March 2011

Comments

#Comment by Nikerabbit (talk | contribs)   07:36, 16 February 2011

Some kind of space character was inserted before <?php

$this->target is not escaped properly in the regex.

+"page_namespace IN ({$dbr->makeList( $namespaces )})",

Is better done 'page_namespace' => $namespaces,

Looks like this revision introduces SQL injection.

#Comment by Raymond (talk | contribs)   15:57, 15 March 2011

"Replace text in page titles, when possible" is broken when "Original text:" contains (space), -, _ (at least these charaters).

#Comment by Yaron Koren (talk | contribs)   20:35, 15 March 2011

Thanks for the note - I fixed handling for spaces. Underlines still don't work, but I don't think they should, really... hyphens/dashes, on the other hand, seemed to work fine even beforehand.

#Comment by Yaron Koren (talk | contribs)   10:16, 16 August 2011

Changing status from "fixme" back to "new" - I think the bugs found were all addressed.

Status & tagging log