r45758 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r45757‎ | r45758 | r45759 >
Date:22:33, 14 January 2009
Author:rainman
Status:deferred
Tags:
Comment:
Rewrite replacePrefixes:
* make a regexp of the namespace names to replace, then do search/replace
* now namespaces with spaces are correctly rewritten
* remove some arcane syntax no one is using
Modified paths:
  • /trunk/extensions/MWSearch/MWSearch_body.php (modified) (history)

Diff [purge]

Index: trunk/extensions/MWSearch/MWSearch_body.php
@@ -42,7 +42,6 @@
4343 *
4444 * 1) rewrite namespaces into standardized form
4545 * e.g. image:clouds -> [6]:clouds
46 - * e.g. help,wp:npov -> [12,4]:npov
4746 *
4847 * 2) rewrite localizations of "search everything" keyword
4948 * e.g. alle:heidegger -> all:heidegger
@@ -86,79 +85,53 @@
8786 return trim($ret);
8887 }
8988
90 - for($i = 0 ; $i < $qlen ; $i++){
91 - $c = $query[$i];
92 -
93 - // ignore chars in quotes
94 - if($inquotes && $c!='"');
95 - // check if $c is valid prefix character
96 - else if(($c >= 'a' && $c <= 'z') ||
97 - ($c >= 'A' && $c <= 'Z') ||
98 - $c == '_' || $c == '-' || $c ==','){
99 - if($len == 0){
100 - $start = $i; // begin of token
101 - $len = 1;
102 - } else
103 - $len++;
104 - // check for utf-8 chars
105 - } else if(($c >= "\xc0" && $c <= "\xff")){
106 - $utf8len = 1;
107 - for($j = $i+1; $j < $qlen; $j++){ // fetch extra utf-8 bytes
108 - if($query[$j] >= "\x80" && $query[$j] <= "\xbf")
109 - $utf8len++;
110 - else
111 - break;
112 - }
113 - if($len == 0){
114 - $start = $i;
115 - $len = $utf8len;
116 - } else
117 - $len += $utf8len;
118 - $i = $j - 1; // we consumed the chars
119 - // check for end of prefix (i.e. semicolon)
120 - } else if($c == ':' && $len !=0){
121 - $rewrite = array(); // here we collect namespaces
122 - $prefixes = explode(',',substr($query,$start,$len));
123 - // iterate thru comma-separated list of prefixes
124 - foreach($prefixes as $prefix){
125 - $index = $wgContLang->getNsIndex($prefix);
126 -
127 - // check for special prefixes all/incategory
128 - if($prefix == $allkeyword){
129 - $rewrite = 'all';
130 - break;
131 - // check for localized names of namespaces
132 - } else if($index !== false)
133 - $rewrite[] = $index;
134 - }
135 - $translated = null;
136 - if($rewrite === 'all')
137 - $translated = $rewrite;
138 - else if(count($rewrite) != 0)
139 - $translated = '['.implode(',',array_unique($rewrite)).']';
140 -
141 - if(isset($translated)){
142 - // append text before the prefix, and then the prefix
143 - $rewritten .= substr($query,$rindex,$start-$rindex);
144 - $rewritten .= $translated . ':';
145 - $rindex = $i+1;
146 - }
147 -
148 - $len = 0;
149 - } else{ // end of token
150 - if($c == '"') // get in/out of quotes
151 - $inquotes = !$inquotes;
152 -
153 - $len = 0;
 89+ global $wgCanonicalNamespaceNames, $wgNamespaceAliases;
 90+ $nsNamesRaw = array_merge($wgContLang->getNamespaces(), $wgCanonicalNamespaceNames,
 91+ array_keys( array_merge($wgNamespaceAliases, $wgContLang->namespaceAliases) ) );
 92+
 93+ # add all namespace names w/o spaces
 94+ $nsNames = array();
 95+ foreach($nsNamesRaw as $ns){
 96+ if( $ns != ''){
 97+ $nsNames[] = $ns;
 98+ $nsNames[] = str_replace('_',' ',$ns);
15499 }
155 -
156100 }
157 - // add rest of the original query that doesn't need rewritting
158 - $rewritten .= substr($query,$rindex,$qlen-$rindex);
 101+
 102+ $regexp = implode('|',array_unique( $nsNames ));
 103+
 104+ # rewrite the query by replacing valid namespace names
 105+ $parts = preg_split('/(")/',$query,-1,PREG_SPLIT_DELIM_CAPTURE);
 106+ $inquotes = false;
 107+ $rewritten = '';
 108+ foreach($parts as $part){
 109+ if( $part == '"'){ # stuff in quote doesnt get rewritten
 110+ $rewritten .= $part;
 111+ $inquotes = !$inquotes;
 112+ } elseif( $inquotes ){
 113+ $rewritten .= $part;
 114+ } else{
 115+ # replace namespaces
 116+ $r = preg_replace_callback('/('.$regexp.'):/i',array($this,'replaceNamespace'),$part);
 117+ # replace to backend all: notation
 118+ $rewritten .= str_replace($allkeyword.':', 'all:', $r);
 119+ }
 120+ }
159121 wfProfileOut($fname);
160122 return $rewritten;
161123 }
162124
 125+ /** callback to replace namespace names to internal notation, e.g. User: -> [2]: */
 126+ function replaceNamespace($matches){
 127+ global $wgContLang;
 128+ $inx = $wgContLang->getNsIndex(str_replace(' ', '_', $matches[1]));
 129+ if ($inx === false)
 130+ return $matches[0];
 131+ else
 132+ return "[$inx]:";
 133+
 134+ }
 135+
163136 function acceptListRedirects() {
164137 return false;
165138 }

Status & tagging log