Index: trunk/extensions/MWSearch/MWSearch_body.php |
— | — | @@ -42,7 +42,6 @@ |
43 | 43 | * |
44 | 44 | * 1) rewrite namespaces into standardized form |
45 | 45 | * e.g. image:clouds -> [6]:clouds |
46 | | - * e.g. help,wp:npov -> [12,4]:npov |
47 | 46 | * |
48 | 47 | * 2) rewrite localizations of "search everything" keyword |
49 | 48 | * e.g. alle:heidegger -> all:heidegger |
— | — | @@ -86,79 +85,53 @@ |
87 | 86 | return trim($ret); |
88 | 87 | } |
89 | 88 | |
90 | | - for($i = 0 ; $i < $qlen ; $i++){ |
91 | | - $c = $query[$i]; |
92 | | - |
93 | | - // ignore chars in quotes |
94 | | - if($inquotes && $c!='"'); |
95 | | - // check if $c is valid prefix character |
96 | | - else if(($c >= 'a' && $c <= 'z') || |
97 | | - ($c >= 'A' && $c <= 'Z') || |
98 | | - $c == '_' || $c == '-' || $c ==','){ |
99 | | - if($len == 0){ |
100 | | - $start = $i; // begin of token |
101 | | - $len = 1; |
102 | | - } else |
103 | | - $len++; |
104 | | - // check for utf-8 chars |
105 | | - } else if(($c >= "\xc0" && $c <= "\xff")){ |
106 | | - $utf8len = 1; |
107 | | - for($j = $i+1; $j < $qlen; $j++){ // fetch extra utf-8 bytes |
108 | | - if($query[$j] >= "\x80" && $query[$j] <= "\xbf") |
109 | | - $utf8len++; |
110 | | - else |
111 | | - break; |
112 | | - } |
113 | | - if($len == 0){ |
114 | | - $start = $i; |
115 | | - $len = $utf8len; |
116 | | - } else |
117 | | - $len += $utf8len; |
118 | | - $i = $j - 1; // we consumed the chars |
119 | | - // check for end of prefix (i.e. semicolon) |
120 | | - } else if($c == ':' && $len !=0){ |
121 | | - $rewrite = array(); // here we collect namespaces |
122 | | - $prefixes = explode(',',substr($query,$start,$len)); |
123 | | - // iterate thru comma-separated list of prefixes |
124 | | - foreach($prefixes as $prefix){ |
125 | | - $index = $wgContLang->getNsIndex($prefix); |
126 | | - |
127 | | - // check for special prefixes all/incategory |
128 | | - if($prefix == $allkeyword){ |
129 | | - $rewrite = 'all'; |
130 | | - break; |
131 | | - // check for localized names of namespaces |
132 | | - } else if($index !== false) |
133 | | - $rewrite[] = $index; |
134 | | - } |
135 | | - $translated = null; |
136 | | - if($rewrite === 'all') |
137 | | - $translated = $rewrite; |
138 | | - else if(count($rewrite) != 0) |
139 | | - $translated = '['.implode(',',array_unique($rewrite)).']'; |
140 | | - |
141 | | - if(isset($translated)){ |
142 | | - // append text before the prefix, and then the prefix |
143 | | - $rewritten .= substr($query,$rindex,$start-$rindex); |
144 | | - $rewritten .= $translated . ':'; |
145 | | - $rindex = $i+1; |
146 | | - } |
147 | | - |
148 | | - $len = 0; |
149 | | - } else{ // end of token |
150 | | - if($c == '"') // get in/out of quotes |
151 | | - $inquotes = !$inquotes; |
152 | | - |
153 | | - $len = 0; |
| 89 | + global $wgCanonicalNamespaceNames, $wgNamespaceAliases; |
| 90 | + $nsNamesRaw = array_merge($wgContLang->getNamespaces(), $wgCanonicalNamespaceNames, |
| 91 | + array_keys( array_merge($wgNamespaceAliases, $wgContLang->namespaceAliases) ) ); |
| 92 | + |
| 93 | + # add all namespace names w/o spaces |
| 94 | + $nsNames = array(); |
| 95 | + foreach($nsNamesRaw as $ns){ |
| 96 | + if( $ns != ''){ |
| 97 | + $nsNames[] = $ns; |
| 98 | + $nsNames[] = str_replace('_',' ',$ns); |
154 | 99 | } |
155 | | - |
156 | 100 | } |
157 | | - // add rest of the original query that doesn't need rewritting |
158 | | - $rewritten .= substr($query,$rindex,$qlen-$rindex); |
| 101 | + |
| 102 | + $regexp = implode('|',array_unique( $nsNames )); |
| 103 | + |
| 104 | + # rewrite the query by replacing valid namespace names |
| 105 | + $parts = preg_split('/(")/',$query,-1,PREG_SPLIT_DELIM_CAPTURE); |
| 106 | + $inquotes = false; |
| 107 | + $rewritten = ''; |
| 108 | + foreach($parts as $part){ |
| 109 | + if( $part == '"'){ # stuff in quote doesnt get rewritten |
| 110 | + $rewritten .= $part; |
| 111 | + $inquotes = !$inquotes; |
| 112 | + } elseif( $inquotes ){ |
| 113 | + $rewritten .= $part; |
| 114 | + } else{ |
| 115 | + # replace namespaces |
| 116 | + $r = preg_replace_callback('/('.$regexp.'):/i',array($this,'replaceNamespace'),$part); |
| 117 | + # replace to backend all: notation |
| 118 | + $rewritten .= str_replace($allkeyword.':', 'all:', $r); |
| 119 | + } |
| 120 | + } |
159 | 121 | wfProfileOut($fname); |
160 | 122 | return $rewritten; |
161 | 123 | } |
162 | 124 | |
| 125 | + /** preg_replace_callback handler: turns a matched namespace prefix into backend index notation (e.g. User: -> [2]:); a prefix getNsIndex() does not recognise is returned untouched */
| 126 | + function replaceNamespace($matches){
| 127 | + global $wgContLang;
| 128 | + $canonical = str_replace(' ', '_', $matches[1]); # namespace lookup expects underscores, not spaces
| 129 | + $nsIndex = $wgContLang->getNsIndex($canonical);
| 130 | + if ($nsIndex !== false)
| 131 | + return '[' . $nsIndex . ']:';
| 132 | + # not a namespace after all: hand back the full match (prefix plus colon) unchanged
| 133 | + return $matches[0];
| 134 | + }
| 135 | + |
163 | 136 | function acceptListRedirects() { // always returns false; presumably tells the caller this engine does not accept redirect listings -- TODO confirm against the caller
164 | 137 | return false;
165 | 138 | }