Index: trunk/phase3/maintenance/importUseModWikipedia.php |
— | — | @@ -94,264 +94,40 @@ |
95 | 95 | var $FS, $FS1, $FS2, $FS3; |
96 | 96 | var $FreeLinkPattern, $UrlPattern, $LinkPattern, $InterLinkPattern; |
97 | 97 | |
98 | | - var $cp1252Table = <<<EOT |
99 | | -0x00 0x0000 |
100 | | -0x01 0x0001 |
101 | | -0x02 0x0002 |
102 | | -0x03 0x0003 |
103 | | -0x04 0x0004 |
104 | | -0x05 0x0005 |
105 | | -0x06 0x0006 |
106 | | -0x07 0x0007 |
107 | | -0x08 0x0008 |
108 | | -0x09 0x0009 |
109 | | -0x0a 0x000a |
110 | | -0x0b 0x000b |
111 | | -0x0c 0x000c |
112 | | -0x0d 0x000d |
113 | | -0x0e 0x000e |
114 | | -0x0f 0x000f |
115 | | -0x10 0x0010 |
116 | | -0x11 0x0011 |
117 | | -0x12 0x0012 |
118 | | -0x13 0x0013 |
119 | | -0x14 0x0014 |
120 | | -0x15 0x0015 |
121 | | -0x16 0x0016 |
122 | | -0x17 0x0017 |
123 | | -0x18 0x0018 |
124 | | -0x19 0x0019 |
125 | | -0x1a 0x001a |
126 | | -0x1b 0x001b |
127 | | -0x1c 0x001c |
128 | | -0x1d 0x001d |
129 | | -0x1e 0x001e |
130 | | -0x1f 0x001f |
131 | | -0x20 0x0020 |
132 | | -0x21 0x0021 |
133 | | -0x22 0x0022 |
134 | | -0x23 0x0023 |
135 | | -0x24 0x0024 |
136 | | -0x25 0x0025 |
137 | | -0x26 0x0026 |
138 | | -0x27 0x0027 |
139 | | -0x28 0x0028 |
140 | | -0x29 0x0029 |
141 | | -0x2a 0x002a |
142 | | -0x2b 0x002b |
143 | | -0x2c 0x002c |
144 | | -0x2d 0x002d |
145 | | -0x2e 0x002e |
146 | | -0x2f 0x002f |
147 | | -0x30 0x0030 |
148 | | -0x31 0x0031 |
149 | | -0x32 0x0032 |
150 | | -0x33 0x0033 |
151 | | -0x34 0x0034 |
152 | | -0x35 0x0035 |
153 | | -0x36 0x0036 |
154 | | -0x37 0x0037 |
155 | | -0x38 0x0038 |
156 | | -0x39 0x0039 |
157 | | -0x3a 0x003a |
158 | | -0x3b 0x003b |
159 | | -0x3c 0x003c |
160 | | -0x3d 0x003d |
161 | | -0x3e 0x003e |
162 | | -0x3f 0x003f |
163 | | -0x40 0x0040 |
164 | | -0x41 0x0041 |
165 | | -0x42 0x0042 |
166 | | -0x43 0x0043 |
167 | | -0x44 0x0044 |
168 | | -0x45 0x0045 |
169 | | -0x46 0x0046 |
170 | | -0x47 0x0047 |
171 | | -0x48 0x0048 |
172 | | -0x49 0x0049 |
173 | | -0x4a 0x004a |
174 | | -0x4b 0x004b |
175 | | -0x4c 0x004c |
176 | | -0x4d 0x004d |
177 | | -0x4e 0x004e |
178 | | -0x4f 0x004f |
179 | | -0x50 0x0050 |
180 | | -0x51 0x0051 |
181 | | -0x52 0x0052 |
182 | | -0x53 0x0053 |
183 | | -0x54 0x0054 |
184 | | -0x55 0x0055 |
185 | | -0x56 0x0056 |
186 | | -0x57 0x0057 |
187 | | -0x58 0x0058 |
188 | | -0x59 0x0059 |
189 | | -0x5a 0x005a |
190 | | -0x5b 0x005b |
191 | | -0x5c 0x005c |
192 | | -0x5d 0x005d |
193 | | -0x5e 0x005e |
194 | | -0x5f 0x005f |
195 | | -0x60 0x0060 |
196 | | -0x61 0x0061 |
197 | | -0x62 0x0062 |
198 | | -0x63 0x0063 |
199 | | -0x64 0x0064 |
200 | | -0x65 0x0065 |
201 | | -0x66 0x0066 |
202 | | -0x67 0x0067 |
203 | | -0x68 0x0068 |
204 | | -0x69 0x0069 |
205 | | -0x6a 0x006a |
206 | | -0x6b 0x006b |
207 | | -0x6c 0x006c |
208 | | -0x6d 0x006d |
209 | | -0x6e 0x006e |
210 | | -0x6f 0x006f |
211 | | -0x70 0x0070 |
212 | | -0x71 0x0071 |
213 | | -0x72 0x0072 |
214 | | -0x73 0x0073 |
215 | | -0x74 0x0074 |
216 | | -0x75 0x0075 |
217 | | -0x76 0x0076 |
218 | | -0x77 0x0077 |
219 | | -0x78 0x0078 |
220 | | -0x79 0x0079 |
221 | | -0x7a 0x007a |
222 | | -0x7b 0x007b |
223 | | -0x7c 0x007c |
224 | | -0x7d 0x007d |
225 | | -0x7e 0x007e |
226 | | -0x7f 0x007f |
227 | | -0x80 0x20ac |
228 | | -0x81 0x0081 |
229 | | -0x82 0x201a |
230 | | -0x83 0x0192 |
231 | | -0x84 0x201e |
232 | | -0x85 0x2026 |
233 | | -0x86 0x2020 |
234 | | -0x87 0x2021 |
235 | | -0x88 0x02c6 |
236 | | -0x89 0x2030 |
237 | | -0x8a 0x0160 |
238 | | -0x8b 0x2039 |
239 | | -0x8c 0x0152 |
240 | | -0x8d 0x008d |
241 | | -0x8e 0x017d |
242 | | -0x8f 0x008f |
243 | | -0x90 0x0090 |
244 | | -0x91 0x2018 |
245 | | -0x92 0x2019 |
246 | | -0x93 0x201c |
247 | | -0x94 0x201d |
248 | | -0x95 0x2022 |
249 | | -0x96 0x2013 |
250 | | -0x97 0x2014 |
251 | | -0x98 0x02dc |
252 | | -0x99 0x2122 |
253 | | -0x9a 0x0161 |
254 | | -0x9b 0x203a |
255 | | -0x9c 0x0153 |
256 | | -0x9d 0x009d |
257 | | -0x9e 0x017e |
258 | | -0x9f 0x0178 |
259 | | -0xa0 0x00a0 |
260 | | -0xa1 0x00a1 |
261 | | -0xa2 0x00a2 |
262 | | -0xa3 0x00a3 |
263 | | -0xa4 0x00a4 |
264 | | -0xa5 0x00a5 |
265 | | -0xa6 0x00a6 |
266 | | -0xa7 0x00a7 |
267 | | -0xa8 0x00a8 |
268 | | -0xa9 0x00a9 |
269 | | -0xaa 0x00aa |
270 | | -0xab 0x00ab |
271 | | -0xac 0x00ac |
272 | | -0xad 0x00ad |
273 | | -0xae 0x00ae |
274 | | -0xaf 0x00af |
275 | | -0xb0 0x00b0 |
276 | | -0xb1 0x00b1 |
277 | | -0xb2 0x00b2 |
278 | | -0xb3 0x00b3 |
279 | | -0xb4 0x00b4 |
280 | | -0xb5 0x00b5 |
281 | | -0xb6 0x00b6 |
282 | | -0xb7 0x00b7 |
283 | | -0xb8 0x00b8 |
284 | | -0xb9 0x00b9 |
285 | | -0xba 0x00ba |
286 | | -0xbb 0x00bb |
287 | | -0xbc 0x00bc |
288 | | -0xbd 0x00bd |
289 | | -0xbe 0x00be |
290 | | -0xbf 0x00bf |
291 | | -0xc0 0x00c0 |
292 | | -0xc1 0x00c1 |
293 | | -0xc2 0x00c2 |
294 | | -0xc3 0x00c3 |
295 | | -0xc4 0x00c4 |
296 | | -0xc5 0x00c5 |
297 | | -0xc6 0x00c6 |
298 | | -0xc7 0x00c7 |
299 | | -0xc8 0x00c8 |
300 | | -0xc9 0x00c9 |
301 | | -0xca 0x00ca |
302 | | -0xcb 0x00cb |
303 | | -0xcc 0x00cc |
304 | | -0xcd 0x00cd |
305 | | -0xce 0x00ce |
306 | | -0xcf 0x00cf |
307 | | -0xd0 0x00d0 |
308 | | -0xd1 0x00d1 |
309 | | -0xd2 0x00d2 |
310 | | -0xd3 0x00d3 |
311 | | -0xd4 0x00d4 |
312 | | -0xd5 0x00d5 |
313 | | -0xd6 0x00d6 |
314 | | -0xd7 0x00d7 |
315 | | -0xd8 0x00d8 |
316 | | -0xd9 0x00d9 |
317 | | -0xda 0x00da |
318 | | -0xdb 0x00db |
319 | | -0xdc 0x00dc |
320 | | -0xdd 0x00dd |
321 | | -0xde 0x00de |
322 | | -0xdf 0x00df |
323 | | -0xe0 0x00e0 |
324 | | -0xe1 0x00e1 |
325 | | -0xe2 0x00e2 |
326 | | -0xe3 0x00e3 |
327 | | -0xe4 0x00e4 |
328 | | -0xe5 0x00e5 |
329 | | -0xe6 0x00e6 |
330 | | -0xe7 0x00e7 |
331 | | -0xe8 0x00e8 |
332 | | -0xe9 0x00e9 |
333 | | -0xea 0x00ea |
334 | | -0xeb 0x00eb |
335 | | -0xec 0x00ec |
336 | | -0xed 0x00ed |
337 | | -0xee 0x00ee |
338 | | -0xef 0x00ef |
339 | | -0xf0 0x00f0 |
340 | | -0xf1 0x00f1 |
341 | | -0xf2 0x00f2 |
342 | | -0xf3 0x00f3 |
343 | | -0xf4 0x00f4 |
344 | | -0xf5 0x00f5 |
345 | | -0xf6 0x00f6 |
346 | | -0xf7 0x00f7 |
347 | | -0xf8 0x00f8 |
348 | | -0xf9 0x00f9 |
349 | | -0xfa 0x00fa |
350 | | -0xfb 0x00fb |
351 | | -0xfc 0x00fc |
352 | | -0xfd 0x00fd |
353 | | -0xfe 0x00fe |
354 | | -0xff 0x00ff |
355 | | -EOT; |
| 98 | + var $cp1252Table = array( /* Overrides for byte values 0x80-0x9f: source byte value => code point that the constructor passes to codepointToUtf8(). Byte values with no entry here are mapped to themselves by the constructor's loop. */ |
| 99 | +0x80 => 0x20ac, |
| 100 | +0x81 => 0x0081, /* identity entry: same result as the constructor's fallback */ |
| 101 | +0x82 => 0x201a, |
| 102 | +0x83 => 0x0192, |
| 103 | +0x84 => 0x201e, |
| 104 | +0x85 => 0x2026, |
| 105 | +0x86 => 0x2020, |
| 106 | +0x87 => 0x2021, |
| 107 | +0x88 => 0x02c6, |
| 108 | +0x89 => 0x2030, |
| 109 | +0x8a => 0x0160, |
| 110 | +0x8b => 0x2039, |
| 111 | +0x8c => 0x0152, |
| 112 | +0x8d => 0x008d, /* identity entry: same result as the constructor's fallback */ |
| 113 | +0x8e => 0x017d, |
| 114 | +0x8f => 0x008f, /* identity entry: same result as the constructor's fallback */ |
| 115 | +0x90 => 0x0090, /* identity entry: same result as the constructor's fallback */ |
| 116 | +0x91 => 0x2018, |
| 117 | +0x92 => 0x2019, |
| 118 | +0x93 => 0x201c, |
| 119 | +0x94 => 0x201d, |
| 120 | +0x95 => 0x2022, |
| 121 | +0x96 => 0x2013, |
| 122 | +0x97 => 0x2014, |
| 123 | +0x98 => 0x02dc, |
| 124 | +0x99 => 0x2122, |
| 125 | +0x9a => 0x0161, |
| 126 | +0x9b => 0x203a, |
| 127 | +0x9c => 0x0153, |
| 128 | +0x9d => 0x009d, /* identity entry: same result as the constructor's fallback */ |
| 129 | +0x9e => 0x017e, |
| 130 | +0x9f => 0x0178); |
| 131 | + |
356 | 132 | public function __construct() { |
357 | 133 | parent::__construct(); |
358 | 134 | $this->addOption( 'datadir', 'the value of $DataDir from wiki.cgi', true, true ); |
— | — | @@ -359,10 +135,15 @@ |
360 | 136 | $this->initLinkPatterns(); |
361 | 137 | |
362 | 138 | $this->encodeMap = $this->decodeMap = array(); |
363 | | - foreach ( explode( "\n", $this->cp1252Table ) as $line ) { |
364 | | - list( $source, $dest ) = explode( "\t", $line ); |
365 | | - $sourceChar = chr( base_convert( substr( $source, 2 ), 16, 10 ) ); |
366 | | - $destChar = codepointToUtf8( base_convert( substr( $dest, 2 ), 16, 10 ) ); |
| 139 | + |
| 140 | + for ($source = 0; $source <= 0xff; $source++) { |
| 141 | + if ( isset( $this->cp1252Table[$source] ) ) { |
| 142 | + $dest = $this->cp1252Table[$source]; |
| 143 | + } else { |
| 144 | + $dest = $source; |
| 145 | + } |
| 146 | + $sourceChar = chr( $source ); |
| 147 | + $destChar = codepointToUtf8( $dest ); |
367 | 148 | $this->encodeMap[$sourceChar] = $destChar; |
368 | 149 | $this->decodeMap[$destChar] = $sourceChar; |
369 | 150 | } |