r13899 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r13898 | r13899 | r13900 >
Date:16:38, 28 April 2006
Author:hashar
Status:old
Tags:
Comment:
Some s/"/'/g (double-quoted array keys changed to single-quoted); this should speed that stuff up a bit.
Modified paths:
  • /trunk/wiki2xml/php/filter_named_entities.php (modified) (history)

Diff [purge]

Index: trunk/wiki2xml/php/filter_named_entities.php
@@ -1,266 +1,265 @@
22 <?php
3 -
43 /**
54 * This file is to compensate for a bug in PHP4 and early PHP5 versions
65 * which do not replace some entities correctly
7 -*/
 6+ */
87
98 $html_named_entities_mapping_mine = array (
10 - // Obtained with:
11 - // less /usr/share/xml/entities/xhtml/*.ent|grep '^<!ENTITY'|sed -e 's/^<\!ENTITY[ \t]*\([A-Za-z0-9]*\)[ \t]*"&#\([0-9]*\);".*$/"\1"=>\2,/' > /home/dom/data/2005/04/entities-table
12 -"nbsp"=>160,
13 -"iexcl"=>161,
14 -"cent"=>162,
15 -"pound"=>163,
16 -"curren"=>164,
17 -"yen"=>165,
18 -"brvbar"=>166,
19 -"sect"=>167,
20 -"uml"=>168,
21 -"copy"=>169,
22 -"ordf"=>170,
23 -"laquo"=>171,
24 -"not"=>172,
25 -"shy"=>173,
26 -"reg"=>174,
27 -"macr"=>175,
28 -"deg"=>176,
29 -"plusmn"=>177,
30 -"sup2"=>178,
31 -"sup3"=>179,
32 -"acute"=>180,
33 -"micro"=>181,
34 -"para"=>182,
35 -"middot"=>183,
36 -"cedil"=>184,
37 -"sup1"=>185,
38 -"ordm"=>186,
39 -"raquo"=>187,
40 -"frac14"=>188,
41 -"frac12"=>189,
42 -"frac34"=>190,
43 -"iquest"=>191,
44 -"Agrave"=>192,
45 -"Aacute"=>193,
46 -"Acirc"=>194,
47 -"Atilde"=>195,
48 -"Auml"=>196,
49 -"Aring"=>197,
50 -"AElig"=>198,
51 -"Ccedil"=>199,
52 -"Egrave"=>200,
53 -"Eacute"=>201,
54 -"Ecirc"=>202,
55 -"Euml"=>203,
56 -"Igrave"=>204,
57 -"Iacute"=>205,
58 -"Icirc"=>206,
59 -"Iuml"=>207,
60 -"ETH"=>208,
61 -"Ntilde"=>209,
62 -"Ograve"=>210,
63 -"Oacute"=>211,
64 -"Ocirc"=>212,
65 -"Otilde"=>213,
66 -"Ouml"=>214,
67 -"times"=>215,
68 -"Oslash"=>216,
69 -"Ugrave"=>217,
70 -"Uacute"=>218,
71 -"Ucirc"=>219,
72 -"Uuml"=>220,
73 -"Yacute"=>221,
74 -"THORN"=>222,
75 -"szlig"=>223,
76 -"agrave"=>224,
77 -"aacute"=>225,
78 -"acirc"=>226,
79 -"atilde"=>227,
80 -"auml"=>228,
81 -"aring"=>229,
82 -"aelig"=>230,
83 -"ccedil"=>231,
84 -"egrave"=>232,
85 -"eacute"=>233,
86 -"ecirc"=>234,
87 -"euml"=>235,
88 -"igrave"=>236,
89 -"iacute"=>237,
90 -"icirc"=>238,
91 -"iuml"=>239,
92 -"eth"=>240,
93 -"ntilde"=>241,
94 -"ograve"=>242,
95 -"oacute"=>243,
96 -"ocirc"=>244,
97 -"otilde"=>245,
98 -"ouml"=>246,
99 -"divide"=>247,
100 -"oslash"=>248,
101 -"ugrave"=>249,
102 -"uacute"=>250,
103 -"ucirc"=>251,
104 -"uuml"=>252,
105 -"yacute"=>253,
106 -"thorn"=>254,
107 -"yuml"=>255,
108 -"quot"=>34,
109 -"amp"=>38,
110 -"lt"=>60,
111 -"gt"=>62,
112 -"OElig"=>338,
113 -"oelig"=>339,
114 -"Scaron"=>352,
115 -"scaron"=>353,
116 -"Yuml"=>376,
117 -"circ"=>710,
118 -"tilde"=>732,
119 -"ensp"=>8194,
120 -"emsp"=>8195,
121 -"thinsp"=>8201,
122 -"zwnj"=>8204,
123 -"zwj"=>8205,
124 -"lrm"=>8206,
125 -"rlm"=>8207,
126 -"ndash"=>8211,
127 -"mdash"=>8212,
128 -"lsquo"=>8216,
129 -"rsquo"=>8217,
130 -"sbquo"=>8218,
131 -"ldquo"=>8220,
132 -"rdquo"=>8221,
133 -"bdquo"=>8222,
134 -"dagger"=>8224,
135 -"Dagger"=>8225,
136 -"permil"=>8240,
137 -"lsaquo"=>8249,
138 -"rsaquo"=>8250,
139 -"euro"=>8364,
140 -"fnof"=>402,
141 -"Alpha"=>913,
142 -"Beta"=>914,
143 -"Gamma"=>915,
144 -"Delta"=>916,
145 -"Epsilon"=>917,
146 -"Zeta"=>918,
147 -"Eta"=>919,
148 -"Theta"=>920,
149 -"Iota"=>921,
150 -"Kappa"=>922,
151 -"Lambda"=>923,
152 -"Mu"=>924,
153 -"Nu"=>925,
154 -"Xi"=>926,
155 -"Omicron"=>927,
156 -"Pi"=>928,
157 -"Rho"=>929,
158 -"Sigma"=>931,
159 -"Tau"=>932,
160 -"Upsilon"=>933,
161 -"Phi"=>934,
162 -"Chi"=>935,
163 -"Psi"=>936,
164 -"Omega"=>937,
165 -"alpha"=>945,
166 -"beta"=>946,
167 -"gamma"=>947,
168 -"delta"=>948,
169 -"epsilon"=>949,
170 -"zeta"=>950,
171 -"eta"=>951,
172 -"theta"=>952,
173 -"iota"=>953,
174 -"kappa"=>954,
175 -"lambda"=>955,
176 -"mu"=>956,
177 -"nu"=>957,
178 -"xi"=>958,
179 -"omicron"=>959,
180 -"pi"=>960,
181 -"rho"=>961,
182 -"sigmaf"=>962,
183 -"sigma"=>963,
184 -"tau"=>964,
185 -"upsilon"=>965,
186 -"phi"=>966,
187 -"chi"=>967,
188 -"psi"=>968,
189 -"omega"=>969,
190 -"thetasym"=>977,
191 -"upsih"=>978,
192 -"piv"=>982,
193 -"bull"=>8226,
194 -"hellip"=>8230,
195 -"prime"=>8242,
196 -"Prime"=>8243,
197 -"oline"=>8254,
198 -"frasl"=>8260,
199 -"weierp"=>8472,
200 -"image"=>8465,
201 -"real"=>8476,
202 -"trade"=>8482,
203 -"alefsym"=>8501,
204 -"larr"=>8592,
205 -"uarr"=>8593,
206 -"rarr"=>8594,
207 -"darr"=>8595,
208 -"harr"=>8596,
209 -"crarr"=>8629,
210 -"lArr"=>8656,
211 -"uArr"=>8657,
212 -"rArr"=>8658,
213 -"dArr"=>8659,
214 -"hArr"=>8660,
215 -"forall"=>8704,
216 -"part"=>8706,
217 -"exist"=>8707,
218 -"empty"=>8709,
219 -"nabla"=>8711,
220 -"isin"=>8712,
221 -"notin"=>8713,
222 -"ni"=>8715,
223 -"prod"=>8719,
224 -"sum"=>8721,
225 -"minus"=>8722,
226 -"lowast"=>8727,
227 -"radic"=>8730,
228 -"prop"=>8733,
229 -"infin"=>8734,
230 -"ang"=>8736,
231 -"and"=>8743,
232 -"or"=>8744,
233 -"cap"=>8745,
234 -"cup"=>8746,
235 -"int"=>8747,
236 -"there4"=>8756,
237 -"sim"=>8764,
238 -"cong"=>8773,
239 -"asymp"=>8776,
240 -"ne"=>8800,
241 -"equiv"=>8801,
242 -"le"=>8804,
243 -"ge"=>8805,
244 -"sub"=>8834,
245 -"sup"=>8835,
246 -"nsub"=>8836,
247 -"sube"=>8838,
248 -"supe"=>8839,
249 -"oplus"=>8853,
250 -"otimes"=>8855,
251 -"perp"=>8869,
252 -"sdot"=>8901,
253 -"lceil"=>8968,
254 -"rceil"=>8969,
255 -"lfloor"=>8970,
256 -"rfloor"=>8971,
257 -"lang"=>9001,
258 -"rang"=>9002,
259 -"loz"=>9674,
260 -"spades"=>9824,
261 -"clubs"=>9827,
262 -"hearts"=>9829,
263 -"diams"=>9830,
264 -"32"=>32,
 9+ // Obtained with:
 10+ // less /usr/share/xml/entities/xhtml/*.ent|grep '^<!ENTITY'|sed -e 's/^<\!ENTITY[ \t]*\([A-Za-z0-9]*\)[ \t]*"&#\([0-9]*\);".*$/"\1"=>\2,/' > /home/dom/data/2005/04/entities-table
 11+'nbsp'=>160,
 12+'iexcl'=>161,
 13+'cent'=>162,
 14+'pound'=>163,
 15+'curren'=>164,
 16+'yen'=>165,
 17+'brvbar'=>166,
 18+'sect'=>167,
 19+'uml'=>168,
 20+'copy'=>169,
 21+'ordf'=>170,
 22+'laquo'=>171,
 23+'not'=>172,
 24+'shy'=>173,
 25+'reg'=>174,
 26+'macr'=>175,
 27+'deg'=>176,
 28+'plusmn'=>177,
 29+'sup2'=>178,
 30+'sup3'=>179,
 31+'acute'=>180,
 32+'micro'=>181,
 33+'para'=>182,
 34+'middot'=>183,
 35+'cedil'=>184,
 36+'sup1'=>185,
 37+'ordm'=>186,
 38+'raquo'=>187,
 39+'frac14'=>188,
 40+'frac12'=>189,
 41+'frac34'=>190,
 42+'iquest'=>191,
 43+'Agrave'=>192,
 44+'Aacute'=>193,
 45+'Acirc'=>194,
 46+'Atilde'=>195,
 47+'Auml'=>196,
 48+'Aring'=>197,
 49+'AElig'=>198,
 50+'Ccedil'=>199,
 51+'Egrave'=>200,
 52+'Eacute'=>201,
 53+'Ecirc'=>202,
 54+'Euml'=>203,
 55+'Igrave'=>204,
 56+'Iacute'=>205,
 57+'Icirc'=>206,
 58+'Iuml'=>207,
 59+'ETH'=>208,
 60+'Ntilde'=>209,
 61+'Ograve'=>210,
 62+'Oacute'=>211,
 63+'Ocirc'=>212,
 64+'Otilde'=>213,
 65+'Ouml'=>214,
 66+'times'=>215,
 67+'Oslash'=>216,
 68+'Ugrave'=>217,
 69+'Uacute'=>218,
 70+'Ucirc'=>219,
 71+'Uuml'=>220,
 72+'Yacute'=>221,
 73+'THORN'=>222,
 74+'szlig'=>223,
 75+'agrave'=>224,
 76+'aacute'=>225,
 77+'acirc'=>226,
 78+'atilde'=>227,
 79+'auml'=>228,
 80+'aring'=>229,
 81+'aelig'=>230,
 82+'ccedil'=>231,
 83+'egrave'=>232,
 84+'eacute'=>233,
 85+'ecirc'=>234,
 86+'euml'=>235,
 87+'igrave'=>236,
 88+'iacute'=>237,
 89+'icirc'=>238,
 90+'iuml'=>239,
 91+'eth'=>240,
 92+'ntilde'=>241,
 93+'ograve'=>242,
 94+'oacute'=>243,
 95+'ocirc'=>244,
 96+'otilde'=>245,
 97+'ouml'=>246,
 98+'divide'=>247,
 99+'oslash'=>248,
 100+'ugrave'=>249,
 101+'uacute'=>250,
 102+'ucirc'=>251,
 103+'uuml'=>252,
 104+'yacute'=>253,
 105+'thorn'=>254,
 106+'yuml'=>255,
 107+'quot'=>34,
 108+'amp'=>38,
 109+'lt'=>60,
 110+'gt'=>62,
 111+'OElig'=>338,
 112+'oelig'=>339,
 113+'Scaron'=>352,
 114+'scaron'=>353,
 115+'Yuml'=>376,
 116+'circ'=>710,
 117+'tilde'=>732,
 118+'ensp'=>8194,
 119+'emsp'=>8195,
 120+'thinsp'=>8201,
 121+'zwnj'=>8204,
 122+'zwj'=>8205,
 123+'lrm'=>8206,
 124+'rlm'=>8207,
 125+'ndash'=>8211,
 126+'mdash'=>8212,
 127+'lsquo'=>8216,
 128+'rsquo'=>8217,
 129+'sbquo'=>8218,
 130+'ldquo'=>8220,
 131+'rdquo'=>8221,
 132+'bdquo'=>8222,
 133+'dagger'=>8224,
 134+'Dagger'=>8225,
 135+'permil'=>8240,
 136+'lsaquo'=>8249,
 137+'rsaquo'=>8250,
 138+'euro'=>8364,
 139+'fnof'=>402,
 140+'Alpha'=>913,
 141+'Beta'=>914,
 142+'Gamma'=>915,
 143+'Delta'=>916,
 144+'Epsilon'=>917,
 145+'Zeta'=>918,
 146+'Eta'=>919,
 147+'Theta'=>920,
 148+'Iota'=>921,
 149+'Kappa'=>922,
 150+'Lambda'=>923,
 151+'Mu'=>924,
 152+'Nu'=>925,
 153+'Xi'=>926,
 154+'Omicron'=>927,
 155+'Pi'=>928,
 156+'Rho'=>929,
 157+'Sigma'=>931,
 158+'Tau'=>932,
 159+'Upsilon'=>933,
 160+'Phi'=>934,
 161+'Chi'=>935,
 162+'Psi'=>936,
 163+'Omega'=>937,
 164+'alpha'=>945,
 165+'beta'=>946,
 166+'gamma'=>947,
 167+'delta'=>948,
 168+'epsilon'=>949,
 169+'zeta'=>950,
 170+'eta'=>951,
 171+'theta'=>952,
 172+'iota'=>953,
 173+'kappa'=>954,
 174+'lambda'=>955,
 175+'mu'=>956,
 176+'nu'=>957,
 177+'xi'=>958,
 178+'omicron'=>959,
 179+'pi'=>960,
 180+'rho'=>961,
 181+'sigmaf'=>962,
 182+'sigma'=>963,
 183+'tau'=>964,
 184+'upsilon'=>965,
 185+'phi'=>966,
 186+'chi'=>967,
 187+'psi'=>968,
 188+'omega'=>969,
 189+'thetasym'=>977,
 190+'upsih'=>978,
 191+'piv'=>982,
 192+'bull'=>8226,
 193+'hellip'=>8230,
 194+'prime'=>8242,
 195+'Prime'=>8243,
 196+'oline'=>8254,
 197+'frasl'=>8260,
 198+'weierp'=>8472,
 199+'image'=>8465,
 200+'real'=>8476,
 201+'trade'=>8482,
 202+'alefsym'=>8501,
 203+'larr'=>8592,
 204+'uarr'=>8593,
 205+'rarr'=>8594,
 206+'darr'=>8595,
 207+'harr'=>8596,
 208+'crarr'=>8629,
 209+'lArr'=>8656,
 210+'uArr'=>8657,
 211+'rArr'=>8658,
 212+'dArr'=>8659,
 213+'hArr'=>8660,
 214+'forall'=>8704,
 215+'part'=>8706,
 216+'exist'=>8707,
 217+'empty'=>8709,
 218+'nabla'=>8711,
 219+'isin'=>8712,
 220+'notin'=>8713,
 221+'ni'=>8715,
 222+'prod'=>8719,
 223+'sum'=>8721,
 224+'minus'=>8722,
 225+'lowast'=>8727,
 226+'radic'=>8730,
 227+'prop'=>8733,
 228+'infin'=>8734,
 229+'ang'=>8736,
 230+'and'=>8743,
 231+'or'=>8744,
 232+'cap'=>8745,
 233+'cup'=>8746,
 234+'int'=>8747,
 235+'there4'=>8756,
 236+'sim'=>8764,
 237+'cong'=>8773,
 238+'asymp'=>8776,
 239+'ne'=>8800,
 240+'equiv'=>8801,
 241+'le'=>8804,
 242+'ge'=>8805,
 243+'sub'=>8834,
 244+'sup'=>8835,
 245+'nsub'=>8836,
 246+'sube'=>8838,
 247+'supe'=>8839,
 248+'oplus'=>8853,
 249+'otimes'=>8855,
 250+'perp'=>8869,
 251+'sdot'=>8901,
 252+'lceil'=>8968,
 253+'rceil'=>8969,
 254+'lfloor'=>8970,
 255+'rfloor'=>8971,
 256+'lang'=>9001,
 257+'rang'=>9002,
 258+'loz'=>9674,
 259+'spades'=>9824,
 260+'clubs'=>9827,
 261+'hearts'=>9829,
 262+'diams'=>9830,
 263+'32'=>32,
265264 );
266265
267266 function utf8_chr($code)

Status & tagging log