Index: trunk/wiki2xml/php/filter_named_entities.php |
— | — | @@ -1,266 +1,265 @@ |
2 | 2 | <?php |
3 | | - |
4 | 3 | /** |
5 | 4 | * This file compensates for a bug in PHP 4 and early PHP 5 versions, |
6 | 5 | * which do not replace some named entities correctly. |
7 | | -*/ |
| 6 | + */ |
8 | 7 | |
9 | 8 | $html_named_entities_mapping_mine = array ( |
10 | | - // Obtained with: |
11 | | - // less /usr/share/xml/entities/xhtml/*.ent|grep '^<!ENTITY'|sed -e 's/^<\!ENTITY[ \t]*\([A-Za-z0-9]*\)[ \t]*"&#\([0-9]*\);".*$/"\1"=>\2,/' > /home/dom/data/2005/04/entities-table |
12 | | -"nbsp"=>160, |
13 | | -"iexcl"=>161, |
14 | | -"cent"=>162, |
15 | | -"pound"=>163, |
16 | | -"curren"=>164, |
17 | | -"yen"=>165, |
18 | | -"brvbar"=>166, |
19 | | -"sect"=>167, |
20 | | -"uml"=>168, |
21 | | -"copy"=>169, |
22 | | -"ordf"=>170, |
23 | | -"laquo"=>171, |
24 | | -"not"=>172, |
25 | | -"shy"=>173, |
26 | | -"reg"=>174, |
27 | | -"macr"=>175, |
28 | | -"deg"=>176, |
29 | | -"plusmn"=>177, |
30 | | -"sup2"=>178, |
31 | | -"sup3"=>179, |
32 | | -"acute"=>180, |
33 | | -"micro"=>181, |
34 | | -"para"=>182, |
35 | | -"middot"=>183, |
36 | | -"cedil"=>184, |
37 | | -"sup1"=>185, |
38 | | -"ordm"=>186, |
39 | | -"raquo"=>187, |
40 | | -"frac14"=>188, |
41 | | -"frac12"=>189, |
42 | | -"frac34"=>190, |
43 | | -"iquest"=>191, |
44 | | -"Agrave"=>192, |
45 | | -"Aacute"=>193, |
46 | | -"Acirc"=>194, |
47 | | -"Atilde"=>195, |
48 | | -"Auml"=>196, |
49 | | -"Aring"=>197, |
50 | | -"AElig"=>198, |
51 | | -"Ccedil"=>199, |
52 | | -"Egrave"=>200, |
53 | | -"Eacute"=>201, |
54 | | -"Ecirc"=>202, |
55 | | -"Euml"=>203, |
56 | | -"Igrave"=>204, |
57 | | -"Iacute"=>205, |
58 | | -"Icirc"=>206, |
59 | | -"Iuml"=>207, |
60 | | -"ETH"=>208, |
61 | | -"Ntilde"=>209, |
62 | | -"Ograve"=>210, |
63 | | -"Oacute"=>211, |
64 | | -"Ocirc"=>212, |
65 | | -"Otilde"=>213, |
66 | | -"Ouml"=>214, |
67 | | -"times"=>215, |
68 | | -"Oslash"=>216, |
69 | | -"Ugrave"=>217, |
70 | | -"Uacute"=>218, |
71 | | -"Ucirc"=>219, |
72 | | -"Uuml"=>220, |
73 | | -"Yacute"=>221, |
74 | | -"THORN"=>222, |
75 | | -"szlig"=>223, |
76 | | -"agrave"=>224, |
77 | | -"aacute"=>225, |
78 | | -"acirc"=>226, |
79 | | -"atilde"=>227, |
80 | | -"auml"=>228, |
81 | | -"aring"=>229, |
82 | | -"aelig"=>230, |
83 | | -"ccedil"=>231, |
84 | | -"egrave"=>232, |
85 | | -"eacute"=>233, |
86 | | -"ecirc"=>234, |
87 | | -"euml"=>235, |
88 | | -"igrave"=>236, |
89 | | -"iacute"=>237, |
90 | | -"icirc"=>238, |
91 | | -"iuml"=>239, |
92 | | -"eth"=>240, |
93 | | -"ntilde"=>241, |
94 | | -"ograve"=>242, |
95 | | -"oacute"=>243, |
96 | | -"ocirc"=>244, |
97 | | -"otilde"=>245, |
98 | | -"ouml"=>246, |
99 | | -"divide"=>247, |
100 | | -"oslash"=>248, |
101 | | -"ugrave"=>249, |
102 | | -"uacute"=>250, |
103 | | -"ucirc"=>251, |
104 | | -"uuml"=>252, |
105 | | -"yacute"=>253, |
106 | | -"thorn"=>254, |
107 | | -"yuml"=>255, |
108 | | -"quot"=>34, |
109 | | -"amp"=>38, |
110 | | -"lt"=>60, |
111 | | -"gt"=>62, |
112 | | -"OElig"=>338, |
113 | | -"oelig"=>339, |
114 | | -"Scaron"=>352, |
115 | | -"scaron"=>353, |
116 | | -"Yuml"=>376, |
117 | | -"circ"=>710, |
118 | | -"tilde"=>732, |
119 | | -"ensp"=>8194, |
120 | | -"emsp"=>8195, |
121 | | -"thinsp"=>8201, |
122 | | -"zwnj"=>8204, |
123 | | -"zwj"=>8205, |
124 | | -"lrm"=>8206, |
125 | | -"rlm"=>8207, |
126 | | -"ndash"=>8211, |
127 | | -"mdash"=>8212, |
128 | | -"lsquo"=>8216, |
129 | | -"rsquo"=>8217, |
130 | | -"sbquo"=>8218, |
131 | | -"ldquo"=>8220, |
132 | | -"rdquo"=>8221, |
133 | | -"bdquo"=>8222, |
134 | | -"dagger"=>8224, |
135 | | -"Dagger"=>8225, |
136 | | -"permil"=>8240, |
137 | | -"lsaquo"=>8249, |
138 | | -"rsaquo"=>8250, |
139 | | -"euro"=>8364, |
140 | | -"fnof"=>402, |
141 | | -"Alpha"=>913, |
142 | | -"Beta"=>914, |
143 | | -"Gamma"=>915, |
144 | | -"Delta"=>916, |
145 | | -"Epsilon"=>917, |
146 | | -"Zeta"=>918, |
147 | | -"Eta"=>919, |
148 | | -"Theta"=>920, |
149 | | -"Iota"=>921, |
150 | | -"Kappa"=>922, |
151 | | -"Lambda"=>923, |
152 | | -"Mu"=>924, |
153 | | -"Nu"=>925, |
154 | | -"Xi"=>926, |
155 | | -"Omicron"=>927, |
156 | | -"Pi"=>928, |
157 | | -"Rho"=>929, |
158 | | -"Sigma"=>931, |
159 | | -"Tau"=>932, |
160 | | -"Upsilon"=>933, |
161 | | -"Phi"=>934, |
162 | | -"Chi"=>935, |
163 | | -"Psi"=>936, |
164 | | -"Omega"=>937, |
165 | | -"alpha"=>945, |
166 | | -"beta"=>946, |
167 | | -"gamma"=>947, |
168 | | -"delta"=>948, |
169 | | -"epsilon"=>949, |
170 | | -"zeta"=>950, |
171 | | -"eta"=>951, |
172 | | -"theta"=>952, |
173 | | -"iota"=>953, |
174 | | -"kappa"=>954, |
175 | | -"lambda"=>955, |
176 | | -"mu"=>956, |
177 | | -"nu"=>957, |
178 | | -"xi"=>958, |
179 | | -"omicron"=>959, |
180 | | -"pi"=>960, |
181 | | -"rho"=>961, |
182 | | -"sigmaf"=>962, |
183 | | -"sigma"=>963, |
184 | | -"tau"=>964, |
185 | | -"upsilon"=>965, |
186 | | -"phi"=>966, |
187 | | -"chi"=>967, |
188 | | -"psi"=>968, |
189 | | -"omega"=>969, |
190 | | -"thetasym"=>977, |
191 | | -"upsih"=>978, |
192 | | -"piv"=>982, |
193 | | -"bull"=>8226, |
194 | | -"hellip"=>8230, |
195 | | -"prime"=>8242, |
196 | | -"Prime"=>8243, |
197 | | -"oline"=>8254, |
198 | | -"frasl"=>8260, |
199 | | -"weierp"=>8472, |
200 | | -"image"=>8465, |
201 | | -"real"=>8476, |
202 | | -"trade"=>8482, |
203 | | -"alefsym"=>8501, |
204 | | -"larr"=>8592, |
205 | | -"uarr"=>8593, |
206 | | -"rarr"=>8594, |
207 | | -"darr"=>8595, |
208 | | -"harr"=>8596, |
209 | | -"crarr"=>8629, |
210 | | -"lArr"=>8656, |
211 | | -"uArr"=>8657, |
212 | | -"rArr"=>8658, |
213 | | -"dArr"=>8659, |
214 | | -"hArr"=>8660, |
215 | | -"forall"=>8704, |
216 | | -"part"=>8706, |
217 | | -"exist"=>8707, |
218 | | -"empty"=>8709, |
219 | | -"nabla"=>8711, |
220 | | -"isin"=>8712, |
221 | | -"notin"=>8713, |
222 | | -"ni"=>8715, |
223 | | -"prod"=>8719, |
224 | | -"sum"=>8721, |
225 | | -"minus"=>8722, |
226 | | -"lowast"=>8727, |
227 | | -"radic"=>8730, |
228 | | -"prop"=>8733, |
229 | | -"infin"=>8734, |
230 | | -"ang"=>8736, |
231 | | -"and"=>8743, |
232 | | -"or"=>8744, |
233 | | -"cap"=>8745, |
234 | | -"cup"=>8746, |
235 | | -"int"=>8747, |
236 | | -"there4"=>8756, |
237 | | -"sim"=>8764, |
238 | | -"cong"=>8773, |
239 | | -"asymp"=>8776, |
240 | | -"ne"=>8800, |
241 | | -"equiv"=>8801, |
242 | | -"le"=>8804, |
243 | | -"ge"=>8805, |
244 | | -"sub"=>8834, |
245 | | -"sup"=>8835, |
246 | | -"nsub"=>8836, |
247 | | -"sube"=>8838, |
248 | | -"supe"=>8839, |
249 | | -"oplus"=>8853, |
250 | | -"otimes"=>8855, |
251 | | -"perp"=>8869, |
252 | | -"sdot"=>8901, |
253 | | -"lceil"=>8968, |
254 | | -"rceil"=>8969, |
255 | | -"lfloor"=>8970, |
256 | | -"rfloor"=>8971, |
257 | | -"lang"=>9001, |
258 | | -"rang"=>9002, |
259 | | -"loz"=>9674, |
260 | | -"spades"=>9824, |
261 | | -"clubs"=>9827, |
262 | | -"hearts"=>9829, |
263 | | -"diams"=>9830, |
264 | | -"32"=>32, |
| 9 | + // Obtained with: |
| 10 | + // less /usr/share/xml/entities/xhtml/*.ent|grep '^<!ENTITY'|sed -e 's/^<\!ENTITY[ \t]*\([A-Za-z0-9]*\)[ \t]*"&#\([0-9]*\);".*$/"\1"=>\2,/' > /home/dom/data/2005/04/entities-table |
| 11 | +'nbsp'=>160, |
| 12 | +'iexcl'=>161, |
| 13 | +'cent'=>162, |
| 14 | +'pound'=>163, |
| 15 | +'curren'=>164, |
| 16 | +'yen'=>165, |
| 17 | +'brvbar'=>166, |
| 18 | +'sect'=>167, |
| 19 | +'uml'=>168, |
| 20 | +'copy'=>169, |
| 21 | +'ordf'=>170, |
| 22 | +'laquo'=>171, |
| 23 | +'not'=>172, |
| 24 | +'shy'=>173, |
| 25 | +'reg'=>174, |
| 26 | +'macr'=>175, |
| 27 | +'deg'=>176, |
| 28 | +'plusmn'=>177, |
| 29 | +'sup2'=>178, |
| 30 | +'sup3'=>179, |
| 31 | +'acute'=>180, |
| 32 | +'micro'=>181, |
| 33 | +'para'=>182, |
| 34 | +'middot'=>183, |
| 35 | +'cedil'=>184, |
| 36 | +'sup1'=>185, |
| 37 | +'ordm'=>186, |
| 38 | +'raquo'=>187, |
| 39 | +'frac14'=>188, |
| 40 | +'frac12'=>189, |
| 41 | +'frac34'=>190, |
| 42 | +'iquest'=>191, |
| 43 | +'Agrave'=>192, |
| 44 | +'Aacute'=>193, |
| 45 | +'Acirc'=>194, |
| 46 | +'Atilde'=>195, |
| 47 | +'Auml'=>196, |
| 48 | +'Aring'=>197, |
| 49 | +'AElig'=>198, |
| 50 | +'Ccedil'=>199, |
| 51 | +'Egrave'=>200, |
| 52 | +'Eacute'=>201, |
| 53 | +'Ecirc'=>202, |
| 54 | +'Euml'=>203, |
| 55 | +'Igrave'=>204, |
| 56 | +'Iacute'=>205, |
| 57 | +'Icirc'=>206, |
| 58 | +'Iuml'=>207, |
| 59 | +'ETH'=>208, |
| 60 | +'Ntilde'=>209, |
| 61 | +'Ograve'=>210, |
| 62 | +'Oacute'=>211, |
| 63 | +'Ocirc'=>212, |
| 64 | +'Otilde'=>213, |
| 65 | +'Ouml'=>214, |
| 66 | +'times'=>215, |
| 67 | +'Oslash'=>216, |
| 68 | +'Ugrave'=>217, |
| 69 | +'Uacute'=>218, |
| 70 | +'Ucirc'=>219, |
| 71 | +'Uuml'=>220, |
| 72 | +'Yacute'=>221, |
| 73 | +'THORN'=>222, |
| 74 | +'szlig'=>223, |
| 75 | +'agrave'=>224, |
| 76 | +'aacute'=>225, |
| 77 | +'acirc'=>226, |
| 78 | +'atilde'=>227, |
| 79 | +'auml'=>228, |
| 80 | +'aring'=>229, |
| 81 | +'aelig'=>230, |
| 82 | +'ccedil'=>231, |
| 83 | +'egrave'=>232, |
| 84 | +'eacute'=>233, |
| 85 | +'ecirc'=>234, |
| 86 | +'euml'=>235, |
| 87 | +'igrave'=>236, |
| 88 | +'iacute'=>237, |
| 89 | +'icirc'=>238, |
| 90 | +'iuml'=>239, |
| 91 | +'eth'=>240, |
| 92 | +'ntilde'=>241, |
| 93 | +'ograve'=>242, |
| 94 | +'oacute'=>243, |
| 95 | +'ocirc'=>244, |
| 96 | +'otilde'=>245, |
| 97 | +'ouml'=>246, |
| 98 | +'divide'=>247, |
| 99 | +'oslash'=>248, |
| 100 | +'ugrave'=>249, |
| 101 | +'uacute'=>250, |
| 102 | +'ucirc'=>251, |
| 103 | +'uuml'=>252, |
| 104 | +'yacute'=>253, |
| 105 | +'thorn'=>254, |
| 106 | +'yuml'=>255, |
| 107 | +'quot'=>34, |
| 108 | +'amp'=>38, |
| 109 | +'lt'=>60, |
| 110 | +'gt'=>62, |
| 111 | +'OElig'=>338, |
| 112 | +'oelig'=>339, |
| 113 | +'Scaron'=>352, |
| 114 | +'scaron'=>353, |
| 115 | +'Yuml'=>376, |
| 116 | +'circ'=>710, |
| 117 | +'tilde'=>732, |
| 118 | +'ensp'=>8194, |
| 119 | +'emsp'=>8195, |
| 120 | +'thinsp'=>8201, |
| 121 | +'zwnj'=>8204, |
| 122 | +'zwj'=>8205, |
| 123 | +'lrm'=>8206, |
| 124 | +'rlm'=>8207, |
| 125 | +'ndash'=>8211, |
| 126 | +'mdash'=>8212, |
| 127 | +'lsquo'=>8216, |
| 128 | +'rsquo'=>8217, |
| 129 | +'sbquo'=>8218, |
| 130 | +'ldquo'=>8220, |
| 131 | +'rdquo'=>8221, |
| 132 | +'bdquo'=>8222, |
| 133 | +'dagger'=>8224, |
| 134 | +'Dagger'=>8225, |
| 135 | +'permil'=>8240, |
| 136 | +'lsaquo'=>8249, |
| 137 | +'rsaquo'=>8250, |
| 138 | +'euro'=>8364, |
| 139 | +'fnof'=>402, |
| 140 | +'Alpha'=>913, |
| 141 | +'Beta'=>914, |
| 142 | +'Gamma'=>915, |
| 143 | +'Delta'=>916, |
| 144 | +'Epsilon'=>917, |
| 145 | +'Zeta'=>918, |
| 146 | +'Eta'=>919, |
| 147 | +'Theta'=>920, |
| 148 | +'Iota'=>921, |
| 149 | +'Kappa'=>922, |
| 150 | +'Lambda'=>923, |
| 151 | +'Mu'=>924, |
| 152 | +'Nu'=>925, |
| 153 | +'Xi'=>926, |
| 154 | +'Omicron'=>927, |
| 155 | +'Pi'=>928, |
| 156 | +'Rho'=>929, |
| 157 | +'Sigma'=>931, |
| 158 | +'Tau'=>932, |
| 159 | +'Upsilon'=>933, |
| 160 | +'Phi'=>934, |
| 161 | +'Chi'=>935, |
| 162 | +'Psi'=>936, |
| 163 | +'Omega'=>937, |
| 164 | +'alpha'=>945, |
| 165 | +'beta'=>946, |
| 166 | +'gamma'=>947, |
| 167 | +'delta'=>948, |
| 168 | +'epsilon'=>949, |
| 169 | +'zeta'=>950, |
| 170 | +'eta'=>951, |
| 171 | +'theta'=>952, |
| 172 | +'iota'=>953, |
| 173 | +'kappa'=>954, |
| 174 | +'lambda'=>955, |
| 175 | +'mu'=>956, |
| 176 | +'nu'=>957, |
| 177 | +'xi'=>958, |
| 178 | +'omicron'=>959, |
| 179 | +'pi'=>960, |
| 180 | +'rho'=>961, |
| 181 | +'sigmaf'=>962, |
| 182 | +'sigma'=>963, |
| 183 | +'tau'=>964, |
| 184 | +'upsilon'=>965, |
| 185 | +'phi'=>966, |
| 186 | +'chi'=>967, |
| 187 | +'psi'=>968, |
| 188 | +'omega'=>969, |
| 189 | +'thetasym'=>977, |
| 190 | +'upsih'=>978, |
| 191 | +'piv'=>982, |
| 192 | +'bull'=>8226, |
| 193 | +'hellip'=>8230, |
| 194 | +'prime'=>8242, |
| 195 | +'Prime'=>8243, |
| 196 | +'oline'=>8254, |
| 197 | +'frasl'=>8260, |
| 198 | +'weierp'=>8472, |
| 199 | +'image'=>8465, |
| 200 | +'real'=>8476, |
| 201 | +'trade'=>8482, |
| 202 | +'alefsym'=>8501, |
| 203 | +'larr'=>8592, |
| 204 | +'uarr'=>8593, |
| 205 | +'rarr'=>8594, |
| 206 | +'darr'=>8595, |
| 207 | +'harr'=>8596, |
| 208 | +'crarr'=>8629, |
| 209 | +'lArr'=>8656, |
| 210 | +'uArr'=>8657, |
| 211 | +'rArr'=>8658, |
| 212 | +'dArr'=>8659, |
| 213 | +'hArr'=>8660, |
| 214 | +'forall'=>8704, |
| 215 | +'part'=>8706, |
| 216 | +'exist'=>8707, |
| 217 | +'empty'=>8709, |
| 218 | +'nabla'=>8711, |
| 219 | +'isin'=>8712, |
| 220 | +'notin'=>8713, |
| 221 | +'ni'=>8715, |
| 222 | +'prod'=>8719, |
| 223 | +'sum'=>8721, |
| 224 | +'minus'=>8722, |
| 225 | +'lowast'=>8727, |
| 226 | +'radic'=>8730, |
| 227 | +'prop'=>8733, |
| 228 | +'infin'=>8734, |
| 229 | +'ang'=>8736, |
| 230 | +'and'=>8743, |
| 231 | +'or'=>8744, |
| 232 | +'cap'=>8745, |
| 233 | +'cup'=>8746, |
| 234 | +'int'=>8747, |
| 235 | +'there4'=>8756, |
| 236 | +'sim'=>8764, |
| 237 | +'cong'=>8773, |
| 238 | +'asymp'=>8776, |
| 239 | +'ne'=>8800, |
| 240 | +'equiv'=>8801, |
| 241 | +'le'=>8804, |
| 242 | +'ge'=>8805, |
| 243 | +'sub'=>8834, |
| 244 | +'sup'=>8835, |
| 245 | +'nsub'=>8836, |
| 246 | +'sube'=>8838, |
| 247 | +'supe'=>8839, |
| 248 | +'oplus'=>8853, |
| 249 | +'otimes'=>8855, |
| 250 | +'perp'=>8869, |
| 251 | +'sdot'=>8901, |
| 252 | +'lceil'=>8968, |
| 253 | +'rceil'=>8969, |
| 254 | +'lfloor'=>8970, |
| 255 | +'rfloor'=>8971, |
| 256 | +'lang'=>9001, |
| 257 | +'rang'=>9002, |
| 258 | +'loz'=>9674, |
| 259 | +'spades'=>9824, |
| 260 | +'clubs'=>9827, |
| 261 | +'hearts'=>9829, |
| 262 | +'diams'=>9830, |
| 263 | +'32'=>32, |
265 | 264 | ); |
266 | 265 | |
267 | 266 | function utf8_chr($code) |