Index: trunk/wiki2xml/php/local.php |
— | — | @@ -0,0 +1,7 @@ |
<?php
# Local site configuration for the wiki2xml front end (w2x.php).
#
# Uses the full "<?php" open tag: the short "<?" form is only parsed when the
# short_open_tag ini setting is enabled, otherwise this file would be sent to
# the browser as plain text. The closing tag is omitted on purpose so that no
# stray whitespace can be emitted before w2x.php sends its headers.

# Namespace prefix that ContentProvider::get_template_text() puts in front of
# template titles which do not carry a namespace of their own.
$xmlg["namespace_template"] = "Template" ;

# Host and script path (without "http://" and without "/index.php") of the
# wiki that raw article text is fetched from.
$xmlg["site_base_url"] = "en.wikipedia.org/w" ; # Dummy default
#$xmlg["site_base_url"] = "127.0.0.1/phase3" ;
Property changes on: trunk/wiki2xml/php/local.php |
___________________________________________________________________ |
Added: svn:keywords |
1 | 9 | + Author Date Id Revision |
Added: svn:eol-style |
2 | 10 | + native |
Index: trunk/wiki2xml/php/w2x.php |
— | — | @@ -0,0 +1,109 @@ |
| 2 | +<? |
| 3 | +# Copyright by Magnus Manske (2005) |
| 4 | +# Released under GPL |
| 5 | + |
| 6 | +include ( "wiki2xml.php" ) ; |
| 7 | +include ( "local.php" ) ; |
| 8 | + |
# Supplies raw wiki text for articles and templates, fetched over HTTP from
# the wiki configured in the global $xmlg array.
class ContentProvider {
    var $article_cache = array () ; # title => wiki text, filled when caching is requested
    var $first_title = "" ;         # first non-empty title ever requested; used for {{PAGENAME}}

    # Returns the raw wiki text of $title.
    #   $title    : article title; surrounding whitespace is ignored
    #   $do_cache : when true, the fetched text is remembered in $article_cache
    # Returns "" for an empty title or when the page could not be fetched.
    function get_wiki_text ( $title , $do_cache = false ) {
        global $xmlg ;
        $title = trim ( $title ) ;
        if ( $title == "" ) return "" ; # Just in case...
        if ( isset ( $this->article_cache[$title] ) ) # Already in the cache
            return $this->article_cache[$title] ;

        if ( $this->first_title == "" ) $this->first_title = $title ;

        # Retrieve it
        $url = "http://" . $xmlg["site_base_url"] . "/index.php?action=raw&title=" . urlencode ( $title ) ;
        $s = file_get_contents ( $url ) ;

        # file_get_contents() returns false on failure; hand the parser an
        # empty string instead so it never has to deal with a boolean.
        if ( $s === false ) $s = "" ;

        if ( $do_cache ) $this->article_cache[$title] = $s ;
        return $s ;
    }

    # Returns the text a template call should be replaced with.
    # "PAGENAME" and "PAGENAMEE" resolve to the first requested title (plain
    # and URL-encoded). A title without ":" gets the configured template
    # namespace prepended; a leading ":" selects the main namespace.
    function get_template_text ( $title ) {
        global $xmlg ;

        # Check for fix variables
        if ( $title == "PAGENAME" ) return $this->first_title ;
        if ( $title == "PAGENAMEE" ) return urlencode ( $this->first_title ) ;

        $title = trim ( $title ) ;
        if ( count ( explode ( ":" , $title , 2 ) ) == 1 ) # No ":" in the title
            $title = $xmlg["namespace_template"] . ":" . $title ;
        else if ( substr ( $title , 0 , 1 ) == ":" ) # Main namespace
            $title = substr ( $title , 1 ) ;
        return $this->get_wiki_text ( $title , true ) ; # Cache template texts
    }
}
| 44 | + |
| 45 | +## TIMER FUNCTION |
| 46 | + |
# Returns the current time as a float: microtime() yields "<usec> <sec>",
# and the two parts are added together.
function microtime_float()
{
    $parts = explode(" ", microtime());
    $fraction = (float) $parts[0];
    $seconds = (float) $parts[1];
    return ($fraction + $seconds);
}
| 52 | + |
| 53 | +## MAIN PROGRAM |
| 54 | + |
# Request dispatcher:
#   POST with "doit"     -> convert the submitted text (or article list) to XML
#   GET with "showsource" -> dump the converter source as plain text
#   anything else        -> show the input form
if ( isset ( $_POST['doit'] ) ) {
    $wikitext = isset ( $_POST['text'] ) ? $_POST['text'] : "" ;
    # Only undo slashes that PHP itself added. Without magic quotes (gone in
    # current PHP) stripslashes() would eat genuine backslashes in the text.
    if ( ini_get ( 'magic_quotes_gpc' ) ) $wikitext = stripslashes ( $wikitext ) ;

    $content_provider = new ContentProvider ;
    # Keep the defaults from local.php when a field is missing from the request
    if ( isset ( $_POST['site'] ) ) $xmlg["site_base_url"] = $_POST['site'] ;
    if ( isset ( $_POST['template'] ) ) $xmlg["namespace_template"] = $_POST['template'] ;
    # A missing selector behaves like the pre-checked "list of articles" radio
    $whatsthis = isset ( $_POST['whatsthis'] ) ? $_POST['whatsthis'] : "articlelist" ;

    header('Content-type: text/xml; charset=utf-8');
    print "<?xml version='1.0' encoding='UTF-8' ?>\n" ;

    $t = microtime_float() ;
    $text = "" ;
    $article_open = '<article>' ;
    if ( $whatsthis == "wikitext" ) {
        $p = new wiki2xml ;
        $text = $article_open . $p->parse ( $wikitext ) . "</article>" ;
    } else {
        # One article title per line; each one is fetched and parsed separately
        $articles = explode ( "\n" , $wikitext ) ;
        foreach ( $articles AS $a ) {
            $p = new wiki2xml ;
            $wikitext = $content_provider->get_wiki_text ( $a ) ;
            $text .= $article_open . $p->parse ( $wikitext ) . "</article>" ;
        }
    }
    $t = microtime_float() - $t ;
# xmlns:xhtml=\"http://www.w3.org/1999/xhtml\"
    print "<articles xmlns:xhtml=\" \" rendertime='{$t} sec'>{$text}</articles>" ;
} else if ( isset ( $_GET['showsource'] ) ) {
    header('Content-type: text/plain; charset=utf-8');
    print file_get_contents ( "wiki2xml.php" ) ;
} else {
    # Escape the configured defaults before they go into single-quoted attributes
    $site_value = htmlspecialchars ( $xmlg["site_base_url"] , ENT_QUOTES ) ;
    $template_value = htmlspecialchars ( $xmlg["namespace_template"] , ENT_QUOTES ) ;

    header('Content-type: text/html; charset=utf-8');
    # The "source" link targets this script (relative "?showsource=true"),
    # because the showsource handler lives in the branch above.
    print "
<html><head></head><body><form method='post'>
<h1>Magnus' magic wiki-to-XML converter</h1>
<p>All written in PHP - so portable, so incredibly slow... (see <a href=\"?showsource=true\">the source</a>)</p>
<p>
Known bugs:
<ul>
<li>The \";\" markup doesn't work yet (not implemented); \":\" does, though</li>
</ul>
</p>
<h2>Paste wikitext here</h2>
<textarea rows='20' cols='80' style='width:100%' name='text'></textarea><br/>
This is
<INPUT type='radio' name='whatsthis' value='wikitext'>raw wikitext
<INPUT checked type='radio' name='whatsthis' value='articlelist'>a list of articles
<br/>
Site : http://<input type='text' name='site' value='".$site_value."'/>/index.php<br/>
Template namespace name : <input type='text' name='template' value='".$template_value."'/><br/>
<input type='submit' name='doit' value='Convert'/>
</form></body></html>" ;
}
| 109 | + |
| 110 | +?> |
\ No newline at end of file |
Property changes on: trunk/wiki2xml/php/w2x.php |
___________________________________________________________________ |
Added: svn:keywords |
1 | 111 | + Author Date Id Revision |
Added: svn:eol-style |
2 | 112 | + native |
Index: trunk/wiki2xml/php/wiki2xml.php |
— | — | @@ -0,0 +1,1089 @@ |
| 2 | +<? |
| 3 | +# Copyright by Magnus Manske (2005) |
| 4 | +# Released under GPL |
| 5 | + |
| 6 | +# TODO : |
| 7 | +# The ";" thingy |
| 8 | + |
| 9 | +class wiki2xml |
| 10 | + { |
| 11 | + var $protocols = array ( "http" , "https" , "news" , "ftp" , "irc" , "mailto" ) ; |
| 12 | + var $errormessage = "ERROR!" ; |
| 13 | + var $compensate_markup_errors = false; |
| 14 | + var $auto_fill_templates = true ; # Will try and replace templates right inline, instead of using <template> tags; requires global $content_provider |
| 15 | + var $use_space_tag = true ; # Use <space/> instead of spaces before and after tags |
| 16 | + var $allowed = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890 #:;,%="\'\\' ; |
| 17 | + var $directhtmltags = array ( |
| 18 | + "b" => "xhtml:b", |
| 19 | + "i" => "xhtml:i", |
| 20 | + "u" => "xhtml:u", |
| 21 | + "s" => "xhtml:s", |
| 22 | + "p" => "xhtml:p", |
| 23 | + "br" => "xhtml:br", |
| 24 | + "div" => "xhtml:div", |
| 25 | + "span" => "xhtml:span", |
| 26 | + ) ; |
| 27 | + |
| 28 | + var $w ; # The wiki text |
| 29 | + var $wl ; # The wiki text length |
| 30 | + var $bold_italics ; |
| 31 | + var $tables ; |
| 32 | + |
| 33 | + # Some often used functions |
# Runs the parser method "p_<f>" at position $a, once or repeatedly.
#   $a, $xml     : passed through to the parser method (position / output)
#   $f           : method name without the "p_" prefix
#   $atleastonce : when true, at least one match is required for success
#   $many        : when true, the method is re-run for as long as it matches
# Returns true when $atleastonce is false, otherwise whether it matched.
function fitit(&$a, &$xml, &$f, $atleastonce, $many)
{
    # Build the method name in a local. $f is a reference parameter, so
    # writing the prefixed name back into it would change the caller's
    # variable and yield "p_p_..." if that variable were reused.
    $method = "p_{$f}";
    $cnt = 0;
    do {
        $matched = $this->$method($a, $xml);
        if ($matched) $cnt++;
    } while ($matched && $many);

    if (!$atleastonce) return true;
    return $cnt > 0;
}
| 48 | + |
# Tries parser method "p_<f>" exactly one time; true if it matched.
function once(&$a, &$xml, $f)
{
    $must_match = true;
    $repeat = false;
    return $this->fitit($a, $xml, $f, $must_match, $repeat);
}
| 53 | + |
# Runs parser method "p_<f>" for as long as it matches; true if it matched
# at least once.
function onceormore(&$a, &$xml, $f)
{
    $must_match = true;
    $repeat = true;
    return $this->fitit($a, $xml, $f, $must_match, $repeat);
}
| 58 | + |
# Runs parser method "p_<f>" for as long as it matches; always returns true,
# even when there was no match at all.
function many(&$a, &$xml, $f)
{
    $must_match = false;
    $repeat = true;
    return $this->fitit($a, $xml, $f, $must_match, $repeat);
}
| 63 | + |
# Tests whether the wiki text continues with $t at position $a.
# On a match, $a is moved past $t unless $movecounter is false.
function nextis(&$a, $t, $movecounter = true)
{
    $length = strlen($t);
    $found = (substr($this->w, $a, $length) == $t);
    if ($found && $movecounter) {
        $a += $length;
    }
    return $found;
}
| 70 | + |
# Appends the (XML-escaped) character at position $a to $x and advances $a.
# Returns false, leaving both untouched, when $a is at or past the end.
function nextchar(&$a, &$x)
{
    if ($a >= $this->wl) return false;
    # The text is read one byte at a time. With the default UTF-8 charset a
    # lone byte of a multi-byte character is an invalid sequence and would be
    # dropped or replaced by htmlspecialchars(); a single-byte charset lets
    # every byte through and still escapes & < > and the double quote.
    $x .= htmlspecialchars($this->w[$a], ENT_COMPAT, 'ISO-8859-1');
    $a++;
    return true;
}
| 78 | + |
# True when $c compares within 'A'..'Z' or within 'a'..'z'.
function ischaracter($c)
{
    $is_upper = ($c >= 'A' && $c <= 'Z');
    $is_lower = ($c >= 'a' && $c <= 'z');
    return $is_upper || $is_lower;
}
| 85 | + |
# Advances $a over consecutive occurrences of $blank (a space by default),
# stopping at the first different character or at the end of the text.
function skipblanks(&$a, $blank = " ")
{
    while ($a < $this->wl && $this->w[$a] == $blank) {
        $a++;
    }
}
| 94 | + |
| 95 | + ############## |
| 96 | + |
| 97 | + |
# Parses a link/template target up to "|" or $closeit and emits it as
# <target>...</target> (delegates to p_internal_link_text in target mode).
function p_internal_link_target(&$a, &$xml, $closeit = "]]")
{
    $is_target = true;
    return $this->p_internal_link_text($a, $xml, $is_target, $closeit);
}
| 102 | + |
# Variant of p_internal_link_text that appends the parsed text to $xml
# without wrapping it in <part>/<target> tags.
function p_internal_link_text2(&$a, &$xml, $closeit)
{
    $is_target = false;
    $wrap_in_tag = false;
    return $this->p_internal_link_text($a, $xml, $is_target, $closeit, $wrap_in_tag);
}
| 107 | + |
# Parses one segment of a link or template call, starting at $a and ending
# just before the next "|" or before $closeit (neither is consumed).
#   $istarget : true  -> only nested templates are recognised, output is <target>
#               false -> links, templates, HTML and bold/italics are
#                        recognised, output is <part>
#   $closeit  : closing markup of the surrounding construct
#   $mark     : false -> append the bare text without a wrapping tag
# Returns false (leaving $a and $xml untouched) when the end of the text is
# reached first, or on a line break unless $closeit is "}}".
function p_internal_link_text(&$a, &$xml, $istarget = false, $closeit = "]]", $mark = true)
{
    $b = $a;
    $x = "";
    while (true) {
        # Checked on every pass: a nested construct may have consumed the
        # rest of the text, and reading $this->w[$b] past the end would be
        # an invalid string offset.
        if ($b >= $this->wl) return false;

        $c = $this->w[$b];
        if ($closeit != "}}" && $c == "\n") return false;
        if ($c == "|") break;
        if ($this->nextis($b, $closeit, false)) break;
        if (!$istarget) {
            if ($c == "[" && $this->once($b, $x, "internal_link")) continue;
            if ($c == "[" && $this->once($b, $x, "external_link")) continue;
            if ($this->once($b, $x, "external_freelink")) continue;
            if ($c == "{" && $this->once($b, $x, "template_variable")) continue;
            if ($c == "{" && $this->once($b, $x, "template")) continue;
            if ($c == "<" && $this->once($b, $x, "html")) continue;
            # Bold/italics recurse into the remainder of this segment themselves
            if ($c == "'" && $this->p_bold($b, $x, "internal_link_text2", $closeit)) break;
            if ($c == "'" && $this->p_italics($b, $x, "internal_link_text2", $closeit)) break;
        } else {
            if ($c == "{" && $this->once($b, $x, "template")) continue;
        }

        # Ordinary character. The text is processed byte by byte, so a
        # single-byte charset is named explicitly: with the default UTF-8
        # charset each byte of a multi-byte character is an invalid sequence
        # that htmlspecialchars() drops or replaces. ENT_COMPAT keeps
        # apostrophes literal, as template titles are taken from this text.
        $x .= htmlspecialchars($c, ENT_COMPAT, 'ISO-8859-1');
        $b++;
    }

    $x = trim(str_replace("\n", "", $x));
    if ($mark) {
        if ($istarget) $xml .= "<target>{$x}</target>";
        else $xml .= "<part>{$x}</part>";
    } else {
        $xml .= $x;
    }
    $a = $b;
    return true;
}
| 151 | + |
# Collects the letters that directly follow a link and emits them as
# <trail>...</trail>. Returns false when there are none.
function p_internal_link_trail(&$a, &$xml)
{
    $pos = $a;
    $trail = "";
    for (;;) {
        $ch = "";
        if (!$this->nextchar($pos, $ch)) break;     # end of text
        if (!$this->ischaracter($ch)) {
            $pos--;                                 # give the non-letter back
            break;
        }
        $trail .= $ch;
    }

    if ($trail == "") return false; # No link trail
    $xml .= "<trail>{$trail}</trail>";
    $a = $pos;
    return true;
}
| 175 | + |
# Parses "[[target|part|...]]trail" and emits it as <link>...</link>.
# $a and $xml are only changed when the whole link parsed successfully.
function p_internal_link(&$a, &$xml)
{
    $pos = $a;
    $inner = "";
    if (!$this->nextis($pos, "[[")) return false;
    if (!$this->p_internal_link_target($pos, $inner, "]]")) return false;

    # Every "|" introduces another part, until the closing "]]" is found
    while (!$this->nextis($pos, "]]")) {
        if (!$this->nextis($pos, "|")) return false;
        if (!$this->p_internal_link_text($pos, $inner, false, "]]")) return false;
    }

    $this->p_internal_link_trail($pos, $inner); # optional, result not needed
    $xml .= "<link>{$inner}</link>";
    $a = $pos;
    return true;
}
| 193 | + |
| 194 | + # Template and template variable, utilizing parts of the internal link methods |
# Parses a template call "{{target|arg|key=value|...}}".
#
# With $auto_fill_templates set, the template text is obtained from the
# global $content_provider, its {{{name}}} placeholders are replaced by the
# call's arguments, and the result is spliced into the wiki source in place
# of the call. $a is NOT advanced in that case, so the caller parses the
# inserted text next. NOTE(review): nothing here limits the expansion depth,
# so a template that includes itself would presumably never terminate.
#
# Otherwise the call is emitted as <template>...</template> and $a is moved
# past the closing "}}".
function p_template(&$a, &$xml)
{
    $x = "";
    $b = $a;
    if (!$this->nextis($b, "{{")) return false;
    if (!$this->p_internal_link_target($b, $x, "}}")) return false;
    $target = $x;
    $variables = array();
    $vcount = 1;
    while (true) {
        if ($this->nextis($b, "}}")) break;
        if (!$this->nextis($b, "|")) return false;
        $l1 = strlen($x);
        if (!$this->p_internal_link_text($b, $x, false, "}}")) return false;

        # Take what this argument appended to $x and drop its <part> wrapper
        $v = substr($x, $l1);
        $v = str_replace("<part>", "", $v);
        $v = str_replace("</part>", "", $v);

        # Split at the FIRST "=" only, so values may contain "=" themselves
        $v = explode("=", $v, 2);
        if (count($v) < 2) $vk = $vcount;
        else $vk = array_shift($v);
        $vv = array_shift($v);
        $variables[$vk] = $vv;
        # A named argument also fills its positional slot if that is still free
        if (!isset($variables[$vcount])) $variables[$vcount] = $vv;
        $vcount++;
    }

    if ($this->auto_fill_templates) {
        # Replace the template call with the template text
        global $content_provider;

        # Reduce "<target>Name</target>" to "Name". array_pop()/array_shift()
        # take their argument by reference, so explode() results go through
        # a variable first.
        $pieces = explode(">", $target, 2);
        $target = array_pop($pieces);
        $pieces = explode("<", $target, 2);
        $target = array_shift($pieces);

        # Cast guards against a provider handing back false for a failed fetch
        $between = (string) $content_provider->get_template_text($target);

        # Replacing template variables. ATTENTION: Template variables within
        # <nowiki> sections of templates will be replaced as well!
        foreach ($variables as $vk => $vv) {
            $between = str_replace('{{{' . $vk . '}}}', $vv, $between);
        }

        # Change source (!)
        $before = substr($this->w, 0, $a);
        $after = substr($this->w, $b);
        $this->w = $before . $between . $after;
        $this->wl = strlen($this->w);
    } else {
        $xml .= "<template>{$x}</template>";
        $a = $b;
    }
    return true;
}
| 246 | + |
# Parses a template variable "{{{...}}}" and emits it as
# <templatevar>...</templatevar>. Fails when anything other than "}}}"
# follows the parsed text.
function p_template_variable(&$a, &$xml)
{
    $pos = $a;
    $inner = "";
    $parsed = $this->nextis($pos, "{{{")
        && $this->p_internal_link_text($pos, $inner, false, "}}}")
        && $this->nextis($pos, "}}}");
    if (!$parsed) return false;

    $xml .= "<templatevar>{$inner}</templatevar>";
    $a = $pos;
    return true;
}
| 258 | + |
| 259 | + # Bold / italics |
# Handles the ''' markup: opens or closes a <bold> element (see p_intwined).
function p_bold(&$a, &$xml, $recurse = "restofline", $end = "")
{
    $tag = "bold";
    $markup = "'''";
    return $this->p_intwined($a, $xml, $tag, $markup, $recurse, $end);
}
| 264 | + |
# Handles the '' markup: opens or closes an <italics> element (see p_intwined).
function p_italics(&$a, &$xml, $recurse = "restofline", $end = "")
{
    $tag = "italics";
    $markup = "''";
    return $this->p_intwined($a, $xml, $tag, $markup, $recurse, $end);
}
| 269 | + |
# Shared implementation for bold and italics.
# $this->bold_italics holds the currently open elements as a string of
# initials ("B", "I"), innermost last. If $markup matches at $a, the element
# is closed when it is the innermost open one, or opened when it is not open
# at all; then the rest is parsed by "p_<recurse>" (with $end passed along
# when it is not ""). The open-element string is restored afterwards.
function p_intwined(&$a, &$xml, $tag, $markup, $recurse, $end)
{
    $pos = $a;
    if (!$this->nextis($pos, $markup)) return false;

    $initial = substr(ucfirst($tag), 0, 1);
    $saved = $this->bold_italics;

    if (substr($saved, -1) == $initial) {
        # Innermost open element: close it
        $out = "</{$tag}>";
        $this->bold_italics = substr($saved, 0, -1);
    } else {
        # Open further out: can't close a tag that isn't the innermost one
        if (strpos($saved, $initial) !== false) return false;
        $out = "<{$tag}>";
        $this->bold_italics = $saved . $initial;
    }

    if ($end == "") {
        $ok = $this->once($pos, $out, $recurse);
    } else {
        $method = "p_{$recurse}";
        $ok = $this->$method($pos, $out, $end);
    }

    $this->bold_italics = $saved;
    if (!$ok) return false;

    $xml .= $out;
    $a = $pos;
    return true;
}
| 310 | + |
# Copies plain text (XML-escaped) from position $a until one of the
# $goodstop strings is next; the stop string itself is not consumed.
# Fails when one of the $badstop strings comes first, or when $goodstop is
# non-empty and the end of the text is reached without finding one.
# $a and $xml are only changed on success.
function scanplaintext(&$a, &$xml, $goodstop, $badstop)
{
    $b = $a;
    $x = "";
    while ($b < $this->wl) {
        foreach ($goodstop as $s)
            if ($this->nextis($b, $s, false)) break 2;
        foreach ($badstop as $s)
            if ($this->nextis($b, $s, false)) return false;

        # The text is read byte by byte; a single-byte charset keeps the
        # bytes of multi-byte characters intact (with the default UTF-8
        # charset htmlspecialchars() drops or replaces them). ENT_QUOTES is
        # used because results of this scan are placed into single-quoted
        # XML attributes (e.g. the extension name in p_html).
        $x .= htmlspecialchars($this->w[$b], ENT_QUOTES, 'ISO-8859-1');
        $b++;
    }
    if (count($goodstop) > 0 && $b >= $this->wl) return false; # Reached end; not good
    $a = $b;
    $xml .= $x;
    return true;
}
| 330 | + |
| 331 | + # External link |
# Parses a bare URL ("<protocol>://...") that starts at $a.
# The URL ends at a blank or line break (and at "]" when $mark is false);
# one trailing "." or "," is not counted as part of it.
#   $mark = true  : emit <link type='external' url='...'/>
#   $mark = false : append the escaped URL only
# Returns false when no protocol from $this->protocols followed by "://"
# is found at $a.
function p_external_freelink(&$a, &$xml, $mark = true)
{
    $b = $a;
    $protocol = "";
    foreach ($this->protocols as $p) {
        if ($this->nextis($b, $p . "://")) {
            $protocol = $p;
            break;
        }
    }
    if ($protocol == "") return false;

    $x = "{$protocol}://";
    while ($b < $this->wl) {
        $c = $this->w[$b];
        if ($c == "\n" || $c == " ") break;
        if (!$mark && $c == "]") break;
        # The URL ends up inside a single-quoted attribute, so apostrophes
        # must be escaped as well (ENT_QUOTES). The single-byte charset keeps
        # the bytes of multi-byte characters, which htmlspecialchars() would
        # drop or replace when fed one byte at a time as UTF-8.
        $x .= htmlspecialchars($c, ENT_QUOTES, 'ISO-8859-1');
        $b++;
    }

    # Sentence punctuation right after the URL does not belong to it
    $tail = substr($x, -1);
    if ($tail == "." || $tail == ",") {
        $x = substr($x, 0, -1);
        $b--;
    }

    $a = $b;
    if ($mark) $xml .= "<link type='external' url='{$x}'/>";
    else $xml .= $x;
    return true;
}
| 363 | + |
# Parses a bracketed external link "[url text]" on a single line and emits
# <link type='external' href='url'>text</link>. $mark is not used.
function p_external_link(&$a, &$xml, $mark = true)
{
    $pos = $a;
    if (!$this->nextis($pos, "[")) return false;

    $href = "";
    if (!$this->p_external_freelink($pos, $href, false)) return false;
    $this->skipblanks($pos);

    # Link text runs up to "]"; a line break before that means no link
    $label = "";
    if (!$this->scanplaintext($pos, $label, array("]"), array("\n"))) return false;

    $a = $pos + 1; # step over the closing "]"
    $xml .= "<link type='external' href='{$href}'>{$label}</link>";
    return true;
}
| 376 | + |
| 377 | + # Heading |
# Parses a heading line "== title ==". The rest of the line has to end with
# as many "=" as it started with; the emitted level is that count minus one.
# Without $compensate_markup_errors, trailing whitespace on the line adds an
# <error> element to $xml (this happens before the closing markup is
# checked, so also when the function then returns false).
function p_heading(&$a, &$xml)
{
    if (!$this->nextis($a, "==", false)) return false;

    # Collect the run of leading "="
    $pos = $a;
    $marker = "";
    while ($this->nextis($pos, "=")) {
        $marker .= "=";
    }
    $count = strlen($marker);

    $this->skipblanks($pos);
    $title = "";
    if (!$this->once($pos, $title, "restofline")) return false;

    if ($this->compensate_markup_errors) {
        $title = trim($title);
    } else if ($title != trim($title)) {
        $xml .= "<error type='heading' reason='trailing blank'/>";
    }
    if (substr($title, -$count) != $marker) return false; # No match

    $title = trim(substr($title, 0, -$count));
    $level = $count - 1;
    $a = $pos;
    $xml .= "<heading level='{$level}'>{$title}</heading>";
    return true;
}
| 402 | + |
| 403 | + # Line |
| 404 | + # Often used function for parsing the rest of a text line |
# Parses inline markup from $a to the end of the current line; the line
# break itself is consumed.
#   $closeit : optional list of strings; parsing stops (without consuming)
#              as soon as one of them is next
# Bold/italics markup takes over the remainder of the line through its own
# recursion. Returns false when bold/italics are still open at the end.
# $a and $xml are only changed on success.
function p_restofline(&$a, &$xml, $closeit = array())
{
    $b = $a;
    $x = "";
    $override = false;
    while ($b < $this->wl && !$override) {
        $c = $this->w[$b];
        if ($c == "\n") {
            $b++;
            break;
        }

        # "break 2" leaves the scanning loop. A plain "break" would only end
        # this foreach, and $closeit would never stop anything.
        foreach ($closeit as $z)
            if ($this->nextis($b, $z, false)) break 2;

        if ($c == "[" && $this->once($b, $x, "internal_link")) continue;
        if ($c == "[" && $this->once($b, $x, "external_link")) continue;
        if ($c == "{" && $this->once($b, $x, "template_variable")) continue;
        if ($c == "{" && $this->once($b, $x, "template")) continue;
        if ($c == "<" && $this->once($b, $x, "html")) continue;
        if ($c == "'" && $this->once($b, $x, "bold")) { $override = true; break; }
        if ($c == "'" && $this->once($b, $x, "italics")) { $override = true; break; }
        if ($this->once($b, $x, "external_freelink")) continue;

        # Just an ordinary character. Text is handled byte by byte, so a
        # single-byte charset is named explicitly; with the default UTF-8
        # charset htmlspecialchars() drops or replaces every byte of a
        # multi-byte character.
        $x .= htmlspecialchars($c, ENT_COMPAT, 'ISO-8859-1');
        $b++;
    }

    if (!$override && $this->bold_italics != "") return false;
    $xml .= $x;
    $a = $b;
    return true;
}
| 438 | + |
# Parses one line of ordinary paragraph text.
# Unless $force is set, lines that start other block types are refused:
# list/indent/pre/blank lines, tables, headings and horizontal rules.
# Resets the bold/italics state before parsing the line.
function p_line(&$a, &$xml, $force)
{
    if ($a >= $this->wl) return false; # Already at the end of the text

    if (!$force) {
        $first = $this->w[$a];
        if (in_array($first, array('*', ':', '#', ' ', "\n"), true)) return false; # Not a suitable beginning
        if ($this->nextis($a, "{|", false)) return false; # Table
        # Inside a table, "|" and "!" start cells
        if (count($this->tables) > 0 && $this->nextis($a, "|", false)) return false;
        if (count($this->tables) > 0 && $this->nextis($a, "!", false)) return false;
        foreach (array("==", "----") as $block_start) { # Heading, <hr>
            if ($this->nextis($a, $block_start, false)) return false;
        }
    }

    $this->bold_italics = "";
    return $this->once($a, $xml, "restofline");
}
| 455 | + |
# Consumes a single line break at $a; false when the next character is not
# one. Nothing is written to $xml.
function p_blankline(&$a, &$xml)
{
    $consumed = $this->nextis($a, "\n");
    return $consumed ? true : false;
}
| 461 | + |
# Parses a run of ordinary text lines, plus any blank lines after it, into
# one <paragraph>. Only the first line may be forced through p_line's
# start-of-line checks.
function p_block_lines(&$a, &$xml, $force = false)
{
    $pos = $a;
    $body = "";
    if (!$this->p_line($pos, $body, $force)) return false;

    do {
        $more = $this->p_line($pos, $body, false);
    } while ($more);
    $this->many($pos, $body, "blankline");

    $xml .= "<paragraph>{$body}</paragraph>";
    $a = $pos;
    return true;
}
| 473 | + |
| 474 | + |
| 475 | + |
| 476 | + # PRE block |
| 477 | + # Parses a line starting with ' ' |
# One line of a preformatted block: it has to begin with a blank. All
# leading blanks are skipped, then the rest of the line is parsed with a
# fresh bold/italics state.
function p_preline(&$a, &$xml)
{
    if ($a >= $this->wl) return false; # Already at the end of the text
    if ($this->w[$a] != ' ') return false; # Not a preline

    $this->bold_italics = "";
    $this->skipblanks($a);
    return $this->once($a, $xml, "restofline");
}
| 487 | + |
| 488 | + # Parses a block of lines each starting with ' ' |
# Wraps one or more consecutive blank-prefixed lines (and any blank lines
# following them) in a <pre> element.
function p_block_pre(&$a, &$xml)
{
    $pos = $a;
    $body = "";
    if (!$this->onceormore($pos, $body, "preline")) return false;
    $this->many($pos, $body, "blankline");

    $a = $pos;
    $xml .= "<pre>{$body}</pre>";
    return true;
}
| 499 | + |
| 500 | + # LIST block |
| 501 | + # Returns a list tag depending on the wiki markup |
# Builds a list tag: "</list>" when $open is false, otherwise "<list>" with
# a type attribute chosen by the markup character $c ('#', '*' or ':').
# Any other character gives a plain "<list>".
function listtag($c, $open = true)
{
    if (!$open) return "</list>";

    # Checked in reverse of the order in which the types used to be
    # assigned, so the last applicable assignment still wins.
    if ($c == ':') $type = "ident";
    else if ($c == '*') $type = "bullet";
    else if ($c == '#') $type = "numbered";
    else $type = "";

    $attribute = ($type != "") ? " type='{$type}'" : "";
    return "<list{$attribute}>";
}
| 513 | + |
| 514 | + # Opens/closes list tags |
# Returns the tags needed to get from the previous line's list markup
# ($last, e.g. "*#") to the current one ($cur): the common prefix is left
# alone, the remaining levels of $last are closed innermost first, and the
# remaining levels of $cur are opened.
function fixlist($last, $cur)
{
    # Remove matching parts
    $shared_prefix = false;
    while ($last != "" && $cur != "" && $last[0] == $cur[0]) {
        $shared_prefix = true;
        $last = substr($last, 1);
        $cur = substr($cur, 1);
    }

    # When levels are closed below a shared level, that level's current
    # item is closed and a new one started afterwards
    $restart_item = ($last != "" && $shared_prefix);

    # Close old tags, innermost (rightmost) first
    $out = "";
    for ($i = strlen($last) - 1; $i >= 0; $i--) {
        $out .= "</listitem>" . $this->listtag($last[$i], false);
    }
    if ($restart_item) $out .= "</listitem><listitem>";

    # Open new tags, outermost (leftmost) first
    $depth = strlen($cur);
    for ($i = 0; $i < $depth; $i++) {
        $out .= $this->listtag($cur[$i]) . "<listitem>";
    }

    return $out;
}
| 549 | + |
| 550 | + # Parses a single list line |
# Parses one list line. The leading run of '*', '#' and ':' is consumed,
# the open/close tags relative to the previous line's markup ($last) are
# written to $xml, and the rest of the line is parsed as the item text.
# $last is updated to this line's markup. When the line carries no list
# markup, the tags closing $last are still written and false is returned.
function p_list_line(&$a, &$xml, &$last)
{
    # Keep collecting until a full round over the three markers adds nothing
    $markup = "";
    do {
        $before = $markup;
        foreach (array("*", "#", ":") as $marker) {
            while ($this->nextis($a, $marker)) $markup .= $marker;
        }
    } while ($markup != $before);

    $same_level = ($last == $markup);
    $xml .= $this->fixlist($last, $markup);

    if ($markup == "") return false; # Not a list line
    $last = $markup;
    $this->skipblanks($a);

    # Same markup as the line before: just start the next item
    if ($same_level) $xml .= "</listitem><listitem>";
    $this->p_restofline($a, $xml);
    return true;
}
| 574 | + |
| 575 | + # Checks for a list block ( those nasty things starting with '*', '#', or the like... |
# Consumes consecutive list lines starting at $a; true if there was at
# least one.
function p_block_list(&$a, &$xml)
{
    $previous_markup = "";
    $lines = 0;
    while ($this->p_list_line($a, $xml, $previous_markup)) {
        $lines++;
    }
    return $lines > 0;
}
| 583 | + |
| 584 | + # HTML |
| 585 | + # This function detects a HTML tag, finds the matching close tag, |
| 586 | + # parses everything in between, and returns everything as an extension. |
| 587 | + # Returns false otherwise. |
# Parses an HTML-style element that starts at $a: the opening tag, the
# matching closing tag, and everything in between.
# Tags listed in $directhtmltags are emitted under their mapped name, all
# others as <extension name='tag'>. The content is run through a separate
# wiki2xml parser, except for pre/nowiki/math, whose content is only
# escaped. Returns false when there is no well-formed opening tag at $a or
# when the element is never closed.
function p_html(&$a, &$xml)
{
    if (!$this->nextis($a, "<", false)) return false;
    $b = $a;
    $x = "";
    $tag = "";
    $closing = false;
    $selfclosing = false;
    if (!$this->p_html_tag($b, $x, $tag, $closing, $selfclosing)) return false;

    # An element cannot begin with a closing tag. Without this check a stray
    # "</b>" would be taken as an opening tag and paired with the next
    # "</b>" further down the text.
    if ($closing) return false;

    if (isset($this->directhtmltags[$tag])) {
        $tag_open = "<" . $this->directhtmltags[$tag] . ">";
        $tag_close = "</" . $this->directhtmltags[$tag] . ">";
    } else {
        $tag_open = "<extension name='{$tag}'>";
        $tag_close = "</extension>";
    }

    # Is this tag self-closing?
    if ($selfclosing) {
        $a = $b;
        $xml .= $tag_open . $x . $tag_close;
        return true;
    }

    # Find the matching close tag
    # TODO : The simple open/close counter should be replaced with a
    #        stack to allow for tolerating half-broken HTML,
    #        such as unclosed <li> tags
    $begin = $b;
    $last = $b;   # start of the most recently examined tag candidate
    $cnt = 1;     # nesting depth of $tag
    $tag2 = "";
    while ($cnt > 0 && $b < $this->wl) {
        $x2 = "";
        $last = $b;
        if (!$this->p_html_tag($b, $x2, $tag2, $closing, $selfclosing)) {
            $b++;
            continue;
        }
        if ($tag != $tag2) continue;
        if ($selfclosing) continue;
        if ($closing) $cnt--;
        else $cnt++;
    }

    if ($cnt > 0) return false; # Tag was never closed

    # What happens in between? ($last is where the matching close tag starts)
    $between = substr($this->w, $begin, $last - $begin);
    if ($tag != "pre" && $tag != "nowiki" && $tag != "math") {
        # Parse the part in between the tags
        $subparser = new wiki2xml;
        $between2 = $subparser->parse($between);

        # Was the parsing correct?
        if ($between2 != $this->errormessage)
            $between = $this->strip_single_paragraph($between2); # No <paragraph> for inline HTML tags
        else
            $between = htmlspecialchars($between); # Incorrect markup, use safe wiki source instead
    } else {
        $between = htmlspecialchars($between); # No wiki parsing in here
    }

    $a = $b;
    $xml .= $tag_open . $x . $between . $tag_close;
    return true;
}
| 664 | + |
# Removes the surrounding <paragraph>...</paragraph> when $s consists of
# exactly one paragraph element; any other input is returned unchanged.
function strip_single_paragraph($s)
{
    # Exactly one opening and one closing tag in total
    if (substr_count($s, "paragraph>") != 2) return $s;
    if (substr($s, 0, 11) != "<paragraph>") return $s;
    if (substr($s, -12) != "</paragraph>") return $s;
    return substr($s, 11, -12);
}
| 673 | + |
| 674 | + # This function checks for and parses a HTML tag |
| 675 | + # Only to be called from p_html, as it returns only a partial extension tag! |
# Parses a single tag "<name attr=... >" at position $a (helper of p_html).
#   $xml         : receives "<attrs>...</attrs>" when the tag has attributes
#   $tag         : receives the lower-cased tag name without any "/"
#   $closing     : set to true for "</name>"
#   $selfclosing : set to true for "<name/>" and "<name ... />"
# The tag has to sit on one line. $a is moved past ">" on success only; the
# three output parameters may already have been altered when false is returned.
function p_html_tag(&$a, &$xml, &$tag, &$closing, &$selfclosing)
{
    if ($this->w[$a] != '<') return false;

    $pos = $a + 1;
    $this->skipblanks($pos);
    $tag = "";
    $attributes = array();
    if (!$this->scanplaintext($pos, $tag, array(" ", ">"), array("\n"))) return false;

    $this->skipblanks($pos);
    if ($pos >= $this->wl) return false;

    $tag = trim(strtolower($tag));
    $closing = false;
    $selfclosing = false;

    # Is closing tag? (blanks have been skipped and the end-of-text check
    # has been made just above, so neither is repeated here)
    if (substr($tag, 0, 1) == "/") {
        $tag = substr($tag, 1);
        $closing = true;
    }

    # "<br/>" : the slash was scanned as part of the name
    if (substr($tag, -1) == "/") {
        $tag = substr($tag, 0, -1);
        $selfclosing = true;
    }

    # Scan attrs
    while ($this->w[$pos] != '>' && $this->w[$pos] != '/') {
        $one = "";
        if (!$this->p_html_attr($pos, $one)) return false;
        $attributes[] = $one;
        $this->skipblanks($pos);
        if ($pos >= $this->wl) return false;
    }

    # Is self closing?
    if ($this->w[$pos] == '/') {
        $pos++;
        $selfclosing = true;
    }

    $this->skipblanks($pos);
    if ($pos >= $this->wl) return false;
    if ($this->w[$pos] != '>') return false;

    $a = $pos + 1;
    if (count($attributes) > 0) {
        $xml .= "<attrs>" . implode("", $attributes) . "</attrs>";
    }
    return true;
}
| 737 | + |
# Scans a single HTML tag attribute starting at $a and appends it to $xml as
# <attr name='key'>value</attr>.
#
# The name is lower-cased and trimmed. A value is optional; it may be unquoted (ends at a
# blank, ">" or "/") or enclosed in single or double quotes. A line break inside a value
# makes the scan fail.
# Returns true and moves $a behind the attribute on success; returns false and leaves
# $a and $xml untouched otherwise.
function p_html_attr ( &$a , &$xml )
{
	$b = $a ;
	$this->skipblanks ( $b ) ;
	if ( $b >= $this->wl ) return false ;
	$name = "" ;
	if ( !$this->scanplaintext ( $b , $name , array ( " " , "=" , ">" , "/" ) , array ( "\n" ) ) ) return false ;

	$this->skipblanks ( $b ) ;
	if ( $b >= $this->wl ) return false ;
	$name = trim ( strtolower ( $name ) ) ;

	$value = "" ;
	if ( $this->w[$b] == "=" )
	{
		$b++ ;
		$this->skipblanks ( $b ) ;
		if ( $b >= $this->wl ) return false ;

		# Remember the quote character, if the value is quoted at all
		$q = "" ;
		$is_q = false ;
		if ( $this->w[$b] == '"' || $this->w[$b] == "'" )
		{
			$q = $this->w[$b] ;
			$b++ ;
			if ( $b >= $this->wl ) return false ;
			$is_q = true ;
		}

		while ( $b < $this->wl )
		{
			$c = $this->w[$b] ;
			if ( $c == $q )
			{
				$b++ ;
				if ( $is_q ) break ; # Closing quote reached
				return false ; # Broken attribute value
			}
			if ( $this->nextis ( $b , "\\{$q}" ) ) # Ignore escaped quotes
			{
				# nextis is expected to have moved $b past the escape sequence here
				$value .= "\\{$q}" ;
				continue ;
			}
			if ( $c == "\n" ) return false ; # Line break before value end
			if ( !$is_q && ( $c == ' ' || $c == '>' || $c == '/' ) ) break ;
			$value .= htmlspecialchars ( $c ) ;
			$b++ ;
		}
	}

	$a = $b ;

	# The name is written into a single-quoted XML attribute, so "'", "&", "<" etc. in it
	# have to be escaped just like the value is; otherwise the output is not well-formed.
	$name = htmlspecialchars ( $name , ENT_QUOTES ) ;
	$xml .= "<attr name='{$name}'>{$value}</attr>" ;
	return true ;
}
| 791 | + |
# Horizontal ruler (<hr> / ----)
# Matches four dashes at $a, then skips any further dashes and following blanks,
# and appends <hr/> to $xml. Returns false if there is no ruler at $a.
function p_hr ( &$a , &$xml )
{
	if ( $this->nextis ( $a , "----" ) )
	{
		$this->skipblanks ( $a , "-" ) ; # Any additional dashes
		$this->skipblanks ( $a ) ;
		$xml .= "<hr/>" ;
		return true ;
	}
	return false ;
}
| 801 | + |
# TABLE
# Scans the rest of the line as HTML attributes and returns the usual <attrs><attr> string.
# $a is moved to the end of the line (the line break itself is not consumed).
# Returns "" if the line holds no attributes or cannot be read completely as an attribute list.
function scanattributes ( &$a )
{
	# Cut out the rest of the current line
	$x = "" ;
	while ( $a < $this->wl )
	{
		if ( $this->w[$a] == "\n" ) break ;
		$x .= $this->w[$a] ;
		$a++ ;
	}
	$x .= ">" ; # Sentinel, so the list ends like the inside of an HTML tag

	# Creating a temporary new parser to run the attribute list in
	$np = new wiki2xml ;
	$np->w = $x ;
	$np->wl = strlen ( $x ) ;

	# Scanning attribute list
	$attrs = array () ;
	$c = 0 ;
	while ( $c < $np->wl && $np->w[$c] != '>' ) # Bounds check: an unterminated quoted value can run $c to the end
	{
		$before = $c ;
		$attr = "" ;
		if ( !$np->p_html_attr ( $c , $attr ) ) break ;
		$attrs[] = $attr ;
		$np->skipblanks ( $c ) ;

		# Safety net: an attribute scan that succeeds without consuming anything
		# would otherwise keep this loop running forever
		if ( $c <= $before ) break ;
	}

	# Only accept the line if everything up to the sentinel was consumed
	if ( substr ( $x , $c ) != ">" ) return "" ;

	$ret = "" ;
	if ( count ( $attrs ) > 0 )
	{
		$ret .= "<attrs>" ;
		$ret .= implode ( "" , $attrs ) ;
		$ret .= "</attrs>" ;
	}
	return $ret ;
}
| 841 | + |
# Finds the first of the given items; does *not* alter $a
# Walks forward from $a and returns the first position at which any string of $matches
# is found, or -1 if the end of the text is reached without a match.
function scanahead ( $a , $matches )
{
	for ( $pos = $a ; $pos < $this->wl ; $pos++ )
	{
		foreach ( $matches AS $candidate )
		{
			if ( $this->nextis ( $pos , $candidate , false ) ) return $pos ;
		}
	}
	return -1 ; # Not found
}
| 858 | + |
| 859 | + |
# The main table parsing function
# Dispatches on the markup at $a: "{|" opens a table, "|}" closes the innermost one,
# and any other line starting with "|" or "!" is handed to p_table_element.
# Apart from "{|", markup is only accepted while at least one table is open.
function p_table ( &$a , &$xml )
{
	if ( $a >= $this->wl ) return false ;
	$first = $this->w[$a] ;

	if ( $first == "{" && $this->nextis ( $a , "{|" , false ) )
		return $this->p_table_open ( $a , $xml ) ;

	$is_marker = ( $first == "|" || $first == "!" ) ;
	if ( !$is_marker ) return false ; # No possible table markup

	if ( count ( $this->tables ) == 0 ) return false ; # No tables open, nothing to do

	if ( $first == "|" && $this->nextis ( $a , "|}" , false ) )
		return $this->p_table_close ( $a , $xml ) ;

	return $this->p_table_element ( $a , $xml , true ) ;
}
| 877 | + |
# Returns the innermost (most recently opened) entry of the table stack.
# The stack is not checked for being empty; callers have to make sure a table is open.
function lasttable ()
{
	$top = count ( $this->tables ) - 1 ;
	return $this->tables[$top] ;
}
| 882 | + |
# Returns the attributes for table cells
# Looks for a run of characters from $this->allowed starting at $a that is terminated by a
# single "|" or "!". If found, that run is parsed as an attribute list, $a is moved behind
# the separator and the <attrs> string is returned. Otherwise "" is returned and $a is
# left where it was.
function tryfindparams ( &$a )
{
	# Uses the offset parameter of strspn, i.e. needs PHP 4.3.0 or above
	$n = strspn ( $this->w , $this->allowed , $a ) ;
	if ( $n == 0 ) return "" ; # None found

	$b = $a + $n ;
	if ( $b >= $this->wl ) return "" ;
	$separator = $this->w[$b] ;
	if ( $separator != "|" && $separator != "!" ) return "" ;
	if ( $this->nextis ( $b , "||" , false ) ) return "" ; # Reached a ||, so return blank string
	if ( $this->nextis ( $b , "!!" , false ) ) return "" ; # Reached a !!, so return blank string

	# Temporarily end the line at the separator so that scanattributes stops there,
	# then put back whatever character was there ("|" or "!")
	$this->w[$b] = "\n" ;
	$ret = $this->scanattributes ( $a ) ;
	$this->w[$b] = $separator ;

	$a = $b + 1 ;
	return $ret ;
}
| 901 | + |
# Parses one table element at $a inside the innermost open table:
# a row separator ("|-"), a caption ("|+"), or a cell ("|" => tablecell, "!" => tablehead).
# Rows and captions are only recognized when $newline is true.
# Rows are opened and closed implicitly as needed, tracked by the is_row_open flag of the
# innermost table. On success the XML is appended to $xml, $a is moved behind the element
# and true is returned; on failure false is returned and $a / $xml are left untouched.
function p_table_element ( &$a , &$xml , $newline = false )
{
	$b = $a ;
	$x = "" ;
	$top = count ( $this->tables ) - 1 ; # Index of the innermost open table

	if ( $newline && $this->nextis ( $b , "|-" ) ) # Table row
	{
		$this->skipblanks ( $b , "-" ) ;
		$this->skipblanks ( $b ) ;

		$attrs = $this->scanattributes ( $b ) ;
		if ( $this->tables[$top]->is_row_open ) $x .= "</tablerow>" ;
		else $this->tables[$top]->is_row_open = true ;
		$x .= "<tablerow>{$attrs}" ;
	}
	else if ( $newline && $this->nextis ( $b , "|+" ) ) # Table caption
	{
		$this->skipblanks ( $b ) ;
		$caption = "" ;
		if ( !$this->p_restofcell ( $b , $caption ) ) return false ;

		# Close a pending row *before* the caption element (not inside it), and only
		# touch the row state once the caption has actually been accepted
		if ( $this->tables[$top]->is_row_open ) $x .= "</tablerow>" ;
		$this->tables[$top]->is_row_open = false ;
		$x .= "<tablecaption>{$caption}</tablecaption>" ;
	}
	else # TD or TH
	{
		$c = $this->w[$b] ;
		$b++ ;
		if ( $c == '|' ) $tag = "tablecell" ;
		else if ( $c == '!' ) $tag = "tablehead" ;
		else return false ; # This would indeed be strange!
		$attrs = $this->tryfindparams ( $b ) ;
		if ( !$this->p_restofcell ( $b , $x ) ) return false ;

		if ( substr ( $x , 0 , 1 ) == "|" ) # Crude fix to compensate for MediaWiki "tolerant" parsing
			$x = substr ( $x , 1 ) ;
		$x = "<{$tag}>{$attrs}{$x}</{$tag}>" ;

		# A cell outside of any row implicitly starts a new row
		if ( !$this->tables[$top]->is_row_open )
		{
			$this->tables[$top]->is_row_open = true ;
			$x = "<tablerow>{$x}" ;
		}
	}

	$a = $b ;
	$xml .= $x ;
	return true ;
}
| 949 | + |
# Finds the substring that composes the table cell,
# then runs a new parser on it
# The cell ends at a line break followed by "|" or "!", at "||" / "!!" while still on the
# first line, or at the end of the text. Internal links, template variables, templates and
# HTML are skipped as a whole, so that markup inside them cannot end the cell.
# On success the parsed cell (without a single enclosing paragraph) is appended to $xml and
# $a is moved one character behind the end of the cell.
function p_restofcell ( &$a , &$xml )
{
	# Get substring for cell
	$end = $a ;
	$on_first_line = true ;
	$skipped = "" ; # Output of the skipped constructs; only needed as a by-reference target
	while ( $end < $this->wl )
	{
		$ch = $this->w[$end] ;
		if ( $ch == "\n" )
		{
			$on_first_line = false ;
			if ( $this->nextis ( $end , "\n|" , false ) ) break ;
			if ( $this->nextis ( $end , "\n!" , false ) ) break ;
		}
		else if ( $ch == "|" )
		{
			if ( $on_first_line && $this->nextis ( $end , "||" , false ) ) break ;
		}
		else if ( $ch == "!" )
		{
			if ( $on_first_line && $this->nextis ( $end , "!!" , false ) ) break ;
		}
		else if ( $ch == "[" )
		{
			if ( $this->once ( $end , $skipped , "internal_link" ) ) continue ;
		}
		else if ( $ch == "{" )
		{
			if ( $this->once ( $end , $skipped , "template_variable" ) ) continue ;
			if ( $this->once ( $end , $skipped , "template" ) ) continue ;
		}
		else if ( $ch == "<" )
		{
			if ( $this->once ( $end , $skipped , "html" ) ) continue ;
		}
		$end++ ;
	}

	# Parse cell substring
	$cell = substr ( $this->w , $a , $end - $a ) ;
	$sub = new wiki2xml ;
	$parsed = $sub->parse ( $cell ) ;
	if ( $parsed == $this->errormessage ) return false ;

	$a = $end + 1 ;
	$xml .= $this->strip_single_paragraph ( $parsed ) ;
	return true ;
}
| 982 | + |
# Closes the innermost open table at "|}".
# Appends </table> (preceded by </tablerow> if a row is still open) to $xml, removes the
# table from the stack and moves $a behind the markup and any line breaks following it.
# Returns false if no table is open or there is no "|}" at $a.
function p_table_close ( &$a , &$xml )
{
	if ( count ( $this->tables ) == 0 ) return false ;
	$pos = $a ;
	if ( !$this->nextis ( $pos , "|}" ) ) return false ;

	$current = $this->lasttable() ;
	$closing = ( $current->is_row_open ? "</tablerow>" : "" ) . "</table>" ;
	array_pop ( $this->tables ) ;

	$xml .= $closing ;
	$a = $pos ;
	while ( $this->nextis ( $a , "\n" ) ) { } # Swallow trailing line breaks
	return true ;
}
| 998 | + |
# Opens a table at "{|".
# The rest of the "{|" line is read as the table's attributes, a new entry is pushed onto
# the table stack, and the remaining text is parsed via p_article (which handles the table
# contents through p_table). On success "<table>", the attributes and the parsed remainder
# are appended to $xml and $a is moved to where p_article stopped.
function p_table_open ( &$a , &$xml )
{
	$b = $a ;
	if ( !$this->nextis ( $b , "{|" ) ) return false ;

	# NOTE(review): this sets a flag on the parser itself, while the row state used by the
	# table functions lives on the stack entries below - kept in case other code reads it
	$this->is_row_open = false ;

	$x = "<table>" ;
	$x .= $this->scanattributes ( $b ) ;
	while ( $this->nextis ( $b , "\n" ) ) ;

	# Add table to stack
	# The entry has to be created explicitly: assigning a property to an undefined
	# variable raises a warning in PHP 5/7 and is a fatal error in PHP 8
	$nt = new stdClass ;
	$nt->is_row_open = false ;
	array_push ( $this->tables , $nt ) ;

	# Try the rest of the article as another article
	$x2 = "" ;
	if ( !$this->p_article ( $b , $x2 ) )
	{
		array_pop ( $this->tables ) ;
		return false ;
	}
	$x2 = $this->strip_single_paragraph ( $x2 ) ;

	$a = $b ;
	$xml .= $x . $x2 ;
	return true ;
}
| 1027 | + |
#-----------------------------------
# Parse the article
# Repeatedly tries the block-level rules at the current position until the end of the text
# is reached. The result is appended to $xml and $a is moved to the end position.
# Always returns true: text that no rule accepts is passed through one character at a time.
function p_article ( &$a , &$xml )
{
	# Block-level rules, tried in this order via onceormore
	$rules = array ( "heading" , "block_lines" , "block_pre" , "block_list" , "hr" , "table" , "blankline" ) ;

	$x = "" ;
	$b = $a ;
	while ( $b < $this->wl )
	{
		foreach ( $rules AS $rule )
		{
			if ( $this->onceormore ( $b , $x , $rule ) ) continue 2 ;
		}
		if ( $this->p_block_lines ( $b , $x , true ) ) continue ;

		# The last resort! Pass the character through (flagged as an error unless errors
		# are to be compensated) and step over it. It is added to $x, not $xml, so that
		# it stays in its place in the output, and $b must advance or this loop never ends.
		if ( !$this->compensate_markup_errors ) $x .= "<error type='general' reason='no matching markup'/>" ;
		$x .= htmlspecialchars ( $this->w[$b] ) ;
		$b++ ;
	}

	$a = $b ;
	$xml .= $x ;
	return true ;
}
| 1053 | + |
# The only function to be called directly from outside the class
# Converts the wiki text in $wiki to XML and returns it ($wiki itself is not modified).
# The text is trimmed, line endings are normalized to "\n", HTML comments are removed,
# and the result of p_article is cleaned up (runs of spaces collapsed; optionally
# spaces next to tags turned into <space/>).
# Returns $this->errormessage if p_article reports failure.
function parse ( &$wiki )
{
	$this->w = trim ( $wiki ) ;

	# Fix line endings
	# Text that has CRs (13) but no LFs (10) uses CR as its line break, so convert those
	# to LF first; every CR that is left after that (from CR/LF pairs) is simply dropped
	$cc = count_chars ( $wiki , 0 ) ;
	if ( $cc[13] > 0 && $cc[10] == 0 )
		$this->w = str_replace ( "\r" , "\n" , $this->w ) ;
	$this->w = str_replace ( "\r" , "" , $this->w ) ;

	# Remove HTML comments
	$this->w = preg_replace( '?<!--.*-->?msU', '', $this->w);

	# Run the thing!
	$this->tables = array () ;
	$this->wl = strlen ( $this->w ) ;
	$xml = "" ;
	$a = 0 ;
	if ( !$this->p_article ( $a , $xml ) ) return $this->errormessage ;

	# XML cleanup
	# Collapse runs of spaces: replace two spaces by one until nothing changes anymore
	$two_spaces = str_repeat ( " " , 2 ) ;
	do {
		$lxml = $xml ;
		$xml = str_replace ( $two_spaces , " " , $xml ) ;
	} while ( $lxml != $xml ) ;
	if ( $this->use_space_tag ) {
		$xml = str_replace ( "> " , "><space/>" , $xml ) ;
		$xml = str_replace ( " <" , "<space/><" , $xml ) ;
	}

	return $xml ;
}
| 1087 | + |
| 1088 | + } |
| 1089 | + |
| 1090 | +?> |
\ No newline at end of file |
Property changes on: trunk/wiki2xml/php/wiki2xml.php |
___________________________________________________________________ |
Added: svn:keywords |
1 | 1091 | + Author Date Id Revision |
Added: svn:eol-style |
2 | 1092 | + native |