r11680 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r11679‎ | r11680 | r11681 >
Date:08:14, 9 November 2005
Author:magnus_manske
Status:old
Tags:
Comment:
wiki-to-xml converter in pure PHP
Modified paths:
  • /trunk/wiki2xml/php (added) (history)
  • /trunk/wiki2xml/php/local.php (added) (history)
  • /trunk/wiki2xml/php/w2x.php (added) (history)
  • /trunk/wiki2xml/php/wiki2xml.php (added) (history)

Diff [purge]

Index: trunk/wiki2xml/php/local.php
@@ -0,0 +1,7 @@
 2+<?
 3+
 4+$xmlg["namespace_template"] = "Template" ;
 5+$xmlg["site_base_url"] = "en.wikipedia.org/w" ; # Dummy default
 6+#$xmlg["site_base_url"] = "127.0.0.1/phase3" ;
 7+
 8+?>
Property changes on: trunk/wiki2xml/php/local.php
___________________________________________________________________
Added: svn:keywords
19 + Author Date Id Revision
Added: svn:eol-style
210 + native
Index: trunk/wiki2xml/php/w2x.php
@@ -0,0 +1,109 @@
 2+<?
 3+# Copyright by Magnus Manske (2005)
 4+# Released under GPL
 5+
 6+include ( "wiki2xml.php" ) ;
 7+include ( "local.php" ) ;
 8+
 9+class ContentProvider {
 10+ var $article_cache = array () ;
 11+ var $first_title = "" ;
 12+
 13+ function get_wiki_text ( $title , $do_cache = false ) {
 14+ global $xmlg ;
 15+ $title = trim ( $title ) ;
 16+ if ( $title == "" ) return "" ; # Just in case...
 17+ if ( isset ( $this->article_cache[$title] ) ) # Already in the cache
 18+ return $this->article_cache[$title] ;
 19+
 20+ if ( $this->first_title == "" ) $this->first_title = $title ;
 21+
 22+ # Retrieve it
 23+ $url = "http://" . $xmlg["site_base_url"] . "/index.php?action=raw&title=" . urlencode ( $title ) ;
 24+ $s = file_get_contents ( $url ) ;
 25+ if ( $do_cache ) $this->article_cache[$title] = $s ;
 26+ return $s ;
 27+ }
 28+
 29+ function get_template_text ( $title ) {
 30+ global $xmlg ;
 31+
 32+ # Check for fix variables
 33+ if ( $title == "PAGENAME" ) return $this->first_title ;
 34+ if ( $title == "PAGENAMEE" ) return urlencode ( $this->first_title ) ;
 35+
 36+ $title = trim ( $title ) ;
 37+ if ( count ( explode ( ":" , $title , 2 ) ) == 1 ) # Does the template title contain a ":"?
 38+ $title = $xmlg["namespace_template"] . ":" . $title ;
 39+ else if ( substr ( $title , 0 , 1 ) == ":" ) # Main namespace
 40+ $title = substr ( $title , 1 ) ;
 41+ return $this->get_wiki_text ( $title , true ) ; # Cache template texts
 42+ }
 43+}
 44+
 45+## TIMER FUNCTION
 46+
 47+function microtime_float() # Returns the current time in seconds as a float, built from the two parts of microtime()
 48+{
 49+ list($usec, $sec) = explode(" ", microtime()); # microtime() is split at the blank into its microsecond and second parts
 50+ return ((float)$usec + (float)$sec);
 51+}
 52+
 53+## MAIN PROGRAM
 54+
 55+if ( isset ( $_POST['doit'] ) ) {
 56+ $wikitext = stripslashes ( $_POST['text'] ) ;
 57+
 58+ $content_provider = new ContentProvider ;
 59+ $xmlg["site_base_url"] = $_POST['site'] ;
 60+ $xmlg["namespace_template"] = $_POST['template'] ;
 61+
 62+ header('Content-type: text/xml; charset=utf-8');
 63+ print "<?xml version='1.0' encoding='UTF-8' ?>\n" ;
 64+
 65+ $t = microtime_float() ;
 66+ $text = "" ;
 67+ $article_open = '<article>' ;
 68+ if ( $_POST['whatsthis'] == "wikitext" ) {
 69+ $p = new wiki2xml ;
 70+ $text = $article_open . $p->parse ( $wikitext ) . "</article>" ;
 71+ } else {
 72+ $t = microtime_float() ;
 73+ $articles = explode ( "\n" , $wikitext ) ;
 74+ foreach ( $articles AS $a ) {
 75+ $p = new wiki2xml ;
 76+ $wikitext = $content_provider->get_wiki_text ( $a ) ;
 77+ $text .= $article_open . $p->parse ( $wikitext ) . "</article>" ;
 78+ }
 79+ }
 80+ $t = microtime_float() - $t ;
 81+# xmlns:xhtml=\"http://www.w3.org/1999/xhtml\"
 82+ print "<articles xmlns:xhtml=\" \" rendertime='{$t} sec'>{$text}</articles>" ;
 83+} else if ( isset ( $_GET['showsource'] ) ) {
 84+ header('Content-type: text/plain; charset=utf-8');
 85+ print file_get_contents ( "wiki2xml.php" ) ;
 86+} else {
 87+ header('Content-type: text/html; charset=utf-8');
 88+ print "
 89+<html><head></head><body><form method='post'>
 90+<h1>Magnus' magic wiki-to-XML converter</h1>
 91+<p>All written in PHP - so portable, so incredibly slow... (see <a href=\"wiki2xml.php?showsource=true\">the source</a>)</p>
 92+<p>
 93+Known bugs:
 94+<ul>
 95+<li>The \";\" markup doesn't work yet (not implemented); \":\" does, though</li>
 96+</ul>
 97+</p>
 98+<h2>Paste wikitext here</h2>
 99+<textarea rows='20' cols='80' style='width:100%' name='text'></textarea><br/>
 100+This is
 101+<INPUT type='radio' name='whatsthis' value='wikitext'>raw wikitext
 102+<INPUT checked type='radio' name='whatsthis' value='articlelist'>a list of articles
 103+<br/>
 104+Site : http://<input type='text' name='site' value='".$xmlg["site_base_url"]."'/>/index.php<br/>
 105+Template namespace name : <input type='text' name='template' value='".$xmlg["namespace_template"]."'/><br/>
 106+<input type='submit' name='doit' value='Convert'/>
 107+</form></body></html>" ;
 108+}
 109+
 110+?>
\ No newline at end of file
Property changes on: trunk/wiki2xml/php/w2x.php
___________________________________________________________________
Added: svn:keywords
1111 + Author Date Id Revision
Added: svn:eol-style
2112 + native
Index: trunk/wiki2xml/php/wiki2xml.php
@@ -0,0 +1,1089 @@
 2+<?
 3+# Copyright by Magnus Manske (2005)
 4+# Released under GPL
 5+
 6+# TODO :
 7+# The ";" thingy
 8+
 9+class wiki2xml
 10+ {
 11+ var $protocols = array ( "http" , "https" , "news" , "ftp" , "irc" , "mailto" ) ;
 12+ var $errormessage = "ERROR!" ;
 13+ var $compensate_markup_errors = false;
 14+ var $auto_fill_templates = true ; # Will try and replace templates right inline, instead of using <template> tags; requires global $content_provider
 15+ var $use_space_tag = true ; # Use <space/> instead of spaces before and after tags
 16+ var $allowed = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890 #:;,%="\'\\' ;
 17+ var $directhtmltags = array (
 18+ "b" => "xhtml:b",
 19+ "i" => "xhtml:i",
 20+ "u" => "xhtml:u",
 21+ "s" => "xhtml:s",
 22+ "p" => "xhtml:p",
 23+ "br" => "xhtml:br",
 24+ "div" => "xhtml:div",
 25+ "span" => "xhtml:span",
 26+ ) ;
 27+
 28+ var $w ; # The wiki text
 29+ var $wl ; # The wiki text length
 30+ var $bold_italics ;
 31+ var $tables ;
 32+
 33+ # Some often used functions
 34+ function fitit ( &$a , &$xml , &$f , $atleastonce , $many ) # Generic driver: calls the parser method "p_{$f}" at position $a, letting it append to $xml
 35+ {
 36+ $f = "p_{$f}" ; # $f is taken by reference, so the variable passed in is rewritten with the "p_" prefix as well
 37+ $cnt = 0 ; # Number of successful matches
 38+ do {
 39+ $matched = $this->$f ( $a , $xml ) ;
 40+ if ( $matched && $many ) $again = true ; # Repeat only while it keeps matching and repetition was requested
 41+ else $again = false ;
 42+ if ( $matched ) $cnt++ ;
 43+ } while ( $again ) ;
 44+ if ( !$atleastonce ) return true ; # Zero matches are fine
 45+ if ( $cnt > 0 ) return true ;
 46+ return false ;
 47+ }
 48+
 49+ function once ( &$a , &$xml , $f ) # Tries "p_{$f}" exactly once; true only if it matched
 50+ {
 51+ return $this->fitit ( $a , $xml , $f , true , false ) ;
 52+ }
 53+
 54+ function onceormore ( &$a , &$xml , $f ) # Applies "p_{$f}" repeatedly; true if it matched at least once
 55+ {
 56+ return $this->fitit ( $a , $xml , $f , true , true ) ;
 57+ }
 58+
 59+ function many ( &$a , &$xml , $f ) # Applies "p_{$f}" as often as it matches; always returns true, even for zero matches
 60+ {
 61+ return $this->fitit ( $a , $xml , $f , false , true ) ;
 62+ }
 63+
 64+ function nextis ( &$a , $t , $movecounter = true ) # True if the wiki text at position $a starts with $t
 65+ {
 66+ if ( substr ( $this->w , $a , strlen ( $t ) ) != $t ) return false ;
 67+ if ( $movecounter ) $a += strlen ( $t ) ; # Consume the matched text unless the caller only wants to peek
 68+ return true ;
 69+ }
 70+
 71+ function nextchar ( &$a , &$x ) # Appends the HTML-escaped character at $a to $x and advances $a; false at the end of the text
 72+ {
 73+ if ( $a >= $this->wl ) return false ;
 74+ $x .= htmlspecialchars ( $this->w[$a] ) ;
 75+ $a++ ;
 76+ return true ;
 77+ }
 78+
 79+ function ischaracter ( $c ) # True if $c compares within 'A'..'Z' or 'a'..'z'
 80+ {
 81+ if ( $c >= 'A' && $c <= 'Z' ) return true ;
 82+ if ( $c >= 'a' && $c <= 'z' ) return true ;
 83+ return false ;
 84+ }
 85+
 86+ function skipblanks ( &$a , $blank = " " ) # Advances $a past any run of the character $blank (a space by default)
 87+ {
 88+ while ( $a < $this->wl )
 89+ {
 90+ if ( $this->w[$a] != $blank ) return ; # First other character: stop here
 91+ $a++ ;
 92+ }
 93+ }
 94+
 95+ ##############
 96+
 97+
 98+ function p_internal_link_target ( &$a , &$xml , $closeit = "]]" ) # Parses a link/template target up to "|" or $closeit and emits it as <target>
 99+ {
 100+ return $this->p_internal_link_text ( $a , $xml , true , $closeit ) ;
 101+ }
 102+
 103+ function p_internal_link_text2 ( &$a , &$xml , $closeit ) # Like p_internal_link_text for link text, but appends the result without a <part> wrapper
 104+ {
 105+ return $this->p_internal_link_text ( $a , $xml , false , $closeit , false ) ;
 106+ }
 107+
 108+ function p_internal_link_text ( &$a , &$xml , $istarget = false , $closeit = "]]" , $mark = true ) # Scans one "|"-separated piece of a link or template, stopping in front of "|" or $closeit
 109+ {
 110+ $b = $a ; # Work on a copy; $a is only moved on success
 111+ $x = "" ;
 112+ if ( $b >= $this->wl ) return false ;
 113+ while ( 1 )
 114+ {
 115+ $c = $this->w[$b] ;
 116+ if ( $closeit != "}}" && $c == "\n" ) return false ; # Line breaks are only tolerated inside templates
 117+ if ( $c == "|" ) break ;
 118+ if ( $this->nextis ( $b , $closeit , false ) ) break ; # Closing markup is left for the caller to consume
 119+ if ( !$istarget )
 120+ {
 121+ if ( $c == "[" && $this->once ( $b , $x , "internal_link" ) ) continue ;
 122+ if ( $c == "[" && $this->once ( $b , $x , "external_link" ) ) continue ;
 123+ if ( $this->once ( $b , $x , "external_freelink" ) ) continue ;
 124+ if ( $c == "{" && $this->once ( $b , $x , "template_variable" ) ) continue ;
 125+ if ( $c == "{" && $this->once ( $b , $x , "template" ) ) continue ;
 126+ if ( $c == "<" && $this->once ( $b , $x , "html" ) ) continue ;
 127+ if ( $c == "'" && $this->p_bold ( $b , $x , "internal_link_text2" , $closeit ) ) { break ; } # Bold/italics recurse into this piece themselves, so scanning ends here
 128+ if ( $c == "'" && $this->p_italics ( $b , $x , "internal_link_text2" , $closeit ) ) { break ; }
 129+ }
 130+ else
 131+ {
 132+ if ( $c == "{" && $this->once ( $b , $x , "template" ) ) continue ; # Targets only get templates resolved
 133+ }
 134+ #if ( !$this->nextchar ( $b , $x ) ) return false ;
 135+ $x .= htmlspecialchars ( $c ) ; # Plain character
 136+ $b++ ;
 137+ if ( $b >= $this->wl ) return false ; # Text ended before the piece was closed
 138+ }
 139+
 140+ $x = trim ( str_replace ( "\n" , "" , $x ) ) ;
 141+ if ( $mark )
 142+ {
 143+ if ( $istarget ) $xml .= "<target>{$x}</target>" ;
 144+ else $xml .= "<part>{$x}</part>" ;
 145+
 146+ }
 147+ else $xml .= $x ;
 148+ $a = $b ;
 149+ return true ;
 150+ }
 151+
 152+ function p_internal_link_trail ( &$a , &$xml ) # Collects the letters directly following a link and emits them as <trail>; false if there are none
 153+ {
 154+ $b = $a ;
 155+ $x = "" ;
 156+ while ( 1 )
 157+ {
 158+ $c = "" ;
 159+ if ( !$this->nextchar ( $b , $c ) ) break ; # End of text
 160+ if ( $this->ischaracter ( $c ) )
 161+ {
 162+ $x .= $c ;
 163+ }
 164+ else
 165+ {
 166+ $b-- ; # Not part of the trail: un-read the character
 167+ break ;
 168+ }
 169+ }
 170+ if ( $x == "" ) return false ; # No link trail
 171+ $xml .= "<trail>{$x}</trail>" ;
 172+ $a = $b ;
 173+ return true ;
 174+ }
 175+
 176+ function p_internal_link ( &$a , &$xml ) # Parses "[[target|part|...]]trail" into <link><target/><part/>...<trail/></link>
 177+ {
 178+ $x = "" ;
 179+ $b = $a ;
 180+ if ( !$this->nextis ( $b , "[[" ) ) return false ;
 181+ if ( !$this->p_internal_link_target ( $b , $x , "]]" ) ) return false ;
 182+ while ( 1 )
 183+ {
 184+ if ( $this->nextis ( $b , "]]" ) ) break ; # Link closed
 185+ if ( !$this->nextis ( $b , "|" ) ) return false ;
 186+ if ( !$this->p_internal_link_text ( $b , $x , false , "]]" ) ) return false ;
 187+ }
 188+ $this->p_internal_link_trail ( $b , $x ) ; # Optional; the result is deliberately ignored
 189+ $xml .= "<link>{$x}</link>" ;
 190+ $a = $b ;
 191+ return true ;
 192+ }
 193+
 194+ # Template and template variable, utilizing parts of the internal link methods
 195+ function p_template ( &$a , &$xml )
 196+ {
 197+ $x = "" ;
 198+ $b = $a ;
 199+ if ( !$this->nextis ( $b , "{{" ) ) return false ;
 200+ if ( !$this->p_internal_link_target ( $b , $x , "}}" ) ) return false ;
 201+ $target = $x ;
 202+ $variables = array () ;
 203+ $vcount = 1 ;
 204+ while ( 1 )
 205+ {
 206+ if ( $this->nextis ( $b , "}}" ) ) break ;
 207+ if ( !$this->nextis ( $b , "|" ) ) return false ;
 208+ $l1 = strlen ( $x ) ;
 209+ if ( !$this->p_internal_link_text ( $b , $x , false , "}}" ) ) return false ;
 210+ $v = substr ( $x , $l1 ) ;
 211+ $v = str_replace ( "<part>" , "" , $v ) ;
 212+ $v = str_replace ( "</part>" , "" , $v ) ;
 213+ $v = explode ( "=" , $v ) ;
 214+ if ( count ( $v ) < 2 ) $vk = $vcount ;
 215+ else $vk = array_shift ( $v ) ;
 216+ $vv = array_shift ( $v ) ;
 217+ $variables[$vk] = $vv ;
 218+ if ( !isset ( $variables[$vcount] ) ) $variables[$vcount] = $vv ;
 219+ $vcount++ ;
 220+ }
 221+
 222+ if ( $this->auto_fill_templates ) { # Do not generate <template> sections, but rather replace the template call with the template text
 223+ # Get template text
 224+ global $content_provider ;
 225+ $target = array_pop ( explode ( ">" , $target , 2 ) ) ;
 226+ $target = array_shift ( explode ( "<" , $target , 2 ) ) ;
 227+ $between = $content_provider->get_template_text ( $target ) ;
 228+
 229+ # Replacing template variables. ATTENTION: Template variables within <nowiki> sections of templates will be replaced as well!
 230+ foreach ( $variables AS $vk => $vv ) {
 231+ $between = str_replace ( '{{{'.$vk.'}}}' , $vv , $between ) ;
 232+ }
 233+
 234+ # Change source (!)
 235+ $w = $this->w ;
 236+ $w1 = substr ( $w , 0 , $a ) ;
 237+ $w2 = substr ( $w , $b ) ;
 238+ $this->w = $w1 . $between . $w2 ;
 239+ $this->wl = strlen ( $this->w ) ;
 240+ } else {
 241+ $xml .= "<template>{$x}</template>" ;
 242+ $a = $b ;
 243+ }
 244+ return true ;
 245+ }
 246+
 247+ function p_template_variable ( &$a , &$xml ) # Parses "{{{name}}}" into <templatevar><part>name</part></templatevar>
 248+ {
 249+ $x = "" ;
 250+ $b = $a ;
 251+ if ( !$this->nextis ( $b , "{{{" ) ) return false ;
 252+ if ( !$this->p_internal_link_text ( $b , $x , false , "}}}" ) ) return false ;
 253+ if ( !$this->nextis ( $b , "}}}" ) ) return false ; # Fails e.g. when the scan stopped at a "|" instead
 254+ $xml .= "<templatevar>{$x}</templatevar>" ;
 255+ $a = $b ;
 256+ return true ;
 257+ }
 258+
 259+ # Bold / italics
 260+ function p_bold ( &$a , &$xml , $recurse = "restofline" , $end = "" ) # Handles a "'''" marker via p_intwined, using the <bold> tag
 261+ {
 262+ return $this->p_intwined ( $a , $xml , "bold" , "'''" , $recurse , $end ) ;
 263+ }
 264+
 265+ function p_italics ( &$a , &$xml , $recurse = "restofline" , $end = "" ) # Handles a "''" marker via p_intwined, using the <italics> tag
 266+ {
 267+ return $this->p_intwined ( $a , $xml , "italics" , "''" , $recurse , $end ) ;
 268+ }
 269+
 270+ function p_intwined ( &$a , &$xml , $tag , $markup , $recurse , $end ) # Opens or closes <$tag> at a $markup marker, then lets "p_{$recurse}" parse what follows
 271+ {
 272+ $b = $a ;
 273+ if ( !$this->nextis ( $b , $markup ) ) return false ;
 274+ $id = substr ( ucfirst ( $tag ) , 0 , 1 ) ; # One-letter code for the tag ("B" for bold, "I" for italics)
 275+ $bi = $this->bold_italics ; # Saved so the stack of open tags can be restored afterwards
 276+ $open = false ;
 277+ if ( substr ( $this->bold_italics , -1 ) == $id ) # This tag was opened last, so the marker closes it
 278+ {
 279+ $x = "</{$tag}>" ;
 280+ $this->bold_italics = substr ( $this->bold_italics , 0 , -1 ) ;
 281+ }
 282+ else
 283+ {
 284+ $pos = strpos ( $this->bold_italics , $id ) ;
 285+ if ( false !== $pos ) return false ; # Tag is open but not innermost, so it cannot be closed or reopened here
 286+ $open = true ;
 287+ $x = "<{$tag}>" ;
 288+ $this->bold_italics .= $id ;
 289+ }
 290+
 291+ if ( $end == "" )
 292+ {
 293+ $res = $this->once ( $b , $x , $recurse ) ;
 294+ }
 295+ else
 296+ {
 297+ $r = "p_{$recurse}" ;
 298+ $res = $this->$r ( $b , $x , $end ) ; # Called directly so the closing markup $end can be handed on
 299+ }
 300+
 301+ $this->bold_italics = $bi ; # Restore the tag stack, whether or not parsing succeeded
 302+ if ( !$res )
 303+ {
 304+ return false ;
 305+ }
 306+ $xml .= $x ;
 307+ $a = $b ;
 308+ return true ;
 309+ }
 310+
 311+ function scanplaintext ( &$a , &$xml , $goodstop , $badstop ) # Copies HTML-escaped text to $xml up to (not including) one of $goodstop; fails on any of $badstop
 312+ {
 313+ $b = $a ;
 314+ $x = "" ;
 315+ while ( $b < $this->wl )
 316+ {
 317+ foreach ( $goodstop AS $s )
 318+ if ( $this->nextis ( $b , $s , false ) ) break 2 ; # Leaves the while loop as well
 319+ foreach ( $badstop AS $s )
 320+ if ( $this->nextis ( $b , $s , false ) ) return false ;
 321+ $c = $this->w[$b] ;
 322+ $x .= htmlspecialchars ( $c ) ;
 323+ $b++ ;
 324+ }
 325+ if ( count ( $goodstop ) > 0 && $b >= $this->wl ) return false ; # Reached end; not good
 326+ $a = $b ;
 327+ $xml .= $x ;
 328+ return true ;
 329+ }
 330+
 331+ # External link
 332+ function p_external_freelink ( &$a , &$xml , $mark = true ) # Parses a bare URL; emits <link type='external' url='...'/> if $mark, otherwise just the URL
 333+ {
 334+ $protocol = "" ;
 335+ $b = $a ;
 336+ foreach ( $this->protocols AS $p )
 337+ {
 338+ if ( $this->nextis ( $b , $p . "://" ) ) # Every protocol is only recognised in its "name://" form
 339+ {
 340+ $protocol = $p ;
 341+ break ;
 342+ }
 343+ }
 344+ if ( $protocol == "" ) return false ;
 345+ $x = "{$protocol}://" ;
 346+ while ( $b < $this->wl )
 347+ {
 348+ if ( $this->w[$b] == "\n" || $this->w[$b] == " " ) break ; # A URL ends at a blank or line break
 349+ if ( !$mark && $this->w[$b] == "]" ) break ; # Inside [url text] the bracket also ends it
 350+ $x .= htmlspecialchars ( $this->w[$b] ) ;
 351+ $b++ ;
 352+ }
 353+ if ( substr ( $x , -1 ) == "." || substr ( $x , -1 ) == "," ) # Trailing punctuation belongs to the sentence, not the URL
 354+ {
 355+ $x = substr ( $x , 0 , -1 ) ;
 356+ $b-- ;
 357+ }
 358+ $a = $b ;
 359+ if ( $mark ) $xml .= "<link type='external' url='{$x}'/>" ;
 360+ else $xml .= $x ;
 361+ return true ;
 362+ }
 363+
 364+ function p_external_link ( &$a , &$xml , $mark = true ) # Parses "[url text]" into <link type='external' href='url'>text</link>; $mark is not used
 365+ {
 366+ $b = $a ;
 367+ if ( !$this->nextis ( $b , "[" ) ) return false ;
 368+ $url = "" ;
 369+ if ( !$this->p_external_freelink ( $b , $url , false ) ) return false ;
 370+ $this->skipblanks ( $b ) ;
 371+ if ( !$this->scanplaintext ( $b , $x , array ( "]" ) , array ( "\n" ) ) ) return false ; # $x receives the link text; it must end with "]" on the same line
 372+ $a = $b + 1 ; # Skip the closing "]"
 373+ $xml .= "<link type='external' href='{$url}'>{$x}</link>" ;
 374+ return true ;
 375+ }
 376+
 377+ # Heading
 378+ function p_heading ( &$a , &$xml ) # Parses a "== title ==" line into <heading level='n'>, where n is the number of "=" minus one
 379+ {
 380+ if ( !$this->nextis ( $a , "==" , false ) ) return false ;
 381+ $b = $a ;
 382+ $level = 0 ;
 383+ $h = "" ; # The run of "=" the line must also end with
 384+ $x = "" ;
 385+ while ( $this->nextis ( $b , "=" ) )
 386+ {
 387+ $level++ ;
 388+ $h .= "=" ;
 389+ }
 390+ $this->skipblanks ( $b ) ;
 391+ if ( !$this->once ( $b , $x , "restofline" ) ) return false ;
 392+ if ( $this->compensate_markup_errors ) $x = trim ( $x ) ;
 393+ else if ( $x != trim ( $x ) ) $xml .= "<error type='heading' reason='trailing blank'/>" ; # Note: written to $xml even if the line is rejected just below
 394+ if ( substr ( $x , -$level ) != $h ) return false ; # No match
 395+
 396+ $x = trim ( substr ( $x , 0 , -$level ) ) ; # Drop the closing "=" run
 397+ $level -= 1 ;
 398+ $a = $b ;
 399+ $xml .= "<heading level='{$level}'>{$x}</heading>" ;
 400+ return true ;
 401+ }
 402+
 403+ # Line
 404+ # Often used function for parsing the rest of a text line
 405+ function p_restofline ( &$a , &$xml , $closeit = array() ) # Parses inline markup from $a to the end of the line (the line break is consumed)
 406+ {
 407+ $b = $a ;
 408+ $x = "" ;
 409+ $override = false ; # Set once a bold/italics parser has taken over the remainder of the line
 410+ while ( $b < $this->wl && !$override )
 411+ {
 412+ $c = $this->w[$b] ;
 413+ if ( $c == "\n" ) { $b++ ; break ; }
 414+ foreach ( $closeit AS $z ) # NOTE(review): the break below only leaves this foreach, so $closeit has no effect - confirm whether "break 2" was intended
 415+ if ( $this->nextis ( $b , $z , false ) ) break ;
 416+ if ( $c == "[" && $this->once ( $b , $x , "internal_link" ) ) continue ;
 417+ if ( $c == "[" && $this->once ( $b , $x , "external_link" ) ) continue ;
 418+ if ( $c == "{" && $this->once ( $b , $x , "template_variable" ) ) continue ;
 419+ if ( $c == "{" && $this->once ( $b , $x , "template" ) ) continue ;
 420+ if ( $c == "<" && $this->once ( $b , $x , "html" ) ) continue ;
 421+ if ( $c == "'" && $this->once ( $b , $x , "bold" ) ) { $override = true ; break ; }
 422+ if ( $c == "'" && $this->once ( $b , $x , "italics" ) ) { $override = true ; break ; }
 423+ if ( $this->once ( $b , $x , "external_freelink" ) ) continue ;
 424+
 425+ # Just an ordinary character
 426+ $x .= htmlspecialchars ( $c ) ;
 427+ $b++ ;
 428+ if ( $b >= $this->wl ) break ;
 429+ }
 430+ if ( !$override && $this->bold_italics != "" ) # Line ended while a bold/italics tag was still open
 431+ {
 432+ return false ;
 433+ }
 434+ $xml .= $x ;
 435+ $a = $b ;
 436+ return true ;
 437+ }
 438+
 439+ function p_line ( &$a , &$xml , $force ) # Parses one ordinary text line; unless $force is set, lines that start other block markup are refused
 440+ {
 441+ if ( $a >= $this->wl ) return false ; # Already at the end of the text
 442+ $c = $this->w[$a] ;
 443+ if ( !$force )
 444+ {
 445+ if ( $c == '*' || $c == ':' || $c == '#' || $c == ' ' || $c == "\n" ) return false ; # Not a suitable beginning
 446+ if ( $this->nextis ( $a , "{|" , false ) ) return false ; # Table
 447+ if ( count ( $this->tables ) > 0 && $this->nextis ( $a , "|" , false ) ) return false ; # Table
 448+ if ( count ( $this->tables ) > 0 && $this->nextis ( $a , "!" , false ) ) return false ; # Table
 449+ if ( $this->nextis ( $a , "==" , false ) ) return false ; # Heading
 450+ if ( $this->nextis ( $a , "----" , false ) ) return false ; # <hr>
 451+ }
 452+ $this->bold_italics = "" ; # Bold/italics never carry over from a previous line
 453+ return $this->once ( $a , $xml , "restofline" ) ;
 454+ }
 455+
 456+ function p_blankline ( &$a , &$xml ) # Consumes a single line break at $a; writes nothing to $xml
 457+ {
 458+ if ( $this->nextis ( $a , "\n" ) ) return true ;
 459+ return false ;
 460+ }
 461+
 462+ function p_block_lines ( &$a , &$xml , $force = false ) # Parses consecutive text lines plus trailing blank lines into one <paragraph>
 463+ {
 464+ $x = "" ;
 465+ $b = $a ;
 466+ if ( !$this->p_line ( $b , $x , $force ) ) return false ; # Only the first line may be forced
 467+ while ( $this->p_line ( $b , $x , false ) ) ;
 468+ $this->many ( $b , $x , "blankline" ) ;
 469+ $xml .= "<paragraph>{$x}</paragraph>" ;
 470+ $a = $b ;
 471+ return true ;
 472+ }
 473+
 474+
 475+
 476+ # PRE block
 477+ # Parses a line starting with ' '
 478+ function p_preline ( &$a , &$xml ) # Parses one preformatted line, i.e. a line starting with a blank
 479+ {
 480+ if ( $a >= $this->wl ) return false ; # Already at the end of the text
 481+ $c = $this->w[$a] ;
 482+ if ( $c != ' ' ) return false ; # Not a preline
 483+ $this->bold_italics = "" ;
 484+ $this->skipblanks ( $a ) ; # Moves $a itself, so the blanks stay consumed even if the line fails to parse
 485+ return $this->once ( $a , $xml , "restofline" ) ;
 486+ }
 487+
 488+ # Parses a block of lines each starting with ' '
 489+ function p_block_pre ( &$a , &$xml ) # Parses one or more prelines plus trailing blank lines into a <pre> element
 490+ {
 491+ $x = "" ;
 492+ $b = $a ;
 493+ if ( !$this->onceormore ( $b , $x , "preline" ) ) return false ;
 494+ $this->many ( $b , $x , "blankline" ) ;
 495+ $xml .= "<pre>{$x}</pre>" ;
 496+ $a = $b ;
 497+ return true ;
 498+ }
 499+
 500+ # LIST block
 501+ # Returns a list tag depending on the wiki markup
 502+ function listtag ( $c , $open = true ) # Returns the opening <list type='...'> tag for marker $c, or "</list>" if $open is false
 503+ {
 504+ if ( !$open ) return "</list>" ;
 505+ $r = "" ;
 506+ if ( $c == '#' ) $r = "numbered" ;
 507+ if ( $c == '*' ) $r = "bullet" ;
 508+ if ( $c == ':' ) $r = "ident" ;
 509+ if ( $r != "" ) $r = " type='{$r}'" ; # Unknown markers give a plain <list>
 510+ $r = "<list{$r}>" ;
 511+ return $r ;
 512+ }
 513+
 514+ # Opens/closes list tags
 515+ function fixlist ( $last , $cur ) # Returns the tags needed to get from list prefix $last (e.g. "*#") to list prefix $cur
 516+ {
 517+ $r = "" ;
 518+ $olast = $last ;
 519+ $ocur = $cur ;
 520+ $ocommon = "" ; # Last marker both prefixes have in common, "" if none
 521+
 522+ # Remove matching parts
 523+ while ( $last != "" && $cur != "" && $last[0] == $cur[0] )
 524+ {
 525+ $ocommon = $cur[0] ;
 526+ $cur = substr ( $cur , 1 ) ;
 527+ $last = substr ( $last , 1 ) ;
 528+ }
 529+
 530+ # Close old tags
 531+ $fixitemtag = false ;
 532+ if ( $last != "" && $ocommon != "" ) $fixitemtag = true ; # Deeper levels get closed while an outer list stays open
 533+ while ( $last != "" ) # Innermost level first
 534+ {
 535+ $r .= "</listitem>" . $this->listtag ( substr ( $last , -1 ) , false ) ;
 536+ $last = substr ( $last , 0 , -1 ) ;
 537+ }
 538+ if ( $fixitemtag ) $r .= "</listitem><listitem>" ; # Start the next item of the outer list
 539+
 540+ # Open new tags
 541+ while ( $cur != "" )
 542+ {
 543+ $r .= $this->listtag ( $cur[0] ) . "<listitem>" ;
 544+ $cur = substr ( $cur , 1 ) ;
 545+ }
 546+
 547+ return $r ;
 548+ }
 549+
 550+ # Parses a single list line
 551+ function p_list_line ( &$a , &$xml , &$last ) # Parses one list line; $last holds the previous line's marker prefix and is updated to this line's
 552+ {
 553+ $cur = "" ;
 554+ do { # Collect the whole marker prefix, in whatever order the markers are mixed
 555+ $lcur = $cur ;
 556+ while ( $this->nextis ( $a , "*" ) ) $cur .= "*" ;
 557+ while ( $this->nextis ( $a , "#" ) ) $cur .= "#" ;
 558+ while ( $this->nextis ( $a , ":" ) ) $cur .= ":" ;
 559+ } while ( $cur != $lcur ) ;
 560+
 561+ $unchanged = false ;
 562+# if ( substr ( $cur , 0 , strlen ( $last ) ) == $last ) $unchanged = true ;
 563+ if ( $last == $cur ) $unchanged = true ;
 564+ $xml .= $this->fixlist ( $last , $cur ) ; # Also closes the open lists when this turns out not to be a list line
 565+
 566+ if ( $cur == "" ) return false ; # Not a list line
 567+ $last = $cur ;
 568+ $this->skipblanks ( $a ) ;
 569+
 570+ if ( $unchanged ) $xml .= "</listitem><listitem>" ; # Same prefix as before: next item of the same list
 571+ $this->p_restofline ( $a , $xml ) ; # Result is ignored
 572+ return true ;
 573+ }
 574+
 575+ # Checks for a list block (those nasty things starting with '*', '#', or the like...)
 576+ function p_block_list ( &$a , &$xml ) # Parses consecutive list lines; true if at least one was found
 577+ {
 578+ $last = "" ; # Marker prefix of the previous list line
 579+ $found = false ;
 580+ while ( $this->p_list_line ( $a , $xml , $last ) ) $found = true ;
 581+ return $found ;
 582+ }
 583+
 584+ # HTML
 585+ # This function detects a HTML tag, finds the matching close tag,
 586+ # parses everything in between, and returns everything as an extension.
 587+ # Returns false otherwise.
 588+ function p_html ( &$a , &$xml ) # Parses an HTML-style element at $a into an xhtml:* tag (if listed in $directhtmltags) or <extension name='...'>
 589+ {
 590+ if ( !$this->nextis ( $a , "<" , false ) ) return false ;
 591+ $b = $a ;
 592+ $x = "" ; # Receives the <attrs> of the opening tag
 593+ $tag = "" ;
 594+ $closing = false ;
 595+ $selfclosing = false ;
 596+ if ( !$this->p_html_tag ( $b , $x , $tag , $closing , $selfclosing ) ) return false ;
 597+
 598+ if ( isset ( $this->directhtmltags[$tag] ) )
 599+ {
 600+ $tag_open = "<" . $this->directhtmltags[$tag] . ">" ;
 601+ $tag_close = "</" . $this->directhtmltags[$tag] . ">" ;
 602+ }
 603+ else
 604+ {
 605+ $tag_open = "<extension name='{$tag}'>" ;
 606+ $tag_close = "</extension>" ;
 607+ }
 608+
 609+ # Is this tag self-closing?
 610+ if ( $selfclosing )
 611+ {
 612+ $a = $b ;
 613+ $xml .= $tag_open . $x . $tag_close ;
 614+ return true ;
 615+ }
 616+
 617+ # Find the matching close tag
 618+ # TODO : The simple open/close counter should be replaced with a
 619+ # stack to allow for tolerating half-broken HTML,
 620+ # such as unclosed <li> tags
 621+ $begin = $b ; # First position after the opening tag
 622+ $cnt = 1 ; # Nesting depth of tags with the same name
 623+ $tag2 = "" ;
 624+ while ( $cnt > 0 && $b < $this->wl )
 625+ {
 626+ $x2 = "" ;
 627+ $last = $b ; # Start of the tag examined in this round
 628+ if ( !$this->p_html_tag ( $b , $x2 , $tag2 , $closing , $selfclosing ) )
 629+ {
 630+ $b++ ;
 631+ continue ;
 632+ }
 633+ if ( $tag != $tag2 ) continue ; # Other tags do not affect the depth
 634+ if ( $selfclosing ) continue ;
 635+ if ( $closing ) $cnt-- ;
 636+ else $cnt++ ;
 637+ }
 638+
 639+ if ( $cnt > 0 ) return false ; # Tag was never closed
 640+
 641+ # What happens in between?
 642+ $between = substr ( $this->w , $begin , $last - $begin ) ; # Everything up to where the matching close tag starts
 643+ if ( $tag != "pre" && $tag != "nowiki" && $tag != "math" )
 644+ {
 645+ # Parse the part in between the tags
 646+ $subparser = new wiki2xml ;
 647+ $between2 = $subparser->parse ( $between ) ;
 648+
 649+ # Was the parsing correct?
 650+ if ( $between2 != $this->errormessage )
 651+ $between = $this->strip_single_paragraph ( $between2 ) ; # No <paragraph> for inline HTML tags
 652+ else
 653+ $between = htmlspecialchars ( $between ) ; # Incorrect markup, use safe wiki source instead
 654+ }
 655+ else $between = htmlspecialchars ( $between ) ; # No wiki parsing in here
 656+
 657+ $a = $b ;
 658+ $xml .= $tag_open . $x . $between . $tag_close ;
 659+# $xml .= $x ;
 660+# $xml .= $between ;
 661+# $xml .= "</extension>" ;
 662+ return true ;
 663+ }
 664+
 665+ function strip_single_paragraph ( $s ) # Removes the <paragraph> wrapper if $s consists of exactly one paragraph; otherwise returns $s unchanged
 666+ {
 667+ if ( substr_count ( $s , "paragraph>" ) == 2 && # Exactly one opening and one closing tag
 668+ substr ( $s , 0 , 11 ) == "<paragraph>" &&
 669+ substr ( $s , -12 ) == "</paragraph>" )
 670+ $s = substr ( $s , 11 , -12 ) ;
 671+ return $s ;
 672+ }
 673+
 674+ # This function checks for and parses a HTML tag
 675+ # Only to be called from p_html, as it returns only a partial extension tag!
 676+ function p_html_tag ( &$a , &$xml , &$tag , &$closing , &$selfclosing ) # Parses one "<tag attr=...>"; reports name and kind through the reference parameters, appends <attrs> to $xml
 677+ {
 678+ if ( $this->w[$a] != '<' ) return false ;
 679+ $b = $a + 1 ;
 680+ $this->skipblanks ( $b ) ;
 681+ $tag = "" ;
 682+ $attrs = array () ;
 683+ if ( !$this->scanplaintext ( $b , $tag , array ( " " , ">" ) , array ( "\n" ) ) ) return false ; # Tag name runs up to a blank or ">"
 684+
 685+ $this->skipblanks ( $b ) ;
 686+ if ( $b >= $this->wl ) return false ;
 687+
 688+ $tag = trim ( strtolower ( $tag ) ) ;
 689+ $closing = false ;
 690+ $selfclosing = false ;
 691+
 692+ # Is closing tag?
 693+ if ( substr ( $tag , 0 , 1 ) == "/" )
 694+ {
 695+ $tag = substr ( $tag , 1 ) ;
 696+ $closing = true ;
 697+ $this->skipblanks ( $b ) ;
 698+ if ( $b >= $this->wl ) return false ;
 699+ }
 700+
 701+ if ( substr ( $tag , -1 ) == "/" ) # "<br/>" style: the slash was scanned as part of the name
 702+ {
 703+ $tag = substr ( $tag , 0 , -1 ) ;
 704+ $selfclosing = true ;
 705+ }
 706+
 707+ # Scan attrs
 708+ while ( $this->w[$b] != '>' && $this->w[$b] != '/' )
 709+ {
 710+ $attr = "" ;
 711+ if ( !$this->p_html_attr ( $b , $attr ) ) return false ;
 712+ $attrs[] = $attr ;
 713+ $this->skipblanks ( $b ) ;
 714+ if ( $b >= $this->wl ) return false ;
 715+ }
 716+
 717+ # Is self closing?
 718+ if ( $this->w[$b] == '/' )
 719+ {
 720+ $b++ ;
 721+ $selfclosing = true ;
 722+ }
 723+
 724+ $this->skipblanks ( $b ) ;
 725+ if ( $b >= $this->wl ) return false ;
 726+ if ( $this->w[$b] != '>' ) return false ;
 727+
 728+ $a = $b + 1 ; # Continue behind the ">"
 729+ if ( count ( $attrs ) > 0 )
 730+ {
 731+ $xml .= "<attrs>" ;
 732+ $xml .= implode ( "" , $attrs ) ;
 733+ $xml .= "</attrs>" ;
 734+ }
 735+ return true ;
 736+ }
 737+
 738+ # This function scans a single HTML tag attribute and returns it as <attr name='key'>value</attr>
 739+ function p_html_attr ( &$a , &$xml ) # Parses "name", "name=value" or "name='value'" and appends <attr name='name'>value</attr> to $xml
 740+ {
 741+ $b = $a ;
 742+ $this->skipblanks ( $b ) ;
 743+ if ( $b >= $this->wl ) return false ;
 744+ $name = "" ;
 745+ if ( !$this->scanplaintext ( $b , $name , array ( " " , "=" , ">" , "/" ) , array ( "\n" ) ) ) return false ; # May yield an empty name, e.g. directly in front of "/"
 746+
 747+ $this->skipblanks ( $b ) ;
 748+ if ( $b >= $this->wl ) return false ;
 749+ $name = trim ( strtolower ( $name ) ) ;
 750+
 751+ $value = "" ;
 752+ if ( $this->w[$b] == "=" )
 753+ {
 754+ $b++ ;
 755+ $this->skipblanks ( $b ) ;
 756+ if ( $b >= $this->wl ) return false ;
 757+ $q = "" ; # The quote character in use, "" for an unquoted value
 758+ $is_q = false ;
 759+ if ( $this->w[$b] == '"' || $this->w[$b] == "'" )
 760+ {
 761+ $q = $this->w[$b] ;
 762+ $b++ ;
 763+ if ( $b >= $this->wl ) return false ;
 764+ $is_q = true ;
 765+ }
 766+ while ( $b < $this->wl )
 767+ {
 768+ $c = $this->w[$b] ;
 769+ if ( $c == $q )
 770+ {
 771+ $b++ ;
 772+ if ( $is_q ) break ; # Closing quote: value complete
 773+ return false ; # Broken attribute value
 774+ }
 775+ if ( $this->nextis ( $b , "\\{$q}" ) ) # Ignore escaped quotes
 776+ {
 777+ $value .= "\\{$q}" ;
 778+ continue ;
 779+ }
 780+ if ( $c == "\n" ) return false ; # Line break before value end
 781+ if ( !$is_q && ( $c == ' ' || $c == '>' || $c == '/' ) ) break ; # An unquoted value ends here; the character is not consumed
 782+ $value .= htmlspecialchars ( $c ) ;
 783+ $b++ ;
 784+ }
 785+ }
 786+
 787+ $a = $b ;
 788+ $xml .= "<attr name='{$name}'>{$value}</attr>" ;
 789+ return true ;
 790+ }
 791+
 792+ # Horizontal ruler (<hr> / ----)
 793+ function p_hr ( &$a , &$xml ) # Parses a "----" ruler (four or more dashes) into <hr/>
 794+ {
 795+ if ( !$this->nextis ( $a , "----" ) ) return false ;
 796+ $this->skipblanks ( $a , "-" ) ; # Any further dashes
 797+ $this->skipblanks ( $a ) ; # Blanks after the dashes
 798+ $xml .= "<hr/>" ;
 799+ return true ;
 800+ }
 801+
 802+ # TABLE
 803+ # Scans the rest of the line as HTML attributes and returns the usual <attrs><attr> string
 804+ function scanattributes ( &$a )
 805+ {
 806+ $x = "" ;
 807+ while ( $a < $this->wl )
 808+ {
 809+ if ( $this->w[$a] == "\n" ) break ;
 810+ $x .= $this->w[$a] ;
 811+ $a++ ;
 812+ }
 813+ $x .= ">" ;
 814+
 815+ # Creating a temporary new parder to tun the attribute list in
 816+ $np = new wiki2xml ;
 817+ $np->w = $x ;
 818+ $np->wl = strlen ( $x ) ;
 819+
 820+ # Scanning attribute list
 821+ $attrs = array () ;
 822+ $c = 0 ;
 823+ while ( $np->w[$c] != '>' )
 824+ {
 825+ $attr = "" ;
 826+ if ( !$np->p_html_attr ( $c , $attr ) ) break ;
 827+ $attrs[] = $attr ;
 828+ $np->skipblanks ( $c ) ;
 829+ }
 830+ if ( substr ( $x , $c ) != ">" ) return "" ;
 831+
 832+ $ret = "" ;
 833+ if ( count ( $attrs ) > 0 )
 834+ {
 835+ $ret .= "<attrs>" ;
 836+ $ret .= implode ( "" , $attrs ) ;
 837+ $ret .= "</attrs>" ;
 838+ }
 839+ return $ret ;
 840+ }
 841+
 842+ # Finds the first of the given items; does *not* alter $a
 843+ function scanahead ( $a , $matches ) # Returns the first position at or after $a where one of the strings in $matches starts, or -1
 844+ {
 845+ while ( $a < $this->wl )
 846+ {
 847+ foreach ( $matches AS $x )
 848+ {
 849+ if ( $this->nextis ( $a , $x , false ) )
 850+ {
 851+ return $a ;
 852+ }
 853+ }
 854+ $a++ ;
 855+ }
 856+ return -1 ; # Not found
 857+ }
 858+
 859+
 860+ # The main table parsing function
 861+ function p_table ( &$a , &$xml ) # Dispatches table markup at $a to the open, close or element parser
 862+ {
 863+ if ( $a >= $this->wl ) return false ;
 864+ $c = $this->w[$a] ;
 865+ if ( $c == "{" && $this->nextis ( $a , "{|" , false ) )
 866+ return $this->p_table_open ( $a , $xml ) ;
 867+
 868+ if ( $c != "|" && $c != "!" ) return false ; # No possible table markup
 869+
 870+ if ( count ( $this->tables ) == 0 ) return false ; # No tables open, nothing to do
 871+
 872+ if ( $c == "|" && $this->nextis ( $a , "|}" , false ) ) return $this->p_table_close ( $a , $xml ) ;
 873+
 874+ #if ( $this->nextis ( $a , "|" , false ) || $this->nextis ( $a , "!" , false ) )
 875+ return $this->p_table_element ( $a , $xml , true ) ; # Row, caption, cell or header cell at the start of a line
 876+ }
 877+
 878+ function lasttable () # Returns the last entry of $this->tables; does not check that the list is non-empty
 879+ {
 880+ return $this->tables[count($this->tables)-1] ;
 881+ }
 882+
 883+ # Returns the attributes for table cells
 884+ function tryfindparams ( &$a )
 885+ {
 886+ $n = strspn ( $this->w , $this->allowed , $a ) ; # PHP 4.3.0 and above
 887+# $n = strspn ( substr ( $this->w , $a ) , $this->allowed ) ; # PHP < 4.3.0
 888+ if ( $n == 0 ) return "" ; # None found
 889+
 890+ $b = $a + $n ;
 891+ if ( $b >= $this->wl ) return "" ;
 892+ if ( $this->w[$b] != "|" && $this->w[$b] != "!" ) return "" ;
 893+ if ( $this->nextis ( $b , "||" , false ) ) return "" ; # Reached a ||, so return blank string
 894+ if ( $this->nextis ( $b , "!!" , false ) ) return "" ; # Reached a ||, so return blank string
 895+ $this->w[$b] = "\n" ;
 896+ $ret = $this->scanattributes ( $a ) ;
 897+ $this->w[$b] = "|" ;
 898+ $a = $b + 1 ;
 899+ return $ret ;
 900+ }
 901+
 902+ function p_table_element ( &$a , &$xml , $newline = false )
 903+ {
 904+ $b = $a ;
 905+ $x = "" ;
 906+ $lt = $this->lasttable() ;
 907+ if ( $newline && $this->nextis ( $b , "|-" ) ) # Table row
 908+ {
 909+ $this->skipblanks ( $b , "-" ) ;
 910+ $this->skipblanks ( $b ) ;
 911+
 912+ $attrs = $this->scanattributes ( $b ) ;
 913+ if ( $this->tables[count($this->tables)-1]->is_row_open ) $x .= "</tablerow>" ;
 914+ else $this->tables[count($this->tables)-1]->is_row_open = true ;
 915+ $x .= "<tablerow>{$attrs}" ;
 916+ }
 917+ else if ( $newline && $this->nextis ( $b , "|+" ) ) # Table caption
 918+ {
 919+ $this->skipblanks ( $b ) ;
 920+ if ( $this->tables[count($this->tables)-1]->is_row_open ) $x .= "</tablerow>" ;
 921+ $this->tables[count($this->tables)-1]->is_row_open = false ;
 922+ if ( !$this->p_restofcell ( $b , $x ) ) return false ;
 923+ $x = "<tablecaption>{$x}</tablecaption>" ;
 924+ }
 925+ else # TD or TH
 926+ {
 927+ $c = $this->w[$b] ;
 928+ $b++ ;
 929+ if ( $c == '|' ) $tag = "tablecell" ;
 930+ else if ( $c == '!' ) $tag = "tablehead" ;
 931+ else return false ; # This would indeed be strange!
 932+ $attrs = $this->tryfindparams ( $b ) ;
 933+ if ( !$this->p_restofcell ( $b , $x ) ) return false ;
 934+
 935+ if ( substr ( $x , 0 , 1 ) == "|" ) # Crude fix to compensate for MediaWiki "tolerant" parsing
 936+ $x = substr ( $x , 1 ) ;
 937+ $x = "<{$tag}>{$attrs}{$x}</{$tag}>" ;
 938+ if ( !$lt->is_row_open )
 939+ {
 940+ $this->tables[count($this->tables)-1]->is_row_open = true ;
 941+ $x = "<tablerow>{$x}" ;
 942+ }
 943+ }
 944+
 945+ $a = $b ;
 946+ $xml .= $x ;
 947+ return true ;
 948+ }
 949+
 950+ # Finds the substring that composes the table cell,
 951+ # then runs a new parser on it
 952+ function p_restofcell ( &$a , &$xml )
 953+ {
 954+ # Get substring for cell
 955+ $b = $a ;
 956+ $sameline = true ;
 957+ while ( $b < $this->wl )
 958+ {
 959+ $c = $this->w[$b] ;
 960+ if ( $c == "\n" ) { $sameline = false ; }
 961+ if ( $c == "\n" && $this->nextis ( $b , "\n|" , false ) ) break ;
 962+ if ( $c == "\n" && $this->nextis ( $b , "\n!" , false ) ) break ;
 963+ if ( $c == "|" && $sameline && $this->nextis ( $b , "||" , false ) ) break ;
 964+ if ( $c == "!" && $sameline && $this->nextis ( $b , "!!" , false ) ) break ;
 965+ if ( $c == "[" && $this->once ( $b , $x , "internal_link" ) ) continue ;
 966+ if ( $c == "{" && $this->once ( $b , $x , "template_variable" ) ) continue ;
 967+ if ( $c == "{" && $this->once ( $b , $x , "template" ) ) continue ;
 968+ if ( $c == "<" && $this->once ( $b , $x , "html" ) ) continue ;
 969+ $b++ ;
 970+ }
 971+
 972+ # Parse cell substring
 973+ $x = substr ( $this->w , $a , $b - $a ) ;
 974+ $p = new wiki2xml ;
 975+ $x = $p->parse ( $x ) ;
 976+ if ( $x == $this->errormessage ) return false ;
 977+
 978+ $a = $b + 1 ;
 979+ $xml .= $this->strip_single_paragraph ( $x ) ;
 980+ return true ;
 981+ }
 982+
 983+ function p_table_close ( &$a , &$xml )
 984+ {
 985+ if ( count ( $this->tables ) == 0 ) return false ;
 986+ $b = $a ;
 987+ if ( !$this->nextis ( $b , "|}" ) ) return false ;
 988+ $x = "" ;
 989+ $lt = $this->lasttable() ;
 990+ if ( $lt->is_row_open ) $x .= "</tablerow>" ;
 991+ array_pop ( $this->tables ) ;
 992+ $x .= "</table>" ;
 993+ $xml .= $x ;
 994+ $a = $b ;
 995+ while ( $this->nextis ( $a , "\n" ) ) ;
 996+ return true ;
 997+ }
 998+
 999+ function p_table_open ( &$a , &$xml )
 1000+ {
 1001+ $b = $a ;
 1002+ if ( !$this->nextis ( $b , "{|" ) ) return false ;
 1003+
 1004+ $this->is_row_open = false ;
 1005+
 1006+ $x = "<table>" ;
 1007+ $x .= $this->scanattributes ( $b ) ;
 1008+ while ( $this->nextis ( $b , "\n" ) ) ;
 1009+
 1010+ # Add table to stack
 1011+ $nt->is_row_open = false ;
 1012+ array_push ( $this->tables , $nt ) ;
 1013+
 1014+ # Try the rest of the article as another article
 1015+ $x2 = "" ;
 1016+ if ( !$this->p_article ( $b , $x2 ) )
 1017+ {
 1018+ array_pop ( $this->tables ) ;
 1019+ return false ;
 1020+ }
 1021+ $x2 = $this->strip_single_paragraph ( $x2 ) ;
 1022+
 1023+ $a = $b ;
 1024+ $xml .= $x . $x2 ;
 1025+ return true ;
 1026+ }
 1027+
 1028+ #-----------------------------------
 1029+ # Parse the article
 1030+ function p_article ( &$a , &$xml )
 1031+ {
 1032+ $x = "" ;
 1033+ $b = $a ;
 1034+ while ( $b < $this->wl )
 1035+ {
 1036+ if ( $this->onceormore ( $b , $x , "heading" ) ) continue ;
 1037+ if ( $this->onceormore ( $b , $x , "block_lines" ) ) continue ;
 1038+ if ( $this->onceormore ( $b , $x , "block_pre" ) ) continue ;
 1039+ if ( $this->onceormore ( $b , $x , "block_list" ) ) continue ;
 1040+ if ( $this->onceormore ( $b , $x , "hr" ) ) continue ;
 1041+ if ( $this->onceormore ( $b , $x , "table" ) ) continue ;
 1042+ if ( $this->onceormore ( $b , $x , "blankline" ) ) continue ;
 1043+ if ( $this->p_block_lines ( $b , $x , true ) ) continue ;
 1044+ # The last resort!
 1045+ if ( !$this->compensate_markup_errors ) $xml .= "<error type='general' reason='no matching markup'/>" ;
 1046+ $xml .= htmlspecialchars ( $this->w[$b] ) ; # Used to be : break ;
 1047+ }
 1048+ # if ( $b < $this->wl ) return false ; # Now obsolete, as no break anymore
 1049+ $a = $b ;
 1050+ $xml .= $x ;
 1051+ return true ;
 1052+ }
 1053+
 1054+ # The only function to be called directly from outside the class
 1055+ function parse ( &$wiki )
 1056+ {
 1057+ $this->w = trim ( $wiki ) ;
 1058+
 1059+ # Fix line endings
 1060+ $cc = count_chars ( $wiki , 0 ) ;
 1061+ if ( $cc[10] > 0 && $cc[13] == 0 )
 1062+ $this->w = str_replace ( "\r" , "\n" , $this->w ) ;
 1063+ $this->w = str_replace ( "\r" , "" , $this->w ) ;
 1064+
 1065+ # Remove HTML comments
 1066+ $this->w = preg_replace( '?<!--.*-->?msU', '', $this->w);
 1067+
 1068+ # Run the thing!
 1069+ $this->tables = array () ;
 1070+ $this->wl = strlen ( $this->w ) ;
 1071+ $xml = "" ;
 1072+ $a = 0 ;
 1073+ if ( !$this->p_article ( $a , $xml ) ) return $this->errormessage ;
 1074+
 1075+ # XML cleanup
 1076+ do {
 1077+ $lxml = $xml ;
 1078+ $xml = str_replace ( " " , " " , $xml ) ;
 1079+ } while ( $lxml != $xml ) ;
 1080+ if ( $this->use_space_tag ) {
 1081+ $xml = str_replace ( "> " , "><space/>" , $xml ) ;
 1082+ $xml = str_replace ( " <" , "<space/><" , $xml ) ;
 1083+ }
 1084+
 1085+ return $xml ;
 1086+ }
 1087+
 1088+ }
 1089+
 1090+?>
\ No newline at end of file
Property changes on: trunk/wiki2xml/php/wiki2xml.php
___________________________________________________________________
Added: svn:keywords
11091 + Author Date Id Revision
Added: svn:eol-style
21092 + native

Status & tagging log