Index: trunk/wiki2xml/php/xml2xhtml.php |
— | — | @@ -139,7 +139,6 @@ |
140 | 140 | function tag_extension ( $open , &$attrs ) { |
141 | 141 | if( !defined( 'MEDIAWIKI' ) ) return ; # Only as MediaWiki extension |
142 | 142 | |
143 | | - |
144 | 143 | if ( $open ) { |
145 | 144 | $this->extension_name = $attrs['EXTENSION_NAME'] ; |
146 | 145 | $this->extension_attrs = $attrs ; |
— | — | @@ -158,6 +157,7 @@ |
159 | 158 | if ( $extension_text == "" ) $extension_text = "<{$this->extension_name}/>" ; |
160 | 159 | else $extension_text = "<{$this->extension_name}>{$extension_text}</{$this->extension_name}>" ; |
161 | 160 | $options = new ParserOptions ; |
| 161 | + |
162 | 162 | $s = $wgParser->parse ( $extension_text , $wgTitle , $options , false ) ; |
163 | 163 | $this->add ( $s->getText() ) ; |
164 | 164 | } |
— | — | @@ -509,4 +509,4 @@ |
510 | 510 | xml_parser_free($xml_parser_handle); |
511 | 511 | } |
512 | 512 | |
513 | | -?> |
\ No newline at end of file |
| 513 | +?> |
Index: trunk/wiki2xml/php/wiki2xml.php |
— | — | @@ -20,6 +20,7 @@ |
21 | 21 | "s" => "xhtml:s", |
22 | 22 | "p" => "xhtml:p", |
23 | 23 | "br" => "xhtml:br", |
| 24 | + "em" => "xhtml:em", |
24 | 25 | "div" => "xhtml:div", |
25 | 26 | "span" => "xhtml:span", |
26 | 27 | "big" => "xhtml:big", |
— | — | @@ -289,7 +290,9 @@ |
290 | 291 | $between = str_replace ( "<includeonly>" , "" , $between ) ; |
291 | 292 | $between = str_replace ( "</includeonly>" , "" , $between ) ; |
292 | 293 | |
293 | | - # Replacing template variables. ATTENTION: Template variables within <nowiki> sections of templates will be replaced as well! |
| 294 | + # Replacing template variables. |
| 295 | + # ATTENTION: Template variables within <nowiki> sections of templates will be replaced as well! |
| 296 | + |
294 | 297 | $this->replace_template_variables ( $between , $variables ) ; |
295 | 298 | |
296 | 299 | # Change source (!) |
— | — | @@ -739,11 +742,13 @@ |
740 | 743 | function p_html ( &$a , &$xml ) |
741 | 744 | { |
742 | 745 | if ( !$this->nextis ( $a , "<" , false ) ) return false ; |
| 746 | + |
743 | 747 | $b = $a ; |
744 | 748 | $x = "" ; |
745 | 749 | $tag = "" ; |
746 | 750 | $closing = false ; |
747 | 751 | $selfclosing = false ; |
| 752 | + |
748 | 753 | if ( !$this->p_html_tag ( $b , $x , $tag , $closing , $selfclosing ) ) return false ; |
749 | 754 | |
750 | 755 | if ( isset ( $this->directhtmltags[$tag] ) ) |
— | — | @@ -793,6 +798,7 @@ |
794 | 799 | |
795 | 800 | # What happens in between? |
796 | 801 | $between = substr ( $this->w , $begin , $last - $begin ) ; |
| 802 | + |
797 | 803 | if ( $tag != "pre" && $tag != "nowiki" && $tag != "math" ) |
798 | 804 | { |
799 | 805 | if ( $tag == 'gallery' ) { |
— | — | @@ -891,7 +897,7 @@ |
892 | 898 | $selfclosing = true ; |
893 | 899 | } |
894 | 900 | |
895 | | - # Parsing arrtibutes |
| 901 | + # Parsing attributes |
896 | 902 | $ob = $b ; |
897 | 903 | $q = "" ; |
898 | 904 | while ( $q != "" || ( $b < $this->wl && $this->w[$b] != '>' && $this->w[$b] != '/' ) ) { |
Index: trunk/wiki2xml/php/README |
— | — | @@ -20,8 +20,38 @@ |
21 | 21 | |
22 | 22 | svn co http://svn.wikimedia.org/svnroot/mediawiki/trunk/wiki2xml |
23 | 23 | |
24 | | -=head2 Copy files |
| 24 | +=head2 Extension or cgi-bin? |
25 | 25 | |
| 26 | +There are two ways to install wiki2xml: |
| 27 | + |
 | 28 | + As extension: [[Special:Wiki2XML]] |
 | 29 | + As cgi-bin: http://example.com/wiki/w2x/w2x.php |
| 30 | + |
 | 31 | +The former should be preferred. |
| 32 | + |
| 33 | +=head2 As Extension |
| 34 | + |
| 35 | +To enable wiki2xml as extension, put all files in the C<< php >> directory into a |
| 36 | +C<< wiki2xml >> subdirectory of your MediaWiki extensions directory. |
| 37 | + |
| 38 | + htdocs |
| 39 | + \- wiki |
| 40 | + \- extensions |
| 41 | + \- wiki2xml <-- create this directory |
| 42 | + \- w2x.php <-- copy files here |
| 43 | + \- wiki2xml.php etc. |
| 44 | + |
| 45 | +Then add |
| 46 | + |
| 47 | + require_once ("extensions/wiki2xml/extension.php"); |
| 48 | + |
| 49 | +to your C<< LocalSettings.php >>. The extension can then be accessed as |
| 50 | +C<< [[Special:Wiki2XML]] >>. |
| 51 | + |
| 52 | +=head2 cgi-bin - Copy files |
| 53 | + |
| 54 | +The alternative method is to install wiki2xml as cgi-bin script: |
| 55 | + |
26 | 56 | Copy the subdirectory C<< ./php/ >> to your server's C<< wiki/ >> |
27 | 57 | directory as a subdirectory named C<< w2x >>: |
28 | 58 | |
— | — | @@ -31,6 +61,8 @@ |
32 | 62 | \- w2x.php |
33 | 63 | \- wiki2xml.php etc |
34 | 64 | |
| 65 | +Access it as C<< http://example.com/wiki/w2x/w2x.php >>. |
| 66 | + |
35 | 67 | =head2 Configuration |
36 | 68 | |
37 | 69 | The configuration is stored in C<< default.php >> and C<< local.php >>. |
— | — | @@ -55,43 +87,31 @@ |
56 | 88 | |
57 | 89 | =head1 USAGE |
58 | 90 | |
59 | | -Open the following in your browser: |
| 91 | +Open the correct URL (depending on install type, see above) in your browser. |
60 | 92 | |
61 | | - http://example.com/wiki/w2x/w2x.php |
| 93 | +Wiki2xml should present you with a form with a textarea and several buttons. |
62 | 94 | |
63 | | -It should present you with a form with a textarea and several buttons. |
64 | | - |
65 | 95 | =head2 Using URL parameters |
66 | 96 | |
67 | | -Parameters: |
68 | | -doit=1 |
69 | | -text=lines_of_text_or_titles |
70 | | -whatsthis=wikitext/articlelist |
71 | | -site=en.wikipedia.org/w |
72 | | -output_format=xml/text/xhtml/docbook_xml/odt_xml/odt |
73 | | - |
74 | | -Optional: |
75 | | -use_templates=all/none/these/notthese |
76 | | -templates=lines_of_templates |
77 | | -document_title= |
78 | | -add_gfdl=1 |
79 | | -keep_categories=1 |
80 | | -keep_interlanguage=1 |
| 97 | +Parameters: |
81 | 98 | |
| 99 | + doit=1 |
| 100 | + text=lines_of_text_or_titles |
| 101 | + whatsthis=wikitext/articlelist |
| 102 | + site=en.wikipedia.org/w |
| 103 | + output_format=xml/text/xhtml/docbook_xml/odt_xml/odt |
82 | 104 | |
83 | | -=head1 EXTENSION |
| 105 | +Optional: |
84 | 106 | |
85 | | -To enable this as extension, put all files in the C<< php >> directory into a |
86 | | -C<< wiki2xml >> subdirectory of your MediaWiki extensions directory. |
| 107 | + use_templates=all/none/these/notthese |
| 108 | + templates=lines_of_templates |
| 109 | + document_title= |
| 110 | + add_gfdl=1 |
| 111 | + keep_categories=1 |
| 112 | + keep_interlanguage=1 |
| 113 | + |
87 | 114 | |
88 | | -Then add |
89 | 115 | |
90 | | - require_once ("extensions/wiki2xml/extension.php"); |
91 | | - |
92 | | -to your C<< LocalSettings.php >>. The extension can then be accessed as |
93 | | -C<< [[Special:Wiki2XML]] >>. |
94 | | - |
95 | | - |
96 | 116 | =head1 TROUBLESHOOTING |
97 | 117 | |
98 | 118 | If you get errors like the following: |
Index: trunk/wiki2xml/php/w2x.php |
— | — | @@ -124,6 +124,22 @@ |
125 | 125 | } |
126 | 126 | } |
127 | 127 | |
| 128 | +# add one article to the stack of to-be-converted articles |
| 129 | +function push_article ( &$aArticles, $article ) { |
| 130 | + |
 | 131 | + # trim the name, skip it if empty, and convert _ to ' ' |
| 132 | + $a = trim( $article ); |
| 133 | + if ( $a != "" ) { |
| 134 | + $aArticles[] = preg_replace( '/_/', ' ', $a ); |
| 135 | + } |
| 136 | + |
| 137 | +} |
| 138 | + |
 | 139 | +# return the last article of the list of to-be-converted articles (NOTE: $aArticles is passed by value, so the caller's list is left unchanged) |
| 140 | +function pop_article ( $aArticles ) { |
| 141 | + return array_pop ( $aArticles ) ; |
| 142 | +} |
| 143 | + |
128 | 144 | ## MAIN PROGRAM |
129 | 145 | |
130 | 146 | if ( get_param('doit',false) ) { # Process |
— | — | @@ -143,6 +159,9 @@ |
144 | 160 | $xmlg['add_gfdl'] = get_param('add_gfdl',false) ; |
145 | 161 | $xmlg['keep_interlanguage'] = get_param('keep_interlanguage',false) ; |
146 | 162 | $xmlg['keep_categories'] = get_param('keep_categories',false) ; |
| 163 | + |
| 164 | + # the article list |
| 165 | + $aArticles = array () ; |
147 | 166 | |
148 | 167 | $t = microtime_float() ; |
149 | 168 | $xml = "" ; |
— | — | @@ -150,28 +169,33 @@ |
151 | 170 | $wiki2xml_authors = array () ; |
152 | 171 | $xml = $converter->article2xml ( "" , $wikitext , $xmlg ) ; |
153 | 172 | } else { |
154 | | - $t = microtime_float() ; |
155 | | - $articles = explode ( "\n" , $wikitext ) ; |
| 173 | + foreach ( explode ( "\n" , $wikitext ) AS $a ) { |
| 174 | + push_article( &$aArticles, $a ); |
| 175 | + } |
| 176 | + |
| 177 | + # set the first article name as the default title |
156 | 178 | if ($xmlg["book_title"] == '') { |
157 | | - $xmlg["book_title"] = $articles[0]; |
| 179 | + $xmlg["book_title"] = $aArticles[0]; |
158 | 180 | } |
159 | | - foreach ( $articles AS $a ) { |
 | 181 | + # as long as we have articles to convert (the list may be modified while we loop, because article2xml receives it by reference) |
| 182 | + while ( $a = array_shift( $aArticles ) ) { |
160 | 183 | $wiki2xml_authors = array () ; |
161 | | - $a = trim ( $a ) ; |
162 | | - if ( $a == "" ) continue ; |
| 184 | + |
| 185 | + # Article page|Article name |
163 | 186 | $a = explode ( '|' , $a ) ; |
164 | 187 | if ( count ( $a ) == 1 ) $a[] = $a[0] ; |
165 | 188 | $title_page = trim ( array_shift ( $a ) ) ; |
166 | 189 | $title_name = trim ( array_pop ( $a ) ) ; |
| 190 | + |
167 | 191 | $wikitext = $content_provider->get_wiki_text ( $title_page ) ; |
168 | 192 | add_authors ( $content_provider->authors ) ; |
169 | | - $xml .= $converter->article2xml ( $title_name , $wikitext , $xmlg ) ; |
| 193 | + $xml .= $converter->article2xml ( $title_name , $wikitext , $xmlg, &$aArticles ) ; |
170 | 194 | } |
171 | 195 | } |
172 | 196 | $t = microtime_float() - $t ; |
173 | | - $tt = $t ; |
174 | | - $lt = $content_provider->load_time ; |
175 | | - $t -= $lt ; |
| 197 | + $tt = round( $t, 3 ) ; |
| 198 | + $lt = round( $content_provider->load_time, 3 ) ; |
| 199 | + $t = round( $t - $lt, 3) ; |
176 | 200 | |
177 | 201 | $xml = "<articles xmlns:xhtml=\" \" loadtime='{$lt} sec' rendertime='{$t} sec' totaltime='{$tt} sec'>\n{$xml}\n</articles>" ; |
178 | 202 | |
Index: trunk/wiki2xml/php/content_provider.php |
— | — | @@ -19,12 +19,14 @@ |
20 | 20 | return in_array ( $title , $this->article_list ) ; |
21 | 21 | } |
22 | 22 | |
23 | | - /** |
24 | | - * Gets the numeric namespace |
25 | | - * "-8" = category link |
26 | | - * "-9" = interlanguage link |
27 | | - */ |
28 | | - function get_namespace_id ( $text ) { |
| 23 | + /** |
| 24 | + * XXX TODO: why are some negative? |
| 25 | + * Gets the numeric namespace |
| 26 | + * "6" = images |
| 27 | + * "-8" = category link |
| 28 | + * "-9" = interlanguage link |
| 29 | + * "11" = templates |
| 30 | + */ function get_namespace_id ( $text ) { |
29 | 31 | $text = strtoupper ( $text ) ; |
30 | 32 | $text = explode ( ":" , $text , 2 ) ; |
31 | 33 | if ( count ( $text ) != 2 ) return 0 ; |
— | — | @@ -38,6 +40,7 @@ |
39 | 41 | |
40 | 42 | # Horrible manual hack, for now |
41 | 43 | if ( $text == "IMAGE" || $text == "BILD" ) $ns = 6 ; |
| 44 | + if ( $text == "TEMPLATE" || $text == "VORLAGE" ) $ns = 11 ; |
42 | 45 | |
43 | 46 | return $ns ; |
44 | 47 | } |
— | — | @@ -276,7 +279,8 @@ |
277 | 280 | |
278 | 281 | } |
279 | 282 | |
280 | | -# Access through text file structure |
| 283 | +# Access through MySQL interface |
 | 284 | +# (Used when running as a MediaWiki extension, i.e. through Special:Wiki2XML) |
281 | 285 | class ContentProviderMySQL extends ContentProviderHTTP { |
282 | 286 | |
283 | 287 | function do_get_contents ( $title ) { |
— | — | @@ -301,15 +305,13 @@ |
302 | 306 | } |
303 | 307 | |
304 | 308 | function get_page_text ( $page , $allow_redirect = true ) { |
305 | | - /* |
306 | | - $filename = $this->get_file_location ( 0 , $page ) ; |
307 | | - $filename = $filename->fullname . $this->file_ending ; |
308 | | - if ( !file_exists ( $filename ) ) return "" ; |
309 | | - $text = trim ( file_get_contents ( $filename ) ) ; |
310 | | - */ |
311 | | - |
312 | 309 | $title = Title::newFromText ( $page ) ; |
313 | 310 | $article = new Article ( $title ) ; |
| 311 | + |
| 312 | + # article does not exist? |
| 313 | + if (!$article->exists()) { |
| 314 | + return ""; |
| 315 | + } |
314 | 316 | $text = $article->getContent () ; |
315 | 317 | |
316 | 318 | # REDIRECT? |