Index: trunk/wiki2xml/php/xml2odt.php |
— | — | @@ -658,6 +658,6 @@ |
659 | 659 | } |
660 | 660 | } |
661 | 661 | |
662 | | -require_once ( "./xml2tree.php" ) ; # Uses the "element" class defined above |
| 662 | +require_once ( "xml2tree.php" ) ; # Uses the "element" class defined above |
663 | 663 | |
664 | 664 | ?> |
Index: trunk/wiki2xml/php/mediawiki_converter.php |
— | — | @@ -60,7 +60,7 @@ |
61 | 61 | */ |
62 | 62 | function articles2text ( &$xml , $params = array () ) { |
63 | 63 | global $wiki2xml_authors ; |
64 | | - require_once ( "./xml2txt.php" ) ; |
| 64 | + require_once ( "xml2txt.php" ) ; |
65 | 65 | |
66 | 66 | $wiki2xml_authors = array () ; |
67 | 67 | $x2t = new xml2php ; |
— | — | @@ -91,7 +91,7 @@ |
92 | 92 | */ |
93 | 93 | function articles2xhtml ( &$xml , $params = array () ) { |
94 | 94 | global $xml2xhtml ; |
95 | | - require_once ( "./xml2xhtml.php" ) ; |
| 95 | + require_once ( "xml2xhtml.php" ) ; |
96 | 96 | $lang = "EN" ; # Dummy |
97 | 97 | |
98 | 98 | $ret = "" ; |
— | — | @@ -127,7 +127,7 @@ |
128 | 128 | */ |
129 | 129 | function articles2odt ( &$xml , $params = array () , $use_gfdl = false ) { |
130 | 130 | global $wiki2xml_authors , $xml2odt ; |
131 | | - require_once ( "./xml2odt.php" ) ; |
| 131 | + require_once ( "xml2odt.php" ) ; |
132 | 132 | |
133 | 133 | # XML text to tree |
134 | 134 | $xml2odt = new XML2ODT ; |
— | — | @@ -151,7 +151,7 @@ |
152 | 152 | */ |
153 | 153 | function articles2docbook_xml ( &$xml , $params = array () , $use_gfdl = false ) { |
154 | 154 | global $wiki2xml_authors ; |
155 | | - require_once ( "./xml2docbook_xml.php" ) ; |
| 155 | + require_once ( "xml2docbook_xml.php" ) ; |
156 | 156 | |
157 | 157 | $wiki2xml_authors = array () ; |
158 | 158 | $x2t = new xml2php ; |
— | — | @@ -214,6 +214,7 @@ |
215 | 215 | * Uses articles2docbook_xml |
216 | 216 | */ |
217 | 217 | function articles2docbook_pdf ( &$xml , $params = array () , $mode = "PDF" ) { |
| 218 | + global $xmlg ; |
218 | 219 | $docbook_xml = $this->articles2docbook_xml ( $xml , $params , $params['add_gfdl'] ) ; |
219 | 220 | |
220 | 221 | # Create temporary directory |
— | — | @@ -230,7 +231,7 @@ |
231 | 232 | fwrite ( $handle , utf8_encode ( $docbook_xml ) ) ; |
232 | 233 | fclose ( $handle ) ; |
233 | 234 | if ( $params['add_gfdl'] ) { |
234 | | - copy ( "./gfdl.xml" , $temp_dir . "/gfdl.xml" ) ; |
| 235 | + copy ( $xmlg['sourcedir'] . "/gfdl.xml" , $temp_dir . "/gfdl.xml" ) ; |
235 | 236 | } |
236 | 237 | |
237 | 238 | # Call converter |
Index: trunk/wiki2xml/php/filter_named_entities.php |
— | — | @@ -5,7 +5,7 @@ |
6 | 6 | * which do not replace some entities correctly |
7 | 7 | */ |
8 | 8 | |
9 | | -$html_named_entities_mapping = array ( |
| 9 | +$html_named_entities_mapping_mine = array ( |
10 | 10 | // Obtained with: |
11 | 11 | // less /usr/share/xml/entities/xhtml/*.ent|grep '^<!ENTITY'|sed -e 's/^<\!ENTITY[ \t]*\([A-Za-z0-9]*\)[ \t]*"&#\([0-9]*\);".*$/"\1"=>\2,/' > /home/dom/data/2005/04/entities-table |
12 | 12 | "nbsp"=>160, |
— | — | @@ -272,8 +272,8 @@ |
273 | 273 | } |
274 | 274 | |
275 | 275 | function filter_named_entities(&$content) { |
276 | | - global $html_named_entities_mapping; |
277 | | - foreach($html_named_entities_mapping as $name => $value) { |
| 276 | + global $html_named_entities_mapping_mine; |
| 277 | + foreach($html_named_entities_mapping_mine as $name => $value) { |
278 | 278 | $content=str_replace('&'.$name.';',utf8_chr ( $value ),$content); |
279 | 279 | } |
280 | 280 | $content=str_replace('�','i',$content); # Ugly hack |
Index: trunk/wiki2xml/php/xml2docbook_xml.php |
— | — | @@ -451,6 +451,6 @@ |
452 | 452 | } |
453 | 453 | } |
454 | 454 | |
455 | | -require_once ( "./xml2tree.php" ) ; # Uses the "element" class defined above |
| 455 | +require_once ( "xml2tree.php" ) ; # Uses the "element" class defined above |
456 | 456 | |
457 | 457 | ?> |
Index: trunk/wiki2xml/php/extension.php |
— | — | @@ -0,0 +1,68 @@ |
| 2 | +<?php
|
| 3 | +/*
|
| 4 | +To enable this extension, put all files in this directory into a "wiki2xml" subdirectory of your MediaWiki extensions directory
|
| 5 | +Also, add
|
| 6 | + require_once ( "extensions/wiki2xml/extension.php" ) ;
|
| 7 | +to your LocalSettings.php
|
| 8 | +The extension can then be accessed as [[Special:Wiki2XML]]
|
| 9 | +*/
|
| 10 | +
|
| 11 | +if( !defined( 'MEDIAWIKI' ) ) die(); # Abort unless the MEDIAWIKI constant is defined (w2x.php tests the same constant to detect stand-alone use)
|
| 12 | +
|
| 13 | +# Integrating into the MediaWiki environment
|
| 14 | +
|
| 15 | +$wgExtensionCredits['Wiki2XML'][] = array( # NOTE(review): the first key is normally an extension type such as specialpage or other; Wiki2XML is not a stock type -- confirm the entry still shows on Special:Version
|
| 16 | + 'name' => 'Wiki2XML',
|
| 17 | + 'description' => 'An extension to convert wiki markup into XML.',
|
| 18 | + 'author' => 'Magnus Manske'
|
| 19 | +);
|
| 20 | +
|
| 21 | +$wgExtensionFunctions[] = 'wfWiki2XMLExtension'; # Queues the setup function by name; MediaWiki is expected to call it during initialisation -- confirm for the targeted version
|
| 22 | +
|
| 23 | +
|
| 24 | +#_____________________________________________________________________________
|
| 25 | +
|
| 26 | +/**
|
| 27 | +* Extension setup function (queued via $wgExtensionFunctions): adds the wiki2xml message, declares the SpecialWiki2XML page class and registers an instance of it
|
| 28 | +*/
|
| 29 | +function wfWiki2XMLExtension() { # Checked for HTML and MySQL insertion attacks
|
| 30 | + global $IP, $wgMessageCache; # $IP is used as the path prefix for the SpecialPage include below
|
| 31 | +# wfTasksAddCache();
|
| 32 | +
|
| 33 | + // FIXME : i18n
|
| 34 | + $wgMessageCache->addMessage( 'wiki2xml', 'Wiki2XML' ); # registers the text Wiki2XML under the message key wiki2xml; hard-coded, hence the FIXME above
|
| 35 | +
|
| 36 | + require_once $IP.'/includes/SpecialPage.php'; # presumably provides the SpecialPage base class extended below -- confirm
|
| 37 | +
|
| 38 | + class SpecialWiki2XML extends SpecialPage { # declared inside the function, so the class only comes into existence when this function runs
|
| 39 | +
|
| 40 | + /**
|
| 41 | + * Constructor (PHP 4 style: a method named after the class)
|
| 42 | + */
|
| 43 | + function SpecialWiki2XML() { # Checked for HTML and MySQL insertion attacks
|
| 44 | + SpecialPage::SpecialPage( 'Wiki2XML' ); # parent constructor call, passing Wiki2XML as the page name
|
| 45 | + $this->includable( true ); # presumably lets the page be transcluded into other pages -- confirm against SpecialPage::includable
|
| 46 | + }
|
| 47 | +
|
| 48 | + /**
|
| 49 | + * Special page main function. $par is accepted but not used here; the page body comes from the w2x.php include.
|
| 50 | + */
|
| 51 | + function execute( $par = null ) { # Checked for HTML and MySQL insertion attacks
|
| 52 | + global $wgOut, $wgRequest, $wgUser, $wgTitle, $IP; # NOTE(review): only $wgOut and $IP are used in the lines of this method; the included files run in this scope and may rely on the others -- confirm before pruning
|
| 53 | + $fname = 'Special::Tasks:execute'; # NOTE(review): never read in this method, and the Tasks name looks carried over from another extension -- confirm and drop
|
| 54 | + global $xmlg , $html_named_entities_mapping_mine; # the includes below run in this method scope; declaring these global lets their top-level assignments reach the globals that functions such as get_form() read -- the second name presumably serves filter_named_entities.php, confirm
|
| 55 | + include_once ( "default.php" ) ; # fills $xmlg with defaults (sourcedir is set to . there) and tries to include local.php
|
| 56 | + $xmlg['sourcedir'] = $IP.'/extensions/wiki2xml' ; # replaces the . default so files such as gfdl.xml and template.odt are looked up under the extension directory
|
| 57 | + include_once ( "w2x.php" ) ; # when the doit field was posted this converts and exits; otherwise it stores the form HTML in $out
|
| 58 | +
|
| 59 | + $this->setHeaders();
|
| 60 | + $wgOut->addHtml( $out ); # $out is the local variable set by the w2x.php include above; NOTE(review): include_once skips a file that was already loaded, which would leave $out undefined -- confirm this cannot happen
|
| 61 | + }
|
| 62 | +
|
| 63 | + } # end of class
|
| 64 | +
|
| 65 | + SpecialPage::addPage( new SpecialWiki2XML ); # registers one instance of the page class with MediaWiki
|
| 66 | +}
|
| 67 | +
|
| 68 | +
|
| 69 | +?>
|
Index: trunk/wiki2xml/php/w2x.php |
— | — | @@ -2,7 +2,10 @@ |
3 | 3 | # Copyright by Magnus Manske (2005) |
4 | 4 | # Released under GPL |
5 | 5 | |
6 | | -include_once ( "default.php" ) ; # Which will include local.php, if available |
| 6 | +if( !defined( 'MEDIAWIKI' ) ) { # Stand-alone |
| 7 | + include_once ( "default.php" ) ; # Which will include local.php, if available |
| 8 | +} |
| 9 | + |
7 | 10 | require_once ( "mediawiki_converter.php" ) ; |
8 | 11 | |
9 | 12 | @set_time_limit ( 0 ) ; # No time limit |
— | — | @@ -16,6 +19,67 @@ |
17 | 20 | return ((float)$usec + (float)$sec); |
18 | 21 | } |
19 | 22 | |
| 23 | +function get_form () { /* Returns the HTML of the conversion form as one string; callers either print it (stand-alone) or pass it on to MediaWiki */ |
| 24 | + global $xmlg ; /* configuration array; read here for the docbook, zip_odt and site_base_url entries */ |
| 25 | + $optional = array () ; /* collects radio buttons for output formats that depend on optional configuration */ |
| 26 | + if ( isset ( $xmlg['docbook']['command_pdf'] ) ) { /* DocBook PDF is offered only when this entry is set */ |
| 27 | + $optional[] = "<INPUT type='radio' name='output_format' value='docbook_pdf'>DocBook PDF" ; |
| 28 | + } |
| 29 | + if ( isset ( $xmlg['docbook']['command_html'] ) ) { /* likewise for DocBook HTML */ |
| 30 | + $optional[] = "<INPUT type='radio' name='output_format' value='docbook_html'>DocBook HTML" ; |
| 31 | + } |
| 32 | + if ( isset ( $xmlg['zip_odt'] ) ) { /* both OpenOffice variants hinge on the single zip_odt entry */ |
| 33 | + $optional[] = "<INPUT type='radio' name='output_format' value='odt_xml'>OpenOffice XML" ; |
| 34 | + $optional[] = "<INPUT type='radio' name='output_format' value='odt'>OpenOffice ODT" ; |
| 35 | + } |
| 36 | + $optional = "<br/>" . implode ( "<br/>" , $optional ) ; /* from here on $optional is a string; it is a lone line break when no optional format applies */ |
| 37 | + /* The form below has no action attribute and posts a field named doit, which the main program of this file checks for. */ |
| 38 | + /* NOTE(review): the markup never closes the Output cell with a td end tag before the row ends, and site_base_url is interpolated into the value attribute unescaped -- confirm both are acceptable. */ |
| 39 | + |
| 40 | +return "<form method='post'> |
| 41 | +<h2>Paste article list or wikitext here</h2> |
| 42 | +<table border='0' width='100%'><tr> |
| 43 | +<td valign='top'><textarea rows='20' cols='80' style='width:100%' name='text'></textarea></td> |
| 44 | +<td width='200px' valign='top' nowrap> |
| 45 | +<INPUT checked type='radio' name='use_templates' value='all'>Use all templates<br/> |
| 46 | +<INPUT type='radio' name='use_templates' value='none'>Do not use templates<br/> |
| 47 | +<INPUT type='radio' name='use_templates' value='these'>Use these templates<br/> |
| 48 | +<INPUT type='radio' name='use_templates' value='notthese'>Use all but these templates<br/> |
| 49 | +<textarea rows='15' cols='30' style='width:100%' name='templates'></textarea> |
| 50 | +</td></tr></table> |
| 51 | +<table border='0'><tr> |
| 52 | +<td valign='top'> |
| 53 | +This is |
| 54 | +<INPUT type='radio' name='whatsthis' value='wikitext'>raw wikitext |
| 55 | +<INPUT checked type='radio' name='whatsthis' value='articlelist'>a list of articles |
| 56 | +<br/> |
| 57 | + |
| 58 | +Site : http://<input type='text' name='site' value='".$xmlg["site_base_url"]."'/>/index.php<br/> |
| 59 | +Title : <input type='text' name='document_title' value='' size=40/><br/> |
| 60 | +<input type='checkbox' name='add_gfdl' value='1' checked>Include GFDL (for some output formats)</input><br/> |
| 61 | +<input type='checkbox' name='keep_categories' value='1' checked>Keep categories</input><br/> |
| 62 | +<input type='checkbox' name='keep_interlanguage' value='1' checked>Keep interlanguage links</input><br/> |
| 63 | +<input type='submit' name='doit' value='Convert'/> |
| 64 | +</td><td valign='top' style='border-left:1px black solid'> |
| 65 | +<b>Output</b> |
| 66 | +<br/><INPUT checked type='radio' name='output_format' value='xml'>XML |
| 67 | +<br/><INPUT type='radio' name='output_format' value='text'>Plain text |
| 68 | + <input type='checkbox' name='plaintext_markup' value='1' checked>Use *_/ markup</input> |
| 69 | + <input type='checkbox' name='plaintext_prelink' value='1' checked>Put → before internal links</input> |
| 70 | +<br/><INPUT type='radio' name='output_format' value='xhtml'>XHTML |
| 71 | +<br/><INPUT type='radio' name='output_format' value='docbook_xml'>DocBook XML |
| 72 | +{$optional} |
| 73 | +</tr></table> |
| 74 | +</form> |
| 75 | +<p> |
| 76 | +Known issues: |
| 77 | +<ul> |
| 78 | +<li>In templates, {{{variables}}} used within <nowiki> tags will be replaced as well (too lazy to strip them)</li> |
| 79 | +<li>HTML comments are removed (instead of converted into XML tags)</li> |
| 80 | +</ul> |
| 81 | +</p>" ; |
| 82 | +} |
| 83 | + |
20 | 84 | ## MAIN PROGRAM |
21 | 85 | |
22 | 86 | if ( isset ( $_POST['doit'] ) ) { # Process |
— | — | @@ -83,7 +147,8 @@ |
84 | 148 | # header("Content-type: application/xhtml+xml"); |
85 | 149 | echo $converter->articles2xhtml ( $xml , $xmlg ) ; |
86 | 150 | } else if ( $format == "odt" || $format == "odt_xml" ) { |
87 | | - $cwd = getcwd() ; |
| 151 | + if ( $xmlg['sourcedir'] == '.' ) $cwd = getcwd() ; |
| 152 | + else $cwd = $xmlg['sourcedir'] ; |
88 | 153 | $template_file = $cwd . '/template.odt' ; |
89 | 154 | |
90 | 155 | $dir_file = tempnam($xmlg["temp_dir"], "ODD"); |
— | — | @@ -169,69 +234,20 @@ |
170 | 235 | SureRemoveDir ( $pdf_dir ) ; |
171 | 236 | @rmdir ( $pdf_dir ) ; |
172 | 237 | } |
173 | | - |
| 238 | + exit ; |
174 | 239 | } else { # Show the form |
175 | | - header('Content-type: text/html; charset=utf-8'); |
176 | | - |
177 | | - $optional = array () ; |
178 | | - if ( isset ( $xmlg['docbook']['command_pdf'] ) ) { |
179 | | - $optional[] = "<INPUT type='radio' name='output_format' value='docbook_pdf'>DocBook PDF" ; |
| 240 | + if( !defined( 'MEDIAWIKI' ) ) { # Stand-alone |
| 241 | + header('Content-type: text/html; charset=utf-8'); |
| 242 | + print " |
| 243 | +<html><head></head><body> |
| 244 | +<h1>Magnus' magic MediaWiki-to-XML-to-stuff converter</h1> |
| 245 | +<p>All written in PHP - so portable, <s>so incredibly slow...</s> <i>about as fast as the original MediaWiki parser!</i></p>" ; |
| 246 | + print get_form () ; |
| 247 | + print "</body></html>" ; |
| 248 | + } else { # MediaWiki extension |
| 249 | + $out = get_form () ; |
180 | 250 | } |
181 | | - if ( isset ( $xmlg['docbook']['command_html'] ) ) { |
182 | | - $optional[] = "<INPUT type='radio' name='output_format' value='docbook_html'>DocBook HTML" ; |
183 | | - } |
184 | | - if ( isset ( $xmlg['zip_odt'] ) ) { |
185 | | - $optional[] = "<INPUT type='radio' name='output_format' value='odt_xml'>OpenOffice XML" ; |
186 | | - $optional[] = "<INPUT type='radio' name='output_format' value='odt'>OpenOffice ODT" ; |
187 | | - } |
188 | | - $optional = "<br/>" . implode ( "<br/>" , $optional ) ; |
189 | 251 | |
190 | | - print " |
191 | | -<html><head></head><body><form method='post'> |
192 | | -<h1>Magnus' magic MediaWiki-to-XML-to-stuff converter</h1> |
193 | | -<p>All written in PHP - so portable, <s>so incredibly slow...</s> <i>about as fast as the original MediaWiki parser!</i></p> |
194 | | -<h2>Paste article list or wikitext here</h2> |
195 | | -<table border='0' width='100%'><tr> |
196 | | -<td valign='top'><textarea rows='20' cols='80' style='width:100%' name='text'></textarea></td> |
197 | | -<td width='200px' valign='top' nowrap> |
198 | | -<INPUT checked type='radio' name='use_templates' value='all'>Use all templates<br/> |
199 | | -<INPUT type='radio' name='use_templates' value='none'>Do not use templates<br/> |
200 | | -<INPUT type='radio' name='use_templates' value='these'>Use these templates<br/> |
201 | | -<INPUT type='radio' name='use_templates' value='notthese'>Use all but these templates |
202 | | -<textarea rows='15' cols='30' style='width:100%' name='templates'></textarea> |
203 | | -</td></tr></table> |
204 | | -<table border='0'><tr> |
205 | | -<td valign='top'> |
206 | | -This is |
207 | | -<INPUT type='radio' name='whatsthis' value='wikitext'>raw wikitext |
208 | | -<INPUT checked type='radio' name='whatsthis' value='articlelist'>a list of articles |
209 | | -<br/> |
210 | | - |
211 | | -Site : http://<input type='text' name='site' value='".$xmlg["site_base_url"]."'/>/index.php<br/> |
212 | | -Title : <input type='text' name='document_title' value='' size=40/><br/> |
213 | | -<input type='checkbox' name='add_gfdl' value='1' checked>Include GFDL (for some output formats)</input><br/> |
214 | | -<input type='checkbox' name='keep_categories' value='1' checked>Keep categories</input><br/> |
215 | | -<input type='checkbox' name='keep_interlanguage' value='1' checked>Keep interlanguage links</input><br/> |
216 | | -<input type='submit' name='doit' value='Convert'/> |
217 | | -</td><td valign='top' style='border-left:1px black solid'> |
218 | | -<b>Output</b> |
219 | | -<br/><INPUT checked type='radio' name='output_format' value='xml'>XML |
220 | | -<br/><INPUT type='radio' name='output_format' value='text'>Plain text |
221 | | - <input type='checkbox' name='plaintext_markup' value='1' checked>Use *_/ markup</input> |
222 | | - <input type='checkbox' name='plaintext_prelink' value='1' checked>Put → before internal links</input> |
223 | | -<br/><INPUT type='radio' name='output_format' value='xhtml'>XHTML |
224 | | -<br/><INPUT type='radio' name='output_format' value='docbook_xml'>DocBook XML |
225 | | -{$optional} |
226 | | -</tr></table> |
227 | | -</form> |
228 | | -<p> |
229 | | -Known issues: |
230 | | -<ul> |
231 | | -<li>In templates, {{{variables}}} used within <nowiki> tags will be replaced as well (too lazy to strip them)</li> |
232 | | -<li>HTML comments are removed (instead of converted into XML tags)</li> |
233 | | -</ul> |
234 | | -</p> |
235 | | -</body></html>" ; |
236 | 252 | } |
237 | 253 | |
238 | 254 | #<input type='checkbox' name='resolvetemplates' value='1' checked>Automatically resolve templates</input><br/> |
Index: trunk/wiki2xml/php/xml2txt.php |
— | — | @@ -147,7 +147,7 @@ |
148 | 148 | } |
149 | 149 | } |
150 | 150 | |
151 | | -require_once ( "./xml2tree.php" ) ; |
| 151 | +require_once ( "xml2tree.php" ) ; |
152 | 152 | |
153 | 153 | |
154 | 154 | |
Index: trunk/wiki2xml/php/default.php |
— | — | @@ -3,6 +3,7 @@ |
4 | 4 | $xmlg["namespace_template"] = "Template" ; |
5 | 5 | $xmlg["site_base_url"] = "en.wikipedia.org/w" ; |
6 | 6 | $xmlg["book_title"] = "No title" ; |
| 7 | +$xmlg['sourcedir'] = "." ; |
7 | 8 | $xmlg["temp_dir"] = "/tmp" ; |
8 | 9 | |
9 | 10 | @include ( "local.php" ) ; |