r13883 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r13882 | r13883 | r13884 >
Date:09:22, 27 April 2006
Author:magnusmanske
Status:old
Tags:
Comment:
Adding MediaWiki extension capability
Modified paths:
  • /trunk/wiki2xml/php/default.php (modified) (history)
  • /trunk/wiki2xml/php/extension.php (added) (history)
  • /trunk/wiki2xml/php/filter_named_entities.php (modified) (history)
  • /trunk/wiki2xml/php/mediawiki_converter.php (modified) (history)
  • /trunk/wiki2xml/php/w2x.php (modified) (history)
  • /trunk/wiki2xml/php/xml2docbook_xml.php (modified) (history)
  • /trunk/wiki2xml/php/xml2odt.php (modified) (history)
  • /trunk/wiki2xml/php/xml2txt.php (modified) (history)

Diff [purge]

Index: trunk/wiki2xml/php/xml2odt.php
@@ -658,6 +658,6 @@
659659 }
660660 }
661661
662 -require_once ( "./xml2tree.php" ) ; # Uses the "element" class defined above
 662+require_once ( "xml2tree.php" ) ; # Uses the "element" class defined above
663663
664664 ?>
Index: trunk/wiki2xml/php/mediawiki_converter.php
@@ -60,7 +60,7 @@
6161 */
6262 function articles2text ( &$xml , $params = array () ) {
6363 global $wiki2xml_authors ;
64 - require_once ( "./xml2txt.php" ) ;
 64+ require_once ( "xml2txt.php" ) ;
6565
6666 $wiki2xml_authors = array () ;
6767 $x2t = new xml2php ;
@@ -91,7 +91,7 @@
9292 */
9393 function articles2xhtml ( &$xml , $params = array () ) {
9494 global $xml2xhtml ;
95 - require_once ( "./xml2xhtml.php" ) ;
 95+ require_once ( "xml2xhtml.php" ) ;
9696 $lang = "EN" ; # Dummy
9797
9898 $ret = "" ;
@@ -127,7 +127,7 @@
128128 */
129129 function articles2odt ( &$xml , $params = array () , $use_gfdl = false ) {
130130 global $wiki2xml_authors , $xml2odt ;
131 - require_once ( "./xml2odt.php" ) ;
 131+ require_once ( "xml2odt.php" ) ;
132132
133133 # XML text to tree
134134 $xml2odt = new XML2ODT ;
@@ -151,7 +151,7 @@
152152 */
153153 function articles2docbook_xml ( &$xml , $params = array () , $use_gfdl = false ) {
154154 global $wiki2xml_authors ;
155 - require_once ( "./xml2docbook_xml.php" ) ;
 155+ require_once ( "xml2docbook_xml.php" ) ;
156156
157157 $wiki2xml_authors = array () ;
158158 $x2t = new xml2php ;
@@ -214,6 +214,7 @@
215215 * Uses articles2docbook_xml
216216 */
217217 function articles2docbook_pdf ( &$xml , $params = array () , $mode = "PDF" ) {
 218+ global $xmlg ;
218219 $docbook_xml = $this->articles2docbook_xml ( $xml , $params , $params['add_gfdl'] ) ;
219220
220221 # Create temporary directory
@@ -230,7 +231,7 @@
231232 fwrite ( $handle , utf8_encode ( $docbook_xml ) ) ;
232233 fclose ( $handle ) ;
233234 if ( $params['add_gfdl'] ) {
234 - copy ( "./gfdl.xml" , $temp_dir . "/gfdl.xml" ) ;
 235+ copy ( $xmlg['sourcedir'] . "/gfdl.xml" , $temp_dir . "/gfdl.xml" ) ;
235236 }
236237
237238 # Call converter
Index: trunk/wiki2xml/php/filter_named_entities.php
@@ -5,7 +5,7 @@
66 * which do not replace some entities correctly
77 */
88
9 -$html_named_entities_mapping = array (
 9+$html_named_entities_mapping_mine = array (
1010 // Obtained with:
1111 // less /usr/share/xml/entities/xhtml/*.ent|grep '^<!ENTITY'|sed -e 's/^<\!ENTITY[ \t]*\([A-Za-z0-9]*\)[ \t]*"&#\([0-9]*\);".*$/"\1"=>\2,/' > /home/dom/data/2005/04/entities-table
1212 "nbsp"=>160,
@@ -272,8 +272,8 @@
273273 }
274274
275275 function filter_named_entities(&$content) {
276 - global $html_named_entities_mapping;
277 - foreach($html_named_entities_mapping as $name => $value) {
 276+ global $html_named_entities_mapping_mine;
 277+ foreach($html_named_entities_mapping_mine as $name => $value) {
278278 $content=str_replace('&'.$name.';',utf8_chr ( $value ),$content);
279279 }
280280 $content=str_replace('�','i',$content); # Ugly hack
Index: trunk/wiki2xml/php/xml2docbook_xml.php
@@ -451,6 +451,6 @@
452452 }
453453 }
454454
455 -require_once ( "./xml2tree.php" ) ; # Uses the "element" class defined above
 455+require_once ( "xml2tree.php" ) ; # Uses the "element" class defined above
456456
457457 ?>
Index: trunk/wiki2xml/php/extension.php
@@ -0,0 +1,68 @@
 2+<?php
 3+/*
 4+To enable this extension, put all files in this directory into a "wiki2xml" subdirectory of your MediaWiki extensions directory
 5+Also, add
 6+ require_once ( "extensions/wiki2xml/extension.php" ) ;
 7+to your LocalSettings.php
 8+The extension can then be accessed as [[Special:Wiki2XML]]
 9+*/
 10+
 11+if( !defined( 'MEDIAWIKI' ) ) die();
 12+
 13+# Integrating into the MediaWiki environment
 14+
 15+$wgExtensionCredits['Wiki2XML'][] = array(
 16+ 'name' => 'Wiki2XML',
 17+ 'description' => 'An extension to convert wiki markup into XML.',
 18+ 'author' => 'Magnus Manske'
 19+);
 20+
 21+$wgExtensionFunctions[] = 'wfWiki2XMLExtension';
 22+
 23+
 24+#_____________________________________________________________________________
 25+
 26+/**
 27+* The special page
 28+*/
 29+function wfWiki2XMLExtension() { # Checked for HTML and MySQL insertion attacks
 30+ global $IP, $wgMessageCache;
 31+# wfTasksAddCache();
 32+
 33+ // FIXME : i18n
 34+ $wgMessageCache->addMessage( 'wiki2xml', 'Wiki2XML' );
 35+
 36+ require_once $IP.'/includes/SpecialPage.php';
 37+
 38+ class SpecialWiki2XML extends SpecialPage {
 39+
 40+ /**
 41+ * Constructor
 42+ */
 43+ function SpecialWiki2XML() { # Checked for HTML and MySQL insertion attacks
 44+ SpecialPage::SpecialPage( 'Wiki2XML' );
 45+ $this->includable( true );
 46+ }
 47+
 48+ /**
 49+ * Special page main function
 50+ */
 51+ function execute( $par = null ) { # Checked for HTML and MySQL insertion attacks
 52+ global $wgOut, $wgRequest, $wgUser, $wgTitle, $IP;
 53+ $fname = 'Special::Tasks:execute';
 54+ global $xmlg , $html_named_entities_mapping_mine;
 55+ include_once ( "default.php" ) ;
 56+ $xmlg['sourcedir'] = $IP.'/extensions/wiki2xml' ;
 57+ include_once ( "w2x.php" ) ;
 58+
 59+ $this->setHeaders();
 60+ $wgOut->addHtml( $out );
 61+ }
 62+
 63+ } # end of class
 64+
 65+ SpecialPage::addPage( new SpecialWiki2XML );
 66+}
 67+
 68+
 69+?>
Index: trunk/wiki2xml/php/w2x.php
@@ -2,7 +2,10 @@
33 # Copyright by Magnus Manske (2005)
44 # Released under GPL
55
6 -include_once ( "default.php" ) ; # Which will include local.php, if available
 6+if( !defined( 'MEDIAWIKI' ) ) { # Stand-alone
 7+ include_once ( "default.php" ) ; # Which will include local.php, if available
 8+}
 9+
710 require_once ( "mediawiki_converter.php" ) ;
811
912 @set_time_limit ( 0 ) ; # No time limit
@@ -16,6 +19,67 @@
1720 return ((float)$usec + (float)$sec);
1821 }
1922
 23+function get_form () {
 24+ global $xmlg ;
 25+ $optional = array () ;
 26+ if ( isset ( $xmlg['docbook']['command_pdf'] ) ) {
 27+ $optional[] = "<INPUT type='radio' name='output_format' value='docbook_pdf'>DocBook PDF" ;
 28+ }
 29+ if ( isset ( $xmlg['docbook']['command_html'] ) ) {
 30+ $optional[] = "<INPUT type='radio' name='output_format' value='docbook_html'>DocBook HTML" ;
 31+ }
 32+ if ( isset ( $xmlg['zip_odt'] ) ) {
 33+ $optional[] = "<INPUT type='radio' name='output_format' value='odt_xml'>OpenOffice XML" ;
 34+ $optional[] = "<INPUT type='radio' name='output_format' value='odt'>OpenOffice ODT" ;
 35+ }
 36+ $optional = "<br/>" . implode ( "<br/>" , $optional ) ;
 37+
 38+
 39+
 40+return "<form method='post'>
 41+<h2>Paste article list or wikitext here</h2>
 42+<table border='0' width='100%'><tr>
 43+<td valign='top'><textarea rows='20' cols='80' style='width:100%' name='text'></textarea></td>
 44+<td width='200px' valign='top' nowrap>
 45+<INPUT checked type='radio' name='use_templates' value='all'>Use all templates<br/>
 46+<INPUT type='radio' name='use_templates' value='none'>Do not use templates<br/>
 47+<INPUT type='radio' name='use_templates' value='these'>Use these templates<br/>
 48+<INPUT type='radio' name='use_templates' value='notthese'>Use all but these templates<br/>
 49+<textarea rows='15' cols='30' style='width:100%' name='templates'></textarea>
 50+</td></tr></table>
 51+<table border='0'><tr>
 52+<td valign='top'>
 53+This is
 54+<INPUT type='radio' name='whatsthis' value='wikitext'>raw wikitext
 55+<INPUT checked type='radio' name='whatsthis' value='articlelist'>a list of articles
 56+<br/>
 57+
 58+Site : http://<input type='text' name='site' value='".$xmlg["site_base_url"]."'/>/index.php<br/>
 59+Title : <input type='text' name='document_title' value='' size=40/><br/>
 60+<input type='checkbox' name='add_gfdl' value='1' checked>Include GFDL (for some output formats)</input><br/>
 61+<input type='checkbox' name='keep_categories' value='1' checked>Keep categories</input><br/>
 62+<input type='checkbox' name='keep_interlanguage' value='1' checked>Keep interlanguage links</input><br/>
 63+<input type='submit' name='doit' value='Convert'/>
 64+</td><td valign='top' style='border-left:1px black solid'>
 65+<b>Output</b>
 66+<br/><INPUT checked type='radio' name='output_format' value='xml'>XML
 67+<br/><INPUT type='radio' name='output_format' value='text'>Plain text
 68+ <input type='checkbox' name='plaintext_markup' value='1' checked>Use *_/ markup</input>
 69+ <input type='checkbox' name='plaintext_prelink' value='1' checked>Put &rarr; before internal links</input>
 70+<br/><INPUT type='radio' name='output_format' value='xhtml'>XHTML
 71+<br/><INPUT type='radio' name='output_format' value='docbook_xml'>DocBook XML
 72+{$optional}
 73+</tr></table>
 74+</form>
 75+<p>
 76+Known issues:
 77+<ul>
 78+<li>In templates, {{{variables}}} used within &lt;nowiki&gt; tags will be replaced as well (too lazy to strip them)</li>
 79+<li>HTML comments are removed (instead of converted into XML tags)</li>
 80+</ul>
 81+</p>" ;
 82+}
 83+
2084 ## MAIN PROGRAM
2185
2286 if ( isset ( $_POST['doit'] ) ) { # Process
@@ -83,7 +147,8 @@
84148 # header("Content-type: application/xhtml+xml");
85149 echo $converter->articles2xhtml ( $xml , $xmlg ) ;
86150 } else if ( $format == "odt" || $format == "odt_xml" ) {
87 - $cwd = getcwd() ;
 151+ if ( $xmlg['sourcedir'] == '.' ) $cwd = getcwd() ;
 152+ else $cwd = $xmlg['sourcedir'] ;
88153 $template_file = $cwd . '/template.odt' ;
89154
90155 $dir_file = tempnam($xmlg["temp_dir"], "ODD");
@@ -169,69 +234,20 @@
170235 SureRemoveDir ( $pdf_dir ) ;
171236 @rmdir ( $pdf_dir ) ;
172237 }
173 -
 238+ exit ;
174239 } else { # Show the form
175 - header('Content-type: text/html; charset=utf-8');
176 -
177 - $optional = array () ;
178 - if ( isset ( $xmlg['docbook']['command_pdf'] ) ) {
179 - $optional[] = "<INPUT type='radio' name='output_format' value='docbook_pdf'>DocBook PDF" ;
 240+ if( !defined( 'MEDIAWIKI' ) ) { # Stand-alone
 241+ header('Content-type: text/html; charset=utf-8');
 242+ print "
 243+<html><head></head><body>
 244+<h1>Magnus' magic MediaWiki-to-XML-to-stuff converter</h1>
 245+<p>All written in PHP - so portable, <s>so incredibly slow...</s> <i>about as fast as the original MediaWiki parser!</i></p>" ;
 246+ print get_form () ;
 247+ print "</body></html>" ;
 248+ } else { # MediaWiki extension
 249+ $out = get_form () ;
180250 }
181 - if ( isset ( $xmlg['docbook']['command_html'] ) ) {
182 - $optional[] = "<INPUT type='radio' name='output_format' value='docbook_html'>DocBook HTML" ;
183 - }
184 - if ( isset ( $xmlg['zip_odt'] ) ) {
185 - $optional[] = "<INPUT type='radio' name='output_format' value='odt_xml'>OpenOffice XML" ;
186 - $optional[] = "<INPUT type='radio' name='output_format' value='odt'>OpenOffice ODT" ;
187 - }
188 - $optional = "<br/>" . implode ( "<br/>" , $optional ) ;
189251
190 - print "
191 -<html><head></head><body><form method='post'>
192 -<h1>Magnus' magic MediaWiki-to-XML-to-stuff converter</h1>
193 -<p>All written in PHP - so portable, <s>so incredibly slow...</s> <i>about as fast as the original MediaWiki parser!</i></p>
194 -<h2>Paste article list or wikitext here</h2>
195 -<table border='0' width='100%'><tr>
196 -<td valign='top'><textarea rows='20' cols='80' style='width:100%' name='text'></textarea></td>
197 -<td width='200px' valign='top' nowrap>
198 -<INPUT checked type='radio' name='use_templates' value='all'>Use all templates<br/>
199 -<INPUT type='radio' name='use_templates' value='none'>Do not use templates<br/>
200 -<INPUT type='radio' name='use_templates' value='these'>Use these templates<br/>
201 -<INPUT type='radio' name='use_templates' value='notthese'>Use all but these templates
202 -<textarea rows='15' cols='30' style='width:100%' name='templates'></textarea>
203 -</td></tr></table>
204 -<table border='0'><tr>
205 -<td valign='top'>
206 -This is
207 -<INPUT type='radio' name='whatsthis' value='wikitext'>raw wikitext
208 -<INPUT checked type='radio' name='whatsthis' value='articlelist'>a list of articles
209 -<br/>
210 -
211 -Site : http://<input type='text' name='site' value='".$xmlg["site_base_url"]."'/>/index.php<br/>
212 -Title : <input type='text' name='document_title' value='' size=40/><br/>
213 -<input type='checkbox' name='add_gfdl' value='1' checked>Include GFDL (for some output formats)</input><br/>
214 -<input type='checkbox' name='keep_categories' value='1' checked>Keep categories</input><br/>
215 -<input type='checkbox' name='keep_interlanguage' value='1' checked>Keep interlanguage links</input><br/>
216 -<input type='submit' name='doit' value='Convert'/>
217 -</td><td valign='top' style='border-left:1px black solid'>
218 -<b>Output</b>
219 -<br/><INPUT checked type='radio' name='output_format' value='xml'>XML
220 -<br/><INPUT type='radio' name='output_format' value='text'>Plain text
221 - <input type='checkbox' name='plaintext_markup' value='1' checked>Use *_/ markup</input>
222 - <input type='checkbox' name='plaintext_prelink' value='1' checked>Put &rarr; before internal links</input>
223 -<br/><INPUT type='radio' name='output_format' value='xhtml'>XHTML
224 -<br/><INPUT type='radio' name='output_format' value='docbook_xml'>DocBook XML
225 -{$optional}
226 -</tr></table>
227 -</form>
228 -<p>
229 -Known issues:
230 -<ul>
231 -<li>In templates, {{{variables}}} used within &lt;nowiki&gt; tags will be replaced as well (too lazy to strip them)</li>
232 -<li>HTML comments are removed (instead of converted into XML tags)</li>
233 -</ul>
234 -</p>
235 -</body></html>" ;
236252 }
237253
238254 #<input type='checkbox' name='resolvetemplates' value='1' checked>Automatically resolve templates</input><br/>
Index: trunk/wiki2xml/php/xml2txt.php
@@ -147,7 +147,7 @@
148148 }
149149 }
150150
151 -require_once ( "./xml2tree.php" ) ;
 151+require_once ( "xml2tree.php" ) ;
152152
153153
154154
Index: trunk/wiki2xml/php/default.php
@@ -3,6 +3,7 @@
44 $xmlg["namespace_template"] = "Template" ;
55 $xmlg["site_base_url"] = "en.wikipedia.org/w" ;
66 $xmlg["book_title"] = "No title" ;
 7+$xmlg['sourcedir'] = "." ;
78 $xmlg["temp_dir"] = "/tmp" ;
89
910 @include ( "local.php" ) ;

Status & tagging log