Index: trunk/wiki2xml/php/xmldump2files.php |
— | — | @@ -1,104 +1,104 @@ |
2 | | -<?php
|
3 | | -
|
4 | | -# Change there to your local settings
|
5 | | -$dumpfile = "K:\\dewiki-20060327-pages-articles.xml" ;
|
6 | | -$basedir = "C:" ;
|
7 | | -
|
8 | | -#______________________________________________________________________________
|
9 | | -# GLOBAL VARIABLES
|
10 | | -$dir = "" ;
|
11 | | -$namespaces = array () ;
|
12 | | -$mem = array () ;
|
13 | | -$tags = array () ;
|
14 | | -$page_counter = 0 ;
|
15 | | -
|
16 | | -# FUNCTIONS
|
17 | | -
|
18 | | -require_once ( "global_functions.php" ) ;
|
19 | | -
|
20 | | -function store_file ( &$loc , &$text , $mode = "text" ) {
|
21 | | - if ( $mode == "text" ) {
|
22 | | - if ( !$handle = fopen($loc->fullname.".txt", 'wb') ) {
|
23 | | - print "Failed to open {$loc->file}.txt!<br/>" ;
|
24 | | - flush () ;
|
25 | | - }
|
26 | | - fwrite($handle, $text) ;
|
27 | | - fclose ( $handle ) ;
|
28 | | - } else if ( $mode == "gzip" ) {
|
29 | | - if ( !$gz = gzopen($loc->fullname.".gz",'w9') ) {
|
30 | | - print "Failed to open {$loc->file}.gz!<br/>" ;
|
31 | | - flush () ;
|
32 | | - }
|
33 | | - gzwrite($gz, $text);
|
34 | | - gzclose($gz);
|
35 | | - }
|
36 | | -}
|
37 | | -
|
38 | | -function microtime_float()
|
39 | | -{
|
40 | | - list($usec, $sec) = explode(" ", microtime());
|
41 | | - return ((float)$usec + (float)$sec);
|
42 | | -}
|
43 | | -
|
| 2 | +<?php |
| 3 | + |
| 4 | +# Change there to your local settings |
| 5 | +$dumpfile = "K:\\dewiki-20060327-pages-articles.xml" ; |
| 6 | +$basedir = "C:" ; |
| 7 | + |
| 8 | +#______________________________________________________________________________ |
| 9 | +# GLOBAL VARIABLES |
| 10 | +$dir = "" ; |
| 11 | +$namespaces = array () ; |
| 12 | +$mem = array () ; |
| 13 | +$tags = array () ; |
| 14 | +$page_counter = 0 ; |
| 15 | + |
| 16 | +# FUNCTIONS |
| 17 | + |
| 18 | +require_once ( "global_functions.php" ) ; |
| 19 | + |
| 20 | +function store_file ( &$loc , &$text , $mode = "text" ) { |
| 21 | + if ( $mode == "text" ) { |
| 22 | + if ( !$handle = fopen($loc->fullname.".txt", 'wb') ) { |
| 23 | + print "Failed to open {$loc->file}.txt!<br/>" ; |
| 24 | + flush () ; |
| 25 | + } |
| 26 | + fwrite($handle, $text) ; |
| 27 | + fclose ( $handle ) ; |
| 28 | + } else if ( $mode == "gzip" ) { |
| 29 | + if ( !$gz = gzopen($loc->fullname.".gz",'w9') ) { |
| 30 | + print "Failed to open {$loc->file}.gz!<br/>" ; |
| 31 | + flush () ; |
| 32 | + } |
| 33 | + gzwrite($gz, $text); |
| 34 | + gzclose($gz); |
| 35 | + } |
| 36 | +} |
| 37 | + |
| 38 | +function microtime_float() |
| 39 | +{ |
| 40 | + list($usec, $sec) = explode(" ", microtime()); |
| 41 | + return ((float)$usec + (float)$sec); |
| 42 | +} |
| 43 | + |
44 | 44 | # Global functions for parsing |
45 | 45 | |
46 | | -function XML2TXT_START($parser, $name, $attrs) {
|
47 | | - global $mem , $tags ;
|
48 | | - $mem["name"] = $name ;
|
49 | | - $tags[] = $name ;
|
50 | | - if ( $name == "NAMESPACE" ) {
|
51 | | - $mem['key'] = $attrs["KEY"] ;
|
52 | | - } else if ( $name == "TEXT" ) {
|
53 | | - $mem['text'] = "" ;
|
54 | | - }
|
| 46 | +function XML2TXT_START($parser, $name, $attrs) { |
| 47 | + global $mem , $tags ; |
| 48 | + $mem["name"] = $name ; |
| 49 | + $tags[] = $name ; |
| 50 | + if ( $name == "NAMESPACE" ) { |
| 51 | + $mem['key'] = $attrs["KEY"] ; |
| 52 | + } else if ( $name == "TEXT" ) { |
| 53 | + $mem['text'] = "" ; |
| 54 | + } |
55 | 55 | } |
56 | 56 | |
57 | 57 | function XML2TXT_END($parser, $name) { |
58 | | - global $mem , $namespaces , $tags , $page_counter , $dir ;
|
59 | | - if ( $mem['name'] == 'NAMESPACE' ) {
|
60 | | - $namespaces[$mem['key']] = $mem['text'] ;
|
61 | | - } else if ( $mem['name'] == 'PAGE' ) {
|
62 | | - $loc = get_file_location_global ( $dir , $mem['namespace'] , $mem['title'] , true ) ;
|
63 | | - store_file ( $loc , $mem['text'] , 'text' ) ;
|
64 | | -
|
65 | | - $page_counter++ ;
|
66 | | - if ( $page_counter % 1000 == 0 ) {
|
67 | | - print '.' ;
|
68 | | - if ( $page_counter % 50000 == 0 ) print "<br/>" ;
|
69 | | - flush () ;
|
70 | | - }
|
71 | | - }
|
72 | | -
|
73 | | - array_pop ( $tags ) ;
|
74 | | - if ( count ( $tags ) > 0 ) {
|
75 | | - $mem['name'] = array_pop ( $tags ) ;
|
76 | | - $tags[] = $mem['name'] ;
|
77 | | - } else {
|
78 | | - $mem['name'] = "" ;
|
| 58 | + global $mem , $namespaces , $tags , $page_counter , $dir ; |
| 59 | + if ( $mem['name'] == 'NAMESPACE' ) { |
| 60 | + $namespaces[$mem['key']] = $mem['text'] ; |
| 61 | + } else if ( $mem['name'] == 'PAGE' ) { |
| 62 | + $loc = get_file_location_global ( $dir , $mem['namespace'] , $mem['title'] , true ) ; |
| 63 | + store_file ( $loc , $mem['text'] , 'text' ) ; |
| 64 | + |
| 65 | + $page_counter++ ; |
| 66 | + if ( $page_counter % 1000 == 0 ) { |
| 67 | + print '.' ; |
| 68 | + if ( $page_counter % 50000 == 0 ) print "<br/>" ; |
| 69 | + flush () ; |
| 70 | + } |
79 | 71 | } |
| 72 | + |
| 73 | + array_pop ( $tags ) ; |
| 74 | + if ( count ( $tags ) > 0 ) { |
| 75 | + $mem['name'] = array_pop ( $tags ) ; |
| 76 | + $tags[] = $mem['name'] ; |
| 77 | + } else { |
| 78 | + $mem['name'] = "" ; |
| 79 | + } |
80 | 80 | } |
81 | 81 | |
82 | | -function XML2TXT_DATA ( $parser, $data ) {
|
83 | | - global $mem , $namespaces ;
|
84 | | - if ( $mem['name'] == 'NAMESPACE' ) {
|
85 | | - $mem['text'] = $data ;
|
86 | | - } else if ( $mem['name'] == 'TITLE' ) {
|
87 | | - $ns = 0 ;
|
88 | | - foreach ( $namespaces AS $k => $v ) {
|
89 | | - if ( $k <= 0 ) continue ;
|
90 | | - if ( substr ( 0 , strlen ( $v ) + 1 ) != $v.":" ) continue ;
|
91 | | - $ns = $k ;
|
92 | | - $data = substr ( $data , strlen ( $v ) + 1 ) ;
|
93 | | - break ;
|
94 | | - }
|
95 | | - $mem['title'] = $data ;
|
96 | | - $mem['namespace'] = $ns ;
|
97 | | - } else if ( $mem['name'] == 'TEXT' ) {
|
98 | | - $mem['text'] .= $data ;
|
99 | | - }
|
| 82 | +function XML2TXT_DATA ( $parser, $data ) { |
| 83 | + global $mem , $namespaces ; |
| 84 | + if ( $mem['name'] == 'NAMESPACE' ) { |
| 85 | + $mem['text'] = $data ; |
| 86 | + } else if ( $mem['name'] == 'TITLE' ) { |
| 87 | + $ns = 0 ; |
| 88 | + foreach ( $namespaces AS $k => $v ) { |
| 89 | + if ( $k <= 0 ) continue ; |
| 90 | + if ( substr ( 0 , strlen ( $v ) + 1 ) != $v.":" ) continue ; |
| 91 | + $ns = $k ; |
| 92 | + $data = substr ( $data , strlen ( $v ) + 1 ) ; |
| 93 | + break ; |
| 94 | + } |
| 95 | + $mem['title'] = $data ; |
| 96 | + $mem['namespace'] = $ns ; |
| 97 | + } else if ( $mem['name'] == 'TEXT' ) { |
| 98 | + $mem['text'] .= $data ; |
| 99 | + } |
100 | 100 | } |
101 | 101 | |
102 | | -function scan_xml_file ( $xml_filename ) {
|
| 102 | +function scan_xml_file ( $xml_filename ) { |
103 | 103 | global $namespaces , $dir , $page_counter ; |
104 | 104 | $xml_parser_handle = xml_parser_create(); |
105 | 105 | xml_set_element_handler($xml_parser_handle, "XML2TXT_START", "XML2TXT_END"); |
— | — | @@ -107,45 +107,45 @@ |
108 | 108 | if (!($parse_handle = fopen($xml_filename, 'r'))) { |
109 | 109 | die("FEHLER: Datei $xml_filename nicht gefunden."); |
110 | 110 | } |
111 | | -
|
112 | | - $t1 = microtime_float() ;
|
| 111 | + |
| 112 | + $t1 = microtime_float() ; |
113 | 113 | while ($xml_data = fread($parse_handle, 8192)) { |
114 | 114 | if (!xml_parse($xml_parser_handle, $xml_data, feof($parse_handle))) { |
115 | 115 | die(sprintf('XML error: %s at line %d', |
116 | 116 | xml_error_string(xml_get_error_code($xml_parser_handle)), |
117 | 117 | xml_get_current_line_number($xml_parser_handle))); |
118 | | - }
|
119 | | -
|
120 | | -/* if ( $page_counter % 100 == 0 ) {
|
121 | | - $t2 = microtime_float() - $t1 ;
|
122 | | - $t3 = $t2 * 1000 / $page_counter ;
|
123 | | - print $t3 . " sec/1000 pages<br/>" ; flush () ;
|
| 118 | + } |
| 119 | + |
| 120 | +/* if ( $page_counter % 100 == 0 ) { |
| 121 | + $t2 = microtime_float() - $t1 ; |
| 122 | + $t3 = $t2 * 1000 / $page_counter ; |
| 123 | + print $t3 . " sec/1000 pages<br/>" ; flush () ; |
124 | 124 | }*/ |
125 | | - }
|
126 | | - $t2 = microtime_float() - $t1 ;
|
127 | | - print "Took {$t2} seconds total.<br/>" ; flush () ;
|
| 125 | + } |
| 126 | + $t2 = microtime_float() - $t1 ; |
| 127 | + print "Took {$t2} seconds total.<br/>" ; flush () ; |
128 | 128 | |
129 | | - xml_parser_free($xml_parser_handle);
|
130 | | -
|
131 | | - $handle = fopen($dir."/namespaces.txt", 'wb') ;
|
132 | | - foreach ( $namespaces AS $ns => $nst ) {
|
133 | | - $t = "{$ns}:{$nst}\n" ;
|
134 | | - fwrite($handle, $t) ;
|
135 | | - }
|
136 | | - fclose ( $handle ) ;
|
| 129 | + xml_parser_free($xml_parser_handle); |
137 | 130 | |
| 131 | + $handle = fopen($dir."/namespaces.txt", 'wb') ; |
| 132 | + foreach ( $namespaces AS $ns => $nst ) { |
| 133 | + $t = "{$ns}:{$nst}\n" ; |
| 134 | + fwrite($handle, $t) ; |
| 135 | + } |
| 136 | + fclose ( $handle ) ; |
| 137 | + |
138 | 138 | } |
139 | | -
|
140 | | -
|
141 | | -# MAIN
|
142 | | -
|
143 | | -$dir = array_pop ( explode ( "/" , str_replace ( "\\" , "/" , $dumpfile ) ) ) ;
|
144 | | -$dir = $basedir . "/" . str_replace ( ".xml" , "" , $dir ) ;
|
145 | | -
|
146 | | -@set_time_limit ( 0 ) ; # No time limit
|
147 | | -#ini_set('user_agent','MSIE 4\.0b2;'); # Fake user agent
|
148 | | -header ('Content-type: text/html; charset=utf-8');
|
149 | | -@mkdir ( $dir ) ;
|
150 | | -scan_xml_file ( $dumpfile ) ;
|
151 | | -
|
152 | | -?>
|
| 139 | + |
| 140 | + |
| 141 | +# MAIN |
| 142 | + |
| 143 | +$dir = array_pop ( explode ( "/" , str_replace ( "\\" , "/" , $dumpfile ) ) ) ; |
| 144 | +$dir = $basedir . "/" . str_replace ( ".xml" , "" , $dir ) ; |
| 145 | + |
| 146 | +@set_time_limit ( 0 ) ; # No time limit |
| 147 | +#ini_set('user_agent','MSIE 4\.0b2;'); # Fake user agent |
| 148 | +header ('Content-type: text/html; charset=utf-8'); |
| 149 | +@mkdir ( $dir ) ; |
| 150 | +scan_xml_file ( $dumpfile ) ; |
| 151 | + |
| 152 | +?> |
Index: trunk/wiki2xml/php/CREDITS |
— | — | @@ -1,8 +1,8 @@ |
2 | 2 | wiki2xml is (c) by Magnus Manske 2005-2006 and released under the GPL. |
3 | 3 | |
4 | 4 | The following people (in alphabetic order) contributed to this project: |
5 | | -
|
6 | | -Magnus Manske <magnus.manske@web.de> Everything Tels didn't do ;-)
|
7 | 5 | |
| 6 | +Magnus Manske <magnus.manske@web.de> Everything Tels didn't do ;-) |
| 7 | + |
8 | 8 | Tels <nospam-abuse@bloodgate.com> Linux fixes, OpenOffice output |
9 | 9 | REDME and doc |
Index: trunk/wiki2xml/php/extension.php |
— | — | @@ -1,24 +1,24 @@ |
2 | | -<?php
|
3 | | -/*
|
4 | | -To enable this extension, put all files in this directory into a "wiki2xml" subdirectory of your MediaWiki extensions directory
|
5 | | -Also, add
|
6 | | - require_once ( "extensions/wiki2xml/extension.php" ) ;
|
7 | | -to your LocalSettings.php
|
8 | | -The extension can then be accessed as [[Special:Wiki2XML]]
|
9 | | -*/
|
10 | | -
|
11 | | -if( !defined( 'MEDIAWIKI' ) ) die();
|
12 | | -
|
13 | | -# Integrating into the MediaWiki environment
|
14 | | -
|
15 | | -$wgExtensionCredits['Wiki2XML'][] = array(
|
16 | | - 'name' => 'Wiki2XML',
|
17 | | - 'description' => 'An extension to convert wiki markup into XML.',
|
18 | | - 'author' => 'Magnus Manske'
|
19 | | -);
|
20 | | -
|
21 | | -$wgExtensionFunctions[] = 'wfWiki2XMLExtension';
|
22 | | -
|
| 2 | +<?php |
| 3 | +/* |
| 4 | +To enable this extension, put all files in this directory into a "wiki2xml" subdirectory of your MediaWiki extensions directory |
| 5 | +Also, add |
| 6 | + require_once ( "extensions/wiki2xml/extension.php" ) ; |
| 7 | +to your LocalSettings.php |
| 8 | +The extension can then be accessed as [[Special:Wiki2XML]] |
| 9 | +*/ |
| 10 | + |
| 11 | +if( !defined( 'MEDIAWIKI' ) ) die(); |
| 12 | + |
| 13 | +# Integrating into the MediaWiki environment |
| 14 | + |
| 15 | +$wgExtensionCredits['Wiki2XML'][] = array( |
| 16 | + 'name' => 'Wiki2XML', |
| 17 | + 'description' => 'An extension to convert wiki markup into XML.', |
| 18 | + 'author' => 'Magnus Manske' |
| 19 | +); |
| 20 | + |
| 21 | +$wgExtensionFunctions[] = 'wfWiki2XMLExtension'; |
| 22 | + |
23 | 23 | # for Special::Version: |
24 | 24 | $wgExtensionCredits['parserhook'][] = array( |
25 | 25 | 'name' => 'wiki2xml extension', |
— | — | @@ -26,51 +26,51 @@ |
27 | 27 | 'url' => 'http://en.wikipedia.org/wiki/User:Magnus_Manske', |
28 | 28 | 'version' => 'v0.02', |
29 | 29 | ); |
30 | | -
|
31 | | -
|
32 | | -#_____________________________________________________________________________
|
33 | | -
|
34 | | -/**
|
35 | | -* The special page
|
36 | | -*/
|
37 | | -function wfWiki2XMLExtension() { # Checked for HTML and MySQL insertion attacks
|
38 | | - global $IP, $wgMessageCache;
|
39 | | -# wfTasksAddCache();
|
40 | | -
|
41 | | - // FIXME : i18n
|
42 | | - $wgMessageCache->addMessage( 'wiki2xml', 'Wiki2XML' );
|
43 | | -
|
44 | | - require_once $IP.'/includes/SpecialPage.php';
|
45 | | -
|
46 | | - class SpecialWiki2XML extends SpecialPage {
|
47 | | -
|
48 | | - /**
|
49 | | - * Constructor
|
50 | | - */
|
51 | | - function SpecialWiki2XML() { # Checked for HTML and MySQL insertion attacks
|
52 | | - SpecialPage::SpecialPage( 'Wiki2XML' );
|
53 | | - $this->includable( true );
|
54 | | - }
|
55 | | -
|
56 | | - /**
|
57 | | - * Special page main function
|
58 | | - */
|
59 | | - function execute( $par = null ) { # Checked for HTML and MySQL insertion attacks
|
60 | | - global $wgOut, $wgRequest, $wgUser, $wgTitle, $IP;
|
61 | | - $fname = 'Special::Tasks:execute';
|
62 | | - global $xmlg , $html_named_entities_mapping_mine, $content_provider;
|
63 | | - include_once ( "default.php" ) ;
|
64 | | - $xmlg['sourcedir'] = $IP.'/extensions/wiki2xml' ;
|
65 | | - include_once ( "w2x.php" ) ;
|
66 | | -
|
67 | | - $this->setHeaders();
|
68 | | - $wgOut->addHtml( $out );
|
69 | | - }
|
70 | | -
|
71 | | - } # end of class
|
72 | | -
|
73 | | - SpecialPage::addPage( new SpecialWiki2XML );
|
74 | | -}
|
75 | | -
|
76 | | -
|
77 | | -?>
|
| 30 | + |
| 31 | + |
| 32 | +#_____________________________________________________________________________ |
| 33 | + |
| 34 | +/** |
| 35 | +* The special page |
| 36 | +*/ |
| 37 | +function wfWiki2XMLExtension() { # Checked for HTML and MySQL insertion attacks |
| 38 | + global $IP, $wgMessageCache; |
| 39 | +# wfTasksAddCache(); |
| 40 | + |
| 41 | + // FIXME : i18n |
| 42 | + $wgMessageCache->addMessage( 'wiki2xml', 'Wiki2XML' ); |
| 43 | + |
| 44 | + require_once $IP.'/includes/SpecialPage.php'; |
| 45 | + |
| 46 | + class SpecialWiki2XML extends SpecialPage { |
| 47 | + |
| 48 | + /** |
| 49 | + * Constructor |
| 50 | + */ |
| 51 | + function SpecialWiki2XML() { # Checked for HTML and MySQL insertion attacks |
| 52 | + SpecialPage::SpecialPage( 'Wiki2XML' ); |
| 53 | + $this->includable( true ); |
| 54 | + } |
| 55 | + |
| 56 | + /** |
| 57 | + * Special page main function |
| 58 | + */ |
| 59 | + function execute( $par = null ) { # Checked for HTML and MySQL insertion attacks |
| 60 | + global $wgOut, $wgRequest, $wgUser, $wgTitle, $IP; |
| 61 | + $fname = 'Special::Tasks:execute'; |
| 62 | + global $xmlg , $html_named_entities_mapping_mine, $content_provider; |
| 63 | + include_once ( "default.php" ) ; |
| 64 | + $xmlg['sourcedir'] = $IP.'/extensions/wiki2xml' ; |
| 65 | + include_once ( "w2x.php" ) ; |
| 66 | + |
| 67 | + $this->setHeaders(); |
| 68 | + $wgOut->addHtml( $out ); |
| 69 | + } |
| 70 | + |
| 71 | + } # end of class |
| 72 | + |
| 73 | + SpecialPage::addPage( new SpecialWiki2XML ); |
| 74 | +} |
| 75 | + |
| 76 | + |
| 77 | +?> |
Index: trunk/wiki2xml/php/browse_texts.php |
— | — | @@ -1,66 +1,67 @@ |
2 | | -<?php
|
3 | | -
|
4 | | -require_once ( "default.php" ) ;
|
5 | | -require_once ( "global_functions.php" ) ;
|
6 | | -require_once ( "filter_named_entities.php" ) ;
|
7 | | -require_once ( "content_provider.php" ) ;
|
8 | | -require_once ( "wiki2xml.php" ) ;
|
9 | | -require_once ( "xml2xhtml.php" ) ;
|
10 | | -require_once ( "mediawiki_converter.php" ) ;
|
11 | | -
|
12 | | -# FUNCTIONS
|
13 | | -
|
14 | | -function get_param ( $key , $default = "" ) {
|
15 | | - if ( !isset ( $_REQUEST[$key] ) ) return $default ;
|
16 | | - return $_REQUEST[$key] ;
|
17 | | -}
|
18 | | -
|
19 | | -# MAIN
|
20 | | -
|
21 | | -@set_time_limit ( 0 ) ; # No time limit
|
22 | | -
|
23 | | -$xmlg = array (
|
24 | | - 'site_base_url' => "SBU" ,
|
25 | | - 'resolvetemplates' => true ,
|
26 | | - 'templates' => array () ,
|
27 | | - 'namespace_template' => 'Vorlage' ,
|
28 | | -) ;
|
29 | | -
|
30 | | -$content_provider = new ContentProviderTextFile ;
|
31 | | -$converter = new MediaWikiConverter ;
|
32 | | -
|
33 | | -$title = urldecode ( get_param ( 'title' , urlencode ( 'Main Page' ) ) ) ;
|
34 | | -$xmlg['page_title'] = $title ;
|
35 | | -
|
36 | | -$format = strtolower ( get_param ( 'format' , 'xhtml' ) ) ;
|
37 | | -$content_provider->basedir = $base_text_dir ;
|
38 | | -
|
39 | | -$text = $content_provider->get_wiki_text ( $title ) ;
|
40 | | -$xml = $converter->article2xml ( $title , $text , $xmlg ) ;
|
41 | | -
|
42 | | -if ( $format =="xml" ) {
|
43 | | - # XML
|
44 | | - header('Content-type: text/xml; charset=utf-8');
|
45 | | - print "<?xml version='1.0' encoding='UTF-8' ?>\n" ;
|
46 | | - print $xml ;
|
47 | | -} else if ( $format == "text" ) {
|
48 | | - # Plain text
|
49 | | - $xmlg['plaintext_markup'] = true ;
|
50 | | - $xmlg['plaintext_prelink'] = true ;
|
51 | | - $out = $converter->articles2text ( $xml , $xmlg ) ;
|
52 | | - $out = str_replace ( "\n" , "<br/>" , $out ) ;
|
53 | | - header('Content-type: text/html; charset=utf-8');
|
54 | | - print $out ;
|
55 | | -} else {
|
56 | | - # XHTML
|
57 | | - if ( stristr($_SERVER["HTTP_ACCEPT"],"application/xhtml+xml") ) {
|
58 | | - header("Content-type: text/html; charset=utf-8"); # Skipping the "strict" part ;-)
|
59 | | -# header("Content-type: application/xhtml+xml");
|
60 | | - } else {
|
61 | | - # Header hack for IE
|
62 | | - header("Content-type: text/html; charset=utf-8");
|
63 | | - }
|
64 | | - print $converter->articles2xhtml ( $xml , $xmlg ) ;
|
65 | | -}
|
66 | | -
|
67 | | -?>
|
| 2 | +<?php |
| 3 | + |
| 4 | +require_once ( "default.php" ) ; |
| 5 | +require_once ( "global_functions.php" ) ; |
| 6 | +require_once ( "filter_named_entities.php" ) ; |
| 7 | +require_once ( "content_provider.php" ) ; |
| 8 | +require_once ( "wiki2xml.php" ) ; |
| 9 | +require_once ( "xml2xhtml.php" ) ; |
| 10 | +require_once ( "mediawiki_converter.php" ) ; |
| 11 | + |
| 12 | +# FUNCTIONS |
| 13 | + |
| 14 | +function get_param ( $key , $default = "" ) { |
| 15 | + if ( !isset ( $_REQUEST[$key] ) ) return $default ; |
| 16 | + return $_REQUEST[$key] ; |
| 17 | +} |
| 18 | + |
| 19 | +# MAIN |
| 20 | + |
| 21 | +@set_time_limit ( 0 ) ; # No time limit |
| 22 | + |
| 23 | +$xmlg = array ( |
| 24 | + 'site_base_url' => "SBU" , |
| 25 | + 'resolvetemplates' => true , |
| 26 | + 'templates' => array () , |
| 27 | + 'namespace_template' => 'Vorlage' , |
| 28 | +) ; |
| 29 | + |
| 30 | +$content_provider = new ContentProviderTextFile ; |
| 31 | +$converter = new MediaWikiConverter ; |
| 32 | + |
| 33 | +$title = urldecode ( get_param ( 'title' , urlencode ( 'Main Page' ) ) ) ; |
| 34 | +$xmlg['page_title'] = $title ; |
| 35 | + |
| 36 | +$format = strtolower ( get_param ( 'format' , 'xhtml' ) ) ; |
| 37 | +$content_provider->basedir = $base_text_dir ; |
| 38 | + |
| 39 | +$text = $content_provider->get_wiki_text ( $title ) ; |
| 40 | +$xml = $converter->article2xml ( $title , $text , $xmlg ) ; |
| 41 | + |
| 42 | +if ( $format =="xml" ) { |
| 43 | + # XML |
| 44 | + header('Content-type: text/xml; charset=utf-8'); |
| 45 | + print "<?xml version='1.0' encoding='UTF-8' ?>\n" ; |
| 46 | + print $xml ; |
| 47 | +} else if ( $format == "text" ) { |
| 48 | + # Plain text |
| 49 | + $xmlg['plaintext_markup'] = true ; |
| 50 | + $xmlg['plaintext_prelink'] = true ; |
| 51 | + $out = $converter->articles2text ( $xml , $xmlg ) ; |
| 52 | + $out = str_replace ( "\n" , "<br/>" , $out ) ; |
| 53 | + header('Content-type: text/html; charset=utf-8'); |
| 54 | + print $out ; |
| 55 | +} else { |
| 56 | + # XHTML |
| 57 | + if ( stristr($_SERVER["HTTP_ACCEPT"],"application/xhtml+xml") ) { |
| 58 | + # Skipping the "strict" part ;-) |
| 59 | + header("Content-type: text/html; charset=utf-8"); |
| 60 | +# header("Content-type: application/xhtml+xml"); |
| 61 | + } else { |
| 62 | + # Header hack for IE |
| 63 | + header("Content-type: text/html; charset=utf-8"); |
| 64 | + } |
| 65 | + print $converter->articles2xhtml ( $xml , $xmlg ) ; |
| 66 | +} |
| 67 | + |
| 68 | +?> |