r79486 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r79485‎ | r79486 | r79487 >
Date:11:15, 2 January 2011
Author:mkroetzsch
Status:deferred
Tags:
Comment:
Major restructuring of the OWL/RDF export code: more, shorter files with more, shorter functions; clearer separation of serialisation syntax from page selection; some bugs fixed
Modified paths:
  • /trunk/extensions/SemanticMediaWiki/includes/SMW_Setup.php (modified) (history)
  • /trunk/extensions/SemanticMediaWiki/includes/SMW_SetupLight.php (modified) (history)
  • /trunk/extensions/SemanticMediaWiki/includes/datavalues/SMW_DV_Concept.php (modified) (history)
  • /trunk/extensions/SemanticMediaWiki/includes/datavalues/SMW_DV_Record.php (modified) (history)
  • /trunk/extensions/SemanticMediaWiki/includes/export/SMW_Exp_Data.php (modified) (history)
  • /trunk/extensions/SemanticMediaWiki/includes/export/SMW_Exp_Element.php (modified) (history)
  • /trunk/extensions/SemanticMediaWiki/includes/export/SMW_ExportController.php (added) (history)
  • /trunk/extensions/SemanticMediaWiki/includes/export/SMW_Exporter.php (modified) (history)
  • /trunk/extensions/SemanticMediaWiki/includes/export/SMW_OWLExport.php (deleted) (history)
  • /trunk/extensions/SemanticMediaWiki/includes/export/SMW_Serializer.php (added) (history)
  • /trunk/extensions/SemanticMediaWiki/maintenance/SMW_dumpRDF.php (modified) (history)
  • /trunk/extensions/SemanticMediaWiki/specials/Export/SMW_SpecialOWLExport.php (modified) (history)

Diff [purge]

Index: trunk/extensions/SemanticMediaWiki/maintenance/SMW_dumpRDF.php
@@ -17,7 +17,7 @@
1818 * --individuals do only pages that are no categories, properties, or types
1919 * -d <delay> slows down the export in order to stress the server less,
2020 * sleeping for <delay> milliseconds every now and then
21 - * -e <each> after how many exported entities should the server take a nap?
 21+ * -e <each> after how many exported entities should the process take a nap?
2222 * --server=<server> The protocol and server name to use as base URLs, e.g.
2323 * http://en.wikipedia.org. This is sometimes necessary because
2424 * server name detection may fail in command line scripts.
@@ -33,7 +33,7 @@
3434 ? getenv( 'MW_INSTALL_PATH' ) . "/maintenance/commandLine.inc"
3535 : dirname( __FILE__ ) . '/../../../maintenance/commandLine.inc' );
3636 global $smwgIP, $wgServer;
37 -require_once( "$smwgIP/specials/Export/SMW_SpecialOWLExport.php" );
 37+//require_once( "$smwgIP/specials/Export/SMW_SpecialOWLExport.php" );
3838
3939 if ( !empty( $options['o'] ) ) {
4040 $outfile = $options['o'];
@@ -76,5 +76,5 @@
7777 print "\nWriting OWL/RDF dump to file \"$outfile\" ...\n";
7878 }
7979
80 -$exRDF = new SMWOWLExport();
 80+$exRDF = new SMWExportController( new SMWSerializer() );
8181 $exRDF->printAll( $outfile, $export_ns, $delay, $delayeach );
Index: trunk/extensions/SemanticMediaWiki/specials/Export/SMW_SpecialOWLExport.php
@@ -27,6 +27,9 @@
2828 * @author Markus Krötzsch
2929 */
3030 class SMWSpecialOWLExport extends SpecialPage {
 31+
 32+ /// Export controller object to be used for serializing data
 33+ protected $export_controller;
3134
3235 public function __construct() {
3336 parent::__construct( 'ExportRDF' );
@@ -34,26 +37,13 @@
3538 }
3639
3740 public function execute( $page ) {
38 - global $wgOut, $wgRequest, $wgUser, $smwgAllowRecursiveExport, $smwgExportBacklinks, $smwgExportAll;
39 -
 41+ global $wgOut, $wgRequest;
4042 $wgOut->setPageTitle( wfMsg( 'exportrdf' ) );
41 -
42 - $recursive = 0; // default, no recursion
43 - $backlinks = $smwgExportBacklinks; // default
44 -
45 - // Check whether we already know what to export.
46 - if ( $page == '' ) {
47 - // Try to get GET parameter; simple way of calling the export.
48 - $page = $wgRequest->getVal( 'page' );
49 - } else {
50 - // This is needed since MediaWiki 1.8, but it is wrong for 1.7.
51 - $page = rawurldecode( $page );
52 - }
53 -
54 - if ( $page == '' ) {
55 - // Try to get POST list; some settings are only available via POST.
 43+
 44+ // see if we can find something to export:
 45+ $page = ( $page == '' ) ? $wgRequest->getVal( 'page' ) : $page = rawurldecode( $page );
 46+ if ( $page == '' ) { // Try to get POST list; some settings are only available via POST.
5647 $pageblob = $wgRequest->getText( 'pages' );
57 -
5848 if ( $pageblob != '' ) {
5949 $pages = explode( "\n", $pageblob );
6050 }
@@ -61,100 +51,108 @@
6252 $pages = array( $page );
6353 }
6454
65 - if ( isset( $pages ) ) { // export to RDF
66 - $wgOut->disable();
67 - ob_start();
68 -
69 - // Only use rdf+xml mimetype if explicitly requested
70 - // TODO: should the see also links in the exported RDF then have this parameter as well?
71 - if ( $wgRequest->getVal( 'xmlmime' ) == 'rdf' ) {
72 - header( "Content-type: application/rdf+xml; charset=UTF-8" );
73 - } else {
74 - header( "Content-type: application/xml; charset=UTF-8" );
75 - }
76 -
77 - // Effect: assume "no" from missing parameters generated by checkboxes.
78 - $postform = $wgRequest->getText( 'postform' ) == 1;
79 -
80 - $rec = $wgRequest->getText( 'recursive' );
81 -
82 - if ( $rec == '' ) $rec = $wgRequest->getVal( 'recursive' );
83 -
84 - if ( ( $rec == '1' ) && ( $smwgAllowRecursiveExport || $wgUser->isAllowed( 'delete' ) ) ) {
85 - $recursive = 1; // users may be allowed to switch it on
86 - }
87 -
88 - $bl = $wgRequest->getText( 'backlinks' );
89 -
90 - if ( $bl == '' ) $bl = $wgRequest->getVal( 'backlinks' );
91 -
92 - if ( ( $bl == '1' ) && ( $wgUser->isAllowed( 'delete' ) ) ) {
93 - $backlinks = true; // admins can always switch on backlinks
94 - } elseif ( ( $bl == '0' ) || ( '' == $bl && $postform ) ) {
95 - $backlinks = false; // everybody can explicitly switch off backlinks
96 - }
97 -
98 - $date = $wgRequest->getText( 'date' );
99 -
100 - if ( $date == '' ) $date = $wgRequest->getVal( 'date' );
101 -
102 - $exp = new SMWOWLExport();
103 -
104 - if ( $date != '' ) $exp->setDate( $date );
105 -
106 - $exp->printPages( $pages, $recursive, $backlinks );
107 -
 55+ if ( isset( $pages ) ) {
 56+ $this->exportPages( $pages );
10857 return;
10958 } else {
11059 $offset = $wgRequest->getVal( 'offset' );
111 -
11260 if ( isset( $offset ) ) {
113 - $wgOut->disable();
114 -
115 - ob_start();
116 - header( "Content-type: application/xml; charset=UTF-8" );
117 -
118 - $exp = new SMWOWLExport();
119 - $exp->printPageList( $offset );
120 -
 61+ $this->startRDFExport();
 62+ $this->export_controller->printPageList( $offset );
12163 return;
12264 } else {
12365 $stats = $wgRequest->getVal( 'stats' );
124 -
12566 if ( isset( $stats ) ) {
126 - $wgOut->disable();
127 -
128 - ob_start();
129 - header( "Content-type: application/xml; charset=UTF-8" );
130 -
131 - $exp = new SMWOWLExport();
132 - $exp->printWikiInfo();
 67+ $this->startRDFExport();
 68+ $this->export_controller->printWikiInfo();
 69+ return;
13370 }
13471 }
13572 }
136 -
13773 // nothing exported yet; show user interface:
 74+ $this->showForm();
 75+ }
 76+
 77+ /**
 78+ * Create the HTML user interface for this special page.
 79+ */
 80+ protected function showForm() {
 81+ global $wgOut, $wgUser, $smwgAllowRecursiveExport, $smwgExportBacklinks, $smwgExportAll;
 82+
13883 $html = '<form name="tripleSearch" action="" method="POST">' . "\n" .
139 - '<p>' . wfMsg( 'smw_exportrdf_docu' ) . "</p>\n" .
140 - '<input type="hidden" name="postform" value="1"/>' . "\n" .
141 - '<textarea name="pages" cols="40" rows="10"></textarea><br />' . "\n";
 84+ '<p>' . wfMsg( 'smw_exportrdf_docu' ) . "</p>\n" .
 85+ '<input type="hidden" name="postform" value="1"/>' . "\n" .
 86+ '<textarea name="pages" cols="40" rows="10"></textarea><br />' . "\n";
14287
14388 if ( $wgUser->isAllowed( 'delete' ) || $smwgAllowRecursiveExport ) {
14489 $html .= '<input type="checkbox" name="recursive" value="1" id="rec">&#160;<label for="rec">' . wfMsg( 'smw_exportrdf_recursive' ) . '</label></input><br />' . "\n";
14590 }
146 -
14791 if ( $wgUser->isAllowed( 'delete' ) || $smwgExportBacklinks ) {
14892 $html .= '<input type="checkbox" name="backlinks" value="1" default="true" id="bl">&#160;<label for="bl">' . wfMsg( 'smw_exportrdf_backlinks' ) . '</label></input><br />' . "\n";
14993 }
150 -
15194 if ( $wgUser->isAllowed( 'delete' ) || $smwgExportAll ) {
15295 $html .= '<br />';
15396 $html .= '<input type="text" name="date" value="' . date( DATE_W3C, mktime( 0, 0, 0, 1, 1, 2000 ) ) . '" id="date">&#160;<label for="ea">' . wfMsg( 'smw_exportrdf_lastdate' ) . '</label></input><br />' . "\n";
15497 }
 98+ $html .= '<br /><input type="submit" value="' . wfMsg( 'smw_exportrdf_submit' ) . "\"/>\n</form>";
15599
156 - $html .= "<br /><input type=\"submit\" value=\"" . wfMsg( 'smw_exportrdf_submit' ) . "\"/>\n</form>";
157 -
158100 $wgOut->addHTML( $html );
159101 }
 102+
 103+ /**
 104+ * Prepare $wgOut for printing non-HTML data.
 105+ */
 106+ protected function startRDFExport() {
 107+ global $wgOut, $wgRequest;
 108+ $wgOut->disable();
 109+ ob_start();
 110+ // Only use rdf+xml mimetype if explicitly requested
 111+ // TODO: should the see also links in the exported RDF then have this parameter as well?
 112+ if ( $wgRequest->getVal( 'xmlmime' ) == 'rdf' ) {
 113+ header( "Content-type: application/rdf+xml; charset=UTF-8" );
 114+ } else {
 115+ header( "Content-type: application/xml; charset=UTF-8" );
 116+ }
 117+ $this->export_controller = new SMWExportController( new SMWSerializer() );
 118+ }
 119+
 120+ /**
 121+ * Export the given pages to RDF.
 122+ * @param array $pages containing the string names of pages to be exported
 123+ */
 124+ protected function exportPages( $pages ) {
 125+ global $wgRequest, $smwgExportBacklinks;
160126
 127+ // Effect: assume "no" from missing parameters generated by checkboxes.
 128+ $postform = $wgRequest->getText( 'postform' ) == 1;
 129+
 130+ $recursive = 0; // default, no recursion
 131+ $rec = $wgRequest->getText( 'recursive' );
 132+ if ( $rec == '' ) $rec = $wgRequest->getVal( 'recursive' );
 133+ if ( ( $rec == '1' ) && ( $smwgAllowRecursiveExport || $wgUser->isAllowed( 'delete' ) ) ) {
 134+ $recursive = 1; // users may be allowed to switch it on
 135+ }
 136+
 137+ $backlinks = $smwgExportBacklinks; // default
 138+ $bl = $wgRequest->getText( 'backlinks' );
 139+ if ( $bl == '' ) $bl = $wgRequest->getVal( 'backlinks' );
 140+ if ( ( $bl == '1' ) && ( $wgUser->isAllowed( 'delete' ) ) ) {
 141+ $backlinks = true; // admins can always switch on backlinks
 142+ } elseif ( ( $bl == '0' ) || ( '' == $bl && $postform ) ) {
 143+ $backlinks = false; // everybody can explicitly switch off backlinks
 144+ }
 145+
 146+ $date = $wgRequest->getText( 'date' );
 147+ if ( $date == '' ) $date = $wgRequest->getVal( 'date' );
 148+ if ( $date != '' ) {
 149+ $timeint = strtotime( $date );
 150+ $stamp = date( "YmdHis", $timeint );
 151+ $date = $stamp;
 152+ }
 153+
 154+ $this->startRDFExport();
 155+ $this->export_controller->enableBacklinks( $backlinks );
 156+ $this->export_controller->printPages( $pages, $recursive, $date );
 157+ }
 158+
161159 }
Index: trunk/extensions/SemanticMediaWiki/includes/export/SMW_OWLExport.php
@@ -1,836 +0,0 @@
2 -<?php
3 -
4 -/**
5 - * File holding the SMWOWLExport class for OWL export, used by SMWSpecialOWLExport (Special:ExportRDF page).
6 - *
7 - * @file SMW_OWLExport.php
8 - * @ingroup SMW
9 - *
10 - * @author Markus Krötzsch
11 - */
12 -
13 -/**
14 - * Small data object holding the bare essentials of one title.
15 - * Used to store processed and open pages for export.
16 - *
17 - * @ingroup SMW
18 - */
19 -class SMWSmallTitle {
20 - public $dbkey;
21 - public $namespace; // MW namespace constant
22 - public $modifier = ''; // e.g. a unit string
23 -
24 - public function getHash() {
25 - return $this->dbkey . ' ' . $this->namespace . ' ' . $this->modifier;
26 - }
27 -}
28 -
29 -
30 -/**
31 - * Class for encapsulating the methods for RDF export.
32 - *
33 - * @ingroup SMW
34 - */
35 -class SMWOWLExport {
36 - /**#@+
37 - * @access private
38 - */
39 -
40 - const MAX_CACHE_SIZE = 5000; // do not let cache arrays get larger than this
41 - const CACHE_BACKJUMP = 500; // kill this many cached entries if limit is reached,
42 - // avoids too much array copying; <= MAX_CACHE_SIZE!
43 -
44 - /**
45 - * An array that keeps track of the elements for which we still need to
46 - * write auxiliary definitions.
47 - */
48 - private $element_queue;
49 -
50 - /**
51 - * An array that keeps track of the elements which have been exported already
52 - */
53 - private $element_done;
54 -
55 - /**
56 - * Date used to filter the export. If a page has not been changed since that
57 - * date it will not be exported
58 - */
59 - private $date;
60 -
61 - /**
62 - * Array of additional namespaces (abbreviation => URI), flushed on
63 - * closing the current namespace tag. Since we export RDF in a streamed
64 - * way, it is not always possible to embed additional namespaces into
65 - * the RDF-tag which might have been sent to the client already. But we
66 - * wait with printing the current Description so that extra namespaces
67 - * from this array can still be printed (note that you never know which
68 - * extra namespaces you encounter during export).
69 - */
70 - private $extra_namespaces;
71 -
72 - /**
73 - * Array of namespaces that have been declared globally already. Contains
74 - * entries of format 'namespace abbreviation' => true, assuming that the
75 - * same abbreviation always refers to the same URI (i.e. you cannot import
76 - * something as rdf:bla if you do not want rdf to be the standard
77 - * namespace that is already given in every RDF export).
78 - */
79 - private $global_namespaces;
80 -
81 - /**
82 - * Unprinted XML is composed from the strings $pre_ns_buffer and $post_ns_buffer.
83 - * The split between the two is such that one can append additional namespace
84 - * declarations to $pre_ns_buffer so that they affect all current elements. The
85 - * buffers are flushed during output in order to achieve "streaming" RDF export
86 - * for larger files.
87 - */
88 - private $pre_ns_buffer;
89 -
90 - /**
91 - * See documentation for SMWOWLExport::pre_ns_buffer.
92 - */
93 - private $post_ns_buffer;
94 -
95 - /**
96 - * Boolean that is true as long as nothing was flushed yet. Indicates that
97 - * extra namespaces can still become global.
98 - */
99 - private $first_flush;
100 -
101 - /**
102 - * Integer that counts down the number of objects we still process before
103 - * doing the first flush. Aggregating some output before flushing is useful
104 - * to get more namespaces global. Flushing will only happen if $delay_flush
105 - * is 0.
106 - */
107 - private $delay_flush;
108 -
109 - /**
110 - * Constructor.
111 - */
112 - public function __construct() {
113 - $this->element_queue = array();
114 - $this->element_done = array();
115 - $this->date = '';
116 - }
117 -
118 - /**
119 - * Sets a date as a filter. Any page that has not been changed since that date
120 - * will not be exported. The date has to be a string in XML Schema format.
121 - */
122 - public function setDate( $date ) {
123 - $timeint = strtotime( $date );
124 - $stamp = date( "YmdHis", $timeint );
125 - $this->date = $stamp;
126 - }
127 -
128 - /**
129 - * This function prints all selected pages. The parameter $recursion determines
130 - * how referenced resources are treated:
131 - * '0' : add brief declarations for each
132 - * '1' : add full descriptions for each, thus beginning real recursion (and
133 - * probably retrieving the whole wiki ...)
134 - * else: ignore them, though -1 might become a synonym for "export *all*" in the future
135 - * The parameter $backlinks determines whether or not subjects of incoming
136 - * properties are exported as well. Enables "browsable RDF."
137 - */
138 - public function printPages( $pages, $recursion = 1, $backlinks = true ) {
139 - wfProfileIn( "RDF::PrintPages" );
140 -
141 - $linkCache =& LinkCache::singleton();
142 - $this->pre_ns_buffer = '';
143 - $this->post_ns_buffer = '';
144 - $this->first_flush = true;
145 - $this->delay_flush = 10; // flush only after (fully) printing 11 objects
146 - $this->extra_namespaces = array();
147 -
148 - if ( count( $pages ) == 1 ) { // ensure that ontologies that are retrieved as linked data are not confused with their subject!
149 - $ontologyuri = SMWExporter::expandURI( '&export;' ) . '/' . urlencode( end( $pages ) );
150 - } else { // use empty URI, i.e. "location" as URI otherwise
151 - $ontologyuri = '';
152 - }
153 -
154 - $this->printHeader( $ontologyuri ); // also inits global namespaces
155 -
156 - wfProfileIn( "RDF::PrintPages::PrepareQueue" );
157 -
158 - // transform pages into queued export titles
159 - $cur_queue = array();
160 -
161 - foreach ( $pages as $page ) {
162 - $title = Title::newFromText( $page );
163 -
164 - if ( null === $title ) continue; // invalid title name given
165 -
166 - $st = new SMWSmallTitle();
167 - $st->dbkey = $title->getDBkey();
168 - $st->namespace = $title->getNamespace();
169 -
170 - $cur_queue[] = $st;
171 - }
172 -
173 - wfProfileOut( "RDF::PrintPages::PrepareQueue" );
174 -
175 - while ( count( $cur_queue ) > 0 ) {
176 - // first, print all selected pages
177 - foreach ( $cur_queue as $st ) {
178 - wfProfileIn( "RDF::PrintPages::PrintOne" );
179 -
180 - $this->printObject( $st, true, $backlinks );
181 -
182 - wfProfileOut( "RDF::PrintPages::PrintOne" );
183 -
184 - if ( $this->delay_flush > 0 ) $this->delay_flush--;
185 - }
186 -
187 - // prepare array for next iteration
188 - $cur_queue = array();
189 -
190 - if ( 1 == $recursion ) {
191 - $cur_queue = $this->element_queue + $cur_queue; // make sure the array is *duplicated* instead of copying its ref
192 - $this->element_queue = array();
193 - }
194 -
195 - $linkCache->clear();
196 - }
197 -
198 - // for pages not processed recursively, print at least basic declarations
199 - wfProfileIn( "RDF::PrintPages::Auxiliary" );
200 - $this->date = ''; // no date restriction for the rest!
201 -
202 - if ( !empty( $this->element_queue ) ) {
203 - if ( $this->pre_ns_buffer != '' ) {
204 - $this->post_ns_buffer .= "\t<!-- auxiliary definitions -->\n";
205 - } else {
206 - print "\t<!-- auxiliary definitions -->\n"; // just print this comment, so that later outputs still find the empty pre_ns_buffer!
207 - }
208 -
209 - while ( !empty( $this->element_queue ) ) {
210 - $st = array_pop( $this->element_queue );
211 - $this->printObject( $st, false, false );
212 - }
213 - }
214 -
215 - wfProfileOut( "RDF::PrintPages::Auxiliary" );
216 -
217 - $this->printFooter();
218 - $this->flushBuffers( true );
219 -
220 - wfProfileOut( "RDF::PrintPages" );
221 - }
222 -
223 - /**
224 - * This function prints RDF for *all* pages within the wiki, and for all
225 - * elements that are referred to in the exported RDF.
226 - */
227 - public function printAll( $outfile, $ns_restriction = false, $delay, $delayeach ) {
228 - global $smwgNamespacesWithSemanticLinks;
229 -
230 - $linkCache =& LinkCache::singleton();
231 -
232 - $db = & wfGetDB( DB_MASTER );
233 - $this->pre_ns_buffer = '';
234 - $this->post_ns_buffer = '';
235 - $this->first_flush = true;
236 -
237 - if ( $outfile === false ) {
238 - // $this->delay_flush = 10000; //flush only after (fully) printing 10001 objects,
239 - $this->delay_flush = - 1; // do not flush buffer at all
240 - } else {
241 - $file = fopen( $outfile, 'w' );
242 -
243 - if ( !$file ) {
244 - print "\nCannot open \"$outfile\" for writing.\n";
245 - return false;
246 - }
247 -
248 - $this->delay_flush = - 1; // never flush, we flush in another way
249 - }
250 -
251 - $this->extra_namespaces = array();
252 - $this->printHeader(); // also inits global namespaces
253 -
254 - $start = 1;
255 - $end = $db->selectField( 'page', 'max(page_id)', false, $outfile );
256 -
257 - $a_count = 0; $d_count = 0; // DEBUG
258 -
259 - $delaycount = $delayeach;
260 -
261 - for ( $id = $start; $id <= $end; $id++ ) {
262 - $title = Title::newFromID( $id );
263 -
264 - if ( ( $title === null ) || !smwfIsSemanticsProcessed( $title->getNamespace() ) ) continue;
265 - if ( !SMWOWLExport::fitsNsRestriction( $ns_restriction, $title->getNamespace() ) ) continue;
266 -
267 - $st = new SMWSmallTitle();
268 - $st->dbkey = $title->getDBkey();
269 - $st->namespace = $title->getNamespace();
270 -
271 - $cur_queue = array( $st );
272 - $a_count++; // DEBUG
273 - $full_export = true;
274 -
275 - while ( count( $cur_queue ) > 0 ) {
276 - foreach ( $cur_queue as $st ) {
277 - wfProfileIn( "RDF::PrintAll::PrintOne" );
278 - $this->printObject( $st, $full_export, false );
279 - wfProfileOut( "RDF::PrintAll::PrintOne" );
280 - }
281 -
282 - $full_export = false; // make sure added dependencies do not pull more than needed
283 - // resolve dependencies that will otherwise not be printed
284 - $cur_queue = array();
285 -
286 - foreach ( $this->element_queue as $key => $staux ) {
287 - $taux = Title::makeTitle( $staux->namespace, $staux->dbkey );
288 -
289 - if ( !smwfIsSemanticsProcessed( $staux->namespace ) || ( $staux->modifier !== '' ) ||
290 - !SMWOWLExport::fitsNsRestriction( $ns_restriction, $staux->namespace ) ||
291 - ( !$taux->exists() ) ) {
292 - // Note: we do not need to check the cache to guess if an element was already
293 - // printed. If so, it would not be included in the queue in the first place.
294 - $cur_queue[] = $staux;
295 - // $this->post_ns_buffer .= "<!-- Adding dependency '" . $staux->getHash() . "' -->"; //DEBUG
296 - $d_count++; // DEBUG
297 - } else {
298 - unset( $this->element_queue[$key] ); // carrying around the values we do not
299 - // want to export now is a potential memory leak
300 - }
301 - }
302 -
303 - // sleep each $delaycount for $delay ms to be nice to the server
304 - if ( ( $delaycount-- < 0 ) && ( $delayeach != 0 ) ) {
305 - usleep( $delay );
306 - $delaycount = $delayeach;
307 - }
308 - }
309 -
310 - if ( $outfile !== false ) { // flush buffer
311 - fwrite( $file, $this->post_ns_buffer );
312 - $this->post_ns_buffer = '';
313 - }
314 -
315 - $linkCache->clear();
316 - }
317 -
318 - // DEBUG:
319 - $this->post_ns_buffer .= "<!-- Processed $a_count regular articles. -->\n";
320 - $this->post_ns_buffer .= "<!-- Processed $d_count added dependencies. -->\n";
321 - $this->post_ns_buffer .= "<!-- Final cache size was " . sizeof( $this->element_done ) . ". -->\n";
322 -
323 - $this->printFooter();
324 -
325 - if ( $outfile === false ) {
326 - $this->flushBuffers( true );
327 - } else { // prepend headers to file, there is no really efficient solution (`cat(1)`) for this it seems
328 - // print head:
329 - fclose( $file );
330 -
331 - foreach ( $this->extra_namespaces as $nsshort => $nsuri ) {
332 - $this->pre_ns_buffer .= "\n\txmlns:$nsshort=\"$nsuri\"";
333 - }
334 -
335 - $full_export = file_get_contents( $outfile );
336 - $full_export = $this->pre_ns_buffer . $full_export . $this->post_ns_buffer;
337 -
338 - $file = fopen( $outfile, 'w' );
339 - fwrite( $file, $full_export );
340 - fclose( $file );
341 - }
342 - }
343 -
344 - /**
345 - * Print basic definitions for a list of pages ordered by their page id.
346 - * Offset and limit refer to the count of existing pages, not to the
347 - * page id.
348 - */
349 - public function printPageList( $offset = 0, $limit = 30 ) {
350 - wfProfileIn( "RDF::PrintPageList" );
351 -
352 - $db = & wfGetDB( DB_MASTER );
353 - $this->pre_ns_buffer = '';
354 - $this->post_ns_buffer = '';
355 - $this->first_flush = true;
356 - $this->delay_flush = 10; // flush only after (fully) printing 11 objects
357 - $this->extra_namespaces = array();
358 - $this->printHeader(); // also inits global namespaces
359 - $linkCache =& LinkCache::singleton();
360 -
361 - global $smwgNamespacesWithSemanticLinks;
362 - $query = '';
363 -
364 - foreach ( $smwgNamespacesWithSemanticLinks as $ns => $enabled ) {
365 - if ( $enabled ) {
366 - if ( $query != '' ) $query .= ' OR ';
367 - $query .= 'page_namespace = ' . $db->addQuotes( $ns );
368 - }
369 - }
370 -
371 - $res = $db->select( $db->tableName( 'page' ),
372 - 'page_id,page_title,page_namespace', $query
373 - , 'SMW::RDF::PrintPageList', array( 'ORDER BY' => 'page_id ASC', 'OFFSET' => $offset, 'LIMIT' => $limit ) );
374 - $foundpages = false;
375 -
376 - while ( $row = $db->fetchObject( $res ) ) {
377 - $foundpages = true;
378 - // $t = Title::makeTitle($row->page_namespace, $row->page_title);
379 - // if ($t === null) continue;
380 - // $et = new SMWExportTitle($t, $this);
381 - $st = new SMWSmallTitle();
382 -
383 - $st->dbkey = $row->page_title;
384 - $st->namespace = $row->page_namespace;
385 -
386 - $this->printObject( $st, false, false );
387 -
388 - if ( $this->delay_flush > 0 ) $this->delay_flush--;
389 -
390 - $linkCache->clear();
391 - }
392 - if ( $foundpages ) { // add link to next result page
393 - if ( strpos( SMWExporter::expandURI( '&wikiurl;' ), '?' ) === false ) { // check whether we have title as a first parameter or in URL
394 - $nexturl = SMWExporter::expandURI( '&export;?offset=' ) . ( $offset + $limit );
395 - } else {
396 - $nexturl = SMWExporter::expandURI( '&export;&amp;offset=' ) . ( $offset + $limit );
397 - }
398 -
399 - $this->post_ns_buffer .=
400 - "\t<!-- Link to next set of results -->\n" .
401 - "\t<owl:Thing rdf:about=\"$nexturl\">\n" .
402 - "\t\t<rdfs:isDefinedBy rdf:resource=\"$nexturl\"/>\n" .
403 - "\t</owl:Thing>\n";
404 - }
405 -
406 - $this->printFooter();
407 - $this->flushBuffers( true );
408 -
409 - wfProfileOut( "RDF::PrintPageList" );
410 - }
411 -
412 -
413 - /**
414 - * Print basic information about this site.
415 - */
416 - public function printWikiInfo() {
417 - wfProfileIn( "RDF::PrintWikiInfo" );
418 -
419 - global $wgSitename, $wgLanguageCode;
420 -
421 - $db = & wfGetDB( DB_MASTER );
422 - $this->pre_ns_buffer = '';
423 - $this->post_ns_buffer = '';
424 - $this->extra_namespaces = array();
425 - $data = new SMWExpData( new SMWExpResource( '&wiki;#wiki' ) );
426 -
427 - $ed = new SMWExpData( SMWExporter::getSpecialElement( 'swivt', 'Wikisite' ) );
428 - $data->addPropertyObjectValue( SMWExporter::getSpecialElement( 'rdf', 'type' ), $ed );
429 -
430 - $ed = new SMWExpData( new SMWExpLiteral( $wgSitename ) );
431 - $data->addPropertyObjectValue( SMWExporter::getSpecialElement( 'rdfs', 'label' ), $ed );
432 -
433 - $ed = new SMWExpData( new SMWExpLiteral( $wgSitename, null, 'http://www.w3.org/2001/XMLSchema#string' ) );
434 - $data->addPropertyObjectValue( SMWExporter::getSpecialElement( 'swivt', 'siteName' ), $ed );
435 -
436 - $ed = new SMWExpData( new SMWExpLiteral( SMWExporter::expandURI( '&wikiurl;' ), null, 'http://www.w3.org/2001/XMLSchema#string' ) );
437 - $data->addPropertyObjectValue( SMWExporter::getSpecialElement( 'swivt', 'pagePrefix' ), $ed );
438 -
439 - $ed = new SMWExpData( new SMWExpLiteral( SMW_VERSION, null, 'http://www.w3.org/2001/XMLSchema#string' ) );
440 - $data->addPropertyObjectValue( SMWExporter::getSpecialElement( 'swivt', 'smwVersion' ), $ed );
441 -
442 - $ed = new SMWExpData( new SMWExpLiteral( $wgLanguageCode, null, 'http://www.w3.org/2001/XMLSchema#string' ) );
443 - $data->addPropertyObjectValue( SMWExporter::getSpecialElement( 'swivt', 'langCode' ), $ed );
444 -
445 - // stats
446 - $ed = new SMWExpData( new SMWExpLiteral( SiteStats::pages(), null, 'http://www.w3.org/2001/XMLSchema#int' ) );
447 - $data->addPropertyObjectValue( SMWExporter::getSpecialElement( 'swivt', 'pageCount' ), $ed );
448 -
449 - $ed = new SMWExpData( new SMWExpLiteral( SiteStats::articles(), null, 'http://www.w3.org/2001/XMLSchema#int' ) );
450 - $data->addPropertyObjectValue( SMWExporter::getSpecialElement( 'swivt', 'contentPageCount' ), $ed );
451 -
452 - $ed = new SMWExpData( new SMWExpLiteral( SiteStats::images(), null, 'http://www.w3.org/2001/XMLSchema#int' ) );
453 - $data->addPropertyObjectValue( SMWExporter::getSpecialElement( 'swivt', 'mediaCount' ), $ed );
454 -
455 - $ed = new SMWExpData( new SMWExpLiteral( SiteStats::edits(), null, 'http://www.w3.org/2001/XMLSchema#int' ) );
456 - $data->addPropertyObjectValue( SMWExporter::getSpecialElement( 'swivt', 'editCount' ), $ed );
457 -
458 - $ed = new SMWExpData( new SMWExpLiteral( SiteStats::views(), null, 'http://www.w3.org/2001/XMLSchema#int' ) );
459 - $data->addPropertyObjectValue( SMWExporter::getSpecialElement( 'swivt', 'viewCount' ), $ed );
460 -
461 - $ed = new SMWExpData( new SMWExpLiteral( SiteStats::users(), null, 'http://www.w3.org/2001/XMLSchema#int' ) );
462 - $data->addPropertyObjectValue( SMWExporter::getSpecialElement( 'swivt', 'userCount' ), $ed );
463 -
464 - $ed = new SMWExpData( new SMWExpLiteral( SiteStats::admins(), null, 'http://www.w3.org/2001/XMLSchema#int' ) );
465 - $data->addPropertyObjectValue( SMWExporter::getSpecialElement( 'swivt', 'adminCount' ), $ed );
466 -
467 - $mainpage = Title::newMainPage();
468 -
469 - if ( $mainpage !== null ) {
470 - $ed = new SMWExpData( new SMWExpResource( $mainpage->getFullURL() ) );
471 - $data->addPropertyObjectValue( SMWExporter::getSpecialElement( 'swivt', 'mainPage' ), $ed );
472 - }
473 -
474 - $this->printHeader(); // also inits global namespaces
475 - $this->printExpData( $data );
476 -
477 - if ( strpos( SMWExporter::expandURI( '&wikiurl;' ), '?' ) === false ) { // check whether we have title as a first parameter or in URL
478 - $nexturl = SMWExporter::expandURI( '&export;?offset=0' );
479 - } else {
480 - $nexturl = SMWExporter::expandURI( '&export;&amp;offset=0' );
481 - }
482 -
483 - $this->post_ns_buffer .=
484 - "\t<!-- Link to semantic page list -->\n" .
485 - "\t<owl:Thing rdf:about=\"$nexturl\">\n" .
486 - "\t\t<rdfs:isDefinedBy rdf:resource=\"$nexturl\"/>\n" .
487 - "\t</owl:Thing>\n";
488 -
489 - $this->printFooter();
490 - $this->flushBuffers( true );
491 -
492 - wfProfileOut( "RDF::PrintWikiInfo" );
493 - }
494 -
495 - /* Functions for exporting RDF */
496 -
497 - protected function makeValueEntityString( $string ) {
498 - return "'" . str_replace( '%','&#37;',$string ) . "'";
499 - }
500 -
501 - protected function printHeader( $ontologyuri = '' ) {
502 - global $wgContLang;
503 -
504 - $this->pre_ns_buffer .=
505 - "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" .
506 - "<!DOCTYPE rdf:RDF[\n" .
507 - "\t<!ENTITY rdf " . $this->makeValueEntityString( SMWExporter::expandURI( '&rdf;' ) ) . ">\n" .
508 - "\t<!ENTITY rdfs " . $this->makeValueEntityString( SMWExporter::expandURI( '&rdfs;' ) ) . ">\n" .
509 - "\t<!ENTITY owl " . $this->makeValueEntityString( SMWExporter::expandURI( '&owl;' ) ) . ">\n" .
510 - "\t<!ENTITY swivt " . $this->makeValueEntityString( SMWExporter::expandURI( '&swivt;' ) ) . ">\n" .
511 - // A note on "wiki": this namespace is crucial as a fallback when it would be illegal to start e.g. with a number. In this case, one can always use wiki:... followed by "_" and possibly some namespace, since _ is legal as a first character.
512 - "\t<!ENTITY wiki " . $this->makeValueEntityString( SMWExporter::expandURI( '&wiki;' ) ) . ">\n" .
513 - "\t<!ENTITY property " . $this->makeValueEntityString( SMWExporter::expandURI( '&property;' ) ) . ">\n" .
514 - "\t<!ENTITY wikiurl " . $this->makeValueEntityString( SMWExporter::expandURI( '&wikiurl;' ) ) . ">\n" .
515 - "]>\n\n" .
516 - "<rdf:RDF\n" .
517 - "\txmlns:rdf=\"&rdf;\"\n" .
518 - "\txmlns:rdfs=\"&rdfs;\"\n" .
519 - "\txmlns:owl =\"&owl;\"\n" .
520 - "\txmlns:swivt=\"&swivt;\"\n" .
521 - "\txmlns:wiki=\"&wiki;\"\n" .
522 - "\txmlns:property=\"&property;\"";
523 - $this->global_namespaces = array( 'rdf' => true, 'rdfs' => true, 'owl' => true, 'swivt' => true, 'wiki' => true, 'property' => true );
524 -
525 - $this->post_ns_buffer .=
526 - ">\n\t<!-- Ontology header -->\n" .
527 - "\t<owl:Ontology rdf:about=\"$ontologyuri\">\n" .
528 - "\t\t<swivt:creationDate rdf:datatype=\"http://www.w3.org/2001/XMLSchema#dateTime\">" . date( DATE_W3C ) . "</swivt:creationDate>\n" .
529 - "\t\t<owl:imports rdf:resource=\"http://semantic-mediawiki.org/swivt/1.0\" />\n" .
530 - "\t</owl:Ontology>\n" .
531 - "\t<!-- exported page data -->\n";
532 - }
533 -
534 - /**
535 - * Prints the footer.
536 - */
537 - protected function printFooter() {
538 - $this->post_ns_buffer .= "\t<!-- Created by Semantic MediaWiki, http://semantic-mediawiki.org/ -->\n";
539 - $this->post_ns_buffer .= '</rdf:RDF>';
540 - }
541 -
	/**
	 * Serialise the given semantic data as an RDF/XML element and append it
	 * to the output buffers, then call flushBuffers().
	 *
	 * The element name is the QName of the data's main type. Each property
	 * value becomes a child element: literals are written as escaped text
	 * content, collections and values with own properties are serialised
	 * recursively, and all remaining values are written as empty elements
	 * (with rdf:resource if the value is an SMWExpResource).
	 *
	 * @param $data SMWExpData the data to serialise
	 * @param $indent string extra indentation prepended to each output line;
	 * recursive calls extend it by two tabs
	 */
	protected function printExpData( /*SMWExpData*/ $data, $indent = '' ) {
		$type = $data->extractMainType()->getQName();

		// If pre_ns_buffer is empty, the start tag is put there so that the
		// xmlns declarations which flushBuffers() prints between the two
		// buffers end up inside this start tag.
		if ( '' == $this->pre_ns_buffer ) { // start new ns block
			$this->pre_ns_buffer .= "\t$indent<$type";
		} else {
			$this->post_ns_buffer .= "\t$indent<$type";
		}

		// NOTE(review): the subject name is written into the attribute without escaping.
		if ( ( $data->getSubject() instanceof SMWExpLiteral ) || ( $data->getSubject() instanceof SMWExpResource ) ) {
			$this->post_ns_buffer .= ' rdf:about="' . $data->getSubject()->getName() . '"';
		} // else: blank node

		if ( count( $data->getProperties() ) == 0 ) {
			// no properties: self-closing element
			$this->post_ns_buffer .= " />\n";
		} else {
			$this->post_ns_buffer .= ">\n";

			foreach ( $data->getProperties() as $property ) {
				// the property itself may need a declaration later on
				$this->queueElement( $property );

				foreach ( $data->getValues( $property ) as $value ) {
					$this->post_ns_buffer .= "\t\t$indent<" . $property->getQName();
					// make sure the property's namespace prefix gets declared
					$this->addExtraNamespace( $property->getNamespaceID(), $property->getNamespace() );
					$object = $value->getSubject();

					if ( $object instanceof SMWExpLiteral ) {
						if ( $object->getDatatype() != '' ) {
							$this->post_ns_buffer .= ' rdf:datatype="' . $object->getDatatype() . '"';
						}

						// literal text: escape &, > and < for use as element content
						$this->post_ns_buffer .= '>' .
							str_replace( array( '&', '>', '<' ), array( '&amp;', '&gt;', '&lt;' ), $object->getName() ) .
							'</' . $property->getQName() . ">\n";
					} else { // bnode or resource, may have subdescriptions
						$collection = $value->getCollection();

						if ( $collection != false ) {
							// value is a collection: serialise each member as a nested element
							$this->post_ns_buffer .= " rdf:parseType=\"Collection\">\n";

							foreach ( $collection as $subvalue ) {
								$this->printExpData( $subvalue, $indent . "\t\t" );
							}

							$this->post_ns_buffer .= "\t\t$indent</" . $property->getQName() . ">\n";
						} elseif ( count( $value->getProperties() ) > 0 ) {
							// value has its own properties: serialise it inline
							$this->post_ns_buffer .= ">\n";
							$this->printExpData( $value, $indent . "\t\t" );
							$this->post_ns_buffer .= "\t\t$indent</" . $property->getQName() . ">\n";
						} else {
							// plain reference (or blank node without data)
							if ( $object instanceof SMWExpResource ) {
								$this->post_ns_buffer .= ' rdf:resource="' . $object->getName() . '"';
								$this->queueElement( $object ); // queue only non-explicated resources
							}

							$this->post_ns_buffer .= "/>\n";
						}
					}
				}
			}

			$this->post_ns_buffer .= "\t$indent</" . $type . ">\n";
		}

		// Called on every invocation, including the recursive ones above;
		// flushBuffers() itself decides whether output is actually printed.
		$this->flushBuffers();
	}
611 -
	/**
	 * Print the triples associated to a specific page. Resources referenced
	 * by the printed data are queued by printExpData() via queueElement().
	 * NOTE(review): this comment used to say that the queued references "get
	 * printed in the printFooter-function"; printFooter() as defined in this
	 * class only writes the closing markup, so the queue is presumably
	 * processed by the caller -- confirm.
	 *
	 * Pages already present in $this->element_done are skipped, as are pages
	 * whose latest revision timestamp is smaller than $this->date (when
	 * $this->date is not the empty string).
	 *
	 * @param $st SMWSmallTitle wrapping the page to be exported
	 * @param $fullexport Boolean to define whether all the data for the page should
	 * be exported, or whether just a definition of the given title.
	 * @param $backlinks Boolean specifying if properties linking to the exported title
	 * should be included. Only has an effect when $fullexport is also true.
	 */
	protected function printObject( /*SMWSmallTitle*/ $st, $fullexport = true, $backlinks = false ) {
		global $smwgMW_1_14;

		if ( array_key_exists( $st->getHash(), $this->element_done ) ) return; // do not export twice

		$value = SMWWikiPageValue::makePage( $st->dbkey, $st->namespace );

		if ( $this->date !== '' ) { // check date restriction if given
			// The signature of Revision::getTimeStampFromID() is chosen based on $smwgMW_1_14.
			$rev = $smwgMW_1_14 ? Revision::getTimeStampFromID( $value->getTitle(), $value->getTitle()->getLatestRevID() ):Revision::getTimeStampFromID( $value->getTitle()->getLatestRevID() );
			if ( $rev < $this->date ) return;
		}

		// For a non-full export only the core properties are retrieved (see getSemanticData()).
		$data = SMWExporter::makeExportData( $this->getSemanticData( $value, !$fullexport ), $st->modifier );
		$this->printExpData( $data ); // serialise

		// let other extensions add additional RDF data for this page
		$additionalDataArray = array();
		wfRunHooks( 'smwAddToRDFExport', array( $value->getTitle(), &$additionalDataArray, $fullexport, $backlinks ) );
		foreach ( $additionalDataArray as $additionalData ) {
			$this->printExpData( $additionalData ); // serialise
		}

		$this->markAsDone( $st );

		// possibly add backlinks
		if ( ( $fullexport ) && ( $backlinks ) ) {
			wfProfileIn( "RDF::PrintPages::GetBacklinks" );
			$inRels = smwfGetStore()->getInProperties( $value );

			// For every property pointing to this page, print a stub of each
			// subject that carries only its core data plus the linking property.
			foreach ( $inRels as $inRel ) {
				$inSubs = smwfGetStore()->getPropertySubjects( $inRel, $value );

				foreach ( $inSubs as $inSub ) {
					$stb = new SMWSmallTitle();
					$stb->dbkey = $inSub->getDBkey();
					$stb->namespace = $inSub->getNamespace();

					// These stubs are printed directly and are not marked as done.
					if ( !array_key_exists( $stb->getHash(), $this->element_done ) ) {
						$semdata = $this->getSemanticData( $inSub, true );
						$semdata->addPropertyObjectValue( $inRel, $value );
						$data = SMWExporter::makeExportData( $semdata );
						$this->printExpData( $data );
					}
				}
			}

			if ( NS_CATEGORY === $value->getNamespace() ) { // also print elements of categories
				$options = new SMWRequestOptions();
				$options->limit = 100; // Categories can be large, always use limit
				$instances = smwfGetStore()->getPropertySubjects( SMWPropertyValue::makeProperty( '_INST' ), $value, $options );
				$pinst = SMWPropertyValue::makeProperty( '_INST' );

				foreach ( $instances as $instance ) {
					$stb = new SMWSmallTitle();
					$stb->dbkey = $instance->getDBkey();
					$stb->namespace = $instance->getNamespace();

					if ( !array_key_exists( $stb->getHash(), $this->element_done ) ) {
						// stub of the member, with the '_INST' link to this category added
						$semdata = $this->getSemanticData( $instance, true );
						$semdata->addPropertyObjectValue( $pinst, $value );
						$data = SMWExporter::makeExportData( $semdata );
						$this->printExpData( $data );
					}
				}
			} elseif ( SMW_NS_CONCEPT === $value->getNamespace() ) { // print concept members (slightly different code)
				// Members of a concept are obtained by running a query (limit 100) for the concept.
				$desc = new SMWConceptDescription( $value->getTitle() );
				$desc->addPrintRequest( new SMWPrintRequest( SMWPrintRequest::PRINT_THIS, '' ) );
				$query = new SMWQuery( $desc );
				$query->setLimit( 100 );

				$res = smwfGetStore()->getQueryResult( $query );
				$resarray = $res->getNext();
				$pinst = SMWPropertyValue::makeProperty( '_INST' );

				// getNext() returns false once all result rows have been consumed
				while ( $resarray !== false ) {
					$instance = end( $resarray )->getNextObject();

					$stb = new SMWSmallTitle();
					$stb->dbkey = $instance->getDBkey();
					$stb->namespace = $instance->getNamespace();

					if ( !array_key_exists( $stb->getHash(), $this->element_done ) ) {
						$semdata = $this->getSemanticData( $instance, true );
						$semdata->addPropertyObjectValue( $pinst, $value );
						$data = SMWExporter::makeExportData( $semdata );
						$this->printExpData( $data );
					}

					$resarray = $res->getNext();
				}
			}

			wfProfileOut( "RDF::PrintPages::GetBacklinks" );
		}
	}
717 -
718 - /**
719 - * Flush all buffers and extra namespaces by printing them to stdout and flushing
720 - * the output buffers afterwards.
721 - *
722 - * @param force if true, the flush cannot be delayed any longer
723 - */
724 - protected function flushBuffers( $force = false ) {
725 - if ( $this->post_ns_buffer == '' ) return; // nothing to flush (every non-empty pre_ns_buffer also requires a non-empty post_ns_buffer)
726 - if ( ( 0 != $this->delay_flush ) && !$force ) return; // wait a little longer
727 -
728 - print $this->pre_ns_buffer;
729 - $this->pre_ns_buffer = '';
730 -
731 - foreach ( $this->extra_namespaces as $nsshort => $nsuri ) {
732 - if ( $this->first_flush ) {
733 - $this->global_namespaces[$nsshort] = true;
734 - print "\n\t";
735 - } else print ' ';
736 -
737 - print "xmlns:$nsshort=\"$nsuri\"";
738 - }
739 -
740 - $this->extra_namespaces = array();
741 - print $this->post_ns_buffer;
742 - $this->post_ns_buffer = '';
743 -
744 - // Ship data in small chunks (even though browsers often do not display anything
745 - // before the file is complete -- this might be due to syntax highlighting features
746 - // for app/xml). You may want to sleep(1) here for debugging this.
747 - ob_flush();
748 - flush();
749 -
750 - $this->first_flush = false;
751 - }
752 -
753 - /**
754 - * Add an extra namespace that was encountered during output. The method
755 - * checks whether the required namespace is available globally and adds
756 - * it to the list of extra_namespace otherwise.
757 - */
758 - public function addExtraNamespace( $nsshort, $nsuri ) {
759 - if ( !array_key_exists( $nsshort, $this->global_namespaces ) ) {
760 - $this->extra_namespaces[$nsshort] = $nsuri;
761 - }
762 - }
763 -
764 - /**
765 - * Add a given SMWExpResource to the export queue if needed.
766 - */
767 - public function queueElement( $element ) {
768 - if ( !( $element instanceof SMWExpResource ) ) return; // only Resources are queued
769 - $title = $element->getDataValue();
770 -
771 - if ( $title instanceof SMWWikiPageValue ) {
772 - $spt = new SMWSmallTitle();
773 - $title = $title->getTitle();
774 - $spt->dbkey = $title->getDBkey();
775 - $spt->namespace = $title->getNamespace();
776 - $spt->modifier = $element->getModifier();
777 -
778 - if ( !array_key_exists( $spt->getHash(), $this->element_done ) ) {
779 - $this->element_queue[$spt->getHash()] = $spt;
780 - }
781 - }
782 - }
783 -
784 - /**
785 - * Mark an article as done while making sure that the cache used for this
786 - * stays reasonably small. Input is given as an SMWExportArticle object.
787 - */
788 - protected function markAsDone( $st ) {
789 - if ( count( $this->element_done ) >= self::MAX_CACHE_SIZE ) {
790 - $this->element_done = array_slice( $this->element_done,
791 - self::CACHE_BACKJUMP,
792 - self::MAX_CACHE_SIZE - self::CACHE_BACKJUMP,
793 - true );
794 - }
795 - $this->element_done[$st->getHash()] = $st; // mark title as done
796 - unset( $this->element_queue[$st->getHash()] ); // make sure it is not in the queue
797 - }
798 -
799 - /**
800 - * Retrieve a copy of the semantic data for a wiki page, possibly filtering
801 - * it so that only essential properties are included (in some cases, we only
802 - * want to export stub information about a page).
803 - * We make a copy of the object since we may want to add more data later on
804 - * and we do not want to modify the store's result which may be used for
805 - * caching purposes elsewhere.
806 - */
807 - protected function getSemanticData( $pagevalue, $core_props_only ) {
808 - $semdata = smwfGetStore()->getSemanticData( $pagevalue, $core_props_only ? array( '__spu', '__typ', '__imp' ) : false ); // advise store to retrieve only core things
809 - if ( $core_props_only ) { // be sure to filter all non-relevant things that may still be present in the retrieved
810 - $result = new SMWSemanticData( $pagevalue );
811 - foreach ( array( '__spu', '__typ', '__imp' ) as $propid ) {
812 - $prop = SMWPropertyValue::makeProperty( $propid );
813 - $values = $semdata->getPropertyValues( $prop );
814 - foreach ( $values as $dv ) {
815 - $result->addPropertyObjectValue( $prop, $dv );
816 - }
817 - }
818 - } else {
819 - $result = clone $semdata;
820 - }
821 - return $result;
822 - }
823 -
824 - /**
825 - * This function checks whether some article fits into a given namespace restriction.
826 - * FALSE means "no restriction," non-negative restictions require to check whether
827 - * the given number equals the given namespace. A restriction of -1 requires the
828 - * namespace to be different from Category:, Relation:, Attribute:, and Type:.
829 - */
830 - static public function fitsNsRestriction( $res, $ns ) {
831 - if ( $res === false ) return true;
832 - if ( is_array( $res ) ) return in_array( $ns, $res );
833 - if ( $res >= 0 ) return ( $res == $ns );
834 - return ( ( $res != NS_CATEGORY ) && ( $res != SMW_NS_PROPERTY ) && ( $res != SMW_NS_TYPE ) );
835 - }
836 -
837 -}
Index: trunk/extensions/SemanticMediaWiki/includes/export/SMW_Exp_Data.php
@@ -40,7 +40,7 @@
4141 $rdftype = SMWExporter::getSpecialElement( 'rdf', 'type' );
4242 $rdffirst = SMWExporter::getSpecialElement( 'rdf', 'first' );
4343 $rdfrest = SMWExporter::getSpecialElement( 'rdf', 'rest' );
44 - $result = new SMWExpData( new SMWExpElement( '' ) ); // bnode
 44+ $result = new SMWExpData( new SMWExpResource( '' ) ); // bnode
4545 $result->addPropertyObjectValue( $rdftype, new SMWExpData( SMWExporter::getSpecialElement( 'rdf', 'List' ) ) );
4646 $result->addPropertyObjectValue( $rdffirst, array_shift( $elements ) );
4747 $result->addPropertyObjectValue( $rdfrest, SMWExpData::makeCollection( $elements ) );
@@ -58,7 +58,7 @@
5959 /**
6060 * Set the subject element.
6161 */
62 - public function setSubject( SMWExpElement $subject ) {
 62+ public function setSubject( SMWExpResource $subject ) {
6363 $this->m_subject = $subject;
6464 }
6565
@@ -66,7 +66,7 @@
6767 * Store a value for a property identified by its title object. No duplicate elimination as this
6868 * is usually done in SMWSemanticData already (which is typically used to generate this object)
6969 */
70 - public function addPropertyObjectValue( SMWExpElement $property, SMWExpData $child ) {
 70+ public function addPropertyObjectValue( SMWExpResource $property, SMWExpData $child ) {
7171 if ( !array_key_exists( $property->getName(), $this->m_edges ) ) {
7272 $this->m_children[$property->getName()] = array();
7373 $this->m_edges[$property->getName()] = $property;
@@ -84,7 +84,7 @@
8585 /**
8686 * Return the list of SMWExpData values associated to some property (element)
8787 */
88 - public function getValues( /*SMWExpElement*/ $property ) {
 88+ public function getValues( SMWExpResource $property ) {
8989 if ( array_key_exists( $property->getName(), $this->m_children ) ) {
9090 return $this->m_children[$property->getName()];
9191 } else {
@@ -185,7 +185,7 @@
186186 if ( ( $name == '' ) || ( $name[0] == '_' ) ) { // bnode, rename ID to avoid unifying bnodes of different contexts
187187 // TODO: should we really rename bnodes of the form "_id" here?
188188 $child = clone $child;
189 - $subject = new SMWExpElement( '_' . $smwgBnodeCount++, $child->getSubject()->getDataValue() );
 189+ $subject = new SMWExpResource( '_' . $smwgBnodeCount++, $child->getSubject()->getDataValue() );
190190 $child->setSubject( $subject );
191191 }
192192 $result[] = array( $this->m_subject, $edge, $child->getSubject() );
Index: trunk/extensions/SemanticMediaWiki/includes/export/SMW_Exp_Element.php
@@ -1,7 +1,7 @@
22 <?php
33 /**
4 - * SMWExpElement is a class for representing single elements that appear in exported
5 - * data, such as individual resources, data literals, or blank nodes.
 4+ * SMWExpElement is a class for representing single elements that appear in
 5+ * exported data, such as individual resources, data literals, or blank nodes.
66 *
77 * @author Markus Krötzsch
88 * @file
@@ -9,18 +9,14 @@
1010 */
1111
1212 /**
13 - * A single element for export, e.g. a data literal, instance name, or blank node.
14 - * Supports various serialisation aids for creating URIs or other strings for export.
15 - * This abstract base class declares the basic common functionality of export elements.
 13+ * A single element for export, e.g. a data literal, instance name, or blank
 14+ * node. This abstract base class declares the basic common functionality of
 15+ * export elements (which is not much, really).
 16+ * @note This class should not be instantiated directly.
1617 *
17 - * This class can also be used to represent blank nodes: It is assumed that all objects
18 - * of class SMWExpElement or any of its subclasses do represent blank node if their name
19 - * is empty or of the form "_id" where "id" is any identifier string. IDs are local to the
20 - * current context, such as a list of triples or an SMWExpData container.
2118 * @ingroup SMW
2219 */
2320 class SMWExpElement {
24 -
2521 protected $m_dv;
2622 protected $m_name;
2723
@@ -51,8 +47,17 @@
5248 }
5349
5450 /**
55 - * A single resource (individual) for export. Defined by a URI, and possibly also providing
56 - * abbreviated forms (QNames).
 51+ * A single resource (individual) for export. Defined by a URI, and possibly
 52+ * also providing abbreviated forms (QNames).
 53+ * This class can also be used to represent blank nodes: It is assumed that all
 54+ * objects of class SMWExpElement or any of its subclasses do represent blank
 55+ * node if their name is empty or of the form "_id" where "id" is any
 56+ * identifier string. IDs are local to the current context, such as a list of
 57+ * triples or an SMWExpData container.
 58+ *
 59+ * @todo This class should be split into two: one general resource class, and
 60+ * one that only supports resources with namespace/qname form, because the
 61+ * latter is strictly necessary in some places where resources are used.
5762 * @ingroup SMW
5863 */
5964 class SMWExpResource extends SMWExpElement {
@@ -79,6 +84,13 @@
8085 SMWExpElement::__construct( $name, $dv );
8186 }
8287 }
 88+
 89+ /**
 90+ * Return true of this resource represents a blank node.
 91+ */
 92+ public function isBlankNode() {
 93+ return ( $this->m_name == '' ) || ( $this->m_name{0} == '_' );
 94+ }
8395
8496 /**
8597 * SMW uses URI-Refs (#) to make "variants" of some base URI, e.g. to create multiple
Index: trunk/extensions/SemanticMediaWiki/includes/export/SMW_ExportController.php
@@ -0,0 +1,652 @@
 2+<?php
 3+
 4+/**
 5+ * File holding the SMWExportController class that provides basic functions for
 6+ * exporting pages to RDF and OWL.
 7+ *
 8+ * @file SMW_ExportController.php
 9+ * @ingroup SMW
 10+ *
 11+ * @author Markus Krötzsch
 12+ */
 13+
/**
 * Lightweight record identifying one wiki page that is to be serialised.
 * Objects of this class are kept in the export queue, so the class carries
 * as little data as possible to save memory.
 *
 * @ingroup SMW
 */
class SMWSmallTitle {
	/// DB key version of the title.
	public $dbkey;
	/// MediaWiki namespace constant.
	public $namespace;
	/**
	 * Recursion depth for serialising this object:
	 * - 1 or above: the object is serialised with all property values, and
	 *   referenced objects are serialised with depth reduced by 1;
	 * - 0: only minimal declarations are serialised, so no dependencies are
	 *   added;
	 * - -1: "infinite" depth, i.e. a complete recursive serialisation
	 *   without limit.
	 * @var integer
	 */
	public $recdepth = 1;

	/**
	 * Build the key under which this page is stored in the queue and the
	 * done-cache: DB key and namespace, separated by a single space. The
	 * recursion depth is not part of the key.
	 */
	public function getHash() {
		return "{$this->dbkey} {$this->namespace}";
	}
}
 41+
 42+/**
 43+ * Class for controlling the export of SMW page data, supporting high-level
 44+ * features such as recursive export and backlink inclusion. The class controls
 45+ * export independent of the serialisation syntax that is used.
 46+ *
 47+ * @ingroup SMW
 48+ */
 49+class SMWExportController {
 50+ const MAX_CACHE_SIZE = 5000; // do not let cache arrays get larger than this
 51+ const CACHE_BACKJUMP = 500; // kill this many cached entries if limit is reached,
 52+ // avoids too much array copying; <= MAX_CACHE_SIZE!
 53+ /**
 54+ * The object used for serialisation.
 55+ * @var SMWSerializer
 56+ */
 57+ protected $serializer;
 58+ /**
 59+ * An array that keeps track of the elements for which we still need to
 60+ * write auxiliary definitions/declarations.
 61+ */
 62+ protected $element_queue;
 63+ /**
 64+ * An array that keeps track of the recursion depth with which each object
 65+ * has been serialised.
 66+ */
 67+ protected $element_done;
 68+ /**
 69+ * Boolean to indicate whether all objects that are exported in full (with
 70+ * all data) should also lead to the inclusion of all "inlinks" that they
 71+ * receive from other objects. If yes, these other objects are also
 72+ * serialised with at least the relevant inlinking properties included.
 73+ * Adding such dependencies counts as "recursive serialisation" and whether
 74+ * or not inlinking objects are included in full depends on the setting for
 75+ * recursion depth. Setting this to true enables "browsable RDF".
 76+ */
 77+ protected $add_backlinks;
 78+ /**
 79+ * Controls how long to wait until flushing content to output. Flushing
 80+ * early may reduce the memory footprint of serialization functions.
 81+ * Flushing later has some advantages for export formats like RDF/XML where
 82+ * global namespace declarations are only possible by modifying the header,
 83+ * so that only local declarations are possible after the first flush.
 84+ */
 85+ protected $delay_flush;
 86+ /**
 87+ * File handle for a potential output file to write to, or null if printing
 88+ * to standard output.
 89+ */
 90+ protected $outputfile;
 91+
 92+ /**
 93+ * Constructor.
 94+ * @param SMWSerializer $serializer defining the object used for syntactic
 95+ * serialization.
 96+ * @param boolean $enable_backlinks defining if backlinks are included,
 97+ * see $add_backlinks for details.
 98+ */
 99+ public function __construct( SMWSerializer $serializer, $enable_backlinks = false ) {
 100+ $this->serializer = $serializer;
 101+ $this->outputfile = null;
 102+ $this->add_backlinks = $enable_backlinks;
 103+ }
 104+
 105+ /**
 106+ * Enable or disable inclusion of backlinks into the output.
 107+ * @param boolean $enable
 108+ */
 109+ public function enableBacklinks( $enable ) {
 110+ $this->add_backlinks = $enable;
 111+ }
 112+
 113+ /**
 114+ * Initialize all internal structures to begin with some serialization.
 115+ * Returns true if initialization was successful (this means that the
 116+ * optional output file is writable).
 117+ * @param string $outfilename URL of the file that output should be written
 118+ * to, or empty string for writting to the standard output.
 119+ */
 120+ protected function prepareSerialization( $outfilename = '' ) {
 121+ $this->serializer->clear();
 122+ $this->element_queue = array();
 123+ $this->element_done = array();
 124+ if ( $outfilename != '' ) {
 125+ $this->outputfile = fopen( $outfilename, 'w' );
 126+ if ( !$this->outputfile ) { // TODO Rather throw an exception here.
 127+ print "\nCannot open \"$outfile\" for writing.\n";
 128+ return false;
 129+ }
 130+ }
 131+ return true;
 132+ }
 133+
	/**
	 * Serialize data associated to a specific page. This method works on the
	 * level of pages, i.e. it serialises parts of SMW content and implements
	 * features like recursive export or backlinks that are available for this
	 * type of data.
	 *
	 * The page is marked as done before it is serialised. For a recursion
	 * depth other than 0, the properties used on the page are queued with
	 * depth 0, wiki page values are queued with the reduced depth, and -- if
	 * backlinks are enabled -- stubs of inlinking pages, category members and
	 * concept members are serialised right away.
	 *
	 * @param SMWWikiPageValue $value specifying the page to be exported
	 * @param integer $recursiondepth specifying the depth of recursion, see
	 * SMWSmallTitle::$recdepth
	 */
	protected function serializePage( SMWWikiPageValue $value, $recursiondepth = 1 ) {
		$st = new SMWSmallTitle();
		$st->dbkey = $value->getDBKey();
		$st->namespace = $value->getNamespace();
		$st->recdepth = $recursiondepth;
		if ( $this->isDone( $st ) ) return; // do not export twice
		$this->markAsDone( $st );
		// depth 0 means: only core properties are retrieved (see getSemanticData())
		$data = SMWExporter::makeExportData( $this->getSemanticData( $value, ( $recursiondepth == 0 ) ) );
		$this->serializer->serializeExpData( $data, $recursiondepth );

		// let other extensions add additional RDF data for this page
		$additionalDataArray = array();
		wfRunHooks( 'smwAddToRDFExport', array( $value->getTitle(), &$additionalDataArray, ( $recursiondepth != 0 ), $this->add_backlinks ) );
		foreach ( $additionalDataArray as $additionalData ) {
			$this->serializer->serializeExpData( $additionalData ); // serialise
		}

		if ( $recursiondepth != 0 ) {
			// Depth for referenced objects: one less for positive depths, -1 (unlimited) stays -1.
			// The "== 0" case of the inner ternary cannot occur inside this branch.
			$subrecdepth = ($recursiondepth>0) ? ($recursiondepth-1) : ($recursiondepth==0 ? 0 : -1);

			foreach ( $data->getProperties() as $property ) {
				if ( $property->getDataValue() instanceof SMWWikiPageValue ) {
					// TODO This currently drops modifiers (units of measurement)
					$this->queuePage( $property->getDataValue(), 0 ); // no real recursion along properties
				}
				// Only the first value of each property is type-checked: if it is
				// not a wiki page, the remaining values are skipped; if it is, all
				// values of this property are queued without further checks.
				$wikipagevalues = false;
				foreach ( $data->getValues( $property ) as $expdata ) {
					$subject = $expdata->getSubject();
					if ( !$wikipagevalues && ( $subject->getDataValue() instanceof SMWWikiPageValue ) ) {
						$wikipagevalues = true;
					} elseif ( !$wikipagevalues ) {
						break;
					}
					$this->queuePage( $subject->getDatavalue(), $subrecdepth );
				}
			}

			// Add backlinks:
			// Note: Backlinks are different from recursive serialisations, since
			// stub declarations (recdepth==0) still need to have the property that
			// links back to the object. So objects that would be exported with
			// recdepth 0 cannot be put into the main queue but must be done right
			// away. They also might be required many times, if they link back to
			// many different objects in many ways (we cannot consider them "Done"
			// if they were serialised at recdepth 0 only).
			if ( $this->add_backlinks ) {
				wfProfileIn( "RDF::PrintPages::GetBacklinks" );
				$inprops = smwfGetStore()->getInProperties( $value );
				foreach ( $inprops as $inprop ) {
					if ( $inprop->getWikiPageValue() instanceof SMWWikiPageValue ) {
						$this->queuePage( $inprop->getWikiPageValue(), 0 ); // no real recursion along properties
					}
					$inSubs = smwfGetStore()->getPropertySubjects( $inprop, $value );
					foreach ( $inSubs as $inSub ) {
						$stb = new SMWSmallTitle();
						$stb->dbkey = $inSub->getDBkey();
						$stb->namespace = $inSub->getNamespace();
						$stb->recdepth = $subrecdepth;
						if ( !$this->isDone($stb) ) {
							// stub: core data of the linking page plus the linking property
							$semdata = $this->getSemanticData( $inSub, true );
							$semdata->addPropertyObjectValue( $inprop, $value );
							$data = SMWExporter::makeExportData( $semdata );
							$this->serializer->serializeExpData( $data, $subrecdepth );
						}
					}
				}

				// NOTE(review): the two branches below test array_key_exists() on
				// element_done instead of calling isDone() as the loop above does,
				// so they ignore the recorded recursion depth -- confirm whether
				// that is intended.
				if ( NS_CATEGORY === $value->getNamespace() ) { // also print elements of categories
					$options = new SMWRequestOptions();
					$options->limit = 100; // Categories can be large, always use limit
					$instances = smwfGetStore()->getPropertySubjects( SMWPropertyValue::makeProperty( '_INST' ), $value, $options );
					$pinst = SMWPropertyValue::makeProperty( '_INST' );

					foreach ( $instances as $instance ) {
						$stb = new SMWSmallTitle();
						$stb->dbkey = $instance->getDBkey();
						$stb->namespace = $instance->getNamespace();

						if ( !array_key_exists( $stb->getHash(), $this->element_done ) ) {
							$semdata = $this->getSemanticData( $instance, true );
							$semdata->addPropertyObjectValue( $pinst, $value );
							$data = SMWExporter::makeExportData( $semdata );
							$this->serializer->serializeExpData( $data, $subrecdepth );
						}
					}
				} elseif ( SMW_NS_CONCEPT === $value->getNamespace() ) { // print concept members (slightly different code)
					// Concept members are obtained by running a query (limit 100) for the concept.
					$desc = new SMWConceptDescription( $value->getTitle() );
					$desc->addPrintRequest( new SMWPrintRequest( SMWPrintRequest::PRINT_THIS, '' ) );
					$query = new SMWQuery( $desc );
					$query->setLimit( 100 );

					$res = smwfGetStore()->getQueryResult( $query );
					$resarray = $res->getNext();
					$pinst = SMWPropertyValue::makeProperty( '_INST' );

					// getNext() returns false once all result rows have been consumed
					while ( $resarray !== false ) {
						$instance = end( $resarray )->getNextObject();

						$stb = new SMWSmallTitle();
						$stb->dbkey = $instance->getDBkey();
						$stb->namespace = $instance->getNamespace();

						if ( !array_key_exists( $stb->getHash(), $this->element_done ) ) {
							$semdata = $this->getSemanticData( $instance, true );
							$semdata->addPropertyObjectValue( $pinst, $value );
							$data = SMWExporter::makeExportData( $semdata );
							// NOTE(review): unlike the category branch, no $subrecdepth
							// is passed here -- confirm whether the default is wanted.
							$this->serializer->serializeExpData( $data );
						}

						$resarray = $res->getNext();
					}
				}
				wfProfileOut( "RDF::PrintPages::GetBacklinks" );
			}
		}
	}
 260+
 261+ /**
 262+ * Serialize data associated to a specific page.
 263+ *
 264+ * @param SMWSmallTitle $st specifying the page to be exported
 265+ */
 266+ protected function serializeSmallTitle( SMWSmallTitle $st ) {
 267+ if ( $this->isDone( $st ) ) return; // do not export twice
 268+ $value = SMWWikiPageValue::makePage( $st->dbkey, $st->namespace );
 269+ $this->serializePage( $value, $st->recdepth );
 270+ }
 271+
 272+ /**
 273+ * Add a given SMWWikiPageValue to the export queue if needed.
 274+ */
 275+ protected function queuePage( SMWWikiPageValue $pagevalue, $recursiondepth ) {
 276+ $spt = new SMWSmallTitle();
 277+ $spt->dbkey = $pagevalue->getDBkey();
 278+ $spt->namespace = $pagevalue->getNamespace();
 279+ $spt->recdepth = $recursiondepth;
 280+ if ( !$this->isDone( $spt ) ) {
 281+ $this->element_queue[$spt->getHash()] = $spt;
 282+ }
 283+ }
 284+
 285+ /**
 286+ * Mark an article as done while making sure that the cache used for this
 287+ * stays reasonably small. Input is given as an SMWSmallTitle object.
 288+ */
 289+ protected function markAsDone( $st ) {
 290+ if ( count( $this->element_done ) >= self::MAX_CACHE_SIZE ) {
 291+ $this->element_done = array_slice( $this->element_done,
 292+ self::CACHE_BACKJUMP,
 293+ self::MAX_CACHE_SIZE - self::CACHE_BACKJUMP,
 294+ true );
 295+ }
 296+ $hash = $st->getHash();
 297+ if ( !$this->isDone( $st ) ) {
 298+ $this->element_done[$hash] = $st->recdepth; // mark title as done, with given recursion
 299+ }
 300+ unset( $this->element_queue[$hash] ); // make sure it is not in the queue
 301+ }
 302+
 303+ /**
 304+ * Check if the given object has already been serialised at sufficient
 305+ * recursion depth.
 306+ * @param SMWSmallTitle $st specifying the object to check
 307+ */
 308+ protected function isDone( SMWSmallTitle $st ) {
 309+ $hash = $st->getHash();
 310+ return ( ( array_key_exists( $hash, $this->element_done ) ) &&
 311+ ( ( $this->element_done[$hash] == -1 ) ||
 312+ ( ( $st->recdepth != -1 ) && ( $this->element_done[$hash] >= $st->recdepth ) ) ) );
 313+ }
 314+
 315+ /**
 316+ * Retrieve a copy of the semantic data for a wiki page, possibly filtering
 317+ * it so that only essential properties are included (in some cases, we only
 318+ * want to export stub information about a page).
 319+ * We make a copy of the object since we may want to add more data later on
 320+ * and we do not want to modify the store's result which may be used for
 321+ * caching purposes elsewhere.
 322+ */
 323+ protected function getSemanticData( $pagevalue, $core_props_only ) {
 324+ $semdata = smwfGetStore()->getSemanticData( $pagevalue, $core_props_only ? array( '__spu', '__typ', '__imp' ) : false ); // advise store to retrieve only core things
 325+ if ( $core_props_only ) { // be sure to filter all non-relevant things that may still be present in the retrieved
 326+ $result = new SMWSemanticData( $pagevalue );
 327+ foreach ( array( '_URI', '_TYPE', '_IMPO' ) as $propid ) {
 328+ $prop = SMWPropertyValue::makeProperty( $propid );
 329+ $values = $semdata->getPropertyValues( $prop );
 330+ foreach ( $values as $dv ) {
 331+ $result->addPropertyObjectValue( $prop, $dv );
 332+ }
 333+ }
 334+ } else {
 335+ $result = clone $semdata;
 336+ }
 337+ return $result;
 338+ }
 339+
 340+ /**
 341+ * Send to the output what has been serialized so far. The flush might
 342+ * be deferred until later unless $force is true.
 343+ */
 344+ protected function flush( $force = false ) {
 345+ if ( !$force && ( $this->delay_flush > 0 ) ) {
 346+ $this->delay_flush -= 1;
 347+ } elseif ( $this->outputfile !== null ) {
 348+ fwrite( $this->outputfile, $this->serializer->flushContent() );
 349+ } else {
 350+ print $this->serializer->flushContent();
 351+ // Ship data in small chunks (even though browsers often do not display anything
 352+ // before the file is complete -- this might be due to syntax highlighting features
 353+ // for app/xml). You may want to sleep(1) here for debugging this.
 354+ ob_flush();
 355+ flush();
 356+ }
 357+ }
 358+
 359+ /**
 360+ * Create an SMWExpData container that encodes the ontology header for an
 361+ * SMW exported OWL file.
 362+ *
 363+ * @param string $ontologyuri specifying the URI of the ontology, possibly
 364+ * empty
 365+ */
 366+ protected function getOntologyExpData( $ontologyuri ) {
 367+ $data = new SMWExpData( new SMWExpResource( $ontologyuri ) );
 368+ $ed = new SMWExpData( SMWExporter::getSpecialElement( 'owl', 'Ontology' ) );
 369+ $data->addPropertyObjectValue( SMWExporter::getSpecialElement( 'rdf', 'type' ), $ed );
 370+ $ed = new SMWExpData( new SMWExpLiteral( date( DATE_W3C ), null, 'http://www.w3.org/2001/XMLSchema#dateTime' ) );
 371+ $data->addPropertyObjectValue( SMWExporter::getSpecialElement( 'swivt', 'creationDate' ), $ed );
 372+ $ed = new SMWExpData( new SMWExpResource( 'http://semantic-mediawiki.org/swivt/1.0' ) );
 373+ $data->addPropertyObjectValue( SMWExporter::getSpecialElement( 'owl', 'imports' ), $ed );
 374+ return $data;
 375+ }
 376+
 377+ /**
 378+ * This function prints all selected pages, specified as an array of page
 379+ * names (strings with namespace identifiers).
 380+ *
 381+ * @param array $pages list of page names to export
 382+ * @param integer $recursion determines how pages are exported recursively:
 383+ * "0" means that referenced resources are only declared briefly, "1" means
 384+ * that all referenced resources are also exported recursively (propbably
 385+ * retrieving the whole wiki).
 386+ * @param string $revisiondate filter page list by including only pages
 387+ * that have been changed since this date; format "YmdHis"
 388+ *
 389+ * @todo Consider dropping the $revisiondate filtering and all associated
 390+ * functionality. Is anybody using this?
 391+ */
 392+ public function printPages( $pages, $recursion = 1, $revisiondate = false ) {
 393+ global $smwgMW_1_14;
 394+ wfProfileIn( "RDF::PrintPages" );
 395+
 396+ $linkCache =& LinkCache::singleton();
 397+ $this->prepareSerialization();
 398+ $this->delay_flush = 10; // flush only after (fully) printing 11 objects
 399+
 400+ // transform pages into queued short titles
 401+ foreach ( $pages as $page ) {
 402+ $title = Title::newFromText( $page );
 403+ if ( null === $title ) continue; // invalid title name given
 404+ if ( $revisiondate !== '' ) { // filter page list by revision date
 405+ $rev = $smwgMW_1_14 ? Revision::getTimeStampFromID( $title, $title->getLatestRevID() ) : Revision::getTimeStampFromID( $title->getLatestRevID() );
 406+ if ( $rev < $revisiondate ) continue;
 407+ }
 408+ $st = new SMWSmallTitle();
 409+ $st->dbkey = $title->getDBkey();
 410+ $st->namespace = $title->getNamespace();
 411+ $st->recdepth = $recursion==1 ? -1 : 1;
 412+ $this->element_queue[$st->getHash()] = $st;
 413+ }
 414+
 415+ $this->serializer->serializeHeader();
 416+
 417+ if ( count( $pages ) == 1 ) { // ensure that ontologies that are retrieved as linked data are not confused with their subject!
 418+ $ontologyuri = SMWExporter::expandURI( '&export;' ) . '/' . urlencode( end( $pages ) );
 419+ } else { // use empty URI, i.e. "location" as URI otherwise
 420+ $ontologyuri = '';
 421+ }
 422+ $this->serializer->serializeExpData( $this->getOntologyExpData( $ontologyuri ) );
 423+
 424+ while ( count( $this->element_queue ) > 0 ) {
 425+ $this->serializeSmallTitle( reset( $this->element_queue ) );
 426+ $this->flush();
 427+ $linkCache->clear(); // avoid potential memory leak
 428+ }
 429+ $this->serializer->serializeFooter();
 430+ $this->flush( true );
 431+
 432+ wfProfileOut( "RDF::PrintPages" );
 433+ }
 434+
 435+
 436+ /**
 437+ * This function exports the semantic data for all pages within the wiki,
 438+ * and for all elements that are referred to in the exported data.
 439+ *
 440+ * @param string $outfile the output file URI, or false if printing to stdout
 441+ * @param mixed $ns_restriction namespace restriction, see fitsNsRestriction()
 442+ * @param integer $delay number of microseconds for which to sleep during
 443+ * export to reduce server load in long-running operations
 444+ * @param integer $delayeach number of pages to process between two sleeps
 445+ */
 446+ public function printAll( $outfile, $ns_restriction = false, $delay, $delayeach ) {
 447+ global $smwgNamespacesWithSemanticLinks;
 448+ $linkCache =& LinkCache::singleton();
 449+ $db = wfGetDB( DB_SLAVE );
 450+
 451+ $this->delay_flush = 10;
 452+ if ( !$this->prepareSerialization( $outfile ) ) return;
 453+
 454+ $this->serializer->serializeHeader();
 455+ $this->serializer->serializeExpData( $this->getOntologyExpData( '' ) );
 456+
 457+ $end = $db->selectField( 'page', 'max(page_id)', false, $outfile );
 458+ $a_count = 0; $d_count = 0; // DEBUG
 459+ $delaycount = $delayeach;
 460+
 461+ for ( $id = 1; $id <= $end; $id += 1 ) {
 462+ $title = Title::newFromID( $id );
 463+ if ( ( $title === null ) || !smwfIsSemanticsProcessed( $title->getNamespace() ) ) continue;
 464+ if ( !SMWExportController::fitsNsRestriction( $ns_restriction, $title->getNamespace() ) ) continue;
 465+ $a_count += 1; // DEBUG
 466+
 467+ $st = new SMWSmallTitle();
 468+ $st->dbkey = $title->getDBkey();
 469+ $st->namespace = $title->getNamespace();
 470+ $st->recdepth = 1;
 471+ $this->element_queue[$st->getHash()] = $st;
 472+
 473+ while ( count( $this->element_queue ) > 0 ) {
 474+ $this->serializeSmallTitle( reset( $this->element_queue ) );
 475+ // resolve dependencies that will otherwise not be printed
 476+ foreach ( $this->element_queue as $key => $staux ) {
 477+ if ( !smwfIsSemanticsProcessed( $staux->namespace ) || //( $staux->modifier !== '' ) ||
 478+ !SMWExportController::fitsNsRestriction( $ns_restriction, $staux->namespace ) ) {
 479+ // Note: we do not need to check the cache to guess if an element was already
 480+ // printed. If so, it would not be included in the queue in the first place.
 481+ $d_count += 1; // DEBUG
 482+ } else { // don't carry values that you do not want to export (yet)
 483+ unset( $this->element_queue[$key] );
 484+ }
 485+ }
 486+ // sleep each $delaycount for $delay µs to be nice to the server
 487+ if ( ( $delaycount-- < 0 ) && ( $delayeach != 0 ) ) {
 488+ usleep( $delay );
 489+ $delaycount = $delayeach;
 490+ }
 491+ }
 492+
 493+ $this->flush();
 494+ $linkCache->clear();
 495+ }
 496+
 497+ $this->serializer->serializeFooter();
 498+ $this->flush( true );
 499+ }
 500+
 501+ /**
 502+ * Print basic definitions a list of pages ordered by their page id.
 503+ * Offset and limit refer to the count of existing pages, not to the
 504+ * page id.
 505+ * @param integer $offset the number of the first (existing) page to
 506+ * serialize a declaration for
 507+ * @param integer $limit the number of pages to serialize
 508+ */
 509+ public function printPageList( $offset = 0, $limit = 30 ) {
 510+ global $smwgNamespacesWithSemanticLinks;
 511+ wfProfileIn( "RDF::PrintPageList" );
 512+
 513+ $db = wfGetDB( DB_SLAVE );
 514+ $this->prepareSerialization();
 515+ $this->delay_flush = 35; // don't do intermediate flushes with default parameters
 516+ $linkCache = LinkCache::singleton();
 517+
 518+ $this->serializer->serializeHeader();
 519+ $this->serializer->serializeExpData( $this->getOntologyExpData( '' ) );
 520+
 521+ $query = '';
 522+ foreach ( $smwgNamespacesWithSemanticLinks as $ns => $enabled ) {
 523+ if ( $enabled ) {
 524+ if ( $query != '' ) $query .= ' OR ';
 525+ $query .= 'page_namespace = ' . $db->addQuotes( $ns );
 526+ }
 527+ }
 528+ $res = $db->select( $db->tableName( 'page' ),
 529+ 'page_id,page_title,page_namespace', $query
 530+ , 'SMW::RDF::PrintPageList', array( 'ORDER BY' => 'page_id ASC', 'OFFSET' => $offset, 'LIMIT' => $limit ) );
 531+ $foundpages = false;
 532+
 533+ while ( $row = $db->fetchObject( $res ) ) {
 534+ $foundpages = true;
 535+ $st = new SMWSmallTitle();
 536+ $st->dbkey = $row->page_title;
 537+ $st->namespace = $row->page_namespace;
 538+ $st->recdepth = 0;
 539+ $this->serializeSmallTitle( $st );
 540+ $this->flush();
 541+ $linkCache->clear();
 542+ }
 543+ if ( $foundpages ) { // add link to next result page
 544+ if ( strpos( SMWExporter::expandURI( '&wikiurl;' ), '?' ) === false ) { // check whether we have title as a first parameter or in URL
 545+ $nexturl = SMWExporter::expandURI( '&export;?offset=' ) . ( $offset + $limit );
 546+ } else {
 547+ $nexturl = SMWExporter::expandURI( '&export;&amp;offset=' ) . ( $offset + $limit );
 548+ }
 549+
 550+ $data = new SMWExpData( new SMWExpResource( $nexturl ) );
 551+ $ed = new SMWExpData( SMWExporter::getSpecialElement( 'owl', 'Thing' ) );
 552+ $data->addPropertyObjectValue( SMWExporter::getSpecialElement( 'rdf', 'type' ), $ed );
 553+ $ed = new SMWExpData( new SMWExpResource( $nexturl ) );
 554+ $data->addPropertyObjectValue( SMWExporter::getSpecialElement( 'rdfs', 'isDefinedBy' ), $ed );
 555+ $this->serializer->serializeExpData( $data );
 556+ }
 557+
 558+ $this->serializer->serializeFooter();
 559+ $this->flush( true );
 560+
 561+ wfProfileOut( "RDF::PrintPageList" );
 562+ }
 563+
 564+
 565+ /**
 566+ * Print basic information about this site.
 567+ */
 568+ public function printWikiInfo() {
 569+ wfProfileIn( "RDF::PrintWikiInfo" );
 570+
 571+ global $wgSitename, $wgLanguageCode;
 572+
 573+ $db = & wfGetDB( DB_SLAVE );
 574+ $this->prepareSerialization();
 575+ $this->delay_flush = 35; // don't do intermediate flushes with default parameters
 576+ $linkCache = LinkCache::singleton();
 577+
 578+ // assemble export data:
 579+ $data = new SMWExpData( new SMWExpResource( '&wiki;#wiki' ) );
 580+ $ed = new SMWExpData( SMWExporter::getSpecialElement( 'swivt', 'Wikisite' ) );
 581+ $data->addPropertyObjectValue( SMWExporter::getSpecialElement( 'rdf', 'type' ), $ed );
 582+ // basic wiki information
 583+ $ed = new SMWExpData( new SMWExpLiteral( $wgSitename ) );
 584+ $data->addPropertyObjectValue( SMWExporter::getSpecialElement( 'rdfs', 'label' ), $ed );
 585+ $ed = new SMWExpData( new SMWExpLiteral( $wgSitename, null, 'http://www.w3.org/2001/XMLSchema#string' ) );
 586+ $data->addPropertyObjectValue( SMWExporter::getSpecialElement( 'swivt', 'siteName' ), $ed );
 587+ $ed = new SMWExpData( new SMWExpLiteral( SMWExporter::expandURI( '&wikiurl;' ), null, 'http://www.w3.org/2001/XMLSchema#string' ) );
 588+ $data->addPropertyObjectValue( SMWExporter::getSpecialElement( 'swivt', 'pagePrefix' ), $ed );
 589+ $ed = new SMWExpData( new SMWExpLiteral( SMW_VERSION, null, 'http://www.w3.org/2001/XMLSchema#string' ) );
 590+ $data->addPropertyObjectValue( SMWExporter::getSpecialElement( 'swivt', 'smwVersion' ), $ed );
 591+ $ed = new SMWExpData( new SMWExpLiteral( $wgLanguageCode, null, 'http://www.w3.org/2001/XMLSchema#string' ) );
 592+ $data->addPropertyObjectValue( SMWExporter::getSpecialElement( 'swivt', 'langCode' ), $ed );
 593+ $mainpage = Title::newMainPage();
 594+ if ( $mainpage !== null ) {
 595+ $ed = new SMWExpData( new SMWExpResource( $mainpage->getFullURL() ) );
 596+ $data->addPropertyObjectValue( SMWExporter::getSpecialElement( 'swivt', 'mainPage' ), $ed );
 597+ }
 598+ // statistical information
 599+ $ed = new SMWExpData( new SMWExpLiteral( SiteStats::pages(), null, 'http://www.w3.org/2001/XMLSchema#int' ) );
 600+ $data->addPropertyObjectValue( SMWExporter::getSpecialElement( 'swivt', 'pageCount' ), $ed );
 601+ $ed = new SMWExpData( new SMWExpLiteral( SiteStats::articles(), null, 'http://www.w3.org/2001/XMLSchema#int' ) );
 602+ $data->addPropertyObjectValue( SMWExporter::getSpecialElement( 'swivt', 'contentPageCount' ), $ed );
 603+ $ed = new SMWExpData( new SMWExpLiteral( SiteStats::images(), null, 'http://www.w3.org/2001/XMLSchema#int' ) );
 604+ $data->addPropertyObjectValue( SMWExporter::getSpecialElement( 'swivt', 'mediaCount' ), $ed );
 605+ $ed = new SMWExpData( new SMWExpLiteral( SiteStats::edits(), null, 'http://www.w3.org/2001/XMLSchema#int' ) );
 606+ $data->addPropertyObjectValue( SMWExporter::getSpecialElement( 'swivt', 'editCount' ), $ed );
 607+ $ed = new SMWExpData( new SMWExpLiteral( SiteStats::views(), null, 'http://www.w3.org/2001/XMLSchema#int' ) );
 608+ $data->addPropertyObjectValue( SMWExporter::getSpecialElement( 'swivt', 'viewCount' ), $ed );
 609+ $ed = new SMWExpData( new SMWExpLiteral( SiteStats::users(), null, 'http://www.w3.org/2001/XMLSchema#int' ) );
 610+ $data->addPropertyObjectValue( SMWExporter::getSpecialElement( 'swivt', 'userCount' ), $ed );
 611+ $ed = new SMWExpData( new SMWExpLiteral( SiteStats::admins(), null, 'http://www.w3.org/2001/XMLSchema#int' ) );
 612+ $data->addPropertyObjectValue( SMWExporter::getSpecialElement( 'swivt', 'adminCount' ), $ed );
 613+
 614+ $this->serializer->serializeHeader();
 615+ $this->serializer->serializeExpData( $this->getOntologyExpData( '' ) );
 616+ $this->serializer->serializeExpData( $data );
 617+
 618+ // link to list of existing pages:
 619+ if ( strpos( SMWExporter::expandURI( '&wikiurl;' ), '?' ) === false ) { // check whether we have title as a first parameter or in URL
 620+ $nexturl = SMWExporter::expandURI( '&export;?offset=0' );
 621+ } else {
 622+ $nexturl = SMWExporter::expandURI( '&export;&amp;offset=0' );
 623+ }
 624+ $data = new SMWExpData( new SMWExpResource( $nexturl ) );
 625+ $ed = new SMWExpData( SMWExporter::getSpecialElement( 'owl', 'Thing' ) );
 626+ $data->addPropertyObjectValue( SMWExporter::getSpecialElement( 'rdf', 'type' ), $ed );
 627+ $ed = new SMWExpData( new SMWExpResource( $nexturl ) );
 628+ $data->addPropertyObjectValue( SMWExporter::getSpecialElement( 'rdfs', 'isDefinedBy' ), $ed );
 629+ $this->serializer->serializeExpData( $data );
 630+
 631+ $this->serializer->serializeFooter();
 632+ $this->flush( true );
 633+
 634+ wfProfileOut( "RDF::PrintWikiInfo" );
 635+ }
 636+
 637+ /**
 638+ * This function checks whether some article fits into a given namespace
 639+ * restriction. Restrictions are encoded as follows: a non-negative number
 640+ * requires the namespace to be identical to the given number; "-1"
 641+ * requires the namespace to be different from Category, Property, and
 642+ * Type; "false" means "no restriction".
 643+ * @param $res mixed encoding the restriction as described above
 644+ * @param $ns integer the namespace constant to be checked
 645+ */
 646+ static public function fitsNsRestriction( $res, $ns ) {
 647+ if ( $res === false ) return true;
 648+ if ( is_array( $res ) ) return in_array( $ns, $res );
 649+ if ( $res >= 0 ) return ( $res == $ns );
 650+ return ( ( $res != NS_CATEGORY ) && ( $res != SMW_NS_PROPERTY ) && ( $res != SMW_NS_TYPE ) );
 651+ }
 652+
 653+}
Property changes on: trunk/extensions/SemanticMediaWiki/includes/export/SMW_ExportController.php
___________________________________________________________________
Added: svn:eol-style
1654 + native
Index: trunk/extensions/SemanticMediaWiki/includes/export/SMW_Exporter.php
@@ -149,7 +149,7 @@
150150 if ( $pe !== null ) {
151151 foreach ( $semdata->getPropertyValues( $property ) as $dv ) {
152152 if ( $cat_only ) {
153 - if ( !( $dv instanceof SMWWikiPageValue ) || ( $dv->etNamespace != NS_CATEGORY ) ) {
 153+ if ( !( $dv instanceof SMWWikiPageValue ) || ( $dv->getNamespace() != NS_CATEGORY ) ) {
154154 continue;
155155 }
156156 }
Index: trunk/extensions/SemanticMediaWiki/includes/export/SMW_Serializer.php
@@ -0,0 +1,419 @@
 2+<?php
 3+
 4+/**
 5+ * File holding the SMWSerializer class that provides basic functions for
 6+ * serialising data in OWL and RDF syntaxes.
 7+ *
 8+ * @file SMW_Serializer.php
 9+ * @ingroup SMW
 10+ *
 11+ * @author Markus Krötzsch
 12+ */
 13+
define( 'SMW_SERIALIZER_DECL_CLASS', 1 );
define( 'SMW_SERIALIZER_DECL_OPROP', 2 );
define( 'SMW_SERIALIZER_DECL_APROP', 4 );

/**
 * Class for serializing exported data (encoded as SMWExpData object) in a
 * concrete syntactic format such as Turtle or RDF/XML. The serializer
 * adds object serialisations to an internal string that can be retrieved for
 * pushing it to an output. RDF and OWL have two types of dependencies that are
 * managed: namespaces (and similar abbreviations) and element declarations.
 * The former need to be defined before being used, while the latter can occur
 * at some later point in the serialization. Declarations are relevant to the
 * OWL data model, being one of Class, DatatypeProperty, and ObjectProperty
 * (only the latter two are mutually exclusive). This class determines the
 * required declaration from the context in which an element is used.
 *
 * @ingroup SMW
 */
class SMWSerializer {
	/**
	 * Array for recording required declarations; format:
	 * resourcename => decl-flag, where decl-flag is a sum of flags
	 * SMW_SERIALIZER_DECL_CLASS, SMW_SERIALIZER_DECL_OPROP,
	 * SMW_SERIALIZER_DECL_APROP.
	 */
	protected $decl_todo;
	/**
	 * Array for recording previous declarations; format:
	 * resourcename => decl-flag, where decl-flag is a sum of flags
	 * SMW_SERIALIZER_DECL_CLASS, SMW_SERIALIZER_DECL_OPROP,
	 * SMW_SERIALIZER_DECL_APROP.
	 */
	protected $decl_done;
	/**
	 * Array of additional namespaces (abbreviation => URI), flushed on
	 * closing the current namespace tag. Since we export in a streamed
	 * way, it is not always possible to embed additional namespaces into
	 * a syntactic block (e.g. an RDF/XML tag) which might have been sent to
	 * the client already. But we wait with printing the current block so that
	 * extra namespaces from this array can still be printed (note that one
	 * never knows which extra namespaces are encountered during export).
	 */
	protected $extra_namespaces;
	/**
	 * Array of namespaces that have been declared globally already. Contains
	 * entries of format 'namespace abbreviation' => true, assuming that the
	 * same abbreviation always refers to the same URI.
	 */
	protected $global_namespaces;
	/**
	 * The current working string is obtained by concatenating the strings
	 * $pre_ns_buffer and $post_ns_buffer. The split between the two is such
	 * that one can append additional namespace declarations to $pre_ns_buffer
	 * so that they affect all current elements. The buffers are flushed during
	 * output in order to achieve "streaming" RDF export for larger files.
	 */
	protected $pre_ns_buffer;
	/**
	 * See documentation for $pre_ns_buffer.
	 */
	protected $post_ns_buffer;
	/**
	 * True if the $pre_ns_buffer contains the beginning of a namespace
	 * declaration block to which further declarations for the current
	 * context can be appended.
	 */
	protected $namespace_block_started;
	/**
	 * True if the namespaces that are added at the current serialization stage
	 * become global, i.e. remain available for all later contexts. This is the
	 * case in RDF/XML only as long as the header has not been streamed to the
	 * client (reflected herein by calling flushContent()). Later, namespaces
	 * can only be added locally to individual elements, thus requiring them to
	 * be re-added multiple times if used in many elements.
	 */
	protected $namespaces_are_global;

	/**
	 * Constructor.
	 */
	public function __construct() {
		$this->clear();
	}

	/**
	 * Clear internal states to start a new serialization. Every declared
	 * state property is reset here, so that the object is usable even if
	 * serializeHeader() has not been called yet.
	 */
	public function clear() {
		$this->decl_todo = array();
		$this->decl_done = array();
		$this->pre_ns_buffer = '';
		$this->post_ns_buffer = '';
		$this->namespace_block_started = false;
		$this->namespaces_are_global = false;
		$this->extra_namespaces = array();
		$this->global_namespaces = array();
	}

	/* Functions for exporting RDF */

	/**
	 * Serialise the RDF/XML header: XML declaration, DOCTYPE with entity
	 * definitions, and the opening rdf:RDF tag with the standard namespaces.
	 * The opening tag is left unterminated in $pre_ns_buffer (its closing
	 * ">" goes to $post_ns_buffer) so that further global namespaces can
	 * still be appended until the content is flushed.
	 */
	public function serializeHeader() {
		$this->clear();
		$this->namespaces_are_global = true;
		$this->namespace_block_started = true;
		$this->pre_ns_buffer =
			"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" .
			"<!DOCTYPE rdf:RDF[\n" .
			"\t<!ENTITY rdf " . $this->makeValueEntityString( SMWExporter::expandURI( '&rdf;' ) ) . ">\n" .
			"\t<!ENTITY rdfs " . $this->makeValueEntityString( SMWExporter::expandURI( '&rdfs;' ) ) . ">\n" .
			"\t<!ENTITY owl " . $this->makeValueEntityString( SMWExporter::expandURI( '&owl;' ) ) . ">\n" .
			"\t<!ENTITY swivt " . $this->makeValueEntityString( SMWExporter::expandURI( '&swivt;' ) ) . ">\n" .
			// A note on "wiki": this namespace is crucial as a fallback when it would be illegal to start e.g. with a number.
			// In this case, one can always use wiki:... followed by "_" and possibly some namespace, since _ is legal as a first character.
			"\t<!ENTITY wiki " . $this->makeValueEntityString( SMWExporter::expandURI( '&wiki;' ) ) . ">\n" .
			"\t<!ENTITY property " . $this->makeValueEntityString( SMWExporter::expandURI( '&property;' ) ) . ">\n" .
			"\t<!ENTITY wikiurl " . $this->makeValueEntityString( SMWExporter::expandURI( '&wikiurl;' ) ) . ">\n" .
			"]>\n\n" .
			"<rdf:RDF\n" .
			"\txmlns:rdf=\"&rdf;\"\n" .
			"\txmlns:rdfs=\"&rdfs;\"\n" .
			"\txmlns:owl =\"&owl;\"\n" .
			"\txmlns:swivt=\"&swivt;\"\n" .
			"\txmlns:wiki=\"&wiki;\"\n" .
			"\txmlns:property=\"&property;\"";
		$this->global_namespaces = array( 'rdf' => true, 'rdfs' => true, 'owl' => true, 'swivt' => true, 'wiki' => true, 'property' => true );
		$this->post_ns_buffer .= ">\n\n";
	}

	/**
	 * Serialise the footer: any still-missing declarations, a generator
	 * comment, and the closing rdf:RDF tag.
	 */
	public function serializeFooter() {
		$this->serializeDeclarations();
		$this->post_ns_buffer .= "\t<!-- Created by Semantic MediaWiki, http://semantic-mediawiki.org/ -->\n";
		$this->post_ns_buffer .= '</rdf:RDF>';
	}

	/**
	 * Serialize any declarations that have been found to be missing while
	 * serializing other elements, and record them as done.
	 */
	public function serializeDeclarations() {
		foreach ( $this->decl_todo as $name => $flag ) {
			$types = array();
			if ( $flag & SMW_SERIALIZER_DECL_CLASS ) $types[] = 'owl:Class';
			if ( $flag & SMW_SERIALIZER_DECL_OPROP ) $types[] = 'owl:ObjectProperty';
			if ( $flag & SMW_SERIALIZER_DECL_APROP ) $types[] = 'owl:DatatypeProperty';
			foreach ( $types as $type ) {
				$this->post_ns_buffer .= "\t<$type rdf:about=\"$name\" />\n";
			}
			$curdone = array_key_exists( $name, $this->decl_done ) ? $this->decl_done[$name] : 0;
			$this->decl_done[$name] = $curdone | $flag;
		}
		$this->decl_todo = array(); // reset all
	}

	/**
	 * Serialise the given SMWExpData object. The method does not assume that
	 * the exported data refers to wiki pages or other SMW data, and it makes
	 * sure that all required auxiliary declarations for obtaining proper OWL
	 * are included anyway.
	 *
	 * @param $data SMWExpData containing the data to be serialised.
	 */
	public function serializeExpData( SMWExpData $data ) {
		$this->serializeNestedExpData( $data, '' );
		$this->serializeNamespaces();
		if ( !$this->namespaces_are_global ) {
			// Local namespaces belong to this element only: close the block
			// so that the next element starts a new one.
			$this->pre_ns_buffer .= $this->post_ns_buffer;
			$this->post_ns_buffer = '';
			$this->namespace_block_started = false;
		}
	}

	/**
	 * Serialise the given SMWExpData object, possibly recursively with
	 * increased indentation.
	 *
	 * @param $data SMWExpData containing the data to be serialised.
	 * @param $indent string specifying a prefix for indentation (usually a sequence of tabs)
	 */
	protected function serializeNestedExpData( SMWExpData $data, $indent ) {
		$this->recordDeclarationTypes( $data );

		$type = $data->extractMainType()->getQName();
		if ( !$this->namespace_block_started ) { // start new ns block
			$this->pre_ns_buffer .= "\t$indent<$type";
			$this->namespace_block_started = true;
		} else { // continue running block
			$this->post_ns_buffer .= "\t$indent<$type";
		}

		if ( ( $data->getSubject() instanceof SMWExpLiteral ) ||
		     ( $data->getSubject() instanceof SMWExpResource ) ) {
			$this->post_ns_buffer .= ' rdf:about="' . $data->getSubject()->getName() . '"';
		} // else: blank node, no "rdf:about"

		if ( count( $data->getProperties() ) == 0 ) { // nothing else to export
			$this->post_ns_buffer .= " />\n";
		} else { // process data
			$this->post_ns_buffer .= ">\n";

			foreach ( $data->getProperties() as $property ) {
				$prop_decl_queued = false;
				$prop_decl_type = 0;
				$class_type_prop = $this->isOWLClassTypeProperty( $property );

				foreach ( $data->getValues( $property ) as $value ) {
					$this->post_ns_buffer .= "\t\t$indent<" . $property->getQName();
					$this->requireNamespace( $property->getNamespaceID(), $property->getNamespace() );
					$object = $value->getSubject();

					if ( $object instanceof SMWExpLiteral ) {
						$prop_decl_type = SMW_SERIALIZER_DECL_APROP;
						if ( $object->getDatatype() != '' ) {
							$this->post_ns_buffer .= ' rdf:datatype="' . $object->getDatatype() . '"';
						}
						$this->post_ns_buffer .= '>' .
							str_replace( array( '&', '>', '<' ), array( '&amp;', '&gt;', '&lt;' ), $object->getName() ) .
							'</' . $property->getQName() . ">\n";
					} else { // resource (maybe blank node), could have subdescriptions
						$prop_decl_type = SMW_SERIALIZER_DECL_OPROP;
						$collection = $value->getCollection();
						if ( $collection !== false ) { // RDF-style collection (list)
							$this->post_ns_buffer .= " rdf:parseType=\"Collection\">\n";
							foreach ( $collection as $subvalue ) {
								$this->serializeNestedExpData( $subvalue, $indent . "\t\t" );
								if ( $class_type_prop ) {
									// $subvalue is an SMWExpData container; the
									// declaration is required for its subject.
									$subject = $subvalue->getSubject();
									if ( $subject instanceof SMWExpResource ) {
										$this->requireDeclaration( $subject, SMW_SERIALIZER_DECL_CLASS );
									}
								}
							}
							$this->post_ns_buffer .= "\t\t$indent</" . $property->getQName() . ">\n";
						} else {
							if ( $class_type_prop ) {
								$this->requireDeclaration( $object, SMW_SERIALIZER_DECL_CLASS );
							}
							if ( count( $value->getProperties() ) > 0 ) { // resource with data: serialise
								$this->post_ns_buffer .= ">\n";
								$this->serializeNestedExpData( $value, $indent . "\t\t" );
								$this->post_ns_buffer .= "\t\t$indent</" . $property->getQName() . ">\n";
							} else { // resource without data: may need to be queued
								if ( !$object->isBlankNode() ) {
									$this->post_ns_buffer .= ' rdf:resource="' . $object->getName() . '"';
								}
								$this->post_ns_buffer .= "/>\n";
							}
						}
					}
					// The property is declared once, with the type derived
					// from its first value.
					if ( !$prop_decl_queued ) {
						$this->requireDeclaration( $property, $prop_decl_type );
						$prop_decl_queued = true;
					}
				}
			}
			$this->post_ns_buffer .= "\t$indent</" . $type . ">\n";
		}
	}

	/**
	 * Get the string that has been serialized so far. This function also
	 * resets the internal buffers for serialized strings and namespaces
	 * (what is flushed is gone). After the first non-empty flush, namespaces
	 * are no longer global.
	 *
	 * @return string the serialized content, or '' if nothing is buffered
	 */
	public function flushContent() {
		if ( ( $this->pre_ns_buffer == '' ) && ( $this->post_ns_buffer == '' ) ) return '';
		$this->serializeNamespaces();
		$this->namespaces_are_global = false;
		$result = $this->pre_ns_buffer . $this->post_ns_buffer;
		$this->pre_ns_buffer = '';
		$this->post_ns_buffer = '';
		$this->namespace_block_started = false;
		return $result;
	}

	/**
	 * Require an additional namespace to be declared in the serialization.
	 * The function checks whether the required namespace is available globally
	 * and adds it to the list of required namespaces otherwise.
	 *
	 * @param $nsshort string namespace abbreviation
	 * @param $nsuri string namespace URI
	 */
	protected function requireNamespace( $nsshort, $nsuri ) {
		if ( !array_key_exists( $nsshort, $this->global_namespaces ) ) {
			$this->extra_namespaces[$nsshort] = $nsuri;
		}
	}

	/**
	 * Include collected namespace information into the serialization by
	 * appending xmlns declarations to $pre_ns_buffer. While namespaces are
	 * global, they are also recorded in $global_namespaces.
	 */
	protected function serializeNamespaces() {
		foreach ( $this->extra_namespaces as $nsshort => $nsuri ) {
			if ( $this->namespaces_are_global ) {
				$this->global_namespaces[$nsshort] = true;
				$this->pre_ns_buffer .= "\n\t";
			} else {
				$this->pre_ns_buffer .= ' ';
			}
			$this->pre_ns_buffer .= "xmlns:$nsshort=\"$nsuri\"";
		}
		$this->extra_namespaces = array();
	}

	/**
	 * State that a certain declaration is needed. The method checks if the
	 * declaration is already available, and records a todo otherwise.
	 *
	 * @param $resource SMWExpResource the element that needs a declaration
	 * @param $decltype integer declaration flag (e.g. SMW_SERIALIZER_DECL_CLASS)
	 */
	protected function requireDeclaration( SMWExpResource $resource, $decltype ) {
		$namespaceid = $resource->getNamespaceID();
		// Do not declare predefined OWL language constructs:
		if ( ( $namespaceid == 'owl' ) || ( $namespaceid == 'rdf' ) || ( $namespaceid == 'rdfs' ) ) return;
		// Do not declare blank nodes:
		if ( $resource->isBlankNode() ) return;

		$name = $resource->getName();
		if ( array_key_exists( $name, $this->decl_done ) && ( $this->decl_done[$name] & $decltype ) ) {
			return;
		}
		if ( !array_key_exists( $name, $this->decl_todo ) ) {
			$this->decl_todo[$name] = $decltype;
		} else {
			$this->decl_todo[$name] = $this->decl_todo[$name] | $decltype;
		}
	}

	/**
	 * Update the declaration "todo" and "done" lists for the case that the
	 * given data has been serialized with the type information it provides.
	 *
	 * @param $data specifying the type data upon which declarations are based
	 */
	protected function recordDeclarationTypes( SMWExpData $data ) {
		foreach ( $data->getSpecialValues( 'rdf', 'type' ) as $typedata ) {
			$typeresource = $typedata->getSubject();
			if ( $typeresource instanceof SMWExpResource ) {
				switch ( $typeresource->getQName() ) {
					case 'owl:Class': $typeflag = SMW_SERIALIZER_DECL_CLASS; break;
					case 'owl:ObjectProperty': $typeflag = SMW_SERIALIZER_DECL_OPROP; break;
					case 'owl:DatatypeProperty': $typeflag = SMW_SERIALIZER_DECL_APROP; break;
					default: $typeflag = 0;
				}
				if ( $typeflag != 0 ) {
					$this->declarationDone( $data->getSubject(), $typeflag );
				}
			}
		}
	}

	/**
	 * Update the declaration "todo" and "done" lists to reflect the fact that
	 * the given element has been declared to have the given type.
	 *
	 * @param $element SMWExpResource specifying the element to update
	 * @param $typeflag integer specifying the type (e.g. SMW_SERIALIZER_DECL_CLASS)
	 */
	protected function declarationDone( SMWExpResource $element, $typeflag ) {
		$name = $element->getName();
		$curdone = array_key_exists( $name, $this->decl_done ) ? $this->decl_done[$name] : 0;
		$this->decl_done[$name] = $curdone | $typeflag;
		if ( array_key_exists( $name, $this->decl_todo ) ) {
			$this->decl_todo[$name] = $this->decl_todo[$name] & ( ~$typeflag );
			if ( $this->decl_todo[$name] == 0 ) {
				unset( $this->decl_todo[$name] );
			}
		}
	}

	/**
	 * Check if the given property is one of the special properties of the OWL
	 * language that require their values to be classes or RDF lists of
	 * classes. In these cases, it is necessary to declare this in the exported
	 * data.
	 *
	 * @note The list of properties checked here is not complete for the OWL
	 * language but covers what is used in SMW.
	 * @note OWL 2 allows URIs to refer to both classes and individual elements
	 * in different contexts. We only need declarations for classes that are
	 * used as such, hence it is enough to check the property. Moreover, we do
	 * not use OWL Datatypes in SMW, so rdf:type, rdfs:domain, etc. always
	 * refer to classes.
	 * @param SMWExpResource $property
	 * @return boolean
	 */
	protected function isOWLClassTypeProperty( SMWExpResource $property ) {
		$locname = $property->getLocalName();
		if ( $property->getNamespaceID() == 'rdf' ) {
			return ( $locname == 'type' );
		} elseif ( $property->getNamespaceID() == 'owl' ) {
			return ( $locname == 'intersectionOf' ) || ( $locname == 'unionOf' ) ||
			       ( $locname == 'equivalentClass' ) ||
			       ( $locname == 'complementOf' ) || ( $locname == 'someValuesFrom' ) ||
			       ( $locname == 'allValuesFrom' ) || ( $locname == 'onClass' );
		} elseif ( $property->getNamespaceID() == 'rdfs' ) {
			return ( $locname == 'subClassOf' ) || ( $locname == 'range' ) || ( $locname == 'domain' );
		} else {
			return false;
		}
	}

	/**
	 * Escape a string in the special form that is required for values in
	 * DTD entity declarations in XML. Namely, this requires the percent sign
	 * to be replaced. The result is wrapped in single quotes.
	 *
	 * @param string $string to be escaped
	 * @return string
	 */
	protected function makeValueEntityString( $string ) {
		return "'" . str_replace( '%', '&#37;', $string ) . "'";
	}

}
Property changes on: trunk/extensions/SemanticMediaWiki/includes/export/SMW_Serializer.php
___________________________________________________________________
Added: svn:eol-style
1421 + native
Index: trunk/extensions/SemanticMediaWiki/includes/datavalues/SMW_DV_Record.php
@@ -151,7 +151,7 @@
152152 public function getExportData() {
153153 if ( !$this->isValid() ) return null;
154154
155 - $result = new SMWExpData( new SMWExpElement( '', $this ) ); // bnode
 155+ $result = new SMWExpData( new SMWExpResource( '', $this ) ); // bnode
156156 $ed = new SMWExpData( SMWExporter::getSpecialElement( 'swivt', 'Container' ) );
157157 $result->addPropertyObjectValue( SMWExporter::getSpecialElement( 'rdf', 'type' ), $ed );
158158 $count = 0;
Index: trunk/extensions/SemanticMediaWiki/includes/datavalues/SMW_DV_Concept.php
@@ -102,7 +102,7 @@
103103 $element = new SMWExpData( SMWExporter::getSpecialElement( 'owl', 'Thing' ) );
104104 }
105105 if ( !$exact ) {
106 - $result = new SMWExpData( new SMWExpElement( '' ) );
 106+ $result = new SMWExpData( new SMWExpResource( '' ) );
107107 $result->addPropertyObjectValue( SMWExporter::getSpecialElement( 'rdf', 'type' ),
108108 new SMWExpData( SMWExporter::getSpecialElement( 'owl', 'Class' ) ) );
109109 $result->addPropertyObjectValue( SMWExporter::getSpecialElement( 'rdfs', 'subClassOf' ), $owldesc );
@@ -117,7 +117,7 @@
118118
119119 public function descriptionToExpData( $desc, &$exact ) {
120120 if ( ( $desc instanceof SMWConjunction ) || ( $desc instanceof SMWDisjunction ) ) {
121 - $result = new SMWExpData( new SMWExpElement( '' ) );
 121+ $result = new SMWExpData( new SMWExpResource( '' ) );
122122 $result->addPropertyObjectValue( SMWExporter::getSpecialElement( 'rdf', 'type' ),
123123 new SMWExpData( SMWExporter::getSpecialElement( 'owl', 'Class' ) ) );
124124 $elements = array();
@@ -135,7 +135,7 @@
136136 if ( count( $desc->getCategories() ) == 1 ) { // single category
137137 $result = new SMWExpData( SMWExporter::getResourceElement( end( $desc->getCategories() ) ) );
138138 } else { // disjunction of categories
139 - $result = new SMWExpData( new SMWExpElement( '' ) );
 139+ $result = new SMWExpData( new SMWExpResource( '' ) );
140140 $elements = array();
141141 foreach ( $desc->getCategories() as $cat ) {
142142 $elements[] = new SMWExpData( SMWExporter::getResourceElement( $cat ) ); ;
@@ -148,7 +148,7 @@
149149 } elseif ( $desc instanceof SMWConceptDescription ) {
150150 $result = new SMWExpData( SMWExporter::getResourceElement( $desc->getConcept() ) );
151151 } elseif ( $desc instanceof SMWSomeProperty ) {
152 - $result = new SMWExpData( new SMWExpElement( '' ) );
 152+ $result = new SMWExpData( new SMWExpResource( '' ) );
153153 $result->addPropertyObjectValue( SMWExporter::getSpecialElement( 'rdf', 'type' ),
154154 new SMWExpData( SMWExporter::getSpecialElement( 'owl', 'Restriction' ) ) );
155155 $result->addPropertyObjectValue( SMWExporter::getSpecialElement( 'owl', 'onProperty' ),
Index: trunk/extensions/SemanticMediaWiki/includes/SMW_SetupLight.php
@@ -130,11 +130,14 @@
131131
132132
133133 // Export
134 -// $wgAutoloadClasses['SMWExporter'] = $smwgIP . 'includes/export/SMW_Exporter.php';
135 -// $wgAutoloadClasses['SMWExpData'] = $smwgIP . 'includes/export/SMW_Exp_Data.php';
136 -// $wgAutoloadClasses['SMWExpElement'] = $smwgIP . 'includes/export/SMW_Exp_Element.php';
137 -// $wgAutoloadClasses['SMWExpLiteral'] = $smwgIP . 'includes/export/SMW_Exp_Element.php';
138 -// $wgAutoloadClasses['SMWExpResource'] = $smwgIP . 'includes/export/SMW_Exp_Element.php';
 134+// $expDir = $smwgIP . 'includes/export/';
 135+// $wgAutoloadClasses['SMWExporter'] = $expDir . 'SMW_Exporter.php';
 136+// $wgAutoloadClasses['SMWExpData'] = $expDir . 'SMW_Exp_Data.php';
 137+// $wgAutoloadClasses['SMWExpElement'] = $expDir . 'SMW_Exp_Element.php';
 138+// $wgAutoloadClasses['SMWExpLiteral'] = $expDir . 'SMW_Exp_Element.php';
 139+// $wgAutoloadClasses['SMWExpResource'] = $expDir . 'SMW_Exp_Element.php';
 140+// $wgAutoloadClasses['SMWExportController'] = $expDir . 'SMW_ExportController.php';
 141+// $wgAutoloadClasses['SMWSerializer'] = $expDir . 'SMW_Serializer.php';
139142
140143 // Parser hooks
141144 $phDir = $smwgIP . 'includes/parserhooks/';
Index: trunk/extensions/SemanticMediaWiki/includes/SMW_Setup.php
@@ -156,6 +156,8 @@
157157 $wgAutoloadClasses['SMWExpElement'] = $expDir . 'SMW_Exp_Element.php';
158158 $wgAutoloadClasses['SMWExpLiteral'] = $expDir . 'SMW_Exp_Element.php';
159159 $wgAutoloadClasses['SMWExpResource'] = $expDir . 'SMW_Exp_Element.php';
 160+ $wgAutoloadClasses['SMWExportController'] = $expDir . 'SMW_ExportController.php';
 161+ $wgAutoloadClasses['SMWSerializer'] = $expDir . 'SMW_Serializer.php';
160162
161163 // Parser hooks
162164 $phDir = $smwgIP . 'includes/parserhooks/';
@@ -231,7 +233,6 @@
232234 $wgSpecialPages['SemanticStatistics'] = array( 'SMWSpecialSemanticStatistics' );
233235 $wgSpecialPageGroups['SemanticStatistics'] = 'wiki'; // Similar to Special:Statistics
234236
235 - $wgAutoloadClasses['SMWOWLExport'] = $smwgIP . 'includes/export/SMW_OWLExport.php';
236237 $wgAutoloadClasses['SMWSpecialOWLExport'] = $smwgIP . 'specials/Export/SMW_SpecialOWLExport.php';
237238 $wgSpecialPages['ExportRDF'] = array( 'SMWSpecialOWLExport' );
238239 $wgSpecialPageGroups['ExportRDF'] = 'smw_group';

Status & tagging log