r69387 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r69386‎ | r69387 | r69388 >
Date:15:28, 15 July 2010
Author:daniel
Status:deferred
Tags:
Comment:
added support for XML content with XPath
Modified paths:
  • /trunk/extensions/DataTransclusion/DataTransclusion.php (modified) (history)
  • /trunk/extensions/DataTransclusion/OpenLibrarySource.php (modified) (history)
  • /trunk/extensions/DataTransclusion/WebDataTransclusionSource.php (modified) (history)
  • /trunk/extensions/DataTransclusion/XmlDataTransclusionSource.php (added) (history)
  • /trunk/extensions/DataTransclusion/tests/DataTransclusionTest.php (modified) (history)
  • /trunk/extensions/DataTransclusion/tests/test-data-item-Berlin.rdf.xml (added) (history)

Diff [purge]

Index: trunk/extensions/DataTransclusion/tests/DataTransclusionTest.php
@@ -64,6 +64,7 @@
6565 $this->testHandleRecordTag();
6666 $this->testDBDataTransclusionSource();
6767 $this->testWebDataTransclusionSource();
 68+ $this->testXmlDataTransclusionSource();
6869 }
6970
7071 function testErrorMessage() {
@@ -467,6 +468,28 @@
468469 $this->assertEquals( $rec['id'], 3 );
469470 }
470471 }
 472+
 473+ function testXmlDataTransclusionSource() {
 474+ $spec = array(
 475+ 'name' => 'FOO',
 476+ 'keyFields' => 'item',
 477+ 'optionNames' => 'lang',
 478+ 'url' => 'http://acme.com/{name}',
 479+ 'dataFormat' => 'rdf+xml',
 480+ 'dataPath' => '/rdf:RDF',
 481+ 'errorPath' => '/html//*[@class="error"]',
 482+ 'fieldPathes' => array(
 483+ 'latitude' => './/pos:lat',
 484+ 'longitude' => './/pos:long',
 485+ ),
 486+ );
 487+
 488+ $spec['url'] = 'file://' . dirname( realpath( __FILE__ ) ) . '/test-data-item-{item}.rdf.xml';
 489+ $source = new XmlDataTransclusionSource( $spec );
 490+
 491+ $rec = $source->fetchRecord( 'item', 'Berlin' );
 492+ $this->assertEquals( $rec['latitude'], "52.461" );
 493+ }
471494 }
472495
473496 $wgShowExceptionDetails = true;
Index: trunk/extensions/DataTransclusion/tests/test-data-item-Berlin.rdf.xml
@@ -0,0 +1,38 @@
 2+<?xml version="1.0" encoding="UTF-8" ?>
 3+<rdf:RDF
 4+ xml:base="http://wikitravel.org/en/Berlin#"
 5+ xmlns:xml="http://www.w3.org/XML/1998/namespace"
 6+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
 7+ xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
 8+ xmlns:xsd="http://www.w3.org/2001/XMLSchema#"
 9+ xmlns:owl="http://www.w3.org/2002/07/owl#"
 10+ xmlns:dc="http://purl.org/dc/elements/1.1/"
 11+ xmlns:dcterms="http://purl.org/dc/terms/"
 12+ xmlns:vcard="http://www.w3.org/2001/vcard-rdf/3.0#"
 13+ xmlns:kb_sys="http://purl.org/knowledgebay/ontology/sys#"
 14+ xmlns:kb_person="http://purl.org/knowledgebay/ontology/person#"
 15+ xmlns:kb_keyword="http://purl.org/knowledgebay/ontology/keyword#"
 16+ xmlns:kb_lecture="http://purl.org/knowledgebay/ontology/lecture#"
 17+ xmlns:kb_location="http://purl.org/knowledgebay/ontology/location#"
 18+ xmlns:cc="http://web.resource.org/cc/"
 19+ xmlns:place="urn:x-wikitravel:en:"
 20+ xmlns:pos="http://www.w3.org/2003/01/geo/wgs84_pos#"
 21+ xmlns:wts="http://wikitravel.org/schema#">
 22+
 23+<rdf:Description rdf:about="http://wikitravel.org/en/Berlin">
 24+ <dc:relation rdf:resource="http://wikitravel.org/en/UNESCO_Creative_Cities"/>
 25+ <dcterms:spatial rdf:resource="urn:x-wikitravel:en:Berlin"/>
 26+ <wts:stage rdf:resource="http://wikitravel.org/schema#Usable"/>
 27+</rdf:Description>
 28+
 29+<rdf:Description rdf:about="http://wikitravel.org/en/Eastern_Germany">
 30+ <dcterms:spatial rdf:resource="urn:x-wikitravel:en:Eastern_Germany"/>
 31+</rdf:Description>
 32+
 33+<wts:City rdf:about="urn:x-wikitravel:en:Berlin">
 34+ <dcterms:isPartOf rdf:resource="urn:x-wikitravel:en:Eastern_Germany"/>
 35+ <pos:lat>52.461</pos:lat>
 36+ <pos:long>13.4747</pos:long>
 37+</wts:City>
 38+
 39+</rdf:RDF>
\ No newline at end of file
Index: trunk/extensions/DataTransclusion/WebDataTransclusionSource.php
@@ -68,19 +68,13 @@
6969 DataTransclusionSource::__construct( $spec );
7070
7171 $this->url = $spec[ 'url' ];
 72+ $this->dataPath = @$spec[ 'dataPath' ];
 73+ $this->errorPath = @$spec[ 'errorPath' ];
7274 $this->dataFormat = @$spec[ 'dataFormat' ];
73 - $this->dataPath = DataTransclusionSource::splitList( @$spec[ 'dataPath' ], '/' );
7475 $this->fieldPathes = @$spec[ 'fieldPathes' ];
75 - $this->errorPath = DataTransclusionSource::splitList( @$spec[ 'errorPath' ], '/' );
7676 $this->httpOptions = @$spec[ 'httpOptions' ];
7777 $this->timeout = @$spec[ 'timeout' ];
7878
79 - if ( $this->fieldPathes ) {
80 - foreach ( $this->fieldPathes as $i => $p ) {
81 - $this->fieldPathes[ $i ] = DataTransclusionSource::splitList( $p, '/' );
82 - }
83 - }
84 -
8579 if ( !$this->dataFormat ) {
8680 $this->dataFormat = 'php';
8781 }
@@ -200,16 +194,23 @@
201195 }
202196
203197 public function extractError( $data ) {
204 - return $this->extractField( $data, $this->errorPath );
 198+ $err = $this->resolvePath( $data, $this->errorPath );
 199+
 200+ $err = $this->asString( $err );
 201+ return $err;
205202 }
206203
207204 public function extractRecord( $data ) {
208 - $rec = $this->extractField( $data, $this->dataPath );
 205+ $rec = $this->resolvePath( $data, $this->dataPath );
209206
210207 $rec = $this->flattenRecord( $rec );
211208 return $rec;
212209 }
213210
 211+ public function asString( $value ) {
 212+ return "$value"; //XXX: will often fail. we could just throw here for non-primitives?
 213+ }
 214+
214215 public function flattenRecord( $rec ) {
215216 if ( !$rec ) return $rec;
216217
@@ -219,7 +220,7 @@
220221 foreach ( $this->fieldNames as $k ) {
221222 if ( isset( $this->fieldPathes[$k] ) ) {
222223 $path = $this->fieldPathes[$k];
223 - $v = $this->extractField( $rec, $path );
 224+ $v = $this->resolvePath( $rec, $path );
224225 } else {
225226 $v = $rec[ $k ];
226227 }
@@ -231,9 +232,15 @@
232233 } else {
233234 return $rec;
234235 }
 236+
 237+ foreach ( $rec as $k => $v ) {
 238+ if ( !is_null( $v ) && !is_string( $v ) && !is_int( $v ) ) {
 239+ $rec[ $k ] = $this->asString( $v );
 240+ }
 241+ }
235242 }
236243
237 - public function extractField( $data, $path ) {
 244+ public function resolvePath( $data, $path, $split = true ) {
238245 if ( is_object( $data ) ) {
239246 $data = wfObjectToArray( $data );
240247 }
@@ -242,6 +249,10 @@
243250 return $data;
244251 }
245252
 253+ if ( $split && is_string( $path ) ) {
 254+ $path = DataTransclusionSource::splitList( $path, '/' );
 255+ }
 256+
246257 if ( is_string( $path ) || is_int( $path ) ) {
247258 return @$data[ $path ];
248259 }
@@ -268,7 +279,7 @@
269280 $next = $data[ $p ];
270281
271282 if ( $next && $path ) {
272 - return $this->extractField( $next, $path );
 283+ return $this->resolvePath( $next, $path );
273284 } else {
274285 return $next;
275286 }
Index: trunk/extensions/DataTransclusion/DataTransclusion.php
@@ -33,6 +33,7 @@
3434 $wgAutoloadClasses['FakeDataTransclusionSource'] = $dir . 'DataTransclusionSource.php';
3535 $wgAutoloadClasses['DBDataTransclusionSource'] = $dir . 'DBDataTransclusionSource.php';
3636 $wgAutoloadClasses['WebDataTransclusionSource'] = $dir . 'WebDataTransclusionSource.php';
 37+$wgAutoloadClasses['XmlDataTransclusionSource'] = $dir . 'XmlDataTransclusionSource.php';
3738 $wgAutoloadClasses['OpenLibrarySource'] = $dir . 'OpenLibrarySource.php';
3839
3940 $wgHooks['ParserFirstCallInit'][] = 'efDataTransclusionSetHooks';
Index: trunk/extensions/DataTransclusion/XmlDataTransclusionSource.php
@@ -0,0 +1,115 @@
 2+<?php
 3+/**
 4+ * DataTransclusion Source implementation
 5+ *
 6+ * @file
 7+ * @ingroup Extensions
 8+ * @author Daniel Kinzler for Wikimedia Deutschland
 9+ * @copyright © 2010 Wikimedia Deutschland (Author: Daniel Kinzler)
 10+ * @licence GNU General Public Licence 2.0 or later
 11+ */
 12+
 13+if ( !defined( 'MEDIAWIKI' ) ) {
 14+ echo( "This file is an extension to the MediaWiki software and cannot be used standalone.\n" );
 15+ die( 1 );
 16+}
 17+
 18+/**
 19+ * Extension of WebDataTransclusionSource that allows parsing and processing of arbitrary XML.
 20+ *
 21+ * In addition to the options supported by the WebDataTransclusionSource class,
 22+ * XmlDataTransclusionSource accepts some additional options, and changes the convention for others.
 23+ *
 24+ * * $spec['dataFormat']: must be "xml" or end with "+xml" if given. Defaults to "xml".
 25+ * * $spec['dataPath']: XPath to the actual data in the structure returned from the
 26+ * HTTP request. This uses standard W3C XPath syntax. REQUIRED.
 27+ * * $spec['fieldPathes']: an associative array giving an XPath for each field which points
 28+ * to the actual field values inside the record, that is, the structure that
 29+ * $spec['dataPath'] resolved to. Useful when field values are returned as complex
 30+ * records. For more complex processing, override the method flattenRecord().
 31+ * If given, $spec['fieldNames'] defaults to array_keys( $spec['fieldPathes'] ).
 32+ * * $spec['errorPath']: XPath to error messages in the structure returned from the
 33+ * HTTP request. If an
 34+ * entry is found at the given position in the response structure, the request
 35+ * is assumed to have failed. For more complex detection of errors, override
 36+ * extractError(). REQUIRED.
 37+ *
 38+ * For more information on options supported by DataTransclusionSource and
 39+ * WebDataTransclusionSource, see the class-level documentation there.
 40+ */
 41+class XmlDataTransclusionSource extends WebDataTransclusionSource {
 42+
 43+ function __construct( $spec ) {
 44+ if ( !isset( $spec['dataFormat'] ) ) {
 45+ $spec['dataFormat'] = 'xml';
 46+ }
 47+
 48+ if ( !preg_match( '/^(.*\+)?xml$/', $spec['dataFormat'] ) ) {
 49+ throw new MWException( "not a known XML data format: {$spec['dataFormat']}" );
 50+ }
 51+
 52+ parent::__construct( $spec );
 53+ }
 54+
 55+ public function decodeData( $raw, $format = null ) {
 56+ $dom = new DOMDocument();
 57+ $dom->loadXML( $raw );
 58+ return $dom->documentElement;
 59+ }
 60+
 61+ public function resolvePath( $dom, $xpath ) {
 62+ $lookup = new DOMXPath( $dom->ownerDocument );
 63+ $res = $lookup->query( $xpath, $dom );
 64+
 65+ if ( $res instanceof DOMNodeList ) {
 66+ if ( $res->length == 0 ) $res = null;
 67+ else $res = $res->item( 0 );
 68+ }
 69+
 70+ return $res;
 71+ }
 72+
 73+ public function asString( $v ) {
 74+ if ( is_object($v) ) {
 75+ if ( $v instanceof DOMNodeList ) {
 76+ if ( $v->length ) $v = $v->item( 0 );
 77+ else $v = null;
 78+ }
 79+
 80+ if ( $v instanceof DOMNamedNodeMap ) {
 81+ $v = $v->item( 0 );
 82+ }
 83+
 84+ if ( $v instanceof DOMNode ) {
 85+ $v = $v->textContent;
 86+ }
 87+ }
 88+
 89+ return "$v";
 90+ }
 91+
 92+ public function flattenRecord( $rec ) {
 93+ $rec = parent::flattenRecord( $rec );
 94+
 95+ if ( !$rec ) return $rec;
 96+
 97+ foreach ( $rec as $k => $v ) {
 98+ if ( is_object($v) ) {
 99+ if ( $v instanceof DOMNodeList ) {
 100+ $v = $v->item( 0 );
 101+ }
 102+
 103+ if ( $v instanceof DOMNamedNodeMap ) {
 104+ $v = $v->item( 0 );
 105+ }
 106+
 107+ if ( $v instanceof DOMNode ) {
 108+ $rec[ $k ] = $v->textContent;
 109+ }
 110+ }
 111+ }
 112+
 113+ return $rec;
 114+ }
 115+
 116+}
Property changes on: trunk/extensions/DataTransclusion/XmlDataTransclusionSource.php
___________________________________________________________________
Added: svn:mergeinfo
Added: svn:eol-style
1117 + native
Index: trunk/extensions/DataTransclusion/OpenLibrarySource.php
@@ -38,6 +38,7 @@
3939 if ( !isset( $spec['url'] ) ) {
4040 $spec['url'] = 'http://openlibrary.org/api/books?bibkeys=ISBN:{isbn}&details=true';
4141 //TODO: custom function to normalize ISBN (trim, strip dashes, correct checksum, etc)
 42+ // <^demon> Daniel_WMDE: I believe Special:BookSources has an ISBN normalization thing. Might be worth looking at.
4243 }
4344
4445 if ( !isset( $spec['dataFormat'] ) ) {

Status & tagging log