Index: trunk/extensions/DataTransclusion/tests/DataTransclusionTest.php |
— | — | @@ -64,6 +64,7 @@ |
65 | 65 | $this->testHandleRecordTag(); |
66 | 66 | $this->testDBDataTransclusionSource(); |
67 | 67 | $this->testWebDataTransclusionSource(); |
| 68 | + $this->testXmlDataTransclusionSource(); |
68 | 69 | } |
69 | 70 | |
70 | 71 | function testErrorMessage() { |
— | — | @@ -467,6 +468,28 @@ |
468 | 469 | $this->assertEquals( $rec['id'], 3 ); |
469 | 470 | } |
470 | 471 | } |
| 472 | + |
| 473 | + function testXmlDataTransclusionSource() { |
| 474 | + // Reads the bundled RDF/XML fixture through a file:// URL; assertEquals() takes the expected value first. |
| 475 | + $spec = array( |
| 476 | + 'name' => 'FOO', |
| 477 | + 'keyFields' => 'item', |
| 478 | + 'optionNames' => 'lang', |
| 479 | + 'url' => 'file://' . dirname( realpath( __FILE__ ) ) . '/test-data-item-{item}.rdf.xml', |
| 480 | + 'dataFormat' => 'rdf+xml', |
| 481 | + 'dataPath' => '/rdf:RDF', |
| 482 | + 'errorPath' => '/html//*[@class="error"]', |
| 483 | + 'fieldPathes' => array( |
| 484 | + 'latitude' => './/pos:lat', |
| 485 | + 'longitude' => './/pos:long', |
| 486 | + ), |
| 487 | + ); |
| 488 | + |
| 489 | + $source = new XmlDataTransclusionSource( $spec ); |
| 490 | + $rec = $source->fetchRecord( 'item', 'Berlin' ); |
| 491 | + $this->assertEquals( "52.461", $rec['latitude'] ); |
| 492 | + $this->assertEquals( "13.4747", $rec['longitude'] ); |
| 493 | + } |
471 | 494 | } |
472 | 495 | |
473 | 496 | $wgShowExceptionDetails = true; |
Index: trunk/extensions/DataTransclusion/tests/test-data-item-Berlin.rdf.xml |
— | — | @@ -0,0 +1,38 @@ |
| 2 | +<?xml version="1.0" encoding="UTF-8" ?> |
| 3 | +<rdf:RDF |
| 4 | + xml:base="http://wikitravel.org/en/Berlin#" |
| 5 | + xmlns:xml="http://www.w3.org/XML/1998/namespace" |
| 6 | + xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" |
| 7 | + xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" |
| 8 | + xmlns:xsd="http://www.w3.org/2001/XMLSchema#" |
| 9 | + xmlns:owl="http://www.w3.org/2002/07/owl#" |
| 10 | + xmlns:dc="http://purl.org/dc/elements/1.1/" |
| 11 | + xmlns:dcterms="http://purl.org/dc/terms/" |
| 12 | + xmlns:vcard="http://www.w3.org/2001/vcard-rdf/3.0#" |
| 13 | + xmlns:kb_sys="http://purl.org/knowledgebay/ontology/sys#" |
| 14 | + xmlns:kb_person="http://purl.org/knowledgebay/ontology/person#" |
| 15 | + xmlns:kb_keyword="http://purl.org/knowledgebay/ontology/keyword#" |
| 16 | + xmlns:kb_lecture="http://purl.org/knowledgebay/ontology/lecture#" |
| 17 | + xmlns:kb_location="http://purl.org/knowledgebay/ontology/location#" |
| 18 | + xmlns:cc="http://web.resource.org/cc/" |
| 19 | + xmlns:place="urn:x-wikitravel:en:" |
| 20 | + xmlns:pos="http://www.w3.org/2003/01/geo/wgs84_pos#" |
| 21 | + xmlns:wts="http://wikitravel.org/schema#"> |
| 22 | + |
| 23 | +<rdf:Description rdf:about="http://wikitravel.org/en/Berlin"> |
| 24 | + <dc:relation rdf:resource="http://wikitravel.org/en/UNESCO_Creative_Cities"/> |
| 25 | + <dcterms:spatial rdf:resource="urn:x-wikitravel:en:Berlin"/> |
| 26 | + <wts:stage rdf:resource="http://wikitravel.org/schema#Usable"/> |
| 27 | +</rdf:Description> |
| 28 | + |
| 29 | +<rdf:Description rdf:about="http://wikitravel.org/en/Eastern_Germany"> |
| 30 | + <dcterms:spatial rdf:resource="urn:x-wikitravel:en:Eastern_Germany"/> |
| 31 | +</rdf:Description> |
| 32 | + |
| 33 | +<wts:City rdf:about="urn:x-wikitravel:en:Berlin"> |
| 34 | + <dcterms:isPartOf rdf:resource="urn:x-wikitravel:en:Eastern_Germany"/> |
| 35 | + <pos:lat>52.461</pos:lat> |
| 36 | + <pos:long>13.4747</pos:long> |
| 37 | +</wts:City> |
| 38 | + |
| 39 | +</rdf:RDF> |
\ No newline at end of file |
Index: trunk/extensions/DataTransclusion/WebDataTransclusionSource.php |
— | — | @@ -68,19 +68,13 @@ |
69 | 69 | DataTransclusionSource::__construct( $spec ); |
70 | 70 | |
71 | 71 | $this->url = $spec[ 'url' ]; |
| 72 | + $this->dataPath = @$spec[ 'dataPath' ]; |
| 73 | + $this->errorPath = @$spec[ 'errorPath' ]; |
72 | 74 | $this->dataFormat = @$spec[ 'dataFormat' ]; |
73 | | - $this->dataPath = DataTransclusionSource::splitList( @$spec[ 'dataPath' ], '/' ); |
74 | 75 | $this->fieldPathes = @$spec[ 'fieldPathes' ]; |
75 | | - $this->errorPath = DataTransclusionSource::splitList( @$spec[ 'errorPath' ], '/' ); |
76 | 76 | $this->httpOptions = @$spec[ 'httpOptions' ]; |
77 | 77 | $this->timeout = @$spec[ 'timeout' ]; |
78 | 78 | |
79 | | - if ( $this->fieldPathes ) { |
80 | | - foreach ( $this->fieldPathes as $i => $p ) { |
81 | | - $this->fieldPathes[ $i ] = DataTransclusionSource::splitList( $p, '/' ); |
82 | | - } |
83 | | - } |
84 | | - |
85 | 79 | if ( !$this->dataFormat ) { |
86 | 80 | $this->dataFormat = 'php'; |
87 | 81 | } |
— | — | @@ -200,16 +194,23 @@ |
201 | 195 | } |
202 | 196 | |
203 | 197 | public function extractError( $data ) { |
204 | | - return $this->extractField( $data, $this->errorPath ); |
| 198 | + $err = $this->resolvePath( $data, $this->errorPath ); |
| 199 | + |
| 200 | + $err = $this->asString( $err ); |
| 201 | + return $err; |
205 | 202 | } |
206 | 203 | |
207 | 204 | public function extractRecord( $data ) { |
208 | | - $rec = $this->extractField( $data, $this->dataPath ); |
| 205 | + $rec = $this->resolvePath( $data, $this->dataPath ); |
209 | 206 | |
210 | 207 | $rec = $this->flattenRecord( $rec ); |
211 | 208 | return $rec; |
212 | 209 | } |
213 | 210 | |
| 211 | + public function asString( $value ) { |
| 212 | + return "$value"; //XXX: will often fail. we could just throw here for non-primitives? |
| 213 | + } |
| 214 | + |
214 | 215 | public function flattenRecord( $rec ) { |
215 | 216 | if ( !$rec ) return $rec; |
216 | 217 | |
— | — | @@ -219,7 +220,7 @@ |
220 | 221 | foreach ( $this->fieldNames as $k ) { |
221 | 222 | if ( isset( $this->fieldPathes[$k] ) ) { |
222 | 223 | $path = $this->fieldPathes[$k]; |
223 | | - $v = $this->extractField( $rec, $path ); |
| 224 | + $v = $this->resolvePath( $rec, $path ); |
224 | 225 | } else { |
225 | 226 | $v = $rec[ $k ]; |
226 | 227 | } |
— | — | @@ -231,9 +232,15 @@ |
232 | 233 | } else { |
233 | 234 | return $rec; |
234 | 235 | } |
| 236 | + |
| 237 | + foreach ( $rec as $k => $v ) { |
| 238 | + if ( !is_null( $v ) && !is_string( $v ) && !is_int( $v ) ) { |
| 239 | + $rec[ $k ] = $this->asString( $v ); |
| 240 | + } |
| 241 | + } |
235 | 242 | } |
236 | 243 | |
237 | | - public function extractField( $data, $path ) { |
| 244 | + public function resolvePath( $data, $path, $split = true ) { |
238 | 245 | if ( is_object( $data ) ) { |
239 | 246 | $data = wfObjectToArray( $data ); |
240 | 247 | } |
— | — | @@ -242,6 +249,10 @@ |
243 | 250 | return $data; |
244 | 251 | } |
245 | 252 | |
| 253 | + if ( $split && is_string( $path ) ) { |
| 254 | + $path = DataTransclusionSource::splitList( $path, '/' ); |
| 255 | + } |
| 256 | + |
246 | 257 | if ( is_string( $path ) || is_int( $path ) ) { |
247 | 258 | return @$data[ $path ]; |
248 | 259 | } |
— | — | @@ -268,7 +279,7 @@ |
269 | 280 | $next = $data[ $p ]; |
270 | 281 | |
271 | 282 | if ( $next && $path ) { |
272 | | - return $this->extractField( $next, $path ); |
| 283 | + return $this->resolvePath( $next, $path ); |
273 | 284 | } else { |
274 | 285 | return $next; |
275 | 286 | } |
Index: trunk/extensions/DataTransclusion/DataTransclusion.php |
— | — | @@ -33,6 +33,7 @@ |
34 | 34 | $wgAutoloadClasses['FakeDataTransclusionSource'] = $dir . 'DataTransclusionSource.php'; |
35 | 35 | $wgAutoloadClasses['DBDataTransclusionSource'] = $dir . 'DBDataTransclusionSource.php'; |
36 | 36 | $wgAutoloadClasses['WebDataTransclusionSource'] = $dir . 'WebDataTransclusionSource.php'; |
| 37 | +$wgAutoloadClasses['XmlDataTransclusionSource'] = $dir . 'XmlDataTransclusionSource.php'; |
37 | 38 | $wgAutoloadClasses['OpenLibrarySource'] = $dir . 'OpenLibrarySource.php'; |
38 | 39 | |
39 | 40 | $wgHooks['ParserFirstCallInit'][] = 'efDataTransclusionSetHooks'; |
Index: trunk/extensions/DataTransclusion/XmlDataTransclusionSource.php |
— | — | @@ -0,0 +1,115 @@ |
| 2 | +<?php |
| 3 | +/** |
| 4 | + * DataTransclusion Source implementation |
| 5 | + * |
| 6 | + * @file |
| 7 | + * @ingroup Extensions |
| 8 | + * @author Daniel Kinzler for Wikimedia Deutschland |
| 9 | + * @copyright © 2010 Wikimedia Deutschland (Author: Daniel Kinzler) |
| 10 | + * @licence GNU General Public Licence 2.0 or later |
| 11 | + */ |
| 12 | + |
| 13 | +if ( !defined( 'MEDIAWIKI' ) ) { |
| 14 | + echo( "This file is an extension to the MediaWiki software and cannot be used standalone.\n" ); |
| 15 | + die( 1 ); |
| 16 | +} |
| 17 | + |
| 18 | +/** |
| 19 | + * Extension of WebDataTransclusionSource that allows parsing and processing of arbitrary XML. |
| 20 | + * |
| 21 | + * In addition to the options supported by the WebDataTransclusionSource class, |
| 22 | + * XmlDataTransclusionSource accepts some additional options, and changes the convention for others. |
| 23 | + * |
| 24 | + * * $spec['dataFormat']: must be "xml" or end with "+xml" if given. Defaults to "xml". |
| 25 | + * * $spec['dataPath']: xpath to the actual data in the structure returned from the |
| 26 | + * HTTP request. This uses standard W3C XPath syntax. REQUIRED. |
| 27 | + * * $spec['fieldPathes']: an associative array giving an XPath for each field which points |
| 28 | + * to the actual field values inside the record, that is, the structure that |
| 29 | + * $spec['dataPath'] resolved to. Useful when field values are returned as complex |
| 30 | + * records. For more complex processing, override the method flattenRecord(). |
| 31 | + * If given, $spec['fieldNames'] defaults to array_keys( $spec['fieldPathes'] ). |
| 32 | + * * $spec['errorPath']: xpath to error messages in the structure returned from the |
| 33 | + * HTTP request. If an |
| 34 | + * entry is found at the given position in the response structure, the request |
| 35 | + * is assumed to have failed. For more complex detection of errors, override |
| 36 | + * extractError(). REQUIRED. |
| 37 | + * |
| 38 | + * For more information on options supported by DataTransclusionSource and |
| 39 | + * WebDataTransclusionSource, see the class-level documentation there. |
| 40 | + */ |
| 41 | +class XmlDataTransclusionSource extends WebDataTransclusionSource { |
| 42 | + |
| 43 | + /** |
| 44 | + * Sets up the source, defaulting $spec['dataFormat'] to "xml". |
| 45 | + * |
| 46 | + * @param $spec Array: source specification, passed on to WebDataTransclusionSource. |
| 47 | + * @throws MWException if $spec['dataFormat'] is neither "xml" nor ends in "+xml". |
| 48 | + */ |
| 49 | + function __construct( $spec ) { |
| 50 | + if ( !isset( $spec['dataFormat'] ) ) { |
| 51 | + $spec['dataFormat'] = 'xml'; |
| 52 | + } |
| 53 | + |
| 54 | + if ( !preg_match( '/^(.*\+)?xml$/', $spec['dataFormat'] ) ) { |
| 55 | + throw new MWException( "not a known XML data format: {$spec['dataFormat']}" ); |
| 56 | + } |
| 57 | + |
| 58 | + parent::__construct( $spec ); |
| 59 | + } |
| 60 | + |
| 61 | + /** |
| 62 | + * Parses the raw response body as XML. |
| 63 | + * |
| 64 | + * @param $raw String: the XML text to parse. |
| 65 | + * @param $format String: ignored; the data is always treated as XML. |
| 66 | + * @return DOMElement|null the root element, or null if $raw is empty or could not be parsed. |
| 67 | + */ |
| 68 | + public function decodeData( $raw, $format = null ) { |
| 69 | + // loadXML() rejects an empty string outright, so there is nothing to parse. |
| 70 | + if ( !is_string( $raw ) || $raw === '' ) { |
| 71 | + return null; |
| 72 | + } |
| 73 | + |
| 74 | + $dom = new DOMDocument(); |
| 75 | + if ( !$dom->loadXML( $raw ) ) { |
| 76 | + return null; |
| 77 | + } |
| 78 | + |
| 79 | + return $dom->documentElement; |
| 80 | + } |
| 81 | + |
| 82 | + /** |
| 83 | + * Evaluates an XPath expression relative to a DOM node and returns the first match. |
| 84 | + * |
| 85 | + * @param $dom DOMNode: the context node the expression is evaluated against. |
| 86 | + * @param $xpath String: the XPath expression. |
| 87 | + * @param $split Boolean: unused; accepted only to keep the signature compatible with |
| 88 | + * WebDataTransclusionSource::resolvePath(). |
| 89 | + * @return mixed the first matching node; null if nothing matched or $dom is not a DOM node; |
| 90 | + * otherwise whatever DOMXPath::query() returned (false for an invalid expression). |
| 91 | + */ |
| 92 | + public function resolvePath( $dom, $xpath, $split = true ) { |
| 93 | + if ( !( $dom instanceof DOMNode ) ) { |
| 94 | + return null; // e.g. decodeData() could not parse the response |
| 95 | + } |
| 96 | + |
| 97 | + // A DOMDocument has no ownerDocument; it is its own document. |
| 98 | + $doc = ( $dom instanceof DOMDocument ) ? $dom : $dom->ownerDocument; |
| 99 | + $lookup = new DOMXPath( $doc ); |
| 100 | + $res = $lookup->query( $xpath, $dom ); |
| 101 | + |
| 102 | + if ( $res instanceof DOMNodeList ) { |
| 103 | + $res = $res->length ? $res->item( 0 ) : null; |
| 104 | + } |
| 105 | + |
| 106 | + return $res; |
| 107 | + } |
| 108 | + |
| 109 | + /** |
| 110 | + * Reduces a DOM collection to its first entry. |
| 111 | + * |
| 112 | + * @param $v mixed: any value. |
| 113 | + * @return mixed the first item (null if there is none) of a DOMNodeList or DOMNamedNodeMap; $v itself otherwise. |
| 114 | + */ |
| 115 | + private function firstNode( $v ) { |
| 116 | + if ( $v instanceof DOMNodeList || $v instanceof DOMNamedNodeMap ) { |
| 117 | + return $v->item( 0 ); |
| 118 | + } |
| 119 | + |
| 120 | + return $v; |
| 121 | + } |
| 122 | + |
| 123 | + /** |
| 124 | + * Converts a value, in particular a DOM node or a collection of DOM nodes, to a string. |
| 125 | + * |
| 126 | + * @param $v mixed: a DOM node yields its text content; a collection is represented by its first entry. |
| 127 | + * @return String the string form of $v (empty for null or an empty collection). |
| 128 | + */ |
| 129 | + public function asString( $v ) { |
| 130 | + $v = $this->firstNode( $v ); |
| 131 | + |
| 132 | + if ( $v instanceof DOMNode ) { |
| 133 | + $v = $v->textContent; |
| 134 | + } |
| 135 | + |
| 136 | + return "$v"; |
| 137 | + } |
| 138 | + |
| 139 | + /** |
| 140 | + * Flattens the record the way the parent class does, then replaces DOM values by plain text. |
| 141 | + * |
| 142 | + * @param $rec mixed: the record as resolved via $spec['dataPath']. |
| 143 | + * @return mixed the flattened record; a value that is a DOM node (or a collection of nodes) is |
| 144 | + * replaced by the text content of the (first) node, an empty collection by null. |
| 145 | + */ |
| 146 | + public function flattenRecord( $rec ) { |
| 147 | + $rec = parent::flattenRecord( $rec ); |
| 148 | + |
| 149 | + if ( !$rec ) return $rec; |
| 150 | + |
| 151 | + foreach ( $rec as $k => $v ) { |
| 152 | + if ( is_object( $v ) ) { |
| 153 | + $v = $this->firstNode( $v ); |
| 154 | + // An empty collection becomes null instead of staying a list object. |
| 155 | + $rec[ $k ] = ( $v instanceof DOMNode ) ? $v->textContent : $v; |
| 156 | + } |
| 157 | + } |
| 158 | + |
| 159 | + return $rec; |
| 160 | + } |
| 161 | + |
| 162 | +} |
Property changes on: trunk/extensions/DataTransclusion/XmlDataTransclusionSource.php |
___________________________________________________________________ |
Added: svn:mergeinfo |
Added: svn:eol-style |
1 | 117 | + native |
Index: trunk/extensions/DataTransclusion/OpenLibrarySource.php |
— | — | @@ -38,6 +38,7 @@ |
39 | 39 | if ( !isset( $spec['url'] ) ) { |
40 | 40 | $spec['url'] = 'http://openlibrary.org/api/books?bibkeys=ISBN:{isbn}&details=true'; |
41 | 41 | //TODO: custom function to normalize ISBN (trim, strip dashes, correct checksum, etc) |
| 42 | + // <^demon> Daniel_WMDE: I believe Special:BookSources has an ISBN normalization thing. Might be worth looking at. |
42 | 43 | } |
43 | 44 | |
44 | 45 | if ( !isset( $spec['dataFormat'] ) ) { |