Index: trunk/extensions/DataTransclusion/DataTransclusionSource.php |
— | — | @@ -31,7 +31,20 @@ |
32 | 32 | * to a key, to refine the output. Optional. |
33 | 33 | * * $spec['fieldNames']: names of all fields present in each record. |
34 | 34 | * Fields not listed here will not be available on the wiki, |
35 | | - * even if they are returned by the data source. REQUIRED. |
| 35 | + * even if they are returned by the data source. If not given, this defaults to |
| 36 | + * $spec['keyFields'] + array_keys( $spec['fieldInfo'] ). |
| 37 | + * * $spec['fieldInfo']: Associative array mapping logical field names to additional |
| 38 | + * information for using and interpreting these fields. Different data sources |
| 39 | + * may allow different hints for each field. The following hints are known by |
| 40 | + * default: |
| 41 | + * * $spec['fieldInfo'][$field]['type']: specifies the data type of the field: |
| 42 | + * 'int' for integers, 'float' or 'decimal' for decimals, or 'string' for |
| 43 | + * string fields. Serialization types 'json', 'wddx' and 'php' are also |
| 44 | + * supported. Defaults to 'string'. |
| 45 | + * * $spec['fieldInfo'][$field]['normalization']: normalization to be applied for |
| 46 | + * this field, when used as a query key. This may be a callable, or an object |
| 47 | + * that supports the function normalize(), or a regular expression for patterns |
| 48 | + * to be removed from the value. |
36 | 49 | * * $spec['cacheDuration']: the number of seconds a result from this source |
37 | 50 | * may be cached for. If not set, results are assumed to be cacheable |
38 | 51 | * indefinitely. This setting determines the expiry time of the parser |
— | — | @@ -87,10 +100,24 @@ |
88 | 101 | $this->keyFields = self::splitList( $spec[ 'keyFields' ] ); |
89 | 102 | $this->optionNames = self::splitList( @$spec[ 'optionNames' ] ); |
90 | 103 | |
| 104 | + if ( isset( $spec[ 'fieldInfo' ] ) ) { |
| 105 | + $this->fieldInfo = $spec[ 'fieldInfo' ]; |
| 106 | + } else { |
| 107 | + $this->fieldInfo = null; |
| 108 | + } |
| 109 | + |
91 | 110 | if ( isset( $spec[ 'fieldNames' ] ) ) { |
92 | 111 | $this->fieldNames = self::splitList( $spec[ 'fieldNames' ] ); |
| 112 | + } else if ( isset( $spec[ 'fieldInfo' ] ) ) { |
| 113 | + $this->fieldNames = array_keys( $spec[ 'fieldInfo' ] ); |
93 | 114 | } else { |
94 | 115 | $this->fieldNames = $this->keyFields; |
| 116 | + |
| 117 | + if ( !empty( $this->fieldInfo ) ) { |
| 118 | + $this->fieldNames = array_merge( $this->fieldNames, array_keys( $this->fieldInfo ) ); |
| 119 | + } |
| 120 | + |
| 121 | + $this->fieldNames = array_unique( $this->fieldNames ); |
95 | 122 | } |
96 | 123 | |
97 | 124 | if ( !empty( $spec[ 'cacheDuration' ] ) ) { |
— | — | @@ -120,6 +147,51 @@ |
121 | 148 | $this->sourceInfo[ 'source-name' ] = $this->name; // force this one |
122 | 149 | } |
123 | 150 | |
| 151 | + public function normalize( $key, $value, $norm = null ) { |
| 152 | + if ( $norm ); |
| 153 | + else if ( isset( $this->fieldInfo[ $key ]['normalization'] ) ) { |
| 154 | + $norm = trim( $this->fieldInfo[ $key ]['normalization'] ); |
| 155 | + } else { |
| 156 | + return $value; |
| 157 | + } |
| 158 | + |
| 159 | + if ( is_object( $norm ) ) { |
| 160 | + return $norm->normalize( $value ); |
| 161 | + } else if ( is_callable( $norm ) || preg_match( '/^(\w[\w\d]*::)?(\w[\w\d]*)$/', $norm ) ) { |
| 162 | + return call_user_func( $norm, $value ); |
| 163 | + } else if ( is_array( $norm ) ) { |
| 164 | + return preg_replace( $norm[0], $norm[1], $value ); |
| 165 | + } else { |
| 166 | + return preg_replace( $norm, '', $value ); |
| 167 | + } |
| 168 | + } |
| 169 | + |
| 170 | + public function convert( $key, $value, $format = null ) { |
| 171 | + if ( $format ); |
| 172 | + else if ( isset( $this->fieldInfo[ $key ]['type'] ) ) { |
| 173 | + $format = strtolower( trim( $this->fieldInfo[ $key ]['type'] ) ); |
| 174 | + } else { |
| 175 | + return (string)$value; |
| 176 | + } |
| 177 | + |
| 178 | + if ( $format == 'int' ) { |
| 179 | + return (int)$value; |
| 180 | + } else if ( $format == 'decimal' || $format == 'float' ) { |
| 181 | + return (float)$value; |
| 182 | + } else if ( $format == 'json' || $format == 'js' ) { |
| 183 | + return DataTransclusionSource::decodeJson( $value ); |
| 184 | + } else if ( $format == 'wddx' ) { |
| 185 | + return DataTransclusionSource::decodeWddx( $value ); |
| 186 | + } else if ( $format == 'xml' ) { |
| 187 | + return DataTransclusionSource::parseXml( $value ); #WARNING: returns DOM |
| 188 | + } else if ( $format == 'php' || $format == 'pser' ) { |
| 189 | + return DataTransclusionSource::decodeSerialized( $value ); |
| 190 | + } else { |
| 191 | + return (string)$value; |
| 192 | + } |
| 193 | + } |
| 194 | + |
| 195 | + |
124 | 196 | public function getName() { |
125 | 197 | return $this->name; |
126 | 198 | } |
— | — | @@ -147,6 +219,9 @@ |
148 | 220 | public abstract function fetchRawRecord( $field, $value, $options = null ); |
149 | 221 | |
150 | 222 | public function fetchRecord( $field, $value, $options = null ) { |
| 223 | + $value = $this->normalize( $field, $value ); |
| 224 | + $value = $this->convert( $field, $value ); |
| 225 | + |
151 | 226 | $rec = $this->fetchRawRecord( $field, $value, $options ); |
152 | 227 | |
153 | 228 | if ( $this->transformer ) { |
— | — | @@ -155,6 +230,27 @@ |
156 | 231 | |
157 | 232 | return $rec; |
158 | 233 | } |
| 234 | + |
| 235 | + public static function decodeSerialized( $raw ) { |
| 236 | + return unserialize( $raw ); |
| 237 | + } |
| 238 | + |
| 239 | + public static function decodeJson( $raw ) { |
| 240 | + $raw = preg_replace( '/^\s*(var\s)?\w([\w\d]*)\s+=\s*|\s*;\s*$/sim', '', $raw); |
| 241 | + return FormatJson::decode( $raw, true ); |
| 242 | + } |
| 243 | + |
| 244 | + public static function decodeWddx( $raw ) { |
| 245 | + return wddx_unserialize( $raw ); |
| 246 | + } |
| 247 | + |
| 248 | + public static function parseXml( $raw ) { |
| 249 | + $dom = new DOMDocument(); |
| 250 | + $dom->loadXML( $raw ); |
| 251 | + |
| 252 | + #NOTE: returns a DOM, RecordTransformer must be aware! |
| 253 | + return $dom->documentElement; |
| 254 | + } |
159 | 255 | } |
160 | 256 | |
161 | 257 | /** |
— | — | @@ -279,22 +375,4 @@ |
280 | 376 | public function fetchRawRecord( $field, $value, $options = null ) { |
281 | 377 | return @$this->lookup[ $field ][ $value ]; |
282 | 378 | } |
283 | | - |
284 | | - public static function decodeJson( $raw ) { |
285 | | - $raw = preg_replace( '/^\s*(var\s)?\w([\w\d]*)\s+=\s*|\s*;\s*$/sim', '', $raw); |
286 | | - return FormatJson::decode( $raw, true ); |
287 | | - } |
288 | | - |
289 | | - public static function decodeWddx( $raw ) { |
290 | | - return wddx_unserialize( $raw ); |
291 | | - } |
292 | | - |
293 | | - public static function parseXml( $raw ) { |
294 | | - $dom = new DOMDocument(); |
295 | | - $dom->loadXML( $raw ); |
296 | | - |
297 | | - #NOTE: returns a DOM, RecordTransformer must be aware! |
298 | | - return $dom->documentElement; |
299 | | - } |
300 | | - |
301 | 379 | } |
Index: trunk/extensions/DataTransclusion/ValueNormalizers.php |
— | — | @@ -0,0 +1,29 @@ |
| 2 | +<?php |
| 3 | +/** |
| 4 | + * Collection of normalization functions to be applied to data values. |
| 5 | + * |
| 6 | + * @file |
| 7 | + * @ingroup Extensions |
| 8 | + * @author Daniel Kinzler for Wikimedia Deutschland |
| 9 | + * @copyright © 2010 Wikimedia Deutschland (Author: Daniel Kinzler) |
| 10 | + * @licence GNU General Public Licence 2.0 or later |
| 11 | + */ |
| 12 | + |
| 13 | +if ( !defined( 'MEDIAWIKI' ) ) { |
| 14 | + echo( "This file is an extension to the MediaWiki software and cannot be used standalone.\n" ); |
| 15 | + die( 1 ); |
| 16 | +} |
| 17 | + |
| 18 | +/* |
| 19 | + */ |
| 20 | +abstract class ValueNormalizers { |
| 21 | + static function trim( $v ) { |
| 22 | + return trim( $v ); |
| 23 | + } |
| 24 | + |
| 25 | + static function strip_punctuation( $v ) { |
| 26 | + $w = preg_replace( '/[^a-zA-Z0-9]+/', '', $v ); |
| 27 | + return $w; |
| 28 | + } |
| 29 | +} |
| 30 | + |
Property changes on: trunk/extensions/DataTransclusion/ValueNormalizers.php |
___________________________________________________________________ |
Added: svn:mergeinfo |
Added: svn:eol-style |
1 | 31 | + native |
Index: trunk/extensions/DataTransclusion/MAB2RecordTransformer.php |
— | — | @@ -14,6 +14,7 @@ |
15 | 15 | die( 1 ); |
16 | 16 | } |
17 | 17 | |
| 18 | +global $mab_field_map; # auto-loaded from within a function, so top scope is not global! |
18 | 19 | $mab_field_map = array( |
19 | 20 | 'title' => array( '081', '200b', '304', '310', '331', '335' ), |
20 | 21 | 'series' => array( '089', '090', '451', '545' ), |
— | — | @@ -22,6 +23,7 @@ |
23 | 24 | 'author' => array( ), # added later |
24 | 25 | 'editor' => array( ), # added later |
25 | 26 | 'institution' => array( '200', '204', ), |
| 27 | + 'language' => array( '037', '037a', '037b', '037c', '037z' ), |
26 | 28 | 'annote' => array( '334', '434', '501' ), |
27 | 29 | 'note' => array( '359' ), |
28 | 30 | 'journal' => array( '376' ), |
— | — | @@ -31,10 +33,15 @@ |
32 | 34 | 'pages' => array( '433' ), |
33 | 35 | 'type' => array( '509' ), |
34 | 36 | 'copyright' => array( '531' ), |
35 | | - 'ISBN' => array( '540', '540a', '540b' ), |
36 | | - 'ISSN' => array( '542', ), |
37 | | - 'LCC' => array( '544', ), |
38 | | - 'DOI' => array( '552', ), |
| 37 | + 'isbn' => array( '540', '540a', '540b' ), |
| 38 | + 'issn' => array( '542', ), |
| 39 | + 'lcc' => array( '544', '25l' ), |
| 40 | + 'doi' => array( '552', ), |
| 41 | + 'dnb' => array( '025a', ), |
| 42 | + 'zdb' => array( '025z', ), |
| 43 | + 'zka' => array( '025g', ), |
| 44 | + 'hzb' => array( '025h', ), |
| 45 | + 'id' => array( '001', ), |
39 | 46 | 'howpublished' => array( '590', '596', ), |
40 | 47 | ); |
41 | 48 | |
— | — | @@ -48,15 +55,15 @@ |
49 | 56 | } |
50 | 57 | |
51 | 58 | $mab_field_map['author'][] = '333'; |
| 59 | +$mab_field_map['author'][] = '359'; |
| 60 | +$mab_field_map['author'][] = '369'; |
52 | 61 | |
53 | 62 | /** |
54 | 63 | * Implementation of RecordTransformer for processing records in MAB2 format. |
55 | 64 | * No configuration options are needed. |
56 | 65 | */ |
57 | | -class MABRecordTransformer extends RecordTransformer { |
| 66 | +class MAB2RecordTransformer extends RecordTransformer { |
58 | 67 | |
59 | | - |
60 | | - |
61 | 68 | /** |
62 | 69 | * Initializes the RecordTransformer from the given parameter array. |
63 | 70 | * @param $spec associative array of options. See class-level documentation for details. |
— | — | @@ -69,17 +76,32 @@ |
70 | 77 | $this->fieldPrefix = @$spec[ 'fieldPrefix' ]; |
71 | 78 | } |
72 | 79 | |
| 80 | + public static function getMABFields( $logical ) { |
| 81 | + global $mab_field_map; |
| 82 | + |
| 83 | + if ( isset( $mab_field_map[ $logical ] ) ) { |
| 84 | + return $mab_field_map[ $logical ]; |
| 85 | + } else { |
| 86 | + return false; |
| 87 | + } |
| 88 | + } |
| 89 | + |
73 | 90 | public function transform( $rec ) { |
74 | 91 | global $mab_field_map; |
75 | 92 | |
76 | 93 | $r = array(); |
77 | 94 | |
78 | | - foreach ($mab_field_map as $field => $items) { |
| 95 | + foreach ( $mab_field_map as $field => $items ) { |
79 | 96 | foreach ( $items as $item ) { |
80 | 97 | if ( $this->fieldPrefix ) $item = fieldPrefix + $item; |
81 | 98 | |
82 | 99 | if ( !empty( $rec[ $item ] ) ) { |
83 | | - $r[ $field ][] = $rec[ $item ]; |
| 100 | + if ( is_array( $rec[ $item ] ) ) { |
| 101 | + $r[ $field ] = array_merge( $r[ $field ], $rec[ $item ] ); |
| 102 | + } else { |
| 103 | + $r[ $field ][] = $rec[ $item ]; |
| 104 | + } |
| 105 | + |
84 | 106 | break; |
85 | 107 | } |
86 | 108 | } |
— | — | @@ -87,9 +109,10 @@ |
88 | 110 | |
89 | 111 | foreach ($r as $f => $values) { |
90 | 112 | if ( count($values) == 0 ) unset( $r[ $f ] ); |
91 | | - else if ( count($values) == 1 ) $r[ $f ] = $values[0]; |
| 113 | + else if ( count($values) == 1 ) $r[ $f ] = MAB2RecordTransformer::mangleValue( $values[0] ); |
92 | 114 | else { |
93 | 115 | $values = array_unique( $values ); |
| 116 | + $values = array_map( 'MAB2RecordTransformer::mangleValue', $values ); |
94 | 117 | $r[ $f ] = join(', ', $values); |
95 | 118 | } |
96 | 119 | } |
— | — | @@ -97,6 +120,15 @@ |
98 | 121 | return $r; |
99 | 122 | } |
100 | 123 | |
| 124 | + function mangleValue( $v ) { |
| 125 | + $v = preg_replace( '/<<\[(.*?)\]>>/', '', $v ); |
| 126 | + $v = preg_replace( '/\[(.*?)\]/', '$1', $v ); |
| 127 | + $v = preg_replace( '/<<(.*?)>>/', '$1', $v ); |
| 128 | + $v = preg_replace( '/<(.*?)>/', '$1', $v ); |
| 129 | + $v = preg_replace( '/^[¤¬]/', '', $v ); |
| 130 | + return $v; |
| 131 | + } |
| 132 | + |
101 | 133 | /** |
102 | 134 | * Extracts any error message from the $data from the data source. This is done |
103 | 135 | * by calling resolvePath() on the $spec['errorPath'] provided to the constructor. |
— | — | @@ -104,6 +136,13 @@ |
105 | 137 | * @param $rec a structured data response, as received from the data source |
106 | 138 | */ |
107 | 139 | public function extractError( $data ) { |
| 140 | + if ( !$this->dataPath ) { |
| 141 | + $r = $this->extractRecord( $data ); |
| 142 | + |
| 143 | + if ( $r ) return false; |
| 144 | + else return true; |
| 145 | + } |
| 146 | + |
108 | 147 | $err = $this->resolvePath( $data, $this->errorPath ); |
109 | 148 | $err = $this->asString( $err ); |
110 | 149 | |
— | — | @@ -117,6 +156,10 @@ |
118 | 157 | * @param $data a structured data response, as received from the data source |
119 | 158 | */ |
120 | 159 | public function extractRecord( $data ) { |
| 160 | + if ( !$this->dataPath ) { |
| 161 | + return $data; |
| 162 | + } |
| 163 | + |
121 | 164 | $rec = $this->resolvePath( $data, $this->dataPath ); |
122 | 165 | |
123 | 166 | return $rec; |
Index: trunk/extensions/DataTransclusion/DBDataTransclusionSource.php |
— | — | @@ -28,19 +28,17 @@ |
29 | 29 | * not supported reliably. REQUIRED. |
30 | 30 | * * $spec['querySuffix']: additional clauses to be added after the WHERE clause. |
31 | 31 | * Useful mostly to specify GROUP BY (or ORDER BY or LIMIT). |
32 | | - * * $spec['keyTypes']: associative arrays specifying the data types for the key fields. |
33 | | - * Array keys are the field names, the associated values specify the type |
34 | | - * as 'int' for integers, 'float' or 'decimal' for decimals, or 'string' |
35 | | - * for string fields. |
36 | | - * * $spec['serializedFields']: associative array of fields that contain serialized data |
37 | | - * structures. The keys in the array are the field names, the values are the |
38 | | - * specify the data format: 'json', 'wddx' and 'php' for php serialized objects. |
39 | | - * If deserialzation yields an array, the array will be merged with rest of the |
40 | | - * record. |
41 | | - * * $spec['keyFields']: like for DataTransclusionSource, this is list of fields |
42 | | - * that can be used as the key for fetching a record. However, it's not required |
43 | | - * for DBDataTransclusionSource: if not provided, array_keys( $spec['keyTypes'] ) |
44 | | - * will be used. |
| 32 | + * * $spec['fieldNames']: like for DataTransclusionSource; however, it's not required |
| 33 | + * for DBDataTransclusionSource: if not provided, array_keys( $spec['fieldInfo'] ) |
| 34 | + * will be used. |
| 35 | + * * $spec['fieldInfo']: like for DataTransclusionSource; some additional hints are |
| 36 | + * supported for each field: |
| 37 | + * * $spec['fieldInfo'][...]['dbfield']: the field's name in the database table, |
| 38 | + * if different from the logical name. |
| 39 | + * * $spec['fieldInfo'][...]['serialized']: format if the field contains a |
| 40 | + * serialized structure as a blob. If deserialization yields an array, it is |
| 41 | + * merged with the data record. Supported formats are 'json', 'wddx' and |
| 42 | + * 'php' for php serialized objects. |
45 | 43 | * |
46 | 44 | * For more information on options supported by DataTransclusionSource, see the class-level |
47 | 45 | * documentation there. |
— | — | @@ -52,59 +50,16 @@ |
53 | 51 | * @param $spec associative array of options. See class-level documentation for details. |
54 | 52 | */ |
55 | 53 | function __construct( $spec ) { |
56 | | - if ( !isset( $spec[ 'keyFields' ] ) && isset( $spec[ 'keyTypes' ] ) ) { |
57 | | - $spec[ 'keyFields' ] = array_keys( $spec[ 'keyTypes' ] ); |
| 54 | + if ( !isset( $spec[ 'fieldNames' ] ) && isset( $spec[ 'fieldInfo' ] ) ) { |
| 55 | + $spec[ 'fieldNames' ] = array_keys( $spec[ 'fieldInfo' ] ); |
58 | 56 | } |
59 | 57 | |
60 | 58 | DataTransclusionSource::__construct( $spec ); |
61 | 59 | |
62 | 60 | $this->query = $spec[ 'query' ]; |
63 | 61 | $this->querySuffix = @$spec[ 'querySuffix' ]; |
64 | | - $this->serializedFields = DataTransclusionSource::splitList( @$spec[ 'serializedFields' ] ); |
65 | | - |
66 | | - if ( isset( $spec[ 'keyTypes' ] ) ) { |
67 | | - $this->keyTypes = $spec[ 'keyTypes' ]; |
68 | | - } else { |
69 | | - $this->keyTypes = null; |
70 | | - } |
71 | 62 | } |
72 | 63 | |
73 | | - public function convertKey( $key, $value ) { |
74 | | - if ( !isset( $this->keyTypes[ $key ] ) ) { |
75 | | - return (string)$value; |
76 | | - } |
77 | | - |
78 | | - $t = strtolower( trim( $this->keyTypes[ $key ] ) ); |
79 | | - |
80 | | - if ( $t == 'int' ) { |
81 | | - return (int)$value; |
82 | | - } else if ( $t == 'decimal' || $t == 'float' ) { |
83 | | - return (float)$value; |
84 | | - } else if ( $format == 'json' || $format == 'js' ) { |
85 | | - return DataTransclusionSource::decodeJson( $raw ); |
86 | | - } else if ( $format == 'wddx' ) { |
87 | | - return DataTransclusionSource::decodeWddx( $raw ); |
88 | | - } else if ( $format == 'xml' ) { |
89 | | - return DataTransclusionSource::parseXml( $raw ); #WARNING: returns DOM |
90 | | - } else if ( $format == 'php' || $format == 'pser' ) { |
91 | | - return DataTransclusionSource::decodeSerialized( $raw ); |
92 | | - } else { |
93 | | - return (string)$value; |
94 | | - } |
95 | | - } |
96 | | - |
97 | | - public function unserialize( $data, $format ) { |
98 | | - if ( $format == 'json' || $format == 'js' ) { |
99 | | - return DataTransclusionSource::decodeJson( $raw ); |
100 | | - } else if ( $format == 'wddx' ) { |
101 | | - return DataTransclusionSource::decodeWddx( $raw ); |
102 | | - } else if ( $format == 'php' || $format == 'pser' ) { |
103 | | - return DataTransclusionSource::decodeSerialized( $raw ); |
104 | | - } |
105 | | - |
106 | | - return $data; |
107 | | - } |
108 | | - |
109 | 64 | public function getQuery( $field, $value, $db = null ) { |
110 | 65 | if ( !$db ) { |
111 | 66 | $db = wfGetDB( DB_SLAVE ); |
— | — | @@ -114,7 +69,9 @@ |
115 | 70 | return false; // redundant, but make extra sure we don't get anything evil here |
116 | 71 | } |
117 | 72 | |
118 | | - $value = $this->convertKey( $field, $value ); |
| 73 | + if ( !empty( $this->fieldInfo ) && isset( $this->fieldInfo[$field]['dbfield'] ) ) { |
| 74 | + $field = $this->fieldInfo[$field]['dbfield']; |
| 75 | + } |
119 | 76 | |
120 | 77 | if ( is_string( $value ) ) { |
121 | 78 | $v = $db->addQuotes( $value ); |
— | — | @@ -159,16 +116,34 @@ |
160 | 117 | |
161 | 118 | $db->freeResult( $rs ); |
162 | 119 | |
163 | | - if ( $this->serializedFields ) { |
164 | | - foreach ( $this->serializedFields as $f => $format ) { |
165 | | - if ( empty( $rec[ $f ] ) ) continue; |
| 120 | + foreach ( $rec as $k => $v ) { |
| 121 | + if ( is_int( $k ) ) { # remove numeric keys, keep only assoc. |
| 122 | + unset( $rec[ $k ] ); |
| 123 | + } |
| 124 | + } |
166 | 125 | |
167 | | - $data = $this->unserialize( $rec[ $f ], $format ); |
| 126 | + foreach ( $rec as $k => $v ) { |
| 127 | + if ( isset( $this->fieldInfo[ $k ] ) ) { |
| 128 | + $format = null; # auto format |
| 129 | + $serialized = !empty( $this->fieldInfo[ $k ]['serialized'] ); |
168 | 130 | |
169 | | - if ( is_array( $data ) ) { |
170 | | - $rec = array_merge( $data, $rec ); |
| 131 | + if ( $serialized && is_string( $this->fieldInfo[ $k ]['serialized'] ) ) { |
| 132 | + $format = $this->fieldInfo[ $k ]['serialized']; # serialization format, else use ['type'] |
| 133 | + } |
| 134 | + |
| 135 | + $data = $this->convert( $k , $rec[ $k ], $format ); # unserialize or convert |
| 136 | + |
| 137 | + if ( $serialized && is_array( $data ) ) { # flatten serialized |
| 138 | + # flatten |
| 139 | + unset( $rec[ $k ] ); |
| 140 | + |
| 141 | + foreach ( $data as $m => $w ) { # don't use array_merge, it stinks. |
| 142 | + $rec[ $m ] = $w; |
| 143 | + } |
| 144 | + } else { |
| 145 | + $rec[ $k ] = $data; |
| 146 | + } |
171 | 147 | } |
172 | | - } |
173 | 148 | } |
174 | 149 | |
175 | 150 | wfDebugLog( 'DataTransclusion', "loaded record for $field=$value from database\n" ); |
Index: trunk/extensions/DataTransclusion/DataTransclusionHandler.php |
— | — | @@ -163,10 +163,14 @@ |
164 | 164 | } |
165 | 165 | |
166 | 166 | // render the record into wiki text |
167 | | - $t = Title::newFromText( $template, NS_TEMPLATE ); |
168 | | - if ( empty( $t ) ) { |
169 | | - wfDebugLog( 'DataTransclusion', "illegal template name: $template\n" ); |
170 | | - return DataTransclusionHandler::errorMessage( 'datatransclusion-bad-template-name', $asHTML, $template ); |
| 167 | + if ( $template === "#dump" ) { |
| 168 | + $t = null; |
| 169 | + } else { |
| 170 | + $t = Title::newFromText( $template, NS_TEMPLATE ); |
| 171 | + if ( empty( $t ) ) { |
| 172 | + wfDebugLog( 'DataTransclusion', "illegal template name: $template\n" ); |
| 173 | + return DataTransclusionHandler::errorMessage( 'datatransclusion-bad-template-name', $asHTML, $template ); |
| 174 | + } |
171 | 175 | } |
172 | 176 | |
173 | 177 | $handler = new DataTransclusionHandler( $parser, $source, $t, $templateText ); |
— | — | @@ -201,6 +205,20 @@ |
202 | 206 | } |
203 | 207 | |
204 | 208 | function render( $record ) { |
| 209 | + if ( empty( $this->templateText ) && $this->template === null ) { |
| 210 | + // magic record dump |
| 211 | + $t = null; |
| 212 | + |
| 213 | + $this->templateText = "\n{|"; |
| 214 | + $this->templateText .= "|--\n"; |
| 215 | + $this->templateText .= "! key !! value\n"; |
| 216 | + foreach ( $record as $k => $v ) { |
| 217 | + $this->templateText .= "|--\n"; |
| 218 | + $this->templateText .= "| $k || {{{{$k}}}}\n"; |
| 219 | + } |
| 220 | + $this->templateText .= "|}\n"; |
| 221 | + } |
| 222 | + |
205 | 223 | if ( $this->templateText ) { |
206 | 224 | // explicit template content set. Used for testing and debugging. |
207 | 225 | if ( is_string( $this->templateText ) ) { |
— | — | @@ -264,7 +282,7 @@ |
265 | 283 | if ( $args ) { |
266 | 284 | // add arguments |
267 | 285 | foreach ( $args as $f => $v ) { |
268 | | - if ( is_array( $v ) || is_object( $v ) || is_resource( $v ) ) { |
| 286 | + if ( is_int( $f ) || is_array( $v ) || is_object( $v ) || is_resource( $v ) ) { |
269 | 287 | continue; |
270 | 288 | } |
271 | 289 | |
Index: trunk/extensions/DataTransclusion/ImportMAB2.php |
— | — | @@ -0,0 +1,229 @@ |
| 2 | +<?php |
| 3 | +/** |
| 4 | + */ |
| 5 | + |
| 6 | +if ( getenv( 'MW_INSTALL_PATH' ) ) { |
| 7 | + $IP = getenv( 'MW_INSTALL_PATH' ); |
| 8 | +} else { |
| 9 | + $IP = dirname( __FILE__ ) . '/../..'; |
| 10 | + |
| 11 | + if ( !file_exists( "$IP/LocalSettings.php" ) ) { |
| 12 | + $IP = dirname( __FILE__ ) . '/../../phase3'; |
| 13 | + } |
| 14 | +} |
| 15 | +require_once( "$IP/maintenance/Maintenance.php" ); |
| 16 | + |
| 17 | +class ImportMAB2 extends Maintenance { |
| 18 | + public function __construct( ) { |
| 19 | + parent::__construct(); |
| 20 | + |
| 21 | + $this->addArg( "name", "name of a transclusion data source, as specified in \$wgDataTransclusionSources", true ); |
| 22 | + $this->addArg( "dir", "directory containing MAB files", true ); |
| 23 | + $this->addArg( "blob_table", "database table for data blobs, without prefix", true ); |
| 24 | + $this->addArg( "index_table", "database table for index entries, without prefix", true ); |
| 25 | + |
| 26 | + $this->addOption( "create", "create database tables if they do not exist", false, false ); |
| 27 | + $this->addOption( "truncate", "truncate (empty) database tables", false, false ); |
| 28 | + $this->addOption( "prefix", "database table prefix. May contain a period (\".\") to reference tables in another database. If not given, the wiki's table prefix will be used", false, true ); |
| 29 | + $this->addOption( "limit", "max number of files to process", false, true ); |
| 30 | + $this->addOption( "debug", "don't write to the database, dump to console instead", false, false ); |
| 31 | + } |
| 32 | + |
| 33 | + public function createTables( ) { |
| 34 | + $db = wfGetDB( DB_MASTER ); |
| 35 | + |
| 36 | + $this->output( "creating blob table {$this->blob_table}\n" ); |
| 37 | + $sql = "CREATE TABLE IF NOT EXISTS " . $this->blob_table . " ( "; |
| 38 | + $sql .= " id INT(12) NOT NULL AUTO_INCREMENT, "; |
| 39 | + $sql .= " data BLOB NOT NULL, "; |
| 40 | + $sql .= " PRIMARY KEY (id) "; |
| 41 | + $sql .= ") "; |
| 42 | + $db->query( $sql, __METHOD__ ); |
| 43 | + |
| 44 | + $this->output( "creating index table {$this->index_table}\n" ); |
| 45 | + $sql = "CREATE TABLE IF NOT EXISTS " . $this->index_table . " ( "; |
| 46 | + $sql .= " field VARCHAR(255) NOT NULL, "; #FIXME: varchar vs varbinary! |
| 47 | + $sql .= " value VARCHAR(255) NOT NULL, "; #FIXME: varchar vs varbinary! |
| 48 | + $sql .= " data_id INT(12) NOT NULL, "; |
| 49 | + $sql .= " PRIMARY KEY (field, value, data_id) "; #NOTE: we don't require (field,value) to be unique! |
| 50 | + $sql .= ") "; |
| 51 | + $db->query( $sql, __METHOD__ ); |
| 52 | + } |
| 53 | + |
| 54 | + public function truncateTables( ) { |
| 55 | + $db = wfGetDB( DB_MASTER ); |
| 56 | + |
| 57 | + $this->output( "truncating blob table {$this->blob_table}\n" ); |
| 58 | + $sql = "TRUNCATE TABLE " . $this->blob_table; |
| 59 | + $db->query( $sql, __METHOD__ ); |
| 60 | + |
| 61 | + $this->output( "truncating index table {$this->index_table}\n" ); |
| 62 | + $sql = "TRUNCATE TABLE " . $this->index_table; |
| 63 | + $db->query( $sql, __METHOD__ ); |
| 64 | + } |
| 65 | + |
| 66 | + public function execute() { |
| 67 | + global $wgDataTransclusionSources; |
| 68 | + |
| 69 | + $this->debug = $this->hasOption( 'debug' ); |
| 70 | + $limit = (int)$this->getOption( 'limit' ); |
| 71 | + |
| 72 | + $src = $this->mArgs[0]; |
| 73 | + $dir = $this->mArgs[1]; |
| 74 | + $this->blob_table = $this->mArgs[2]; |
| 75 | + $this->index_table = $this->mArgs[3]; |
| 76 | + |
| 77 | + if ( !isset( $wgDataTransclusionSources[ $src ] ) ) { |
| 78 | + throw new MWException( "unknown transclusion data source '$src', not found in \$wgDataTransclusionSources" ); |
| 79 | + } |
| 80 | + |
| 81 | + $this->source = DataTransclusionHandler::getDataSource( $src ); |
| 82 | + |
| 83 | + if ( !( $this->source instanceof DBDataTransclusionSource ) ) { |
| 84 | + throw new MWException( "bad data source '$src': not compatible with DBDataTransclusionSource" ); |
| 85 | + } |
| 86 | + |
| 87 | + if ( $this->hasOption( 'prefix' ) ) { |
| 88 | + $prefix = $this->getOption( "prefix" ); |
| 89 | + $this->blob_table = $prefix . $this->blob_table; |
| 90 | + $this->index_table = $prefix . $this->index_table; |
| 91 | + } else { |
| 92 | + $db = wfGetDB( DB_MASTER ); # we'll need the master anyway later |
| 93 | + $this->blob_table = $db->tableName( $this->blob_table ); |
| 94 | + $this->index_table = $db->tableName( $this->index_table ); |
| 95 | + } |
| 96 | + |
| 97 | + if ( $this->hasOption('create') && !$this->debug ) { |
| 98 | + $this->createTables( $this->blob_table, $this->index_table ); |
| 99 | + } else if ( $this->hasOption('truncate') && !$this->debug ) { |
| 100 | + $this->truncateTables( $this->blob_table, $this->index_table ); |
| 101 | + } |
| 102 | + |
| 103 | + $this->id_map = array(); |
| 104 | + foreach ( $this->source->keyFields as $key ) { |
| 105 | + $this->id_map[ $key ] = MAB2RecordTransformer::getMABFields( $key ); |
| 106 | + if ( !$this->id_map[ $key ] ) { |
| 107 | + $this->error( "unknown key field '$key', no MAB fields mapped." ); |
| 108 | + } |
| 109 | + } |
| 110 | + |
| 111 | + $dir = "$dir/"; |
| 112 | + |
| 113 | + $this->output( "scanning directory $dir\n" ); |
| 114 | + $d = opendir( $dir ); |
| 115 | + while( ( $file = readdir( $d ) ) ) { |
| 116 | + if ( $file == "." or $file == ".." ) { |
| 117 | + continue; |
| 118 | + } |
| 119 | + |
| 120 | + $rec = $this->readMabFile( $dir . $file ); |
| 121 | + |
| 122 | + if ( !$rec ) { |
| 123 | + $this->output( "error processing $file\n" ); |
| 124 | + } else { |
| 125 | + $ids = $this->getIds($rec); |
| 126 | + |
| 127 | + if ( $ids ) { |
| 128 | + if ( $this->debug ) { |
| 129 | + var_export( $ids ); |
| 130 | + var_export( $rec ); |
| 131 | + print "------------------------------------\n"; |
| 132 | + } else { |
| 133 | + $this->output( "importing file $file\n" ); |
| 134 | + $this->storeRecord($rec, $ids); |
| 135 | + } |
| 136 | + } else { |
| 137 | + $this->output( "skipping file $file\n" ); |
| 138 | + } |
| 139 | + } |
| 140 | + |
| 141 | + if ( $limit > 0 ) { |
| 142 | + $limit -= 1; |
| 143 | + if ( $limit <= 0 ) break; |
| 144 | + } |
| 145 | + } |
| 146 | + closedir( $d ); |
| 147 | + } |
| 148 | + |
| 149 | + public function getIds( $rec ) { |
| 150 | + $ids = array(); |
| 151 | + foreach ( $this->id_map as $field => $items ) { |
| 152 | + if ( !$items ) continue; |
| 153 | + |
| 154 | + foreach ( $items as $item ) { |
| 155 | + if ( isset( $rec[ $item ] ) ) { |
| 156 | + if ( !isset( $ids[ $field ] ) ) { |
| 157 | + $ids[ $field ] = array(); |
| 158 | + } |
| 159 | + |
| 160 | + if ( is_array( $rec[ $item ] ) ) { |
| 161 | + foreach( $rec[ $item ] as $k => $v ) { |
| 162 | + $v = $this->source->normalize( $field, $v ); |
| 163 | + $v = $this->source->convert( $field, $v ); |
| 164 | + |
| 165 | + $ids[ $field ][] = $v; |
| 166 | + } |
| 167 | + } else { |
| 168 | + $v = $rec[ $item ]; |
| 169 | + $v = $this->source->normalize( $field, $v ); |
| 170 | + $v = $this->source->convert( $field, $v ); |
| 171 | + |
| 172 | + $ids[ $field ][] = $v; |
| 173 | + } |
| 174 | + } |
| 175 | + } |
| 176 | + } |
| 177 | + |
| 178 | + return $ids; |
| 179 | + } |
| 180 | + |
| 181 | + public function storeRecord( $rec, $ids ) { |
| 182 | + $db = wfGetDB( DB_MASTER ); |
| 183 | + |
| 184 | + $insert = array( 'data' => serialize($rec) ); |
| 185 | + |
| 186 | + $db->insert( $this->blob_table, $insert ); |
| 187 | + $id = $db->insertId(); |
| 188 | + |
| 189 | + $insert = array(); |
| 190 | + foreach ( $ids as $field => $values ) { |
| 191 | + foreach ( $values as $v ) { |
| 192 | + $insert[] = array( |
| 193 | + 'field' => $field, |
| 194 | + 'value' => $v, |
| 195 | + 'data_id' => $id ); |
| 196 | + } |
| 197 | + } |
| 198 | + |
| 199 | + $db->insert( $this->index_table, $insert ); |
| 200 | + } |
| 201 | + |
| 202 | + public function readMabFile( $file ) { |
| 203 | + $rec = array(); |
| 204 | + $f = fopen( $file, 'r' ); |
| 205 | + if ( !$f ) return false; |
| 206 | + |
| 207 | + while( ( $s = fgets( $f ) ) ) { |
| 208 | + if ( preg_match( '/^(\d+[a-z]?)\s*([a-z])?=(.*$)/', $s, $m ) ) { |
| 209 | + $k = $m[1]; |
| 210 | + $t = $m[2]; |
| 211 | + $v = $m[3]; |
| 212 | + |
| 213 | + if ( isset( $rec[$k] ) ) { |
| 214 | + if ( !is_array( $rec[$k] ) ) { |
| 215 | + $rec[$k] = array( $rec[$k] ); |
| 216 | + } |
| 217 | + |
| 218 | + $rec[$k][] = $v; |
| 219 | + } else { |
| 220 | + $rec[$k] = $v; |
| 221 | + } |
| 222 | + } |
| 223 | + } |
| 224 | + fclose( $f ); |
| 225 | + return $rec; |
| 226 | + } |
| 227 | +} |
| 228 | + |
| 229 | +$maintClass = "ImportMAB2"; |
| 230 | +require_once( DO_MAINTENANCE ); |
Index: trunk/extensions/DataTransclusion/DataTransclusion.php |
— | — | @@ -28,10 +28,12 @@ |
29 | 29 | |
30 | 30 | $wgAutoloadClasses['DataTransclusionRenderer'] = $dir . 'DataTransclusionRenderer.php'; |
31 | 31 | $wgAutoloadClasses['DataTransclusionHandler'] = $dir . 'DataTransclusionHandler.php'; |
| 32 | +$wgAutoloadClasses['ValueNormalizers'] = $dir . 'ValueNormalizers.php'; |
32 | 33 | $wgAutoloadClasses['RecordTransformer'] = $dir . 'RecordTransformer.php'; |
33 | 34 | $wgAutoloadClasses['FlattenRecord'] = $dir . 'FlattenRecord.php'; |
34 | 35 | $wgAutoloadClasses['XPathFlattenRecord'] = $dir . 'XPathFlattenRecord.php'; |
35 | 36 | $wgAutoloadClasses['OpenLibraryRecordTransformer'] = $dir . 'OpenLibraryRecordTransformer.php'; |
| 37 | +$wgAutoloadClasses['MAB2RecordTransformer'] = $dir . 'MAB2RecordTransformer.php'; |
36 | 38 | $wgAutoloadClasses['DataTransclusionSource'] = $dir . 'DataTransclusionSource.php'; |
37 | 39 | $wgAutoloadClasses['CachingDataTransclusionSource'] = $dir . 'DataTransclusionSource.php'; |
38 | 40 | $wgAutoloadClasses['FakeDataTransclusionSource'] = $dir . 'DataTransclusionSource.php'; |