r74718 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r74717‎ | r74718 | r74719 >
Date:16:19, 13 October 2010
Author:yaron
Status:deferred
Tags:
Comment:
#get_web_data parser function added, meant to replace the (now-deprecated) #get_external_data function
Modified paths:
  • /trunk/extensions/ExternalData/ED_ParserFunctions.php (modified) (history)
  • /trunk/extensions/ExternalData/ExternalData.i18n.magic.php (modified) (history)
  • /trunk/extensions/ExternalData/ExternalData.php (modified) (history)
  • /trunk/extensions/ExternalData/README (modified) (history)

Diff [purge]

Index: trunk/extensions/ExternalData/ExternalData.i18n.magic.php
@@ -4,6 +4,7 @@
55
66 $magicWords['en'] = array(
77 'get_external_data' => array ( 0, 'get_external_data' ),
 8+ 'get_web_data' => array ( 0, 'get_web_data' ),
89 'get_ldap_data' => array ( 0, 'get_ldap_data' ),
910 'get_db_data' => array ( 0, 'get_db_data' ),
1011 'external_value' => array ( 0, 'external_value' ),
Index: trunk/extensions/ExternalData/ExternalData.php
@@ -51,6 +51,7 @@
5252
5353 function edgRegisterParser( &$parser ) {
5454 $parser->setFunctionHook( 'get_external_data', array( 'EDParserFunctions', 'doGetExternalData' ) );
 55+ $parser->setFunctionHook( 'get_web_data', array( 'EDParserFunctions', 'doGetWebData' ) );
5556 $parser->setFunctionHook( 'get_ldap_data', array( 'EDParserFunctions', 'doGetLDAPData' ) );
5657 $parser->setFunctionHook( 'get_db_data', array( 'EDParserFunctions', 'doGetDBData' ) );
5758 $parser->setFunctionHook( 'external_value', array( 'EDParserFunctions', 'doExternalValue' ) );
@@ -65,6 +66,7 @@
6667 switch ( $langCode ) {
6768 default:
6869 $magicWords['get_external_data'] = array ( 0, 'get_external_data' );
 70+ $magicWords['get_web_data'] = array ( 0, 'get_web_data' );
6971 $magicWords['get_ldap_data'] = array ( 0, 'get_ldap_data' );
7072 $magicWords['get_db_data'] = array ( 0, 'get_db_data' );
7173 $magicWords['external_value'] = array ( 0, 'external_value' );
Index: trunk/extensions/ExternalData/ED_ParserFunctions.php
@@ -11,6 +11,7 @@
1212
1313 /**
1414 * Render the #get_external_data parser function
 15+ * @deprecated Use doGetWebData() (the #get_web_data parser function) instead
1516 */
1617 static function doGetExternalData( &$parser ) {
1718 global $wgTitle, $edgCurPageName, $edgValues;
@@ -27,46 +28,17 @@
2829 array_shift( $params ); // we already know the $parser ...
2930 $url = array_shift( $params );
3031 $url = str_replace( ' ', '%20', $url ); // do some minor URL-encoding
31 - // check whether this URL is allowed - code based on
32 - // Parser::maybeMakeExternalImage()
33 - global $edgAllowExternalDataFrom;
34 - $data_from = $edgAllowExternalDataFrom;
35 - $text = false;
36 - if ( empty( $data_from ) ) {
37 - $url_match = true;
38 - } elseif ( is_array( $data_from ) ) {
39 - $url_match = false;
40 - foreach ( $data_from as $match ) {
41 - if ( strpos( $url, $match ) === 0 ) {
42 - $url_match = true;
43 - break;
44 - }
45 - }
46 - } else {
47 - $url_match = ( strpos( $url, $data_from ) === 0 );
 32+ // if the URL isn't allowed (based on a whitelist), exit
 33+ if ( ! EDUtils::isURLAllowed( $url ) ) {
 34+ return;
4835 }
49 - if ( ! $url_match )
 36+
 37+ $format = strtolower( array_shift( $params ) ); // make case-insensitive
 38+ $external_values = EDUtils::getDataFromURL( $url, $format );
 39+ if ( count( $external_values ) == 0 ) {
5040 return;
51 -
52 - // now, get the contents of the URL - exit if there's nothing
53 - // there
54 - $url_contents = EDUtils::fetchURL( $url );
55 - if ( empty( $url_contents ) )
56 - return;
57 -
58 - $format = strtolower( array_shift( $params ) ); // make case-insensitive
59 - $external_values = array();
60 - if ( $format == 'xml' ) {
61 - $external_values = EDUtils::getXMLData( $url_contents );
62 - } elseif ( $format == 'csv' ) {
63 - $external_values = EDUtils::getCSVData( $url_contents, false );
64 - } elseif ( $format == 'csv with header' ) {
65 - $external_values = EDUtils::getCSVData( $url_contents, true );
66 - } elseif ( $format == 'json' ) {
67 - $external_values = EDUtils::getJSONData( $url_contents );
68 - } elseif ( $format == 'gff' ) {
69 - $external_values = EDUtils::getGFFData( $url_contents );
7041 }
 42+
7143 // get set of filters and set of mappings, determining each
7244 // one by whether there's a double or single equals sign,
7345 // respectively
@@ -88,6 +60,16 @@
8961 // do nothing
9062 }
9163 }
 64+ self::setGlobalValuesArray( $external_values, $filters, $mappings );
 65+ }
 66+
 67+ /**
 68+ * A helper function shared by doGetExternalData(), which is
 69+ * deprecated, and doGetWebData().
 70+ */
 71+ static public function setGlobalValuesArray( $external_values, $filters, $mappings ) {
 72+ global $edgValues;
 73+
9274 foreach ( $filters as $filter_var => $filter_value ) {
9375 // find the entry of $external_values that matches
9476 // the filter variable; if none exists, just ignore
@@ -131,9 +113,62 @@
132114 $edgValues[$local_var][] = $external_values[$external_var];
133115 }
134116 }
135 - return;
136117 }
137118
 119+ /**
 120+ * Render the #get_web_data parser function
 121+ */
 122+ static function doGetWebData( &$parser ) {
 123+ global $wgTitle, $edgCurPageName, $edgValues;
 124+
 125+ // if we're handling multiple pages, reset $edgValues
 126+ // when we move from one page to another
 127+ $cur_page_name = $wgTitle->getText();
 128+ if ( ! isset( $edgCurPageName ) || $edgCurPageName != $cur_page_name ) {
 129+ $edgValues = array();
 130+ $edgCurPageName = $cur_page_name;
 131+ }
 132+
 133+ $params = func_get_args();
 134+ array_shift( $params ); // we already know the $parser ...
 135+ $args = EDUtils::parseParams( $params ); // parse params into name-value pairs
 136+ if ( array_key_exists( 'url', $args ) ) {
 137+ $url = $args['url'];
 138+ } else {
 139+ return;
 140+ }
 141+ $url = str_replace( ' ', '%20', $url ); // do some minor URL-encoding
 142+ // if the URL isn't allowed (based on a whitelist), exit
 143+ if ( ! EDUtils::isURLAllowed( $url ) ) {
 144+ return;
 145+ }
 146+
 147+ if ( array_key_exists( 'format', $args ) ) {
 148+ $format = strtolower( $args['format'] );
 149+ } else {
 150+ $format = '';
 151+ }
 152+ $external_values = EDUtils::getDataFromURL( $url, $format );
 153+ if ( count( $external_values ) == 0 ) {
 154+ return;
 155+ }
 156+
 157+ if ( array_key_exists( 'data', $args ) ) {
 158+ // parse the 'data' arg into mappings
 159+ $mappings = EDUtils::paramToArray( $args['data'], false, true );
 160+ } else {
 161+ return;
 162+ }
 163+ if ( array_key_exists( 'filters', $args ) ) {
 164+ // parse the 'filters' arg
 165+ $filters = EDUtils::paramToArray( $args['filters'], true, false );
 166+ } else {
 167+ $filters = array();
 168+ }
 169+
 170+ self::setGlobalValuesArray( $external_values, $filters, $mappings );
 171+ }
 172+
138173 /**
139174 * Render the #get_ldap_data parser function
140175 */
@@ -151,7 +186,7 @@
152187 $params = func_get_args();
153188 array_shift( $params ); // we already know the $parser ...
154189 $args = EDUtils::parseParams( $params ); // parse params into name-value pairs
155 - $mappings = EDUtils::parseMappings( $args['data'] ); // parse the data arg into mappings
 190+ $mappings = EDUtils::paramToArray( $args['data'] ); // parse the data arg into mappings
156191
157192 $external_values = EDUtils::getLDAPData( $args['filter'], $args['domain'], array_values( $mappings ) );
158193
@@ -179,7 +214,7 @@
180215 $params = func_get_args();
181216 array_shift( $params ); // we already know the $parser ...
182217 $args = EDUtils::parseParams( $params ); // parse params into name-value pairs
183 - $mappings = EDUtils::parseMappings( $args['data'] ); // parse the data arg into mappings
 218+ $mappings = EDUtils::paramToArray( $args['data'] ); // parse the data arg into mappings
184219
185220 $external_values = EDUtils::getDBData( $args['server'], $args['from'], $args['where'], array_values( $mappings ) );
186221 // handle error cases
Index: trunk/extensions/ExternalData/README
@@ -13,13 +13,16 @@
1414 from various sources: external URLs and local wiki pages (in CSV, GFF, JSON
1515 and XML formats), database tables, and LDAP servers.
1616
17 -The extension defines six parser functions - #get_external_data,
18 -#get_db_data, #get_ldap_data, #external_value, #for_external_table and
19 -#store_external_table. They are described below:
 17+The extension defines seven parser functions - #get_web_data,
 18+#get_external_data, #get_db_data, #get_ldap_data, #external_value,
 19+#for_external_table and #store_external_table. They are described below:
2020
21 -* #get_external_data retrieves the data from a URL that holds CSV, GFF,
 21+* #get_web_data retrieves the data from a URL that holds CSV, GFF,
2222 JSON or XML, and assigns it to local variables or arrays.
2323
 24+* #get_external_data is an earlier version of #get_web_data that does
 25+the same thing; it is currently deprecated.
 26+
2427 * #get_db_data retrieves data from a database, using a SQL query, and
2528 assigns it to local variables or arrays.
2629
@@ -30,16 +33,16 @@
3134 first value if it's an array.
3235
3336 * #for_external_table applies processing onto multiple rows retrieved by
34 -#get_external_data.
 37+any of the #get_*_data functions.
3538
3639 * #store_external_table mimics a call to the Semantic Internal Objects
3740 extension's #set_internal parser function onto each row retrieved by
38 -#get_external_data, in order to store the entire array of data
39 -semantically.
 41+any of the #get_*_data functions, in order to store the entire array
 42+of data semantically.
4043
4144 In addition, the extension defines a new special page, 'GetData', that
42 -exports selected rows from a wiki page that holds CSV data, in a format that
43 -is readable by #get_external_data.
 45+exports selected rows from a wiki page that holds CSV data, in a format
 46+that is readable by #get_web_data.
4447
4548 For more information, see the extension homepage at:
4649 http://www.mediawiki.org/wiki/Extension:External_Data

Status & tagging log