Index: trunk/extensions/ExternalData/ExternalData.i18n.magic.php |
— | — | @@ -4,6 +4,7 @@ |
5 | 5 | |
6 | 6 | $magicWords['en'] = array( |
7 | 7 | 'get_external_data' => array ( 0, 'get_external_data' ), |
| 8 | + 'get_web_data' => array ( 0, 'get_web_data' ), |
8 | 9 | 'get_ldap_data' => array ( 0, 'get_ldap_data' ), |
9 | 10 | 'get_db_data' => array ( 0, 'get_db_data' ), |
10 | 11 | 'external_value' => array ( 0, 'external_value' ), |
Index: trunk/extensions/ExternalData/ExternalData.php |
— | — | @@ -51,6 +51,7 @@ |
52 | 52 | |
53 | 53 | function edgRegisterParser( &$parser ) { |
54 | 54 | $parser->setFunctionHook( 'get_external_data', array( 'EDParserFunctions', 'doGetExternalData' ) ); |
| 55 | + $parser->setFunctionHook( 'get_web_data', array( 'EDParserFunctions', 'doGetWebData' ) ); |
55 | 56 | $parser->setFunctionHook( 'get_ldap_data', array( 'EDParserFunctions', 'doGetLDAPData' ) ); |
56 | 57 | $parser->setFunctionHook( 'get_db_data', array( 'EDParserFunctions', 'doGetDBData' ) ); |
57 | 58 | $parser->setFunctionHook( 'external_value', array( 'EDParserFunctions', 'doExternalValue' ) ); |
— | — | @@ -65,6 +66,7 @@ |
66 | 67 | switch ( $langCode ) { |
67 | 68 | default: |
68 | 69 | $magicWords['get_external_data'] = array ( 0, 'get_external_data' ); |
| 70 | + $magicWords['get_web_data'] = array ( 0, 'get_web_data' ); |
69 | 71 | $magicWords['get_ldap_data'] = array ( 0, 'get_ldap_data' ); |
70 | 72 | $magicWords['get_db_data'] = array ( 0, 'get_db_data' ); |
71 | 73 | $magicWords['external_value'] = array ( 0, 'external_value' ); |
Index: trunk/extensions/ExternalData/ED_ParserFunctions.php |
— | — | @@ -11,6 +11,7 @@ |
12 | 12 | |
13 | 13 | /** |
14 | 14 | * Render the #get_external_data parser function |
| 15 | + * @deprecated Use #get_web_data (doGetWebData()) instead. |
15 | 16 | */ |
16 | 17 | static function doGetExternalData( &$parser ) { |
17 | 18 | global $wgTitle, $edgCurPageName, $edgValues; |
— | — | @@ -27,46 +28,17 @@ |
28 | 29 | array_shift( $params ); // we already know the $parser ... |
29 | 30 | $url = array_shift( $params ); |
30 | 31 | $url = str_replace( ' ', '%20', $url ); // do some minor URL-encoding |
31 | | - // check whether this URL is allowed - code based on |
32 | | - // Parser::maybeMakeExternalImage() |
33 | | - global $edgAllowExternalDataFrom; |
34 | | - $data_from = $edgAllowExternalDataFrom; |
35 | | - $text = false; |
36 | | - if ( empty( $data_from ) ) { |
37 | | - $url_match = true; |
38 | | - } elseif ( is_array( $data_from ) ) { |
39 | | - $url_match = false; |
40 | | - foreach ( $data_from as $match ) { |
41 | | - if ( strpos( $url, $match ) === 0 ) { |
42 | | - $url_match = true; |
43 | | - break; |
44 | | - } |
45 | | - } |
46 | | - } else { |
47 | | - $url_match = ( strpos( $url, $data_from ) === 0 ); |
| 32 | + // if the URL isn't allowed (based on a whitelist), exit |
| 33 | + if ( ! EDUtils::isURLAllowed( $url ) ) { |
| 34 | + return; |
48 | 35 | } |
49 | | - if ( ! $url_match ) |
| 36 | + |
| 37 | + $format = strtolower( array_shift( $params ) ); // make case-insensitive |
| 38 | + $external_values = EDUtils::getDataFromURL( $url, $format ); |
| 39 | + if ( count( $external_values ) == 0 ) { |
50 | 40 | return; |
51 | | - |
52 | | - // now, get the contents of the URL - exit if there's nothing |
53 | | - // there |
54 | | - $url_contents = EDUtils::fetchURL( $url ); |
55 | | - if ( empty( $url_contents ) ) |
56 | | - return; |
57 | | - |
58 | | - $format = strtolower( array_shift( $params ) ); // make case-insensitive |
59 | | - $external_values = array(); |
60 | | - if ( $format == 'xml' ) { |
61 | | - $external_values = EDUtils::getXMLData( $url_contents ); |
62 | | - } elseif ( $format == 'csv' ) { |
63 | | - $external_values = EDUtils::getCSVData( $url_contents, false ); |
64 | | - } elseif ( $format == 'csv with header' ) { |
65 | | - $external_values = EDUtils::getCSVData( $url_contents, true ); |
66 | | - } elseif ( $format == 'json' ) { |
67 | | - $external_values = EDUtils::getJSONData( $url_contents ); |
68 | | - } elseif ( $format == 'gff' ) { |
69 | | - $external_values = EDUtils::getGFFData( $url_contents ); |
70 | 41 | } |
| 42 | + |
71 | 43 | // get set of filters and set of mappings, determining each |
72 | 44 | // one by whether there's a double or single equals sign, |
73 | 45 | // respectively |
— | — | @@ -88,6 +60,16 @@ |
89 | 61 | // do nothing |
90 | 62 | } |
91 | 63 | } |
| 64 | + self::setGlobalValuesArray( $external_values, $filters, $mappings ); |
| 65 | + } |
| 66 | + |
| 67 | + /** |
| 68 | + * Helper that fills $edgValues from the retrieved data; shared by |
| 69 | + * doGetExternalData() (deprecated) and doGetWebData(). |
| 70 | + */ |
| 71 | + static public function setGlobalValuesArray( $external_values, $filters, $mappings ) { |
| 72 | + global $edgValues; |
| 73 | + |
92 | 74 | foreach ( $filters as $filter_var => $filter_value ) { |
93 | 75 | // find the entry of $external_values that matches |
94 | 76 | // the filter variable; if none exists, just ignore |
— | — | @@ -131,9 +113,62 @@ |
132 | 114 | $edgValues[$local_var][] = $external_values[$external_var]; |
133 | 115 | } |
134 | 116 | } |
135 | | - return; |
136 | 117 | } |
137 | 118 | |
| 119 | + /** |
| 120 | + * Render the #get_web_data parser function |
| 121 | + */ |
| 122 | + static function doGetWebData( &$parser ) { |
| 123 | + global $wgTitle, $edgCurPageName, $edgValues; |
| 124 | + |
| 125 | + // if we're handling multiple pages, reset $edgValues |
| 126 | + // when we move from one page to another |
| 127 | + $cur_page_name = $wgTitle->getText(); |
| 128 | + if ( ! isset( $edgCurPageName ) || $edgCurPageName != $cur_page_name ) { |
| 129 | + $edgValues = array(); |
| 130 | + $edgCurPageName = $cur_page_name; |
| 131 | + } |
| 132 | + |
| 133 | + $params = func_get_args(); |
| 134 | + array_shift( $params ); // we already know the $parser ... |
| 135 | + $args = EDUtils::parseParams( $params ); // parse params into name-value pairs |
| 136 | + if ( array_key_exists( 'url', $args ) ) { |
| 137 | + $url = $args['url']; |
| 138 | + } else { |
| 139 | + return; |
| 140 | + } |
| 141 | + $url = str_replace( ' ', '%20', $url ); // do some minor URL-encoding |
| 142 | + // if the URL isn't allowed (based on a whitelist), exit |
| 143 | + if ( ! EDUtils::isURLAllowed( $url ) ) { |
| 144 | + return; |
| 145 | + } |
| 146 | + |
| 147 | + if ( array_key_exists( 'format', $args ) ) { |
| 148 | + $format = strtolower( $args['format'] ); |
| 149 | + } else { |
| 150 | + $format = ''; |
| 151 | + } |
| 152 | + $external_values = EDUtils::getDataFromURL( $url, $format ); |
| 153 | + if ( count( $external_values ) == 0 ) { |
| 154 | + return; |
| 155 | + } |
| 156 | + |
| 157 | + if ( array_key_exists( 'data', $args ) ) { |
| 158 | + // parse the 'data' arg into mappings |
| 159 | + $mappings = EDUtils::paramToArray( $args['data'], false, true ); |
| 160 | + } else { |
| 161 | + return; |
| 162 | + } |
| 163 | + if ( array_key_exists( 'filters', $args ) ) { |
| 164 | + // parse the 'filters' arg |
| 165 | + $filters = EDUtils::paramToArray( $args['filters'], true, false ); |
| 166 | + } else { |
| 167 | + $filters = array(); |
| 168 | + } |
| 169 | + |
| 170 | + self::setGlobalValuesArray( $external_values, $filters, $mappings ); |
| 171 | + } |
| 172 | + |
138 | 173 | /** |
139 | 174 | * Render the #get_ldap_data parser function |
140 | 175 | */ |
— | — | @@ -151,7 +186,7 @@ |
152 | 187 | $params = func_get_args(); |
153 | 188 | array_shift( $params ); // we already know the $parser ... |
154 | 189 | $args = EDUtils::parseParams( $params ); // parse params into name-value pairs |
155 | | - $mappings = EDUtils::parseMappings( $args['data'] ); // parse the data arg into mappings |
| 190 | + $mappings = EDUtils::paramToArray( $args['data'] ); // parse the data arg into mappings |
156 | 191 | |
157 | 192 | $external_values = EDUtils::getLDAPData( $args['filter'], $args['domain'], array_values( $mappings ) ); |
158 | 193 | |
— | — | @@ -179,7 +214,7 @@ |
180 | 215 | $params = func_get_args(); |
181 | 216 | array_shift( $params ); // we already know the $parser ... |
182 | 217 | $args = EDUtils::parseParams( $params ); // parse params into name-value pairs |
183 | | - $mappings = EDUtils::parseMappings( $args['data'] ); // parse the data arg into mappings |
| 218 | + $mappings = EDUtils::paramToArray( $args['data'] ); // parse the data arg into mappings |
184 | 219 | |
185 | 220 | $external_values = EDUtils::getDBData( $args['server'], $args['from'], $args['where'], array_values( $mappings ) ); |
186 | 221 | // handle error cases |
Index: trunk/extensions/ExternalData/README |
— | — | @@ -13,13 +13,16 @@ |
14 | 14 | from various sources: external URLs and local wiki pages (in CSV, GFF, JSON |
15 | 15 | and XML formats), database tables, and LDAP servers. |
16 | 16 | |
17 | | -The extension defines six parser functions - #get_external_data, |
18 | | -#get_db_data, #get_ldap_data, #external_value, #for_external_table and |
19 | | -#store_external_table. They are described below: |
| 17 | +The extension defines seven parser functions - #get_web_data, |
| 18 | +#get_external_data, #get_db_data, #get_ldap_data, #external_value, |
| 19 | +#for_external_table and #store_external_table. They are described below: |
20 | 20 | |
21 | | -* #get_external_data retrieves the data from a URL that holds CSV, GFF, |
| 21 | +* #get_web_data retrieves the data from a URL that holds CSV, GFF, |
22 | 22 | JSON or XML, and assigns it to local variables or arrays. |
23 | 23 | |
| 24 | +* #get_external_data is an earlier version of #get_web_data that does |
| 25 | +the same thing; it is deprecated in favor of #get_web_data. |
| 26 | + |
24 | 27 | * #get_db_data retrieves data from a database, using a SQL query, and |
25 | 28 | assigns it to local variables or arrays. |
26 | 29 | |
— | — | @@ -30,16 +33,16 @@ |
31 | 34 | first value if it's an array. |
32 | 35 | |
33 | 36 | * #for_external_table applies processing onto multiple rows retrieved by |
34 | | -#get_external_data. |
| 37 | +any of the #get_*_data functions. |
35 | 38 | |
36 | 39 | * #store_external_table mimics a call to the Semantic Internal Objects |
37 | 40 | extension's #set_internal parser function onto each row retrieved by |
38 | | -#get_external_data, in order to store the entire array of data |
39 | | -semantically. |
| 41 | +any of the #get_*_data functions, in order to store the entire array |
| 42 | +of data semantically. |
40 | 43 | |
41 | 44 | In addition, the extension defines a new special page, 'GetData', that |
42 | | -exports selected rows from a wiki page that holds CSV data, in a format that |
43 | | -is readable by #get_external_data. |
| 45 | +exports selected rows from a wiki page that holds CSV data, in a format |
| 46 | +that is readable by #get_web_data. |
44 | 47 | |
45 | 48 | For more information, see the extension homepage at: |
46 | 49 | http://www.mediawiki.org/wiki/Extension:External_Data |