r86140 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r86139‎ | r86140 | r86141 >
Date:21:36, 15 April 2011
Author:mkroetzsch
Status:deferred
Tags:
Comment:
some cleaning up, comments
Modified paths:
  • /trunk/extensions/SemanticMediaWiki/includes/sparql/SMW_SparqlDatabase.php (modified) (history)
  • /trunk/extensions/SemanticMediaWiki/includes/sparql/SMW_SparqlResultParser.php (modified) (history)

Diff [purge]

Index: trunk/extensions/SemanticMediaWiki/includes/sparql/SMW_SparqlDatabase.php
@@ -18,8 +18,8 @@
1919
2020 /**
2121 * Class to escalate SPARQL query errors to the interface. We only do this for
22 - * malformed queries or permission issues. Connection problems are usually
23 - * ignored so as to keep the wiki running even if the RDF backend is down.
 22+ * malformed queries, permission issues, etc. Connection problems are usually
 23+ * ignored so as to keep the wiki running even if the SPARQL backend is down.
2424 *
2525 * @ingroup SMWSparql
2626 */
@@ -47,6 +47,14 @@
4848 */
4949 public $errorCode;
5050
 51+ /**
 52+ * Constructor that creates an error message based on the given data.
 53+ *
 54+ * @param $errorCode integer error code as defined in this class
 55+ * @param $queryText string with the original SPARQL query/update
 56+ * @param $endpoint string URL of the endpoint
 57+ * @param $httpCode mixed integer HTTP error code or some string to print there
 58+ */
5159 function __construct( $errorCode, $queryText, $endpoint, $httpCode = '<not given>' ) {
5260 switch ( $errorCode ) {
5361 case self::ERROR_MALFORMED:
@@ -88,23 +96,34 @@
8997 class SMWSparqlDatabase {
9098
9199 /**
92 - * The URL of the endpoint to contact the database.
 100+ * The URL of the endpoint for executing read queries.
93101 * @var string
94102 */
95 - protected $m_endpoint;
 103+ protected $m_queryEndpoint;
96104
97105 /**
98 - * The curl handle we use for communicating.
 106+ * The URL of the endpoint for executing update queries, or empty if
 107+ * update is not allowed/supported.
 108+ * @var string
 109+ */
 110+ protected $m_updateEndpoint;
 111+
 112+ /**
 113+ * The curl handle we use for communicating. We reuse the same handle
 114+ * throughout as this saves some initialization effort.
99115 * @var resource
100116 */
101117 protected $m_curlhandle;
102118
103119 /**
104 - * Constructor.
105 - * @param $endpoint string of URL to contact the database at
 120+ * Constructor
 121+ *
 122+ * @param $queryEndpoint string of URL of query service (reading)
 123+ * @param $updateEndpoint string of URL of update service (writing)
106124 */
107 - public function __construct( $endpoint ) {
108 - $this->m_endpoint = $endpoint;
 125+ public function __construct( $queryEndpoint, $updateEndpoint = '' ) {
 126+ $this->m_queryEndpoint = $queryEndpoint;
 127+ $this->m_updateEndpoint = $updateEndpoint;
109128 $this->m_curlhandle = curl_init();
110129 curl_setopt( $this->m_curlhandle, CURLOPT_FORBID_REUSE, false );
111130 curl_setopt( $this->m_curlhandle, CURLOPT_FRESH_CONNECT, false );
@@ -114,45 +133,126 @@
115134 }
116135
117136 /**
118 - * Check if the database can be contacted,
 137+ * Check if the database can be contacted.
119138 *
 139+ * @param $pingQueryEndpoint boolean true if the query endpoint should
 140+ * be pinged, false if the update endpoint should be pinged
120141 * @return boolean to indicate success
 142+ * @todo SPARQL endpoints sometimes return errors if no (valid) query
 143+ * is posted. The current implementation tries to catch this, but this
 144+ * might not be entirely correct. Especially, the SPARQL 1.1 HTTP error
 145+ * codes for Update are not defined yet (April 15 2011).
121146 */
122 - public function ping(){
123 - curl_setopt( $this->m_curlhandle, CURLOPT_URL, $this->m_endpoint );
124 - curl_setopt( $this->m_curlhandle, CURLOPT_NOBODY, 1 );
 147+ public function ping( $pingQueryEndpoint = true ){
 148+ if ( $pingQueryEndpoint ) {
 149+ curl_setopt( $this->m_curlhandle, CURLOPT_URL, $this->m_queryEndpoint );
 150+ curl_setopt( $this->m_curlhandle, CURLOPT_NOBODY, true );
 151+ } else {
 152+ if ( $this->m_updateEndpoint == '' ) {
 153+ return false;
 154+ }
 155+ curl_setopt( $this->m_curlhandle, CURLOPT_URL, $this->m_updateEndpoint );
 156+ curl_setopt( $this->m_curlhandle, CURLOPT_NOBODY, false ); // 4Store gives 404 instead of 500 with CURLOPT_NOBODY
 157+ }
 158+
125159 curl_exec( $this->m_curlhandle );
126 - return ( curl_errno( $this->m_curlhandle ) == 0 );
 160+
 161+ if ( curl_errno( $this->m_curlhandle ) == 0 ) {
 162+ return true;
 163+ } else {
 164+ $httpCode = curl_getinfo( $this->m_curlhandle, CURLINFO_HTTP_CODE );
 165+ return ( ( $httpCode == 500 ) || ( $httpCode == 400 ) ); // valid HTTP responses from a complaining SPARQL endpoint that is alive and kicking
 166+ }
127167 }
128168
 169+ /**
 170+ * Execute a SPARQL query and return an SMWSparqlResultWrapper object
 171+ * that contains the results. The method throws exceptions based on
 172+ * SMWSparqlDatabase::throwSparqlErrors(). If errors occur and this
 173+ * method does not throw anything, then an empty result with an error
 174+ * code is returned.
 175+ *
 176+ * @param $sparql string with the complete SPARQL query (SELECT or ASK)
 177+ * @return SMWSparqlResultWrapper
 178+ */
129179 public function doQuery( $sparql ) {
130 - curl_setopt( $this->m_curlhandle, CURLOPT_URL, $this->m_endpoint );
 180+ curl_setopt( $this->m_curlhandle, CURLOPT_URL, $this->m_queryEndpoint );
131181 curl_setopt( $this->m_curlhandle, CURLOPT_POST, true );
132182 $parameterString = "query=" . urlencode( $sparql );
133183 curl_setopt( $this->m_curlhandle, CURLOPT_POSTFIELDS, $parameterString );
 184+
134185 $xmlResult = curl_exec( $this->m_curlhandle );
 186+
 187+ if ( curl_errno( $this->m_curlhandle ) == 0 ) {
 188+ $xmlParser = new SMWSparqlResultParser();
 189+ return $xmlParser->makeResultFromXml( $xmlResult );
 190+ } else {
 191+ $this->throwSparqlErrors( $this->m_queryEndpoint, $sparql );
 192+ return new SMWSparqlResultWrapper( array(), array(), SMWSparqlResultWrapper::ERROR_UNREACHABLE );
 193+ }
 194+ }
 195+
 196+ /**
 197+ * Execute a SPARQL update and return a boolean to indicate if the
 198+ * operation was successful. The method throws exceptions based on
 199+ * SMWSparqlDatabase::throwSparqlErrors(). If errors occur and this
 200+ * method does not throw anything, then false is returned.
 201+ *
 202+ * @param $sparql string with the complete SPARQL update query (INSERT or DELETE)
 203+ * @return boolean
 204+ */
 205+ public function doUpdate( $sparql ) {
 206+ if ( $this->m_updateEndpoint == '' ) {
 207+ throw new SMWSparqlDatabaseError( SMWSparqlDatabaseError::ERROR_READONLY, $sparql, $this->m_queryEndpoint, $error );
 208+ }
 209+ curl_setopt( $this->m_curlhandle, CURLOPT_URL, $this->m_updateEndpoint );
 210+ curl_setopt( $this->m_curlhandle, CURLOPT_POST, true );
 211+ $parameterString = "update=" . urlencode( $sparql );
 212+ curl_setopt( $this->m_curlhandle, CURLOPT_POSTFIELDS, $parameterString );
 213+
 214+ $xmlResult = curl_exec( $this->m_curlhandle );
 215+
 216+ if ( curl_errno( $this->m_curlhandle ) == 0 ) {
 217+ $xmlParser = new SMWSparqlResultParser();
 218+ return true;
 219+ } else {
 220+ $this->throwSparqlErrors( $this->m_updateEndpoint, $sparql );
 221+ return false;
 222+ }
 223+ }
 224+
 225+ /**
 226+ * Decide what to make of the errors reported by the Curl handler.
 227+ * Either throw a suitable exception or fall through if the error
 228+ * should be handled gracefully. It is attempted to throw exceptions
 229+ * for all errors that can generally be prevented by proper coding or
 230+ * configuration (e.g. query syntax errors), and to be silent on all
 231+ * errors that might be caused by network issues or temporary
 232+ * overloading of the server. In this case, calling methods rather
 233+ * return something that helps to make the best out of the situation.
 234+ *
 235+ * @param $endpoint string URL of endpoint that was used
 236+ * @param $sparql string query that caused the problem
 237+ */
 238+ protected function throwSparqlErrors( $endpoint, $sparql ) {
135239 $error = curl_errno( $this->m_curlhandle );
136 - if ( $error == 0 ) {
137 - $xmlParser = new SMWSparqlResultParser();
138 - $resultWrapper = $xmlParser->makeResultFromXml( $xmlResult );
139 - } elseif ( $error == CURLE_COULDNT_CONNECT ) { // fail gracefully if backend is down
140 - $resultWrapper = new SMWSparqlResultWrapper( array(), array(), SMWSparqlResultWrapper::ERROR_UNREACHABLE );
141 - } elseif ( $error == 22 ) { // 22 == CURLE_HTTP_RETURNED_ERROR, but this constant is not defined in PHP, it seems
 240+ if ( $error == 22 ) { // 22 == CURLE_HTTP_RETURNED_ERROR, but this constant is not defined in PHP, it seems
142241 $httpCode = curl_getinfo( $this->m_curlhandle, CURLINFO_HTTP_CODE );
 242+ /// TODO We are guessing the meaning of HTTP codes here -- the SPARQL 1.1 spec does not yet provide this information for updates (April 15 2011)
143243 if ( $httpCode == 400 ) { // malformed query
144 - throw new SMWSparqlDatabaseError( SMWSparqlDatabaseError::ERROR_MALFORMED, $sparql, $this->m_endpoint, $error );
 244+ throw new SMWSparqlDatabaseError( SMWSparqlDatabaseError::ERROR_MALFORMED, $sparql, $endpoint, $error );
145245 } elseif ( $httpCode == 500 ) { // query refused; maybe fail gracefully here (depending on how stores use this)
146 - throw new SMWSparqlDatabaseError( SMWSparqlDatabaseError::ERROR_REFUSED, $sparql, $this->m_endpoint, $error );
147 - } elseif ( $httpCode == 404 ) { // endpoint not found, maybe down; fail gracefully
148 - $resultWrapper = new SMWSparqlResultWrapper( array(), array(), SMWSparqlResultWrapper::ERROR_UNREACHABLE );
 246+ throw new SMWSparqlDatabaseError( SMWSparqlDatabaseError::ERROR_REFUSED, $sparql, $endpoint, $error );
 247+ } elseif ( $httpCode == 404 ) {
 248+ return; // endpoint not found, maybe down; fail gracefully
149249 } else {
150 - throw new SMWSparqlDatabaseError( SMWSparqlDatabaseError::ERROR_OTHER, $sparql, $this->m_endpoint, $error );
 250+ throw new SMWSparqlDatabaseError( SMWSparqlDatabaseError::ERROR_OTHER, $sparql, $endpoint, $error );
151251 }
 252+ } elseif ( $error == CURLE_COULDNT_CONNECT ) {
 253+ return; // fail gracefully if backend is down
152254 } else {
153 - throw new Exception( "Failed to communicate with SPARQL store.\n Endpoint: " . $this->m_endpoint . "\n Curl error: '" . curl_error( $this->m_curlhandle ) . "' ($error)" );
 255+ throw new Exception( "Failed to communicate with SPARQL store.\n Endpoint: " . $endpoint . "\n Curl error: '" . curl_error( $this->m_curlhandle ) . "' ($error)" );
154256 }
155 -
156 - return $resultWrapper;
157257 }
158258
159259 }
Index: trunk/extensions/SemanticMediaWiki/includes/sparql/SMW_SparqlResultParser.php
@@ -52,7 +52,7 @@
5353 * @param $xmlQueryResult string
5454 */
5555 public function makeResultFromXml( $xmlQueryResult ) {
56 - $parser = xml_parser_create ();
 56+ $parser = xml_parser_create();
5757 xml_parser_set_option( $parser, XML_OPTION_SKIP_WHITE, 0 );
5858 xml_parser_set_option( $parser, XML_OPTION_TARGET_ENCODING, 'UTF-8' );
5959 xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, 0 );
@@ -77,7 +77,6 @@
7878 * @see xml_set_element_handler
7979 */
8080 protected function xmlHandleOpen( $parser, $tagName, $attributes ) {
81 - print " ($tagName( ";
8281 $prevTag = end( $this->m_xml_opentags );
8382 $this->m_xml_opentags[] = $tagName;
8483 if ( ( $tagName == 'binding' ) && ( $prevTag == 'result' ) ) {
@@ -110,7 +109,6 @@
111110 * @see xml_set_element_handler
112111 */
113112 protected function xmlHandleClose( $parser, $tagName ) {
114 - print " )$tagName)";
115113 array_pop( $this->m_xml_opentags );
116114 }
117115

Status & tagging log