r47635 MediaWiki - Code Review archive

Repository: MediaWiki
Revision: r47634 | r47635 | r47636
Date: 22:10, 21 February 2009
Author: ashley
Status: deferred (Comments)
Tags:
Comment:
adding GetFamily and WikiCurl extensions from wikia. code cleanup by me, some fixes for GetFamily by Misza and Pinky
Modified paths:
  • /trunk/extensions/GetFamily (added) (history)
  • /trunk/extensions/GetFamily/GetFamily.i18n.php (added) (history)
  • /trunk/extensions/GetFamily/GetFamily.php (added) (history)
  • /trunk/extensions/WikiCurl (added) (history)
  • /trunk/extensions/WikiCurl/WikiCurl.php (added) (history)

Diff

Index: trunk/extensions/WikiCurl/WikiCurl.php
@@ -0,0 +1,307 @@
 2+<?php
 3+/**
 4+ * Wiki engine CURL extension
 5+ *
 6+ * @file
 7+ * @ingroup Extensions
 8+ * @version 1.0
 9+ * @author Mikołaj Musielak (CorfiX) <corfix@wikia.com> - original code/ideas
 10+ * @author Tomasz Klim <tomek@wikia.com> - fixes, porting to PHP5, referer tracking, POST, timeout, verbose, caching functionality, bandwidth counting etc.
 11+ * @copyright Copyright © 2007 Tomasz Klim, Wikia Inc.
 12+ * @link http://www.mediawiki.org/wiki/Extension:WikiCurl Documentation
 13+ * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
 14+ */
 15+if( !defined( 'MEDIAWIKI' ) ){
 16+ die( "This is not a valid entry point.\n" );
 17+}
 18+
 19+// Extension credits that will show up on Special:Version
 20+$wgExtensionCredits['other'][] = array(
 21+ 'name' => 'WikiCurl',
 22+ 'version' => '1.0',
 23+ 'author' => array('Tomasz Klim', 'Mikołaj Musielak'),
 24+ 'description' => 'Universal CURL extension',
 25+ 'url' => 'http://www.mediawiki.org/wiki/Extension:WikiCurl'
 26+);
 27+
 28+class WikiCurl {
 29+ var $conn;
 30+ var $referer;
 31+ var $useReferer = false;
 32+ var $cacheDir = false;
 33+ var $cachePeriod = 86400;
 34+ var $cacheOutput = '';
 35+ var $totalTime = 0;
 36+ var $totalSize = 0;
 37+ var $lastTime = 0;
 38+ var $delay = 0;
 39+
 40+ function __construct( $allow_redirects = true ) {
 41+ $this->conn = curl_init();
 42+ curl_setopt( $this->conn, CURLOPT_HEADER, 1 );
 43+ curl_setopt( $this->conn, CURLOPT_USERAGENT, $_SERVER['HTTP_USER_AGENT'] );
 44+ curl_setopt( $this->conn, CURLOPT_RETURNTRANSFER, 1 );
 45+
 46+ // the extra parameter exists only for compatibility
 47+ // with old code, which assumes redirects are allowed
 48+ if ( $allow_redirects ) {
 49+ curl_setopt( $this->conn, CURLOPT_FOLLOWLOCATION, 1 );
 50+ }
 51+
 52+ // workaround for windows apache2-nossl
 53+ if( strpos( PHP_OS, 'WIN' ) !== false ) {
 54+ curl_setopt( $this->conn, CURLOPT_SSL_VERIFYPEER, 0 );
 55+ }
 56+ }
 57+
 58+ function __destruct() {
 59+ curl_close( $this->conn );
 60+ }
 61+
 62+ function setReferer( $url ) {
 63+ $this->useReferer = true;
 64+ $this->referer = $url;
 65+ }
 66+
 67+ function setDelay( $delay ) {
 68+ $this->delay = $delay;
 69+ }
 70+
 71+ function setInterface( $ip ) {
 72+ curl_setopt( $this->conn, CURLOPT_INTERFACE, $ip );
 73+ }
 74+
 75+ function setTimeout( $sec ) {
 76+ curl_setopt( $this->conn, CURLOPT_TIMEOUT, $sec );
 77+ }
 78+
 79+ function setRange( $range ) {
 80+ curl_setopt( $this->conn, CURLOPT_RANGE, $range ); // "0-4096"
 81+ }
 82+
 83+ function setAgent( $agent ) {
 84+ curl_setopt( $this->conn, CURLOPT_USERAGENT, $agent );
 85+ }
 86+
 87+ function setProxy( $proxy ) {
 88+ curl_setopt( $this->conn, CURLOPT_PROXY, $proxy );
 89+ }
 90+
 91+ function setProxyPass( $username, $password ) {
 92+ curl_setopt( $this->conn, CURLOPT_PROXYUSERPWD, "$username:$password" );
 93+ }
 94+
 95+ function setProxyPort( $port ) {
 96+ curl_setopt( $this->conn, CURLOPT_PROXYPORT, $port );
 97+ }
 98+
 99+ function setAuth( $username, $password ) {
 100+ curl_setopt( $this->conn, CURLOPT_USERPWD, "$username:$password" );
 101+ }
 102+
 103+ function setCookies( $cookies ) {
 104+ curl_setopt( $this->conn, CURLOPT_COOKIEJAR, $cookies );
 105+ curl_setopt( $this->conn, CURLOPT_COOKIEFILE, $cookies );
 106+ }
 107+
 108+ // this method only works properly when this class is included from a console script, because of a PHP5 cURL bug
 109+ function setVerbose() {
 110+ curl_setopt( $this->conn, CURLOPT_VERBOSE, 1 );
 111+ }
 112+
 113+ // sets the cache directory root and thus enables the file cache
 114+ function setCacheDir( $dir ) {
 115+ $this->cacheDir = $dir;
 116+ }
 117+
 118+ // sets the cache period in seconds
 119+ function setCachePeriod( $period ) {
 120+ $this->cachePeriod = $period;
 121+ }
 122+
 123+ function getCacheOutput() {
 124+ $output = $this->cacheOutput;
 125+ unset( $this->cacheOutput );
 126+ $this->cacheOutput = '';
 127+ return $output;
 128+ }
 129+
 130+ // TODO: make better error/status handling
 131+ function getError() {
 132+ return curl_error( $this->conn );
 133+ }
 134+
 135+ function getErrno() {
 136+ return curl_errno( $this->conn );
 137+ }
 138+
 139+ function getMicroTime() {
 140+ list( $usec, $sec ) = explode( ' ', microtime() );
 141+ return ((float)$usec + (float)$sec);
 142+ }
 143+
 144+ function getTotalTime() {
 145+ return $this->totalTime;
 146+ }
 147+
 148+ function getTotalSize() {
 149+ return $this->totalSize;
 150+ }
 151+
 152+ function getTotalSpeed() {
 153+ return (int)($this->totalSize / ($this->totalTime ? $this->totalTime : 1));
 154+ }
 155+
 156+ // this method doesn't work properly if the last request hit the cache
 157+ function getEffectiveUrl() {
 158+ return curl_getinfo( $this->conn, CURLINFO_EFFECTIVE_URL) ;
 159+ }
 160+
 161+ // this method doesn't work properly if the last request hit the cache
 162+ function getResponseCode() {
 163+ return curl_getinfo( $this->conn, CURLINFO_HTTP_CODE );
 164+ }
 165+
 166+ function deleteCachedUrl( $url ) {
 167+ if( $this->cacheDir ) {
 168+ $md5 = md5( $url );
 169+ $file = $this->cacheDir . "/" . substr($md5, 0, 1) . "/" . substr($md5, 0, 2) . "/" . substr($md5, 0, 3) . "/" . substr($md5, 0, 4) . "/$md5.gz";
 170+
 171+ if ( file_exists( $file ) ) {
 172+ $this->cacheOutput .= "cache delete $file $url\n";
 173+ unlink( $file );
 174+ }
 175+ }
 176+ }
 177+
 178+ function request( $url, $vars = '', $post = false ) {
 179+ if( $post ) {
 180+ curl_setopt( $this->conn, CURLOPT_POST, true ); // this option is broken; turning it off doesn't work.
 181+ curl_setopt( $this->conn, CURLOPT_POSTFIELDS, $vars );
 182+ } else {
 183+ curl_setopt( $this->conn, CURLOPT_HTTPGET, true ); // this is the only way to turn off POST.
 184+ }
 185+
 186+ $fullurl = $url . ($post || $vars == '' ? '' : '?' . $vars);
 187+ curl_setopt( $this->conn, CURLOPT_URL, $fullurl );
 188+
 189+ if( $this->useReferer ) {
 190+ curl_setopt( $this->conn, CURLOPT_REFERER, $this->referer );
 191+ $this->referer = $fullurl;
 192+ }
 193+
 194+ $time_start = $this->getMicroTime();
 195+ $ret = curl_exec( $this->conn );
 196+ $time_end = $this->getMicroTime();
 197+
 198+ $this->totalTime += ($time_end - $time_start);
 199+ $this->totalSize += curl_getinfo( $this->conn, CURLINFO_SIZE_DOWNLOAD );
 200+ return $ret;
 201+ }
 202+
 203+ function get( $url, $vars = null ) {
 204+ $q = ''; // initialize before appending query parameters below
 205+ if( $vars != null && is_array( $vars ) ) {
 206+ foreach( $vars as $k => $v ) {
 207+ $q .= urlencode($k) . '=' . urlencode($v) . '&';
 208+ }
 209+ }
 210+ else $q = $vars;
 211+
 212+ if( $this->cacheDir ) {
 213+ $fullurl = $url . ($q == '' ? '' : '?' . $q);
 214+ $md5 = md5( $fullurl );
 215+ $file = $this->cacheDir . '/' . substr( $md5, 0, 1 ) . '/' . substr( $md5, 0, 2 ) . '/' . substr( $md5, 0, 3 ) . '/' . substr( $md5, 0, 4 ) . "/$md5.gz";
 216+
 217+ if ( file_exists( $file ) && filemtime( $file ) + $this->cachePeriod > time() ) {
 218+ $this->cacheOutput .= "cache get $file $fullurl\n";
 219+ return $this->load_gz( $file );
 220+ }
 221+ }
 222+
 223+ // enforce the delay, to protect bandwidth
 224+ if( $this->delay ) {
 225+ if( $this->lastTime + $this->delay > time() ) {
 226+ sleep( $this->delay - (time() - $this->lastTime) );
 227+ }
 228+ $this->lastTime = time();
 229+ }
 230+
 231+ $ret = $this->request( $url, $q );
 232+
 233+ // TODO: add http code checking here - we only want to cache 2xx, and maybe some of 3xx
 234+ if( $ret && $this->cacheDir && $this->getResponseCode() < 400 ) {
 235+ $this->cacheOutput .= "cache set $file $fullurl\n";
 236+ $this->save_gz( $file, $ret );
 237+ }
 238+
 239+ return $ret;
 240+ }
 241+
 242+ function post( $url, $vars = null ) {
 243+ $q = '';
 244+ if( $vars != null && is_array( $vars ) ) {
 245+ foreach( $vars as $k => $v ) {
 246+ $q .= "$k=$v&"; //$q .= utf8_encode($k) . '=' . utf8_encode($v) . '&';
 247+ }
 248+ } else $q = $vars;
 249+ return $this->request( $url, $q, true );
 250+ }
 251+
 252+ // load file from cache
 253+ function load( $file ) {
 254+ $fp = fopen( $file, 'r' );
 255+ flock( $fp, LOCK_SH );
 256+ $data = fread( $fp, filesize( $file ) );
 257+ flock( $fp, LOCK_UN );
 258+ fclose( $fp );
 259+ return $data;
 260+ }
 261+
 262+ function load_gz( $file ) {
 263+ $zp = gzopen( $file, 'r' );
 264+ $data = '';
 265+ while ( !gzeof( $zp ) ) {
 266+ $data .= gzread( $zp, 4096 );
 267+ }
 268+ gzclose( $zp );
 269+ return $data;
 270+ }
 271+
 272+ // save file to cache
 273+ function save( $file, $data ) {
 274+ $fp = fopen( $file, 'w' );
 275+ flock( $fp, LOCK_EX );
 276+ fwrite( $fp, $data );
 277+ flock( $fp, LOCK_UN );
 278+ fclose( $fp );
 279+ }
 280+
 281+ function save_gz( $file, $data ) {
 282+ $zp = gzopen( $file, 'w9' );
 283+ gzwrite( $zp, $data );
 284+ gzclose( $zp );
 285+ }
 286+
 287+ // this method builds the entire cache directory tree
 288+ function buildCacheTree() {
 289+ if( $this->cacheDir && !file_exists( $this->cacheDir ) ) {
 290+ mkdir( $this->cacheDir, 0700 );
 291+ for ( $a = 0; $a <= 15; $a++ ) {
 292+ $aa = $this->cacheDir . '/' . dechex($a);
 293+ mkdir( $aa, 0700 );
 294+ for ( $b = 0; $b <= 15; $b++ ) {
 295+ $bb = $aa . '/' . dechex($a) . dechex($b);
 296+ mkdir( $bb, 0700 );
 297+ for ( $c = 0; $c <= 15; $c++ ) {
 298+ $cc = $bb . '/' . dechex($a) . dechex($b) . dechex($c);
 299+ mkdir( $cc, 0700 );
 300+ for ( $d = 0; $d <= 15; $d++ ) {
 301+ mkdir( $cc . '/' . dechex($a) . dechex($b) . dechex($c) . dechex($d), 0700 );
 302+ }
 303+ }
 304+ }
 305+ }
 306+ }
 307+ }
 308+}
\ No newline at end of file
Property changes on: trunk/extensions/WikiCurl/WikiCurl.php
___________________________________________________________________
Name: svn:eol-style
   + native
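
As an illustration (not part of this commit), a minimal sketch of how the WikiCurl class above might be driven from another extension running inside MediaWiki; the cache directory path and the example URL are hypothetical, and only methods defined in the class above are used.

// Sketch only: illustrative usage of WikiCurl from another extension.
global $IP;
require_once( "$IP/extensions/WikiCurl/WikiCurl.php" );

$curl = new WikiCurl();
$curl->setTimeout( 10 );                     // fail the transfer after 10 seconds
$curl->setCacheDir( '/tmp/wikicurl-cache' ); // hypothetical path; enables the gzip file cache
$curl->setCachePeriod( 3600 );               // serve cached copies for up to an hour
$curl->buildCacheTree();                     // pre-create the md5-prefixed directory tree

// GET with query parameters; a repeat call within the cache period is
// answered from the cache file instead of the network.
$response = $curl->get( 'http://example.org/w/api.php', array(
	'action' => 'query',
	'format' => 'xml'
) );

if ( $response === false || $curl->getErrno() ) {
	wfDebug( 'WikiCurl request failed: ' . $curl->getError() . "\n" );
} else {
	// CURLOPT_HEADER is set in the constructor, so the HTTP headers are still
	// attached to $response; GetFamily.php strips them at the first blank line.
	wfDebug( 'WikiCurl fetched ' . $curl->getTotalSize() . ' bytes in ' . $curl->getTotalTime() . " seconds\n" );
}
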
Index: trunk/extensions/GetFamily/GetFamily.i18n.php
@@ -0,0 +1,17 @@
 2+<?php
 3+/**
 4+ * Internationalization file for GetFamily extension.
 5+ *
 6+ * @file
 7+ * @ingroup Extensions
 8+ */
 9+
 10+$messages = array();
 11+
 12+/** English
 13+ * @author Łukasz Matysiak <egon@wikia.com>
 14+ */
 15+$messages['en'] = array(
 16+ 'getfamily' => 'Get Pywikipediabot Family',
 17+ 'getfamily-interwikierror' => 'Error: incorrect interwiki setup. Please contact your local administrator.',
 18+);
\ No newline at end of file
Property changes on: trunk/extensions/GetFamily/GetFamily.i18n.php
___________________________________________________________________
Name: svn:eol-style
   + native
Index: trunk/extensions/GetFamily/GetFamily.php
@@ -0,0 +1,208 @@
 2+<?php
 3+/**
 4+ * Special:GetFamily extension
 5+ * Generates family file for pywikipediabot
 6+ * Wikia © 2007
 7+ *
 8+ * @file
 9+ * @ingroup Extensions
 10+ * @version 1.0
 11+ * @author Łukasz "Egon" Matysiak <egon@wikia.com>
 12+ * @link http://www.mediawiki.org/wiki/Extension:GetFamily Documentation
 13+ * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
 14+ */
 15+
 16+if ( !defined( 'MEDIAWIKI' ) ) {
 17+ echo "[ <b> Error </b> ] This is not a valid entry point.\n";
 18+ exit( 1 );
 19+}
 20+
 21+// Extension credits that will show up on Special:Version
 22+$wgExtensionCredits['specialpage'][] = array(
 23+ 'name' => 'GetFamily',
 24+ 'version' => '1.0',
 25+ 'author' => 'Łukasz Matysiak',
 26+ 'description' => 'Generates a family file for pywikipediabot',
 27+ 'url' => 'http://www.mediawiki.org/wiki/Extension:GetFamily'
 28+);
 29+
 30+// Set up the new special page
 31+$dir = dirname(__FILE__) . '/';
 32+$wgExtensionMessagesFiles['GetFamily'] = $dir . 'GetFamily.i18n.php';
 33+$wgSpecialPages['GetFamily'] = array('SpecialPage', 'GetFamily', 'getfamily');
 34+
 35+// New user right
 36+$wgAvailableRights[] = 'getfamily';
 37+$wgGroupPermissions['*']['getfamily'] = true;
 38+
 39+function wfSpecialGetFamily(){
 40+ global $wgRequest, $wgLanguageNames;
 41+ global $wgScript, $wgDBname, $wgLanguageCode, $wgSitename, $wgServer, $wgArticlePath, $wgCanonicalNamespaceNames, $wgVersion;
 42+
 43+ $out = '';
 44+
 45+ if ( $wgRequest->getVal( 'action' ) == 'GetLocal' ){
 46+ $dbr = wfGetDB( DB_SLAVE );
 47+ $fromLang = $wgRequest->getVal( 'fromLang' );
 48+ $result = $dbr->select( 'interwiki', array( 'iw_url' ), array( 'iw_prefix' => $fromLang ), __METHOD__ );
 49+ if ( $object = $dbr->fetchObject( $result ) ){
 50+ $fromLang = $object->iw_url;
 51+ } else {
 52+ $fromLang = '';
 53+ }
 54+
 55+ header( 'Content-Type: text/xml' );
 56+ $out .= "<family>\n";
 57+
 58+ $out .= Xml::element( 'urlcheck', array(), $fromLang )."\n";
 59+ $out .= Xml::element( 'language', array(), $wgLanguageCode )."\n";
 60+ $out .= Xml::element( 'hostname', array(), str_replace( 'http://', '', $wgServer ) )."\n";
 61+ $out .= Xml::element( 'path', array(), $wgScript )."\n";
 62+
 63+ //$keys = array_keys ($wgCanonicalNamespaceNames);
 64+ $language = Language::factory( $wgLanguageCode );
 65+ $array = $language->getNamespaces();
 66+ $keys = array_keys( $array );
 67+
 68+ foreach( $keys as $key ){
 69+ $out .= Xml::openElement( 'namespace', array() ) . "\n";
 70+ $out .= Xml::element( 'key', array(), $key ) . "\n";
 71+ //$out .= Xml::element('name', array(), $wgCanonicalNamespaceNames[$key]) . "\n";
 72+ $out .= Xml::element( 'name', array(), $array[$key] ) . "\n";
 73+ $out .= Xml::closeElement( 'namespace' );
 74+ }
 75+ $out .= "</family>\n";
 76+ } else {
 77+ header('Content-Type: text/plain');
 78+
 79+ $langcodes = array_keys( $wgLanguageNames );
 80+ $where = ''; // built below into a quoted list for the IN clause
 81+ $dbr = wfGetDB( DB_SLAVE );
 82+ foreach( $langcodes as $lang_code ){
 83+ $where .= ', ' . $dbr->addQuotes( $lang_code );
 84+ }
 85+ $where = substr( $where, 1 );
 86+ $result = $dbr->query( "SELECT iw_prefix, iw_url FROM interwiki WHERE iw_prefix IN ( $where );" );
 87+
 88+ $datalinks = array();
 89+
 90+ while( $dbObject = $dbr->fetchObject( $result ) ){
 91+ $datalinks[$dbObject->iw_prefix] = $dbObject->iw_url;
 92+ }
 93+
 94+ $datalinks[$wgLanguageCode] = $wgServer.$wgArticlePath;
 95+
 96+ unset( $datalinks['bug'] );
 97+
 98+ $metadata = array();
 99+ $metadata['langs'] = array();
 100+ $metadata['path'] = array();
 101+ $namespacedata = array();
 102+
 103+ foreach( $datalinks as $lang => $link ){
 104+ $link = str_replace( '$1', 'Special:GetFamily?action=GetLocal&fromLang='.$wgLanguageCode, $link );
 105+ if( !class_exists( 'WikiCurl' ) ){
 106+ global $IP;
 107+ require_once("$IP/extensions/WikiCurl/WikiCurl.php");
 108+ }
 109+ $handler = new WikiCurl();
 110+ $content = $handler->get( $link );
 111+ if ( strpos( $content, '<family>' ) === false ){
 112+ unset( $handler );
 113+ continue;
 114+ }
 115+ $content = substr( $content, strpos( $content, "\r\n\r\n" ) + 4 );
 116+ unset( $handler );
 117+
 118+ try {
 119+ $xml = new SimpleXMLElement( $content );
 120+ } catch( Exception $e ){
 121+ continue;
 122+ }
 123+
 124+ //$urlcheck = (string)$xml->urlcheck;
 125+ //if ( strcmp( $urlcheck, $wgServer.$wgArticlePath ) != 0 ){
 126+ // continue;
 127+ //}
 128+ $metadata['langs'][$lang] = (string)$xml->hostname;
 129+ $metadata['path'][$lang] = (string)$xml->path;
 130+
 131+ foreach( $xml->namespace as $namespace ){
 132+ $namespacedata[(int)$namespace->key][$lang] = (string)$namespace->name;
 133+ }
 134+ }
 135+
 136+ if ( $namespacedata != array() ){
 137+$out .= "# -*- coding: utf-8 -*-
 138+
 139+'''
 140+The $wgSitename family.
 141+
 142+This is config file for pywikipediabot framework.
 143+It was generated by Special:GetFamily (a Wikia extension).
 144+
 145+Save this file to families/{$wgDBname}_family.py in your pywikibot installation
 146+The pywikipediabot itself is available for free download from svn.wikimedia.org
 147+'''
 148+
 149+import family
 150+
 151+class Family(family.Family):
 152+
 153+ def __init__(self):
 154+ family.Family.__init__(self)
 155+ self.name = '$wgDBname' # Set the family name; this should be the same as in the filename.
 156+
 157+ self.langs = {\n";
 158+
 159+ $keys = array_keys( $metadata['langs'] );
 160+
 161+ foreach( $keys as $key ){
 162+ $out .= " '$key': '{$metadata['langs'][$key]}', \n";
 163+ }
 164+
 165+ $out .= " }\n \n";
 166+
 167+ $namespace_keys = array_keys( $namespacedata );
 168+
 169+ foreach( $namespace_keys as $key ){
 170+ $langs = array_keys( $namespacedata[$key] );
 171+ $out .= " self.namespaces[$key] = {\n";
 172+
 173+ foreach( $langs as $lang ){
 174+ $out .= " '$lang': u'{$namespacedata[$key][$lang]}',\n";
 175+ }
 176+ $out .= " }\n \n";
 177+ }
 178+ $version = array();
 179+ preg_match( '/[0-9]*\.[0-9]*/', $wgVersion, $version );
 180+ $version = $version[0];
 181+
 182+$out .= "
 183+ def hostname(self, code):
 184+ return self.langs[code]
 185+
 186+ def path(self, code):
 187+ path = ''\n";
 188+
 189+ $keys = array_keys( $metadata['langs'] );
 190+
 191+ foreach( $keys as $key ){
 192+ $out .= " if code == '$key':\n path = '{$metadata['path'][$key]}'\n";
 193+ }
 194+
 195+
 196+$out .= " return path
 197+
 198+ def login_address(self, code):
 199+ return '%s?title=%s:Userlogin&action=submitlogin' % (self.path(code), self.special_namespace_url(code))
 200+
 201+ def version(self, code):
 202+ return '".$version."' # The MediaWiki version used. Not very important in most cases.
 203+";
 204+} else {
 205+ $out .= wfMsg( 'getfamily-interwikierror' );
 206+}
 207+ }
 208+ die( $out );
 209+}
Property changes on: trunk/extensions/GetFamily/GetFamily.php
___________________________________________________________________
Name: svn:eol-style
   + native
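
As a review-side sketch (not code from this commit): the hand-built $where string and the raw $dbr->query() call in wfSpecialGetFamily() could go through the database abstraction layer instead, which quotes the values and assembles the IN (...) list itself when given an array condition.

// Sketch only: equivalent interwiki lookup via the DB abstraction layer.
// An array value for 'iw_prefix' is expanded by the wrapper into a properly
// quoted IN (...) list, so the addQuotes() loop becomes unnecessary.
$dbr = wfGetDB( DB_SLAVE );
$res = $dbr->select(
	'interwiki',
	array( 'iw_prefix', 'iw_url' ),
	array( 'iw_prefix' => $langcodes ),
	__METHOD__
);

$datalinks = array();
while ( $row = $dbr->fetchObject( $res ) ) {
	$datalinks[$row->iw_prefix] = $row->iw_url;
}
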

Comments

#Comment by Siebrand (talk | contribs)   22:42, 21 February 2009

Please fix:

  • GetFamily: no descriptionmsg in extension credits
  • GetFamily: right-getfamily message missing
  • GetFamily: a special page should have a $wgSpecialPageGroups[] entry
  • GetFamily: no i18n for special page aliases
  • GetFamily: special page is using an old method for adding special pages. Put it in a class in a separate file
  • WikiCurl: put classes in a separate file
  • WikiCurl: no descriptionmsg in extension credits
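
For reference, a minimal sketch of how the credits, rights and special-page-group items above might be addressed; the 'getfamily-desc' key, the 'wiki' group and the message wordings are illustrative choices, not part of the commit, and the special-page alias i18n and the move into a separate class file are not shown here.

// In GetFamily.php — sketch only; key names and group are illustrative.
$wgExtensionCredits['specialpage'][] = array(
	'name' => 'GetFamily',
	'version' => '1.0',
	'author' => 'Łukasz Matysiak',
	'descriptionmsg' => 'getfamily-desc', // localisable description on Special:Version
	'url' => 'http://www.mediawiki.org/wiki/Extension:GetFamily'
);
$wgSpecialPageGroups['GetFamily'] = 'wiki'; // grouping on Special:SpecialPages

// In GetFamily.i18n.php — adds the description and the right-getfamily message.
$messages['en'] = array(
	'getfamily' => 'Get Pywikipediabot Family',
	'getfamily-desc' => 'Generates a family file for pywikipediabot',
	'getfamily-interwikierror' => 'Error: incorrect interwiki setup. Please contact your local administrator.',
	'right-getfamily' => 'Generate a pywikipediabot family file',
);
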
#Comment by Pinky (talk | contribs)   16:48, 22 March 2009

Those fixmes seem rather minor and this extension is not core or used by Wikimedia, so moving from fixme to deferred.

#Comment by Siebrand (talk | contribs)   17:45, 22 March 2009

Please advise on the right course of action for making comments on commits that are not part of core. Where, for example, is it stated that code review on this wiki only concerns Wikimedia, or that only non-minor comments are to be marked 'fixme'? How many minor remarks on a commit warrant a fixme?

Status & tagging log