r112494 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r112493‎ | r112494 | r112495 >
Date:17:38, 27 February 2012
Author:maxsem
Status:ok
Tags:
Comment:
A quick script to collect extract length stats
Modified paths:
  • /trunk/extensions/MobileFrontend/collectExtractStats.php (added) (history)

Diff [purge]

Index: trunk/extensions/MobileFrontend/collectExtractStats.php
@@ -0,0 +1,73 @@
 2+<?php
 3+
 4+$IP = getenv( 'MW_INSTALL_PATH' );
 5+if ( $IP === false ) {
 6+ $IP = dirname( __FILE__ ) . '/../..';
 7+}
 8+require_once( "$IP/maintenance/Maintenance.php" );
 9+
 10+class CollectStats extends Maintenance {
 11+ public function __construct() {
 12+ parent::__construct();
 13+ $this->mDescription = 'Developer script that calculates average full extract size';
 14+ $this->addArg( 'rate', 'Check excerpt length for one page of this number', true );
 15+ }
 16+
 17+ public function execute() {
 18+ if ( !class_exists( 'ApiQueryExcerpt' ) ) {
 19+ $this->error( 'This script requires MobileFrontend to be properly installed', true );
 20+ }
 21+ $rate = $this->getArg( 0 );
 22+ $ns = array( NS_MAIN );
 23+ $pageId = 0;
 24+ $dbr = $this->getDB( DB_SLAVE );
 25+ $total = 0;
 26+ $calls = 0;
 27+ $html = 0;
 28+ $plain = 0;
 29+ do {
 30+ $res = $dbr->select( 'page',
 31+ array( 'page_id', 'page_namespace', 'page_title' ),
 32+ array( 'page_namespace' => $ns, 'page_is_redirect' => 0, "page_id > $pageId" ),
 33+ __METHOD__,
 34+ array( 'ORDER BY' => 'page_id', 'LIMIT' => 500 )
 35+ );
 36+ foreach ( $res as $row ) {
 37+ $pageId = $row->page_id;
 38+ if ( $total++ % $rate == 0 ) {
 39+ $title = Title::newFromRow( $row );
 40+ $html += $this->getLength( $title, false );
 41+ $plain += $this->getLength( $title, true );
 42+ if ( ++$calls % 10 == 0 ) {
 43+ $this->output( "$calls\n" );
 44+ }
 45+ }
 46+ }
 47+ } while( $res->numRows() > 0 );
 48+
 49+ $this->output( "Total pages processed: $calls\n" );
 50+ if ( $calls > 0 ) {
 51+ $html /= $calls;
 52+ $plain /= $calls;
 53+ $this->output( " Average HTML length: $html\n Average plaintext length: $plain" );
 54+ }
 55+ }
 56+
 57+ private function getLength( Title $title, $plainText ) {
 58+ $params = array(
 59+ 'action' => 'query',
 60+ 'prop' => 'excerpt',
 61+ 'titles' => $title->getPrefixedText(),
 62+ );
 63+ if ( $plainText ) {
 64+ $params['explaintext'] = 1;
 65+ }
 66+ $main = new ApiMain( new FauxRequest( $params ) );
 67+ $main->execute();
 68+ $data = $main->getResultData();
 69+ return strlen( $data['query']['pages'][$title->getArticleID()]['excerpt'][0] );
 70+ }
 71+}
 72+
 73+$maintClass = 'CollectStats';
 74+require_once( DO_MAINTENANCE );
\ No newline at end of file
Property changes on: trunk/extensions/MobileFrontend/collectExtractStats.php
___________________________________________________________________
Added: svn:eol-style
175 + native

Follow-up revisions

Revision | Commit summary | Author | Date
r113172 | Follow-up r112494 and r112495: this script isn't needed anymore | maxsem | 19:42, 6 March 2012

Status & tagging log