r105641 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r105640‎ | r105641 | r105642 >
Date:02:24, 9 December 2011
Author:kaldari
Status:resolved (Comments)
Tags:
Comment:
an initial implementation of a stats summary system. not yet integrated into any of the special pages
Modified paths:
  • /trunk/extensions/ContributionReporting/PopulateFundraisingStatistics.php (added) (history)
  • /trunk/extensions/ContributionReporting/patches (added) (history)
  • /trunk/extensions/ContributionReporting/patches/public-reporting-summaries.sql (added) (history)

Diff [purge]

Index: trunk/extensions/ContributionReporting/PopulateFundraisingStatistics.php
@@ -0,0 +1,321 @@
 2+<?php
 3+
 4+$IP = getenv( 'MW_INSTALL_PATH' );
 5+if ( $IP === false ) {
 6+ $IP = dirname( __FILE__ ) . '/../..';
 7+}
 8+require( "$IP/maintenance/Maintenance.php" );
 9+
 10+class PopulateFundraisingStatistics extends Maintenance {
 11+ /**
 12+ * DB slave
 13+ * @var object
 14+ */
 15+ protected $dbr;
 16+
 17+ /**
 18+ * DB master
 19+ * @var object
 20+ */
 21+ protected $dbw;
 22+
 23+ /**
 24+ * Valid operations and their execution methods for this script to perform
 25+ *
 26+ * Operations are passed in as options during run-time - only valid options,
 27+ * which are defined here, can be executed. Valid operations are mapped here
 28+ * to a corresponding method ( array( 'operation' => 'method' ))
 29+ * @var array
 30+ */
 31+ protected $operation_map = array(
 32+ 'populatedays' => 'populateDays',
 33+ 'populatefundraisers' => 'populateFundraisers',
 34+ 'updatedays' => 'updateDays',
 35+ );
 36+
 37+ /**
 38+ * Operations to execute
 39+ * @var array
 40+ */
 41+ public $operations = array();
 42+
 43+ public function __construct() {
 44+ parent::__construct();
 45+ $this->mDescription = "Populates the public reporting summary tables";
 46+
 47+ $this->addOption( 'op', 'The ContributionReporting stats gathering operation to run (e.g. "updatedays"). Can specify multiple operations, separated by comma.', true, true );
 48+ }
 49+
 50+ /**
 51+ * Wait for all the slave database servers to sync up with the master
 52+ */
 53+ public function syncDBs() {
 54+ // FIXME: Copied from populateAFRevisions.php, which coppied from updateCollation.php, should be centralized somewhere
 55+ $lb = wfGetLB();
 56+ // bug 27975 - Don't try to wait for slaves if there are none
 57+ // Prevents permission error when getting master position
 58+ if ( $lb->getServerCount() > 1 ) {
 59+ $dbw = $lb->getConnection( DB_MASTER );
 60+ $pos = $dbw->getMasterPos();
 61+ $lb->waitForAll( $pos );
 62+ }
 63+ }
 64+
 65+ /**
 66+ * Bootstrap this maintenance script
 67+ *
 68+ * Performs operations necessary for this maintenance script to run which
 69+ * cannot or do not make sense to run in the constructor.
 70+ */
 71+ public function bootstrap() {
 72+ /**
 73+ * Set user-specified operations to perform
 74+ */
 75+ $operations = explode( ',', $this->getOption( 'op' ) );
 76+ // check sanity of specified operations
 77+ if ( !$this->checkOperations( $operations ) ) {
 78+ $this->error( 'Invalid operation specified.', true );
 79+ } else {
 80+ $this->operations = $operations;
 81+ }
 82+
 83+ // set db objects
 84+ $this->dbr = wfGetDB( DB_SLAVE );
 85+ $this->dbw = wfGetDB( DB_MASTER );
 86+ }
 87+
 88+ /**
 89+ * Check whether or not specified operations are valid.
 90+ *
 91+ * A specified operation is considered valid if it exists
 92+ * as a key in the operation map.
 93+ *
 94+ * @param array $ops An array of operations to check
 95+ * @return bool
 96+ */
 97+ public function checkOperations( array $ops ) {
 98+ foreach ( $ops as $operation ) {
 99+ if ( !isset( $this->operation_map[ $operation ] ) ) {
 100+ return false;
 101+ }
 102+ }
 103+ return true;
 104+ }
 105+
 106+ public function execute() {
 107+ // finish bootstrapping the script
 108+ $this->bootstrap();
 109+
 110+ // execute requested operations
 111+ foreach ( $this->operations as $operation ) {
 112+ $method = $this->operation_map[ $operation ];
 113+ $this->$method();
 114+ }
 115+ }
 116+
 117+ /**
 118+ * Populate the donation stats for every day
 119+ *
 120+ * Note that these summaries exclude amounts that are not between the mimumum and maximum
 121+ * donation amounts. This is because every once in a while we have to condense multiple
 122+ * donations that may span several days into a single donation for CiviCRM consumption.
 123+ */
 124+ public function populateDays() {
 125+ global $egFundraiserStatisticsMinimum, $egFundraiserStatisticsMaximum;
 126+
 127+ $begin = time();
 128+ $records = 0;
 129+ $insertArray = array();
 130+ $this->output( "Writing data to public_reporting_days...\n" );
 131+
 132+ $conditions = array(
 133+ 'converted_amount >= ' . $egFundraiserStatisticsMinimum,
 134+ 'converted_amount <= ' . $egFundraiserStatisticsMaximum
 135+ );
 136+
 137+ // Get the data for a fundraiser
 138+ $result = $this->dbr->select( 'public_reporting',
 139+ array(
 140+ "DATE_FORMAT( FROM_UNIXTIME( received ),'%Y-%m-%d' ) AS date",
 141+ 'sum( converted_amount ) AS total',
 142+ 'count( * ) AS number',
 143+ 'avg( converted_amount ) AS average',
 144+ 'max( converted_amount ) AS maximum',
 145+ ),
 146+ $conditions,
 147+ __METHOD__,
 148+ array(
 149+ 'ORDER BY' => 'received',
 150+ 'GROUP BY' => "DATE_FORMAT( FROM_UNIXTIME( received ),'%Y-%m-%d' )"
 151+ )
 152+ );
 153+
 154+ while ( $row = $this->dbr->fetchRow( $result ) ) {
 155+ // Add it to the insert array
 156+ $insertArray[] = array(
 157+ 'prd_date' => $row['date'],
 158+ 'prd_total' => $row['total'],
 159+ 'prd_number' => $row['number'],
 160+ 'prd_average' => $row['average'],
 161+ 'prd_maximum' => $row['maximum'],
 162+ 'prd_insert_timestamp' => time(),
 163+ );
 164+ $records++;
 165+ }
 166+
 167+ if ( $records > 0 ) {
 168+ // Empty the table of previous totals
 169+ $res = $this->dbw->delete( 'public_reporting_days', array( 1 ) );
 170+ // Insert the new totals
 171+ $res = $this->dbw->insert( 'public_reporting_days', $insertArray, __METHOD__ );
 172+ // Sync the databases
 173+ $this->syncDBs();
 174+ }
 175+
 176+ $lag = time() - $begin;
 177+ $this->output( "Inserted " . $records . " rows. ($lag seconds)\n" );
 178+ $this->output( "Done.\n" );
 179+ }
 180+
 181+ /**
 182+ * Populate the cumulative donation stats for all fundraisers
 183+ *
 184+ * Note that we do not build these stats from the daily summaries since the summaries exclude
 185+ * amounts that are not between the minumum and maximum donation amounts.
 186+ */
 187+ public function populateFundraisers() {
 188+ global $egFundraiserStatisticsFundraisers;
 189+
 190+ $begin = time();
 191+ $records = 0;
 192+ $insertArray = array();
 193+ $this->output( "Writing data to public_reporting_fundraisers...\n" );
 194+ foreach ( $egFundraiserStatisticsFundraisers as $fundraiser ) {
 195+
 196+ $conditions = array(
 197+ 'received >= ' . $this->dbr->addQuotes( wfTimestamp( TS_UNIX, strtotime( $fundraiser['start'] ) ) ),
 198+ 'received <= ' . $this->dbr->addQuotes( wfTimestamp( TS_UNIX, strtotime( $fundraiser['end'] ) + 24 * 60 * 60 ) ),
 199+ );
 200+
 201+ // Get the total for a fundraiser
 202+ $result = $this->dbr->select(
 203+ 'public_reporting',
 204+ array(
 205+ 'sum( converted_amount ) AS total',
 206+ 'count( * ) AS number',
 207+ 'avg( converted_amount ) AS average',
 208+ 'max( converted_amount ) AS maximum',
 209+ ),
 210+ $conditions,
 211+ __METHOD__
 212+ );
 213+ $row = $this->dbr->fetchRow( $result );
 214+
 215+ // Add it to the insert array
 216+ $insertArray[] = array(
 217+ 'prf_id' => $fundraiser['id'],
 218+ 'prf_total' => $row['total'],
 219+ 'prf_number' => $row['number'],
 220+ 'prf_average' => $row['average'],
 221+ 'prf_maximum' => $row['maximum'],
 222+ 'prf_insert_timestamp' => time(),
 223+ );
 224+
 225+ $records++;
 226+ }
 227+ if ( $records > 0 ) {
 228+ // Empty the table of previous totals
 229+ $res = $this->dbw->delete( 'public_reporting_fundraisers', array( 1 ) );
 230+ // Insert the new totals
 231+ $res = $this->dbw->insert( 'public_reporting_fundraisers', $insertArray, __METHOD__ );
 232+ // Sync the databases
 233+ $this->syncDBs();
 234+ }
 235+ $lag = time() - $begin;
 236+ $this->output( "Inserted " . $records . " rows. ($lag seconds)\n" );
 237+ $this->output( "Done.\n" );
 238+ }
 239+
 240+ /**
 241+ * Populate the donation stats for every day of the current fundraiser
 242+ *
 243+ * Note that if you are running more than one fundraiser at once, you'll want to use
 244+ * populateDays instead of updateDays.
 245+ * Note that these summaries exclude amounts that are not between the mimumum and maximum
 246+ * donation amounts. This is because every once in a while we have to condense multiple
 247+ * donations that may span several days into a single donation for CiviCRM consumption.
 248+ */
 249+ public function updateDays() {
 250+ global $egFundraiserStatisticsFundraisers, $egFundraiserStatisticsMinimum, $egFundraiserStatisticsMaximum;
 251+
 252+ $mostRecentFundraiser = end( $egFundraiserStatisticsFundraisers );
 253+ $start = $mostRecentFundraiser['start'];
 254+ $end = $mostRecentFundraiser['end'];
 255+ $begin = time();
 256+ $records = 0;
 257+ $insertArray = array();
 258+ $this->output( "Writing data to public_reporting_days...\n" );
 259+
 260+ $conditions = array(
 261+ 'received >= ' . $this->dbr->addQuotes( wfTimestamp( TS_UNIX, strtotime( $start ) ) ),
 262+ 'received <= ' . $this->dbr->addQuotes( wfTimestamp( TS_UNIX, strtotime( $end ) + 24 * 60 * 60 ) ),
 263+ 'converted_amount >= ' . $egFundraiserStatisticsMinimum,
 264+ 'converted_amount <= ' . $egFundraiserStatisticsMaximum
 265+ );
 266+
 267+ // Get the data for a fundraiser
 268+ $result = $this->dbr->select( 'public_reporting',
 269+ array(
 270+ "DATE_FORMAT( FROM_UNIXTIME( received ),'%Y-%m-%d' ) AS date",
 271+ 'sum( converted_amount ) AS total',
 272+ 'count( * ) AS number',
 273+ 'avg( converted_amount ) AS average',
 274+ 'max( converted_amount ) AS maximum',
 275+ ),
 276+ $conditions,
 277+ __METHOD__,
 278+ array(
 279+ 'ORDER BY' => 'received',
 280+ 'GROUP BY' => "DATE_FORMAT( FROM_UNIXTIME( received ),'%Y-%m-%d' )"
 281+ )
 282+ );
 283+
 284+ while ( $row = $this->dbr->fetchRow( $result ) ) {
 285+ // Add it to the insert array
 286+ $insertArray[] = array(
 287+ 'prd_date' => $row['date'],
 288+ 'prd_total' => $row['total'],
 289+ 'prd_number' => $row['number'],
 290+ 'prd_average' => $row['average'],
 291+ 'prd_maximum' => $row['maximum'],
 292+ 'prd_insert_timestamp' => time(),
 293+ );
 294+ $records++;
 295+ }
 296+
 297+ if ( $records > 0 ) {
 298+ // Set timezone to UTC to match donation servers
 299+ date_default_timezone_set( 'UTC' );
 300+ // Empty the table of previous totals for this fundraiser
 301+ $res = $this->dbw->delete(
 302+ 'public_reporting_days',
 303+ array(
 304+ 'prd_date >= ' . $this->dbr->addQuotes( wfTimestamp( TS_DB, strtotime( $start ) ) ),
 305+ 'prd_date <= ' . $this->dbr->addQuotes( wfTimestamp( TS_DB, strtotime( $end ) ) ),
 306+ )
 307+ );
 308+ // Insert the new totals
 309+ $res = $this->dbw->insert( 'public_reporting_days', $insertArray, __METHOD__ );
 310+ // Sync the databases
 311+ $this->syncDBs();
 312+ }
 313+
 314+ $lag = time() - $begin;
 315+ $this->output( "Updated " . $records . " rows. ($lag seconds)\n" );
 316+ $this->output( "Done.\n" );
 317+ }
 318+
 319+}
 320+
 321+$maintClass = "PopulateFundraisingStatistics";
 322+require_once( DO_MAINTENANCE );
Index: trunk/extensions/ContributionReporting/patches/public-reporting-summaries.sql
@@ -0,0 +1,21 @@
 2+-- Create summary tables for fundraising statistics
 3+
 4+CREATE TABLE IF NOT EXISTS /*$wgDBprefix*/public_reporting_fundraisers (
 5+ `prf_id` varchar(32) NOT NULL,
 6+ `prf_total` decimal(14,4) unsigned NOT NULL DEFAULT '0.0000',
 7+ `prf_number` int(11) NOT NULL DEFAULT '0',
 8+ `prf_average` decimal(8,4) NOT NULL DEFAULT '0.0000',
 9+ `prf_maximum` decimal(11,4) NOT NULL DEFAULT '0.0000',
 10+ `prf_insert_timestamp` int(10) NOT NULL,
 11+) /*$wgDBTableOptions*/;
 12+CREATE UNIQUE INDEX /*i*/prf_id ON /*$wgDBprefix*/public_reporting_fundraisers (prf_id);
 13+
 14+CREATE TABLE IF NOT EXISTS /*$wgDBprefix*/public_reporting_days (
 15+ `prd_date` date NOT NULL,
 16+ `prd_total` decimal(14,4) unsigned NOT NULL DEFAULT '0.0000',
 17+ `prd_number` int(11) NOT NULL DEFAULT '0',
 18+ `prd_average` decimal(8,4) unsigned NOT NULL DEFAULT '0.0000',
 19+ `prd_maximum` decimal(11,4) unsigned NOT NULL DEFAULT '0.0000',
 20+ `prd_insert_timestamp` int(10) NOT NULL,
 21+) /*$wgDBTableOptions*/;
 22+CREATE UNIQUE INDEX /*i*/prf_id ON /*$wgDBprefix*/public_reporting_days (prd_date);

Follow-up revisions

RevisionCommit summaryAuthorDate
r106234follow-up to r105641 - better db syncing, using primary keys, etckaldari19:30, 14 December 2011
r106696MFT r105145, r105454, r105641, r105657, r105659, r105757, r105916, r105965, r...awjrichards20:59, 19 December 2011

Comments

#Comment by Catrope (talk | contribs)   17:03, 14 December 2011
+	/**
+	 * Wait for all the slave database servers to sync up with the master
+	 */	
+	public function syncDBs() {
+		// FIXME: Copied from populateAFRevisions.php, which coppied from updateCollation.php, should be centralized somewhere
+		$lb = wfGetLB();
+		// [https://bugzilla.wikimedia.org/show_bug.cgi?id=27975 bug 27975] - Don't try to wait for slaves if there are none
+		// Prevents permission error when getting master position
+		if ( $lb->getServerCount() > 1 ) {
+			$dbw = $lb->getConnection( DB_MASTER );
+			$pos = $dbw->getMasterPos();
+			$lb->waitForAll( $pos );
+		}
+	}

You don't need this anymore since 1.18 was deployed. You can now simply use wfWaitForSlaves() .

+			'converted_amount >= ' . $egFundraiserStatisticsMinimum,
+			'converted_amount <= ' . $egFundraiserStatisticsMaximum

I would feel safer if you ran these through intval() before injecting them into SQL.

+			date_default_timezone_set( 'UTC' );

This looks like a hack. Why is this done here, rather than further up where strtotime() is first used? Why not avoid using strtotime() altogether by specifying the start and end dates in a time format recognized by wfTimestamp()?

+CREATE UNIQUE INDEX /*i*/prf_id ON /*$wgDBprefix*/public_reporting_fundraisers (prf_id);

You can just make this the primary key.

+CREATE UNIQUE INDEX /*i*/prf_id ON /*$wgDBprefix*/public_reporting_days (prd_date);

Misnamed index.

OK otherwise, this is looking good.

#Comment by Kaldari (talk | contribs)   19:44, 14 December 2011

Fixed in r106234.

I've left the config/strtotime stuff how it is for now, but I think you have a point and we should look into changing these configs from simple dates into standard timestamp formats.

#Comment by Awjrichards (talk | contribs)   03:16, 15 December 2011

I should've caught this the first time around but it just occurred to me that this is not going to work as expected the way things are currently set up:

+		$this->dbr = wfGetDB( DB_SLAVE );
+		$this->dbw = wfGetDB( DB_MASTER );
<snip>
+		// Get the data for a fundraiser
+		$result = $this->dbr->select( 'public_reporting',
+			array(
+				"DATE_FORMAT( FROM_UNIXTIME( received ),'%Y-%m-%d' ) AS date",
+				'sum( converted_amount ) AS total',
+				'count( * ) AS number',
+				'avg( converted_amount ) AS average',
+				'max( converted_amount ) AS maximum',
+			),
+			$conditions,
+			__METHOD__,
+			array(
+				'ORDER BY' => 'received',
+				'GROUP BY' => "DATE_FORMAT( FROM_UNIXTIME( received ),'%Y-%m-%d' )"
+			)
+		);

The public_reporting table is NOT in the wiki's db. There is a function in ContributionReporting.php that will build you a db connection to the db that holds the public reporting table - efContributionReportingConnection(). We'll need to decide whether or not adding the summary tables to the wiki's db is the appropriate thing to do, or if it makes more sense to leave in the same place as public_reporting. If you want to keep the summary tables in the wiki's db you could just create another db object to fetch data from public_reporting and leave the dbr/dbw as they are. IMHO it's probably better to keep the summary tables in the wiki's db - if that's the road we go down, you could revise this to be something like:

+		$this->dbr = wfGetDB( DB_SLAVE );
+		$this->dbw = wfGetDB( DB_MASTER );
                $this->dbpr = efContributionReportingConnection();
<snip>
+		// Get the data for a fundraiser
+		$result = $this->dbpr->select( 'public_reporting',
+			array(
+				"DATE_FORMAT( FROM_UNIXTIME( received ),'%Y-%m-%d' ) AS date",
+				'sum( converted_amount ) AS total',
+				'count( * ) AS number',
+				'avg( converted_amount ) AS average',
+				'max( converted_amount ) AS maximum',
+			),
+			$conditions,
+			__METHOD__,
+			array(
+				'ORDER BY' => 'received',
+				'GROUP BY' => "DATE_FORMAT( FROM_UNIXTIME( received ),'%Y-%m-%d' )"
+			)
+		);

Repeat replacing $this->dbr with $this->dbpr everywhere you're fetching data from public reporting and leave the rest as-is.

#Comment by Kaldari (talk | contribs)   22:12, 15 December 2011

Fixed in r106362.

Status & tagging log