r91252 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r91251‎ | r91252 | r91253 >
Date:07:09, 1 July 2011
Author:yuvipanda
Status:deferred (Comments)
Tags:
Comment:
Initial import of YuviPanda's GSoC work.

Extension to ease assessment parsing/building
collections for offline use.
Modified paths:
  • /trunk/extensions/GPoC (added) (history)
  • /trunk/extensions/GPoC/.vimrc (added) (history)
  • /trunk/extensions/GPoC/AssessmentsExtractor.php (added) (history)
  • /trunk/extensions/GPoC/GPoC.hooks.php (added) (history)
  • /trunk/extensions/GPoC/GPoC.php (added) (history)
  • /trunk/extensions/GPoC/README (added) (history)
  • /trunk/extensions/GPoC/models (added) (history)
  • /trunk/extensions/GPoC/models/Rating.php (added) (history)
  • /trunk/extensions/GPoC/schema (added) (history)
  • /trunk/extensions/GPoC/schema/log.sql (added) (history)
  • /trunk/extensions/GPoC/schema/project_stats.sql (added) (history)
  • /trunk/extensions/GPoC/schema/projects.sql (added) (history)
  • /trunk/extensions/GPoC/schema/ratings.sql (added) (history)

Diff [purge]

Index: trunk/extensions/GPoC/GPoC.hooks.php
@@ -0,0 +1,61 @@
 2+<?php
 3+/**
 4+ *
 5+ * @file
 6+ * @ingroup Extensions
 7+ * @author Yuvi Panda, http://yuvi.in
 8+ * @copyright © 2011 Yuvaraj Pandian (yuvipanda@yuvi.in)
 9+ * @licence Modified BSD License
 10+ */
 11+
 12+if ( !defined( 'MEDIAWIKI' ) ) {
 13+ exit( 1 );
 14+}
 15+
 16+require_once "AssessmentsExtractor.php";
 17+require_once "models/Rating.php";
 18+
/**
 * Hook handlers for the GPoC extension.
 *
 * Keeps the ratings tables in sync with the assessment banners found on
 * talk pages, and registers the extension's tables with the updater.
 */
class GPoCHooks {

	/**
	 * Store the assessments extracted from a talk page against the
	 * corresponding main-namespace article.
	 *
	 * Projects that already have a rating for the article are updated;
	 * projects seen for the first time get a new rating row.
	 *
	 * @param $title Title of the talk page that was saved
	 * @param $assessments Array of project name => array( 'importance' => ..., 'quality' => ... )
	 * @param $timestamp String timestamp recorded as the time the rating was assigned
	 */
	private static function updateDatabase( $title, $assessments, $timestamp ) {
		// The banner lives on the talk page but rates the article itself.
		$mainTitle = Title::makeTitle( NS_MAIN, $title->getText() );
		$ratings = Rating::forTitle( $mainTitle );

		foreach ( $assessments as $project => $assessment ) {
			// isset() avoids an undefined-index notice for projects that
			// have no stored rating yet.
			if ( isset( $ratings[$project] ) ) {
				$ratings[$project]->update(
					$assessment['importance'],
					$assessment['quality'],
					$timestamp
				);
			} else {
				$rating = new Rating(
					$project,
					$mainTitle->getNamespace(),
					$mainTitle->getText(),
					$assessment['quality'],
					$timestamp,
					$assessment['importance'],
					$timestamp
				);
				$rating->saveAll();
			}
		}
	}

	/**
	 * ArticleSaveComplete hook: re-extract the assessments whenever a page
	 * in the Talk namespace is saved with a new revision.
	 *
	 * @return Boolean always true, so other handlers keep running
	 */
	public static function ArticleSaveComplete(&$article, &$user, $text, $summary, $minoredit, $watchthis, $sectionanchor, &$flags, $revision, &$status, $baseRevId) {
		$title = $article->getTitle();
		// All conditions to minimize the situations we've to run the job to update the data.
		// $revision is empty when the save did not create a new revision.
		if ( $title->getNamespace() == NS_TALK && $revision ) {
			$preparedText = $article->prepareTextForEdit( $text )->output->getText();
			$extractor = new AssessmentsExtractor( $article, $preparedText );
			$assessments = $extractor->extractAssessments();
			// Record the revision's timestamp, not the Revision object itself.
			self::updateDatabase( $title, $assessments, $revision->getTimestamp() );
		}
		return true;
	}

	/**
	 * LoadExtensionSchemaUpdates hook: register the extension's tables.
	 *
	 * Only the ratings and project_stats tables are registered here.
	 *
	 * @param $du DatabaseUpdater
	 * @return Boolean always true
	 */
	public static function SetupSchema( DatabaseUpdater $du ) {
		$base = dirname( __FILE__ ) . '/schema';
		$du->addExtensionTable( "ratings", "$base/ratings.sql" );
		$du->addExtensionTable( "project_stats", "$base/project_stats.sql" );
		return true;
	}
}
Index: trunk/extensions/GPoC/schema/log.sql
@@ -0,0 +1,38 @@
 2+-- Replace /*_*/ with the proper prefix
 3+-- Replace /*$wgDBTableOptions*/ with the correct options
 4+-- Log of rating changes: one row per project, article, action and time, holding the old and new value.
 5+CREATE TABLE IF NOT EXISTS /*_*/log (
 6+ l_project varchar(63) not null,
 7+ -- project name
 8+
 9+ l_namespace int unsigned not null,
 10+ -- namespace of the article
 11+
 12+ l_article varchar(255) not null,
 13+ -- title of the article
 14+
 15+ l_action varchar(20) character set ascii not null,
 16+ -- type of log entry (e.g. 'quality')
 17+
 18+ -- NOTE: l_action is ASCII because of maximum index key
 19+ -- length constraints interacting with utf-8 fields in
 20+ -- mysql. The primary key for this table is just under the limit.
 21+
 22+ l_timestamp binary(14) not null,
 23+ -- timestamp when the log entry was added
 24+
 25+ l_old varchar(63),
 26+ -- old value (e.g. B-Class); nullable
 27+
 28+ l_new varchar(63),
 29+ -- new value (e.g. GA-Class); nullable
 30+
 31+ l_revision_timestamp binary(20) not null,
 32+ -- timestamp when the page was edited
 33+ -- a wiki-format timestamp
 34+
 35+ primary key (l_project, l_namespace, l_article, l_action, l_timestamp),
 36+ key (l_article, l_namespace)
 37+) /*$wgDBTableOptions*/;
 38+-- NOTE(review): l_project is the leading primary key column, so this index is probably redundant (confirm). GPoCHooks::SetupSchema does not register this file.
 39+CREATE INDEX /*i*/l_project ON /*_*/log (l_project);
Index: trunk/extensions/GPoC/schema/project_stats.sql
@@ -0,0 +1,49 @@
 2+-- Replace /*_*/ with the proper prefix
 3+-- Replace /*$wgDBTableOptions*/ with the correct options
 4+-- Per-project, per-quality page counts, broken down by importance rating.
 5+CREATE TABLE IF NOT EXISTS /*_*/project_stats (
 6+
 7+ ps_project varchar(63) not null,
 8+ -- project name
 9+
 10+ ps_timestamp binary(14) not null,
 11+ -- last time project data was updated
 12+
 13+ ps_quality varchar(63) not null,
 14+ -- quality assessment. lowercase.
 15+ -- possible values: fa, a, ga, b, b1, b2, b3, b4, b5, b6, c, start, stub, fl, l, unclassified
 16+
 17+ ps_count int unsigned default 0,
 18+ -- how many pages are assessed in project
 19+
 20+ ps_top_icount int unsigned default 0,
 21+ -- how many pages in the project are assessed as top importance
 22+
 23+ ps_high_icount int unsigned default 0,
 24+ -- how many pages in the project are assessed as high importance
 25+
 26+ ps_mid_icount int unsigned default 0,
 27+ -- how many pages in the project are assessed as mid importance
 28+
 29+ ps_low_icount int unsigned default 0,
 30+ -- how many pages in the project are assessed as low importance
 31+
 32+ ps_bottom_icount int unsigned default 0,
 33+ -- how many pages in the project are assessed as bottom importance
 34+
 35+ ps_no_icount int unsigned default 0,
 36+ -- how many pages in the project are assessed as having no importance
 37+
 38+ ps_unclassified_icount int unsigned default 0,
 39+ -- how many pages in the project are assessed without a classified importance
 40+
 41+ ps_qcount int unsigned default 0,
 42+ -- how many pages have quality assessments in the project
 43+
 44+ ps_icount int unsigned default 0,
 45+ -- how many pages have importance assessments in the project
 46+
 47+ primary key (ps_project, ps_quality)
 48+) /*$wgDBTableOptions*/;
 49+-- NOTE(review): ps_project is the leading primary key column, so this index is probably redundant (confirm).
 50+CREATE INDEX /*i*/ps_project ON /*_*/project_stats (ps_project);
Index: trunk/extensions/GPoC/schema/ratings.sql
@@ -0,0 +1,33 @@
 2+-- Replace /*_*/ with the proper prefix
 3+-- Replace /*$wgDBTableOptions*/ with the correct options
 4+-- One row per project and article, holding the current quality and importance rating.
 5+CREATE TABLE IF NOT EXISTS /*_*/ratings (
 6+ r_project varchar(63) not null,
 7+ -- project name
 8+
 9+ r_namespace int unsigned not null,
 10+ -- namespace of the rated article
 11+
 12+ r_article varchar(255) not null,
 13+ -- title of the rated article
 14+
 15+ r_quality varchar(63),
 16+ -- quality rating; nullable
 17+
 18+ r_quality_timestamp binary(20),
 19+ -- time when quality rating was assigned
 20+ -- NOTE: a revid can be obtained from timestamp via API
 21+ -- a wiki-format timestamp
 22+
 23+ r_importance varchar(63),
 24+ -- importance rating; nullable
 25+
 26+ r_importance_timestamp binary(20),
 27+ -- time when importance rating was assigned
 28+ -- a wiki-format timestamp
 29+
 30+ primary key (r_project, r_namespace, r_article)
 31+) /*$wgDBTableOptions*/;
 32+-- r_article matches the lookup in Rating::forTitle. NOTE(review): r_project duplicates the leading primary key column and is probably redundant (confirm).
 33+CREATE INDEX /*i*/r_article ON /*_*/ratings (r_namespace, r_article);
 34+CREATE INDEX /*i*/r_project ON /*_*/ratings (r_project);
Index: trunk/extensions/GPoC/schema/projects.sql
@@ -0,0 +1,36 @@
 2+-- Replace /*_*/ with the proper prefix
 3+-- Replace /*$wgDBTableOptions*/ with the correct options
 4+-- One row per project: its wiki page, parent project, display name and assessment counts.
 5+CREATE TABLE IF NOT EXISTS /*_*/projects (
 6+
 7+ p_project varchar(63) not null,
 8+ -- project name
 9+
 10+ p_timestamp binary(14) not null,
 11+ -- last time project data was updated
 12+
 13+ p_wikipage varchar(255),
 14+ -- homepage on the wiki for this project (page name only; no namespace column)
 15+
 16+ p_parent varchar(63),
 17+ -- parent project (for task forces)
 18+
 19+ p_shortname varchar(255),
 20+ -- display name in headers
 21+
 22+ p_count int unsigned default 0,
 23+ -- how many pages are assessed in project
 24+
 25+ p_qcount int unsigned default 0,
 26+ -- how many pages have quality assessments in the project
 27+
 28+ p_icount int unsigned default 0,
 29+ -- how many pages have importance assessments in the project
 30+
 31+ p_scope int unsigned not null default 0,
 32+ -- the project's "scope points", used to compute selection scores
 33+
 34+ primary key (p_project)
 35+) /*$wgDBTableOptions*/;
 36+-- NOTE(review): p_project is already the primary key, so this index looks redundant (confirm). GPoCHooks::SetupSchema does not register this file.
 37+CREATE INDEX /*i*/p_project ON /*_*/projects (p_project);
Index: trunk/extensions/GPoC/models/Rating.php
@@ -0,0 +1,148 @@
 2+<?php
 3+
/**
 * Represents an article and its rating within one project.
 *
 * Each instance corresponds to a row of the ratings table. Saving a rating
 * also maintains the per-project counters in the project_stats table.
 **/
class Rating {
	public $project;
	public $namespace;
	public $title;
	public $quality;
	public $quality_timestamp;
	public $importance;
	public $importance_timestamp;

	// Values held before the most recent update(); only meaningful while
	// the matching *Changed flag is set.
	private $old_importance;
	private $old_quality;
	// Explicit change flags: an old value may legitimately be empty, so
	// emptiness of $old_* cannot be used to detect a change.
	private $importanceChanged = false;
	private $qualityChanged = false;
	private $inDB = false;

	/**
	 * Map an importance rating to the project_stats column that counts it.
	 *
	 * The lookup is case-insensitive. Anything that is not a known rating
	 * falls back to the "unclassified" counter, so the result is always a
	 * valid column name that is safe to embed in SQL.
	 *
	 * @param $importance String importance rating, e.g. 'Top' or 'low'
	 * @return String column name in project_stats
	 */
	private static function getImportanceColumn( $importance ) {
		$importanceColumnMapping = array(
			'top' => 'ps_top_icount',
			'high' => 'ps_high_icount',
			'mid' => 'ps_mid_icount',
			'low' => 'ps_low_icount',
			'bottom' => 'ps_bottom_icount',
			'no' => 'ps_no_icount',
			'' => 'ps_unclassified_icount'
		);
		$key = strtolower( (string)$importance );
		if ( isset( $importanceColumnMapping[$key] ) ) {
			return $importanceColumnMapping[$key];
		}
		return 'ps_unclassified_icount';
	}

	public function __construct( $project, $namespace, $title, $quality, $quality_timestamp, $importance, $importance_timestamp ) {
		$this->project = $project;
		$this->namespace = $namespace;
		$this->title = $title;
		$this->quality = $quality;
		$this->quality_timestamp = $quality_timestamp;
		$this->importance = $importance;
		$this->importance_timestamp = $importance_timestamp;
	}

	/**
	 * Apply a freshly extracted assessment and persist it.
	 *
	 * A rating's timestamp is only touched when that rating changed.
	 *
	 * @param $importance String new importance rating
	 * @param $quality String new quality rating
	 * @param $timestamp String timestamp to record for whichever rating changed
	 */
	public function update( $importance, $quality, $timestamp ) {
		$this->qualityChanged = ( $quality != $this->quality );
		if ( $this->qualityChanged ) {
			$this->old_quality = $this->quality;
			$this->quality = $quality;
			$this->quality_timestamp = $timestamp;
		}
		$this->importanceChanged = ( $importance != $this->importance );
		if ( $this->importanceChanged ) {
			$this->old_importance = $this->importance;
			$this->importance = $importance;
			$this->importance_timestamp = $timestamp;
		}
		$this->saveAll();
	}

	/**
	 * Move this rating's contribution between project_stats counters.
	 *
	 * A rating counts once, in the row for its project and quality and in
	 * the column for its importance. A new rating adds one there. A
	 * changed rating adds one at the new position and removes one from
	 * the old position.
	 *
	 * Uses MySQL's INSERT ... ON DUPLICATE KEY UPDATE.
	 *
	 * @param $is_new_rating Boolean true when the ratings row was just inserted
	 */
	private function updateAggregateStats( $is_new_rating ) {
		$qualityChanged = !$is_new_rating && $this->qualityChanged;
		$importanceChanged = !$is_new_rating && $this->importanceChanged;
		// Consume the flags so a later saveAll() does not re-apply them.
		$this->qualityChanged = false;
		$this->importanceChanged = false;

		if ( !$is_new_rating && !$qualityChanged && !$importanceChanged ) {
			return;
		}

		$dbw = wfGetDB( DB_MASTER );
		$table = $dbw->tableName( 'project_stats' );
		// Column names come from a fixed whitelist; values are quoted below.
		$newColumn = self::getImportanceColumn( $this->importance );
		$oldColumn = $importanceChanged
			? self::getImportanceColumn( $this->old_importance )
			: $newColumn;
		$project = $dbw->addQuotes( (string)$this->project );
		$quality = $dbw->addQuotes( (string)$this->quality );

		$sql = "INSERT INTO $table (ps_project, ps_quality, $newColumn) " .
			"VALUES ($project, $quality, 1) " .
			"ON DUPLICATE KEY UPDATE $newColumn = $newColumn + 1";
		if ( $importanceChanged && !$qualityChanged ) {
			// Same quality row: the old importance column is in this row.
			$sql .= ", $oldColumn = $oldColumn - 1";
		}
		$dbw->query( $sql, __METHOD__ );

		if ( $qualityChanged ) {
			// The old contribution sits in the row for the old quality.
			$dbw->update(
				'project_stats',
				array( "$oldColumn = $oldColumn - 1" ),
				array(
					'ps_project' => (string)$this->project,
					'ps_quality' => (string)$this->old_quality
				),
				__METHOD__
			);
		}
	}

	/**
	 * Insert or update this rating's row in the ratings table, then bring
	 * the aggregate counters in line.
	 */
	public function saveAll() {
		$data_array = array(
			'r_project' => $this->project,
			'r_namespace' => $this->namespace,
			'r_article' => $this->title,
			'r_quality' => $this->quality,
			'r_quality_timestamp' => $this->quality_timestamp,
			'r_importance' => $this->importance,
			'r_importance_timestamp' => $this->importance_timestamp
		);
		$dbw = wfGetDB( DB_MASTER );
		if ( $this->inDB ) {
			$dbw->update(
				"ratings",
				$data_array,
				array(
					'r_namespace' => $this->namespace,
					'r_article' => $this->title,
					'r_project' => $this->project
				),
				__METHOD__
			);

			$this->updateAggregateStats( false );
		} else {
			$dbw->insert(
				"ratings",
				$data_array,
				__METHOD__
			);

			$this->updateAggregateStats( true );
			$this->inDB = true;
		}
	}

	/**
	 * Load every stored rating for an article.
	 *
	 * @param $title Title of the rated article
	 * @return Array of project name => Rating
	 */
	public static function forTitle( $title ) {
		$dbr = wfGetDB( DB_SLAVE );
		$query = $dbr->select(
			"ratings",
			array(
				"r_project", "r_namespace", "r_article", "r_quality",
				"r_quality_timestamp", "r_importance", "r_importance_timestamp"
			),
			array(
				"r_namespace" => $title->getNamespace(),
				"r_article" => $title->getText(),
			),
			__METHOD__
		);

		$ratings = array();

		foreach ( $query as $row ) {
			$rating = new Rating(
				$row->r_project, $row->r_namespace,
				$row->r_article, $row->r_quality,
				$row->r_quality_timestamp, $row->r_importance,
				$row->r_importance_timestamp
			);
			$rating->inDB = true;
			$ratings[$rating->project] = $rating;
		}
		return $ratings;
	}
}
Index: trunk/extensions/GPoC/AssessmentsExtractor.php
@@ -0,0 +1,30 @@
 2+<?php
 3+
/**
 * Extracts project assessments from the rendered HTML of a page.
 *
 * Assessments are recognised by <span> tags that carry, in this order, the
 * attributes data-project-name, data-importance and data-quality.
 **/
class AssessmentsExtractor
{
	private $mArticle;
	private $mText;

	/**
	 * @param $article Article the text belongs to (stored, not used for extraction)
	 * @param $preparedText String rendered HTML to scan
	 */
	function __construct( $article, $preparedText ) {
		$this->mText = $preparedText;
		$this->mArticle = $article;
	}

	/**
	 * Find every assessment span in the text.
	 *
	 * If a project appears more than once, the last occurrence wins.
	 *
	 * @return Array of project name => array( 'importance' => String, 'quality' => String )
	 */
	public function extractAssessments() {
		// [^"]* keeps each capture inside its own attribute value. A greedy
		// .* would run across neighbouring tags on the same line and merge
		// several spans into one bogus match.
		$regex = '/<span data-project-name="(?P<project>[^"]*)" data-importance="(?P<importance>[^"]*)" data-quality="(?P<quality>[^"]*)"\s*>/';
		$matches = array();
		preg_match_all( $regex, $this->mText, $matches, PREG_SET_ORDER );

		$assessments = array();
		foreach ( $matches as $match ) {
			$assessments[$match['project']] = array(
				'importance' => $match['importance'],
				'quality' => $match['quality']
			);
		}
		return $assessments;
	}
}
Index: trunk/extensions/GPoC/README
@@ -0,0 +1,3 @@
 2+This is the Proof of Concept for YuviPanda's GSoC 2011 Project.
 3+
 4+Throwaway code. Don't blame me if it cuts off your left foot.
Index: trunk/extensions/GPoC/.vimrc
@@ -0,0 +1,2 @@
 2+set noexpandtab
 3+set tabstop=4
Index: trunk/extensions/GPoC/GPoC.php
@@ -0,0 +1,27 @@
 2+<?php
 3+/**
 4+ * Proof of Concept for Yuvi Panda's 2011 GSoC
 5+ *
 6+ * @file
 7+ * @ingroup Extensions
 8+ * @author Yuvi Panda, http://yuvi.in
 9+ * @copyright © 2011 Yuvaraj Pandian (yuvipanda@yuvi.in)
 10+ * @licence Modified BSD License
 11+ */
 12+
// Refuse to run outside MediaWiki.
if ( !defined( 'MEDIAWIKI' ) ) {
	echo( "This file is an extension to the MediaWiki software and cannot be used standalone.\n" );
	die( 1 );
}

// Extension credits that will show up on Special:Version
$wgExtensionCredits['other'][] = array(
	'path' => __FILE__,
	'name' => 'GPoC',
	'author' => 'Yuvi Panda',
	'description' => 'Extension to ease assessment parsing/building collections for offline use',
);

// Register the extension's classes with the autoloader
$dir = dirname( __FILE__ ) . '/';

$wgAutoloadClasses['GPoCHooks'] = $dir . 'GPoC.hooks.php';
$wgAutoloadClasses['Rating'] = $dir . 'models/Rating.php';
$wgAutoloadClasses['AssessmentsExtractor'] = $dir . 'AssessmentsExtractor.php';

// Hook handlers
$wgHooks['ArticleSaveComplete'][] = 'GPoCHooks::ArticleSaveComplete';
$wgHooks['LoadExtensionSchemaUpdates'][] = 'GPoCHooks::SetupSchema';

// Configuration

Comments

#Comment by Bawolff (talk | contribs)   07:53, 1 July 2011

I noticed that in the projects table you have a p_wikipage field. You should probably have a namespace field too, or better yet (provided that the wiki page always exists) just use a page_id (a foreign key to the page table) instead of writing the actual page name in your table.

#Comment by Nikerabbit (talk | contribs)   08:10, 1 July 2011

GPoC is short for GSOC Proof of Code or what? :D

#Comment by YuviPanda (talk | contribs)   14:36, 1 July 2011

@Nikerabbit: Ah yes. Proof of Concept :) Need to rename it, have a nice name in mind?

@Bawolff: Ah yes, the projects table will very probably disappear :) Holdover from the WP1.0 bot (from where I stole some SQL)

#Comment by Nikerabbit (talk | contribs)   14:39, 1 July 2011

Giant Pack of Collections? :O~

#Comment by Reedy (talk | contribs)   16:46, 1 July 2011

Missing svn:eol-style native

And you shouldn't need to do

+require_once "AssessmentsExtractor.php";
+require_once "models/Rating.php";
#Comment by YuviPanda (talk | contribs)   02:14, 5 July 2011

Done (thanks ialex?) and used the Autoloader to fix the other

#Comment by Reedy (talk | contribs)   17:39, 1 July 2011

And you're missing $wgExtensionCredits! ;D

#Comment by YuviPanda (talk | contribs)   02:15, 5 July 2011

Well, what can I say - I am quite modest :D

#Comment by Reedy (talk | contribs)   02:50, 5 July 2011

Another FYI, but I guess it's more a matter of preference

Most other extensions have one file that does the initial install and is kept up to date; any additional tables, columns, indexes, etc. added later are added to that original install file, and also get a new file for each change (or group of changes). But like I say, it's a matter of preference. You're keeping things well contained and it's obvious what you're doing, so there's no issue doing it like this =)


And yeah, sort your extension credits! ;P

Status & tagging log