Index: trunk/extensions/GPoC/GPoC.hooks.php |
— | — | @@ -0,0 +1,61 @@ |
| 2 | +<?php |
| 3 | +/** |
| 4 | + * |
| 5 | + * @file |
| 6 | + * @ingroup Extensions |
| 7 | + * @author Yuvi Panda, http://yuvi.in |
| 8 | + * @copyright © 2011 Yuvaraj Pandian (yuvipanda@yuvi.in) |
| 9 | + * @licence Modified BSD License |
| 10 | + */ |
| 11 | + |
// Refuse to run outside of MediaWiki.
if ( !defined( 'MEDIAWIKI' ) ) {
	exit( 1 );
}

// Load the helper classes relative to this file. A bare relative path would
// be looked up through include_path first, so a same-named file elsewhere
// could be picked up instead of the extension's own copy.
require_once dirname( __FILE__ ) . '/AssessmentsExtractor.php';
require_once dirname( __FILE__ ) . '/models/Rating.php';
| 18 | + |
/**
 * Hook handlers for the GPoC extension.
 *
 * When a talk page is saved, the assessment markers found in its rendered
 * text are extracted and stored as ratings of the main-namespace page with
 * the same title text.
 */
class GPoCHooks {

	/**
	 * Create or update one rating per assessed project.
	 *
	 * @param $title Title of the saved (talk) page; only its text is used, to
	 *   build the title of the rated page in NS_MAIN
	 * @param $assessments Array keyed by project name; each value is an array
	 *   with the keys 'importance' and 'quality'
	 * @param $timestamp Timestamp stored for new ratings and for the parts
	 *   of existing ratings that changed
	 */
	private static function updateDatabase( $title, $assessments, $timestamp ) {
		$main_title = Title::makeTitle( NS_MAIN, $title->getText() );
		$ratings = Rating::forTitle( $main_title );
		foreach ( $assessments as $project => $assessment ) {
			// isset() rather than a direct read: projects that have not rated
			// this page yet have no entry, and reading one raises a notice.
			if ( isset( $ratings[$project] ) ) {
				$ratings[$project]->update(
					$assessment['importance'],
					$assessment['quality'],
					$timestamp
				);
			} else {
				$rating = new Rating(
					$project,
					$main_title->getNamespace(),
					$main_title->getText(),
					$assessment['quality'],
					$timestamp,
					$assessment['importance'],
					$timestamp
				);
				$rating->saveAll();
			}
		}
	}

	/**
	 * ArticleSaveComplete hook handler.
	 *
	 * Only acts when the saved page is in NS_TALK and $revision is set; every
	 * other save returns immediately so that normal edits stay cheap.
	 *
	 * @return bool Always true, so that other handlers keep running
	 */
	public static function ArticleSaveComplete(&$article, &$user, $text, $summary, $minoredit, $watchthis, $sectionanchor, &$flags, $revision, &$status, $baseRevId) {
		$title = $article->getTitle();
		if ( $title->getNamespace() == NS_TALK && $revision ) {
			// All conditions to minimize the situations we've to run the job to update the data
			$preparedText = $article->prepareTextForEdit( $text )->output->getText();
			$extractor = new AssessmentsExtractor( $article, $preparedText );
			$assessments = $extractor->extractAssessments();
			// Pass the revision's timestamp, not the Revision object itself:
			// the value ends up in the ratings timestamp columns.
			GPoCHooks::updateDatabase( $title, $assessments, $revision->getTimestamp() );
		}
		return true;
	}

	/**
	 * LoadExtensionSchemaUpdates hook handler: registers the extension's
	 * "ratings" and "project_stats" tables with the updater.
	 *
	 * @param $du DatabaseUpdater
	 * @return bool Always true
	 */
	public static function SetupSchema( DatabaseUpdater $du ) {
		$base = dirname( __FILE__ ) . '/schema';
		$du->addExtensionTable( "ratings", "$base/ratings.sql");
		$du->addExtensionTable( "project_stats", "$base/project_stats.sql" );
		return true;
	}
}
Index: trunk/extensions/GPoC/schema/log.sql |
— | — | @@ -0,0 +1,38 @@ |
| 2 | +-- Replace /*_*/ with the proper prefix |
| 3 | +-- Replace /*$wgDBTableOptions*/ with the correct options |
| 4 | + |
-- One row per logged change of a rating.
CREATE TABLE IF NOT EXISTS /*_*/log (
  -- Name of the project
  l_project VARCHAR(63) NOT NULL,

  -- Namespace of the article
  l_namespace INT UNSIGNED NOT NULL,

  -- Name of the article
  l_article VARCHAR(255) NOT NULL,

  -- Kind of log entry (e.g. 'quality')
  -- NOTE: kept ASCII because of maximum index key length constraints
  -- interacting with utf-8 fields in mysql. The primary key of this
  -- table is just under the limit.
  l_action VARCHAR(20) CHARACTER SET ascii NOT NULL,

  -- Time at which the log entry was added
  l_timestamp BINARY(14) NOT NULL,

  -- Previous value (e.g. B-Class)
  l_old VARCHAR(63),

  -- New value (e.g. GA-Class)
  l_new VARCHAR(63),

  -- Time at which the page was edited, as a wiki-format timestamp
  l_revision_timestamp BINARY(20) NOT NULL,

  PRIMARY KEY (l_project, l_namespace, l_article, l_action, l_timestamp),
  KEY (l_article, l_namespace)
) /*$wgDBTableOptions*/;

CREATE INDEX /*i*/l_project ON /*_*/log (l_project);
Index: trunk/extensions/GPoC/schema/project_stats.sql |
— | — | @@ -0,0 +1,49 @@ |
| 2 | +-- Replace /*_*/ with the proper prefix |
| 3 | +-- Replace /*$wgDBTableOptions*/ with the correct options |
| 4 | + |
-- Assessment counters, one row per (project, quality) pair.
CREATE TABLE IF NOT EXISTS /*_*/project_stats (
  -- Name of the project
  ps_project VARCHAR(63) NOT NULL,

  -- Last time the project data was updated
  ps_timestamp BINARY(14) NOT NULL,

  -- Quality assessment, in lowercase. Possible values:
  -- fa, a, ga, b, b1, b2, b3, b4, b5, b6, c, start, stub, fl, l, unclassified
  ps_quality VARCHAR(63) NOT NULL,

  -- Number of pages assessed in the project
  ps_count INT UNSIGNED DEFAULT 0,

  -- Number of pages assessed in the project as top importance
  ps_top_icount INT UNSIGNED DEFAULT 0,

  -- Number of pages assessed in the project as high importance
  ps_high_icount INT UNSIGNED DEFAULT 0,

  -- Number of pages assessed in the project as mid importance
  ps_mid_icount INT UNSIGNED DEFAULT 0,

  -- Number of pages assessed in the project as low importance
  ps_low_icount INT UNSIGNED DEFAULT 0,

  -- Number of pages assessed in the project as bottom importance
  ps_bottom_icount INT UNSIGNED DEFAULT 0,

  -- Number of pages assessed in the project as being of no importance
  ps_no_icount INT UNSIGNED DEFAULT 0,

  -- Number of pages assessed in the project without a classified importance
  ps_unclassified_icount INT UNSIGNED DEFAULT 0,

  -- Number of pages in the project that have a quality assessment
  ps_qcount INT UNSIGNED DEFAULT 0,

  -- Number of pages in the project that have an importance assessment
  ps_icount INT UNSIGNED DEFAULT 0,

  PRIMARY KEY (ps_project, ps_quality)
) /*$wgDBTableOptions*/;

CREATE INDEX /*i*/ps_project ON /*_*/project_stats (ps_project);
Index: trunk/extensions/GPoC/schema/ratings.sql |
— | — | @@ -0,0 +1,33 @@ |
| 2 | +-- Replace /*_*/ with the proper prefix |
| 3 | +-- Replace /*$wgDBTableOptions*/ with the correct options |
| 4 | + |
-- Current rating of an article, one row per (project, article) pair.
CREATE TABLE IF NOT EXISTS /*_*/ratings (
  -- Name of the project
  r_project VARCHAR(63) NOT NULL,

  -- Namespace of the article
  r_namespace INT UNSIGNED NOT NULL,

  -- Title of the article
  r_article VARCHAR(255) NOT NULL,

  -- Quality rating
  r_quality VARCHAR(63),

  -- Time at which the quality rating was assigned, as a wiki-format
  -- timestamp. NOTE: a revid can be obtained from the timestamp via the API.
  r_quality_timestamp BINARY(20),

  -- Importance rating
  r_importance VARCHAR(63),

  -- Time at which the importance rating was assigned, as a wiki-style
  -- timestamp
  r_importance_timestamp BINARY(20),

  PRIMARY KEY (r_project, r_namespace, r_article)
) /*$wgDBTableOptions*/;

CREATE INDEX /*i*/r_article ON /*_*/ratings (r_namespace, r_article);
CREATE INDEX /*i*/r_project ON /*_*/ratings (r_project);
Index: trunk/extensions/GPoC/schema/projects.sql |
— | — | @@ -0,0 +1,36 @@ |
| 2 | +-- Replace /*_*/ with the proper prefix |
| 3 | +-- Replace /*$wgDBTableOptions*/ with the correct options |
| 4 | + |
-- One row per assessment project.
CREATE TABLE IF NOT EXISTS /*_*/projects (

  p_project varchar(63) not null,
  -- project name

  p_timestamp binary(14) not null,
  -- last time project data was updated

  p_wikipage varchar(255),
  -- homepage on the wiki for this project

  p_parent varchar(63),
  -- parent project (for task forces)

  p_shortname varchar(255),
  -- display name in headers

  p_count int unsigned default 0,
  -- how many pages are assessed in project

  p_qcount int unsigned default 0,
  -- how many pages have quality assessments in the project

  p_icount int unsigned default 0,
  -- how many pages have importance assessments in the project

  p_scope int unsigned not null default 0,
  -- the project's "scope points", used to compute selection scores

  primary key (p_project)
  -- The primary key already indexes p_project on its own, so no separate
  -- secondary index on that column is created
) /*$wgDBTableOptions*/;
Index: trunk/extensions/GPoC/models/Rating.php |
— | — | @@ -0,0 +1,148 @@ |
| 2 | +<?php |
| 3 | + |
| 4 | +/** |
| 5 | + * Represents an article and associated rating |
| 6 | + **/ |
/**
 * Represents an article and associated rating
 *
 * One instance corresponds to one row of the "ratings" table (one project's
 * rating of one article). Saving a rating also adjusts the per-project
 * counters in the "project_stats" table.
 **/
class Rating {
	public $project;
	public $namespace;
	public $title;
	public $quality;
	public $quality_timestamp;
	public $importance;
	public $importance_timestamp;

	// Value replaced by the last update(), as a string; null means "unchanged"
	private $old_importance = null;
	private $old_quality = null;
	// Whether a row for this rating already exists in the ratings table
	private $inDB = false;

	/**
	 * Map an importance value to the project_stats column that counts it.
	 *
	 * The lookup ignores case and surrounding whitespace. Values that are
	 * not recognised are counted as unclassified, so the result is always
	 * one of a fixed set of column names and is safe to place in SQL.
	 *
	 * @param $importance string Importance value, e.g. 'Top' or ''
	 * @return string Column name in project_stats
	 */
	private static function getImportanceColumn( $importance ) {
		$importanceColumnMapping = array(
			'top' => 'ps_top_icount',
			'high' => 'ps_high_icount',
			'mid' => 'ps_mid_icount',
			'low' => 'ps_low_icount',
			'bottom' => 'ps_bottom_icount',
			'no' => 'ps_no_icount',
			'' => 'ps_unclassified_icount'
		);
		$key = strtolower( trim( (string)$importance ) );
		return isset( $importanceColumnMapping[$key] )
			? $importanceColumnMapping[$key]
			: 'ps_unclassified_icount';
	}

	/**
	 * @param $project string Project name
	 * @param $namespace int Namespace of the rated article
	 * @param $title string Title text of the rated article
	 * @param $quality string Quality rating
	 * @param $quality_timestamp string Time the quality rating was assigned
	 * @param $importance string Importance rating
	 * @param $importance_timestamp string Time the importance rating was assigned
	 */
	public function __construct( $project, $namespace, $title, $quality, $quality_timestamp, $importance, $importance_timestamp ) {
		$this->project = $project;
		$this->namespace = $namespace;
		$this->title = $title;
		$this->quality = $quality;
		$this->quality_timestamp = $quality_timestamp;
		$this->importance = $importance;
		$this->importance_timestamp = $importance_timestamp;
	}

	/**
	 * Apply new assessment values and save the rating.
	 *
	 * A value and its timestamp are only replaced when the value differs
	 * from the current one. The replaced values are remembered so that
	 * saveAll() can move the rating between project_stats counters.
	 *
	 * @param $importance string New importance rating
	 * @param $quality string New quality rating
	 * @param $timestamp string Timestamp recorded for whichever value changed
	 */
	public function update( $importance, $quality, $timestamp ) {
		// Forget what an earlier update() replaced, otherwise an update that
		// changes nothing would adjust the counters a second time.
		$this->old_quality = null;
		$this->old_importance = null;

		if ( (string)$quality !== (string)$this->quality ) {
			$this->old_quality = (string)$this->quality;
			$this->quality = $quality;
			$this->quality_timestamp = $timestamp;
		}
		if ( (string)$importance !== (string)$this->importance ) {
			$this->old_importance = (string)$this->importance;
			$this->importance = $importance;
			$this->importance_timestamp = $timestamp;
		}
		$this->saveAll();
	}

	/**
	 * Keep the project_stats counters in step with this rating.
	 *
	 * A new rating adds one to the counter cell for its (quality, importance)
	 * pair. A changed rating first takes one off the cell it was counted in
	 * before, then adds one to the cell it belongs to now.
	 *
	 * All values are quoted through the database layer and column names come
	 * from a fixed list. The upsert uses MySQL's ON DUPLICATE KEY UPDATE.
	 *
	 * @param $is_new_rating bool True when the rating row was just inserted
	 */
	private function updateAggregateStats( $is_new_rating ) {
		// Compare against null, not empty(): an old value of '' (unclassified)
		// is a real previous state that has to be un-counted.
		$quality_changed = $this->old_quality !== null;
		$importance_changed = $this->old_importance !== null;
		if ( !$is_new_rating && !$quality_changed && !$importance_changed ) {
			return;
		}

		$dbw = wfGetDB( DB_MASTER );
		$table = $dbw->tableName( 'project_stats' );
		$project = $dbw->addQuotes( $this->project );
		// ps_timestamp is NOT NULL without a default, so it is always written
		$now = $dbw->addQuotes( $dbw->timestamp() );

		if ( !$is_new_rating ) {
			$prev_quality = $quality_changed ? $this->old_quality : (string)$this->quality;
			$prev_importance = $importance_changed ? $this->old_importance : $this->importance;
			$prev_column = self::getImportanceColumn( $prev_importance );
			// "> 0" keeps the unsigned counter from being pushed below zero
			$dbw->query(
				"UPDATE $table SET $prev_column = $prev_column - 1, ps_timestamp = $now " .
				"WHERE ps_project = $project " .
				"AND ps_quality = " . $dbw->addQuotes( $prev_quality ) . " " .
				"AND $prev_column > 0",
				__METHOD__
			);
		}

		$column = self::getImportanceColumn( $this->importance );
		$quality = $dbw->addQuotes( (string)$this->quality );
		$dbw->query(
			"INSERT INTO $table (ps_project, ps_timestamp, ps_quality, $column) " .
			"VALUES ($project, $now, $quality, 1) " .
			"ON DUPLICATE KEY " .
			"UPDATE $column = $column + 1, ps_timestamp = VALUES(ps_timestamp)",
			__METHOD__
		);
	}

	/**
	 * Write this rating to the ratings table and update the project
	 * counters: an UPDATE when the row is known to exist, an INSERT otherwise.
	 */
	public function saveAll() {
		$data_array = array(
			'r_project' => $this->project,
			'r_namespace' => $this->namespace,
			'r_article' => $this->title,
			'r_quality' => $this->quality,
			'r_quality_timestamp' => $this->quality_timestamp,
			'r_importance' => $this->importance,
			'r_importance_timestamp' => $this->importance_timestamp
		);
		$dbw = wfGetDB( DB_MASTER );
		if( $this->inDB ) {
			$dbw->update(
				"ratings",
				$data_array,
				array(
					'r_namespace' => $this->namespace,
					'r_article' => $this->title,
					'r_project' => $this->project
				),
				__METHOD__
			);

			$this->updateAggregateStats( false );
		} else {
			$dbw->insert(
				"ratings",
				$data_array,
				__METHOD__
			);

			$this->updateAggregateStats( true );
			$this->inDB = true;
		}

		// The change has been accounted for; a later save must not count it again
		$this->old_quality = null;
		$this->old_importance = null;
	}

	/**
	 * Load all stored ratings of a page.
	 *
	 * NOTE(review): this reads from DB_SLAVE; a lagging slave could hide a row
	 * that was just written, and the following insert would then collide with
	 * it - confirm whether callers that write should read from the master.
	 *
	 * @param $title Title of the rated page
	 * @return array Rating objects keyed by project name
	 */
	public static function forTitle( $title ) {
		$dbr = wfGetDB( DB_SLAVE );
		$query = $dbr->select(
			"ratings",
			array(
				"r_project", "r_namespace", "r_article", "r_quality",
				"r_quality_timestamp", "r_importance", "r_importance_timestamp"
			),
			array(
				"r_namespace" => $title->getNamespace(),
				"r_article" => $title->getText(),
			),
			__METHOD__
		);

		$ratings = array();

		foreach( $query as $row ) {
			$rating = new Rating(
				$row->r_project, $row->r_namespace,
				$row->r_article, $row->r_quality,
				$row->r_quality_timestamp, $row->r_importance,
				$row->r_importance_timestamp);
			$rating->inDB = true;
			$ratings[$rating->project] = $rating;
		}
		return $ratings;
	}
}
Index: trunk/extensions/GPoC/AssessmentsExtractor.php |
— | — | @@ -0,0 +1,30 @@ |
| 2 | +<?php |
| 3 | + |
| 4 | +/** |
| 5 | + * Helps extract assessments from a parsed $DOM file |
| 6 | + **/ |
/**
 * Extracts project assessments from the rendered text of a page.
 *
 * An assessment is recognised by an opening tag of exactly this form:
 *   <span data-project-name="..." data-importance="..." data-quality="...">
 **/
class AssessmentsExtractor
{
	// Article the text belongs to; stored but not read by this class
	private $mArticle;
	// Rendered text that is searched for assessment markers
	private $mText;

	/**
	 * @param $article Article the text was rendered for
	 * @param $preparedText string Rendered text to search
	 */
	public function __construct( $article, $preparedText ) {
		$this->mText = $preparedText;
		$this->mArticle = $article;
	}

	/**
	 * Find every assessment marker in the text.
	 *
	 * When a project is marked more than once, the last marker wins.
	 *
	 * @return array Keyed by project name; each value is an array with the
	 *   keys 'importance' and 'quality'. Empty when nothing is found.
	 */
	public function extractAssessments() {
		// [^"]* instead of .* : a greedy .* runs on to the last quote on the
		// line, so two markers on one line were merged into one bogus match.
		$regex = '/<span data-project-name="(?P<project>[^"]*)" data-importance="(?P<importance>[^"]*)" data-quality="(?P<quality>[^"]*)"\s*>/';
		$matches = array();
		preg_match_all( $regex, (string)$this->mText, $matches, PREG_SET_ORDER );

		$assessments = array();
		foreach ( $matches as $match ) {
			// The values are read out of HTML attributes, so undo the HTML
			// escaping (e.g. "&amp;") to get the actual text.
			$project = html_entity_decode( $match['project'], ENT_QUOTES, 'UTF-8' );
			$assessments[$project] = array(
				'importance' => html_entity_decode( $match['importance'], ENT_QUOTES, 'UTF-8' ),
				'quality' => html_entity_decode( $match['quality'], ENT_QUOTES, 'UTF-8' )
			);
		}
		return $assessments;
	}
}
Index: trunk/extensions/GPoC/README |
— | — | @@ -0,0 +1,3 @@ |
| 2 | +This is the Proof of Concept for YuviPanda's GSoC 2011 Project. |
| 3 | + |
| 4 | +Throwaway code. Don't blame me if it cuts off your left foot. |
Index: trunk/extensions/GPoC/.vimrc |
— | — | @@ -0,0 +1,2 @@ |
| 2 | +set noexpandtab |
| 3 | +set tabstop=4 |
Index: trunk/extensions/GPoC/GPoC.php |
— | — | @@ -0,0 +1,27 @@ |
| 2 | +<?php |
| 3 | +/** |
| 4 | + * Proof of Concept for Yuvi Panda's 2011 GSoC |
| 5 | + * |
| 6 | + * @file |
| 7 | + * @ingroup Extensions |
| 8 | + * @author Yuvi Panda, http://yuvi.in |
| 9 | + * @copyright © 2011 Yuvaraj Pandian (yuvipanda@yuvi.in) |
| 10 | + * @licence Modified BSD License |
| 11 | + */ |
| 12 | + |
// Refuse to run outside of MediaWiki.
if( !defined( 'MEDIAWIKI' ) ) {
	echo( "This file is an extension to the MediaWiki software and cannot be used standalone.\n" );
	die( 1 );
}

// Extension credits that will show up on Special:Version
$wgExtensionCredits['other'][] = array(
	'path' => __FILE__,
	'name' => 'GPoC',
	'author' => 'Yuvi Panda',
	'description' => "Proof of Concept for Yuvi Panda's 2011 GSoC project",
);

// Directory of this extension, with a trailing slash
$dir = dirname( __FILE__ ) . '/';

// Let MediaWiki load the extension's classes on first use
$wgAutoloadClasses['GPoCHooks'] = $dir . 'GPoC.hooks.php';
$wgAutoloadClasses['AssessmentsExtractor'] = $dir . 'AssessmentsExtractor.php';
$wgAutoloadClasses['Rating'] = $dir . 'models/Rating.php';

// Hook registration: react to page saves and set up the database tables
$wgHooks['ArticleSaveComplete'][] = 'GPoCHooks::ArticleSaveComplete';
$wgHooks['LoadExtensionSchemaUpdates'][] = 'GPoCHooks::SetupSchema';

// Configuration