r110649 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r110648‎ | r110649 | r110650 >
Date:10:18, 3 February 2012
Author:maxsem
Status:resolved (Comments)
Tags:geodata 
Comment:
DB schema revamp based on discussion with Asher
Modified paths:
  • /trunk/extensions/GeoData/GeoData.body.php (modified) (history)
  • /trunk/extensions/GeoData/GeoData.sql (modified) (history)
  • /trunk/extensions/GeoData/api/ApiQueryAllPages_GeoData.php (modified) (history)
  • /trunk/extensions/GeoData/api/ApiQueryCategoryMembers_GeoData.php (modified) (history)
  • /trunk/extensions/GeoData/api/ApiQueryCoordinates.php (modified) (history)
  • /trunk/extensions/GeoData/api/ApiQueryGeoSearch.php (modified) (history)
  • /trunk/extensions/GeoData/api/GeoDataQueryExtender.php (modified) (history)

Diff [purge]

Index: trunk/extensions/GeoData/GeoData.sql
@@ -11,6 +11,10 @@
1212 -- Whether this coordinate is primary (defines the principal location of article subject)
1313 -- or secondary (just mentioned in text)
1414 gt_primary bool NOT NULL,
 15+ -- Latitude in tenths of degree
 16+ gt_lat_int smallint NOT NULL,
 17+ -- Longitude in tenths of degree
 18+ gt_lon_int smallint NOT NULL,
1519 -- Latitude of the point in degrees
1620 gt_lat float NOT NULL,
1721 -- Longitude of the point in degrees
@@ -27,23 +31,6 @@
2832 gt_region varchar(3) NULL
2933 )/*$wgDBTableOptions*/;
3034
3135 CREATE INDEX /*i*/gt_page_id ON /*_*/geo_tags ( gt_page_id );
32 -CREATE INDEX /*i*/gt_lat_lon ON /*_*/geo_tags ( gt_lat, gt_lon );
33 -
34 -DELIMITER //
35 -CREATE FUNCTION /*_*/gd_distance( lat1 double, lon1 double, lat2 double, lon2 double )
36 - RETURNS double DETERMINISTIC
37 -BEGIN
38 - SET lat1 = radians( lat1 );
39 - SET lon1 = radians( lon1 );
40 - SET lat2 = radians( lat2 );
41 - SET lon2 = radians( lon2 );
42 - SET @sin1 = sin( ( lat2 - lat1 ) / 2 );
43 - SET @sin2 = sin( ( lon2 - lon1 ) / 2 );
44 - RETURN 2 * 6371010 * asin( sqrt( @sin1 * @sin1 + cos( lat1 ) * cos( lat2 ) * @sin2 * @sin2 ) );
45 -END//
46 -
47 -DELIMITER ;
 36+CREATE INDEX /*i*/gt_id_page_id ON /*_*/geo_tags ( gt_page_id, gt_id );
 37+CREATE INDEX /*i*/gt_spatial ON /*_*/geo_tags ( gt_lati, gt_loni, gt_lon );
Index: trunk/extensions/GeoData/GeoData.body.php
@@ -230,11 +230,18 @@
231231 && $this->region == $coord->region;
232232 }
233233
234 - public function getRow( $pageId = null ) {
 234+ /**
 235+ * Returns this object's representation suitable for insertion into the DB via Databse::insert()
 236+ * @param int $pageId: ID of page associated with this coordinate
 237+ * @return Array: Associative array in format 'field' => 'value'
 238+ */
 239+ public function getRow( $pageId ) {
235240 $row = array( 'gt_page_id' => $pageId );
236241 foreach ( self::$fieldMapping as $field => $column ) {
237242 $row[$column] = $this->$field;
238243 }
 244+ $row['gt_lat_int'] = round( $this->lat * 10 );
 245+ $row['gt_lon_int'] = round( $this->lon * 10 );
239246 return $row;
240247 }
241248
Index: trunk/extensions/GeoData/api/GeoDataQueryExtender.php
@@ -16,12 +16,13 @@
1717 $tables = array();
1818 $fields = array();
1919 $joins = array();
20 - $options = array();
 20+ $options = array( 'USE INDEX' => array() );
2121 $where = array();
2222
2323 if ( isset( $params['withcoordinates'] ) || $params['withoutcoordinates'] ) {
2424 $tables[] = 'geo_tags';
2525 $joins['geo_tags'] = array( 'LEFT JOIN', "$joinField = gt_page_id" );
 26+ $options['USE INDEX']['geo_tags'] = 'gt_page_id'; // Yes, MySQL is THAT stupid
2627 if ( isset( $params['withcoordinates'] ) ) {
2728 switch ( $params['withcoordinates'] ) {
2829 case 'primary':
@@ -40,7 +41,7 @@
4142 $where[] = 'gt_primary IS NULL';
4243 }
4344 } elseif ( $useIndex ) {
44 - $options['USE INDEX'] = $useIndex;
 45+ $options['USE INDEX']['page'] = $useIndex;
4546 }
4647 return array( $tables, $fields, $joins, $options, $where );
4748 }
Index: trunk/extensions/GeoData/api/ApiQueryCoordinates.php
@@ -36,6 +36,8 @@
3737 $parts[0] = intval( $parts[0] );
3838 $parts[1] = intval( $parts[1] );
3939 $this->addWhere( "gt_page_id > {$parts[0]} OR ( gt_page_id = {$parts[0]} AND gt_id > {$parts[1]} )" );
 40+ } else {
 41+ $this->addOption( 'USE INDEX', 'gt_page_id' );
4042 }
4143
4244 $this->addOption( 'ORDER BY', array( 'gt_page_id', 'gt_id' ) );
Index: trunk/extensions/GeoData/api/ApiQueryAllPages_GeoData.php
@@ -4,7 +4,7 @@
55 * Overrides and extends core's list=allpages query module
66 */
77 class ApiQueryAllPages_GeoData extends ApiQueryAllPages {
8 - private $useIndex, $alreadyAltered;
 8+ private $useIndex = false, $alreadyAltered;
99
1010 public function __construct( $query, $moduleName ) {
1111 parent::__construct( $query, $moduleName );
@@ -22,7 +22,7 @@
2323 $this->addFields( $fields );
2424 $this->addJoinConds( $joins );
2525 foreach ( $options as $name => $value ) {
26 - $this->addOption( $name, $value );
 26+ parent::addOption( $name, $value );
2727 }
2828 $this->addWhere( $where );
2929 $this->alreadyAltered = true;
Index: trunk/extensions/GeoData/api/ApiQueryCategoryMembers_GeoData.php
@@ -4,7 +4,7 @@
55 * Overrides and extends core's list=categorymembers query module
66 */
77 class ApiQueryCategoryMembers_GeoData extends ApiQueryCategoryMembers {
8 - private $useIndex, $alreadyAltered;
 8+ private $useIndex = false, $alreadyAltered;
99
1010 public function __construct( $query, $moduleName ) {
1111 parent::__construct( $query, $moduleName );
@@ -22,7 +22,7 @@
2323 $this->addFields( $fields );
2424 $this->addJoinConds( $joins );
2525 foreach ( $options as $name => $value ) {
26 - $this->addOption( $name, $value );
 26+ parent::addOption( $name, $value );
2727 }
2828 $this->addWhere( $where );
2929 $this->alreadyAltered = true;
Index: trunk/extensions/GeoData/api/ApiQueryGeoSearch.php
@@ -20,7 +20,7 @@
2121 }
2222
2323 /**
24 - * @param $resultPageSet ApiPageSet
 24+ * @param ApiPageSet $resultPageSet
2525 * @return
2626 */
2727 private function run( $resultPageSet = null ) {
@@ -57,15 +57,13 @@
5858 $rect = GeoMath::rectAround( $lat, $lon, $radius );
5959
6060 $dbr = wfGetDB( DB_SLAVE );
61 - $this->addTables( array( 'geo_tags', 'page' ) );
62 - $this->addFields( array( 'gt_lat', 'gt_lon', 'gt_primary',
63 - "{$dbr->tablePrefix()}gd_distance( {$lat}, {$lon}, gt_lat, gt_lon ) AS dist" )
64 - );
 61+ $this->addTables( array( 'page', 'geo_tags' ) );
 62+ $this->addFields( array( 'gt_lat', 'gt_lon', 'gt_primary' ) );
6563 // retrieve some fields only if page set needs them
6664 if ( is_null( $resultPageSet ) ) {
6765 $this->addFields( array( 'page_id', 'page_namespace', 'page_title' ) );
6866 } else {
69 - $this->addFields( array( "{$dbr->tableName( 'page' )}.*" ) );
 67+ $this->addFields( WikiPage::selectFields() );
7068 }
7169 foreach( $params['prop'] as $prop ) {
7270 if ( isset( Coord::$fieldMapping[$prop] ) ) {
@@ -73,9 +71,10 @@
7472 }
7573 }
7674 $this->addWhereFld( 'gt_globe', $params['globe'] );
 75+ $this->addWhereFld( 'gt_lat_int', self::intRange( $rect["minLat"], $rect["maxLat"] ) );
 76+ $this->addWhereFld( 'gt_lon_int', self::intRange( $rect["minLon"], $rect["maxLon"] ) );
7777 $this->addWhereRange( 'gt_lat', 'newer', $rect["minLat"], $rect["maxLat"], false );
7878 $this->addWhereRange( 'gt_lon', 'newer', $rect["minLon"], $rect["maxLon"], false );
79 - //$this->addWhere( 'dist < ' . intval( $radius ) ); hasta be in HAVING, not WHERE
8079 $this->addWhereFld( 'page_namespace', $params['namespace'] );
8180 $this->addWhere( 'gt_page_id = page_id' );
8281 if ( $exclude ) {
@@ -87,15 +86,24 @@
8887 $primary = array_flip( $params['primary'] );
8988 $this->addWhereIf( array( 'gt_primary' => 1 ), isset( $primary['yes'] ) && !isset( $primary['no'] ) );
9089 $this->addWhereIf( array( 'gt_primary' => 0 ), !isset( $primary['yes'] ) && isset( $primary['no'] ) );
91 - $this->addOption( 'ORDER BY', 'dist' );
 90+ $this->addOption( 'USE INDEX', 'gt_spatial' );
9291
9392 $limit = $params['limit'];
94 - $this->addOption( 'LIMIT', $limit );
9593
9694 $res = $this->select( __METHOD__ );
9795
 96+ $rows = array();
 97+ foreach ( $res as $row ) {
 98+ $row->dist = GeoMath::distance( $lat, $lon, $row->gt_lat, $row->gt_lon );
 99+ $rows[] = $row;
 100+ }
 101+ // sort in PHP because sorting via SQL involves a filesort
 102+ usort( $rows, 'ApiQueryGeoSearch::compareRows' );
98103 $result = $this->getResult();
99 - foreach ( $res as $row ) {
 104+ foreach ( $rows as $row ) {
 105+ if ( !$limit-- ) {
 106+ break;
 107+ }
100108 if ( is_null( $resultPageSet ) ) {
101109 $title = Title::newFromRow( $row );
102110 $vals = array(
@@ -129,6 +137,27 @@
130138 }
131139 }
132140
 141+ private static function compareRows( $row1, $row2 ) {
 142+ if ( $row1->dist < $row2->dist ) {
 143+ return -1;
 144+ } elseif ( $row1->dist > $row2->dist ) {
 145+ return 1;
 146+ }
 147+ return 0;
 148+ }
 149+
 150+ /**
 151+ * Returns a range of tenths
 152+ * @todo: wrap around
 153+ * @param float $start
 154+ * @param float $end
 155+ * @return Array
 156+ */
 157+ public static function intRange( $start, $end ) {
 158+ $start = round( $start * 10 );
 159+ return range( $start, round( $end * 10 ) - $start + 1 );
 160+ }
 161+
133162 public function getAllowedParams() {
134163 global $wgMaxGeoSearchRadius, $wgDefaultGlobe;
135164 return array (

Follow-up revisions

RevisionCommit summaryAuthorDate
r110659Follow-up r110649: fix SQL file, SQLite support (tested, but not extensively)maxsem14:55, 3 February 2012

Comments

#Comment by Hashar (talk | contribs)   13:06, 14 February 2012

That or use MySQL spatial indexes / PostGIS :-D

Anyway, I am now wondering if we should keep support for stored procedures in core (r108671) :-D

#Comment by MaxSem (talk | contribs)   13:33, 14 February 2012

> That or use MySQL spatial indexes

Yay for MyISAM on cluster! XD

> PostGIS

If we're to switch from MySQL, I choose SQLite!

> Anyway, I am now wondering if we should keep support for stored procedures in core

We should, because there were questions about it in wikitech-l. And the code is cleaner now, anyway.

Status & tagging log