r98275 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r98274 | r98275 | r98276 >
Date:22:23, 27 September 2011
Author:asher
Status:deferred
Tags:
Comment:
Lessons learned from the S5 rotation site outage:
don't replicate the master's FLUSH TABLES to the slaves; kill old queries on the slaves as well.
Modified paths:
  • /trunk/tools/switch-master/10-master-readonly (modified) (history)
  • /trunk/tools/switch-master/30-slaves (modified) (history)
  • /trunk/tools/switch-master/MasterSwitcher.php (modified) (history)

Diff [purge]

Index: trunk/tools/switch-master/10-master-readonly
@@ -3,7 +3,10 @@
44 . config
55
66 ssh root@$master 'sed -i~ '\''s/#\s*read-only/read-only/'\'' /etc/my.cnf'
7 -echo "set global read_only=1; flush tables;" | mysql -h $master
 7+echo "set global read_only=1;" | mysql -h $master
 8+echo "killing queries running over 10 seconds"
 9+mysql -h $master -e "show processlist" | awk '{ if (($6 > 9) && ($2 ~ "^wiki")) { print "kill " $1 ";" } }' | mysql -h $master
 10+echo "set sql_log_bin=0; flush tables;" | mysql -h $master
811 echo '\sselect @@read_only' | mysql -h $master
912 echo
1013
Index: trunk/tools/switch-master/MasterSwitcher.php
@@ -74,6 +74,7 @@
7575 // Stop slave on the new master and reset it so it can't start again
7676 $this->log( 'Configuring the new master' );
7777 $newMasterDB = $this->getConnection( $newMaster );
 78+ $this->killOldQueries( $newMasterDB );
7879 $newMasterDB->query( 'STOP SLAVE' );
7980 $newMasterDB->query( 'CHANGE MASTER TO master_host=\'\'' );
8081 $newMasterDB->query( 'RESET SLAVE' );
@@ -116,6 +117,7 @@
117118 $this->log( "Cannot change master on $slave: connection error" );
118119 continue;
119120 }
 121+ $this->killOldQueries( $conn );
120122 $this->doQueryLogErrors( $conn, $slave, 'SLAVE STOP' );
121123 $this->doQueryLogErrors( $conn, $slave, $changeMasterSql );
122124 $this->doQueryLogErrors( $conn, $slave, 'SLAVE START' );
@@ -330,10 +332,7 @@
331333 return true;
332334 }
333335
334 - function prepareOldMaster( $hostName, $conn ) {
335 - // Set the old master to read-only
336 - $conn->query( 'SET GLOBAL read_only=1' );
337 -
 336+ function killOldQueries ( $conn ) {
338337 // Kill long-running queries
339338 $res = $conn->query( 'SHOW PROCESSLIST' );
340339 $killQueries = array();
@@ -349,10 +348,18 @@
350349 $conn->query( $query );
351350 } catch ( DBQueryError $e ) {}
352351 }
 352+ }
353353
 354+ function prepareOldMaster( $hostName, $conn ) {
 355+ // Set the old master to read-only
 356+ $conn->query( 'SET GLOBAL read_only=1' );
 357+
 358+ // Kill Long Running Queries
 359+ $this->killOldQueries( $conn );
 360+
354361 // Flush tables
355362 // This ensures that pending transactions are committed
356 - $conn->query( 'FLUSH TABLES' );
 363+ $conn->query( 'SET SQL_LOG_BIN=0; FLUSH TABLES' );
357364
358365 // Sanity check
359366 $res = $conn->query( 'SELECT @@read_only as read_only' );
Index: trunk/tools/switch-master/30-slaves
@@ -2,6 +2,8 @@
33
44 . config
55
 6+# kill long running wiki queries on new master
 7+mysql -h $newmaster -e "show processlist" | awk '{ if (($6 > 9) && ($2 ~ "^wiki")) { print "kill " $1 ";" } }' | mysql -h $newmaster
68 # Turn off slave on new master, to avoid replication loop
79 echo "slave stop; change master to master_host=''" | mysql -h $newmaster
810
@@ -18,6 +20,7 @@
1921 for slave in $slaves $master
2022 do
2123 echo $slave
 24+ mysql -h $slave -e "show processlist" | awk '{ if (($6 > 9) && ($2 ~ "^wiki")) { print "kill " $1 ";" } }' | mysql -h $slave
2225 echo "$sql" | mysql -f -h $slave
2326 done
2427

Status & tagging log