Index: trunk/phase3/maintenance/updateSearchIndex.php |
— | — | @@ -63,9 +63,18 @@ |
64 | 64 | $lockTime = $this->getOption( 'l', 20 ); |
65 | 65 | |
66 | 66 | $this->doUpdateSearchIndex( $start, $end, $lockTime ); |
67 | | - $file = fopen( $posFile, 'w' ); |
68 | | - fwrite( $file, $end ); |
69 | | - fclose( $file ); |
| 67 | + # realpath() is false until $posFile exists, so test the directory of the path as given |
| 68 | + if( is_writable( dirname( $posFile ) ) ) { |
| 69 | + $file = fopen( $posFile, 'w' ); |
| 70 | + if( $file !== false ) { |
| 71 | + fwrite( $file, $end ); |
| 72 | + fclose( $file ); |
| 73 | + } else { |
| 74 | + $this->output( "*** Couldn't write to the $posFile!\n" ); |
| 75 | + } |
| 76 | + } else { |
| 77 | + $this->output( "*** Couldn't write to the $posFile!\n" ); |
| 78 | + } |
70 | 79 | } |
71 | 80 | |
72 | 81 | private function doUpdateSearchIndex( $start, $end, $maxLockTime ) { |
— | — | @@ -89,84 +98,23 @@ |
90 | 99 | "; |
91 | 100 | $res = $dbw->query( $sql, __METHOD__ ); |
92 | 101 | |
| 102 | + $this->updateSearchIndex($maxLockTime, array($this, 'searchIndexUpdateCallback'), $dbw, $res); |
93 | 103 | |
94 | | - # Lock searchindex |
95 | | - if ( $maxLockTime ) { |
96 | | - $this->output( " --- Waiting for lock ---" ); |
97 | | - $this->lockSearchindex( $dbw ); |
98 | | - $lockTime = time(); |
99 | | - $this->output( "\n" ); |
100 | | - } |
| 104 | + $this->output( "Done\n" ); |
| 105 | + } |
101 | 106 | |
102 | | - # Loop through the results and do a search update |
103 | | - foreach ( $res as $row ) { |
104 | | - # Allow reads to be processed |
105 | | - if ( $maxLockTime && time() > $lockTime + $maxLockTime ) { |
106 | | - $this->output( " --- Relocking ---" ); |
107 | | - $this->relockSearchindex( $dbw ); |
108 | | - $lockTime = time(); |
109 | | - $this->output( "\n" ); |
110 | | - } |
111 | | - if ( $row->rc_type == RC_LOG ) { |
112 | | - continue; |
113 | | - } elseif ( $row->rc_type == RC_MOVE || $row->rc_type == RC_MOVE_OVER_REDIRECT ) { |
114 | | - # Rename searchindex entry |
115 | | - $titleObj = Title::makeTitle( $row->rc_moved_to_ns, $row->rc_moved_to_title ); |
116 | | - $title = $titleObj->getPrefixedDBkey(); |
117 | | - $this->output( "$title..." ); |
118 | | - $u = new SearchUpdate( $row->rc_cur_id, $title, false ); |
119 | | - $this->output( "\n" ); |
120 | | - } else { |
121 | | - // Get current revision |
122 | | - $rev = Revision::loadFromPageId( $dbw, $row->rc_cur_id ); |
123 | | - if( $rev ) { |
124 | | - $titleObj = $rev->getTitle(); |
125 | | - $title = $titleObj->getPrefixedDBkey(); |
126 | | - $this->output( $title ); |
127 | | - # Update searchindex |
128 | | - $u = new SearchUpdate( $row->rc_cur_id, $titleObj->getText(), $rev->getText() ); |
129 | | - $u->doUpdate(); |
130 | | - $this->output( "\n" ); |
131 | | - } |
132 | | - } |
133 | | - } |
134 | | - |
135 | | - # Unlock searchindex |
136 | | - if ( $maxLockTime ) { |
137 | | - $this->output( " --- Unlocking --" ); |
138 | | - $this->unlockSearchindex( $dbw ); |
| 107 | + public function searchIndexUpdateCallback($dbw, $row) { # Per-row callback handed to updateSearchIndex(); $row carries the rc_* fields read below |
| 108 | + if ( $row->rc_type == RC_MOVE || $row->rc_type == RC_MOVE_OVER_REDIRECT ) { |
| 109 | + # Rename searchindex entry. NOTE(review): $u is built but doUpdate() is never called on it - confirm whether that is intended |
| 110 | + $titleObj = Title::makeTitle( $row->rc_moved_to_ns, $row->rc_moved_to_title ); |
| 111 | + $title = $titleObj->getPrefixedDBkey(); |
| 112 | + $this->output( "$title..." ); |
| 113 | + $u = new SearchUpdate( $row->rc_cur_id, $title, false ); |
139 | 114 | $this->output( "\n" ); |
| 115 | + } elseif ( $row->rc_type != RC_LOG ) { # loose comparison like the RC_MOVE checks above; a strict check never matches a numeric-string rc_type |
| 116 | + $this->updateSearchIndexForPage( $dbw, $row->rc_cur_id ); |
140 | 117 | } |
141 | | - $this->output( "Done\n" ); |
142 | 118 | } |
143 | | - |
144 | | - /** |
145 | | - * Lock the search index |
146 | | - * @param &$db Database object |
147 | | - */ |
148 | | - private function lockSearchindex( &$db ) { |
149 | | - $write = array( 'searchindex' ); |
150 | | - $read = array( 'page', 'revision', 'text', 'interwiki' ); |
151 | | - $db->lockTables( $read, $write, 'updateSearchIndex.php ' . __METHOD__ ); |
152 | | - } |
153 | | - |
154 | | - /** |
155 | | - * Unlock the tables |
156 | | - * @param &$db Database object |
157 | | - */ |
158 | | - private function unlockSearchindex( &$db ) { |
159 | | - $db->unlockTables( 'updateSearchIndex.php ' . __METHOD__ ); |
160 | | - } |
161 | | - |
162 | | - /** |
163 | | - * Unlock and lock again |
164 | | - * Since the lock is low-priority, queued reads will be able to complete |
165 | | - * @param &$db Database object |
166 | | - */ |
167 | | - private function relockSearchindex( &$db ) { |
168 | | - $this->unlockSearchindex( $db ); |
169 | | - $this->lockSearchindex( $db ); |
170 | | - } |
171 | 119 | } |
172 | 120 | |
173 | 121 | $maintClass = "UpdateSearchIndex"; |
Index: trunk/phase3/maintenance/Maintenance.php |
— | — | @@ -844,4 +844,91 @@ |
845 | 845 | } |
846 | 846 | return self::$mCoreScripts; |
847 | 847 | } |
| 848 | + |
| 849 | + /** |
| 850 | + * Lock the search index: searchindex goes in the write list, the other tables below in the read list |
| 851 | + * @param &$db Database object |
| 852 | + */ |
| 853 | + private function lockSearchindex( &$db ) { |
| 854 | + $write = array( 'searchindex' ); |
| 855 | + $read = array( 'page', 'revision', 'text', 'interwiki', 'l10n_cache' ); |
| 856 | + $db->lockTables( $read, $write, __METHOD__ ); # __METHOD__ already contains the class name |
| 857 | + } |
| 858 | + |
| 859 | + /** |
| 860 | + * Unlock the tables by calling unlockTables() on the given handle |
| 861 | + * @param &$db Database object |
| 862 | + */ |
| 863 | + private function unlockSearchindex( &$db ) { |
| 864 | + $db->unlockTables( __METHOD__ ); # __METHOD__ already contains the class name |
| 865 | + } |
| 866 | + |
| 867 | + /** |
| 868 | + * Unlock and lock again: calls unlockSearchindex() and then lockSearchindex() on the same handle |
| 869 | + * Since the lock is low-priority, queued reads will be able to complete |
| 870 | + * @param &$db Database object |
| 871 | + */ |
| 872 | + private function relockSearchindex( &$db ) { |
| 873 | + $this->unlockSearchindex( $db ); |
| 874 | + $this->lockSearchindex( $db ); |
| 875 | + } |
| 876 | + |
| 877 | + /** |
| 878 | + * Perform a search index update with locking: $callback is called as callback( $dbw, $row ) for every row of $results |
| 879 | + * @param $maxLockTime integer the maximum time (in seconds) to keep the search index locked before relocking; a false value skips locking. |
| 880 | + * @param $callback callback the function that will update the index; it receives $dbw and one row of $results per call. |
| 881 | + */ |
| 882 | + public function updateSearchIndex( $maxLockTime, $callback, $dbw, $results ) { |
| 883 | + $lockTime = time(); |
| 884 | + |
| 885 | + # Lock searchindex; $lockTime is reset to the moment the lock was obtained |
| 886 | + if ( $maxLockTime ) { |
| 887 | + $this->output( " --- Waiting for lock ---" ); |
| 888 | + $this->lockSearchindex( $dbw ); |
| 889 | + $lockTime = time(); |
| 890 | + $this->output( "\n" ); |
| 891 | + } |
| 892 | + |
| 893 | + # Loop through the results and do a search update |
| 894 | + foreach ( $results as $row ) { |
| 895 | + # Allow reads to be processed: once the lock is older than $maxLockTime, release it and take it again |
| 896 | + if ( $maxLockTime && time() > $lockTime + $maxLockTime ) { |
| 897 | + $this->output( " --- Relocking ---" ); |
| 898 | + $this->relockSearchindex( $dbw ); |
| 899 | + $lockTime = time(); |
| 900 | + $this->output( "\n" ); |
| 901 | + } |
| 902 | + call_user_func( $callback, $dbw, $row ); |
| 903 | + } |
| 904 | + |
| 905 | + # Unlock searchindex (only if it was locked above) |
| 906 | + if ( $maxLockTime ) { |
| 907 | + $this->output( " --- Unlocking --" ); |
| 908 | + $this->unlockSearchindex( $dbw ); |
| 909 | + $this->output( "\n" ); |
| 910 | + } |
| 911 | + |
| 912 | + } |
| 913 | + |
| 914 | + /** |
| 915 | + * Update the searchindex table for a given pageid; returns the page's prefixed DB key, or null when no revision was loaded |
| 916 | + * @param $dbw Database a database write handle |
| 917 | + * @param $pageId the page ID to update. |
| 918 | + */ |
| 919 | + public function updateSearchIndexForPage( $dbw, $pageId ) { |
| 920 | + // Get current revision; if none is loaded nothing is indexed and null is returned |
| 921 | + $rev = Revision::loadFromPageId( $dbw, $pageId ); |
| 922 | + $title = null; |
| 923 | + if( $rev ) { |
| 924 | + $titleObj = $rev->getTitle(); |
| 925 | + $title = $titleObj->getPrefixedDBkey(); |
| 926 | + $this->output( "$title..." ); |
| 927 | + # Update searchindex from the title text and the revision text |
| 928 | + $u = new SearchUpdate( $pageId, $titleObj->getText(), $rev->getText() ); |
| 929 | + $u->doUpdate(); |
| 930 | + $this->output( "\n" ); |
| 931 | + } |
| 932 | + return $title; |
| 933 | + } |
| 934 | + |
848 | 935 | } |
Index: trunk/phase3/maintenance/updateDoubleWidthSearch.php |
— | — | @@ -0,0 +1,72 @@ |
| 2 | +<?php |
| 3 | +/** |
| 4 | + * Script to update the search index for pages containing double-width (fullwidth) Latin UTF-8 characters |
| 5 | + * |
| 6 | + * Usage: php updateDoubleWidthSearch.php |
| 7 | + * |
| 8 | + * This program is free software; you can redistribute it and/or modify |
| 9 | + * it under the terms of the GNU General Public License as published by |
| 10 | + * the Free Software Foundation; either version 2 of the License, or |
| 11 | + * (at your option) any later version. |
| 12 | + * |
| 13 | + * This program is distributed in the hope that it will be useful, |
| 14 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 15 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 16 | + * GNU General Public License for more details. |
| 17 | + * |
| 18 | + * You should have received a copy of the GNU General Public License along |
| 19 | + * with this program; if not, write to the Free Software Foundation, Inc., |
| 20 | + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
| 21 | + * http://www.gnu.org/copyleft/gpl.html |
| 22 | + * |
| 23 | + * @ingroup Maintenance |
| 24 | + */ |
| 25 | + |
| 26 | +require_once( dirname(__FILE__) . '/Maintenance.php' ); |
| 27 | + |
| 28 | +class UpdateDoubleWidthSearch extends Maintenance { |
| 29 | + /** Set the script description and register the -q and -l options */ |
| 30 | + public function __construct() { |
| 31 | + parent::__construct(); |
| 32 | + $this->mDescription = "Script to normalize double-byte latin UTF-8 characters"; |
| 33 | + $this->addOption( 'q', 'quiet', false, false ); # -q is a plain flag tested with hasOption(), so it takes no argument |
| 34 | + $this->addOption( 'l', 'How long the searchindex and revision tables will be locked for', false, true ); |
| 35 | + } |
| 36 | + /** Ask for the admin database type (Maintenance::DB_ADMIN) */ |
| 37 | + public function getDbType() { |
| 38 | + return Maintenance::DB_ADMIN; |
| 39 | + } |
| 40 | + /** Find the matching searchindex rows and hand them to updateSearchIndex(); exits with status 1 when the DB is not MySQL */ |
| 41 | + public function execute() { |
| 42 | + $quiet = $this->hasOption( 'q' ); # NOTE(review): read but never used below - confirm how -q is meant to silence output |
| 43 | + $maxLockTime = $this->getOption( 'l', 20 ); |
| 44 | + # No local lock timestamp is needed here: updateSearchIndex() keeps its own |
| 45 | + |
| 46 | + $dbw = wfGetDB( DB_MASTER ); |
| 47 | + if( $dbw->getType() !== 'mysql' ) { |
| 48 | + $this->output( "This change is only needed on MySQL, quitting...\n" ); |
| 49 | + exit(1); |
| 50 | + } |
| 51 | + |
| 52 | + $res = $this->findRows($dbw); |
| 53 | + $this->updateSearchIndex($maxLockTime, array($this, 'searchIndexUpdateCallback'), $dbw, $res); |
| 54 | + |
| 55 | + $this->output( "Done\n" ); |
| 56 | + } |
| 57 | + /** Callback for updateSearchIndex(): re-index the page identified by $row->si_page */ |
| 58 | + public function searchIndexUpdateCallback($dbw, $row) { |
| 59 | + return $this->updateSearchIndexForPage( $dbw, $row->si_page ); |
| 60 | + } |
| 61 | + /** Select si_page for every searchindex row whose si_text or si_title matches the regexp below */ |
| 62 | + private function findRows($dbw) { |
| 63 | + $searchindex = $dbw->tableName( 'searchindex' ); |
| 64 | + $regexp = '[[:<:]]u8efbd([89][1-9a]|8[b-f]|90)[[:>:]]'; |
| 65 | + $sql = "SELECT si_page FROM $searchindex |
| 66 | + WHERE ( si_text RLIKE '$regexp' ) |
| 67 | + OR ( si_title RLIKE '$regexp' )"; |
| 68 | + return $dbw->query( $sql, __METHOD__ ); |
| 69 | + } |
| 70 | +} |
| 71 | + |
| 72 | +$maintClass = "UpdateDoubleWidthSearch"; |
| 73 | +require_once( DO_MAINTENANCE ); |
Property changes on: trunk/phase3/maintenance/updateDoubleWidthSearch.php |
___________________________________________________________________ |
Name: svn:eol-style
1 | 74 | + native |
Index: trunk/phase3/UPGRADE |
— | — | @@ -53,11 +53,19 @@ |
54 | 54 | You will need to have $wgDBadminuser and $wgDBadminpass set in your |
55 | 55 | LocalSettings.php, see there for more info. |
56 | 56 | |
57 | | -From the command line, browse to the "maintenance" directory and run the |
| 57 | +From the command line, browse to the "maintenance" directory and run the |
58 | 58 | update.php script to check and update the schema. This will insert missing |
59 | 59 | tables, update existing tables, and move data around as needed. In most cases, |
60 | 60 | this is successful and nothing further needs to be done. |
61 | 61 | |
| 62 | +If you have a Chinese or Japanese wiki ($wgLanguageCode is set to one |
| 63 | +of "zh", "ja", or "yue") and you are using MySQL fulltext search, you |
| 64 | +will probably want to update the search index. |
| 65 | + |
| 66 | +In the "maintenance" directory, run the updateDoubleWidthSearch.php |
| 67 | +script. This will update the searchindex table for those pages that |
| 68 | +contain double-width (fullwidth) Latin characters. |
| 69 | + |
62 | 70 | === Check configuration settings === |
63 | 71 | |
64 | 72 | The names of configuration variables, and their default values and purposes, |
— | — | @@ -67,6 +75,7 @@ |
68 | 76 | behaviour of MediaWiki. |
69 | 77 | |
70 | 78 | === Check installed extensions === |
| 79 | + |
71 | 80 | In MediaWiki 1.14 some extensions are migrated into the core. Please see the |
72 | 81 | HISTORY section "Migrated extensions" and disable these extensions in your |
73 | 82 | LocalSettings.php |
Index: trunk/phase3/languages/Language.php |
— | — | @@ -1695,7 +1695,7 @@ |
1696 | 1696 | * @param $string String |
1697 | 1697 | * @return String |
1698 | 1698 | */ |
1699 | | - function wordSegmentation( $string ) { |
| 1699 | + function segmentByWord( $string ) { |
1700 | 1700 | return $string; |
1701 | 1701 | } |
1702 | 1702 | |
Index: trunk/phase3/languages/classes/LanguageZh_hans.php |
— | — | @@ -13,7 +13,7 @@ |
14 | 14 | * for now just treat each character as a word. |
15 | 15 | * @todo Fixme: only do this for Han characters... |
16 | 16 | */ |
17 | | - function wordSegmentation( $string ) { |
| 17 | + function segmentByWord( $string ) { |
18 | 18 | $reg = "/([\\xc0-\\xff][\\x80-\\xbf]*)/"; |
19 | 19 | $s = self::insertSpace( $string, $reg ); |
20 | 20 | return $s; |
— | — | @@ -25,7 +25,7 @@ |
26 | 26 | // Double-width roman characters |
27 | 27 | $s = self::convertDoubleWidth( $string ); |
28 | 28 | $s = trim( $s ); |
29 | | - $s = self::wordSegmentation( $s ); |
| 29 | + $s = self::segmentByWord( $s ); |
30 | 30 | $s = parent::normalizeForSearch( $s ); |
31 | 31 | |
32 | 32 | wfProfileOut( __METHOD__ ); |
Index: trunk/phase3/languages/classes/LanguageJa.php |
— | — | @@ -6,7 +6,7 @@ |
7 | 7 | * @ingroup Language |
8 | 8 | */ |
9 | 9 | class LanguageJa extends Language { |
10 | | - function wordSegmentation( $string ) { |
| 10 | + function segmentByWord( $string ) { |
11 | 11 | // Strip known punctuation ? |
12 | 12 | // $s = preg_replace( '/\xe3\x80[\x80-\xbf]/', '', $s ); # U3000-303f |
13 | 13 | |
Index: trunk/phase3/languages/classes/LanguageYue.php |
— | — | @@ -12,7 +12,7 @@ |
13 | 13 | * for now just treat each character as a word. |
14 | 14 | * @todo Fixme: only do this for Han characters... |
15 | 15 | */ |
16 | | - function wordSegmentation( $string ) { |
| 16 | + function segmentByWord( $string ) { |
17 | 17 | $reg = "/([\\xc0-\\xff][\\x80-\\xbf]*)/"; |
18 | 18 | $s = self::insertSpace( $string, $reg ); |
19 | 19 | return $s; |