Index: trunk/phase3/maintenance/tests/SearchEngineTest.php |
— | — | @@ -6,41 +6,43 @@ |
7 | 7 | * @group Stub |
8 | 8 | */ |
9 | 9 | class SearchEngineTest extends MediaWiki_Setup { |
10 | | - var $db, $search; |
11 | | - private $count = 0; |
| 10 | + var $db, $search, $pageList; |
12 | 11 | |
13 | | - function insertSearchData() { |
14 | | - $this->insertPage("Main_Page", "This is a main page", 0); |
15 | | - $this->insertPage('Main_Page', 'This is a talk page to the main page, see [[smithee]]', 1); |
16 | | - $this->insertPage('Smithee', 'A smithee is one who smiths. See also [[Alan Smithee]]', 0); |
17 | | - $this->insertPage('Smithee', 'This article sucks.', 1); |
18 | | - $this->insertPage('Unrelated_page', 'Nothing in this page is about the S word.', 0); |
19 | | - $this->insertPage('Another_page', 'This page also is unrelated.', 0); |
20 | | - $this->insertPage('Help', 'Help me!', 4); |
21 | | - $this->insertPage('Thppt', 'Blah blah', 0); |
22 | | - $this->insertPage('Alan_Smithee', 'yum', 0); |
23 | | - $this->insertPage('Pages', 'are food', 0); |
24 | | - $this->insertPage('DblPageOne', 'ABCDEF', 0); |
25 | | - $this->insertPage('DblPageTwo', 'ABCDE', 0); |
26 | | - $this->insertPage('DblPageTwoLow', 'abcde', 0); |
| 12 | + function pageExists( $title ) {
| 13 | + return false; // stub: $title is ignored and every page is reported as missing, so insertSearchData() never takes its early return
27 | 14 | }
28 | 15 | |
29 | | - function normalize( $text ) { |
30 | | - return strtolower(preg_replace("/[^[:alnum:] ]/", " ", $text)); |
| 16 | + function insertSearchData() { // creates the fixture pages that the search tests below query
| 17 | + if( $this->pageExists( 'Not_Main_Page' ) ) {
| 18 | + return;
| 19 | + }
| 20 | + $this->insertPage("Not_Main_Page", "This is not a main page", 0);
| 21 | + $this->insertPage('Talk:Not_Main_Page', 'This is not a talk page to the main page, see [[smithee]]', 1);
| 22 | + $this->insertPage('Smithee', 'A smithee is one who smiths. See also [[Alan Smithee]]', 0);
| 23 | + $this->insertPage('Talk:Smithee', 'This article sucks.', 1);
| 24 | + $this->insertPage('Unrelated_page', 'Nothing in this page is about the S word.', 0);
| 25 | + $this->insertPage('Another_page', 'This page also is unrelated.', 0);
| 26 | + $this->insertPage('Help:Help', 'Help me!', 4);
| 27 | + $this->insertPage('Thppt', 'Blah blah', 0);
| 28 | + $this->insertPage('Alan_Smithee', 'yum', 0);
| 29 | + $this->insertPage('Pages', 'are\'food', 0);
| 30 | + $this->insertPage('HalfOneUp', 'AZ', 0);
| 31 | + $this->insertPage('FullOneUp', 'ＡＺ', 0); // full-width letters U+FF21, U+FF3A (not ASCII)
| 32 | + $this->insertPage('HalfTwoLow', 'az', 0);
| 33 | + $this->insertPage('FullTwoLow', 'ａｚ', 0); // full-width letters U+FF41, U+FF5A (not ASCII)
| 34 | + $this->insertPage('HalfNumbers', '1234567890', 0);
| 35 | + $this->insertPage('FullNumbers', '１２３４５６７８９０', 0); // full-width digits U+FF10-U+FF19 (not ASCII)
| 36 | + $this->insertPage('DomainName', 'example.com', 0);
31 | 37 | }
32 | 38 | |
33 | | - function insertPage( $pageName, $text, $ns ) { |
34 | | - $this->count++; |
35 | | - $this->db->safeQuery( 'INSERT INTO ! (page_id,page_namespace,page_title,page_latest) VALUES (?,?,?,?)', |
36 | | - $this->db->tableName( 'page' ), $this->count, $ns, $pageName, $this->count ); |
37 | | - $this->db->safeQuery( 'INSERT INTO ! (rev_id,rev_page) VALUES (?, ?)', |
38 | | - $this->db->tableName( 'revision' ), $this->count, $this->count ); |
39 | | - $this->db->safeQuery( 'INSERT INTO ! (old_id,old_text) VALUES (?, ?)', |
40 | | - $this->db->tableName( 'text' ), $this->count, $text ); |
41 | | - $this->db->safeQuery( 'INSERT INTO ! (si_page,si_title,si_text) VALUES (?, ?, ?)', |
42 | | - $this->db->tableName( 'searchindex' ), $this->count, |
43 | | - $this->normalize( $pageName ), $this->normalize( $text ) ); |
44 | | - } |
| 39 | + function removeSearchData() { // intended to delete every page recorded in $this->pageList
| 40 | + return; // NOTE(review): unconditional return makes the loop below unreachable, so nothing is deleted - presumably disabled on purpose; confirm
| 41 | + while( count($this->pageList) ) {
| 42 | + list( $title, $id ) = array_pop( $this->pageList ); // entries are array( Title, page id ) pairs pushed by insertPage()
| 43 | + $article = new Article( $title, $id ); // NOTE(review): $id is a page id; verify that is what Article's second constructor argument expects
| 44 | + $article->doDeleteArticle("Search Test");
| 45 | + }
| 46 | + }
45 | 47 | |
46 | 48 | function fetchIds( $results ) { |
47 | 49 | $matches = array(); |
— | — | @@ -55,34 +57,98 @@ |
56 | 58 | return $matches; |
57 | 59 | } |
58 | 60 | |
59 | | - function testTextSearch() { |
60 | | - if( is_null( $this->db ) ) { |
61 | | - $this->markTestIncomplete( "Can't find a database to test with." ); |
62 | | - } |
63 | | - $this->assertEquals( |
64 | | - array( 'Smithee' ), |
65 | | - $this->fetchIds( $this->search->searchText( 'smithee' ) ), |
66 | | - "Plain search failed" ); |
| 61 | + // Modified version of WikiRevision::importOldRevision()
| 62 | + function insertPage( $pageName, $text, $ns ) { // stores $text as a revision of $pageName, runs a search update, and always returns true; $ns is not read in this body
| 63 | + $dbw = $this->db;
| 64 | + $title = Title::newFromText( $pageName );
| 65 | +
| 66 | + $userId = 0;
| 67 | + $userText = 'WikiSysop';
| 68 | + $comment = 'Search Test';
| 69 | +
| 70 | + // avoid memory leak...?
| 71 | + $linkCache = LinkCache::singleton();
| 72 | + $linkCache->clear();
| 73 | +
| 74 | + $article = new Article( $title );
| 75 | + $pageId = $article->getId();
| 76 | + $created = false;
| 77 | + if( $pageId == 0 ) { // an id of 0 is treated as "no such page yet"
| 78 | + # must create the page...
| 79 | + $pageId = $article->insertOn( $dbw );
| 80 | + $created = true;
| 81 | + }
| 82 | +
| 83 | + # FIXME: Use original rev_id optionally (better for backups)
| 84 | + # Insert the row
| 85 | + $revision = new Revision( array(
| 86 | + 'page' => $pageId,
| 87 | + 'text' => $text,
| 88 | + 'comment' => $comment,
| 89 | + 'user' => $userId,
| 90 | + 'user_text' => $userText,
| 91 | + 'timestamp' => 0,
| 92 | + 'minor_edit' => false,
| 93 | + ) );
| 94 | + $revId = $revision->insertOn( $dbw ); // a revision row is written on every call, including for pages that already existed
| 95 | + $changed = $article->updateIfNewerOn( $dbw, $revision );
| 96 | +
| 97 | + $GLOBALS['wgTitle'] = $title;
| 98 | + if( $created ) { // new page: run the creation hooks/updates
| 99 | + Article::onArticleCreate( $title );
| 100 | + $article->createUpdates( $revision );
| 101 | + } elseif( $changed ) { // existing page whose current revision was replaced: run the edit hooks/updates
| 102 | + Article::onArticleEdit( $title );
| 103 | + $article->editUpdates(
| 104 | + $text, $comment, false, 0, $revId );
| 105 | + }
| 106 | +
| 107 | + $su = new SearchUpdate($article->getId(), $pageName, $text); // run a SearchUpdate for this page regardless of which branch ran above
| 108 | + $su->doUpdate();
| 109 | +
| 110 | + $this->pageList[] = array( $title, $article->getId() ); // recorded as ( Title, page id ) for removeSearchData()
| 111 | +
| 112 | + return true;
| 113 | + }
| 114 | + |
| 115 | + function testFullWidth() { // every width/case variant of the query must find all four AZ fixture pages
| 116 | + $this->assertEquals(
| 117 | + array( 'FullOneUp', 'FullTwoLow', 'HalfOneUp', 'HalfTwoLow' ),
| 118 | + $this->fetchIds( $this->search->searchText( 'AZ' ) ),
| 119 | + "Search for normalized from Half-width Upper" );
| 120 | + $this->assertEquals(
| 121 | + array( 'FullOneUp', 'FullTwoLow', 'HalfOneUp', 'HalfTwoLow' ),
| 122 | + $this->fetchIds( $this->search->searchText( 'az' ) ),
| 123 | + "Search for normalized from Half-width Lower" );
| 124 | + $this->assertEquals(
| 125 | + array( 'FullOneUp', 'FullTwoLow', 'HalfOneUp', 'HalfTwoLow' ),
| 126 | + $this->fetchIds( $this->search->searchText( 'ＡＺ' ) ), // full-width query (U+FF21, U+FF3A), not ASCII
| 127 | + "Search for normalized from Full-width Upper" );
| 128 | + $this->assertEquals(
| 129 | + array( 'FullOneUp', 'FullTwoLow', 'HalfOneUp', 'HalfTwoLow' ),
| 130 | + $this->fetchIds( $this->search->searchText( 'ａｚ' ) ), // full-width query (U+FF41, U+FF5A), not ASCII
| 131 | + "Search for normalized from Full-width Lower" );
67 | 132 | }
68 | 133 | |
| 134 | + function testTextSearch() { // with no setNamespaces() call, a text search for 'smithee' is expected to return only 'Smithee'
| 135 | + $this->assertEquals(
| 136 | + array( 'Smithee' ),
| 137 | + $this->fetchIds( $this->search->searchText( 'smithee' ) ),
| 138 | + "Plain search failed" );
| 139 | + }
| 140 | + |
69 | 141 | function testTextPowerSearch() {
70 | | - if( is_null( $this->db ) ) {
71 | | - $this->markTestIncomplete( "Can't find a database to test with." );
72 | | - }
73 | 142 | $this->search->setNamespaces( array( 0, 1, 4 ) ); // widen the search to namespaces 0, 1 and 4 before querying
74 | 143 | $this->assertEquals(
75 | 144 | array(
76 | 145 | 'Smithee',
77 | | - 'Talk:Main Page',
| 146 | + 'Talk:Not Main Page',
78 | 147 | ),
79 | 148 | $this->fetchIds( $this->search->searchText( 'smithee' ) ),
80 | 149 | "Power search failed" );
81 | 150 | }
82 | 151 | |
83 | 152 | function testTitleSearch() { |
84 | | - if( is_null( $this->db ) ) { |
85 | | - $this->markTestIncomplete( "Can't find a database to test with." ); |
86 | | - } |
87 | 153 | $this->assertEquals( |
88 | 154 | array( |
89 | 155 | 'Alan Smithee', |
— | — | @@ -93,9 +159,6 @@ |
94 | 160 | } |
95 | 161 | |
96 | 162 | function testTextTitlePowerSearch() { |
97 | | - if( is_null( $this->db ) ) { |
98 | | - $this->markTestIncomplete( "Can't find a database to test with." ); |
99 | | - } |
100 | 163 | $this->search->setNamespaces( array( 0, 1, 4 ) ); |
101 | 164 | $this->assertEquals( |
102 | 165 | array( |
— | — | @@ -108,6 +171,3 @@ |
109 | 172 | } |
110 | 173 | |
111 | 174 | } |
112 | | - |
113 | | - |
114 | | - |
Index: trunk/phase3/maintenance/tests/SearchDbTest.php |
— | — | @@ -6,23 +6,22 @@ |
7 | 7 | |
8 | 8 | function setUp() {
9 | 9 | global $wgDBprefix, $wgDBtype;
| 10 | + $this->db = wfGetDB( DB_MASTER ); // NOTE(review): uses the master connection directly instead of the removed buildTestDatabase() scratch tables - confirm fixtures may be written there
| 11 | + if( !$this->db ) {
| 12 | + $this->markTestIncomplete( "Can't find a database to test with." );
| 13 | + }
10 | 14 |
11 | | - if($wgDBprefix === "parsertest_" ||
12 | | - ($wgDBtype === 'oracle' && $wgDBprefix === 'pt_')) {
13 | | - $this->markTestSkipped("This test can't (yet?) be run with the parser tests");
14 | | - }
15 | | -
16 | 15 | $GLOBALS['wgContLang'] = new Language;
17 | | - $this->db = $this->buildTestDatabase(
18 | | - array( 'page', 'revision', 'text', 'searchindex', 'user' ) );
19 | | - if( $this->db ) {
20 | | - $this->insertSearchData();
21 | | - }
22 | | - $searchType = preg_replace("/Database/", "Search", get_class($this->db));
| 16 | + $this->insertSearchData(); // insert the fixtures exactly once; a second call would add a duplicate revision and pageList entry per page
| 17 | +
| 18 | + // Derive the search engine class name from the connection class name (Database -> Search).
| 19 | + $searchType = preg_replace("/Database/", "Search",
| 20 | + get_class($this->db));
23 | 21 | $this->search = new $searchType( $this->db );
24 | 22 | }
25 | 23 | |
26 | 24 | function tearDown() { |
| 25 | + $this->removeSearchData(); |
27 | 26 | if( !is_null( $this->db ) ) { |
28 | 27 | wfGetLB()->closeConnecton( $this->db ); |
29 | 28 | } |
Index: trunk/phase3/languages/Language.php |
— | — | @@ -1707,7 +1707,7 @@ |
1708 | 1708 | * @return String |
1709 | 1709 | */ |
1710 | 1710 | function normalizeForSearch( $string ) {
1711 | | - return $string;
| 1711 | + return self::convertDoubleWidth($string); // the base implementation now returns the result of convertDoubleWidth() instead of passing text through unchanged
1712 | 1712 | }
1713 | 1713 | |
1714 | 1714 | /** |
— | — | @@ -1715,8 +1715,17 @@ |
1716 | 1716 | * range: ff00-ff5f ~= 0020-007f |
1717 | 1717 | */ |
1718 | 1718 | protected static function convertDoubleWidth( $string ) {
1719 | | - $string = preg_replace( '/\xef\xbc([\x80-\xbf])/e', 'chr((ord("$1") & 0x3f) + 0x20)', $string );
1720 | | - $string = preg_replace( '/\xef\xbd([\x80-\x9a])/e', 'chr((ord("$1") & 0x3f) + 0x60)', $string );
| 1719 | + static $full = null; // lookup tables are built on first use and reused by later calls
| 1720 | + static $half = null;
| 1721 | +
| 1722 | + if( $full === null ) {
| 1723 | + $fullWidth = "０１２３４５６７８９ＡＢＣＤＥＦＧＨＩＪＫＬＭＮＯＰＱＲＳＴＵＶＷＸＹＺａｂｃｄｅｆｇｈｉｊｋｌｍｎｏｐｑｒｓｔｕｖｗｘｙｚ";
| 1724 | + $halfWidth = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
| 1725 | + $full = str_split( $fullWidth, 3 ); // str_split() counts bytes; each full-width character above is 3 bytes in UTF-8
| 1726 | + $half = str_split( $halfWidth ); // one byte per ASCII character, same order as $full
| 1727 | + }
| 1728 | +
| 1729 | + $string = str_replace( $full, $half, $string ); // replace each full-width digit/letter with its ASCII counterpart at the same index
1721 | 1730 | return $string;
1722 | 1731 | }
1723 | 1732 | |
Index: trunk/phase3/languages/classes/LanguageZh_hans.php |
— | — | @@ -23,10 +23,9 @@ |
24 | 24 | wfProfileIn( __METHOD__ ); |
25 | 25 | |
26 | 26 | // Double-width roman characters |
27 | | - $s = self::convertDoubleWidth( $string ); |
| 27 | + $s = parent::normalizeForSearch( $s ); |
28 | 28 | $s = trim( $s ); |
29 | 29 | $s = self::segmentByWord( $s ); |
30 | | - $s = parent::normalizeForSearch( $s ); |
31 | 30 | |
32 | 31 | wfProfileOut( __METHOD__ ); |
33 | 32 | return $s; |
Index: trunk/phase3/languages/classes/LanguageJa.php |
— | — | @@ -23,14 +23,6 @@ |
24 | 24 | return $s; |
25 | 25 | } |
26 | 26 | |
27 | | - function normalizeForSearch( $string ) { |
28 | | - // Double-width roman characters |
29 | | - $s = self::convertDoubleWidth( $string ); |
30 | | - |
31 | | - # Do general case folding and UTF-8 armoring |
32 | | - return parent::normalizeForSearch( $s ); |
33 | | - } |
34 | | - |
35 | 27 | # Italic is not appropriate for Japanese script |
36 | 28 | # Unfortunately most browsers do not recognise this, and render <em> as italic |
37 | 29 | function emphasize( $text ) { |