Index: trunk/phase3/maintenance/updaters.inc |
— | — | @@ -57,6 +57,7 @@ |
58 | 58 | array( 'ipblocks', 'ipb_range_start', 'patch-ipb_range_start.sql' ), |
59 | 59 | array( 'site_stats', 'ss_images', 'patch-ss_images.sql' ), |
60 | 60 | array( 'ipblocks', 'ipb_anon_only', 'patch-ipb_anon_only.sql' ), |
| 61 | + array( 'page', 'page_no_title_convert','patch-page_no_title_convert.sql' ), |
61 | 62 | ); |
62 | 63 | |
63 | 64 | function rename_table( $from, $to, $patch ) { |
Index: trunk/phase3/maintenance/mysql5/tables.sql |
— | — | @@ -218,6 +218,10 @@ |
219 | 219 | -- Uncompressed length in bytes of the page's current source text. |
220 | 220 | page_len int(8) unsigned NOT NULL, |
221 | 221 | |
| 222 | + -- Set to 1 if the page contains __NOTITLECONVERT__ magic word. |
| 223 | + -- Used only for languages with variants to prevent title conversion |
| 224 | + page_no_title_convert bool NOT NULL default 0, |
| 225 | + |
222 | 226 | PRIMARY KEY page_id (page_id), |
223 | 227 | UNIQUE INDEX name_title (page_namespace,page_title), |
224 | 228 | |
Index: trunk/phase3/maintenance/tables.sql |
— | — | @@ -206,6 +206,10 @@ |
207 | 207 | -- Uncompressed length in bytes of the page's current source text. |
208 | 208 | page_len int(8) unsigned NOT NULL, |
209 | 209 | |
| 210 | + -- Set to 1 if the page contains __NOTITLECONVERT__ magic word. |
| 211 | + -- Used only for languages with variants to prevent title conversion |
| 212 | + page_no_title_convert bool NOT NULL default 0, |
| 213 | + |
210 | 214 | PRIMARY KEY page_id (page_id), |
211 | 215 | UNIQUE INDEX name_title (page_namespace,page_title), |
212 | 216 | |
Index: trunk/phase3/includes/Article.php |
— | — | @@ -30,6 +30,8 @@ |
31 | 31 | var $mGoodAdjustment; //!< |
32 | 32 | var $mLatest; //!< |
33 | 33 | var $mMinorEdit; //!< |
| 34 | + var $mNoTitleConvert; //!< |
| 35 | + var $mNoTitleConvertParser; //!< |
34 | 36 | var $mOldId; //!< |
35 | 37 | var $mRedirectedFrom; //!< |
36 | 38 | var $mRedirectUrl; //!< |
— | — | @@ -130,6 +132,8 @@ |
131 | 133 | $this->mRevIdFetched = 0; |
132 | 134 | $this->mRedirectUrl = false; |
133 | 135 | $this->mLatest = false; |
| 136 | + $this->mNoTitleConvert = false; |
| 137 | + $this->mNoTitleConvertParser = false; |
134 | 138 | } |
135 | 139 | |
136 | 140 | /** |
— | — | @@ -267,7 +271,8 @@ |
268 | 272 | 'page_random', |
269 | 273 | 'page_touched', |
270 | 274 | 'page_latest', |
271 | | - 'page_len' ) ; |
| 275 | + 'page_len', |
| 276 | + 'page_no_title_convert') ; |
272 | 277 | wfRunHooks( 'ArticlePageDataBefore', array( &$this , &$fields ) ) ; |
273 | 278 | $row = $dbr->selectRow( 'page', |
274 | 279 | $fields, |
— | — | @@ -303,6 +308,8 @@ |
304 | 309 | * @private |
305 | 310 | */ |
306 | 311 | function loadPageData( $data = 'fromdb' ) { |
| 312 | + global $wgContLang; |
| 313 | + |
307 | 314 | if ( $data === 'fromdb' ) { |
308 | 315 | $dbr =& $this->getDB(); |
309 | 316 | $data = $this->pageDataFromId( $dbr, $this->getId() ); |
— | — | @@ -320,6 +327,10 @@ |
321 | 328 | $this->mTouched = wfTimestamp( TS_MW, $data->page_touched ); |
322 | 329 | $this->mIsRedirect = $data->page_is_redirect; |
323 | 330 | $this->mLatest = $data->page_latest; |
| 331 | + $this->mNoTitleConvert = $data->page_no_title_convert; |
| 332 | + |
| 333 | + if($this->mNoTitleConvert) |
| 334 | + $wgContLang->setNoTitleConvert(); |
324 | 335 | } else { |
325 | 336 | if ( is_object( $this->mTitle ) ) { |
326 | 337 | $lc->addBadLinkObj( $this->mTitle ); |
— | — | @@ -984,6 +995,7 @@ |
985 | 996 | 'page_touched' => $dbw->timestamp(), |
986 | 997 | 'page_latest' => 0, # Fill this in shortly... |
987 | 998 | 'page_len' => 0, # Fill this in shortly... |
| 999 | + 'page_no_title_convert' => 0, |
988 | 1000 | ), __METHOD__ ); |
989 | 1001 | $newid = $dbw->insertId(); |
990 | 1002 | |
— | — | @@ -1023,12 +1035,21 @@ |
1024 | 1036 | 'page_is_new' => ($lastRevision === 0) ? 1 : 0, |
1025 | 1037 | 'page_is_redirect' => Article::isRedirect( $text ) ? 1 : 0, |
1026 | 1038 | 'page_len' => strlen( $text ), |
| 1039 | + 'page_no_title_convert' => ($this->mNoTitleConvertParser)? 1 : 0, |
1027 | 1040 | ), |
1028 | 1041 | $conditions, |
1029 | 1042 | __METHOD__ ); |
1030 | 1043 | |
| 1044 | + $succ = $dbw->affectedRows() != 0; |
| 1045 | + |
| 1046 | + // check if no title magic word has been changed |
| 1047 | + if($succ && $this->mNoTitleConvert != $this->mNoTitleConvertParser){ |
| 1048 | + // Clear caches |
| 1049 | + Article::onArticleCreate( $this->mTitle ); |
| 1050 | + } |
| 1051 | + |
1031 | 1052 | wfProfileOut( __METHOD__ ); |
1032 | | - return ( $dbw->affectedRows() != 0 ); |
| 1053 | + return $succ; |
1033 | 1054 | } |
1034 | 1055 | |
1035 | 1056 | /** |
— | — | @@ -1194,7 +1215,7 @@ |
1195 | 1216 | * @return bool success |
1196 | 1217 | */ |
1197 | 1218 | function doEdit( $text, $summary, $flags = 0 ) { |
1198 | | - global $wgUser, $wgDBtransactions; |
| 1219 | + global $wgUser, $wgDBtransactions, $wgContLang; |
1199 | 1220 | |
1200 | 1221 | wfProfileIn( __METHOD__ ); |
1201 | 1222 | $good = true; |
— | — | @@ -1221,6 +1242,15 @@ |
1222 | 1243 | $isminor = ( $flags & EDIT_MINOR ) && $wgUser->isAllowed('minoredit'); |
1223 | 1244 | $bot = $wgUser->isAllowed( 'bot' ) || ( $flags & EDIT_FORCE_BOT ); |
1224 | 1245 | |
| 1246 | + // process the notitleconvert magic for languages with variants |
| 1247 | + $this->mNoTitleConvertParser = false; |
| 1248 | + if(sizeof($wgContLang->getVariants())>1){ |
| 1249 | + $mw =& MagicWord::get( 'notitleconvert' ); |
| 1250 | + if( $mw->match( $text ) ){ |
| 1251 | + $this->mNoTitleConvertParser = true; |
| 1252 | + } |
| 1253 | + } |
| 1254 | + |
1225 | 1255 | $text = $this->preSaveTransform( $text ); |
1226 | 1256 | |
1227 | 1257 | $dbw =& wfGetDB( DB_MASTER ); |
Index: trunk/phase3/includes/SearchEngine.php |
— | — | @@ -51,6 +51,7 @@ |
52 | 52 | * @private |
53 | 53 | */ |
54 | 54 | function getNearMatch( $term ) { |
| 55 | + global $wgContLang; |
55 | 56 | # Exact match? No need to look further. |
56 | 57 | $title = Title::newFromText( $term ); |
57 | 58 | if (is_null($title)) |
— | — | @@ -62,33 +63,27 @@ |
63 | 64 | |
64 | 65 | # Now try all lower case (i.e. first letter capitalized) |
65 | 66 | # |
66 | | - $title = Title::newFromText( strtolower( $term ) ); |
| 67 | + $title = Title::newFromText( $wgContLang->lc( $term ) ); |
67 | 68 | if ( $title->exists() ) { |
68 | 69 | return $title; |
69 | 70 | } |
70 | 71 | |
71 | 72 | # Now try capitalized string |
72 | 73 | # |
73 | | - $title = Title::newFromText( ucwords( strtolower( $term ) ) ); |
| 74 | + $title = Title::newFromText( $wgContLang->ucwords( $term ) ); |
74 | 75 | if ( $title->exists() ) { |
75 | 76 | return $title; |
76 | 77 | } |
77 | 78 | |
78 | 79 | # Now try all upper case |
79 | 80 | # |
80 | | - $title = Title::newFromText( strtoupper( $term ) ); |
| 81 | + $title = Title::newFromText( $wgContLang->uc( $term ) ); |
81 | 82 | if ( $title->exists() ) { |
82 | 83 | return $title; |
83 | 84 | } |
84 | 85 | |
85 | 86 | # Now try Word-Caps-Breaking-At-Word-Breaks, for hyphenated names etc |
86 | | - $title = Title::newFromText( preg_replace_callback( |
87 | | - '/\b([\w\x80-\xff]+)\b/', |
88 | | - create_function( '$matches', ' |
89 | | - global $wgContLang; |
90 | | - return $wgContLang->ucfirst($matches[1]); |
91 | | - ' ), |
92 | | - $term ) ); |
| 87 | + $title = Title::newFromText( $wgContLang->ucwordbreaks($term) ); |
93 | 88 | if ( $title->exists() ) { |
94 | 89 | return $title; |
95 | 90 | } |
Index: trunk/phase3/includes/Parser.php |
— | — | @@ -1507,7 +1507,6 @@ |
1508 | 1508 | } |
1509 | 1509 | |
1510 | 1510 | $selflink = $this->mTitle->getPrefixedText(); |
1511 | | - $checkVariantLink = sizeof($wgContLang->getVariants())>1; |
1512 | 1511 | $useSubpages = $this->areSubpagesAllowed(); |
1513 | 1512 | wfProfileOut( $fname.'-setup' ); |
1514 | 1513 | |
— | — | @@ -1602,13 +1601,6 @@ |
1603 | 1602 | continue; |
1604 | 1603 | } |
1605 | 1604 | |
1606 | | - #check other language variants of the link |
1607 | | - #if the article does not exist |
1608 | | - if( $checkVariantLink |
1609 | | - && $nt->getArticleID() == 0 ) { |
1610 | | - $wgContLang->findVariantLink($link, $nt); |
1611 | | - } |
1612 | | - |
1613 | 1605 | $ns = $nt->getNamespace(); |
1614 | 1606 | $iw = $nt->getInterWiki(); |
1615 | 1607 | wfProfileOut( "$fname-title" ); |
— | — | @@ -3897,6 +3889,7 @@ |
3898 | 3890 | function replaceLinkHolders( &$text, $options = 0 ) { |
3899 | 3891 | global $wgUser; |
3900 | 3892 | global $wgOutputReplace; |
| 3893 | + global $wgContLang, $wgLanguageCode; |
3901 | 3894 | |
3902 | 3895 | $fname = 'Parser::replaceLinkHolders'; |
3903 | 3896 | wfProfileIn( $fname ); |
— | — | @@ -3987,6 +3980,97 @@ |
3988 | 3981 | } |
3989 | 3982 | wfProfileOut( $fname.'-check' ); |
3990 | 3983 | |
| 3984 | + # Do a second query for different language variants of links (if needed) |
| 3985 | + if($wgContLang->hasVariants()){ |
| 3986 | + $linkBatch = new LinkBatch(); |
| 3987 | + $variantMap = array(); // maps $pdbkey_Variant => $pdbkey_original |
| 3988 | + |
| 3989 | + // Add variants of links to link batch |
| 3990 | + foreach ( $this->mLinkHolders['namespaces'] as $key => $ns ) { |
| 3991 | + $title = $this->mLinkHolders['titles'][$key]; |
| 3992 | + if ( is_null( $title ) ) |
| 3993 | + continue; |
| 3994 | + |
| 3995 | + $pdbk = $title->getPrefixedDBkey(); |
| 3996 | + |
| 3997 | + // add the original text into query to check for notitleconvert pages |
| 3998 | + $variantTitle = Title::makeTitle( $ns, $title->getText() ); |
| 3999 | + $linkBatch->addObj( $variantTitle ); |
| 4000 | + $variantMap[$variantTitle->getPrefixedDBkey()][] = $key; |
| 4001 | + |
| 4002 | + // generate all variants of the link title text |
| 4003 | + $allTextVariants = $wgContLang->convertLinkToAllVariants($title->getText()); |
| 4004 | + |
| 4005 | + // if link was not found (in first query), add all variants to query |
| 4006 | + if ( !isset($colours[$pdbk]) ){ |
| 4007 | + foreach($allTextVariants as $textVariant){ |
| 4008 | + $variantTitle = Title::makeTitle( $ns, $textVariant ); |
| 4009 | + if(is_null($variantTitle)) continue; |
| 4010 | + $linkBatch->addObj( $variantTitle ); |
| 4011 | + $variantMap[$variantTitle->getPrefixedDBkey()][] = $key; |
| 4012 | + } |
| 4013 | + } |
| 4014 | + } |
| 4015 | + |
| 4016 | + # construct the variant-lookup query (page_no_title_convert is always selected) |
| 4017 | + $titleClause = $linkBatch->constructSet('page', $dbr); |
| 4018 | + $variantQuery = "SELECT page_id, page_namespace, page_title"; |
| 4019 | + if ( $threshold > 0 ) { |
| 4020 | + $variantQuery .= ', page_len, page_is_redirect'; |
| 4021 | + } |
| 4022 | + $variantQuery .= ", page_no_title_convert FROM $page WHERE $titleClause"; |
| 4023 | + if ( $options & RLH_FOR_UPDATE ) { |
| 4024 | + $variantQuery .= ' FOR UPDATE'; // must extend $variantQuery, the statement executed below, not $query |
| 4025 | + } |
| 4026 | + |
| 4027 | + $varRes = $dbr->query( $variantQuery, $fname ); |
| 4028 | + |
| 4029 | + # for each found variants, figure out link holders and replace |
| 4030 | + while ( $s = $dbr->fetchObject($varRes) ) { |
| 4031 | + |
| 4032 | + $variantTitle = Title::makeTitle( $s->page_namespace, $s->page_title ); |
| 4033 | + $varPdbk = $variantTitle->getPrefixedDBkey(); |
| 4034 | + $linkCache->addGoodLinkObj( $s->page_id, $variantTitle ); |
| 4035 | + $this->mOutput->addLink( $variantTitle, $s->page_id ); |
| 4036 | + |
| 4037 | + $noTitleConvert = $s->page_no_title_convert; |
| 4038 | + |
| 4039 | + $holderKeys = $variantMap[$varPdbk]; |
| 4040 | + |
| 4041 | + // loop over link holders |
| 4042 | + foreach($holderKeys as $key){ |
| 4043 | + $title = $this->mLinkHolders['titles'][$key]; |
| 4044 | + if ( is_null( $title ) ) continue; |
| 4045 | + |
| 4046 | + $pdbk = $title->getPrefixedDBkey(); |
| 4047 | + |
| 4048 | + if(!isset($colours[$pdbk]) || ($noTitleConvert && $colours[$pdbk] == 1)){ |
| 4049 | + // found link in some of the variants, replace the link holder data |
| 4050 | + $this->mLinkHolders['titles'][$key] = $variantTitle; |
| 4051 | + $this->mLinkHolders['dbkeys'][$key] = $variantTitle->getDBkey(); |
| 4052 | + |
| 4053 | + // prevent link conversion if needed |
| 4054 | + if($noTitleConvert) |
| 4055 | + $this->mLinkHolders['texts'][$key] = $wgContLang->markNoConversion($variantTitle->getText(),true); |
| 4056 | + |
| 4057 | + // set pdbk and colour |
| 4058 | + $pdbks[$key] = $varPdbk; |
| 4059 | + if ( $threshold > 0 ) { |
| 4060 | + $size = $s->page_len; |
| 4061 | + if ( $s->page_is_redirect || $s->page_namespace != 0 || $size >= $threshold ) { |
| 4062 | + $colours[$varPdbk] = 1; |
| 4063 | + } else { |
| 4064 | + $colours[$varPdbk] = 2; |
| 4065 | + } |
| 4066 | + } |
| 4067 | + else { |
| 4068 | + $colours[$varPdbk] = 1; |
| 4069 | + } |
| 4070 | + } |
| 4071 | + } |
| 4072 | + } |
| 4073 | + } |
| 4074 | + |
3991 | 4075 | # Construct search and replace arrays |
3992 | 4076 | wfProfileIn( $fname.'-construct' ); |
3993 | 4077 | $wgOutputReplace = array(); |
Index: trunk/phase3/includes/SpecialSearch.php |
— | — | @@ -77,6 +77,7 @@ |
78 | 78 | function goResult( $term ) { |
79 | 79 | global $wgOut; |
80 | 80 | global $wgGoToEdit; |
| 81 | + global $wgContLang; |
81 | 82 | |
82 | 83 | $this->setupPage( $term ); |
83 | 84 | |
— | — | @@ -96,6 +97,20 @@ |
97 | 98 | return; |
98 | 99 | } |
99 | 100 | |
| 101 | + # if language supports variants, search in all variants |
| 102 | + if($wgContLang->hasVariants()){ |
| 103 | + $allTermVariants = $wgContLang->convertLinkToAllVariants($term); |
| 104 | + |
| 105 | + foreach($allTermVariants as $termVariant){ |
| 106 | + $t = SearchEngine::getNearMatch( $termVariant ); |
| 107 | + if( !is_null( $t ) ) { |
| 108 | + $wgOut->redirect( $t->getFullURL() ); |
| 109 | + wfProfileOut( $fname ); |
| 110 | + return; |
| 111 | + } |
| 112 | + } |
| 113 | + } |
| 114 | + |
100 | 115 | # No match, generate an edit URL |
101 | 116 | $t = Title::newFromText( $term ); |
102 | 117 | if( is_null( $t ) ) { |
Index: trunk/phase3/RELEASE-NOTES |
— | — | @@ -206,6 +206,8 @@ |
207 | 207 | preferences don't get stuck in proxy caches for other people |
208 | 208 | * (bug 7324) Fix error message for failure of Database::sourceFile() |
209 | 209 | * (bug 7309) Plurals: use singular form for zero in French and Brazilian Portuguese |
| 210 | +* Add page_no_title_convert field to support language variant conversion |
| 211 | + for page titles which shouldn't be converted on display/linking |
210 | 212 | |
211 | 213 | |
212 | 214 | == Languages updated == |
Index: trunk/phase3/languages/MessagesSr_el.php |
— | — | @@ -175,7 +175,7 @@ |
176 | 176 | 'servername' => array( 0, 'SERVERNAME', 'IMESERVERA' ), |
177 | 177 | 'scriptpath' => array( 0, 'SCRIPTPATH', 'SKRIPTA' ), |
178 | 178 | 'grammar' => array( 0, 'GRAMMAR:', 'GRAMATIKA:' ), |
179 | | - 'notitleconvert' => array( 0, '__NOTITLECONVERT__', '__NOTC__', '__BEZTC__' ), |
| 179 | + 'notitleconvert' => array( 0, '__NOTITLECONVERT__', '__NOTC__', '__БЕЗКН__', '__BEZKN__' ), |
180 | 180 | 'nocontentconvert' => array( 0, '__NOCONTENTCONVERT__', '__NOCC__', '__BEZCC__' ), |
181 | 181 | 'currentweek' => array( 1, 'CURRENTWEEK', 'TRENUTNANEDELjA' ), |
182 | 182 | 'currentdow' => array( 1, 'CURRENTDOW', 'TRENUTNIDOV' ), |
Index: trunk/phase3/languages/Language.php |
— | — | @@ -49,9 +49,10 @@ |
50 | 50 | function findVariantLink(&$l, &$n) {} |
51 | 51 | function getExtraHashOptions() {return '';} |
52 | 52 | function getParsedTitle() {return '';} |
53 | | - function markNoConversion($text) {return $text;} |
| 53 | + function markNoConversion($text, $noParse=false) {return $text;} |
54 | 54 | function convertCategoryKey( $key ) {return $key; } |
55 | | - |
| 55 | + function convertLinkToAllVariants($text){ return array( $this->mLang->getCode() => $text); } |
| 56 | + function setNoTitleConvert(){} |
56 | 57 | } |
57 | 58 | |
58 | 59 | #-------------------------------------------------------------------------- |
— | — | @@ -712,6 +713,34 @@ |
713 | 714 | return iconv( $in, $out, $string ); |
714 | 715 | } |
715 | 716 | |
| 717 | + // callback functions for uc(), lc(), ucwords(), ucwordbreaks() |
| 718 | + function ucwordbreaksCallbackAscii($matches){ |
| 719 | + return $this->ucfirst($matches[1]); |
| 720 | + } |
| 721 | + |
| 722 | + function ucwordbreaksCallbackMB($matches){ |
| 723 | + return mb_strtoupper($matches[0]); |
| 724 | + } |
| 725 | + |
| 726 | + function ucCallback($matches){ // preg_replace_callback handler used by uc(): maps capture group 1 through the upper-case table |
| 727 | + list( $wikiUpperChars ) = $this->getCaseMaps(); // uc() only fills a local of this name, so a `global` lookup would not see the table |
| 728 | + return strtr( $matches[1] , $wikiUpperChars ); |
| 729 | + } |
| 730 | + |
| 731 | + function lcCallback($matches){ // preg_replace_callback handler used by lc(): maps capture group 1 through the lower-case table |
| 732 | + list( , $wikiLowerChars ) = $this->getCaseMaps(); // second element of getCaseMaps(), as in lc(); a `global` lookup would not see lc()'s local |
| 733 | + return strtr( $matches[1] , $wikiLowerChars ); |
| 734 | + } |
| 735 | + |
| 736 | + function ucwordsCallbackMB($matches){ |
| 737 | + return mb_strtoupper($matches[0]); |
| 738 | + } |
| 739 | + |
| 740 | + function ucwordsCallbackWiki($matches){ // fallback handler for ucwords()/ucwordbreaks() when mb_strtoupper is unavailable: upper-cases the whole match |
| 741 | + list( $wikiUpperChars ) = $this->getCaseMaps(); // fetch the upper-case table directly; no caller populates a global of this name |
| 742 | + return strtr( $matches[0] , $wikiUpperChars ); |
| 743 | + } |
| 744 | + |
716 | 745 | function ucfirst( $str ) { |
717 | 746 | return self::uc( $str, true ); |
718 | 747 | } |
— | — | @@ -729,9 +758,9 @@ |
730 | 759 | if ( self::isMultibyte( $str ) ) { |
731 | 760 | list( $wikiUpperChars ) = $this->getCaseMaps(); |
732 | 761 | $x = $first ? '^' : ''; |
733 | | - return preg_replace( |
734 | | - "/$x([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/e", |
735 | | - "strtr( \"\$1\" , \$wikiUpperChars )", |
| 762 | + return preg_replace_callback( |
| 763 | + "/$x([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/", |
| 764 | + array($this,"ucCallback"), |
736 | 765 | $str |
737 | 766 | ); |
738 | 767 | } else |
— | — | @@ -755,9 +784,9 @@ |
756 | 785 | if ( self::isMultibyte( $str ) ) { |
757 | 786 | list( , $wikiLowerChars ) = self::getCaseMaps(); |
758 | 787 | $x = $first ? '^' : ''; |
759 | | - return preg_replace( |
760 | | - "/$x([A-Z]|[\\xc0-\\xff][\\x80-\\xbf]*)/e", |
761 | | - "strtr( \"\$1\" , \$wikiLowerChars )", |
| 788 | + return preg_replace_callback( |
| 789 | + "/$x([A-Z]|[\\xc0-\\xff][\\x80-\\xbf]*)/", |
| 790 | + array($this,"lcCallback"), |
762 | 791 | $str |
763 | 792 | ); |
764 | 793 | } else |
— | — | @@ -768,6 +797,66 @@ |
769 | 798 | return (bool)preg_match( '/[\x80-\xff]/', $str ); |
770 | 799 | } |
771 | 800 | |
| 801 | + function ucwords($str) { |
| 802 | + // Lower-case $str, then upper-case the first letter of every word; returns the converted string |
| 803 | + |
| 804 | + if ( self::isMultibyte( $str ) ) { |
| 805 | + $str = self::lc($str); |
| 806 | + |
| 807 | + // regexp to find first letter in each word (i.e. after each space) |
| 808 | + $replaceRegexp = "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)| ([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/"; |
| 809 | + |
| 810 | + // pick the callback that capitalizes a single match: mb_strtoupper if present, else the wiki case table |
| 811 | + if ( function_exists( 'mb_strtoupper' ) ) |
| 812 | + return preg_replace_callback( |
| 813 | + $replaceRegexp, |
| 814 | + array($this,"ucwordsCallbackMB"), |
| 815 | + $str |
| 816 | + ); |
| 817 | + else |
| 818 | + return preg_replace_callback( |
| 819 | + $replaceRegexp, |
| 820 | + array($this,"ucwordsCallbackWiki"), |
| 821 | + $str |
| 822 | + ); |
| 823 | + } |
| 824 | + else |
| 825 | + return ucwords( strtolower( $str ) ); |
| 826 | + } |
| 827 | + |
| 828 | + # capitalize words at word breaks |
| 829 | + function ucwordbreaks($str){ |
| 830 | + // Upper-case the first letter after each word break; multibyte input is lower-cased first, pure-ASCII input is not |
| 831 | + |
| 832 | + if (self::isMultibyte( $str ) ) { |
| 833 | + $str = self::lc($str); |
| 834 | + |
| 835 | + // since \b doesn't work for UTF-8, we explicitly define word break chars |
| 836 | + $breaks= "[ \-\(\)\}\{\.,\?!]"; |
| 837 | + |
| 838 | + // find first letter after word break |
| 839 | + $replaceRegexp = "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)|$breaks([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/"; |
| 840 | + |
| 841 | + if ( function_exists( 'mb_strtoupper' ) ) |
| 842 | + return preg_replace_callback( |
| 843 | + $replaceRegexp, |
| 844 | + array($this,"ucwordbreaksCallbackMB"), |
| 845 | + $str |
| 846 | + ); |
| 847 | + else |
| 848 | + return preg_replace_callback( |
| 849 | + $replaceRegexp, |
| 850 | + array($this,"ucwordsCallbackWiki"), |
| 851 | + $str |
| 852 | + ); |
| 853 | + } |
| 854 | + else |
| 855 | + return preg_replace_callback( |
| 856 | + '/\b([\w\x80-\xff]+)\b/', |
| 857 | + array($this,"ucwordbreaksCallbackAscii"), |
| 858 | + $str ); |
| 859 | + } |
| 860 | + |
772 | 861 | function checkTitleEncoding( $s ) { |
773 | 862 | if( is_array( $s ) ) { |
774 | 863 | wfDebugDieBacktrace( 'Given array to checkTitleEncoding.' ); |
— | — | @@ -1169,6 +1258,17 @@ |
1170 | 1259 | return $this->mConverter->parserConvert( $text, $parser ); |
1171 | 1260 | } |
1172 | 1261 | |
| 1262 | + # Tell the converter that it shouldn't convert titles |
| 1263 | + function setNoTitleConvert(){ |
| 1264 | + $this->mConverter->setNotitleConvert(); |
| 1265 | + } |
| 1266 | + |
| 1267 | + # Check if this is a language with variants |
| 1268 | + function hasVariants(){ |
| 1269 | + return sizeof($this->getVariants())>1; |
| 1270 | + } |
| 1271 | + |
| 1272 | + |
1173 | 1273 | /** |
1174 | 1274 | * Perform output conversion on a string, and encode for safe HTML output. |
1175 | 1275 | * @param string $text |
— | — | @@ -1214,6 +1314,17 @@ |
1215 | 1315 | } |
1216 | 1316 | |
1217 | 1317 | /** |
| 1318 | + * If a language supports multiple variants, converts text |
| 1319 | + * into an array of all possible variants of the text: |
| 1320 | + * 'variant' => text in that variant |
| 1321 | + */ |
| 1322 | + |
| 1323 | + function convertLinkToAllVariants($text){ |
| 1324 | + return $this->mConverter->convertLinkToAllVariants($text); |
| 1325 | + } |
| 1326 | + |
| 1327 | + |
| 1328 | + /** |
1218 | 1329 | * returns language specific options used by User::getPageRenderHash() |
1219 | 1330 | * for example, the preferred language variant |
1220 | 1331 | * |
— | — | @@ -1242,8 +1353,8 @@ |
1243 | 1354 | * @param string $text text to be tagged for no conversion |
1244 | 1355 | * @return string the tagged text |
1245 | 1356 | */ |
1246 | | - function markNoConversion( $text ) { |
1247 | | - return $this->mConverter->markNoConversion( $text ); |
| 1357 | + function markNoConversion( $text, $noParse=false ) { |
| 1358 | + return $this->mConverter->markNoConversion( $text, $noParse ); |
1248 | 1359 | } |
1249 | 1360 | |
1250 | 1361 | /** |
Index: trunk/phase3/languages/MessagesSr_ec.php |
— | — | @@ -173,7 +173,7 @@ |
174 | 174 | 'servername' => array( 0, 'SERVERNAME', 'ИМЕСЕРВЕРА' ), |
175 | 175 | 'scriptpath' => array( 0, 'SCRIPTPATH', 'СКРИПТА' ), |
176 | 176 | 'grammar' => array( 0, 'GRAMMAR:', 'ГРАМАТИКА:' ), |
177 | | - 'notitleconvert' => array( 0, '__NOTITLECONVERT__', '__NOTC__', '__БЕЗТЦ__' ), |
| 177 | + 'notitleconvert' => array( 0, '__NOTITLECONVERT__', '__NOTC__', '__БЕЗКН__', '__BEZKN__' ), |
178 | 178 | 'nocontentconvert' => array( 0, '__NOCONTENTCONVERT__', '__NOCC__', '__БЕЗЦЦ__' ), |
179 | 179 | 'currentweek' => array( 1, 'CURRENTWEEK', 'ТРЕНУТНАНЕДЕЉА' ), |
180 | 180 | 'currentdow' => array( 1, 'CURRENTDOW', 'ТРЕНУТНИДОВ' ), |
Index: trunk/phase3/languages/LanguageConverter.php |
— | — | @@ -22,6 +22,7 @@ |
23 | 23 | var $mMarkup; |
24 | 24 | var $mFlags; |
25 | 25 | var $mUcfirst = false; |
| 26 | + var $mNoTitleConvert = false; |
26 | 27 | /** |
27 | 28 | * Constructor |
28 | 29 | * |
— | — | @@ -38,6 +39,7 @@ |
39 | 40 | $markup=array(), |
40 | 41 | $flags = array()) { |
41 | 42 | global $wgDBname; |
| 43 | + global $wgLegalTitleChars; |
42 | 44 | $this->mLangObj = $langobj; |
43 | 45 | $this->mMainLanguageCode = $maincode; |
44 | 46 | $this->mVariants = $variants; |
— | — | @@ -155,14 +157,17 @@ |
156 | 158 | $marker = ""; |
157 | 159 | |
158 | 160 | // this one is needed when the text is inside an html markup |
159 | | - $htmlfix = '|<[^>]+=\"[^(>=)]*$|^[^(<>=\")]*\"[^>]*>'; |
| 161 | + $htmlfix = '|<[^>]+$|^[^<>]*>'; |
160 | 162 | |
161 | | - $reg = '/<[^>]+>|&[a-z#][a-z0-9]+;' . $marker . $htmlfix . '/'; |
| 163 | + // disable convert to variants between <code></code> tags |
| 164 | + $codefix = '<code>.+?<\/code>|'; |
| 165 | + |
| 166 | + $reg = '/'.$codefix.'<[^>]+>|&[a-z#][a-z0-9]+;' . $marker . $htmlfix . '/s'; |
162 | 167 | |
163 | 168 | $matches = preg_split($reg, $text, -1, PREG_SPLIT_OFFSET_CAPTURE); |
164 | 169 | |
| 170 | + $m = array_shift($matches); |
165 | 171 | |
166 | | - $m = array_shift($matches); |
167 | 172 | $ret = $this->translate($m[0], $toVariant); |
168 | 173 | $mstart = $m[1]+strlen($m[0]); |
169 | 174 | foreach($matches as $m) { |
— | — | @@ -197,7 +202,7 @@ |
198 | 203 | * |
199 | 204 | * @param string $text the text to be converted |
200 | 205 | * @return array of string |
201 | | - * @private |
| 206 | + * @public |
202 | 207 | */ |
203 | 208 | function autoConvertToAllVariants($text) { |
204 | 209 | $fname="LanguageConverter::autoConvertToAllVariants"; |
— | — | @@ -209,18 +214,52 @@ |
210 | 215 | foreach($this->mVariants as $variant) { |
211 | 216 | $ret[$variant] = $this->translate($text, $variant); |
212 | 217 | } |
| 218 | + |
213 | 219 | wfProfileOut( $fname ); |
214 | 220 | return $ret; |
215 | 221 | } |
216 | 222 | |
217 | 223 | /** |
| 224 | + * convert link text to all supported variants |
| 225 | + * |
| 226 | + * @param string $text the text to be converted |
| 227 | + * @return array of string |
| 228 | + * @public |
| 229 | + */ |
| 230 | + function convertLinkToAllVariants($text) { |
| 231 | + if( !$this->mTablesLoaded ) |
| 232 | + $this->loadTables(); |
| 233 | + |
| 234 | + $ret = array(); |
| 235 | + $tarray = explode($this->mMarkup['begin'], $text); |
| 236 | + $tfirst = array_shift($tarray); |
| 237 | + |
| 238 | + foreach($this->mVariants as $variant) |
| 239 | + $ret[$variant] = $this->translate($tfirst,$variant); |
| 240 | + |
| 241 | + foreach($tarray as $txt) { |
| 242 | + $marked = explode($this->mMarkup['end'], $txt, 2); |
| 243 | + |
| 244 | + foreach($this->mVariants as $variant){ |
| 245 | + $ret[$variant] .= $this->mMarkup['begin'].$marked[0].$this->mMarkup['end']; |
| 246 | + if(array_key_exists(1, $marked)) |
| 247 | + $ret[$variant] .= $this->translate($marked[1],$variant); |
| 248 | + } |
| 249 | + |
| 250 | + } |
| 251 | + |
| 252 | + return $ret; |
| 253 | + } |
| 254 | + |
| 255 | + |
| 256 | + /** |
218 | 257 | * Convert text using a parser object for context |
219 | 258 | */ |
220 | 259 | function parserConvert( $text, &$parser ) { |
221 | 260 | global $wgDisableLangConversion; |
222 | 261 | /* don't do anything if this is the conversion table */ |
223 | 262 | if ( $parser->mTitle->getNamespace() == NS_MEDIAWIKI && |
224 | | - strpos($parser->mTitle->getText, "Conversiontable") !== false ) |
| 263 | + strpos($parser->mTitle->getText(), "Conversiontable") !== false ) |
225 | 264 | { |
226 | 265 | return $text; |
227 | 266 | } |
— | — | @@ -264,6 +303,11 @@ |
265 | 304 | return $text; |
266 | 305 | |
267 | 306 | if( $isTitle ) { |
| 307 | + if($this->mNoTitleConvert){ |
| 308 | + $this->mTitleDisplay = $text; |
| 309 | + return $text; |
| 310 | + } |
| 311 | + |
268 | 312 | if( !$this->mDoTitleConvert ) { |
269 | 313 | $this->mTitleDisplay = $text; |
270 | 314 | return $text; |
— | — | @@ -278,7 +322,7 @@ |
279 | 323 | return $text; |
280 | 324 | } |
281 | 325 | else { |
282 | | - $this->mTitleDisplay = $this->autoConvert($text); |
| 326 | + $this->mTitleDisplay = $this->convert($text); |
283 | 327 | return $this->mTitleDisplay; |
284 | 328 | } |
285 | 329 | } |
— | — | @@ -315,7 +359,7 @@ |
316 | 360 | else |
317 | 361 | $rules = $marked[0]; |
318 | 362 | |
319 | | -#FIXME: may cause trouble here... |
| 363 | + //FIXME: may cause trouble here... |
320 | 364 | //strip since it interferes with the parsing, plus, |
321 | 365 | //all spaces should be stripped in this tag anyway. |
322 | 366 | $rules = str_replace(' ', '', $rules); |
— | — | @@ -410,23 +454,16 @@ |
411 | 455 | * @access public |
412 | 456 | */ |
413 | 457 | function findVariantLink( &$link, &$nt ) { |
414 | | - static $count=0; //used to limit this operation |
415 | | - static $cache=array(); |
416 | 458 | global $wgDisableLangConversion; |
417 | 459 | $pref = $this->getPreferredVariant(); |
418 | 460 | $ns=0; |
419 | 461 | if(is_object($nt)) |
420 | 462 | $ns = $nt->getNamespace(); |
421 | | - if( $count > 50 && $ns != NS_CATEGORY ) |
422 | | - return; |
423 | | - $count++; |
| 463 | + |
424 | 464 | $variants = $this->autoConvertToAllVariants($link); |
425 | 465 | if($variants == false) //give up |
426 | 466 | return; |
427 | 467 | foreach( $variants as $v ) { |
428 | | - if(isset($cache[$v])) |
429 | | - continue; |
430 | | - $cache[$v] = 1; |
431 | 468 | $varnt = Title::newFromText( $v, $ns ); |
432 | 469 | if( $varnt && $varnt->getArticleID() > 0 ) { |
433 | 470 | $nt = $varnt; |
— | — | @@ -655,7 +692,7 @@ |
656 | 693 | * @param string $text text to be tagged for no conversion |
657 | 694 | * @return string the tagged text |
658 | 695 | */ |
659 | | - function markNoConversion($text) { |
| 696 | + function markNoConversion($text, $noParse=false) { |
660 | 697 | # don't mark if already marked |
661 | 698 | if(strpos($text, $this->mMarkup['begin']) || |
662 | 699 | strpos($text, $this->mMarkup['end'])) |
— | — | @@ -696,6 +733,11 @@ |
697 | 734 | } |
698 | 735 | return true; |
699 | 736 | } |
| 737 | + |
| 738 | + function setNoTitleConvert(){ |
| 739 | + $this->mNoTitleConvert = true; |
| 740 | + } |
| 741 | + |
700 | 742 | } |
701 | 743 | |
702 | 744 | ?> |
Index: trunk/phase3/languages/LanguageSr.php |
— | — | @@ -169,8 +169,8 @@ |
170 | 170 | * We want our external link captions to be converted in variants, |
171 | 171 | * so we return the original text instead -{$text}-, except for URLs |
172 | 172 | */ |
173 | | - function markNoConversion($text) { |
174 | | - if(preg_match("/^https?:\/\/|ftp:\/\/|irc:\/\//",$text)) |
| 173 | + function markNoConversion($text, $noParse=false) { |
| 174 | + if($noParse || preg_match("/^https?:\/\/|ftp:\/\/|irc:\/\//",$text)) |
175 | 175 | return parent::markNoConversion($text); |
176 | 176 | return $text; |
177 | 177 | } |
— | — | @@ -188,22 +188,51 @@ |
189 | 189 | return parent::autoConvert($text,$toVariant); |
190 | 190 | } |
191 | 191 | |
| 192 | + /** |
| 193 | + * Translates text into the given variant, with one special case: |
| 194 | + * - Roman numerals are left unconverted |
| 195 | + */ |
| 196 | + function translate($text, $toVariant){ |
| 197 | + $breaks = '[^\w\x80-\xff]'; |
192 | 198 | |
| 199 | + // regexp for roman numbers |
| 200 | + $roman = 'M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})'; |
| 201 | + |
| 202 | + $reg = '/^'.$roman.'$|^'.$roman.$breaks.'|'.$breaks.$roman.'$|'.$breaks.$roman.$breaks.'/'; |
| 203 | + |
| 204 | + $matches = preg_split($reg, $text, -1, PREG_SPLIT_OFFSET_CAPTURE); |
| 205 | + |
| 206 | + $m = array_shift($matches); |
| 207 | + $ret = strtr($m[0], $this->mTables[$toVariant]); |
| 208 | + $mstart = $m[1]+strlen($m[0]); |
| 209 | + foreach($matches as $m) { |
| 210 | + $ret .= substr($text, $mstart, $m[1]-$mstart); |
| 211 | + $ret .= parent::translate($m[0], $toVariant); |
| 212 | + $mstart = $m[1] + strlen($m[0]); |
| 213 | + } |
| 214 | + |
| 215 | + return $ret; |
| 216 | + } |
| 217 | + |
| 218 | + |
193 | 219 | } |
194 | 220 | |
195 | 221 | class LanguageSr extends LanguageSr_ec { |
196 | 222 | function __construct() { |
197 | 223 | global $wgHooks; |
| 224 | + |
198 | 225 | parent::__construct(); |
199 | 226 | |
200 | | - $variants = array('sr', 'sr-ec', 'sr-jc', 'sr-el', 'sr-jl'); |
| 227 | + // these variants are currently UNUSED: |
| 228 | + // 'sr-jc', 'sr-jl' |
| 229 | + $variants = array('sr', 'sr-ec', 'sr-el'); |
201 | 230 | $variantfallbacks = array( |
202 | 231 | 'sr' => 'sr-ec', |
203 | | - 'sr-ec' => 'sr-jc', |
204 | | - 'sr-jc' => 'sr-ec', |
205 | | - 'sr-el' => 'sr-jl', |
206 | | - 'sr-jl' => 'sr-el' |
207 | | - ); |
| 232 | + 'sr-ec' => 'sr-ec', |
| 233 | + 'sr-el' => 'sr-el', |
| 234 | + ); |
| 235 | + |
| 236 | + |
208 | 237 | $marker = array();//don't mess with these, leave them as they are |
209 | 238 | $flags = array( |
210 | 239 | 'S' => 'S', 'писмо' => 'S', 'pismo' => 'S', |