r418 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r417‎ | r418 | r419 >
Date:01:10, 23 May 2002
Author:vibber
Status:old
Tags:
Comment:
Script to fix article title case after importing new other-language wiki
Modified paths:
  • /trunk/phpwiki/fpw/recaseLinks.php (added) (history)

Diff [purge]

Index: trunk/phpwiki/fpw/recaseLinks.php
@@ -0,0 +1,174 @@
 2+<?php
 3+/*
 4+recaseLinks.php -- Renames articles in wiki newly converted from old
 5+ UseMod format to fit case conventions.
 6+
 7+Most of the non-English wikipedias have been running in full capitalization
 8+mode, so articles Get Named Things Like This With All Kinds Of Things
 9+Capitalized That Shouldn't Be Capitalized At All. That's ugly, so this script
 10+will go through a newly converted database, check which case format was most
 11+often used in links, and rename pages to the preferred title.
 12+
 13+Additionally, redirects are created from the Old Caps Title and any other
 14+link forms that used to work, to avoid gratuitously breaking links.
 15+
 16+NOTE: Run this *after* importing the converted wiki into the database.
 17+Also, be sure to run rebuildLinks.php after this to get the linked/unlinked
 18+tables up to date again.
 19+
 20+OTHER NOTE: The Polish wiki is already on a (mostly) sane capitalization
 21+system; this script is just for the other languages.
 22+
 23+2002-05-21 <brion@pobox.com>
 24+
 25+*/
 26+
 27+ include_once ( "./wikiSettings.php" ) ;
 28+ include_once ( "./basicFunctions.php" ) ;
 29+ include_once ( "./databaseFunctions.php" ) ;
 30+ include_once ( "./wikiTitle.php" ) ;
 31+ include_once ( "./wikiUser.php" ) ;
 32+ include_once ( "./wikiPage.php" ) ;
 33+
# Don't run from a web browser!
# PHP has no $ENV variable, so look for the web server's SERVER_NAME in
# $_SERVER and in the process/SAPI environment instead. Either one being
# present means we were started through a web server.
if ( isset ( $_SERVER["SERVER_NAME"] ) || getenv ( "SERVER_NAME" ) !== false ) {
    die ( "Don't run this script via the web." ) ;
}

# Renaming a whole wiki can take a long time; lift the execution time limit.
set_time_limit ( 0 ) ;


# Slight encoding hack for Esperanto pedia charset issues
$user = new wikiUser;
$user->options["encoding"] = 1 ;
$user->name = "Conversion script" ;
# Translated out of old usemod wiki...
#
# Converts a free-form link/title into the old UseMod "normal" form:
#   - spaces become underscores and the first byte is upper-cased;
#   - runs of underscores collapse to one, a leading and a trailing
#     underscore are dropped, and underscores touching a "/" are removed
#     (the subpage rule is applied unconditionally here);
#   - if $FreeUpper is true, an ASCII lower-case letter directly after one
#     of - _ . , ( ) / is upper-cased.
#
# $id        title text to normalize
# $FreeUpper whether to capitalize letters after separators (default true)
#
# Returns the normalized title, passed back through $wikiRecodeInput.
function FreeToNormal ( $id , $FreeUpper = true ) {
    # If necessary, work on pre-charset conversion values.
    # Both globals hold function names and are called as variable functions.
    global $wikiRecodeInput , $wikiRecodeOutput ;
    $id = $wikiRecodeOutput ( $id ) ;

    $id = str_replace ( " " , "_" , $id ) ;
    $id = ucfirst ( $id ) ;
    if ( strpos ( $id , '_' ) !== false ) { # Quick check for any space/underscores
        $id = preg_replace ( '/__+/' , "_" , $id ) ;
        $id = preg_replace ( '/^_/' , "" , $id ) ;
        $id = preg_replace ( '/_$/' , "" , $id ) ;
        $id = preg_replace ( '|_/|' , "/" , $id ) ;
        $id = preg_replace ( '|/_|' , "/" , $id ) ;
    }
    if ( $FreeUpper ) {
        # Note that letters after ' are *not* capitalized.
        # A callback is used instead of the /e modifier, which newer PHP
        # versions no longer support.
        $id = preg_replace_callback ( '|([-_.,\(\)/])([a-z])|' , 'FreeToNormalUpcaseMatch' , $id ) ;
    }
    return $wikiRecodeInput ( $id ) ;
}

# Callback for FreeToNormal: keeps the separator ($match[1]) and upper-cases
# the letter that follows it ($match[2]).
function FreeToNormalUpcaseMatch ( $match ) {
    return $match[1] . strtoupper ( $match[2] ) ;
}
 70+
# Message template used for the edit summary of generated redirects.
global $wikiMoveRedirectMessage ;

# Leftovers of a debugging limiter: a row cap and a running row counter.
$randomcount = 0 ;
$arbitrarylimit = 100 ;

# Database handle shared by every query in this script.
$connection = getDBconnection () ;

# Link tally: old-style title => array ( link form => times seen ).
$links = array () ;
 78+
# Tallies how often each spelling of a link target occurs in one link table.
#
# $un  table/column prefix: "" reads linked.linked_to, "un" reads
#      unlinked.unlinked_to.
#
# Every row is counted in the global $links as
#   $links[ FreeToNormal(raw) ][ ucfirstIntl(raw) ]
# i.e. old all-caps title => array ( link form as written => count ).
# Progress is echoed: one line per distinct raw form, one dot per row.
# Dies with the MySQL error text if the query fails.
function blarg ( $un ) {
    global $links , $connection ;

    $sql = "SELECT {$un}linked_to as lt from {$un}linked order by lt" ;
    $result = mysql_query ( $sql , $connection ) ;
    if ( ! $result ) die ( "SQL error: " . mysql_error () ) ;

    # Count them up!
    # Rows arrive sorted, so identical raw forms are adjacent; the derived
    # titles are only recomputed when the raw value changes. The raw value is
    # remembered separately from $linkform (which gets its first letter
    # capitalized) and compared strictly, so lower-case-initial or
    # numeric-looking titles are not mistaken for a change or a match.
    $rawform = null ;
    $oldcase = "" ;
    $linkform = "" ;
    while ( $row = mysql_fetch_object ( $result ) ) {
        if ( $rawform !== $row->lt ) {
            $rawform = $row->lt ;
            $oldcase = FreeToNormal ( $rawform ) ;
            $linkform = ucfirstIntl ( $rawform ) ; # First letter always caps
            if ( ! isset ( $links[$oldcase] ) ) $links[$oldcase] = array () ;
            echo "\n$oldcase <- $linkform" ;
        }

        if ( isset ( $links[$oldcase][$linkform] ) ) {
            $links[$oldcase][$linkform]++ ;
        } else {
            $links[$oldcase][$linkform] = 1 ;
        }
        echo "." ;
    }
    mysql_free_result ( $result ) ;
}
# Gather link statistics from both link tables into $links.
echo "\n\nChecking linked table..." ;
blarg ( "" ) ;
echo "\n\nChecking unlinked table..." ;
blarg ( "un" ) ;

# For each title, find the most frequent form and rename the article
# to use that form, leaving redirects for the other forms.
echo "\n\nAwright, let's convert some titles!\n" ;
foreach ( $links as $oldcase => $linkforms ) {
    # PHP turns numeric-looking array keys into integers; titles are strings.
    $oldcase = (string) $oldcase ;

    # Check that article by this name really exists...
    $t = new wikiPage ;
    $t->setTitle ( $oldcase ) ;
    if ( ! $t->doesTopicExist () ) {
        continue ;
    }

    # We want to use the most frequently linked-to form as the title.
    # On a tie the form seen first wins.
    $maxcount = 0 ;
    $maxform = $oldcase ;
    foreach ( $linkforms as $linkform => $count ) {
        if ( $count > $maxcount ) {
            $maxcount = $count ;
            $maxform = (string) $linkform ;
        }
    }

    if ( $maxform !== $oldcase ) {
        echo "\nRenaming $oldcase to $maxform...\n" ;
        # Most frequent form was different - rename the article.
        # Titles come from wiki text, so escape them before building SQL.
        $sqlnew = addslashes ( $maxform ) ;
        $sqlold = addslashes ( $oldcase ) ;

        # Assigning each timestamp column to itself is presumably there to stop
        # MySQL from auto-updating it on write -- confirm against the schema.
        $sql = "UPDATE cur SET cur_title=\"$sqlnew\",cur_timestamp=cur_timestamp WHERE cur_title=\"$sqlold\"" ;
        if ( ! mysql_query ( $sql , $connection ) ) echo "\nMYSQL ERROR: " . mysql_error () . "\n" ;
        $sql = "UPDATE old SET old_title=\"$sqlnew\",old_timestamp=old_timestamp WHERE old_title=\"$sqlold\"" ;
        if ( ! mysql_query ( $sql , $connection ) ) echo "\nMYSQL ERROR: " . mysql_error () . "\n" ;

        # Add old case to redirect list for external links; bookmarks; etc.
        # This must go into $linkforms (the array the redirect loop below
        # walks), not into $links, which foreach has already copied from.
        if ( ! isset ( $linkforms[$oldcase] ) ) $linkforms[$oldcase] = 1 ;
    } else {
        echo " (don't need to rename $oldcase) " ;
    }

    # Make redirects where necessary: every other form that is not already
    # an existing page gets a redirect to the chosen title.
    foreach ( $linkforms as $linkform => $count ) {
        $linkform = (string) $linkform ;
        if ( $linkform === $maxform ) continue ;

        $r = new wikiPage ;
        $r->setTitle ( $linkform ) ;
        if ( ! $r->doesTopicExist () ) {
            echo "\nMaking redirect from $linkform to $maxform...\n" ;
            $r->ensureExistence () ;
            # NOTE(review): $wikiConversionScript is not defined in this file;
            # presumably it comes from one of the includes -- confirm.
            $r->setEntry ( "#REDIRECT [[$maxform]]" ,
                str_replace ( "$1" , "$maxform" , $wikiMoveRedirectMessage ) ,
                0 , $wikiConversionScript , 1 ) ;
        }
    }
}

?>
Property changes on: trunk/phpwiki/fpw/recaseLinks.php
___________________________________________________________________
Added: svn:eol-style
   + native
Added: svn:keywords
   + Author Date Id Revision

Status & tagging log