Index: trunk/phpwiki/fpw/recaseLinks.php |
— | — | @@ -0,0 +1,174 @@ |
| 2 | +<?php |
| 3 | +/* |
| 4 | +recaseLinks.php -- Renames articles in wiki newly converted from old |
| 5 | + UseMod format to fit case conventions. |
| 6 | + |
| 7 | +Most of the non-English wikipedias have been running in full capitalization |
| 8 | +mode, so articles Get Named Things Like This With All Kinds Of Things |
| 9 | +Capitalized That Shouldn't Be Capitalized At All. That's ugly, so this script |
| 10 | +will go through a newly converted database, check which case format was most |
| 11 | +often used in links, and rename pages to the preferred title. |
| 12 | + |
| 13 | +Additionally, redirects are created from the Old Caps Title and any other |
| 14 | +link forms that used to work, to avoid gratuitously breaking links. |
| 15 | + |
| 16 | +NOTE: Run this *after* importing the converted wiki into the database. |
| 17 | +Also, be sure to run rebuildLinks.php after this to get the linked/unlinked |
| 18 | +tables up to date again. |
| 19 | + |
| 20 | +OTHER NOTE: The Polish wiki is already on a (mostly) sane capitalization |
| 21 | +system, this script is just for the other languages. |
| 22 | + |
| 23 | +2002-05-21 <brion@pobox.com> |
| 24 | + |
| 25 | +*/ |
| 26 | + |
| 27 | + include_once ( "./wikiSettings.php" ) ; |
| 28 | + include_once ( "./basicFunctions.php" ) ; |
| 29 | + include_once ( "./databaseFunctions.php" ) ; |
| 30 | + include_once ( "./wikiTitle.php" ) ; |
| 31 | + include_once ( "./wikiUser.php" ) ; |
| 32 | + include_once ( "./wikiPage.php" ) ; |
| 33 | + |
# Don't run from a web browser!
# This is a long-running maintenance job that rewrites page titles directly
# in the database.  The check looks at $_SERVER, which carries SERVER_NAME
# when PHP is invoked through a web server; the earlier test read an array
# named $ENV that PHP never fills in, so it could not trigger.
if ( isset ( $_SERVER["SERVER_NAME"] ) ) die ( "Don't run this script via the web." ) ;

# Converting a large wiki can take a long time; lift the execution time limit.
set_time_limit ( 0 ) ;


# Slight encoding hack for Esperanto pedia charset issues
$user = new wikiUser ;
$user->options["encoding"] = 1 ;
$user->name = "Conversion script" ;
| 44 | + |
# Callback used by FreeToNormal(): receives a match of
# (separator character)(lowercase ASCII letter) and returns the separator
# followed by the upper-cased letter.
function FreeToNormalUpcaseMatch ( $match ) {
	return $match[1] . strtoupper ( $match[2] ) ;
}

# Translated out of old usemod wiki...
#
# Converts a free-form link text into the title form the old UseMod wiki
# would have used:
#   - spaces become underscores and the first character is upper-cased;
#   - runs of underscores collapse to one, a leading or trailing underscore
#     is dropped, and underscores next to a "/" are removed;
#   - if $FreeUpper is true (the default), every lowercase a-z letter that
#     directly follows one of  - _ . , ( ) /  is upper-cased.
#
# The text is first passed through the function named by the global
# $wikiRecodeOutput and the result through the one named by
# $wikiRecodeInput (presumably charset conversion hooks from the wiki
# settings -- confirm there).
#
# Returns the normalized title string.
function FreeToNormal ( $id , $FreeUpper = true ) {
	# If necessary, work on pre-charset conversion values
	global $wikiRecodeInput , $wikiRecodeOutput ;
	$id = $wikiRecodeOutput ( $id ) ;

	$id = str_replace ( " " , "_" , $id ) ;
	$id = ucfirst ( $id ) ;
	if ( strpos ( $id , '_' ) !== false ) { # Quick check for any space/underscores
		$id = preg_replace ( '/__+/' , "_" , $id ) ;
		$id = preg_replace ( '/^_/' , "" , $id ) ;
		$id = preg_replace ( '/_$/' , "" , $id ) ;
		# Subpage handling (unconditional here; was guarded by $UseSubpage in usemod)
		$id = preg_replace ( '|_/|' , "/" , $id ) ;
		$id = preg_replace ( '|/_|' , "/" , $id ) ;
	}
	if ( $FreeUpper ) {
		# Note that letters after ' are *not* capitalized
		# A callback is used instead of the /e modifier, which no longer
		# exists in current PHP; a named function keeps this usable on
		# versions without closures.
		$id = preg_replace_callback ( '|([-_.,\(\)/])([a-z])|' , 'FreeToNormalUpcaseMatch' , $id ) ;
	}
	return $wikiRecodeInput ( $id ) ;
}
| 70 | + |
# Template for the edit comment attached to generated redirects; declared
# global so it is reachable even if this file is included from a function.
global $wikiMoveRedirectMessage ;

# $links[old-style title][link spelling] => number of links using that spelling
$links = array () ;

# Database handle used by every query in this script
$connection = getDBconnection () ;

# Counter and cap for a trial-run limit; only a disabled check in blarg()
# refers to them.
$randomcount = 0 ;
$arbitrarylimit = 100 ;
| 78 | + |
# Tally link spellings from one link table into the global $links array.
#
# $un is the table-name prefix: "" reads column linked_to of table linked,
# "un" reads column unlinked_to of table unlinked.
#
# For every row, the raw link text is reduced to its old-style title with
# FreeToNormal(), and $links[old title][spelling] is incremented, where
# "spelling" is the link text after ucfirstIntl().  Progress is echoed as
# it goes.  Dies with the MySQL error text if the query fails.
function blarg ( $un ) {
	global $links , $connection ;
	global $arbitrarylimit, $randomcount ;

	$sql = "SELECT {$un}linked_to as lt from {$un}linked order by lt" ;
	$result = mysql_query ( $sql , $connection ) ;
	if ( ! $result ) die ( "SQL error: " . mysql_error () ) ;

	# No row is fetched ahead of the loop: doing so would throw away the
	# first link of the table without counting it.

	# Count them up!
	$lastlink = false ;	# raw text of the previous row; false = none seen yet
	$oldcase = "" ;
	$linkform = "" ;
	while ( $row = mysql_fetch_object ( $result ) ) {
		#if ( $randomcount++ > $arbitrarylimit ) break ; #FIXME

		# Rows arrive sorted, so the derived forms only need recomputing
		# when the raw link text changes.
		if ( $lastlink !== $row->lt ) {
			$lastlink = $row->lt ;
			$oldcase = FreeToNormal ( $lastlink ) ;
			$linkform = ucfirstIntl ( $lastlink ) ; # First letter always caps
			if ( ! isset ( $links[$oldcase] ) ) $links[$oldcase] = array () ;
			echo "\n$oldcase <- $linkform" ;
		}

		if ( isset ( $links[$oldcase][$linkform] ) ) {
			$links[$oldcase][$linkform]++ ;
		} else {
			$links[$oldcase][$linkform] = 1 ;
		}
		echo "." ;
	}
	mysql_free_result ( $result ) ;
}
# Scan both link tables: table-name prefix => label shown in the progress line
foreach ( array ( "" => "linked" , "un" => "unlinked" ) as $recaseTablePrefix => $recaseTableLabel ) {
	echo "\n\nChecking $recaseTableLabel table..." ;
	blarg ( $recaseTablePrefix ) ;
}
| 119 | + |
# For each title, find the most frequent form and rename the article
# to use that form, leaving redirects for the other forms (including the
# old title itself) wherever no page of that name exists.
echo "\n\nAwright, let's convert some titles!\n" ;
foreach ( $links as $oldcase => $linkforms ) {
	# PHP turns integer-looking array keys into ints; titles are always
	# handled as strings here so comparisons stay exact.
	$oldcase = (string) $oldcase ;

	# Check that article by this name really exists...
	$t = new wikiPage ;
	$t->setTitle ( $oldcase ) ;
	if ( ! $t->doesTopicExist () ) {
		continue ;
	}

	# We want to use the most frequently linked-to form as the title.
	# On a tie the form encountered first wins.
	$maxcount = 0 ;
	$maxform = $oldcase ;
	foreach ( $linkforms as $linkform => $count ) {
		if ( $count > $maxcount ) {
			$maxcount = $count ;
			$maxform = (string) $linkform ;
		}
	}

	if ( $maxform !== $oldcase ) {
		echo "\nRenaming $oldcase to $maxform...\n" ;
		# Most frequent form was different - rename the article.
		# Titles may contain quotes or backslashes, so escape them before
		# building the statements (mysql_real_escape_string needs PHP >= 4.3).
		$newsql = mysql_real_escape_string ( $maxform , $connection ) ;
		$oldsql = mysql_real_escape_string ( $oldcase , $connection ) ;

		# The timestamp column is assigned to itself, presumably so MySQL
		# does not bump it as a side effect of the rename -- confirm against
		# the table definitions.
		$sql = "UPDATE cur SET cur_title=\"$newsql\",cur_timestamp=cur_timestamp WHERE cur_title=\"$oldsql\"" ;
		if ( ! mysql_query ( $sql , $connection ) ) echo "\nMYSQL ERROR: " . mysql_error () . "\n" ;
		$sql = "UPDATE old SET old_title=\"$newsql\",old_timestamp=old_timestamp WHERE old_title=\"$oldsql\"" ;
		if ( ! mysql_query ( $sql , $connection ) ) echo "\nMYSQL ERROR: " . mysql_error () . "\n" ;

		# Add old case to redirect list for external links; bookmarks; etc.
		# This must go into $linkforms, the array iterated below; writing to
		# $links would not reach this loop's copy.
		if ( ! isset ( $linkforms[$oldcase] ) ) $linkforms[$oldcase] = 0 ;
		$linkforms[$oldcase]++ ;
	} else {
		echo " (don't need to rename $oldcase) " ;
	}

	# Make redirects where necessary
	foreach ( $linkforms as $linkform => $count ) {
		$linkform = (string) $linkform ;
		if ( $linkform === $maxform ) continue ;

		$t = new wikiPage ;
		$t->setTitle ( $linkform ) ;
		if ( $t->doesTopicExist () ) continue ;	# never overwrite a real page

		echo "\nMaking redirect from $linkform to $maxform...\n" ;
		$t->ensureExistence () ;
		# NOTE(review): $wikiConversionScript is not set anywhere in this
		# script; presumably it comes from the included settings files --
		# confirm, otherwise the value passed here is empty.
		$t->setEntry ( "#REDIRECT [[$maxform]]" ,
			str_replace ( '$1' , $maxform , $wikiMoveRedirectMessage ) ,
			0 , $wikiConversionScript , 1 ) ;
	}
}
| 174 | + |
| 175 | +?> |
Property changes on: trunk/phpwiki/fpw/recaseLinks.php |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 176 | + native |
Added: svn:keywords |
2 | 177 | + Author Date Id Revision |