r100158 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r100157‎ | r100158 | r100159 >
Date:20:52, 18 October 2011
Author:platonides
Status:deferred
Tags:
Comment:
Modified paths:
  • /trunk/tools/code-utils/read_wfMsgCalls.py (added) (history)

Diff [purge]

Index: trunk/tools/code-utils/read_wfMsgCalls.py
@@ -0,0 +1,279 @@
 2+# -*- coding: utf-8 -*-
 3+
 4+#
 5+# Two small tools to analyse the keys of the i18n messages of MediaWiki
 6+# 1/ Get the current keys (and corresponding language and message) from their definition
 7+# 2/ Get the calls of the wfMsg functions (and corresponding file, line, type (wfMsg, wfMsgForContent, etc.), message key (when possible))
 8+#
 9+#
 10+# Some details:
 11+# * this program calls a PHP interpreter, so you need it
 12+# * the occurrences of wfMsg functions in block comments are removed (about 30 occurrences); I didn’t check for single-line comments
 13+# * I assumed the keys of the messages match '([a-zA-Z0-9_-]+?)', which seems quite correct after tests (anyway no wfMsg call is forgotten, even if the key is not recognized)
 14+# * hence keys built from a variable are never computed, since that would require a deeper program analysis; such cases are wfMsg( $wgLogNames[$type] ) or (easier?) wfMessage( 'block-log-flags-' . $flag )
 15+# * some calls are missed when they are made through call_user_func_array, but in these cases you probably have no chance to get the associated message key (because it is probably a variable)
 16+# * the results are CSV
 17+# * the format of the messageStrings file is: 1/ language code (from the name of the file when available); 2/ message key; 3/ content of the message
 18+# * the format of the wfMsgCalls file is: 1/ path of the file; 2/ line number; 3/ wfMsg type (wfMsg, wfMessage, etc.); 4/ message key (when possible); 5/ complete call of the function
 19+#
 20+
 21+
 22+# # # # # # # #
 23+# Parameters #
 24+# # # # # # # #
 25+
# BASE PARAMETERS

# Root of the MediaWiki source tree to analyse; the script changes into this
# folder while it scans, so every relative path below is relative to it.
baseFolder = "mediawiki/repo/phase3"

# CSV file receiving one row per wfMsg* call found in the code. It is written
# to the folder the script was started from.
wfMsgCallsResultFile = "wfMsgCalls.csv"

# CSV file receiving the language / message key / message associations read
# from the PHP $messages variables. It is written to the folder the script was
# started from.
messageStringsResultFile = "messageStrings.csv"

# False: the text of each message is stored as a third column (about 30 Mio).
# True: only the language and the key are stored (about 10 Mio).
lightMessageStrings = False

# Names of the message functions searched for in the code
messageFunctions = [
    "wfMsg",
    "wfMessage",
    "wfMessageFallback",
    "wfMsgExt",
    "wfMsgForContent",
    "wfMsgNoTrans",
    "wfMsgForContentNoTrans",
    "wfMsgReal",
    "wfMsgHtml",
    "wfMsgWikiHtml",
    "wfEmptyMsg",
    "wfMsgReplaceArgs",
    "wfMsgGetKey",
]


# MESSAGES FOLDERS AND FILES

# Folders holding message files, grouped by kind:
#  * 'phase3': folders whose .php files are read directly
#  * 'extensions': root folders searched recursively for *.i18n.php files
#  * '#': placeholder entry; keep it so that messagesIndividualFiles is read
messagesFolders = {
    'phase3': ['languages/messages'],
    'extensions': [],
    '#': '#',
}

# File names to skip
messagesExcludeFiles = []

# Extra files to read (they must not lie in the folders above, otherwise
# their messages would be recorded twice)
messagesIndividualFiles = []


# CODE FOLDERS AND FILES

# Folders searched recursively for .php files; the "#" entry is a placeholder
# that must be kept so that codeIndividualFiles is scanned as well
codeFolders = [
    "includes",
    "extensions",
    "skins",
    "languages/classes",
    "#",
]

# File names to skip
codeExcludeFiles = []

# Extra files to scan (they must not lie in the folders above, otherwise
# their calls would be recorded twice)
codeIndividualFiles = [
    "languages/Language.php",
    "languages/LanguageConverter.php",
    "languages/Names.php",
    "resources/Resources.php",
]
 66+
 67+
 68+
 69+
 70+# # # # # # # # # # # #
 71+# Read the i18n files #
 72+# # # # # # # # # # # #
 73+
 74+import os, os.path, re, csv, subprocess
 75+
 76+
# Remember the folder the script was started from (the CSV results are written
# there), then work relative to the MediaWiki tree.
currentFolder = os.getcwd()
os.chdir(baseFolder)

# Rows of the messageStrings CSV: [language, key] or [language, key, text]
i18nMessages = []

# Placeholder substituted by the PHP helper for line breaks inside a message,
# so that every message occupies exactly one line of its output. It must stay
# identical to the literal used in _phpDumpMessages.
_newlineMarker = '5197361546748612348916973'

# PHP code run after the message file has been loaded. It prints one line per
# message, 'key text' for a flat $messages array or 'lang|key text' when
# $messages is an array of arrays indexed by language.
# Raw string: the \r and \n escapes are meant for PHP, not for Python.
_phpDumpMessages = r"""
if( isset( $messages ) && is_array( $messages ) && count( $messages ) > 0 ) {
    if( is_array( current( $messages ) ) ) {
        foreach( $messages as $lang => $msgs )
            foreach( $msgs as $key => $msg )
                echo $lang.'|'.$key.' '.str_replace( array("\r\n", "\n", "\r"), "5197361546748612348916973", $msg )."\n";
    }
    else {
        foreach( $messages as $key => $msg )
            echo $key.' '.str_replace( array("\r\n", "\n", "\r"), "5197361546748612348916973", $msg )."\n";
    }
}
"""


def _readPhpMessages(folder, filename):
    """Run PHP on one message file and return what it prints, as a list of lines.

    The generated script loads includes/Defines.php and the file
    folder/filename, then dumps the $messages variable with _phpDumpMessages.
    An empty list means PHP printed nothing.
    """
    script = "\n".join([
        "<?php",
        "require( '" + os.path.join('includes', 'Defines.php') + "' );",
        "require( '" + os.path.join(folder, filename) + "' );",
        _phpDumpMessages,
    ]) + "\n"
    php = subprocess.Popen(['php'], stdin=subprocess.PIPE, stdout=subprocess.PIPE, close_fds=True)
    # communicate() sends the script, closes stdin, reads all of stdout and
    # waits for the interpreter, so no unreaped child is left per file.
    output = php.communicate(script)[0]
    return output.splitlines()


def _parseMessageLine(line, fileLang, withText):
    """Turn one line printed by the PHP helper into a CSV row.

    line     -- 'key text' or 'lang|key text'
    fileLang -- language used when the line carries no 'lang|' prefix
    withText -- whether the message text is appended as a third column
    """
    # partition() also copes with a line that contains no space at all
    head, _separator, text = line.partition(' ')
    if '|' in head:
        row = head.split('|', 1)
    else:
        row = [fileLang.lower(), head]
    if withText:
        row.append(text.replace(_newlineMarker, '\n'))
    return row


# Iterate over folders and files
for messagesFolderType in messagesFolders:

    folders = messagesFolders[messagesFolderType]
    if messagesFolderType == 'extensions':
        # Message files of extensions can lie anywhere below the listed roots
        folders = []
        for root in messagesFolders[messagesFolderType]:
            for path, _subdirs, _names in os.walk(root):
                if '.svn' not in path:
                    folders.append(path)

    for messagesFolder in folders:

        if messagesFolderType == '#':
            # Placeholder entry: the individually listed files are given
            # relative to the root of the tree, not to a folder named '#'
            messagesFolder = ''
            files = messagesIndividualFiles
        else:
            files = os.listdir(messagesFolder)

        for filename in files:

            if not filename.endswith('.php'):
                continue

            if messagesFolderType == 'extensions' and not filename.endswith('.i18n.php'):
                continue

            if filename in messagesExcludeFiles:
                continue

            # Only the core files MessagesXx.php carry their language in their
            # name; for any other file the language is left empty unless PHP
            # reports it on each line.
            isCoreLanguageFile = messagesFolderType == 'phase3' and filename.startswith('Messages')
            if isCoreLanguageFile:
                lang = filename[8:-4]
            else:
                lang = ''

            # Read the PHP $messages variable
            messages = _readPhpMessages(messagesFolder, filename)

            if not messages:
                if isCoreLanguageFile:
                    print('Core language '+lang+' doesn’t have a $message variable in '+filename+' or is empty.')
                else:
                    print('File '+filename+' doesn’t have a $message variable or is empty.')
                continue

            # Retrieve the result and put it in a list of lists
            for message in messages:
                i18nMessages.append(_parseMessageLine(message, lang, not lightMessageStrings))

# Write the result next to where the script was started; the with block
# flushes and closes the file once the rows are written.
os.chdir(currentFolder)
with open(messageStringsResultFile, 'w') as csvFile:
    writer = csv.writer(csvFile)
    writer.writerows(i18nMessages)
 165+
 166+
 167+# # # # # # # # # # # #
 168+# Read the code files #
 169+# # # # # # # # # # # #
 170+
os.chdir(baseFolder)

# Rows of the wfMsgCalls CSV:
# [path of the file, line number, function name, message key, text of the call]
wfMsgCalls = []

# A message function name followed by its argument list up to the first ')'
msgFunctions = "(" + '|'.join(messageFunctions) + r")( *\(.*?\))"
# A message function name alone
msgFunctionsSoft = "(" + '|'.join(messageFunctions) + ")"

_callPattern = re.compile(msgFunctions, re.S)
_namePattern = re.compile(msgFunctionsSoft)
# Opening parenthesis (after the optional spaces the call pattern allows),
# then a key in single or double quotes, then ',' or ')'
_keyPattern = re.compile(r"""^ *\(\s*(['"])([a-zA-Z0-9_-]+?)\1\s*(?:,|\))""")
# '\r\n' comes first so that a Windows line break counts as a single one
_lineBreakPattern = re.compile(r'\r\n|\n|\r')


def _dropCallsInBlockComments(content):
    """Return content with the message function names inside /* */ removed.

    Only the function name is deleted, so the line breaks (and therefore the
    line numbers) are preserved. Single-line comments are not handled.
    """
    inComment = False
    c = 0
    # The length is re-read on every pass because a removal shortens content
    while c < len(content) - 1:
        pair = content[c:c + 2]
        if pair == '/*':
            inComment = True
        if pair == '*/':
            inComment = False
        if inComment and content[c] == 'w':
            found = _namePattern.match(content, c)
            if found is not None:
                content = content[:c] + content[found.end():]
        c += 1
    return content


def _lineStartOffsets(content):
    """Return the offsets at which each line of content begins (the first is 0)."""
    starts = [0]
    for lineBreak in _lineBreakPattern.finditer(content):
        starts.append(lineBreak.end())
    return starts


def _messageKey(arguments):
    """Return the literal message key opening the argument list, or '' if none."""
    found = _keyPattern.match(arguments)
    if found is None:
        return ''
    return found.group(2)


def _completeCall(call, content, pos):
    """Extend call with text from content[pos:] until its parentheses balance.

    call is the text matched so far, which stops at the first ')'. Each round
    appends as many closing parentheses as are still missing. Raises Exception
    when the text ends before the call is closed, or after 10 rounds.
    """
    rounds = 0
    while call.count('(') != call.count(')'):
        missing = call.count('(') - call.count(')')
        tail = re.search(r'.*?\)' * missing, content[pos:], re.S)
        # Check before using the match: there may be no ')' left in the file
        if tail is None:
            raise Exception('parenthesis expected')
        pos = pos + tail.end()
        call = call + tail.group(0)
        rounds = rounds + 1
        if rounds == 10:
            raise Exception('recursion')
    return call


# Iterate over folders and files
for folder in codeFolders:

    if folder == '#':
        # Placeholder entry: the individually listed files, relative to the root
        fileGroups = [('', codeIndividualFiles)]
    else:
        fileGroups = ((path, names) for path, _subdirs, names in os.walk(folder) if '.svn' not in path)

    for direct, fileset in fileGroups:

        for filename in fileset:

            if not filename.endswith('.php'):
                continue

            if filename in codeExcludeFiles:
                continue

            # The path recorded in the CSV is the very path that is opened
            filePath = os.path.join(direct, filename)
            with open(filePath, 'r') as fyle:
                content = fyle.read()

            # Remove the false positives in block comments (some could remain if in single-line comments)
            content = _dropCallsInBlockComments(content)

            # Offsets of the beginning of lines (to compute the line numbers)
            lineStarts = _lineStartOffsets(content)

            # Matches come in increasing position, so the line counter only
            # ever moves forward
            lineNumber = 1
            for m in _callPattern.finditer(content):

                while lineNumber < len(lineStarts) and lineStarts[lineNumber] <= m.start():
                    lineNumber = lineNumber + 1

                call = m.group(0)
                # The match stops at the first ')'; nested parentheses need
                # more text to give the same number of '(' and ')'
                if call.count('(') > 1:
                    call = _completeCall(call, content, m.end())

                wfMsgCalls.append([filePath, lineNumber, m.group(1), _messageKey(m.group(2)), call])

# Write the result next to where the script was started; the with block
# flushes and closes the file once the rows are written.
os.chdir(currentFolder)
with open(wfMsgCallsResultFile, 'w') as csvFile:
    writer = csv.writer(csvFile)
    writer.writerows(wfMsgCalls)
 280+
Property changes on: trunk/tools/code-utils/read_wfMsgCalls.py
___________________________________________________________________
Added: svn:eol-style
1281 + native

Follow-up revisions

Revision | Commit summary | Author | Date
r100159 | Import wfMsg statistics script by Seb35 (follow-up r100158)... | platonides | 20:53, 18 October 2011
r100160 | Import wfMsg statistics script by Seb35 (follow-up r100158, r100159)... | platonides | 20:54, 18 October 2011

Past revisions this follows-up on

Revision | Commit summary | Author | Date
r18 | bugfixes | magnus_manske | 13:42, 2 October 2001

Status & tagging log