r29574 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r29573‎ | r29574 | r29575 >
Date:06:37, 11 January 2008
Author:brion
Status:old
Tags:
Comment:
* break old checkuser log import out to a separate include file; break up the function for easier testing
* importLog.php can be used to do a manual import, a dry run, or a test of the raw log file parser
* strip null characters from log input (caused by NFS write conflicts)
* handle missing log file gracefully during update.php run
Modified paths:
  • /trunk/extensions/CheckUser/cu_log_import.inc (added) (history)
  • /trunk/extensions/CheckUser/importLog.php (added) (history)
  • /trunk/extensions/CheckUser/install.inc (modified) (history)

Diff [purge]

Index: trunk/extensions/CheckUser/importLog.php
@@ -0,0 +1,54 @@
 2+<?php
 3+
 4+require_once "../../maintenance/commandLine.inc";
 5+require "cu_log_import.inc";
 6+
 7+function test_cu_log( $log ) {
 8+ $matched = 0;
 9+ $unmatched = 0;
 10+ $badtime = 0;
 11+
 12+ $file = fopen( $log, 'r' );
 13+ while ( false !== ( $line = fgets( $file ) ) ) {
 14+ $found = false;
 15+ $data = import_cu_log_line( $line );
 16+ if( $data ) {
 17+ $matched++;
 18+ if( !$data['timestamp'] ) {
 19+ print "[bad timestamp] $line";
 20+ $badtime++;
 21+ }
 22+ } else {
 23+ print "[bad format] $line";
 24+ $unmatched++;
 25+ }
 26+ }
 27+ fclose( $file );
 28+ print "\n$matched matched, $badtime matched with bad time, $unmatched unprocessed\n";
 29+}
 30+
 31+if( $args ) {
 32+ $log = $args[0];
 33+ if( isset( $options['test'] ) ) {
 34+ test_cu_log( $log );
 35+ } else {
 36+ $dryRun = isset( $options['dry-run'] );
 37+ if( $dryRun ) {
 38+ $db = false;
 39+ echo "Dry run; no actual imports will be made...\n";
 40+ } else {
 41+ $db = wfGetDB( DB_MASTER );
 42+ }
 43+ import_cu_log( $db, $log );
 44+ }
 45+} else {
 46+ echo "CheckUser old log file importer.\n";
 47+ echo "If cu_log table has been manually added, can be used to import old data.\n";
 48+ echo "\n";
 49+ echo "Usage: php importLog.php [--test] [--dry-run] checkuser.log\n";
 50+ echo " --dry-run Parse and do local lookups, but don't perform inserts\n";
 51+ echo " --test Test log parser without doing local lookups\n";
 52+ echo "\n";
 53+}
 54+
 55+?>
\ No newline at end of file
Property changes on: trunk/extensions/CheckUser/importLog.php
___________________________________________________________________
Added: svn:eol-style
156 + native
Index: trunk/extensions/CheckUser/cu_log_import.inc
@@ -0,0 +1,94 @@
 2+<?php
 3+
 4+function import_cu_log_line( $line ) {
 5+ $rxTimestamp = '(?P<timestamp>\d+:\d+, \d+ \w+ \d+)';
 6+ $rxUser = '(?P<user>.*?)';
 7+ $rxTarget = '(?P<target>.*?)';
 8+ $rxWiki = '(?P<wiki>[^)]*?)';
 9+ $rxReason = '(?: \("(?P<reason>.*)"\))?';
 10+
 11+ // Strip nulls due to NFS write collisions
 12+ $line = str_replace( "\0", "", $line );
 13+
 14+ $regexes = array(
 15+ 'ipedits-xff' => "!^<li>$rxTimestamp, $rxUser got edits for XFF $rxTarget on $rxWiki$rxReason</li>!",
 16+ 'ipedits' => "!^<li>$rxTimestamp, $rxUser got edits for" ." $rxTarget on $rxWiki$rxReason</li>!",
 17+ 'ipusers-xff' => "!^<li>$rxTimestamp, $rxUser got users for XFF $rxTarget on $rxWiki$rxReason</li>!",
 18+ 'ipusers' => "!^<li>$rxTimestamp, $rxUser got users for" ." $rxTarget on $rxWiki$rxReason</li>!",
 19+ 'userips' => "!^<li>$rxTimestamp, $rxUser got IPs for". " $rxTarget on $rxWiki$rxReason</li>!" );
 20+
 21+ foreach ( $regexes as $type => $regex ) {
 22+ $m = false;
 23+ if ( preg_match( $regex, $line, $m ) ) {
 24+
 25+ $data = array(
 26+ 'timestamp' => strtotime( $m['timestamp'] ),
 27+ 'user' => $m['user'],
 28+ 'reason' => isset( $m['reason'] ) ? $m['reason'] : '',
 29+ 'type' => $type,
 30+ 'wiki' => $m['wiki'],
 31+ 'target' => $m['target'] );
 32+
 33+ return $data;
 34+ }
 35+ }
 36+}
 37+
 38+function import_cu_log( $db, $log ) {
 39+ global $wgDBname;
 40+
 41+ $file = fopen( $log, 'r' );
 42+
 43+ $matched = 0;
 44+ $unmatched = 0;
 45+
 46+ while ( false !== ( $line = fgets( $file ) ) ) {
 47+ $found = false;
 48+ $data = import_cu_log_line( $line );
 49+ if( $data ) {
 50+ if ( $data['wiki'] != wfWikiID() && $data['wiki'] != $wgDBname ) {
 51+ $unmatched++;
 52+ continue;
 53+ }
 54+
 55+ // Local wiki lookups...
 56+ $user = User::newFromName( $data['user'] );
 57+
 58+ list( $start, $end ) = IP::parseRange( $data['target'] );
 59+ if ( $start === false ) {
 60+ $targetUser = User::newFromName( $data['target'] );
 61+ $targetID = $targetUser ? $targetUser->getID() : 0;
 62+ $start = $end = $hex = '';
 63+ } else {
 64+ $hex = $start;
 65+ if ( $start == $end ) {
 66+ $start = $end = '';
 67+ }
 68+ $targetID = 0;
 69+ }
 70+
 71+ if( $db ) {
 72+ $fields = array(
 73+ 'cul_id' => $db->nextSequenceValue( 'cu_log_cul_id' ),
 74+ 'cul_timestamp' => $db->timestamp( $data['timestamp'] ),
 75+ 'cul_user' => $user->getID(),
 76+ 'cul_user_text' => $user->getName(),
 77+ 'cul_reason' => $data['reason'],
 78+ 'cul_type' => $data['type'],
 79+ 'cul_target_id' => $targetID,
 80+ 'cul_target_text' => $data['target'],
 81+ 'cul_target_hex' => $hex,
 82+ 'cul_range_start' => $start,
 83+ 'cul_range_end' => $end );
 84+
 85+ $db->insert( 'cu_log', $fields, __METHOD__ );
 86+ }
 87+
 88+ $matched++;
 89+ }
 90+ $unmatched ++;
 91+ }
 92+ echo "...cu_log table populated: $matched matched rows, $unmatched discarded rows\n";
 93+}
 94+
 95+?>
\ No newline at end of file
Property changes on: trunk/extensions/CheckUser/cu_log_import.inc
___________________________________________________________________
Added: svn:eol-style
196 + native
Index: trunk/extensions/CheckUser/install.inc
@@ -1,5 +1,7 @@
22 <?php
33
 4+require "cu_log_import.inc";
 5+
46 define( 'BATCH_SIZE', 100 );
57
68 function create_cu_changes( $db, $cutoff = null ) {
@@ -67,6 +69,7 @@
6870 echo "...cu_changes table added and populated.\n";
6971 }
7072
 73+
7174 function create_cu_log( $db ) {
7275 global $wgDBtype, $wgCheckUserLog, $wgDBname;
7376 if( $db->tableExists( 'cu_log' ) ) {
@@ -79,78 +82,16 @@
8083
8184 echo "...cu_log added\n";
8285
83 - if ( empty( $wgCheckUserLog ) ) {
 86+ if( empty( $wgCheckUserLog ) ) {
 87+ echo "...logging disabled, skipping log import.\n";
8488 return;
8589 }
86 -
87 -
88 - $file = fopen( $wgCheckUserLog, 'r' );
89 -
90 - $rxTimestamp = '(?P<timestamp>\d+:\d+, \d+ \w+ \d+)';
91 - $rxUser = '(?P<user>.*?)';
92 - $rxTarget = '(?P<target>.*?)';
93 - $rxWiki = '(?P<wiki>[^)]*?)';
94 - $rxReason = '(?: \("(?P<reason>.*)"\))?';
95 -
96 - $regexes = array(
97 - 'ipedits-xff' => "!^<li>$rxTimestamp, $rxUser got edits for XFF $rxTarget on $rxWiki$rxReason</li>!",
98 - 'ipedits' => "!^<li>$rxTimestamp, $rxUser got edits for" ." $rxTarget on $rxWiki$rxReason</li>!",
99 - 'ipusers-xff' => "!^<li>$rxTimestamp, $rxUser got users for XFF $rxTarget on $rxWiki$rxReason</li>!",
100 - 'ipusers' => "!^<li>$rxTimestamp, $rxUser got users for" ." $rxTarget on $rxWiki$rxReason</li>!",
101 - 'userips' => "!^<li>$rxTimestamp, $rxUser got IPs for". " $rxTarget on $rxWiki$rxReason</li>!" );
102 -
103 - $matched = 0;
104 - $unmatched = 0;
10590
106 -
107 - while ( false !== ( $line = fgets( $file ) ) ) {
108 - $found = false;
109 - foreach ( $regexes as $type => $regex ) {
110 - $m = false;
111 - if ( preg_match( $regex, $line, $m ) ) {
112 - if ( $m['wiki'] != wfWikiID() && $m['wiki'] != $wgDBname ) {
113 - continue;
114 - }
115 -
116 - list( $start, $end ) = IP::parseRange( $m['target'] );
117 - if ( $start === false ) {
118 - $targetUser = User::newFromName( $m['target'] );
119 - $targetID = $targetUser ? $targetUser->getID() : 0;
120 - $start = $end = $hex = '';
121 - } else {
122 - $hex = $start;
123 - if ( $start == $end ) {
124 - $start = $end = '';
125 - }
126 - $targetID = 0;
127 - }
128 -
129 - $user = User::newFromName( $m['user'] );
130 - $fields = array(
131 - 'cul_id' => $db->nextSequenceValue( 'cu_log_cul_id' ),
132 - 'cul_timestamp' => $db->timestamp( strtotime( $m['timestamp'] ) ),
133 - 'cul_user' => $user->getID(),
134 - 'cul_user_text' => $user->getName(),
135 - 'cul_reason' => isset( $m['reason'] ) ? $m['reason'] : '',
136 - 'cul_type' => $type,
137 - 'cul_target_id' => $targetID,
138 - 'cul_target_text' => $m['target'],
139 - 'cul_target_hex' => $hex,
140 - 'cul_range_start' => $start,
141 - 'cul_range_end' => $end );
142 -
143 - $db->insert( 'cu_log', $fields, __METHOD__ );
144 -
145 - $found = true;
146 - break;
147 - }
148 - }
149 - if ( $found ) {
150 - $matched ++;
151 - } else {
152 - $unmatched ++;
153 - }
 91+ if( !file_exists( $wgCheckUserLog ) ) {
 92+ echo "...log file missing, skipping log import.\n";
 93+ return;
15494 }
155 - echo "...cu_log table populated: $matched matched rows, $unmatched discarded rows\n";
156 -}
15795
 96+ echo "...importing old CheckUser log file...\n";
 97+ import_cu_log( $db, $wgCheckUserLog );
 98+}

Status & tagging log