r62510 MediaWiki - Code Review archive

Repository: MediaWiki
Revision: r62509 | r62510 | r62511 >
Date: 10:38, 15 February 2010
Author: happydog
Status: resolved (Comments)
Tags:
Comment:
CodeReview: Modified the first argument to svnImport.php so it is now possible to specify * to import from all defined repositories. This meant moving the import code into a function (as it is called multiple times). For simplicity I have left that function in svnImport.php but feel free to move it elsewhere if that seems more appropriate.

I have also updated the usage notes to give more information on this and the --precache argument I added in my last commit.

[NOTE: It will be a lot easier to review this commit if you un-indent the new function body first...]
Modified paths:
  • /trunk/extensions/CodeReview/svnImport.php (modified) (history)

Diff [purge]

Index: trunk/extensions/CodeReview/svnImport.php
@@ -8,7 +8,17 @@
99 require "$IP/maintenance/commandLine.inc";
1010
1111 if ( !isset( $args[0] ) ) {
12 - echo "Usage: php svnImport.php <repo> [<start>]\n";
 12+ echo "Usage: php svnImport.php <repo> [<start>] [--precache=<N>]\n";
 13+ echo " <repo>\n";
 14+ echo " The name of the repo. Use * to import from all defined repos.\n";
 15+ echo " <start>\n";
 16+ echo " The revision to begin the import from. If not specified then\n";
 17+ echo " it starts from the last repo imported to the wiki. Ignored if\n";
 18+ echo " * is specified for <repo>.\n";
 19+ echo " --precache=<N>\n";
 20+ echo " (default N=50) Pre-cache diffs for last N revisions. Use 0 to \n";
 21+ echo " disable pre-caching, or -1 to pre-cache the entire repository.\n";
 22+ echo " Already-cached revisions do not count as part of this number.\n";
1323 die;
1424 }
1525
@@ -20,91 +30,104 @@
2131 die( "Invalid argument for --precache (must be a positive integer, or -1 for all)" );
2232 }
2333
24 -$repo = CodeRepository::newFromName( $args[0] );
25 -
26 -if ( !$repo ) {
27 - echo "Invalid repo {$args[0]}\n";
28 - die;
 34+if ( $args[0] == "*" ) {
 35+ $repoList = CodeRepository::getRepoList();
 36+ foreach ( $repoList as $repoInfo ) {
 37+ importRepo( $repoInfo->getName() );
 38+ }
 39+} else {
 40+ importRepo( $args[0], @$args[1] );
2941 }
3042
31 -$svn = SubversionAdaptor::newFromRepo( $repo->getPath() );
32 -$lastStoredRev = $repo->getLastStoredRev();
 43+function importRepo( $repoName, $start = null ) {
 44+ global $wgCodeReviewImportBatchSize, $cacheSize;
3345
34 -$chunkSize = $wgCodeReviewImportBatchSize;
 46+ $repo = CodeRepository::newFromName( $repoName );
3547
36 -$startTime = microtime( true );
37 -$revCount = 0;
38 -$start = isset( $args[1] ) ? intval( $args[1] ) : $lastStoredRev + 1;
39 -if ( $start > ( $lastStoredRev + 1 ) ) {
40 - echo "Invalid starting point r{$start}\n";
41 - die;
42 -}
 48+ if ( !$repo ) {
 49+ echo "Invalid repo $repoName\n";
 50+ die;
 51+ }
4352
44 -echo "Syncing repo {$args[0]} from r$start to HEAD...\n";
 53+ $svn = SubversionAdaptor::newFromRepo( $repo->getPath() );
 54+ $lastStoredRev = $repo->getLastStoredRev();
4555
46 -if ( !$svn->canConnect() )
47 - die( "Unable to connect to repository.\n" );
 56+ $chunkSize = $wgCodeReviewImportBatchSize;
4857
49 -while ( true ) {
50 - $log = $svn->getLog( '', $start, $start + $chunkSize - 1 );
51 - if ( empty( $log ) ) {
52 - # Repo seems to give a blank when max rev is invalid, which
53 - # stops new revisions from being added. Try to avoid this
54 - # by trying less at a time from the last point.
55 - if ( $chunkSize <= 1 ) {
56 - break; // done!
57 - }
58 - $chunkSize = max( 1, floor( $chunkSize / 4 ) );
59 - continue;
60 - } else {
61 - $start += $chunkSize;
 58+ $startTime = microtime( true );
 59+ $revCount = 0;
 60+ $start = isset( $start ) ? intval( $start ) : $lastStoredRev + 1;
 61+ if ( $start > ( $lastStoredRev + 1 ) ) {
 62+ echo "Invalid starting point r{$start}\n";
 63+ die;
6264 }
63 - if ( !is_array( $log ) ) {
64 - var_dump( $log );
65 - die( 'wtf' );
66 - }
67 - foreach ( $log as $data ) {
68 - $revCount++;
69 - $delta = microtime( true ) - $startTime;
70 - $revSpeed = $revCount / $delta;
7165
72 - $codeRev = CodeRevision::newFromSvn( $repo, $data );
73 - $codeRev->save();
 66+ echo "Syncing repo $repoName from r$start to HEAD...\n";
7467
75 - printf( "%d %s %s (%0.1f revs/sec)\n",
76 - $codeRev->mId,
77 - wfTimestamp( TS_DB, $codeRev->mTimestamp ),
78 - $codeRev->mAuthor,
79 - $revSpeed );
 68+ if ( !$svn->canConnect() )
 69+ die( "Unable to connect to repository.\n" );
 70+
 71+ while ( true ) {
 72+ $log = $svn->getLog( '', $start, $start + $chunkSize - 1 );
 73+ if ( empty( $log ) ) {
 74+ # Repo seems to give a blank when max rev is invalid, which
 75+ # stops new revisions from being added. Try to avoid this
 76+ # by trying less at a time from the last point.
 77+ if ( $chunkSize <= 1 ) {
 78+ break; // done!
 79+ }
 80+ $chunkSize = max( 1, floor( $chunkSize / 4 ) );
 81+ continue;
 82+ } else {
 83+ $start += $chunkSize;
 84+ }
 85+ if ( !is_array( $log ) ) {
 86+ var_dump( $log );
 87+ die( 'wtf' );
 88+ }
 89+ foreach ( $log as $data ) {
 90+ $revCount++;
 91+ $delta = microtime( true ) - $startTime;
 92+ $revSpeed = $revCount / $delta;
 93+
 94+ $codeRev = CodeRevision::newFromSvn( $repo, $data );
 95+ $codeRev->save();
 96+
 97+ printf( "%d %s %s (%0.1f revs/sec)\n",
 98+ $codeRev->mId,
 99+ wfTimestamp( TS_DB, $codeRev->mTimestamp ),
 100+ $codeRev->mAuthor,
 101+ $revSpeed );
 102+ }
 103+ wfWaitForSlaves( 5 );
80104 }
81 - wfWaitForSlaves( 5 );
82 -}
83105
84 -if ( $cacheSize != 0 ) {
85 - if ( $cacheSize == -1 )
86 - echo "Pre-caching all uncached diffs...\n";
87 - elseif ( $cacheSize == 1 )
88 - echo "Pre-caching the latest diff...\n";
89 - else
90 - echo "Pre-caching the latest $cacheSize diffs...\n";
 106+ if ( $cacheSize != 0 ) {
 107+ if ( $cacheSize == -1 )
 108+ echo "Pre-caching all uncached diffs...\n";
 109+ elseif ( $cacheSize == 1 )
 110+ echo "Pre-caching the latest diff...\n";
 111+ else
 112+ echo "Pre-caching the latest $cacheSize diffs...\n";
91113
92 - $dbw = wfGetDB( DB_MASTER );
93 - $options = array( 'ORDER BY' => 'cr_id DESC' );
94 - if ( $cacheSize > 0 )
95 - $options['LIMIT'] = $cacheSize;
 114+ $dbw = wfGetDB( DB_MASTER );
 115+ $options = array( 'ORDER BY' => 'cr_id DESC' );
 116+ if ( $cacheSize > 0 )
 117+ $options['LIMIT'] = $cacheSize;
96118
97 - $res = $dbw->select( 'code_rev', 'cr_id',
98 - array( 'cr_repo_id' => $repo->getId(), 'cr_diff IS NULL OR cr_diff = ""' ),
99 - __METHOD__,
100 - $options
101 - );
102 - while ( $row = $dbw->fetchObject( $res ) ) {
103 - $rev = $repo->getRevision( $row->cr_id );
104 - $diff = $repo->getDiff( $row->cr_id ); // trigger caching
105 - echo "Diff r{$row->cr_id} done\n";
 119+ $res = $dbw->select( 'code_rev', 'cr_id',
 120+ array( 'cr_repo_id' => $repo->getId(), 'cr_diff IS NULL OR cr_diff = ""' ),
 121+ __METHOD__,
 122+ $options
 123+ );
 124+ while ( $row = $dbw->fetchObject( $res ) ) {
 125+ $rev = $repo->getRevision( $row->cr_id );
 126+ $diff = $repo->getDiff( $row->cr_id ); // trigger caching
 127+ echo "Diff r{$row->cr_id} done\n";
 128+ }
106129 }
107 -}
108 -else
109 - echo "Pre-caching skipped.\n";
 130+ else
 131+ echo "Pre-caching skipped.\n";
110132
111 -echo "Done!\n";
 133+ echo "Done!\n";
 134+}
\ No newline at end of file

Follow-up revisions

Revision | Commit summary | Author | Date
r62914 | Rewrite svnImport to subclass Maintenance.php, rename "*" to "all" for all re... | demon | 13:18, 24 February 2010

Comments

#Comment by Tim Starling (talk | contribs)   07:25, 22 February 2010

Don't use the error suppression @ operator; it doesn't work in E_STRICT, which many developers have enabled. Use if( isset( ... ) ) instead.

+	$start = isset( $start ) ? intval( $start ) : $lastStoredRev + 1;

isset() is for error suppression only; don't use it on a variable which you think definitely exists, since it will suppress warnings if you make a typo. Use is_null() or "$start === null".

#Comment by Bryan (talk | contribs)   19:39, 23 February 2010

Why the heck is $cacheSize a global rather than a function argument?

#Comment by HappyDog (talk | contribs)   11:50, 14 March 2010

Both the above issues were fixed in demon's big update (r62914), but comments taken on board. :-)

Status & tagging log