r109201 MediaWiki - Code Review archive

Repository:MediaWiki
Revision: < r109200 | r109201 | r109202 >
Date:20:02, 17 January 2012
Author:ezachte
Status:deferred
Tags:
Comment:
misc changes
Modified paths:
  • /trunk/wikistats/progress/WikiCountsJobProgress.pl (modified) (history)
  • /trunk/wikistats/progress/WikimediaDownload.pl (modified) (history)

Diff [purge]

Index: trunk/wikistats/progress/WikimediaDownload.pl
@@ -19,7 +19,7 @@
20 20 $url_matrix = "http://www.mediawiki.org/wiki/Special:SiteMatrix" ;
21 21
22 22 $file_matrix = "site_matrix.html" ;
23 - $file_test_input = "backup-index.html" ;
 23+ $file_test_input = "Test.html" ;
24 24 $file_htm = "WikimediaDownload.htm" ;
25 25 $file_csv_lastrun = "WikimediaDumpsLastRun.csv" ;
26 26 $file_csv_lastsuccess = "WikimediaDumpsLastSuccess.csv" ;
@@ -235,8 +235,12 @@
236 236 else
237 237 { $date = "?" ; }
238 238
239 - ($project = $href) =~ s/([^\/]+)\/.*$/$1/ ;
 239+ if ($test)
 240+ { ($project = $href) =~ s/http:\/\/download.wikimedia.org\/([^\/]+)\/.*$/$1/ ; }
 241+ else
 242+ { ($project = $href) =~ s/([^\/]+)\/.*$/$1/ ; }
240 243 }
 244+ next if $project =~ /labs/ ;
241 245
242 246 $projectcount++ ;
243 247 &Log ("\n=== $projectcount: Project $project ===\n\n") ;
@@ -529,7 +533,6 @@
530 534 }
531 535
532 536 ($date,$project,$href,$usable_dumps) = split (',', $projectinfo_lastsuccess) ;
533 -
534 537 if ($project =~ /^(?:tlh|strategyapp)/) # obsolete info, project abandoned
535 538 { next ; }
536 539
Index: trunk/wikistats/progress/WikiCountsJobProgress.pl
@@ -328,7 +328,7 @@
329 329 {
330 330 if (! -d $file)
331 331 { next ; }
332 - if ($file !~ /^[A-Z]+$/)
 332+ if ($file !~ /^[A-Z]+(?:_[A-Z][a-z]+)?$/)
333 333 { next ; }
334 334 push @languages, $file ;
335 335 }
@@ -338,6 +338,7 @@
339 339
340 340 $reports {$project} = "<p><b>$project2</b> <small>[count]</small> " ;
341 341 $reports_cnt = 0 ;
 342+
342 343 foreach $language (sort @languages)
343 344 {
344 345 $language_lc = lc ($language) ;
@@ -349,6 +350,12 @@
350 351 $file_date = time - $file_age_secs ;
351 352 # if ($language eq "EN")
352 353 # {
 354+ $project_folder = $project ;
 355+ if ($project_folder =~ /Wikipedia/i)
 356+ { $project_folder = '' ; }
 357+
 358+ $language_uc = uc $language ;
 359+
353 360 if ($project eq "")
354 361 { $project = "wikipedia" ; }
355 362
@@ -356,12 +363,33 @@
357 364
358 365 $file_ago = $file_age_days ;
359 366 if ($file_ago == 0)
360 - { $file_ago = " <u>0 days, $file_age_min min</u>" ; }
 367+ # { $file_ago = " <u>0 days, $file_age_min min</u>" ; }
 368+ { $file_ago = " <u>$file_age_min min</u>" ; }
361 369
362 370 $color = "green" ;
 371+ if ($language =~ /_/) # regional report
 372+ {
 373+ $language_lc = "<i>$language_lc</i>" ; # color = "#004000" ;
 374+ ($region = $language) =~ s/^[^_]+_// ; # en_india -> EN_India
 375+ $language_uc = "EN_" . ucfirst $region ;
 376+ }
 377+
363 378 if ($file_ago > 30)
364 379 { $color = "darkred" ; }
365 - $reports {$project} .= "<small><font color=$color>$language_lc<sup>$file_ago</sup></font></small>, " ;
 380+
 381+ if ($language_lc eq 'en') # make English report more prominent, this one is more often refreshed
 382+ { $language_lc = "<b>[[en]]</b>" ; }
 383+
 384+ $reports {$project} .= "<a href='http://stats.wikimedia.org/$project_folder/$language_uc/Sitemap.htm'><small><font color=$color>$language_lc<sup>$file_ago</sup></font></small></a>, " ;
 385+
 386+ if ($language_lc =~ /\[\[en\]\]/) # make English report more prominent, this one is more often refreshed
 387+ { $reports_english {$project} = "<a href='http://stats.wikimedia.org/$project_folder/$language_uc/Sitemap.htm'><small><font color=$color>$project<sup>$file_ago</sup></font></small></a>" }
 388+
 389+ # if ($project !~ /_/)
 390+ # { $reports {$project} .= "<small><font color=$color>$language_lc<sup>$file_ago</sup></font></small>, " ; }
 391+ # else
 392+ # { $reports {"$project regional"} .= "<small><font color=$color>$language_lc<sup>$file_ago</sup></font></small>, " ; }
 393+
366 394 $reports_cnt ++ ;
367 395
368 396 if ($language eq "EN")
@@ -433,12 +461,13 @@
434 462 "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"/>\n" .
435 463 "<meta http-equiv=\"refresh\" content=\"60\">\n" .
436 464 "<title>WikiStats data gathering progress</title>\n" .
437 - # "<style type=\text/css\">\n" .
438 - # "li { background-color: #f4f4f4; list-style-type: none; }\n" .
439 - # "li li { background-color: white; }\n" .
440 - # "li ul { margin-top: 4px; margin-bottom: 8px; text-color: #900000}\n" .
441 - # "</style>\n" .
442 - "<body bgcolor=#CCCCCC>\n" ;
 465+ "<style type=\text/css\">\n" .
 466+ "a:link { color:blue;text-decoration:none;}\n" .
 467+ "a:visited {color:#0000FF;text-decoration:none;}\n" .
 468+ "a:active {color:#0000FF;text-decoration:none;}\n" .
 469+ "a:hover {color:#FF00FF;text-decoration:underline}\n" .
 470+ "</style>\n" .
 471+ "</head>\n<body bgcolor=#CCCCCC>\n" ;
443 472
444 473 ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst)=gmtime(time);
445 474 $now_gm = sprintf ("%02d-%02d-%04d %02d:%02d\n",$mday,$mon+1,$year+1900,$hour,$min) ;
@@ -530,14 +559,13 @@
531 560 {
532 561 print HTML "<a name='reports' id='reports'></a>" .
533 562 "<hr><b>Reports generated</b>\n" .
534 - "<small>This section shows -per project per target language- the file age of index.html in days (for age < 24 hours in minutes). " .
 563+ "<small>This section shows -per project per target language- the <b>file age of index.html in days</b> (for age < 24 hours in minutes). " .
535 564 "Note that a very recently generated report does not always contain counts up to the previous month. " .
536 565 "See above for how up to data counts are per project language. " .
537 566 "Color green here means: reports have been generated and published less than 30 days ago.</small>" ;
538 567
539 -
540 - foreach $report (sort {$report_dates {$b} <=> $report_dates {$a}} keys %report_dates)
541 - { print HTML $reports {$report} . "\n" ; }
 568+ foreach $project (sort {$report_dates {$b} <=> $report_dates {$a}} keys %report_dates)
 569+ { print HTML $reports {$project} . "\n" ; }
542 570 }
543 571
544 572 print HTML "<hr><p><b>Longest jobs</b> <small>\n" ;
@@ -653,6 +681,10 @@
654 682 "td.cb {text-align:center; border: inset 1px #FFFFFF}\n" .
655 683 "td.lb {text-align:left; border: inset 1px #FFFFFF}\n" .
656 684 "td.rb {text-align:right; border: inset 1px #FFFFFF}\n" .
 685+ "a:link { color:blue;text-decoration:none;}\n" .
 686+ "a:visited {color:#0000FF;text-decoration:none;}\n" .
 687+ "a:active {color:#0000FF;text-decoration:none;}\n" .
 688+ "a:hover {color:#FF00FF;text-decoration:underline}\n" .
657 689 "-->\n" .
658 690 "</style>\n" .
659 691
@@ -757,15 +789,21 @@
758 790
759 791 if ($reports_total > 0)
760 792 {
 793+ foreach $project (sort {$report_dates {$b} <=> $report_dates {$a}} keys %report_dates)
 794+ { $reports_english .= $reports_english {$project} . ", "; }
 795+ $reports_english =~ s/, $// ;
 796+
761 797 print HTML "<a name='reports' id='reports'></a>\n" .
762 798 "<hr><p><h3>Progress per project, reporting</h3>" .
763 - "<small>This section shows -per project per target language- the file age of index.html in days (for age < 24 hours in minutes). " .
 799+ "<small>This section shows -per project per target language- the <b>file age of index.html in days</b> (for age < 24 hours in minutes). " .
764 800 "Note that a very recently generated report does not always contain counts up to the previous month. " .
765 801 "See above for how up to data counts are per project language. " .
766 - "Color green here means: reports have been generated and published less than 30 days ago.</small>" ;
 802+ "Color green here means: reports have been generated and published less than 30 days ago. English reports are generated more often (performance issue), and therefore also listed separately.</small><p>" .
 803+ "<b>English reports</b>: $reports_english<p>" ;
767 804
768 - foreach $report (sort {$report_dates {$b} <=> $report_dates {$a}} keys %report_dates)
769 - { print HTML $reports {$report} . "\n" ; }
 805+
 806+ foreach $project (sort {$report_dates {$b} <=> $report_dates {$a}} keys %report_dates)
 807+ { print HTML $reports {$project} . "\n" ; }
770 808 }
771 809
772 810 print HTML "<hr>" ;

Status & tagging log