r53452 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r53451‎ | r53452 | r53453 >
Date:19:00, 18 July 2009
Author:avar
Status:deferred
Tags:
Comment:
A directory for the tools I'm writing to import OSM on Cassini
Modified paths:
  • /trunk/tools/osm-tools (added) (history)
  • /trunk/tools/osm-tools/README (added) (history)
  • /trunk/tools/osm-tools/osm2pgsql-style (added) (history)
  • /trunk/tools/osm-tools/osm2pgsql-style/wikipedia-language-codes.pl (added) (history)

Diff [purge]

Index: trunk/tools/osm-tools/osm2pgsql-style/wikipedia-language-codes.pl
@@ -0,0 +1,117 @@
 2+#!/usr/bin/env perl
 3+
 4+=head1 NAME
 5+
 6+wikipedia-language-codes - Get the list of language codes currently used on Wikipedia from Special:SiteMatrix
 7+
 8+=head1 SYNOPSIS
 9+
 10+ # Spew out a list of Wikipedia language codes and corresponding languages
 11+ wikipedia-language-codes
 12+
 13+=head1 OPTIONS
 14+
 15+=over
 16+
 17+=item -h, --help
 18+
 19+Print a usage message listing all available options
 20+
=item --url

The URL to the SiteMatrix,
L<http://en.wikipedia.org/wiki/Special:SiteMatrix> by default.

=back

 26+=head1 AUTHOR
 27+
 28+E<AElig>var ArnfjE<ouml>rE<eth> Bjarmason <avarab@gmail.com>
 29+
 30+=cut
 31+
 32+use strict;
 33+use warnings;
 34+
 35+use WWW::Mechanize;
 36+use HTML::TableParser::Grid;
 37+use Encode qw(encode decode);
 38+use YAML::Syck qw(Dump);
 39+
 40+use Getopt::Long;
 41+use Pod::Usage ();
 42+
#
# Get command line options
#

# -h/--help prints the usage message; --url overrides the default
# SiteMatrix location. $url is read later by get_sitematrix().
Getopt::Long::Parser->new(
    config => [ qw< bundling no_ignore_case no_require_order > ],
)->getoptions(
    'h|help' => \my $help,
    'url=s'  => \(my $url = 'http://en.wikipedia.org/wiki/Special:SiteMatrix'),
) or help(exitval => 2);    # a bad command line is an error: exit non-zero

# An explicit request for help is not an error, so help() keeps its
# default exit status of 0 here.
help() if $help;

#
# main
#

# Build the code => language map and dump it as YAML on standard output.
my %matrix = parse_sitematrix();

print Dump \%matrix;

exit 0;
 65+
# Fetch the SiteMatrix page and turn its table into a code => language map.
#
# Takes no arguments. Returns a hash (flattened to a list) keyed on the
# second cell of each table row (the language code), with the first cell
# (the language name) as the value. Both are encoded to UTF-8 bytes.
sub parse_sitematrix
{
    my $content = get_sitematrix();

    my $parser = HTML::TableParser::Grid->new($content);

    my %lang;

    for my $n (0 .. $parser->num_rows - 1) {
        # Only the first two cells of each row are used.
        my ($language, $code) = $parser->row($n);

        # Skip rows that lack either cell: they would otherwise raise
        # "uninitialized value" warnings and add an entry under an
        # empty key.
        next unless defined $language && length $language;
        next unless defined $code     && length $code;

        # The 'Total' row is not a language entry.
        next if $language eq 'Total';

        # Encode the character strings to UTF-8 bytes for output.
        $lang{ encode('utf8', $code) } = encode('utf8', $language);
    }

    return %lang;
}
 90+
# Download the page at $url (set from the --url option) and return its
# content.
#
# Dies with the URL and the HTTP status line if the request fails.
sub get_sitematrix
{
    my $mech = WWW::Mechanize->new(
        agent     => $0,
        # Turn off Mechanize's own die-on-failure so the explicit check
        # below is what reports the error.
        autocheck => 0,
    );

    $mech->get($url);

    unless ($mech->success) {
        # Include the status line so the failure can be diagnosed.
        die "Can't get $url: " . $mech->res->status_line;
    }

    return $mech->content;
}
 108+
# Print the usage message via Pod::Usage.
#
# Accepts optional named arguments:
#   verbose - passed through unchanged as pod2usage's -verbose
#   exitval - passed as pod2usage's -exitval; 0 when missing or false
sub help {
    my %opt = @_;

    require Pod::Usage;

    my $exit_code = $opt{exitval} || 0;

    Pod::Usage::pod2usage(
        -verbose => $opt{verbose},
        -exitval => $exit_code,
    );
}
Index: trunk/tools/osm-tools/README
@@ -0,0 +1,5 @@
 2+Here are some helpful tools needed to set up the OSM stack on the
 3+Wikimedia Toolserver. To start with, these are the things needed for
 4+the work outlined here:
 5+
 6+http://lists.wikimedia.org/pipermail/maps-l/2009-July/000136.html

Status & tagging log