r54532 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r54531‎ | r54532 | r54533 >
Date:16:44, 6 August 2009
Author:werdna
Status:deferred
Tags:
Comment:
Some utility scripts for parsing and importing a discussion in Wikitext into LiquidThreads.
Warning: Seriously ugly, written mostly in perl
Modified paths:
  • /trunk/extensions/LiquidThreads/import (added) (history)
  • /trunk/extensions/LiquidThreads/import/WikiText (added) (history)
  • /trunk/extensions/LiquidThreads/import/WikiText/ParseDiscussion.pm (added) (history)
  • /trunk/extensions/LiquidThreads/import/WikiText/ParseHeadings.pm (added) (history)
  • /trunk/extensions/LiquidThreads/import/import-parsed-discussions.php (added) (history)
  • /trunk/extensions/LiquidThreads/import/parse-wikitext-discussion.pl (added) (history)
  • /trunk/extensions/LiquidThreads/import/parse-wikitext-headings.pl (added) (history)

Diff [purge]

Index: trunk/extensions/LiquidThreads/import/import-parsed-discussions.php
@@ -0,0 +1,85 @@
 2+<?php
 3+
 4+require_once ( getenv('MW_INSTALL_PATH') !== false
 5+ ? getenv('MW_INSTALL_PATH')."/maintenance/commandLine.inc"
 6+ : dirname( __FILE__ ) . '/../../maintenance/commandLine.inc' );
 7+
 8+## Imports JSON-encoded discussions from parse-wikitext-discussions.pl
 9+
 10+$structure = json_decode(file_get_contents( $argv[1] ), true);
 11+
 12+$article = new Article( Title::newFromText( $argv[2] ) );
 13+
 14+$subject = '';
 15+$rootPost = null;
 16+
 17+recursiveParseArray( $structure );
 18+
 19+function recursiveParseArray( $array ) {
 20+ static $recurseLevel = 0;
 21+
 22+ $recurseLevel++;
 23+
 24+ if ($recurseLevel > 90) {
 25+ print var_dump( $array );
 26+ die( wfBacktrace() );
 27+ }
 28+
 29+ global $subject, $rootPost;
 30+ if ( is_array( $array ) && isset($array['title']) ) {
 31+ $subject = $array['title'];
 32+ recursiveParseArray( $array['content'] );
 33+
 34+ $rootPost = null;
 35+ } elseif ( is_array( $array ) && isset($array['user']) ) {
 36+ // We have a post.
 37+ $t = createPost( $array, $subject, $rootPost );
 38+
 39+ if (!$rootPost) {
 40+ $rootPost = $t;
 41+ }
 42+ } elseif (is_array($array)) {
 43+ foreach( $array as $info ) {
 44+ recursiveParseArray( $info );
 45+ }
 46+
 47+ $rootPost = null;
 48+ }
 49+
 50+ $recurseLevel--;
 51+}
 52+
 53+function createPost( $info, $subject, $super = null ) {
 54+ $userName = $info['user'];
 55+ if ( strpos( $userName, '#' ) !== false ) {
 56+ $pos = strpos( $userName, '#' );
 57+
 58+ $userName = substr( $userName, 0, $pos );
 59+ }
 60+
 61+ $user = User::newFromName( $userName, /* no validation */ false );
 62+
 63+ if (!$user) {
 64+ throw new MWException( "Username ".$info['user']." is invalid." );
 65+ }
 66+
 67+ global $article;
 68+
 69+ if ($super) {
 70+ $title = Threads::newReplyTitle( $super, $user );
 71+ } else {
 72+ $title = Threads::newThreadTitle( $subject, $article );
 73+ }
 74+
 75+ print "Creating thread $title as a subthread of ".($super ? $super->title() : 'none')."\n";
 76+
 77+ $root = new Article( $title );
 78+ $root->doEdit( $info['content'], 'Imported from JSON', EDIT_NEW, false, $user );
 79+
 80+ $t = LqtView::postEditUpdates($super ? 'reply' : 'new', $super, $root, $article,
 81+ $subject, 'Imported from JSON', null );
 82+
 83+ $t = Threads::withId( $t->id() ); // Some weirdness.
 84+
 85+ return $t;
 86+}
Index: trunk/extensions/LiquidThreads/import/WikiText/ParseHeadings.pm
@@ -0,0 +1,45 @@
 2+#!/usr/bin/perl -sw
 3+
 4+package WikiText::ParseHeadings;
 5+
 6+# Parses out headings from wikitext
 7+
 8+my $structure = [];
 9+my $pointer = { 0 => $structure };
 10+my $content_buffer;
 11+my $level = 0;
 12+
 13+sub parse_line {
 14+ my ($unused, $_) = @_;
 15+
 16+ my $orig = $_;
 17+ s/\s*$//g;
 18+
 19+ if (m/^(\=+)\s*(.*?)\s*\=+$/) {
 20+ $level = length($1)-1;
 21+ my $insertLevel = $level - 1;
 22+
 23+ push @{$pointer->{$level}}, $content_buffer;
 24+ my $insert = { 'title' => $2, 'content' => [] };
 25+ push @{$pointer->{$insertLevel}}, $insert;
 26+
 27+ $pointer->{$level} = $insert->{'content'};
 28+ } else {
 29+ $content_buffer .= $orig;
 30+ }
 31+}
 32+
 33+sub finish_parse {
 34+ push @{$pointer->{$level}}, $content_buffer;
 35+}
 36+
 37+sub reset_state {
 38+ $structure = [];
 39+ $pointer = { 0 => $structure };
 40+ $content_buffer = '';
 41+ $level = 0;
 42+}
 43+
 44+sub structure { return $structure; }
 45+
 46+1;
Index: trunk/extensions/LiquidThreads/import/WikiText/ParseDiscussion.pm
@@ -0,0 +1,53 @@
 2+#!/usr/bin/perl -sw
 3+
 4+package WikiText::ParseDiscussion;
 5+
 6+## Package for parsing discussions set out in wikitext, with signatures etc.
 7+## Currently does not attempt to alter threading, owing to the inconsistent and confusing
 8+## indentation standards across wikis, discussion pages and users.
 9+## May or may not blow up when people quote each other's posts, this is pretty rough and
 10+## ready
 11+
 12+use YAML;
 13+
 14+my $posts;
 15+my $current_post;
 16+my $signatureLinkRegex = ## srsly
 17+ qr/\[\[(?: (?: User[ _](?: talk)?:)|(?: Special:Contributions\/) )([^\[\]|]+)(?: \|[^\[\]]*)?\]\]/xi;
 18+
 19+sub reset_state {
 20+ $posts = [];
 21+ $current_post = { 'content' => '' };
 22+}
 23+
 24+sub input_line {
 25+ my ($unused,$line) = @_;
 26+
 27+ ## Check for blank posts.
 28+ $line =~ s/\s*$//g;
 29+ if (!$line) { return; }
 30+
 31+ $line =~ s/^:+//g;
 32+
 33+ ## Add to the content.
 34+ $current_post->{'content'} .= $line;
 35+
 36+ if ($line =~ /\d{2}:\d{2}, \d{1,2} \w+ \d{4} \(UTC\)/) {
 37+ $current_post->{'timestamp'} = $&;
 38+ ## Finishes with a timestamp, must be a comment.
 39+ my @signatureLikeLinks = ($line =~ /$signatureLinkRegex/g );
 40+
 41+ $current_post->{'user'} = pop @signatureLikeLinks;
 42+
 43+ push @$posts, $current_post;
 44+
 45+ $current_post = { 'content' => '' };
 46+ }
 47+}
 48+
 49+sub get_posts {
 50+ return $posts;
 51+}
 52+
 53+reset;
 54+1;
Index: trunk/extensions/LiquidThreads/import/parse-wikitext-headings.pl
@@ -0,0 +1,14 @@
 2+#!/usr/bin/perl -sw
 3+
 4+use JSON;
 5+use YAML;
 6+use WikiText::ParseHeadings;
 7+
 8+WikiText::ParseHeadings->reset();
 9+
 10+while (<>) {
 11+ WikiText::ParseHeadings->parse_line($_);
 12+}
 13+
 14+WikiText::ParseHeadings->finish_parse();
 15+print Dump( WikiText::ParseHeadings->structure );
Property changes on: trunk/extensions/LiquidThreads/import/parse-wikitext-headings.pl
___________________________________________________________________
Name: svn:executable
116 + *
Index: trunk/extensions/LiquidThreads/import/parse-wikitext-discussion.pl
@@ -0,0 +1,43 @@
 2+#!/usr/bin/perl -sw
 3+
 4+use JSON;
 5+use YAML;
 6+use WikiText::ParseHeadings;
 7+use WikiText::ParseDiscussion;
 8+
 9+WikiText::ParseHeadings->reset_state();
 10+
 11+while (<>) {
 12+ WikiText::ParseHeadings->parse_line($_);
 13+}
 14+
 15+WikiText::ParseHeadings->finish_parse();
 16+
 17+my $topLevelStructure = WikiText::ParseHeadings->structure;
 18+
 19+recursiveParseStructure( $topLevelStructure );
 20+
 21+sub recursiveParseStructure {
 22+ my ($structure, $parent) = @_;
 23+
 24+ if (ref $structure eq 'HASH') {
 25+ #print "Processing section ".$structure->{'title'}."\n";
 26+ recursiveParseStructure( $structure->{'content'}, $structure );
 27+ } elsif (ref $structure eq 'ARRAY') {
 28+ foreach my $subitem (@$structure) {
 29+ recursiveParseStructure($subitem, $structure);
 30+ }
 31+ } else {
 32+ WikiText::ParseDiscussion->reset_state();
 33+
 34+ my @lines = split /[\r\n]+/, $structure;
 35+
 36+ foreach my $line (@lines) {
 37+ WikiText::ParseDiscussion->input_line($line);
 38+ }
 39+
 40+ @$parent = WikiText::ParseDiscussion->get_posts;
 41+ }
 42+}
 43+
 44+print encode_json( $topLevelStructure );
Property changes on: trunk/extensions/LiquidThreads/import/parse-wikitext-discussion.pl
___________________________________________________________________
Name: svn:executable
145 + *

Status & tagging log