Index: trunk/extensions/LiquidThreads/import/import-parsed-discussions.php |
— | — | @@ -0,0 +1,85 @@ |
| 2 | +<?php |
| 3 | + |
// Load MediaWiki's command-line bootstrap. When MW_INSTALL_PATH is not set,
// fall back to a path relative to this file. The script lives in
// extensions/LiquidThreads/import/, so the MediaWiki root is three levels up.
require_once ( getenv( 'MW_INSTALL_PATH' ) !== false
	? getenv( 'MW_INSTALL_PATH' ) . "/maintenance/commandLine.inc"
	: dirname( __FILE__ ) . '/../../../maintenance/commandLine.inc' );

## Imports JSON-encoded discussions from parse-wikitext-discussion.pl
##
## Usage: php import-parsed-discussions.php <json-file> <discussion-page-title>

if ( !isset( $argv[1] ) || !isset( $argv[2] ) ) {
	die( "Usage: php import-parsed-discussions.php <json-file> <discussion-page-title>\n" );
}

// Read and decode the parsed discussion structure (decoded to nested arrays).
$importJson = file_get_contents( $argv[1] );
if ( $importJson === false ) {
	die( "Unable to read input file " . $argv[1] . "\n" );
}

$structure = json_decode( $importJson, true );
if ( !is_array( $structure ) ) {
	die( "Input file " . $argv[1] . " does not contain a JSON array or object\n" );
}

// The page that the imported threads are attached to.
$importTitle = Title::newFromText( $argv[2] );
if ( !$importTitle ) {
	die( "Invalid page title " . $argv[2] . "\n" );
}

$article = new Article( $importTitle );

// Shared state read and written by recursiveParseArray():
// the subject of the section being imported and the first post created in it.
$subject = '';
$rootPost = null;

recursiveParseArray( $structure );
| 18 | + |
/**
 * Walks the decoded JSON structure and creates a post for every entry
 * that carries a 'user' key.
 *
 * Three shapes are recognised:
 *  - an array with a 'title' key: a section; its title becomes the subject
 *    for the posts found in its 'content';
 *  - an array with a 'user' key: a post, handed to createPost();
 *  - any other array: a list whose elements are processed in order.
 * Non-array values are ignored.
 *
 * Uses the globals $subject (current section title) and $rootPost (first
 * post created in the current run of posts; later posts are created with
 * it as their parent).
 *
 * @param $array mixed Node of the decoded structure
 */
function recursiveParseArray( $array ) {
	static $recurseLevel = 0;

	global $subject, $rootPost;

	$recurseLevel++;

	// Guard against runaway recursion on unexpectedly deep input.
	if ( $recurseLevel > 90 ) {
		// var_dump() prints its argument itself and returns nothing.
		var_dump( $array );
		die( wfBacktrace() );
	}

	if ( is_array( $array ) && isset( $array['title'] ) ) {
		// Remember the enclosing section's subject so that posts following a
		// nested section are not filed under the nested section's title.
		$outerSubject = $subject;

		$subject = $array['title'];
		// A section always starts a fresh thread.
		$rootPost = null;

		recursiveParseArray( isset( $array['content'] ) ? $array['content'] : array() );

		$subject = $outerSubject;
		$rootPost = null;
	} elseif ( is_array( $array ) && isset( $array['user'] ) ) {
		// We have a post.
		$t = createPost( $array, $subject, $rootPost );

		// The first post of a run becomes the parent of the ones after it.
		if ( !$rootPost ) {
			$rootPost = $t;
		}
	} elseif ( is_array( $array ) ) {
		foreach ( $array as $info ) {
			recursiveParseArray( $info );
		}

		// The end of a list ends the current thread.
		$rootPost = null;
	}

	$recurseLevel--;
}
| 52 | + |
/**
 * Creates one imported post, either as a new thread or as a reply.
 *
 * @param $info array Post data; reads the 'user' and 'content' keys
 * @param $subject string Subject used when a new thread is started
 * @param $super mixed Parent thread to reply to, or null to start a new thread
 * @return mixed The thread as returned by Threads::withId()
 * @throws MWException when User::newFromName() yields no user object
 */
function createPost( $info, $subject, $super = null ) {
	global $article;

	$summary = 'Imported from JSON';

	// Drop anything from the first '#' onwards in the user name.
	$userName = $info['user'];
	$hashPos = strpos( $userName, '#' );
	if ( $hashPos !== false ) {
		$userName = substr( $userName, 0, $hashPos );
	}

	$user = User::newFromName( $userName, /* no validation */ false );
	if ( !$user ) {
		throw new MWException( "Username ".$info['user']." is invalid." );
	}

	// Replies get a reply title, everything else a new thread title.
	$title = $super
		? Threads::newReplyTitle( $super, $user )
		: Threads::newThreadTitle( $subject, $article );

	$parentLabel = $super ? $super->title() : 'none';
	print "Creating thread $title as a subthread of ".$parentLabel."\n";

	// Save the post text as a new page, attributed to the signing user.
	$root = new Article( $title );
	$root->doEdit( $info['content'], $summary, EDIT_NEW, false, $user );

	$editType = $super ? 'reply' : 'new';
	$thread = LqtView::postEditUpdates( $editType, $super, $root, $article,
		$subject, $summary, null );

	// Fetch the thread again by id rather than returning the object above
	// (the original author notes "some weirdness" here; reason not visible).
	return Threads::withId( $thread->id() );
}
Index: trunk/extensions/LiquidThreads/import/WikiText/ParseHeadings.pm |
— | — | @@ -0,0 +1,45 @@ |
| 2 | +#!/usr/bin/perl -sw |
| 3 | + |
package WikiText::ParseHeadings;

use strict;
use warnings;

use List::Util qw(max);

# Parses out headings from wikitext.
#
# Feed the text one line at a time to parse_line(), call finish_parse() at
# the end, then read the result with structure(). The result is an array
# reference whose elements are either text chunks (strings) or sections of
# the form { title => ..., content => [ ... ] }, where content again holds
# text chunks and nested sections in document order.
#
# All subs are called as class methods; the state is module-wide.

# Level of the top-level container. A heading's level is the number of
# leading '=' signs minus one, so every heading is deeper than the root.
my $ROOT_LEVEL = -1;

my $structure;         # top-level array handed out by structure()
my $container_at;      # level => array receiving content at that level
my $content_buffer;    # text collected since the last heading
my $level;             # level of the section currently receiving text

reset_state();

# Appends the buffered text, if any, to the section it belongs to and
# empties the buffer so the text is not repeated in later sections.
sub _flush_buffer {
    if (defined $content_buffer && length $content_buffer) {
        push @{ $container_at->{$level} }, $content_buffer;
    }
    $content_buffer = '';
    return;
}

# Processes one line of wikitext. A heading line opens a new section;
# any other line is appended unchanged to the current text chunk.
sub parse_line {
    my ($class, $line) = @_;
    return unless defined $line;

    # Headings are matched against the line without trailing whitespace,
    # but the untrimmed line is what gets stored as content.
    (my $trimmed = $line) =~ s/\s+$//;

    if ($trimmed =~ m/^(\=+)\s*(.*?)\s*\=+$/) {
        my ($marker, $title) = ($1, $2);

        # Text seen so far belongs to the section that was open until now.
        _flush_buffer();

        my $new_level = length($marker) - 1;

        # Attach to the nearest open section that is shallower than the new
        # heading; the root always qualifies, so skipped levels are fine.
        my $parent_level = max grep { $_ < $new_level } keys %$container_at;

        # Sections at the same or a deeper level are closed by this heading.
        delete @{$container_at}{ grep { $_ >= $new_level } keys %$container_at };

        my $section = { 'title' => $title, 'content' => [] };
        push @{ $container_at->{$parent_level} }, $section;

        $container_at->{$new_level} = $section->{'content'};
        $level = $new_level;
    } else {
        $content_buffer .= $line;
    }

    return;
}

# Flushes the text collected after the last heading. Call once all lines
# have been passed to parse_line().
sub finish_parse {
    _flush_buffer();
    return;
}

# Discards all parser state and starts a new, empty structure.
sub reset_state {
    $structure = [];
    $container_at = { $ROOT_LEVEL => $structure };
    $content_buffer = '';
    $level = $ROOT_LEVEL;
    return;
}

# Returns the array reference holding the parsed structure.
sub structure { return $structure; }

1;
Index: trunk/extensions/LiquidThreads/import/WikiText/ParseDiscussion.pm |
— | — | @@ -0,0 +1,53 @@ |
| 2 | +#!/usr/bin/perl -sw |
| 3 | + |
| 4 | +package WikiText::ParseDiscussion; |
| 5 | + |
| 6 | +## Package for parsing discussions set out in wikitext, with signatures etc. |
| 7 | +## Currently does not attempt to alter threading, owing to the inconsistent and confusing |
| 8 | +## indentation standards across wikis, discussion pages and users. |
| 9 | +## May or may not blow up when people quote each other's posts, this is pretty rough and |
| 10 | +## ready |
| 11 | + |
| 12 | +use YAML; |
| 13 | + |
use strict;
use warnings;

# Module-wide parser state. Every sub below is called as a class method.
my $posts;           # array reference of finished posts
my $current_post;    # post being assembled: { content => ... }

# Matches a wiki link to a user page, user talk page or contributions page
# and captures the link target after the prefix (the user name, possibly
# followed by a '#fragment').
my $signatureLinkRegex = qr{
    \[\[
    (?: User (?: [ _] talk )? : | Special:Contributions/ )
    ( [^\[\]|]+ )
    (?: \| [^\[\]]* )?
    \]\]
}xi;

# Signature timestamp, e.g. "12:34, 5 June 2009 (UTC)".
my $timestampRegex = qr/\d{2}:\d{2}, \d{1,2} \w+ \d{4} \(UTC\)/;

# Discards all collected posts and starts a new, empty post.
sub reset_state {
    $posts = [];
    $current_post = { 'content' => '' };
    return;
}

# Adds one line of discussion text to the post being assembled. A line
# containing a signature timestamp finishes the post: its 'timestamp' is
# set to the first timestamp on the line and its 'user' to the target of
# the last signature-like link on that line (undef when there is none).
sub input_line {
    my ($class, $line) = @_;
    return unless defined $line;

    ## Skip blank lines (a line holding just "0" is not blank).
    $line =~ s/\s+$//;
    return unless length $line;

    ## Drop the indentation colons; threading is not derived from them.
    $line =~ s/^:+//;
    return unless length $line;

    ## Add to the content, keeping the line break between lines.
    $current_post->{'content'} .= "\n" if length $current_post->{'content'};
    $current_post->{'content'} .= $line;

    if ($line =~ /($timestampRegex)/) {
        $current_post->{'timestamp'} = $1;

        ## Contains a timestamp, must be the end of a comment.
        my @signature_targets = ($line =~ /$signatureLinkRegex/g);
        $current_post->{'user'} = pop @signature_targets;

        push @$posts, $current_post;
        $current_post = { 'content' => '' };
    }

    return;
}

# Returns the array reference of posts finished since the last reset.
# Text that was never closed by a timestamp is not included.
sub get_posts {
    return $posts;
}

# Start with clean state even if the caller never resets explicitly.
reset_state();
1;
Index: trunk/extensions/LiquidThreads/import/parse-wikitext-headings.pl |
— | — | @@ -0,0 +1,14 @@ |
| 2 | +#!/usr/bin/perl -sw |
| 3 | + |
| 4 | +use JSON; |
| 5 | +use YAML; |
| 6 | +use WikiText::ParseHeadings; |
| 7 | + |
# Reads wikitext from the files named on the command line (or STDIN),
# splits it into sections by heading and prints the result as YAML.

# The module's reset method is reset_state(); it has no reset().
WikiText::ParseHeadings->reset_state();

while ( defined( my $line = <> ) ) {
    WikiText::ParseHeadings->parse_line($line);
}

WikiText::ParseHeadings->finish_parse();
print Dump( WikiText::ParseHeadings->structure );
Property changes on: trunk/extensions/LiquidThreads/import/parse-wikitext-headings.pl |
___________________________________________________________________ |
Name: svn:executable |
1 | 16 | + * |
Index: trunk/extensions/LiquidThreads/import/parse-wikitext-discussion.pl |
— | — | @@ -0,0 +1,43 @@ |
| 2 | +#!/usr/bin/perl -sw |
| 3 | + |
| 4 | +use JSON; |
| 5 | +use YAML; |
| 6 | +use WikiText::ParseHeadings; |
| 7 | +use WikiText::ParseDiscussion; |
| 8 | + |
# Stage 1: split the wikitext read from the command-line files (or STDIN)
# into sections by heading.
WikiText::ParseHeadings->reset_state();

while ( defined( my $input_line = <> ) ) {
    WikiText::ParseHeadings->parse_line($input_line);
}

WikiText::ParseHeadings->finish_parse();

# Stage 2: turn the text inside the sections into individual posts.
my $topLevelStructure = WikiText::ParseHeadings->structure;

recursiveParseStructure( $topLevelStructure );
| 20 | + |
# Walks the heading structure and replaces every text chunk with the array
# reference of posts parsed from it.
#
#   $structure - node to process: a section hash, an array of nodes, or text
#   $parent    - the node that contains $structure (undef for the top level)
#
# Each text chunk inside an array is replaced in place, so sibling sections
# and other text chunks of the same array are kept.
sub recursiveParseStructure {
    my ($structure, $parent) = @_;

    if (ref $structure eq 'HASH') {
        #print "Processing section ".$structure->{'title'}."\n";
        recursiveParseStructure( $structure->{'content'}, $structure );
    }
    elsif (ref $structure eq 'ARRAY') {
        # foreach aliases $subitem to the array element, so assigning to it
        # swaps just that element.
        foreach my $subitem (@$structure) {
            if (ref $subitem) {
                recursiveParseStructure($subitem, $structure);
            }
            else {
                $subitem = _parseDiscussionText($subitem);
            }
        }
    }
    elsif (ref $parent eq 'ARRAY') {
        # Text passed in directly: the parent array ends up holding the posts.
        @$parent = ( _parseDiscussionText($structure) );
    }
    elsif (ref $parent eq 'HASH') {
        # A section whose content is plain text rather than an array.
        $parent->{'content'} = [ _parseDiscussionText($structure) ];
    }

    return;
}

# Runs one chunk of text through WikiText::ParseDiscussion, line by line,
# and returns whatever get_posts hands back for it.
sub _parseDiscussionText {
    my ($text) = @_;

    WikiText::ParseDiscussion->reset_state();

    if (defined $text) {
        foreach my $line (split /[\r\n]+/, $text) {
            WikiText::ParseDiscussion->input_line($line);
        }
    }

    return WikiText::ParseDiscussion->get_posts;
}
| 43 | + |
# Write the parsed structure to STDOUT as JSON.
my $encodedStructure = encode_json( $topLevelStructure );
print $encodedStructure;
Property changes on: trunk/extensions/LiquidThreads/import/parse-wikitext-discussion.pl |
___________________________________________________________________ |
Name: svn:executable |
1 | 45 | + * |