Index: trunk/mwdumper/mwimport/AssemblyInfo.cs |
— | — | @@ -0,0 +1,32 @@ |
| 2 | +using System.Reflection; |
| 3 | +using System.Runtime.CompilerServices; |
| 4 | + |
| 5 | +// Information about this assembly is defined by the following |
| 6 | +// attributes. |
| 7 | +// |
| 8 | +// Change them to the information associated with the assembly |
| 9 | +// you compile. |
| 10 | + |
| 11 | +[assembly: AssemblyTitle("")] |
| 12 | +[assembly: AssemblyDescription("")] |
| 13 | +[assembly: AssemblyConfiguration("")] |
| 14 | +[assembly: AssemblyCompany("")] |
| 15 | +[assembly: AssemblyProduct("")] |
| 16 | +[assembly: AssemblyCopyright("")] |
| 17 | +[assembly: AssemblyTrademark("")] |
| 18 | +[assembly: AssemblyCulture("")] |
| 19 | + |
| 20 | +// The assembly version has the following format: |
| 21 | +// |
| 22 | +// Major.Minor.Build.Revision |
| 23 | +// |
| 24 | +// You can specify all the values yourself, or you can have the build and revision |
| 25 | +// numbers generated automatically by using the '*' character (the default): |
| 26 | + |
| 27 | +[assembly: AssemblyVersion("1.0.*")] |
| 28 | + |
| 29 | +// The following attributes specify the key used to sign your assembly. See the |
| 30 | +// .NET Framework documentation for more information about signing. |
| 31 | +// Signing is not required; if you don't want it, leave these attributes as they are. |
| 32 | +[assembly: AssemblyDelaySign(false)] |
| 33 | +[assembly: AssemblyKeyFile("")] |
Property changes on: trunk/mwdumper/mwimport/AssemblyInfo.cs |
___________________________________________________________________ |
Added: svn:keywords |
1 | 34 | + Author Date Id Revision |
Added: svn:eol-style |
2 | 35 | + native |
Index: trunk/mwdumper/mwimport/IDumpWriter.cs |
— | — | @@ -0,0 +1,14 @@ |
| 2 | +// created on 8/29/2005 at 12:06 AM |
| 3 | + |
| 4 | +interface IDumpWriter { |
| 5 | + void WriteStartWiki(); |
| 6 | + void WriteEndWiki(); |
| 7 | + // Receives the site-wide metadata held in a Siteinfo object. |
| 8 | + void WriteSiteinfo(Siteinfo info); |
| 9 | + // WriteStartPage/WriteEndPage bracket the WriteRevision calls made for one page. |
| 10 | + void WriteStartPage(Page page); |
| 11 | + void WriteEndPage(); |
| 12 | + // Called once for each revision of the current page. |
| 13 | + void WriteRevision(Revision revision); |
| 14 | + //void WriteUpload(Upload upload); // for the future |
| 15 | +} |
Property changes on: trunk/mwdumper/mwimport/IDumpWriter.cs |
___________________________________________________________________ |
Added: svn:keywords |
1 | 16 | + Author Date Id Revision |
Added: svn:eol-style |
2 | 17 | + native |
Index: trunk/mwdumper/mwimport/Page.cs |
— | — | @@ -0,0 +1,11 @@ |
| 2 | +// created on 8/28/2005 at 11:50 PM |
| 3 | +class Page { |
| 4 | + public Title Title; |
| 5 | + public int Id; |
| 6 | + public string Restrictions; |
| 7 | + |
| 8 | + public Page() { |
| 9 | + // <restrictions> is optional, so default to null (SqlWriter.SqlSafe emits null as NULL) |
| 10 | + Restrictions = null; |
| 11 | + } |
| 12 | +} |
Property changes on: trunk/mwdumper/mwimport/Page.cs |
___________________________________________________________________ |
Added: svn:keywords |
1 | 13 | + Author Date Id Revision |
Added: svn:eol-style |
2 | 14 | + native |
Index: trunk/mwdumper/mwimport/SqlWriter.cs |
— | — | @@ -0,0 +1,186 @@ |
| 2 | +// created on 8/29/2005 at 12:41 AM |
| 3 | + |
| 4 | +using System; |
| 5 | +using System.Collections; |
| 6 | +using System.IO; |
| 7 | +using System.Text; |
| 8 | + |
| 9 | +abstract class SqlWriter : IDumpWriter { |
| 10 | + protected TextWriter _stream; |
| 11 | + |
| 12 | + public SqlWriter(TextWriter output) { |
| 13 | + _stream = output; |
| 14 | + } |
| 15 | + |
| 16 | + public void WriteStartWiki() { |
| 17 | + _stream.WriteLine("-- MediaWiki XML dump converted to SQL"); |
| 18 | + } |
| 19 | + |
| 20 | + public void WriteEndWiki() { |
| 21 | + _stream.WriteLine("-- DONE"); |
| 22 | + } |
| 23 | + |
| 24 | + public void WriteSiteinfo(Siteinfo info) { |
| 25 | + _stream.WriteLine(""); |
| 26 | + _stream.WriteLine("-- Site: " + CommentSafe(info.Sitename)); |
| 27 | + _stream.WriteLine("-- URL: " + CommentSafe(info.Base)); |
| 28 | + _stream.WriteLine("-- Generator: " + CommentSafe(info.Generator)); |
| 29 | + _stream.WriteLine("-- Case: " + CommentSafe(info.Case)); |
| 30 | + _stream.WriteLine("--"); |
| 31 | + _stream.WriteLine("-- Namespaces:"); |
| 32 | + foreach (int key in info.Namespaces.Keys) { |
| 33 | + _stream.WriteLine("-- " + key + ": " + CommentSafe(Convert.ToString(info.Namespaces[key]))); |
| 34 | + } |
| 35 | + _stream.WriteLine(""); |
| 36 | + } |
| 37 | + |
| 38 | + public abstract void WriteStartPage(Page page); |
| 39 | + |
| 40 | + public abstract void WriteEndPage(); |
| 41 | + |
| 42 | + public abstract void WriteRevision(Revision revision); |
| 43 | + |
| 44 | + // Makes text safe to embed in a one-line "--" SQL comment: a CR or LF would end |
| 45 | + // the comment and let the rest of the text run as SQL, so both are replaced by spaces. |
| 46 | + protected string CommentSafe(string text) { |
| 47 | + // A missing value (null) is written as an empty string. |
| 48 | + return (text == null) ? "" : text.Replace('\r', ' ').Replace('\n', ' '); |
| 49 | + } |
| 50 | + |
| 51 | + protected object InsertRow(string table, IDictionary row) { |
| 52 | + StringBuilder sql = new StringBuilder(); |
| 53 | + bool first; |
| 54 | + |
| 55 | + sql.Append("INSERT INTO "); |
| 56 | + //sql.Append(_tablePrefix); |
| 57 | + sql.Append(table); |
| 58 | + sql.Append(" ("); |
| 59 | + |
| 60 | + first = true; |
| 61 | + foreach (string field in row.Keys) { |
| 62 | + if (!first) |
| 63 | + sql.Append(','); |
| 64 | + first = false; |
| 65 | + sql.Append(field); |
| 66 | + } |
| 67 | + sql.Append(") VALUES ("); |
| 68 | + |
| 69 | + first = true; |
| 70 | + foreach (object val in row.Values) { |
| 71 | + if (!first) |
| 72 | + sql.Append(','); |
| 73 | + first = false; |
| 74 | + sql.Append(SqlSafe(val)); |
| 75 | + } |
| 76 | + sql.Append(");"); |
| 77 | + |
| 78 | + _stream.WriteLine(sql); |
| 79 | + return null; |
| 80 | + } |
| 81 | + |
| 82 | + protected void UpdateRow(string table, IDictionary row, string keyField, object keyValue) { |
| 83 | + StringBuilder sql = new StringBuilder(); |
| 84 | + bool first; |
| 85 | + |
| 86 | + sql.Append("UPDATE "); |
| 87 | + //sql.Append(_tablePrefix); |
| 88 | + sql.Append(table); |
| 89 | + sql.Append(" SET "); |
| 90 | + |
| 91 | + first = true; |
| 92 | + foreach (string field in row.Keys) { |
| 93 | + if (!first) |
| 94 | + sql.Append(','); |
| 95 | + first = false; |
| 96 | + sql.Append(field); |
| 97 | + sql.Append('='); |
| 98 | + sql.Append(SqlSafe(row[field])); |
| 99 | + } |
| 100 | + |
| 101 | + sql.Append(" WHERE "); |
| 102 | + sql.Append(keyField); |
| 103 | + sql.Append('='); |
| 104 | + sql.Append(SqlSafe(keyValue)); |
| 105 | + |
| 106 | + sql.Append(";"); |
| 107 | + |
| 108 | + _stream.WriteLine(sql); |
| 109 | + } |
| 110 | + |
| 111 | + protected string SqlSafe(object val) { |
| 112 | + // Renders a value as a SQL literal: NULL, a quoted string, or a bare number. |
| 113 | + if (val == null) |
| 114 | + return "NULL"; |
| 115 | + |
| 116 | + // Numbers use the invariant culture: under some cultures double.ToString() |
| 117 | + // writes ',' as the decimal separator, which is not valid in a SQL literal. |
| 118 | + IFormatProvider invariant = System.Globalization.CultureInfo.InvariantCulture; |
| 119 | + |
| 120 | + if (val is string) { |
| 121 | + return SqlEscape((string)val); |
| 122 | + } else if (val is int) { |
| 123 | + return ((int)val).ToString(invariant); |
| 124 | + } else if (val is double) { |
| 125 | + return ((double)val).ToString(invariant); |
| 126 | + } else { |
| 127 | + throw new ArgumentException("Unknown thingy in SQL"); |
| 128 | + } |
| 129 | + } |
| 130 | + |
| 131 | + protected string SqlEscape(string str) { |
| 132 | + StringBuilder sql = new StringBuilder(); |
| 133 | + sql.Append('\''); |
| 134 | + int len = str.Length; |
| 135 | + for (int i = 0; i < len; i++) { |
| 136 | + char c = str[i]; |
| 137 | + switch (c) { |
| 138 | + case '\u0000': |
| 139 | + sql.Append("\\0"); |
| 140 | + break; |
| 141 | + case '\n': |
| 142 | + sql.Append("\\n"); |
| 143 | + break; |
| 144 | + case '\r': |
| 145 | + sql.Append("\\r"); |
| 146 | + break; |
| 147 | + case '\u001a': |
| 148 | + sql.Append("\\Z"); |
| 149 | + break; |
| 150 | + case '"': |
| 151 | + case '\'': |
| 152 | + case '\\': |
| 153 | + sql.Append('\\'); |
| 154 | + goto default; |
| 155 | + default: |
| 156 | + sql.Append(c); |
| 157 | + break; |
| 158 | + } |
| 159 | + } |
| 160 | + sql.Append('\''); |
| 161 | + return sql.ToString(); |
| 162 | + } |
| 163 | + |
| 164 | + protected string TitleFormat(string title) { |
| 165 | + return title.Replace(' ', '_'); |
| 166 | + } |
| 167 | + |
| 168 | + protected string TimestampFormat(DateTime time) { |
| 169 | + return string.Format("{0:0000}{1:00}{2:00}{3:00}{4:00}{5:00}", |
| 170 | + time.Year, |
| 171 | + time.Month, |
| 172 | + time.Day, |
| 173 | + time.Hour, |
| 174 | + time.Minute, |
| 175 | + time.Second); |
| 176 | + } |
| 177 | + |
| 178 | + protected string InverseTimestamp(DateTime time) { |
| 179 | + return string.Format("{0:0000}{1:00}{2:00}{3:00}{4:00}{5:00}", |
| 180 | + 9999 - time.Year, |
| 181 | + 99 - time.Month, |
| 182 | + 99 - time.Day, |
| 183 | + 99 - time.Hour, |
| 184 | + 99 - time.Minute, |
| 185 | + 99 - time.Second); |
| 186 | + } |
| 187 | +} |
Property changes on: trunk/mwdumper/mwimport/SqlWriter.cs |
___________________________________________________________________ |
Added: svn:keywords |
1 | 188 | + Author Date Id Revision |
Added: svn:eol-style |
2 | 189 | + native |
Index: trunk/mwdumper/mwimport/mwimport.mds |
— | — | @@ -0,0 +1,16 @@ |
| 2 | +<Combine name="mwimport" fileversion="2.0"> |
| 3 | + <Configurations active="Debug"> |
| 4 | + <Configuration name="Debug" ctype="CombineConfiguration"> |
| 5 | + <Entry configuration="Debug" build="True" name="mwimport" /> |
| 6 | + </Configuration> |
| 7 | + <Configuration name="Release" ctype="CombineConfiguration"> |
| 8 | + <Entry configuration="Debug" build="True" name="mwimport" /> |
| 9 | + </Configuration> |
| 10 | + </Configurations> |
| 11 | + <StartMode startupentry="mwimport" single="True"> |
| 12 | + <Execute type="None" entry="mwimport" /> |
| 13 | + </StartMode> |
| 14 | + <Entries> |
| 15 | + <Entry filename="./mwimport.mdp" /> |
| 16 | + </Entries> |
| 17 | +</Combine> |
\ No newline at end of file |
Property changes on: trunk/mwdumper/mwimport/mwimport.mds |
___________________________________________________________________ |
Added: svn:keywords |
1 | 18 | + Author Date Id Revision |
Added: svn:eol-style |
2 | 19 | + native |
Index: trunk/mwdumper/mwimport/Revision.cs |
— | — | @@ -0,0 +1,25 @@ |
| 2 | +// created on 8/28/2005 at 11:52 PM |
| 3 | +using System; |
| 4 | +using System.Collections; |
| 5 | + |
| 6 | +class Revision { |
| 7 | + public int Id; |
| 8 | + public DateTime Timestamp; |
| 9 | + public Contributor Contributor; |
| 10 | + public string Comment; |
| 11 | + public string Text; |
| 12 | + public bool Minor; |
| 13 | + |
| 14 | + public bool IsRedirect { |
| 15 | + get { |
| 16 | + // TODO: not implemented yet -- every revision is reported as a non-redirect |
| 17 | + return false; |
| 18 | + } |
| 19 | + } |
| 20 | + |
| 21 | + public Revision() { |
| 22 | + Comment = ""; |
| 23 | + Text = ""; |
| 24 | + Minor = false; |
| 25 | + } |
| 26 | +} |
Property changes on: trunk/mwdumper/mwimport/Revision.cs |
___________________________________________________________________ |
Added: svn:keywords |
1 | 27 | + Author Date Id Revision |
Added: svn:eol-style |
2 | 28 | + native |
Index: trunk/mwdumper/mwimport/Siteinfo.cs |
— | — | @@ -0,0 +1,11 @@ |
| 2 | +// created on 8/29/2005 at 12:09 AM |
| 3 | +using System; |
| 4 | +using System.Collections; |
| 5 | + |
| 6 | +public class Siteinfo { |
| 7 | + public string Sitename; // written by SqlWriter.WriteSiteinfo as "-- Site:" |
| 8 | + public string Base; // written as "-- URL:" |
| 9 | + public string Generator; // e.g. "MediaWiki 1.6alpha" |
| 10 | + public string Case; // e.g. "first-letter" |
| 11 | + public IDictionary Namespaces; // namespace number (int) -> name prefix (string) |
| 12 | +} |
Property changes on: trunk/mwdumper/mwimport/Siteinfo.cs |
___________________________________________________________________ |
Added: svn:keywords |
1 | 13 | + Author Date Id Revision |
Added: svn:eol-style |
2 | 14 | + native |
Index: trunk/mwdumper/mwimport/SqlWriter14.cs |
— | — | @@ -0,0 +1,69 @@ |
| 2 | +// created on 8/29/2005 at 12:13 AM |
| 3 | +using System; |
| 4 | +using System.Collections; |
| 5 | +using System.IO; |
| 6 | + |
| 7 | +class SqlWriter14 : SqlWriter { |
| 8 | + Random _random; |
| 9 | + Page _currentPage; |
| 10 | + Revision _lastRevision; |
| 11 | + |
| 12 | + public SqlWriter14(TextWriter output) : base(output) { |
| 13 | + _random = new Random(); |
| 14 | + } |
| 15 | + |
| 16 | + public override void WriteStartPage(Page page) { |
| 17 | + _currentPage = page; |
| 18 | + _lastRevision = null; |
| 19 | + } |
| 20 | + // The last revision received for a page becomes its row in "cur". |
| 21 | + public override void WriteEndPage() { |
| 22 | + if (_lastRevision != null) |
| 23 | + WriteCurRevision(_currentPage, _lastRevision); |
| 24 | + _currentPage = null; |
| 25 | + _lastRevision = null; |
| 26 | + } |
| 27 | + // Each revision is held back until the next one arrives; only then is it written to "old". |
| 28 | + public override void WriteRevision(Revision revision) { |
| 29 | + if (_lastRevision != null) |
| 30 | + WriteOldRevision(_currentPage, _lastRevision); |
| 31 | + _lastRevision = revision; |
| 32 | + } |
| 33 | + |
| 34 | + private void WriteOldRevision(Page page, Revision revision) { |
| 35 | + IDictionary row = new Hashtable(); |
| 36 | + row["old_id"] = revision.Id; |
| 37 | + row["old_namespace"] = page.Title.Namespace; |
| 38 | + row["old_title"] = TitleFormat(page.Title.Text); |
| 39 | + row["old_text"] = revision.Text; |
| 40 | + row["old_comment"] = revision.Comment; |
| 41 | + row["old_user"] = revision.Contributor.Id; |
| 42 | + row["old_user_text"] = revision.Contributor.Username; |
| 43 | + row["old_timestamp"] = TimestampFormat(revision.Timestamp); |
| 44 | + row["old_minor_edit"] = revision.Minor ? 1 : 0; |
| 45 | + row["old_flags"] = "utf-8"; |
| 46 | + row["inverse_timestamp"] = InverseTimestamp(revision.Timestamp); |
| 47 | + InsertRow("old", row); |
| 48 | + } |
| 49 | + // cur_id carries the page id; the revision id is only used as old_id. |
| 50 | + private void WriteCurRevision(Page page, Revision revision) { |
| 51 | + IDictionary row = new Hashtable(); |
| 52 | + row["cur_id"] = page.Id; |
| 53 | + row["cur_namespace"] = page.Title.Namespace; |
| 54 | + row["cur_title"] = TitleFormat(page.Title.Text); |
| 55 | + row["cur_text"] = revision.Text; |
| 56 | + row["cur_comment"] = revision.Comment; |
| 57 | + row["cur_user"] = revision.Contributor.Id; |
| 58 | + row["cur_user_text"] = revision.Contributor.Username; |
| 59 | + row["cur_timestamp"] = TimestampFormat(revision.Timestamp); |
| 60 | + row["cur_restrictions"] = page.Restrictions; |
| 61 | + row["cur_counter"] = 0; |
| 62 | + row["cur_is_redirect"] = revision.IsRedirect ? 1 : 0; |
| 63 | + row["cur_minor_edit"] = revision.Minor ? 1 : 0; |
| 64 | + row["cur_random"] = _random.NextDouble(); |
| 65 | + row["cur_touched"] = TimestampFormat(DateTime.UtcNow); |
| 66 | + row["inverse_timestamp"] = InverseTimestamp(revision.Timestamp); |
| 67 | + InsertRow("cur", row); |
| 68 | + } |
| 69 | + |
| 70 | +} |
Property changes on: trunk/mwdumper/mwimport/SqlWriter14.cs |
___________________________________________________________________ |
Added: svn:keywords |
1 | 71 | + Author Date Id Revision |
Added: svn:eol-style |
2 | 72 | + native |
Index: trunk/mwdumper/mwimport/Title.cs |
— | — | @@ -0,0 +1,30 @@ |
| 2 | +// created on 8/28/2005 at 11:58 PM |
| 3 | +using System; |
| 4 | +using System.Collections; |
| 5 | + |
| 6 | +struct Title { |
| 7 | + public int Namespace; |
| 8 | + public string Text; |
| 9 | + // Splits "Prefix:Name" using the namespace table (number -> prefix); no match means namespace 0. |
| 10 | + public Title(string prefixedTitle, IDictionary namespaces) { |
| 11 | + foreach (int key in namespaces.Keys) { |
| 12 | + string prefix = (string)namespaces[key]; |
| 13 | + int len = prefix.Length; |
| 14 | + if (len > 0 |
| 15 | + && (prefixedTitle.Length - len) > 1 |
| 16 | + && string.CompareOrdinal(prefixedTitle, 0, prefix, 0, len) == 0 // ordinal: culture rules must not affect the match |
| 17 | + && prefixedTitle[len] == ':') { |
| 18 | + Namespace = key; |
| 19 | + Text = Title.ValidateTitleChars(prefixedTitle.Substring(len + 1)); |
| 20 | + return; |
| 21 | + } |
| 22 | + } |
| 23 | + Namespace = 0; |
| 24 | + Text = Title.ValidateTitleChars(prefixedTitle); |
| 25 | + } |
| 26 | + |
| 27 | + public static string ValidateTitleChars(string text) { |
| 28 | + // FIXME: no validation is performed yet; the text is returned unchanged |
| 29 | + return text; |
| 30 | + } |
| 31 | +} |
Property changes on: trunk/mwdumper/mwimport/Title.cs |
___________________________________________________________________ |
Added: svn:keywords |
1 | 32 | + Author Date Id Revision |
Added: svn:eol-style |
2 | 33 | + native |
Index: trunk/mwdumper/mwimport/SqlWriter15.cs |
— | — | @@ -0,0 +1,71 @@ |
| 2 | +// created on 8/29/2005 at 12:49 AM |
| 3 | + |
| 4 | +// Doesn't actually work yet... |
| 5 | + |
| 6 | +using System; |
| 7 | +using System.Collections; |
| 8 | +using System.IO; |
| 9 | + |
| 10 | +class SqlWriter15 : SqlWriter { |
| 11 | + Random _random; |
| 12 | + Page _currentPage; |
| 13 | + Revision _lastRevision; |
| 14 | + |
| 15 | + public SqlWriter15(TextWriter output) : base(output) { |
| 16 | + _random = new Random(); |
| 17 | + } |
| 18 | + |
| 19 | + public override void WriteStartPage(Page page) { |
| 20 | + _currentPage = page; // WriteRevision and WriteEndPage read the page from here |
| 21 | + _lastRevision = null; |
| 22 | + IDictionary row = new Hashtable(); |
| 23 | + row["page_id"] = page.Id; |
| 24 | + row["page_namespace"] = page.Title.Namespace; |
| 25 | + row["page_title"] = TitleFormat(page.Title.Text); |
| 26 | + row["page_restrictions"] = page.Restrictions; |
| 27 | + row["page_counter"] = 0; |
| 28 | + row["page_is_redirect"] = 0; |
| 29 | + row["page_is_new"] = 0; |
| 30 | + row["page_random"] = _random.NextDouble(); |
| 31 | + row["page_touched"] = TimestampFormat(DateTime.UtcNow); |
| 32 | + row["page_latest"] = 0; // We'll touch this up at the end... |
| 33 | + row["page_len"] = 0; // ..... |
| 34 | + InsertRow("page", row); |
| 35 | + } |
| 36 | + |
| 37 | + public override void WriteEndPage() { |
| 38 | + if (_lastRevision != null) |
| 39 | + UpdatePage(_currentPage, _lastRevision); |
| 40 | + _currentPage = null; |
| 41 | + _lastRevision = null; |
| 42 | + } |
| 43 | + |
| 44 | + public override void WriteRevision(Revision revision) { |
| 45 | + IDictionary row = new Hashtable(); |
| 46 | + row["old_id"] = revision.Id; |
| 47 | + row["old_text"] = revision.Text; |
| 48 | + row["old_flags"] = "utf-8"; |
| 49 | + InsertRow("text", row); |
| 50 | + // InsertRow only writes SQL and returns null, so the text row reuses the revision id as its key. |
| 51 | + row = new Hashtable(); |
| 52 | + row["rev_id"] = revision.Id; |
| 53 | + row["rev_page"] = _currentPage.Id; |
| 54 | + row["rev_text_id"] = revision.Id; |
| 55 | + row["rev_comment"] = revision.Comment; |
| 56 | + row["rev_user"] = revision.Contributor.Id; |
| 57 | + row["rev_user_text"] = revision.Contributor.Username; |
| 58 | + row["rev_timestamp"] = TimestampFormat(revision.Timestamp); |
| 59 | + row["rev_minor_edit"] = revision.Minor ? 1 : 0; |
| 60 | + row["rev_deleted"] = 0; |
| 61 | + InsertRow("revision", row); |
| 62 | + _lastRevision = revision; |
| 63 | + } |
| 64 | + |
| 65 | + private void UpdatePage(Page page, Revision revision) { |
| 66 | + IDictionary row = new Hashtable(); |
| 67 | + row["page_len"] = System.Text.Encoding.UTF8.GetByteCount(revision.Text); // length in UTF-8 bytes |
| 68 | + row["page_latest"] = revision.Id; |
| 69 | + row["page_is_redirect"] = revision.IsRedirect ? 1 : 0; |
| 70 | + UpdateRow("page", row, "page_id", page.Id); |
| 71 | + } |
| 72 | +} |
Property changes on: trunk/mwdumper/mwimport/SqlWriter15.cs |
___________________________________________________________________ |
Added: svn:keywords |
1 | 73 | + Author Date Id Revision |
Added: svn:eol-style |
2 | 74 | + native |
Index: trunk/mwdumper/mwimport/Main.cs |
— | — | @@ -0,0 +1,221 @@ |
| 2 | +// project created on 8/28/2005 at 11:08 PM |
| 3 | + |
| 4 | +/* |
| 5 | + -> read header info |
| 6 | + site name, url, language, namespace keys |
| 7 | + |
| 8 | + -> read pages..... |
| 9 | + <page> |
| 10 | + -> get title, etc |
| 11 | + <revision> |
| 12 | + -> store each revision |
| 13 | + on next one or end of sequence, write out |
| 14 | + [so for 1.4 schema we can be friendly] |
| 15 | + |
| 16 | + progress report: |
| 17 | + if possible, a percentage through file. this might not be possible. |
| 18 | + rates and counts definitely |
| 19 | + |
| 20 | + input: |
| 21 | + stdin or file |
| 22 | + allow gzip -> autodetect if possible |
| 23 | + |
| 24 | + output: |
| 25 | + SQL on stdout |
| 26 | + SQL on file |
| 27 | + SQL directly to a server |
| 28 | + |
| 29 | + output formats: |
| 30 | + 1.4 schema |
| 31 | + 1.5 schema |
| 32 | + |
| 33 | +*/ |
| 34 | + |
| 35 | +using System; |
| 36 | +using System.Collections; |
| 37 | +using System.IO; |
| 38 | +using System.Text; |
| 39 | +using System.Xml; |
| 40 | + |
| 41 | +using ICSharpCode.SharpZipLib.GZip; |
| 42 | + |
| 43 | +class MainClass { |
| 44 | + public static void Main(string[] args) { |
| 45 | + XmlTextReader reader = new XmlTextReader(Console.In); |
| 46 | + reader.WhitespaceHandling = WhitespaceHandling.Significant; |
| 47 | + SqlWriter14 writer = new SqlWriter14(Console.Out); |
| 48 | + |
| 49 | + reader.ReadStartElement(); |
| 50 | + writer.WriteStartWiki(); |
| 51 | + Siteinfo siteinfo = null; |
| 52 | + |
| 53 | + // ReadStartElement() has already advanced to the first child of the root, so |
| 54 | + // look at the current node before reading the next one (otherwise it is skipped). |
| 55 | + while (!reader.EOF) { |
| 56 | + if (reader.NodeType == XmlNodeType.Element) { |
| 57 | + if (reader.LocalName.Equals("page")) |
| 58 | + ReadPage(reader, writer, siteinfo); |
| 59 | + else if (reader.LocalName.Equals("siteinfo")) |
| 60 | + siteinfo = ReadSiteinfo(reader, writer); |
| 61 | + } |
| 62 | + reader.Read(); |
| 63 | + } |
| 64 | + reader.Close(); |
| 65 | + writer.WriteEndWiki(); |
| 66 | + } |
| 67 | + |
| 68 | + // Reads <siteinfo> up to its end tag and passes the result to the writer. |
| 69 | + static Siteinfo ReadSiteinfo(XmlReader reader, IDumpWriter writer) { |
| 70 | + Siteinfo info = new Siteinfo(); |
| 71 | + info.Namespaces = new Hashtable(); // stays empty if <namespaces> is missing |
| 72 | + while (reader.Read()) { |
| 73 | + string name = reader.LocalName; |
| 74 | + if (reader.NodeType == XmlNodeType.Element) { |
| 75 | + // An element node has no Value of its own; its text is in the child nodes. |
| 76 | + if (name.Equals("sitename")) |
| 77 | + info.Sitename = ReadElementContent(reader); |
| 78 | + else if (name.Equals("base")) |
| 79 | + info.Base = ReadElementContent(reader); |
| 80 | + else if (name.Equals("generator")) |
| 81 | + info.Generator = ReadElementContent(reader); |
| 82 | + else if (name.Equals("case")) |
| 83 | + info.Case = ReadElementContent(reader); |
| 84 | + else if (name.Equals("namespaces")) |
| 85 | + info.Namespaces = ReadNamespaces(reader); |
| 86 | + } else if (reader.NodeType == XmlNodeType.EndElement && name.Equals("siteinfo")) { |
| 87 | + writer.WriteSiteinfo(info); |
| 88 | + return info; |
| 89 | + } |
| 90 | + } |
| 91 | + throw new ArgumentException("Ran out of XML early; incomplete <siteinfo>"); |
| 92 | + } |
| 93 | + |
| 94 | + // Reads <namespaces> into a table of namespace number -> prefix. |
| 95 | + static IDictionary ReadNamespaces(XmlReader reader) { |
| 96 | + Hashtable namespaces = new Hashtable(); |
| 97 | + while (reader.Read()) { |
| 98 | + string name = reader.LocalName; |
| 99 | + if (reader.NodeType == XmlNodeType.Element && name.Equals("namespace")) { |
| 100 | + // The attribute has to be fetched while the reader is still on the start tag. |
| 101 | + int key = XmlConvert.ToInt32(reader.GetAttribute("key")); |
| 102 | + namespaces[key] = ReadElementContent(reader); |
| 103 | + } else if (reader.NodeType == XmlNodeType.EndElement && name.Equals("namespaces")) { |
| 104 | + return namespaces; |
| 105 | + } |
| 106 | + } |
| 107 | + throw new ArgumentException("Ran out of XML early; incomplete <namespaces>"); |
| 108 | + } |
| 109 | + |
| 110 | + // Returns the text of the element the reader is positioned on. Meant for leaf |
| 111 | + // elements: it stops at the first end tag it meets. |
| 112 | + static string ReadElementContent(XmlReader reader) { |
| 113 | + // <foo/> has no end tag; reading on would swallow the elements that follow it. |
| 114 | + if (reader.IsEmptyElement) |
| 115 | + return ""; |
| 116 | + StringBuilder val = new StringBuilder(); |
| 117 | + while (reader.Read()) { |
| 118 | + switch (reader.NodeType) { |
| 119 | + case XmlNodeType.SignificantWhitespace: |
| 120 | + case XmlNodeType.Text: |
| 121 | + case XmlNodeType.CDATA: |
| 122 | + val.Append(reader.Value); |
| 123 | + break; |
| 124 | + case XmlNodeType.EndElement: |
| 125 | + return val.ToString(); |
| 126 | + default: |
| 127 | + // ignore |
| 128 | + break; |
| 129 | + } |
| 130 | + } |
| 131 | + return val.ToString(); |
| 132 | + } |
| 133 | + |
| 134 | + // Reads one <page>: its header fields first, then each <revision>. |
| 135 | + static void ReadPage(XmlReader reader, IDumpWriter writer, Siteinfo siteinfo) { |
| 136 | + if (siteinfo == null) |
| 137 | + throw new ArgumentException("Found <page> before <siteinfo>; cannot resolve namespace prefixes"); |
| 138 | + Page page = new Page(); |
| 139 | + |
| 140 | + while (reader.Read()) { |
| 141 | + string name = reader.LocalName; |
| 142 | + if (reader.NodeType == XmlNodeType.Element) { |
| 143 | + if (name.Equals("revision")) |
| 144 | + break; // Move on to Stage Two |
| 145 | + |
| 146 | + string val = ReadElementContent(reader); |
| 147 | + |
| 148 | + if (name.Equals("title")) |
| 149 | + page.Title = new Title(val, siteinfo.Namespaces); |
| 150 | + else if (name.Equals("id")) |
| 151 | + page.Id = XmlConvert.ToInt32(val); |
| 152 | + else if (name.Equals("restrictions")) |
| 153 | + page.Restrictions = val; |
| 154 | + } else if (reader.NodeType == XmlNodeType.EndElement && name.Equals("page")) { |
| 155 | + break; // page without revisions; don't run on into the next page |
| 156 | + } |
| 157 | + } |
| 158 | + |
| 159 | + writer.WriteStartPage(page); |
| 160 | + do { |
| 161 | + string name = reader.LocalName; |
| 162 | + if (reader.NodeType == XmlNodeType.Element && name.Equals("revision")) |
| 163 | + ReadRevision(reader, writer, siteinfo); |
| 164 | + else if (reader.NodeType == XmlNodeType.EndElement && name.Equals("page")) |
| 165 | + break; |
| 166 | + } while(reader.Read()); |
| 167 | + writer.WriteEndPage(); |
| 168 | + } |
| 169 | + |
| 170 | + // Reads one <revision> and hands it to the writer when its end tag is reached. |
| 171 | + static void ReadRevision(XmlReader reader, IDumpWriter writer, Siteinfo siteinfo) { |
| 172 | + Revision rev = new Revision(); |
| 173 | + while (reader.Read()) { |
| 174 | + string name = reader.LocalName; |
| 175 | + if (reader.NodeType == XmlNodeType.Element) { |
| 176 | + if (name.Equals("contributor")) { |
| 177 | + // <contributor> has child elements, among them its own <id>, which must |
| 178 | + // not be mistaken for the revision's <id>. |
| 179 | + rev.Contributor = ReadContributor(reader); |
| 180 | + continue; |
| 181 | + } |
| 182 | + string val = ReadElementContent(reader); |
| 183 | + if (name.Equals("id")) |
| 184 | + rev.Id = XmlConvert.ToInt32(val); |
| 185 | + else if (name.Equals("timestamp")) |
| 186 | + rev.Timestamp = XmlConvert.ToDateTime(val); |
| 187 | + else if (name.Equals("minor")) |
| 188 | + rev.Minor = true; |
| 189 | + else if (name.Equals("comment")) |
| 190 | + rev.Comment = val; |
| 191 | + else if (name.Equals("text")) |
| 192 | + rev.Text = val; |
| 193 | + } else if (reader.NodeType == XmlNodeType.EndElement && name.Equals("revision")) { |
| 194 | + writer.WriteRevision(rev); |
| 195 | + return; |
| 196 | + } |
| 197 | + } |
| 198 | + } |
| 199 | + |
| 200 | + // Reads <contributor>: <username> or <ip> gives the name, <id> the user id (0 if absent). |
| 201 | + static Contributor ReadContributor(XmlReader reader) { |
| 202 | + string user = ""; |
| 203 | + int id = 0; |
| 204 | + if (reader.IsEmptyElement) |
| 205 | + return new Contributor(user, id); |
| 206 | + while (reader.Read()) { |
| 207 | + string name = reader.LocalName; |
| 208 | + if (reader.NodeType == XmlNodeType.Element) { |
| 209 | + string val = ReadElementContent(reader); |
| 210 | + if (name.Equals("username") || name.Equals("ip")) |
| 211 | + user = val; |
| 212 | + else if (name.Equals("id")) |
| 213 | + id = XmlConvert.ToInt32(val); |
| 214 | + } else if (reader.NodeType == XmlNodeType.EndElement && name.Equals("contributor")) { |
| 215 | + return new Contributor(user, id); |
| 216 | + } |
| 217 | + } |
| 218 | + throw new ArgumentException("Ran out of XML early; incomplete <contributor>"); |
| 219 | + } |
| 220 | + |
| 221 | + public static void Test(string[] args) { |
| 222 | + Siteinfo info = new Siteinfo(); |
| 223 | + info.Sitename = "OneFive"; |
| 224 | + info.Base = "http://localhost/head/index.php/Main_Page"; |
| 225 | + info.Generator = "MediaWiki 1.6alpha"; |
| 226 | + info.Case = "first-letter"; |
| 227 | + info.Namespaces = new Hashtable(); |
| 228 | + info.Namespaces[-2] = "Media"; |
| 229 | + info.Namespaces[-1] = "Special"; |
| 230 | + info.Namespaces[0] = ""; |
| 231 | + info.Namespaces[1] = "Talk"; |
| 232 | + |
| 233 | + Page page = new Page(); |
| 234 | + page.Id = 1; |
| 235 | + page.Title = new Title("Talk:Main Page", info.Namespaces); |
| 236 | + page.Restrictions = ""; |
| 237 | + |
| 238 | + Revision revision = new Revision(); |
| 239 | + revision.Id = 1; |
| 240 | + revision.Text = "This is a bunch of stuff\nyo momma!"; |
| 241 | + revision.Minor = true; |
| 242 | + revision.Timestamp = DateTime.UtcNow; |
| 243 | + revision.Contributor = new Contributor("WikiSysop", 1); |
| 244 | + revision.Comment = "wacky edit o doom (it's all good)"; |
| 245 | + |
| 246 | + Revision revision2 = new Revision(); |
| 247 | + revision2.Id = 2; |
| 248 | + revision2.Text = "''''''''\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\" VANDALE!!!!!"; |
| 249 | + revision2.Minor = false; |
| 250 | + revision2.Timestamp = DateTime.UtcNow; |
| 251 | + revision2.Contributor = new Contributor("127.0.0.1"); |
| 252 | + revision2.Comment = "/* fuk uuuu */"; |
| 253 | + |
| 254 | + SqlWriter14 writer = new SqlWriter14(Console.Out); |
| 255 | + writer.WriteStartWiki(); |
| 256 | + writer.WriteSiteinfo(info); |
| 257 | + writer.WriteStartPage(page); |
| 258 | + writer.WriteRevision(revision); |
| 259 | + writer.WriteRevision(revision2); |
| 260 | + writer.WriteEndPage(); |
| 261 | + writer.WriteEndWiki(); |
| 262 | + } |
| 263 | +} |
Property changes on: trunk/mwdumper/mwimport/Main.cs |
___________________________________________________________________ |
Added: svn:keywords |
1 | 223 | + Author Date Id Revision |
Added: svn:eol-style |
2 | 224 | + native |
Index: trunk/mwdumper/mwimport/mwimport.mdp |
— | — | @@ -0,0 +1,36 @@ |
| 2 | +<Project name="mwimport" fileversion="2.0" language="C#" ctype="DotNetProject"> |
| 3 | + <Configurations active="Debug"> |
| 4 | + <Configuration name="Debug" ctype="DotNetProjectConfiguration"> |
| 5 | + <Output directory="./bin/Debug" assembly="mwimport" /> |
| 6 | + <Build debugmode="True" target="Exe" /> |
| 7 | + <Execution runwithwarnings="True" consolepause="True" runtime="MsNet" /> |
| 8 | + <CodeGeneration compiler="Csc" warninglevel="4" optimize="True" unsafecodeallowed="False" generateoverflowchecks="True" generatexmldocumentation="False" ctype="CSharpCompilerParameters" /> |
| 9 | + </Configuration> |
| 10 | + <Configuration name="Release" ctype="DotNetProjectConfiguration"> |
| 11 | + <Output directory="./bin/Release" assembly="mwimport" /> |
| 12 | + <Build debugmode="False" target="Exe" /> |
| 13 | + <Execution runwithwarnings="True" consolepause="True" runtime="MsNet" /> |
| 14 | + <CodeGeneration compiler="Csc" warninglevel="4" optimize="True" unsafecodeallowed="False" generateoverflowchecks="True" generatexmldocumentation="False" ctype="CSharpCompilerParameters" /> |
| 15 | + </Configuration> |
| 16 | + </Configurations> |
| 17 | + <DeploymentInformation strategy="File"> |
| 18 | + <excludeFiles /> |
| 19 | + </DeploymentInformation> |
| 20 | + <Contents> |
| 21 | + <File name="./Main.cs" subtype="Code" buildaction="Compile" /> |
| 22 | + <File name="./AssemblyInfo.cs" subtype="Code" buildaction="Compile" /> |
| 23 | + <File name="./Page.cs" subtype="Code" buildaction="Compile" /> |
| 24 | + <File name="./Revision.cs" subtype="Code" buildaction="Compile" /> |
| 25 | + <File name="./Contributor.cs" subtype="Code" buildaction="Compile" /> |
| 26 | + <File name="./Title.cs" subtype="Code" buildaction="Compile" /> |
| 27 | + <File name="./IDumpWriter.cs" subtype="Code" buildaction="Compile" /> |
| 28 | + <File name="./Siteinfo.cs" subtype="Code" buildaction="Compile" /> |
| 29 | + <File name="./SqlWriter14.cs" subtype="Code" buildaction="Compile" /> |
| 30 | + <File name="./SqlWriter.cs" subtype="Code" buildaction="Compile" /> |
| 31 | + <File name="./SqlWriter15.cs" subtype="Code" buildaction="Compile" /> |
| 32 | + </Contents> |
| 33 | + <References> |
| 34 | + <ProjectReference type="Assembly" localcopy="True" refto="../libs/ICSharpCode.SharpZipLib.dll" /> |
| 35 | + <ProjectReference type="Gac" localcopy="True" refto="System.Xml, Version=1.0.5000.0, Culture=neutral, PublicKeyToken=b77a5c561934e089" /> |
| 36 | + </References> |
| 37 | +</Project> |
\ No newline at end of file |
Property changes on: trunk/mwdumper/mwimport/mwimport.mdp |
___________________________________________________________________ |
Added: svn:keywords |
1 | 38 | + Author Date Id Revision |
Added: svn:eol-style |
2 | 39 | + native |
Index: trunk/mwdumper/mwimport/Contributor.cs |
— | — | @@ -0,0 +1,21 @@ |
| 2 | +// created on 8/28/2005 at 11:53 PM |
| 3 | +struct Contributor { |
| 4 | + public string Username; |
| 5 | + public int Id; |
| 6 | + // Alias for Username; the IP-only constructor stores the address there. |
| 7 | + public string Address { |
| 8 | + get { |
| 9 | + return Username; |
| 10 | + } |
| 11 | + } |
| 12 | + // Contributor identified by a user name and a numeric id. |
| 13 | + public Contributor(string username, int id) { |
| 14 | + Username = username; |
| 15 | + Id = id; |
| 16 | + } |
| 17 | + // Contributor identified only by an IP string; Id is set to 0. |
| 18 | + public Contributor(string ip) { |
| 19 | + Username = ip; |
| 20 | + Id = 0; |
| 21 | + } |
| 22 | +} |
Property changes on: trunk/mwdumper/mwimport/Contributor.cs |
___________________________________________________________________ |
Added: svn:keywords |
1 | 23 | + Author Date Id Revision |
Added: svn:eol-style |
2 | 24 | + native |