r10910 MediaWiki - Code Review archive

Repository: MediaWiki
Revision: < r10909 | r10910 | r10911 >
Date: 10:43, 5 September 2005
Author: vibber
Status: old
Tags:
Comment:
* Fix for whitespace-only <text> sections
* Fix for roundtripping timestamps at local DST boundary
Modified paths:
  • /trunk/mwdumper/mwimport/XmlDumpReader.cs (modified) (history)

Diff [purge]

Index: trunk/mwdumper/mwimport/XmlDumpReader.cs
@@ -122,11 +122,9 @@
123 123 return "";
124 124 }
125 125 while (_reader.Read()) {
126 - //Console.WriteLine("XXX: " + reader.NodeType + ", " + reader.LocalName + ", " + reader.Value);
127 126 switch (_reader.NodeType) {
128 127 case XmlNodeType.SignificantWhitespace:
129 128 case XmlNodeType.Text:
130 - _reader.MoveToContent();
131 129 val.Append(_reader.Value);
132 130 break;
133 131 case XmlNodeType.EndElement:
@@ -226,13 +224,7 @@
227 225 }
228 226
229 227 private void ReadTimestamp() {
230 - // This is slow, took up 10% of runtime trying 17 different formats!
231 - //_rev.Timestamp = XmlConvert.ToDateTime(ReadElementContent()).ToUniversalTime();
232 -
233 - // We've declared a standard format, so just check it.
234 - _rev.Timestamp = DateTime.ParseExact(ReadElementContent(),
235 - @"yyyy'-'MM'-'dd'T'HH':'mm':'ss'Z'",
236 - System.Globalization.CultureInfo.CurrentCulture);
 228+ _rev.Timestamp = ParseUTCTimestamp(ReadElementContent());
237 229 }
238 230
239 231 private void ReadComment() {
@@ -273,5 +265,23 @@
274 266 private void ReadIp() {
275 267 _contrib = new Contributor(ReadElementContent());
276 268 }
 269+
 270+ private DateTime ParseUTCTimestamp(string text) {
 271+ // 2003-10-26T04:50:47Z
 272+ //
 273+ // We're doing this manually because:
 274+ // * XmlConvert.ToDateTime() is slow
 275+ // * XmlConvert.ToDateTime() is lossy at local DST boundary
 276+ // * DateTime.ParseExact seems to be similarly lossy, unless
 277+ // there's a magic formula I haven't found yet.
 278+ string trimmed = text.Trim();
 279+ return new DateTime(
 280+ int.Parse(trimmed.Substring(0,4)), // year
 281+ int.Parse(trimmed.Substring(5,2)), // month
 282+ int.Parse(trimmed.Substring(8,2)), // day
 283+ int.Parse(trimmed.Substring(11,2)), // hour
 284+ int.Parse(trimmed.Substring(14,2)), // minute
 285+ int.Parse(trimmed.Substring(17,2))); // second
 286+ }
277 287 }
278 288 }

Status & tagging log