r61528 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r61527‎ | r61528 | r61529 >
Date:18:58, 26 January 2010
Author:platonides
Status:reverted
Tags:
Comment:
Step 4: Profit!!
Add and use PregSplitIterator instead of a direct preg_split.
Slower, but with an upper bound on memory usage.
Modified paths:
  • /trunk/phase3/includes/StringUtils.php (modified) (history)
  • /trunk/phase3/includes/parser/Parser.php (modified) (history)
  • /trunk/phase3/tests/preg_split_test.php (added) (history)

Diff [purge]

Index: trunk/phase3/tests/preg_split_test.php
@@ -0,0 +1,24 @@
 2+<?php
 3+include "../includes/StringUtils.php";
 4+
 5+$pattern = "/('')+/";
 6+$subject = str_repeat("'' ", 1024*1024 + 7);
 7+
 8+$m = memory_get_usage();
 9+
 10+$ps1 = preg_split($pattern, $subject);
 11+
 12+$r = "";
 13+foreach ($ps1 as $c) {
 14+ $r .= $c . "|";
 15+}
 16+echo "Original preg_split: " . md5($r) . " " . (memory_get_usage()-$m) . "\n";
 17+
 18+unset($ps1);
 19+
 20+$r = "";
 21+$ps2 = StringUtils::preg_split($pattern, $subject);
 22+foreach ($ps2 as $c) {
 23+ $r .= $c . "|";
 24+}
 25+echo "StringUtils preg_split: " . md5($r) . " " . (memory_get_usage()-$m) . "\n";
Property changes on: trunk/phase3/tests/preg_split_test.php
___________________________________________________________________
Name: svn:eol-style
126 + native
Index: trunk/phase3/includes/parser/Parser.php
@@ -1154,7 +1154,7 @@
11551155 # be text, and the remaining three constitute mark-up for bold text.
11561156 # If there are more than 6 apostrophes in a row, assume they're all
11571157 # text except for the last 6.
1158 - $arr = preg_split( "/('{2,3}(?:''')?)(?!')/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
 1158+ $arr = Stringutils::preg_split( "/('{2,3}(?:''')?)(?!')/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
11591159
11601160
11611161 # Now let's actually convert our apostrophic mush to HTML!
Index: trunk/phase3/includes/StringUtils.php
@@ -179,6 +179,14 @@
180180 return new ArrayIterator( explode( $separator, $subject ) );
181181 }
182182 }
 183+
 184+ /**
 185+ * Workalike for preg_split() with limited memory usage.
 186+ * Returns an Iterator
 187+ */
 188+ static function preg_split( $pattern, $subject, $limit = -1, $flags = 0 ) {
 189+ return new PregSplitIterator( $pattern, $subject, $limit, $flags );
 190+ }
183191 }
184192
185193 /**
@@ -409,3 +417,82 @@
410418 }
411419 }
412420
 421+
 422+/**
 423+ * An iterator which works exactly like:
 424+ *
 425+ * foreach ( preg_split( $pattern, $s, $limit, $flags ) as $element ) {
 426+ * ...
 427+ * }
 428+ *
 429+ * Except it doesn't use huge amounts of memory when $limit is -1
 430+ *
 431+ * The flag PREG_SPLIT_OFFSET_CAPTURE isn't supported.
 432+ */
 433+class PregSplitIterator implements Iterator {
 434+ // The subject string
 435+ var $pattern, $subject, $originalLimit, $flags;
 436+
 437+ // The last extracted group of items.
 438+ var $smallArray;
 439+
 440+ // The position on the iterator.
 441+ var $curPos;
 442+
 443+ const MAX_LIMIT = 100;
 444+
 445+ /**
 446+ * Construct a PregSplitIterator
 447+ */
 448+ function __construct( $pattern, $s, $limit, $flags) {
 449+ $this->pattern = $pattern;
 450+ $this->subject = $s;
 451+ $this->originalLimit = $limit;
 452+ $this->flags = $flags;
 453+
 454+ $this->rewind();
 455+ }
 456+
 457+ private function effectiveLimit() {
 458+ if ($this->originalLimit == -1) {
 459+ return self::MAX_LIMIT + 1;
 460+ } else if ($this->limit > self::MAX_LIMIT) {
 461+ $this->limit -= self::MAX_LIMIT;
 462+ return self::MAX_LIMIT + 1;
 463+ } else {
 464+ $old = $this->limit;
 465+ $this->limit = 0;
 466+ return $old;
 467+ }
 468+ }
 469+
 470+ function rewind() {
 471+ $this->curPos = 0;
 472+ $this->limit = $this->originalLimit;
 473+ if ($this->limit == -1) $this->limit = self::MAX_LIMIT;
 474+ $this->smallArray = preg_split( $this->pattern, $this->subject, $this->effectiveLimit(), $this->flags);
 475+ }
 476+
 477+ function current() {
 478+ return $this->smallArray[$this->curPos % self::MAX_LIMIT];
 479+ }
 480+
 481+ function key() {
 482+ return $this->curPos;
 483+ }
 484+
 485+ function next() {
 486+ $this->curPos++;
 487+ if ( $this->curPos % self::MAX_LIMIT == 0 ) {
 488+ # The last item contains the rest of the subject, still unsplit.
 489+ if ($this->limit > 0) {
 490+ $this->smallArray = preg_split( $this->pattern, $this->smallArray[self::MAX_LIMIT], $this->effectiveLimit(), $this->flags);
 491+ }
 492+ }
 493+ return;
 494+ }
 495+
 496+ function valid() {
 497+ return $this->curPos % self::MAX_LIMIT < count($this->smallArray);
 498+ }
 499+}

Follow-up revisions

Revision | Commit summary | Author | Date
r61551 | Revert r61528, r61527, r61526, r61525, r61519, r61515, r61053, r61052 (Parser... | tstarling | 02:41, 27 January 2010

Status & tagging log