r17822 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r17821‎ | r17822 | r17823 >
Date:10:38, 21 November 2006
Author:tstarling
Status:old
Tags:
Comment:
Collection of generic string functions and classes
Modified paths:
  • /trunk/phase3/includes/StringUtils.php (added) (history)

Diff [purge]

Index: trunk/phase3/includes/StringUtils.php
@@ -0,0 +1,263 @@
 2+<?php
 3+
class StringUtils {
	/**
	 * Replace each $startDelim ... $endDelim span in $subject with $replace.
	 *
	 * For literal delimiters this is roughly equivalent to
	 *
	 *   preg_replace( "!$startDelim(.*?)$endDelim!", $replace, $subject );
	 *
	 * except that it's worst-case O(N) instead of O(N^2). Differences visible
	 * in the implementation:
	 *  - the delimiters are plain strings, not regular expressions;
	 *  - $replace is inserted verbatim ("$1" is NOT expanded);
	 *  - a start delimiter with no end delimiter before the next start
	 *    delimiter is copied to the output unchanged.
	 *
	 * Compared to delimiterReplace(), this implementation is fast but memory-
	 * hungry and inflexible. The memory requirements are such that I don't
	 * recommend using it on anything but guaranteed small chunks of text.
	 *
	 * @param string $startDelim Literal start delimiter (must not be empty)
	 * @param string $endDelim Literal end delimiter
	 * @param string $replace Literal replacement text
	 * @param string $subject String to search
	 * @return string
	 */
	public static function hungryDelimiterReplace( $startDelim, $endDelim, $replace, $subject ) {
		$segments = explode( $startDelim, $subject );
		// Everything before the first start delimiter is copied as is
		$output = array_shift( $segments );
		$endDelimLength = strlen( $endDelim );
		foreach ( $segments as $s ) {
			$endDelimPos = strpos( $s, $endDelim );
			if ( $endDelimPos === false ) {
				// Unterminated start delimiter: restore it together with its text
				$output .= $startDelim . $s;
			} else {
				// Drop the delimited text, keep whatever follows the end delimiter
				$output .= $replace . substr( $s, $endDelimPos + $endDelimLength );
			}
		}
		return $output;
	}

	/**
	 * Replace delimited spans of $subject with the result of a callback.
	 *
	 * The subject is scanned once for "!($startDelim)|($endDelim)!$flags".
	 * Each end delimiter is paired with the closest preceding start delimiter
	 * that has not been paired yet; earlier unpaired start delimiters and
	 * unpaired end delimiters are copied to the output unchanged.
	 *
	 * The callback receives an array in the style of preg_replace_callback():
	 * element 0 is the whole span including both delimiters, element 1 is the
	 * text between them. Its return value replaces the whole span.
	 *
	 * This implementation is slower than hungryDelimiterReplace() but uses far
	 * less memory and allows regular expression delimiters. Because capture
	 * groups 1 and 2 are used to tell the delimiters apart, the delimiter
	 * expressions must not contain capturing groups or an unescaped "!".
	 *
	 * @param string $startDelim Start delimiter regular expression
	 * @param string $endDelim End delimiter regular expression
	 * @param callback $callback Function to call on each match
	 * @param string $subject String to search
	 * @param string $flags Regular expression flags
	 * @return string
	 * @throws MWException If a delimiter matches the empty string
	 */
	public static function delimiterReplaceCallback( $startDelim, $endDelim, $callback, $subject, $flags = '' ) {
		$inputPos = 0;    // where the next regex search starts
		$outputPos = 0;   // start of the input that has not been written out yet
		$contentPos = 0;  // first byte after the most recent start delimiter
		$output = '';
		$foundStart = false;
		$subjectLength = strlen( $subject );
		$pattern = "!($startDelim)|($endDelim)!$flags";

		while ( $inputPos < $subjectLength &&
			preg_match( $pattern, $subject, $m, PREG_OFFSET_CAPTURE, $inputPos ) )
		{
			if ( $m[1][0] !== '' ) {
				# Found start
				# Write out the non-matching section. If a previous start
				# delimiter was still pending, this flushes it as plain text.
				$output .= substr( $subject, $outputPos, $m[1][1] - $outputPos );
				$foundStart = true;
				$inputPos = $contentPos = $m[1][1] + strlen( $m[1][0] );
				$outputPos = $m[1][1];
			} elseif ( isset( $m[2] ) && $m[2][0] !== '' ) {
				# Found end
				$endPos = $m[2][1] + strlen( $m[2][0] );
				if ( $foundStart ) {
					# Found match
					$output .= call_user_func( $callback, array(
						substr( $subject, $outputPos, $endPos - $outputPos ),
						substr( $subject, $contentPos, $m[2][1] - $contentPos )
					));
					$foundStart = false;
				} else {
					# Non-matching end, write it out together with the
					# plain text that precedes it
					$output .= substr( $subject, $outputPos, $endPos - $outputPos );
				}
				$inputPos = $outputPos = $endPos;
			} else {
				# Neither group captured text: a delimiter matched the empty
				# string, which would otherwise loop forever
				throw new MWException( 'Invalid delimiter given to ' . __METHOD__ );
			}
		}
		# Write out the remainder, including any unterminated start delimiter
		if ( $outputPos < $subjectLength ) {
			$output .= substr( $subject, $outputPos );
		}
		return $output;
	}

	/**
	 * Replace delimited spans of $subject with a replacement string.
	 *
	 * Wraps $replace in a RegexlikeReplacer and passes it to
	 * delimiterReplaceCallback(); see that method for how spans are matched.
	 *
	 * @param string $startDelim Start delimiter regular expression
	 * @param string $endDelim End delimiter regular expression
	 * @param string $replace Replacement string. May contain $1, which will be
	 *     replaced by the text between the delimiters
	 * @param string $subject String to search
	 * @param string $flags Regular expression flags
	 * @return string The string with the matches replaced
	 */
	public static function delimiterReplace( $startDelim, $endDelim, $replace, $subject, $flags = '' ) {
		$replacer = new RegexlikeReplacer( $replace );
		return self::delimiterReplaceCallback( $startDelim, $endDelim,
			$replacer->cb(), $subject, $flags );
	}

	/**
	 * More or less "markup-safe" explode()
	 * Ignores any instances of the separator inside <...>
	 *
	 * Note that any "\x00" bytes in $text are removed, because that byte is
	 * used internally as a placeholder for protected separators.
	 *
	 * @param string $separator
	 * @param string $text
	 * @return array
	 */
	public static function explodeMarkup( $separator, $text ) {
		$placeholder = "\x00";

		// Remove placeholder instances
		$text = str_replace( $placeholder, '', $text );

		// Replace instances of the separator inside HTML-like tags with the placeholder
		$replacer = new DoubleReplacer( $separator, $placeholder );
		$cleaned = self::delimiterReplaceCallback( '<', '>', $replacer->cb(), $text );

		// Explode, then put the replaced separators back in
		$items = explode( $separator, $cleaned );
		foreach ( $items as $i => $str ) {
			$items[$i] = str_replace( $placeholder, $separator, $str );
		}

		return $items;
	}

	/**
	 * Escape a string to make it suitable for inclusion in a preg_replace()
	 * replacement parameter.
	 *
	 * Backslashes are doubled first, then each "$" is prefixed with a
	 * backslash, so the escaping backslashes are not themselves doubled.
	 *
	 * @param string $string
	 * @return string
	 */
	public static function escapeRegexReplacement( $string ) {
		$string = str_replace( '\\', '\\\\', $string );
		$string = str_replace( '$', '\\$', $string );
		return $string;
	}
}
 137+
/**
 * Base class for "replacers", objects used in preg_replace_callback() and
 * StringUtils::delimiterReplaceCallback()
 *
 * Subclasses are expected to provide a replace( $matches ) method; this base
 * class only knows how to package that method up as a callback.
 */
class Replacer {
	/**
	 * Get a callback that invokes this object's replace() method.
	 *
	 * @return array Callback in ( object, method name ) form
	 */
	public function cb() {
		// Objects are passed by handle, so no reference to $this is needed
		return array( $this, 'replace' );
	}
}
 147+
/**
 * Replacer that substitutes match groups into a template string, in the
 * manner of the "$n" placeholders of preg_replace()
 */
class RegexlikeReplacer extends Replacer {
	/** Replacement template; may contain $0, $1, ... placeholders */
	var $r;

	/**
	 * @param string $r Replacement template
	 */
	function __construct( $r ) {
		$this->r = $r;
	}

	/**
	 * Expand the template for one match.
	 *
	 * @param array $matches Match groups, indexed by group number
	 * @return string The template with each "$<index>" replaced by the
	 *     corresponding element of $matches
	 */
	function replace( $matches ) {
		$substitutions = array();
		foreach ( array_keys( $matches ) as $group ) {
			$substitutions['$' . $group] = $matches[$group];
		}
		return strtr( $this->r, $substitutions );
	}

}
 166+
/**
 * Class to perform secondary replacement within each replacement string
 */
class DoubleReplacer extends Replacer {
	// Declared explicitly, like the other replacers in this file, rather than
	// being created implicitly by the constructor
	var $from, $to, $index;

	/**
	 * @param mixed $from Search value(s), as accepted by str_replace()
	 * @param mixed $to Replacement value(s), as accepted by str_replace()
	 * @param int $index Index of the match group to operate on (0 = whole match)
	 */
	function __construct( $from, $to, $index = 0 ) {
		$this->from = $from;
		$this->to = $to;
		$this->index = $index;
	}

	/**
	 * Replace $from with $to inside the selected match group.
	 *
	 * @param array $matches Match groups, indexed by group number
	 * @return string The selected group after the secondary replacement
	 */
	function replace( $matches ) {
		return str_replace( $this->from, $this->to, $matches[$this->index] );
	}
}
 181+
/**
 * Replacer that maps the matched text to a value through an associative array
 */
class HashtableReplacer extends Replacer {
	var $table, $index;

	/**
	 * @param array $table Lookup table, keyed by matched text
	 * @param int $index Index of the match group used as the lookup key
	 */
	function __construct( $table, $index = 0 ) {
		$this->index = $index;
		$this->table = $table;
	}

	/**
	 * Look up the replacement for one match.
	 *
	 * @param array $matches Match groups, indexed by group number
	 * @return mixed The table entry stored under the selected group's text
	 */
	function replace( $matches ) {
		$key = $matches[$this->index];
		return $this->table[$key];
	}
}
 197+
/**
 * Replacement array for FSS with fallback to strtr()
 * Supports lazy initialisation of FSS resource
 *
 * The FSS functions (fss_prep_replace / fss_exec_replace) are used only when
 * they exist at runtime; otherwise replace() delegates to strtr().
 */
class ReplacementArray {
	/*mostly private*/ var $data = false;
	/*mostly private*/ var $fss = false;

	/**
	 * Create an object with the specified replacement array
	 * The array should have the same form as the replacement array for strtr()
	 *
	 * @param array $data Map of search string => replacement string
	 */
	function __construct( $data = array() ) {
		$this->data = $data;
	}

	/**
	 * Serialize only the replacement pairs, not the prepared FSS value.
	 *
	 * @return array Names of the properties to serialize
	 */
	function __sleep() {
		return array( 'data' );
	}

	/**
	 * Mark the FSS value as unprepared after unserialization, so that
	 * replace() prepares it again on first use.
	 */
	function __wakeup() {
		$this->fss = false;
	}

	/**
	 * Set the whole replacement array at once
	 *
	 * @param array $data Map of search string => replacement string
	 */
	function setArray( $data ) {
		$this->data = $data;
		$this->fss = false;
	}

	/**
	 * Get the current replacement array
	 *
	 * @return array
	 */
	function getArray() {
		return $this->data;
	}

	/**
	 * Set an element of the replacement array
	 *
	 * @param string $from Search string
	 * @param string $to Replacement string
	 */
	function setPair( $from, $to ) {
		$this->data[$from] = $to;
		$this->fss = false;
	}

	/**
	 * Add the given pairs to the replacement array. Pairs in $data override
	 * existing pairs with the same search string.
	 *
	 * Keys are assigned one by one instead of using array_merge(), because
	 * array_merge() renumbers integer-like keys: a pair such as '1' => 'one'
	 * would silently turn into 0 => 'one'.
	 *
	 * @param array $data Map of search string => replacement string
	 */
	function mergeArray( $data ) {
		foreach ( $data as $from => $to ) {
			$this->data[$from] = $to;
		}
		$this->fss = false;
	}

	/**
	 * Add all pairs of another ReplacementArray to this one. Pairs in $other
	 * override existing pairs with the same search string.
	 *
	 * @param ReplacementArray $other
	 */
	function merge( $other ) {
		$this->mergeArray( $other->data );
	}

	/**
	 * Apply the replacement array to a string.
	 *
	 * @param string $subject
	 * @return string
	 */
	function replace( $subject ) {
		if ( function_exists( 'fss_prep_replace' ) ) {
			// Prepare the FSS value lazily; it is reset to false whenever
			// the replacement array changes
			if ( $this->fss === false ) {
				$this->fss = fss_prep_replace( $this->data );
			}
			return fss_exec_replace( $this->fss, $subject );
		} else {
			return strtr( $subject, $this->data );
		}
	}
}
 263+
 264+?>
Property changes on: trunk/phase3/includes/StringUtils.php
___________________________________________________________________
Added: svn:eol-style
1265 + native