r82512 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r82511‎ | r82512 | r82513 >
Date:16:28, 20 February 2011
Author:ialex
Status:ok
Tags:
Comment:
Per Tim Starling, follow-up r76252: move WikiDiff.php to DairikiDiff.php to not confuse with the wikidiff extension
Modified paths:
  • /trunk/phase3/includes/AutoLoader.php (modified) (history)
  • /trunk/phase3/includes/diff/DairikiDiff.php (added) (history)
  • /trunk/phase3/includes/diff/WikiDiff.php (deleted) (history)

Diff [purge]

Index: trunk/phase3/includes/diff/WikiDiff.php
@@ -1,1291 +0,0 @@
2 -<?php
3 -/**
4 - * A PHP diff engine for phpwiki. (Taken from phpwiki-1.3.3)
5 - *
6 - * Copyright © 2000, 2001 Geoffrey T. Dairiki <dairiki@dairiki.org>
7 - * You may copy this code freely under the conditions of the GPL.
8 - *
9 - * @file
10 - * @ingroup DifferenceEngine
11 - * @defgroup DifferenceEngine DifferenceEngine
12 - */
13 -
14 -/**
15 - * @todo document
16 - * @private
17 - * @ingroup DifferenceEngine
18 - */
19 -class _DiffOp {
20 - var $type;
21 - var $orig;
22 - var $closing;
23 -
24 - function reverse() {
25 - trigger_error( 'pure virtual', E_USER_ERROR );
26 - }
27 -
28 - function norig() {
29 - return $this->orig ? sizeof( $this->orig ) : 0;
30 - }
31 -
32 - function nclosing() {
33 - return $this->closing ? sizeof( $this->closing ) : 0;
34 - }
35 -}
36 -
37 -/**
38 - * @todo document
39 - * @private
40 - * @ingroup DifferenceEngine
41 - */
42 -class _DiffOp_Copy extends _DiffOp {
43 - var $type = 'copy';
44 -
45 - function __construct ( $orig, $closing = false ) {
46 - if ( !is_array( $closing ) )
47 - $closing = $orig;
48 - $this->orig = $orig;
49 - $this->closing = $closing;
50 - }
51 -
52 - function reverse() {
53 - return new _DiffOp_Copy( $this->closing, $this->orig );
54 - }
55 -}
56 -
57 -/**
58 - * @todo document
59 - * @private
60 - * @ingroup DifferenceEngine
61 - */
62 -class _DiffOp_Delete extends _DiffOp {
63 - var $type = 'delete';
64 -
65 - function __construct ( $lines ) {
66 - $this->orig = $lines;
67 - $this->closing = false;
68 - }
69 -
70 - function reverse() {
71 - return new _DiffOp_Add( $this->orig );
72 - }
73 -}
74 -
75 -/**
76 - * @todo document
77 - * @private
78 - * @ingroup DifferenceEngine
79 - */
80 -class _DiffOp_Add extends _DiffOp {
81 - var $type = 'add';
82 -
83 - function __construct ( $lines ) {
84 - $this->closing = $lines;
85 - $this->orig = false;
86 - }
87 -
88 - function reverse() {
89 - return new _DiffOp_Delete( $this->closing );
90 - }
91 -}
92 -
93 -/**
94 - * @todo document
95 - * @private
96 - * @ingroup DifferenceEngine
97 - */
98 -class _DiffOp_Change extends _DiffOp {
99 - var $type = 'change';
100 -
101 - function __construct ( $orig, $closing ) {
102 - $this->orig = $orig;
103 - $this->closing = $closing;
104 - }
105 -
106 - function reverse() {
107 - return new _DiffOp_Change( $this->closing, $this->orig );
108 - }
109 -}
110 -
111 -/**
112 - * Class used internally by Diff to actually compute the diffs.
113 - *
114 - * The algorithm used here is mostly lifted from the perl module
115 - * Algorithm::Diff (version 1.06) by Ned Konz, which is available at:
116 - * http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip
117 - *
118 - * More ideas are taken from:
119 - * http://www.ics.uci.edu/~eppstein/161/960229.html
120 - *
121 - * Some ideas are (and a bit of code) are from from analyze.c, from GNU
122 - * diffutils-2.7, which can be found at:
123 - * ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz
124 - *
125 - * closingly, some ideas (subdivision by NCHUNKS > 2, and some optimizations)
126 - * are my own.
127 - *
128 - * Line length limits for robustness added by Tim Starling, 2005-08-31
129 - * Alternative implementation added by Guy Van den Broeck, 2008-07-30
130 - *
131 - * @author Geoffrey T. Dairiki, Tim Starling, Guy Van den Broeck
132 - * @private
133 - * @ingroup DifferenceEngine
134 - */
135 -class _DiffEngine {
136 -
137 - const MAX_XREF_LENGTH = 10000;
138 -
139 - protected $xchanged, $ychanged;
140 -
141 - protected $xv = array(), $yv = array();
142 - protected $xind = array(), $yind = array();
143 -
144 - protected $seq = array(), $in_seq = array();
145 -
146 - protected $lcs = 0;
147 -
148 - function diff ( $from_lines, $to_lines ) {
149 - wfProfileIn( __METHOD__ );
150 -
151 - // Diff and store locally
152 - $this->diff_local( $from_lines, $to_lines );
153 -
154 - // Merge edits when possible
155 - $this->_shift_boundaries( $from_lines, $this->xchanged, $this->ychanged );
156 - $this->_shift_boundaries( $to_lines, $this->ychanged, $this->xchanged );
157 -
158 - // Compute the edit operations.
159 - $n_from = sizeof( $from_lines );
160 - $n_to = sizeof( $to_lines );
161 -
162 - $edits = array();
163 - $xi = $yi = 0;
164 - while ( $xi < $n_from || $yi < $n_to ) {
165 - assert( $yi < $n_to || $this->xchanged[$xi] );
166 - assert( $xi < $n_from || $this->ychanged[$yi] );
167 -
168 - // Skip matching "snake".
169 - $copy = array();
170 - while ( $xi < $n_from && $yi < $n_to
171 - && !$this->xchanged[$xi] && !$this->ychanged[$yi] ) {
172 - $copy[] = $from_lines[$xi++];
173 - ++$yi;
174 - }
175 - if ( $copy ) {
176 - $edits[] = new _DiffOp_Copy( $copy );
177 - }
178 -
179 - // Find deletes & adds.
180 - $delete = array();
181 - while ( $xi < $n_from && $this->xchanged[$xi] ) {
182 - $delete[] = $from_lines[$xi++];
183 - }
184 -
185 - $add = array();
186 - while ( $yi < $n_to && $this->ychanged[$yi] ) {
187 - $add[] = $to_lines[$yi++];
188 - }
189 -
190 - if ( $delete && $add ) {
191 - $edits[] = new _DiffOp_Change( $delete, $add );
192 - } elseif ( $delete ) {
193 - $edits[] = new _DiffOp_Delete( $delete );
194 - } elseif ( $add ) {
195 - $edits[] = new _DiffOp_Add( $add );
196 - }
197 - }
198 - wfProfileOut( __METHOD__ );
199 - return $edits;
200 - }
201 -
202 - function diff_local ( $from_lines, $to_lines ) {
203 - global $wgExternalDiffEngine;
204 - wfProfileIn( __METHOD__ );
205 -
206 - if ( $wgExternalDiffEngine == 'wikidiff3' ) {
207 - // wikidiff3
208 - $wikidiff3 = new WikiDiff3();
209 - $wikidiff3->diff( $from_lines, $to_lines );
210 - $this->xchanged = $wikidiff3->removed;
211 - $this->ychanged = $wikidiff3->added;
212 - unset( $wikidiff3 );
213 - } else {
214 - // old diff
215 - $n_from = sizeof( $from_lines );
216 - $n_to = sizeof( $to_lines );
217 - $this->xchanged = $this->ychanged = array();
218 - $this->xv = $this->yv = array();
219 - $this->xind = $this->yind = array();
220 - unset( $this->seq );
221 - unset( $this->in_seq );
222 - unset( $this->lcs );
223 -
224 - // Skip leading common lines.
225 - for ( $skip = 0; $skip < $n_from && $skip < $n_to; $skip++ ) {
226 - if ( $from_lines[$skip] !== $to_lines[$skip] )
227 - break;
228 - $this->xchanged[$skip] = $this->ychanged[$skip] = false;
229 - }
230 - // Skip trailing common lines.
231 - $xi = $n_from; $yi = $n_to;
232 - for ( $endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++ ) {
233 - if ( $from_lines[$xi] !== $to_lines[$yi] )
234 - break;
235 - $this->xchanged[$xi] = $this->ychanged[$yi] = false;
236 - }
237 -
238 - // Ignore lines which do not exist in both files.
239 - for ( $xi = $skip; $xi < $n_from - $endskip; $xi++ ) {
240 - $xhash[$this->_line_hash( $from_lines[$xi] )] = 1;
241 - }
242 -
243 - for ( $yi = $skip; $yi < $n_to - $endskip; $yi++ ) {
244 - $line = $to_lines[$yi];
245 - if ( ( $this->ychanged[$yi] = empty( $xhash[$this->_line_hash( $line )] ) ) )
246 - continue;
247 - $yhash[$this->_line_hash( $line )] = 1;
248 - $this->yv[] = $line;
249 - $this->yind[] = $yi;
250 - }
251 - for ( $xi = $skip; $xi < $n_from - $endskip; $xi++ ) {
252 - $line = $from_lines[$xi];
253 - if ( ( $this->xchanged[$xi] = empty( $yhash[$this->_line_hash( $line )] ) ) )
254 - continue;
255 - $this->xv[] = $line;
256 - $this->xind[] = $xi;
257 - }
258 -
259 - // Find the LCS.
260 - $this->_compareseq( 0, sizeof( $this->xv ), 0, sizeof( $this->yv ) );
261 - }
262 - wfProfileOut( __METHOD__ );
263 - }
264 -
265 - /**
266 - * Returns the whole line if it's small enough, or the MD5 hash otherwise
267 - */
268 - function _line_hash( $line ) {
269 - if ( strlen( $line ) > self::MAX_XREF_LENGTH ) {
270 - return md5( $line );
271 - } else {
272 - return $line;
273 - }
274 - }
275 -
276 - /* Divide the Largest Common Subsequence (LCS) of the sequences
277 - * [XOFF, XLIM) and [YOFF, YLIM) into NCHUNKS approximately equally
278 - * sized segments.
279 - *
280 - * Returns (LCS, PTS). LCS is the length of the LCS. PTS is an
281 - * array of NCHUNKS+1 (X, Y) indexes giving the diving points between
282 - * sub sequences. The first sub-sequence is contained in [X0, X1),
283 - * [Y0, Y1), the second in [X1, X2), [Y1, Y2) and so on. Note
284 - * that (X0, Y0) == (XOFF, YOFF) and
285 - * (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM).
286 - *
287 - * This function assumes that the first lines of the specified portions
288 - * of the two files do not match, and likewise that the last lines do not
289 - * match. The caller must trim matching lines from the beginning and end
290 - * of the portions it is going to specify.
291 - */
292 - function _diag ( $xoff, $xlim, $yoff, $ylim, $nchunks ) {
293 - $flip = false;
294 -
295 - if ( $xlim - $xoff > $ylim - $yoff ) {
296 - // Things seems faster (I'm not sure I understand why)
297 - // when the shortest sequence in X.
298 - $flip = true;
299 - list ( $xoff, $xlim, $yoff, $ylim ) = array( $yoff, $ylim, $xoff, $xlim );
300 - }
301 -
302 - if ( $flip ) {
303 - for ( $i = $ylim - 1; $i >= $yoff; $i-- ) {
304 - $ymatches[$this->xv[$i]][] = $i;
305 - }
306 - } else {
307 - for ( $i = $ylim - 1; $i >= $yoff; $i-- ) {
308 - $ymatches[$this->yv[$i]][] = $i;
309 - }
310 - }
311 -
312 - $this->lcs = 0;
313 - $this->seq[0] = $yoff - 1;
314 - $this->in_seq = array();
315 - $ymids[0] = array();
316 -
317 - $numer = $xlim - $xoff + $nchunks - 1;
318 - $x = $xoff;
319 - for ( $chunk = 0; $chunk < $nchunks; $chunk++ ) {
320 - if ( $chunk > 0 ) {
321 - for ( $i = 0; $i <= $this->lcs; $i++ ) {
322 - $ymids[$i][$chunk -1] = $this->seq[$i];
323 - }
324 - }
325 -
326 - $x1 = $xoff + (int)( ( $numer + ( $xlim -$xoff ) * $chunk ) / $nchunks );
327 - for ( ; $x < $x1; $x++ ) {
328 - $line = $flip ? $this->yv[$x] : $this->xv[$x];
329 - if ( empty( $ymatches[$line] ) ) {
330 - continue;
331 - }
332 - $matches = $ymatches[$line];
333 - reset( $matches );
334 - while ( list( , $y ) = each( $matches ) ) {
335 - if ( empty( $this->in_seq[$y] ) ) {
336 - $k = $this->_lcs_pos( $y );
337 - assert( $k > 0 );
338 - $ymids[$k] = $ymids[$k -1];
339 - break;
340 - }
341 - }
342 - while ( list ( , $y ) = each( $matches ) ) {
343 - if ( $y > $this->seq[$k -1] ) {
344 - assert( $y < $this->seq[$k] );
345 - // Optimization: this is a common case:
346 - // next match is just replacing previous match.
347 - $this->in_seq[$this->seq[$k]] = false;
348 - $this->seq[$k] = $y;
349 - $this->in_seq[$y] = 1;
350 - } else if ( empty( $this->in_seq[$y] ) ) {
351 - $k = $this->_lcs_pos( $y );
352 - assert( $k > 0 );
353 - $ymids[$k] = $ymids[$k -1];
354 - }
355 - }
356 - }
357 - }
358 -
359 - $seps[] = $flip ? array( $yoff, $xoff ) : array( $xoff, $yoff );
360 - $ymid = $ymids[$this->lcs];
361 - for ( $n = 0; $n < $nchunks - 1; $n++ ) {
362 - $x1 = $xoff + (int)( ( $numer + ( $xlim - $xoff ) * $n ) / $nchunks );
363 - $y1 = $ymid[$n] + 1;
364 - $seps[] = $flip ? array( $y1, $x1 ) : array( $x1, $y1 );
365 - }
366 - $seps[] = $flip ? array( $ylim, $xlim ) : array( $xlim, $ylim );
367 -
368 - return array( $this->lcs, $seps );
369 - }
370 -
371 - function _lcs_pos ( $ypos ) {
372 - $end = $this->lcs;
373 - if ( $end == 0 || $ypos > $this->seq[$end] ) {
374 - $this->seq[++$this->lcs] = $ypos;
375 - $this->in_seq[$ypos] = 1;
376 - return $this->lcs;
377 - }
378 -
379 - $beg = 1;
380 - while ( $beg < $end ) {
381 - $mid = (int)( ( $beg + $end ) / 2 );
382 - if ( $ypos > $this->seq[$mid] )
383 - $beg = $mid + 1;
384 - else
385 - $end = $mid;
386 - }
387 -
388 - assert( $ypos != $this->seq[$end] );
389 -
390 - $this->in_seq[$this->seq[$end]] = false;
391 - $this->seq[$end] = $ypos;
392 - $this->in_seq[$ypos] = 1;
393 - return $end;
394 - }
395 -
396 - /* Find LCS of two sequences.
397 - *
398 - * The results are recorded in the vectors $this->{x,y}changed[], by
399 - * storing a 1 in the element for each line that is an insertion
400 - * or deletion (ie. is not in the LCS).
401 - *
402 - * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
403 - *
404 - * Note that XLIM, YLIM are exclusive bounds.
405 - * All line numbers are origin-0 and discarded lines are not counted.
406 - */
407 - function _compareseq ( $xoff, $xlim, $yoff, $ylim ) {
408 - // Slide down the bottom initial diagonal.
409 - while ( $xoff < $xlim && $yoff < $ylim
410 - && $this->xv[$xoff] == $this->yv[$yoff] ) {
411 - ++$xoff;
412 - ++$yoff;
413 - }
414 -
415 - // Slide up the top initial diagonal.
416 - while ( $xlim > $xoff && $ylim > $yoff
417 - && $this->xv[$xlim - 1] == $this->yv[$ylim - 1] ) {
418 - --$xlim;
419 - --$ylim;
420 - }
421 -
422 - if ( $xoff == $xlim || $yoff == $ylim ) {
423 - $lcs = 0;
424 - } else {
425 - // This is ad hoc but seems to work well.
426 - // $nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5);
427 - // $nchunks = max(2,min(8,(int)$nchunks));
428 - $nchunks = min( 7, $xlim - $xoff, $ylim - $yoff ) + 1;
429 - list ( $lcs, $seps )
430 - = $this->_diag( $xoff, $xlim, $yoff, $ylim, $nchunks );
431 - }
432 -
433 - if ( $lcs == 0 ) {
434 - // X and Y sequences have no common subsequence:
435 - // mark all changed.
436 - while ( $yoff < $ylim ) {
437 - $this->ychanged[$this->yind[$yoff++]] = 1;
438 - }
439 - while ( $xoff < $xlim ) {
440 - $this->xchanged[$this->xind[$xoff++]] = 1;
441 - }
442 - } else {
443 - // Use the partitions to split this problem into subproblems.
444 - reset( $seps );
445 - $pt1 = $seps[0];
446 - while ( $pt2 = next( $seps ) ) {
447 - $this->_compareseq ( $pt1[0], $pt2[0], $pt1[1], $pt2[1] );
448 - $pt1 = $pt2;
449 - }
450 - }
451 - }
452 -
453 - /* Adjust inserts/deletes of identical lines to join changes
454 - * as much as possible.
455 - *
456 - * We do something when a run of changed lines include a
457 - * line at one end and has an excluded, identical line at the other.
458 - * We are free to choose which identical line is included.
459 - * `compareseq' usually chooses the one at the beginning,
460 - * but usually it is cleaner to consider the following identical line
461 - * to be the "change".
462 - *
463 - * This is extracted verbatim from analyze.c (GNU diffutils-2.7).
464 - */
465 - function _shift_boundaries ( $lines, &$changed, $other_changed ) {
466 - wfProfileIn( __METHOD__ );
467 - $i = 0;
468 - $j = 0;
469 -
470 - assert( 'sizeof($lines) == sizeof($changed)' );
471 - $len = sizeof( $lines );
472 - $other_len = sizeof( $other_changed );
473 -
474 - while ( 1 ) {
475 - /*
476 - * Scan forwards to find beginning of another run of changes.
477 - * Also keep track of the corresponding point in the other file.
478 - *
479 - * Throughout this code, $i and $j are adjusted together so that
480 - * the first $i elements of $changed and the first $j elements
481 - * of $other_changed both contain the same number of zeros
482 - * (unchanged lines).
483 - * Furthermore, $j is always kept so that $j == $other_len or
484 - * $other_changed[$j] == false.
485 - */
486 - while ( $j < $other_len && $other_changed[$j] ) {
487 - $j++;
488 - }
489 -
490 - while ( $i < $len && ! $changed[$i] ) {
491 - assert( '$j < $other_len && ! $other_changed[$j]' );
492 - $i++; $j++;
493 - while ( $j < $other_len && $other_changed[$j] )
494 - $j++;
495 - }
496 -
497 - if ( $i == $len ) {
498 - break;
499 - }
500 -
501 - $start = $i;
502 -
503 - // Find the end of this run of changes.
504 - while ( ++$i < $len && $changed[$i] ) {
505 - continue;
506 - }
507 -
508 - do {
509 - /*
510 - * Record the length of this run of changes, so that
511 - * we can later determine whether the run has grown.
512 - */
513 - $runlength = $i - $start;
514 -
515 - /*
516 - * Move the changed region back, so long as the
517 - * previous unchanged line matches the last changed one.
518 - * This merges with previous changed regions.
519 - */
520 - while ( $start > 0 && $lines[$start - 1] == $lines[$i - 1] ) {
521 - $changed[--$start] = 1;
522 - $changed[--$i] = false;
523 - while ( $start > 0 && $changed[$start - 1] ) {
524 - $start--;
525 - }
526 - assert( '$j > 0' );
527 - while ( $other_changed[--$j] ) {
528 - continue;
529 - }
530 - assert( '$j >= 0 && !$other_changed[$j]' );
531 - }
532 -
533 - /*
534 - * Set CORRESPONDING to the end of the changed run, at the last
535 - * point where it corresponds to a changed run in the other file.
536 - * CORRESPONDING == LEN means no such point has been found.
537 - */
538 - $corresponding = $j < $other_len ? $i : $len;
539 -
540 - /*
541 - * Move the changed region forward, so long as the
542 - * first changed line matches the following unchanged one.
543 - * This merges with following changed regions.
544 - * Do this second, so that if there are no merges,
545 - * the changed region is moved forward as far as possible.
546 - */
547 - while ( $i < $len && $lines[$start] == $lines[$i] ) {
548 - $changed[$start++] = false;
549 - $changed[$i++] = 1;
550 - while ( $i < $len && $changed[$i] ) {
551 - $i++;
552 - }
553 -
554 - assert( '$j < $other_len && ! $other_changed[$j]' );
555 - $j++;
556 - if ( $j < $other_len && $other_changed[$j] ) {
557 - $corresponding = $i;
558 - while ( $j < $other_len && $other_changed[$j] ) {
559 - $j++;
560 - }
561 - }
562 - }
563 - } while ( $runlength != $i - $start );
564 -
565 - /*
566 - * If possible, move the fully-merged run of changes
567 - * back to a corresponding run in the other file.
568 - */
569 - while ( $corresponding < $i ) {
570 - $changed[--$start] = 1;
571 - $changed[--$i] = 0;
572 - assert( '$j > 0' );
573 - while ( $other_changed[--$j] ) {
574 - continue;
575 - }
576 - assert( '$j >= 0 && !$other_changed[$j]' );
577 - }
578 - }
579 - wfProfileOut( __METHOD__ );
580 - }
581 -}
582 -
583 -/**
584 - * Class representing a 'diff' between two sequences of strings.
585 - * @todo document
586 - * @private
587 - * @ingroup DifferenceEngine
588 - */
589 -class Diff
590 -{
591 - var $edits;
592 -
593 - /**
594 - * Constructor.
595 - * Computes diff between sequences of strings.
596 - *
597 - * @param $from_lines array An array of strings.
598 - * (Typically these are lines from a file.)
599 - * @param $to_lines array An array of strings.
600 - */
601 - function __construct( $from_lines, $to_lines ) {
602 - $eng = new _DiffEngine;
603 - $this->edits = $eng->diff( $from_lines, $to_lines );
604 - // $this->_check($from_lines, $to_lines);
605 - }
606 -
607 - /**
608 - * Compute reversed Diff.
609 - *
610 - * SYNOPSIS:
611 - *
612 - * $diff = new Diff($lines1, $lines2);
613 - * $rev = $diff->reverse();
614 - * @return object A Diff object representing the inverse of the
615 - * original diff.
616 - */
617 - function reverse () {
618 - $rev = $this;
619 - $rev->edits = array();
620 - foreach ( $this->edits as $edit ) {
621 - $rev->edits[] = $edit->reverse();
622 - }
623 - return $rev;
624 - }
625 -
626 - /**
627 - * Check for empty diff.
628 - *
629 - * @return bool True iff two sequences were identical.
630 - */
631 - function isEmpty () {
632 - foreach ( $this->edits as $edit ) {
633 - if ( $edit->type != 'copy' ) {
634 - return false;
635 - }
636 - }
637 - return true;
638 - }
639 -
640 - /**
641 - * Compute the length of the Longest Common Subsequence (LCS).
642 - *
643 - * This is mostly for diagnostic purposed.
644 - *
645 - * @return int The length of the LCS.
646 - */
647 - function lcs () {
648 - $lcs = 0;
649 - foreach ( $this->edits as $edit ) {
650 - if ( $edit->type == 'copy' ) {
651 - $lcs += sizeof( $edit->orig );
652 - }
653 - }
654 - return $lcs;
655 - }
656 -
657 - /**
658 - * Get the original set of lines.
659 - *
660 - * This reconstructs the $from_lines parameter passed to the
661 - * constructor.
662 - *
663 - * @return array The original sequence of strings.
664 - */
665 - function orig() {
666 - $lines = array();
667 -
668 - foreach ( $this->edits as $edit ) {
669 - if ( $edit->orig ) {
670 - array_splice( $lines, sizeof( $lines ), 0, $edit->orig );
671 - }
672 - }
673 - return $lines;
674 - }
675 -
676 - /**
677 - * Get the closing set of lines.
678 - *
679 - * This reconstructs the $to_lines parameter passed to the
680 - * constructor.
681 - *
682 - * @return array The sequence of strings.
683 - */
684 - function closing() {
685 - $lines = array();
686 -
687 - foreach ( $this->edits as $edit ) {
688 - if ( $edit->closing ) {
689 - array_splice( $lines, sizeof( $lines ), 0, $edit->closing );
690 - }
691 - }
692 - return $lines;
693 - }
694 -
695 - /**
696 - * Check a Diff for validity.
697 - *
698 - * This is here only for debugging purposes.
699 - */
700 - function _check ( $from_lines, $to_lines ) {
701 - wfProfileIn( __METHOD__ );
702 - if ( serialize( $from_lines ) != serialize( $this->orig() ) ) {
703 - trigger_error( "Reconstructed original doesn't match", E_USER_ERROR );
704 - }
705 - if ( serialize( $to_lines ) != serialize( $this->closing() ) ) {
706 - trigger_error( "Reconstructed closing doesn't match", E_USER_ERROR );
707 - }
708 -
709 - $rev = $this->reverse();
710 - if ( serialize( $to_lines ) != serialize( $rev->orig() ) ) {
711 - trigger_error( "Reversed original doesn't match", E_USER_ERROR );
712 - }
713 - if ( serialize( $from_lines ) != serialize( $rev->closing() ) ) {
714 - trigger_error( "Reversed closing doesn't match", E_USER_ERROR );
715 - }
716 -
717 -
718 - $prevtype = 'none';
719 - foreach ( $this->edits as $edit ) {
720 - if ( $prevtype == $edit->type ) {
721 - trigger_error( "Edit sequence is non-optimal", E_USER_ERROR );
722 - }
723 - $prevtype = $edit->type;
724 - }
725 -
726 - $lcs = $this->lcs();
727 - trigger_error( 'Diff okay: LCS = ' . $lcs, E_USER_NOTICE );
728 - wfProfileOut( __METHOD__ );
729 - }
730 -}
731 -
732 -/**
733 - * @todo document, bad name.
734 - * @private
735 - * @ingroup DifferenceEngine
736 - */
737 -class MappedDiff extends Diff
738 -{
739 - /**
740 - * Constructor.
741 - *
742 - * Computes diff between sequences of strings.
743 - *
744 - * This can be used to compute things like
745 - * case-insensitve diffs, or diffs which ignore
746 - * changes in white-space.
747 - *
748 - * @param $from_lines array An array of strings.
749 - * (Typically these are lines from a file.)
750 - *
751 - * @param $to_lines array An array of strings.
752 - *
753 - * @param $mapped_from_lines array This array should
754 - * have the same size number of elements as $from_lines.
755 - * The elements in $mapped_from_lines and
756 - * $mapped_to_lines are what is actually compared
757 - * when computing the diff.
758 - *
759 - * @param $mapped_to_lines array This array should
760 - * have the same number of elements as $to_lines.
761 - */
762 - function __construct( $from_lines, $to_lines,
763 - $mapped_from_lines, $mapped_to_lines ) {
764 - wfProfileIn( __METHOD__ );
765 -
766 - assert( sizeof( $from_lines ) == sizeof( $mapped_from_lines ) );
767 - assert( sizeof( $to_lines ) == sizeof( $mapped_to_lines ) );
768 -
769 - parent::__construct( $mapped_from_lines, $mapped_to_lines );
770 -
771 - $xi = $yi = 0;
772 - for ( $i = 0; $i < sizeof( $this->edits ); $i++ ) {
773 - $orig = &$this->edits[$i]->orig;
774 - if ( is_array( $orig ) ) {
775 - $orig = array_slice( $from_lines, $xi, sizeof( $orig ) );
776 - $xi += sizeof( $orig );
777 - }
778 -
779 - $closing = &$this->edits[$i]->closing;
780 - if ( is_array( $closing ) ) {
781 - $closing = array_slice( $to_lines, $yi, sizeof( $closing ) );
782 - $yi += sizeof( $closing );
783 - }
784 - }
785 - wfProfileOut( __METHOD__ );
786 - }
787 -}
788 -
789 -/**
790 - * A class to format Diffs
791 - *
792 - * This class formats the diff in classic diff format.
793 - * It is intended that this class be customized via inheritance,
794 - * to obtain fancier outputs.
795 - * @todo document
796 - * @private
797 - * @ingroup DifferenceEngine
798 - */
799 -class DiffFormatter {
800 - /**
801 - * Number of leading context "lines" to preserve.
802 - *
803 - * This should be left at zero for this class, but subclasses
804 - * may want to set this to other values.
805 - */
806 - var $leading_context_lines = 0;
807 -
808 - /**
809 - * Number of trailing context "lines" to preserve.
810 - *
811 - * This should be left at zero for this class, but subclasses
812 - * may want to set this to other values.
813 - */
814 - var $trailing_context_lines = 0;
815 -
816 - /**
817 - * Format a diff.
818 - *
819 - * @param $diff Diff A Diff object.
820 - * @return string The formatted output.
821 - */
822 - function format( $diff ) {
823 - wfProfileIn( __METHOD__ );
824 -
825 - $xi = $yi = 1;
826 - $block = false;
827 - $context = array();
828 -
829 - $nlead = $this->leading_context_lines;
830 - $ntrail = $this->trailing_context_lines;
831 -
832 - $this->_start_diff();
833 -
834 - foreach ( $diff->edits as $edit ) {
835 - if ( $edit->type == 'copy' ) {
836 - if ( is_array( $block ) ) {
837 - if ( sizeof( $edit->orig ) <= $nlead + $ntrail ) {
838 - $block[] = $edit;
839 - }
840 - else {
841 - if ( $ntrail ) {
842 - $context = array_slice( $edit->orig, 0, $ntrail );
843 - $block[] = new _DiffOp_Copy( $context );
844 - }
845 - $this->_block( $x0, $ntrail + $xi - $x0,
846 - $y0, $ntrail + $yi - $y0,
847 - $block );
848 - $block = false;
849 - }
850 - }
851 - $context = $edit->orig;
852 - } else {
853 - if ( ! is_array( $block ) ) {
854 - $context = array_slice( $context, sizeof( $context ) - $nlead );
855 - $x0 = $xi - sizeof( $context );
856 - $y0 = $yi - sizeof( $context );
857 - $block = array();
858 - if ( $context ) {
859 - $block[] = new _DiffOp_Copy( $context );
860 - }
861 - }
862 - $block[] = $edit;
863 - }
864 -
865 - if ( $edit->orig ) {
866 - $xi += sizeof( $edit->orig );
867 - }
868 - if ( $edit->closing ) {
869 - $yi += sizeof( $edit->closing );
870 - }
871 - }
872 -
873 - if ( is_array( $block ) ) {
874 - $this->_block( $x0, $xi - $x0,
875 - $y0, $yi - $y0,
876 - $block );
877 - }
878 -
879 - $end = $this->_end_diff();
880 - wfProfileOut( __METHOD__ );
881 - return $end;
882 - }
883 -
884 - function _block( $xbeg, $xlen, $ybeg, $ylen, &$edits ) {
885 - wfProfileIn( __METHOD__ );
886 - $this->_start_block( $this->_block_header( $xbeg, $xlen, $ybeg, $ylen ) );
887 - foreach ( $edits as $edit ) {
888 - if ( $edit->type == 'copy' ) {
889 - $this->_context( $edit->orig );
890 - } elseif ( $edit->type == 'add' ) {
891 - $this->_added( $edit->closing );
892 - } elseif ( $edit->type == 'delete' ) {
893 - $this->_deleted( $edit->orig );
894 - } elseif ( $edit->type == 'change' ) {
895 - $this->_changed( $edit->orig, $edit->closing );
896 - } else {
897 - trigger_error( 'Unknown edit type', E_USER_ERROR );
898 - }
899 - }
900 - $this->_end_block();
901 - wfProfileOut( __METHOD__ );
902 - }
903 -
904 - function _start_diff() {
905 - ob_start();
906 - }
907 -
908 - function _end_diff() {
909 - $val = ob_get_contents();
910 - ob_end_clean();
911 - return $val;
912 - }
913 -
914 - function _block_header( $xbeg, $xlen, $ybeg, $ylen ) {
915 - if ( $xlen > 1 ) {
916 - $xbeg .= "," . ( $xbeg + $xlen - 1 );
917 - }
918 - if ( $ylen > 1 ) {
919 - $ybeg .= "," . ( $ybeg + $ylen - 1 );
920 - }
921 -
922 - return $xbeg . ( $xlen ? ( $ylen ? 'c' : 'd' ) : 'a' ) . $ybeg;
923 - }
924 -
925 - function _start_block( $header ) {
926 - echo $header . "\n";
927 - }
928 -
929 - function _end_block() {
930 - }
931 -
932 - function _lines( $lines, $prefix = ' ' ) {
933 - foreach ( $lines as $line ) {
934 - echo "$prefix $line\n";
935 - }
936 - }
937 -
938 - function _context( $lines ) {
939 - $this->_lines( $lines );
940 - }
941 -
942 - function _added( $lines ) {
943 - $this->_lines( $lines, '>' );
944 - }
945 - function _deleted( $lines ) {
946 - $this->_lines( $lines, '<' );
947 - }
948 -
949 - function _changed( $orig, $closing ) {
950 - $this->_deleted( $orig );
951 - echo "---\n";
952 - $this->_added( $closing );
953 - }
954 -}
955 -
956 -/**
957 - * A formatter that outputs unified diffs
958 - * @ingroup DifferenceEngine
959 - */
960 -
961 -class UnifiedDiffFormatter extends DiffFormatter {
962 - var $leading_context_lines = 2;
963 - var $trailing_context_lines = 2;
964 -
965 - function _added( $lines ) {
966 - $this->_lines( $lines, '+' );
967 - }
968 - function _deleted( $lines ) {
969 - $this->_lines( $lines, '-' );
970 - }
971 - function _changed( $orig, $closing ) {
972 - $this->_deleted( $orig );
973 - $this->_added( $closing );
974 - }
975 - function _block_header( $xbeg, $xlen, $ybeg, $ylen ) {
976 - return "@@ -$xbeg,$xlen +$ybeg,$ylen @@";
977 - }
978 -}
979 -
980 -/**
981 - * A pseudo-formatter that just passes along the Diff::$edits array
982 - * @ingroup DifferenceEngine
983 - */
984 -class ArrayDiffFormatter extends DiffFormatter {
985 - function format( $diff ) {
986 - $oldline = 1;
987 - $newline = 1;
988 - $retval = array();
989 - foreach ( $diff->edits as $edit ) {
990 - switch( $edit->type ) {
991 - case 'add':
992 - foreach ( $edit->closing as $l ) {
993 - $retval[] = array(
994 - 'action' => 'add',
995 - 'new' => $l,
996 - 'newline' => $newline++
997 - );
998 - }
999 - break;
1000 - case 'delete':
1001 - foreach ( $edit->orig as $l ) {
1002 - $retval[] = array(
1003 - 'action' => 'delete',
1004 - 'old' => $l,
1005 - 'oldline' => $oldline++,
1006 - );
1007 - }
1008 - break;
1009 - case 'change':
1010 - foreach ( $edit->orig as $i => $l ) {
1011 - $retval[] = array(
1012 - 'action' => 'change',
1013 - 'old' => $l,
1014 - 'new' => @$edit->closing[$i],
1015 - 'oldline' => $oldline++,
1016 - 'newline' => $newline++,
1017 - );
1018 - }
1019 - break;
1020 - case 'copy':
1021 - $oldline += count( $edit->orig );
1022 - $newline += count( $edit->orig );
1023 - }
1024 - }
1025 - return $retval;
1026 - }
1027 -}
1028 -
1029 -/**
1030 - * Additions by Axel Boldt follow, partly taken from diff.php, phpwiki-1.3.3
1031 - *
1032 - */
1033 -
1034 -define( 'NBSP', '&#160;' ); // iso-8859-x non-breaking space.
1035 -
1036 -/**
1037 - * @todo document
1038 - * @private
1039 - * @ingroup DifferenceEngine
1040 - */
1041 -class _HWLDF_WordAccumulator {
1042 - function __construct () {
1043 - $this->_lines = array();
1044 - $this->_line = '';
1045 - $this->_group = '';
1046 - $this->_tag = '';
1047 - }
1048 -
1049 - function _flushGroup ( $new_tag ) {
1050 - if ( $this->_group !== '' ) {
1051 - if ( $this->_tag == 'ins' ) {
1052 - $this->_line .= '<ins class="diffchange diffchange-inline">' .
1053 - htmlspecialchars ( $this->_group ) . '</ins>';
1054 - } elseif ( $this->_tag == 'del' ) {
1055 - $this->_line .= '<del class="diffchange diffchange-inline">' .
1056 - htmlspecialchars ( $this->_group ) . '</del>';
1057 - } else {
1058 - $this->_line .= htmlspecialchars ( $this->_group );
1059 - }
1060 - }
1061 - $this->_group = '';
1062 - $this->_tag = $new_tag;
1063 - }
1064 -
1065 - function _flushLine ( $new_tag ) {
1066 - $this->_flushGroup( $new_tag );
1067 - if ( $this->_line != '' ) {
1068 - array_push ( $this->_lines, $this->_line );
1069 - } else {
1070 - # make empty lines visible by inserting an NBSP
1071 - array_push ( $this->_lines, NBSP );
1072 - }
1073 - $this->_line = '';
1074 - }
1075 -
1076 - function addWords ( $words, $tag = '' ) {
1077 - if ( $tag != $this->_tag ) {
1078 - $this->_flushGroup( $tag );
1079 - }
1080 -
1081 - foreach ( $words as $word ) {
1082 - // new-line should only come as first char of word.
1083 - if ( $word == '' ) {
1084 - continue;
1085 - }
1086 - if ( $word[0] == "\n" ) {
1087 - $this->_flushLine( $tag );
1088 - $word = substr( $word, 1 );
1089 - }
1090 - assert( !strstr( $word, "\n" ) );
1091 - $this->_group .= $word;
1092 - }
1093 - }
1094 -
1095 - function getLines() {
1096 - $this->_flushLine( '~done' );
1097 - return $this->_lines;
1098 - }
1099 -}
1100 -
1101 -/**
1102 - * @todo document
1103 - * @private
1104 - * @ingroup DifferenceEngine
1105 - */
1106 -class WordLevelDiff extends MappedDiff {
1107 - const MAX_LINE_LENGTH = 10000;
1108 -
1109 - function __construct ( $orig_lines, $closing_lines ) {
1110 - wfProfileIn( __METHOD__ );
1111 -
1112 - list ( $orig_words, $orig_stripped ) = $this->_split( $orig_lines );
1113 - list ( $closing_words, $closing_stripped ) = $this->_split( $closing_lines );
1114 -
1115 - parent::__construct( $orig_words, $closing_words,
1116 - $orig_stripped, $closing_stripped );
1117 - wfProfileOut( __METHOD__ );
1118 - }
1119 -
1120 - function _split( $lines ) {
1121 - wfProfileIn( __METHOD__ );
1122 -
1123 - $words = array();
1124 - $stripped = array();
1125 - $first = true;
1126 - foreach ( $lines as $line ) {
1127 - # If the line is too long, just pretend the entire line is one big word
1128 - # This prevents resource exhaustion problems
1129 - if ( $first ) {
1130 - $first = false;
1131 - } else {
1132 - $words[] = "\n";
1133 - $stripped[] = "\n";
1134 - }
1135 - if ( strlen( $line ) > self::MAX_LINE_LENGTH ) {
1136 - $words[] = $line;
1137 - $stripped[] = $line;
1138 - } else {
1139 - $m = array();
1140 - if ( preg_match_all( '/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
1141 - $line, $m ) )
1142 - {
1143 - $words = array_merge( $words, $m[0] );
1144 - $stripped = array_merge( $stripped, $m[1] );
1145 - }
1146 - }
1147 - }
1148 - wfProfileOut( __METHOD__ );
1149 - return array( $words, $stripped );
1150 - }
1151 -
1152 - function orig () {
1153 - wfProfileIn( __METHOD__ );
1154 - $orig = new _HWLDF_WordAccumulator;
1155 -
1156 - foreach ( $this->edits as $edit ) {
1157 - if ( $edit->type == 'copy' ) {
1158 - $orig->addWords( $edit->orig );
1159 - } elseif ( $edit->orig ) {
1160 - $orig->addWords( $edit->orig, 'del' );
1161 - }
1162 - }
1163 - $lines = $orig->getLines();
1164 - wfProfileOut( __METHOD__ );
1165 - return $lines;
1166 - }
1167 -
1168 - function closing () {
1169 - wfProfileIn( __METHOD__ );
1170 - $closing = new _HWLDF_WordAccumulator;
1171 -
1172 - foreach ( $this->edits as $edit ) {
1173 - if ( $edit->type == 'copy' ) {
1174 - $closing->addWords( $edit->closing );
1175 - } elseif ( $edit->closing ) {
1176 - $closing->addWords( $edit->closing, 'ins' );
1177 - }
1178 - }
1179 - $lines = $closing->getLines();
1180 - wfProfileOut( __METHOD__ );
1181 - return $lines;
1182 - }
1183 -}
1184 -
1185 -/**
1186 - * Wikipedia Table style diff formatter.
1187 - * @todo document
1188 - * @private
1189 - * @ingroup DifferenceEngine
1190 - */
1191 -class TableDiffFormatter extends DiffFormatter {
1192 - function __construct() {
1193 - $this->leading_context_lines = 2;
1194 - $this->trailing_context_lines = 2;
1195 - }
1196 -
1197 - public static function escapeWhiteSpace( $msg ) {
1198 - $msg = preg_replace( '/^ /m', '&#160; ', $msg );
1199 - $msg = preg_replace( '/ $/m', ' &#160;', $msg );
1200 - $msg = preg_replace( '/ /', '&#160; ', $msg );
1201 - return $msg;
1202 - }
1203 -
1204 - function _block_header( $xbeg, $xlen, $ybeg, $ylen ) {
1205 - $r = '<tr><td colspan="2" class="diff-lineno"><!--LINE ' . $xbeg . "--></td>\n" .
1206 - '<td colspan="2" class="diff-lineno"><!--LINE ' . $ybeg . "--></td></tr>\n";
1207 - return $r;
1208 - }
1209 -
1210 - function _start_block( $header ) {
1211 - echo $header;
1212 - }
1213 -
1214 - function _end_block() {
1215 - }
1216 -
1217 - function _lines( $lines, $prefix = ' ', $color = 'white' ) {
1218 - }
1219 -
1220 - # HTML-escape parameter before calling this
1221 - function addedLine( $line ) {
1222 - return $this->wrapLine( '+', 'diff-addedline', $line );
1223 - }
1224 -
1225 - # HTML-escape parameter before calling this
1226 - function deletedLine( $line ) {
1227 - return $this->wrapLine( '−', 'diff-deletedline', $line );
1228 - }
1229 -
1230 - # HTML-escape parameter before calling this
1231 - function contextLine( $line ) {
1232 - return $this->wrapLine( '&#160;', 'diff-context', $line );
1233 - }
1234 -
1235 - private function wrapLine( $marker, $class, $line ) {
1236 - if ( $line !== '' ) {
1237 - // The <div> wrapper is needed for 'overflow: auto' style to scroll properly
1238 - $line = Xml::tags( 'div', null, $this->escapeWhiteSpace( $line ) );
1239 - }
1240 - return "<td class='diff-marker'>$marker</td><td class='$class'>$line</td>";
1241 - }
1242 -
1243 - function emptyLine() {
1244 - return '<td colspan="2">&#160;</td>';
1245 - }
1246 -
1247 - function _added( $lines ) {
1248 - foreach ( $lines as $line ) {
1249 - echo '<tr>' . $this->emptyLine() .
1250 - $this->addedLine( '<ins class="diffchange">' .
1251 - htmlspecialchars ( $line ) . '</ins>' ) . "</tr>\n";
1252 - }
1253 - }
1254 -
1255 - function _deleted( $lines ) {
1256 - foreach ( $lines as $line ) {
1257 - echo '<tr>' . $this->deletedLine( '<del class="diffchange">' .
1258 - htmlspecialchars ( $line ) . '</del>' ) .
1259 - $this->emptyLine() . "</tr>\n";
1260 - }
1261 - }
1262 -
1263 - function _context( $lines ) {
1264 - foreach ( $lines as $line ) {
1265 - echo '<tr>' .
1266 - $this->contextLine( htmlspecialchars ( $line ) ) .
1267 - $this->contextLine( htmlspecialchars ( $line ) ) . "</tr>\n";
1268 - }
1269 - }
1270 -
1271 - function _changed( $orig, $closing ) {
1272 - wfProfileIn( __METHOD__ );
1273 -
1274 - $diff = new WordLevelDiff( $orig, $closing );
1275 - $del = $diff->orig();
1276 - $add = $diff->closing();
1277 -
1278 - # Notice that WordLevelDiff returns HTML-escaped output.
1279 - # Hence, we will be calling addedLine/deletedLine without HTML-escaping.
1280 -
1281 - while ( $line = array_shift( $del ) ) {
1282 - $aline = array_shift( $add );
1283 - echo '<tr>' . $this->deletedLine( $line ) .
1284 - $this->addedLine( $aline ) . "</tr>\n";
1285 - }
1286 - foreach ( $add as $line ) { # If any leftovers
1287 - echo '<tr>' . $this->emptyLine() .
1288 - $this->addedLine( $line ) . "</tr>\n";
1289 - }
1290 - wfProfileOut( __METHOD__ );
1291 - }
1292 -}
Index: trunk/phase3/includes/diff/DairikiDiff.php
@@ -0,0 +1,1291 @@
 2+<?php
 3+/**
 4+ * A PHP diff engine for phpwiki. (Taken from phpwiki-1.3.3)
 5+ *
 6+ * Copyright © 2000, 2001 Geoffrey T. Dairiki <dairiki@dairiki.org>
 7+ * You may copy this code freely under the conditions of the GPL.
 8+ *
 9+ * @file
 10+ * @ingroup DifferenceEngine
 11+ * @defgroup DifferenceEngine DifferenceEngine
 12+ */
 13+
 14+/**
 15+ * @todo document
 16+ * @private
 17+ * @ingroup DifferenceEngine
 18+ */
 19+class _DiffOp {
 20+ var $type;
 21+ var $orig;
 22+ var $closing;
 23+
 24+ function reverse() {
 25+ trigger_error( 'pure virtual', E_USER_ERROR );
 26+ }
 27+
 28+ function norig() {
 29+ return $this->orig ? sizeof( $this->orig ) : 0;
 30+ }
 31+
 32+ function nclosing() {
 33+ return $this->closing ? sizeof( $this->closing ) : 0;
 34+ }
 35+}
 36+
 37+/**
 38+ * @todo document
 39+ * @private
 40+ * @ingroup DifferenceEngine
 41+ */
 42+class _DiffOp_Copy extends _DiffOp {
 43+ var $type = 'copy';
 44+
 45+ function __construct ( $orig, $closing = false ) {
 46+ if ( !is_array( $closing ) )
 47+ $closing = $orig;
 48+ $this->orig = $orig;
 49+ $this->closing = $closing;
 50+ }
 51+
 52+ function reverse() {
 53+ return new _DiffOp_Copy( $this->closing, $this->orig );
 54+ }
 55+}
 56+
 57+/**
 58+ * @todo document
 59+ * @private
 60+ * @ingroup DifferenceEngine
 61+ */
 62+class _DiffOp_Delete extends _DiffOp {
 63+ var $type = 'delete';
 64+
 65+ function __construct ( $lines ) {
 66+ $this->orig = $lines;
 67+ $this->closing = false;
 68+ }
 69+
 70+ function reverse() {
 71+ return new _DiffOp_Add( $this->orig );
 72+ }
 73+}
 74+
 75+/**
 76+ * @todo document
 77+ * @private
 78+ * @ingroup DifferenceEngine
 79+ */
 80+class _DiffOp_Add extends _DiffOp {
 81+ var $type = 'add';
 82+
 83+ function __construct ( $lines ) {
 84+ $this->closing = $lines;
 85+ $this->orig = false;
 86+ }
 87+
 88+ function reverse() {
 89+ return new _DiffOp_Delete( $this->closing );
 90+ }
 91+}
 92+
 93+/**
 94+ * @todo document
 95+ * @private
 96+ * @ingroup DifferenceEngine
 97+ */
 98+class _DiffOp_Change extends _DiffOp {
 99+ var $type = 'change';
 100+
 101+ function __construct ( $orig, $closing ) {
 102+ $this->orig = $orig;
 103+ $this->closing = $closing;
 104+ }
 105+
 106+ function reverse() {
 107+ return new _DiffOp_Change( $this->closing, $this->orig );
 108+ }
 109+}
 110+
 111+/**
 112+ * Class used internally by Diff to actually compute the diffs.
 113+ *
 114+ * The algorithm used here is mostly lifted from the perl module
 115+ * Algorithm::Diff (version 1.06) by Ned Konz, which is available at:
 116+ * http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip
 117+ *
 118+ * More ideas are taken from:
 119+ * http://www.ics.uci.edu/~eppstein/161/960229.html
 120+ *
 121+ * Some ideas are (and a bit of code) are from from analyze.c, from GNU
 122+ * diffutils-2.7, which can be found at:
 123+ * ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz
 124+ *
 125+ * closingly, some ideas (subdivision by NCHUNKS > 2, and some optimizations)
 126+ * are my own.
 127+ *
 128+ * Line length limits for robustness added by Tim Starling, 2005-08-31
 129+ * Alternative implementation added by Guy Van den Broeck, 2008-07-30
 130+ *
 131+ * @author Geoffrey T. Dairiki, Tim Starling, Guy Van den Broeck
 132+ * @private
 133+ * @ingroup DifferenceEngine
 134+ */
 135+class _DiffEngine {
 136+
 137+ const MAX_XREF_LENGTH = 10000;
 138+
 139+ protected $xchanged, $ychanged;
 140+
 141+ protected $xv = array(), $yv = array();
 142+ protected $xind = array(), $yind = array();
 143+
 144+ protected $seq = array(), $in_seq = array();
 145+
 146+ protected $lcs = 0;
 147+
 148+ function diff ( $from_lines, $to_lines ) {
 149+ wfProfileIn( __METHOD__ );
 150+
 151+ // Diff and store locally
 152+ $this->diff_local( $from_lines, $to_lines );
 153+
 154+ // Merge edits when possible
 155+ $this->_shift_boundaries( $from_lines, $this->xchanged, $this->ychanged );
 156+ $this->_shift_boundaries( $to_lines, $this->ychanged, $this->xchanged );
 157+
 158+ // Compute the edit operations.
 159+ $n_from = sizeof( $from_lines );
 160+ $n_to = sizeof( $to_lines );
 161+
 162+ $edits = array();
 163+ $xi = $yi = 0;
 164+ while ( $xi < $n_from || $yi < $n_to ) {
 165+ assert( $yi < $n_to || $this->xchanged[$xi] );
 166+ assert( $xi < $n_from || $this->ychanged[$yi] );
 167+
 168+ // Skip matching "snake".
 169+ $copy = array();
 170+ while ( $xi < $n_from && $yi < $n_to
 171+ && !$this->xchanged[$xi] && !$this->ychanged[$yi] ) {
 172+ $copy[] = $from_lines[$xi++];
 173+ ++$yi;
 174+ }
 175+ if ( $copy ) {
 176+ $edits[] = new _DiffOp_Copy( $copy );
 177+ }
 178+
 179+ // Find deletes & adds.
 180+ $delete = array();
 181+ while ( $xi < $n_from && $this->xchanged[$xi] ) {
 182+ $delete[] = $from_lines[$xi++];
 183+ }
 184+
 185+ $add = array();
 186+ while ( $yi < $n_to && $this->ychanged[$yi] ) {
 187+ $add[] = $to_lines[$yi++];
 188+ }
 189+
 190+ if ( $delete && $add ) {
 191+ $edits[] = new _DiffOp_Change( $delete, $add );
 192+ } elseif ( $delete ) {
 193+ $edits[] = new _DiffOp_Delete( $delete );
 194+ } elseif ( $add ) {
 195+ $edits[] = new _DiffOp_Add( $add );
 196+ }
 197+ }
 198+ wfProfileOut( __METHOD__ );
 199+ return $edits;
 200+ }
 201+
 202+ function diff_local ( $from_lines, $to_lines ) {
 203+ global $wgExternalDiffEngine;
 204+ wfProfileIn( __METHOD__ );
 205+
 206+ if ( $wgExternalDiffEngine == 'wikidiff3' ) {
 207+ // wikidiff3
 208+ $wikidiff3 = new WikiDiff3();
 209+ $wikidiff3->diff( $from_lines, $to_lines );
 210+ $this->xchanged = $wikidiff3->removed;
 211+ $this->ychanged = $wikidiff3->added;
 212+ unset( $wikidiff3 );
 213+ } else {
 214+ // old diff
 215+ $n_from = sizeof( $from_lines );
 216+ $n_to = sizeof( $to_lines );
 217+ $this->xchanged = $this->ychanged = array();
 218+ $this->xv = $this->yv = array();
 219+ $this->xind = $this->yind = array();
 220+ unset( $this->seq );
 221+ unset( $this->in_seq );
 222+ unset( $this->lcs );
 223+
 224+ // Skip leading common lines.
 225+ for ( $skip = 0; $skip < $n_from && $skip < $n_to; $skip++ ) {
 226+ if ( $from_lines[$skip] !== $to_lines[$skip] )
 227+ break;
 228+ $this->xchanged[$skip] = $this->ychanged[$skip] = false;
 229+ }
 230+ // Skip trailing common lines.
 231+ $xi = $n_from; $yi = $n_to;
 232+ for ( $endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++ ) {
 233+ if ( $from_lines[$xi] !== $to_lines[$yi] )
 234+ break;
 235+ $this->xchanged[$xi] = $this->ychanged[$yi] = false;
 236+ }
 237+
 238+ // Ignore lines which do not exist in both files.
 239+ for ( $xi = $skip; $xi < $n_from - $endskip; $xi++ ) {
 240+ $xhash[$this->_line_hash( $from_lines[$xi] )] = 1;
 241+ }
 242+
 243+ for ( $yi = $skip; $yi < $n_to - $endskip; $yi++ ) {
 244+ $line = $to_lines[$yi];
 245+ if ( ( $this->ychanged[$yi] = empty( $xhash[$this->_line_hash( $line )] ) ) )
 246+ continue;
 247+ $yhash[$this->_line_hash( $line )] = 1;
 248+ $this->yv[] = $line;
 249+ $this->yind[] = $yi;
 250+ }
 251+ for ( $xi = $skip; $xi < $n_from - $endskip; $xi++ ) {
 252+ $line = $from_lines[$xi];
 253+ if ( ( $this->xchanged[$xi] = empty( $yhash[$this->_line_hash( $line )] ) ) )
 254+ continue;
 255+ $this->xv[] = $line;
 256+ $this->xind[] = $xi;
 257+ }
 258+
 259+ // Find the LCS.
 260+ $this->_compareseq( 0, sizeof( $this->xv ), 0, sizeof( $this->yv ) );
 261+ }
 262+ wfProfileOut( __METHOD__ );
 263+ }
 264+
 265+ /**
 266+ * Returns the whole line if it's small enough, or the MD5 hash otherwise
 267+ */
 268+ function _line_hash( $line ) {
 269+ if ( strlen( $line ) > self::MAX_XREF_LENGTH ) {
 270+ return md5( $line );
 271+ } else {
 272+ return $line;
 273+ }
 274+ }
 275+
 276+ /* Divide the Largest Common Subsequence (LCS) of the sequences
 277+ * [XOFF, XLIM) and [YOFF, YLIM) into NCHUNKS approximately equally
 278+ * sized segments.
 279+ *
 280+ * Returns (LCS, PTS). LCS is the length of the LCS. PTS is an
 281+ * array of NCHUNKS+1 (X, Y) indexes giving the diving points between
 282+ * sub sequences. The first sub-sequence is contained in [X0, X1),
 283+ * [Y0, Y1), the second in [X1, X2), [Y1, Y2) and so on. Note
 284+ * that (X0, Y0) == (XOFF, YOFF) and
 285+ * (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM).
 286+ *
 287+ * This function assumes that the first lines of the specified portions
 288+ * of the two files do not match, and likewise that the last lines do not
 289+ * match. The caller must trim matching lines from the beginning and end
 290+ * of the portions it is going to specify.
 291+ */
 292+ function _diag ( $xoff, $xlim, $yoff, $ylim, $nchunks ) {
 293+ $flip = false;
 294+
 295+ if ( $xlim - $xoff > $ylim - $yoff ) {
 296+ // Things seems faster (I'm not sure I understand why)
 297+ // when the shortest sequence in X.
 298+ $flip = true;
 299+ list ( $xoff, $xlim, $yoff, $ylim ) = array( $yoff, $ylim, $xoff, $xlim );
 300+ }
 301+
 302+ if ( $flip ) {
 303+ for ( $i = $ylim - 1; $i >= $yoff; $i-- ) {
 304+ $ymatches[$this->xv[$i]][] = $i;
 305+ }
 306+ } else {
 307+ for ( $i = $ylim - 1; $i >= $yoff; $i-- ) {
 308+ $ymatches[$this->yv[$i]][] = $i;
 309+ }
 310+ }
 311+
 312+ $this->lcs = 0;
 313+ $this->seq[0] = $yoff - 1;
 314+ $this->in_seq = array();
 315+ $ymids[0] = array();
 316+
 317+ $numer = $xlim - $xoff + $nchunks - 1;
 318+ $x = $xoff;
 319+ for ( $chunk = 0; $chunk < $nchunks; $chunk++ ) {
 320+ if ( $chunk > 0 ) {
 321+ for ( $i = 0; $i <= $this->lcs; $i++ ) {
 322+ $ymids[$i][$chunk -1] = $this->seq[$i];
 323+ }
 324+ }
 325+
 326+ $x1 = $xoff + (int)( ( $numer + ( $xlim -$xoff ) * $chunk ) / $nchunks );
 327+ for ( ; $x < $x1; $x++ ) {
 328+ $line = $flip ? $this->yv[$x] : $this->xv[$x];
 329+ if ( empty( $ymatches[$line] ) ) {
 330+ continue;
 331+ }
 332+ $matches = $ymatches[$line];
 333+ reset( $matches );
 334+ while ( list( , $y ) = each( $matches ) ) {
 335+ if ( empty( $this->in_seq[$y] ) ) {
 336+ $k = $this->_lcs_pos( $y );
 337+ assert( $k > 0 );
 338+ $ymids[$k] = $ymids[$k -1];
 339+ break;
 340+ }
 341+ }
 342+ while ( list ( , $y ) = each( $matches ) ) {
 343+ if ( $y > $this->seq[$k -1] ) {
 344+ assert( $y < $this->seq[$k] );
 345+ // Optimization: this is a common case:
 346+ // next match is just replacing previous match.
 347+ $this->in_seq[$this->seq[$k]] = false;
 348+ $this->seq[$k] = $y;
 349+ $this->in_seq[$y] = 1;
 350+ } else if ( empty( $this->in_seq[$y] ) ) {
 351+ $k = $this->_lcs_pos( $y );
 352+ assert( $k > 0 );
 353+ $ymids[$k] = $ymids[$k -1];
 354+ }
 355+ }
 356+ }
 357+ }
 358+
 359+ $seps[] = $flip ? array( $yoff, $xoff ) : array( $xoff, $yoff );
 360+ $ymid = $ymids[$this->lcs];
 361+ for ( $n = 0; $n < $nchunks - 1; $n++ ) {
 362+ $x1 = $xoff + (int)( ( $numer + ( $xlim - $xoff ) * $n ) / $nchunks );
 363+ $y1 = $ymid[$n] + 1;
 364+ $seps[] = $flip ? array( $y1, $x1 ) : array( $x1, $y1 );
 365+ }
 366+ $seps[] = $flip ? array( $ylim, $xlim ) : array( $xlim, $ylim );
 367+
 368+ return array( $this->lcs, $seps );
 369+ }
 370+
 371+ function _lcs_pos ( $ypos ) {
 372+ $end = $this->lcs;
 373+ if ( $end == 0 || $ypos > $this->seq[$end] ) {
 374+ $this->seq[++$this->lcs] = $ypos;
 375+ $this->in_seq[$ypos] = 1;
 376+ return $this->lcs;
 377+ }
 378+
 379+ $beg = 1;
 380+ while ( $beg < $end ) {
 381+ $mid = (int)( ( $beg + $end ) / 2 );
 382+ if ( $ypos > $this->seq[$mid] )
 383+ $beg = $mid + 1;
 384+ else
 385+ $end = $mid;
 386+ }
 387+
 388+ assert( $ypos != $this->seq[$end] );
 389+
 390+ $this->in_seq[$this->seq[$end]] = false;
 391+ $this->seq[$end] = $ypos;
 392+ $this->in_seq[$ypos] = 1;
 393+ return $end;
 394+ }
 395+
 396+ /* Find LCS of two sequences.
 397+ *
 398+ * The results are recorded in the vectors $this->{x,y}changed[], by
 399+ * storing a 1 in the element for each line that is an insertion
 400+ * or deletion (ie. is not in the LCS).
 401+ *
 402+ * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
 403+ *
 404+ * Note that XLIM, YLIM are exclusive bounds.
 405+ * All line numbers are origin-0 and discarded lines are not counted.
 406+ */
 407+ function _compareseq ( $xoff, $xlim, $yoff, $ylim ) {
 408+ // Slide down the bottom initial diagonal.
 409+ while ( $xoff < $xlim && $yoff < $ylim
 410+ && $this->xv[$xoff] == $this->yv[$yoff] ) {
 411+ ++$xoff;
 412+ ++$yoff;
 413+ }
 414+
 415+ // Slide up the top initial diagonal.
 416+ while ( $xlim > $xoff && $ylim > $yoff
 417+ && $this->xv[$xlim - 1] == $this->yv[$ylim - 1] ) {
 418+ --$xlim;
 419+ --$ylim;
 420+ }
 421+
 422+ if ( $xoff == $xlim || $yoff == $ylim ) {
 423+ $lcs = 0;
 424+ } else {
 425+ // This is ad hoc but seems to work well.
 426+ // $nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5);
 427+ // $nchunks = max(2,min(8,(int)$nchunks));
 428+ $nchunks = min( 7, $xlim - $xoff, $ylim - $yoff ) + 1;
 429+ list ( $lcs, $seps )
 430+ = $this->_diag( $xoff, $xlim, $yoff, $ylim, $nchunks );
 431+ }
 432+
 433+ if ( $lcs == 0 ) {
 434+ // X and Y sequences have no common subsequence:
 435+ // mark all changed.
 436+ while ( $yoff < $ylim ) {
 437+ $this->ychanged[$this->yind[$yoff++]] = 1;
 438+ }
 439+ while ( $xoff < $xlim ) {
 440+ $this->xchanged[$this->xind[$xoff++]] = 1;
 441+ }
 442+ } else {
 443+ // Use the partitions to split this problem into subproblems.
 444+ reset( $seps );
 445+ $pt1 = $seps[0];
 446+ while ( $pt2 = next( $seps ) ) {
 447+ $this->_compareseq ( $pt1[0], $pt2[0], $pt1[1], $pt2[1] );
 448+ $pt1 = $pt2;
 449+ }
 450+ }
 451+ }
 452+
 453+ /* Adjust inserts/deletes of identical lines to join changes
 454+ * as much as possible.
 455+ *
 456+ * We do something when a run of changed lines include a
 457+ * line at one end and has an excluded, identical line at the other.
 458+ * We are free to choose which identical line is included.
 459+ * `compareseq' usually chooses the one at the beginning,
 460+ * but usually it is cleaner to consider the following identical line
 461+ * to be the "change".
 462+ *
 463+ * This is extracted verbatim from analyze.c (GNU diffutils-2.7).
 464+ */
 465+ function _shift_boundaries ( $lines, &$changed, $other_changed ) {
 466+ wfProfileIn( __METHOD__ );
 467+ $i = 0;
 468+ $j = 0;
 469+
 470+ assert( 'sizeof($lines) == sizeof($changed)' );
 471+ $len = sizeof( $lines );
 472+ $other_len = sizeof( $other_changed );
 473+
 474+ while ( 1 ) {
 475+ /*
 476+ * Scan forwards to find beginning of another run of changes.
 477+ * Also keep track of the corresponding point in the other file.
 478+ *
 479+ * Throughout this code, $i and $j are adjusted together so that
 480+ * the first $i elements of $changed and the first $j elements
 481+ * of $other_changed both contain the same number of zeros
 482+ * (unchanged lines).
 483+ * Furthermore, $j is always kept so that $j == $other_len or
 484+ * $other_changed[$j] == false.
 485+ */
 486+ while ( $j < $other_len && $other_changed[$j] ) {
 487+ $j++;
 488+ }
 489+
 490+ while ( $i < $len && ! $changed[$i] ) {
 491+ assert( '$j < $other_len && ! $other_changed[$j]' );
 492+ $i++; $j++;
 493+ while ( $j < $other_len && $other_changed[$j] )
 494+ $j++;
 495+ }
 496+
 497+ if ( $i == $len ) {
 498+ break;
 499+ }
 500+
 501+ $start = $i;
 502+
 503+ // Find the end of this run of changes.
 504+ while ( ++$i < $len && $changed[$i] ) {
 505+ continue;
 506+ }
 507+
 508+ do {
 509+ /*
 510+ * Record the length of this run of changes, so that
 511+ * we can later determine whether the run has grown.
 512+ */
 513+ $runlength = $i - $start;
 514+
 515+ /*
 516+ * Move the changed region back, so long as the
 517+ * previous unchanged line matches the last changed one.
 518+ * This merges with previous changed regions.
 519+ */
 520+ while ( $start > 0 && $lines[$start - 1] == $lines[$i - 1] ) {
 521+ $changed[--$start] = 1;
 522+ $changed[--$i] = false;
 523+ while ( $start > 0 && $changed[$start - 1] ) {
 524+ $start--;
 525+ }
 526+ assert( '$j > 0' );
 527+ while ( $other_changed[--$j] ) {
 528+ continue;
 529+ }
 530+ assert( '$j >= 0 && !$other_changed[$j]' );
 531+ }
 532+
 533+ /*
 534+ * Set CORRESPONDING to the end of the changed run, at the last
 535+ * point where it corresponds to a changed run in the other file.
 536+ * CORRESPONDING == LEN means no such point has been found.
 537+ */
 538+ $corresponding = $j < $other_len ? $i : $len;
 539+
 540+ /*
 541+ * Move the changed region forward, so long as the
 542+ * first changed line matches the following unchanged one.
 543+ * This merges with following changed regions.
 544+ * Do this second, so that if there are no merges,
 545+ * the changed region is moved forward as far as possible.
 546+ */
 547+ while ( $i < $len && $lines[$start] == $lines[$i] ) {
 548+ $changed[$start++] = false;
 549+ $changed[$i++] = 1;
 550+ while ( $i < $len && $changed[$i] ) {
 551+ $i++;
 552+ }
 553+
 554+ assert( '$j < $other_len && ! $other_changed[$j]' );
 555+ $j++;
 556+ if ( $j < $other_len && $other_changed[$j] ) {
 557+ $corresponding = $i;
 558+ while ( $j < $other_len && $other_changed[$j] ) {
 559+ $j++;
 560+ }
 561+ }
 562+ }
 563+ } while ( $runlength != $i - $start );
 564+
 565+ /*
 566+ * If possible, move the fully-merged run of changes
 567+ * back to a corresponding run in the other file.
 568+ */
 569+ while ( $corresponding < $i ) {
 570+ $changed[--$start] = 1;
 571+ $changed[--$i] = 0;
 572+ assert( '$j > 0' );
 573+ while ( $other_changed[--$j] ) {
 574+ continue;
 575+ }
 576+ assert( '$j >= 0 && !$other_changed[$j]' );
 577+ }
 578+ }
 579+ wfProfileOut( __METHOD__ );
 580+ }
 581+}
 582+
 583+/**
 584+ * Class representing a 'diff' between two sequences of strings.
 585+ * @todo document
 586+ * @private
 587+ * @ingroup DifferenceEngine
 588+ */
 589+class Diff
 590+{
 591+ var $edits;
 592+
 593+ /**
 594+ * Constructor.
 595+ * Computes diff between sequences of strings.
 596+ *
 597+ * @param $from_lines array An array of strings.
 598+ * (Typically these are lines from a file.)
 599+ * @param $to_lines array An array of strings.
 600+ */
 601+ function __construct( $from_lines, $to_lines ) {
 602+ $eng = new _DiffEngine;
 603+ $this->edits = $eng->diff( $from_lines, $to_lines );
 604+ // $this->_check($from_lines, $to_lines);
 605+ }
 606+
 607+ /**
 608+ * Compute reversed Diff.
 609+ *
 610+ * SYNOPSIS:
 611+ *
 612+ * $diff = new Diff($lines1, $lines2);
 613+ * $rev = $diff->reverse();
 614+ * @return object A Diff object representing the inverse of the
 615+ * original diff.
 616+ */
 617+ function reverse () {
 618+ $rev = $this;
 619+ $rev->edits = array();
 620+ foreach ( $this->edits as $edit ) {
 621+ $rev->edits[] = $edit->reverse();
 622+ }
 623+ return $rev;
 624+ }
 625+
 626+ /**
 627+ * Check for empty diff.
 628+ *
 629+ * @return bool True iff two sequences were identical.
 630+ */
 631+ function isEmpty () {
 632+ foreach ( $this->edits as $edit ) {
 633+ if ( $edit->type != 'copy' ) {
 634+ return false;
 635+ }
 636+ }
 637+ return true;
 638+ }
 639+
 640+ /**
 641+ * Compute the length of the Longest Common Subsequence (LCS).
 642+ *
 643+ * This is mostly for diagnostic purposed.
 644+ *
 645+ * @return int The length of the LCS.
 646+ */
 647+ function lcs () {
 648+ $lcs = 0;
 649+ foreach ( $this->edits as $edit ) {
 650+ if ( $edit->type == 'copy' ) {
 651+ $lcs += sizeof( $edit->orig );
 652+ }
 653+ }
 654+ return $lcs;
 655+ }
 656+
 657+ /**
 658+ * Get the original set of lines.
 659+ *
 660+ * This reconstructs the $from_lines parameter passed to the
 661+ * constructor.
 662+ *
 663+ * @return array The original sequence of strings.
 664+ */
 665+ function orig() {
 666+ $lines = array();
 667+
 668+ foreach ( $this->edits as $edit ) {
 669+ if ( $edit->orig ) {
 670+ array_splice( $lines, sizeof( $lines ), 0, $edit->orig );
 671+ }
 672+ }
 673+ return $lines;
 674+ }
 675+
 676+ /**
 677+ * Get the closing set of lines.
 678+ *
 679+ * This reconstructs the $to_lines parameter passed to the
 680+ * constructor.
 681+ *
 682+ * @return array The sequence of strings.
 683+ */
 684+ function closing() {
 685+ $lines = array();
 686+
 687+ foreach ( $this->edits as $edit ) {
 688+ if ( $edit->closing ) {
 689+ array_splice( $lines, sizeof( $lines ), 0, $edit->closing );
 690+ }
 691+ }
 692+ return $lines;
 693+ }
 694+
 695+ /**
 696+ * Check a Diff for validity.
 697+ *
 698+ * This is here only for debugging purposes.
 699+ */
 700+ function _check ( $from_lines, $to_lines ) {
 701+ wfProfileIn( __METHOD__ );
 702+ if ( serialize( $from_lines ) != serialize( $this->orig() ) ) {
 703+ trigger_error( "Reconstructed original doesn't match", E_USER_ERROR );
 704+ }
 705+ if ( serialize( $to_lines ) != serialize( $this->closing() ) ) {
 706+ trigger_error( "Reconstructed closing doesn't match", E_USER_ERROR );
 707+ }
 708+
 709+ $rev = $this->reverse();
 710+ if ( serialize( $to_lines ) != serialize( $rev->orig() ) ) {
 711+ trigger_error( "Reversed original doesn't match", E_USER_ERROR );
 712+ }
 713+ if ( serialize( $from_lines ) != serialize( $rev->closing() ) ) {
 714+ trigger_error( "Reversed closing doesn't match", E_USER_ERROR );
 715+ }
 716+
 717+
 718+ $prevtype = 'none';
 719+ foreach ( $this->edits as $edit ) {
 720+ if ( $prevtype == $edit->type ) {
 721+ trigger_error( "Edit sequence is non-optimal", E_USER_ERROR );
 722+ }
 723+ $prevtype = $edit->type;
 724+ }
 725+
 726+ $lcs = $this->lcs();
 727+ trigger_error( 'Diff okay: LCS = ' . $lcs, E_USER_NOTICE );
 728+ wfProfileOut( __METHOD__ );
 729+ }
 730+}
 731+
 732+/**
 733+ * @todo document, bad name.
 734+ * @private
 735+ * @ingroup DifferenceEngine
 736+ */
 737+class MappedDiff extends Diff
 738+{
 739+ /**
 740+ * Constructor.
 741+ *
 742+ * Computes diff between sequences of strings.
 743+ *
 744+ * This can be used to compute things like
 745+ * case-insensitve diffs, or diffs which ignore
 746+ * changes in white-space.
 747+ *
 748+ * @param $from_lines array An array of strings.
 749+ * (Typically these are lines from a file.)
 750+ *
 751+ * @param $to_lines array An array of strings.
 752+ *
 753+ * @param $mapped_from_lines array This array should
 754+ * have the same size number of elements as $from_lines.
 755+ * The elements in $mapped_from_lines and
 756+ * $mapped_to_lines are what is actually compared
 757+ * when computing the diff.
 758+ *
 759+ * @param $mapped_to_lines array This array should
 760+ * have the same number of elements as $to_lines.
 761+ */
 762+ function __construct( $from_lines, $to_lines,
 763+ $mapped_from_lines, $mapped_to_lines ) {
 764+ wfProfileIn( __METHOD__ );
 765+
 766+ assert( sizeof( $from_lines ) == sizeof( $mapped_from_lines ) );
 767+ assert( sizeof( $to_lines ) == sizeof( $mapped_to_lines ) );
 768+
 769+ parent::__construct( $mapped_from_lines, $mapped_to_lines );
 770+
 771+ $xi = $yi = 0;
 772+ for ( $i = 0; $i < sizeof( $this->edits ); $i++ ) {
 773+ $orig = &$this->edits[$i]->orig;
 774+ if ( is_array( $orig ) ) {
 775+ $orig = array_slice( $from_lines, $xi, sizeof( $orig ) );
 776+ $xi += sizeof( $orig );
 777+ }
 778+
 779+ $closing = &$this->edits[$i]->closing;
 780+ if ( is_array( $closing ) ) {
 781+ $closing = array_slice( $to_lines, $yi, sizeof( $closing ) );
 782+ $yi += sizeof( $closing );
 783+ }
 784+ }
 785+ wfProfileOut( __METHOD__ );
 786+ }
 787+}
 788+
 789+/**
 790+ * A class to format Diffs
 791+ *
 792+ * This class formats the diff in classic diff format.
 793+ * It is intended that this class be customized via inheritance,
 794+ * to obtain fancier outputs.
 795+ * @todo document
 796+ * @private
 797+ * @ingroup DifferenceEngine
 798+ */
 799+class DiffFormatter {
 800+ /**
 801+ * Number of leading context "lines" to preserve.
 802+ *
 803+ * This should be left at zero for this class, but subclasses
 804+ * may want to set this to other values.
 805+ */
 806+ var $leading_context_lines = 0;
 807+
 808+ /**
 809+ * Number of trailing context "lines" to preserve.
 810+ *
 811+ * This should be left at zero for this class, but subclasses
 812+ * may want to set this to other values.
 813+ */
 814+ var $trailing_context_lines = 0;
 815+
 816+ /**
 817+ * Format a diff.
 818+ *
 819+ * @param $diff Diff A Diff object.
 820+ * @return string The formatted output.
 821+ */
 822+ function format( $diff ) {
 823+ wfProfileIn( __METHOD__ );
 824+
 825+ $xi = $yi = 1;
 826+ $block = false;
 827+ $context = array();
 828+
 829+ $nlead = $this->leading_context_lines;
 830+ $ntrail = $this->trailing_context_lines;
 831+
 832+ $this->_start_diff();
 833+
 834+ foreach ( $diff->edits as $edit ) {
 835+ if ( $edit->type == 'copy' ) {
 836+ if ( is_array( $block ) ) {
 837+ if ( sizeof( $edit->orig ) <= $nlead + $ntrail ) {
 838+ $block[] = $edit;
 839+ }
 840+ else {
 841+ if ( $ntrail ) {
 842+ $context = array_slice( $edit->orig, 0, $ntrail );
 843+ $block[] = new _DiffOp_Copy( $context );
 844+ }
 845+ $this->_block( $x0, $ntrail + $xi - $x0,
 846+ $y0, $ntrail + $yi - $y0,
 847+ $block );
 848+ $block = false;
 849+ }
 850+ }
 851+ $context = $edit->orig;
 852+ } else {
 853+ if ( ! is_array( $block ) ) {
 854+ $context = array_slice( $context, sizeof( $context ) - $nlead );
 855+ $x0 = $xi - sizeof( $context );
 856+ $y0 = $yi - sizeof( $context );
 857+ $block = array();
 858+ if ( $context ) {
 859+ $block[] = new _DiffOp_Copy( $context );
 860+ }
 861+ }
 862+ $block[] = $edit;
 863+ }
 864+
 865+ if ( $edit->orig ) {
 866+ $xi += sizeof( $edit->orig );
 867+ }
 868+ if ( $edit->closing ) {
 869+ $yi += sizeof( $edit->closing );
 870+ }
 871+ }
 872+
 873+ if ( is_array( $block ) ) {
 874+ $this->_block( $x0, $xi - $x0,
 875+ $y0, $yi - $y0,
 876+ $block );
 877+ }
 878+
 879+ $end = $this->_end_diff();
 880+ wfProfileOut( __METHOD__ );
 881+ return $end;
 882+ }
 883+
 884+ function _block( $xbeg, $xlen, $ybeg, $ylen, &$edits ) {
 885+ wfProfileIn( __METHOD__ );
 886+ $this->_start_block( $this->_block_header( $xbeg, $xlen, $ybeg, $ylen ) );
 887+ foreach ( $edits as $edit ) {
 888+ if ( $edit->type == 'copy' ) {
 889+ $this->_context( $edit->orig );
 890+ } elseif ( $edit->type == 'add' ) {
 891+ $this->_added( $edit->closing );
 892+ } elseif ( $edit->type == 'delete' ) {
 893+ $this->_deleted( $edit->orig );
 894+ } elseif ( $edit->type == 'change' ) {
 895+ $this->_changed( $edit->orig, $edit->closing );
 896+ } else {
 897+ trigger_error( 'Unknown edit type', E_USER_ERROR );
 898+ }
 899+ }
 900+ $this->_end_block();
 901+ wfProfileOut( __METHOD__ );
 902+ }
 903+
 904+ function _start_diff() {
 905+ ob_start();
 906+ }
 907+
 908+ function _end_diff() {
 909+ $val = ob_get_contents();
 910+ ob_end_clean();
 911+ return $val;
 912+ }
 913+
 914+ function _block_header( $xbeg, $xlen, $ybeg, $ylen ) {
 915+ if ( $xlen > 1 ) {
 916+ $xbeg .= "," . ( $xbeg + $xlen - 1 );
 917+ }
 918+ if ( $ylen > 1 ) {
 919+ $ybeg .= "," . ( $ybeg + $ylen - 1 );
 920+ }
 921+
 922+ return $xbeg . ( $xlen ? ( $ylen ? 'c' : 'd' ) : 'a' ) . $ybeg;
 923+ }
 924+
 925+ function _start_block( $header ) {
 926+ echo $header . "\n";
 927+ }
 928+
 929+ function _end_block() {
 930+ }
 931+
 932+ function _lines( $lines, $prefix = ' ' ) {
 933+ foreach ( $lines as $line ) {
 934+ echo "$prefix $line\n";
 935+ }
 936+ }
 937+
 938+ function _context( $lines ) {
 939+ $this->_lines( $lines );
 940+ }
 941+
 942+ function _added( $lines ) {
 943+ $this->_lines( $lines, '>' );
 944+ }
 945+ function _deleted( $lines ) {
 946+ $this->_lines( $lines, '<' );
 947+ }
 948+
 949+ function _changed( $orig, $closing ) {
 950+ $this->_deleted( $orig );
 951+ echo "---\n";
 952+ $this->_added( $closing );
 953+ }
 954+}
 955+
 956+/**
 957+ * A formatter that outputs unified diffs
 958+ * @ingroup DifferenceEngine
 959+ */
 960+
 961+class UnifiedDiffFormatter extends DiffFormatter {
 962+ var $leading_context_lines = 2;
 963+ var $trailing_context_lines = 2;
 964+
 965+ function _added( $lines ) {
 966+ $this->_lines( $lines, '+' );
 967+ }
 968+ function _deleted( $lines ) {
 969+ $this->_lines( $lines, '-' );
 970+ }
 971+ function _changed( $orig, $closing ) {
 972+ $this->_deleted( $orig );
 973+ $this->_added( $closing );
 974+ }
 975+ function _block_header( $xbeg, $xlen, $ybeg, $ylen ) {
 976+ return "@@ -$xbeg,$xlen +$ybeg,$ylen @@";
 977+ }
 978+}
 979+
 980+/**
 981+ * A pseudo-formatter that just passes along the Diff::$edits array
 982+ * @ingroup DifferenceEngine
 983+ */
 984+class ArrayDiffFormatter extends DiffFormatter {
 985+ function format( $diff ) {
 986+ $oldline = 1;
 987+ $newline = 1;
 988+ $retval = array();
 989+ foreach ( $diff->edits as $edit ) {
 990+ switch( $edit->type ) {
 991+ case 'add':
 992+ foreach ( $edit->closing as $l ) {
 993+ $retval[] = array(
 994+ 'action' => 'add',
 995+ 'new' => $l,
 996+ 'newline' => $newline++
 997+ );
 998+ }
 999+ break;
 1000+ case 'delete':
 1001+ foreach ( $edit->orig as $l ) {
 1002+ $retval[] = array(
 1003+ 'action' => 'delete',
 1004+ 'old' => $l,
 1005+ 'oldline' => $oldline++,
 1006+ );
 1007+ }
 1008+ break;
 1009+ case 'change':
 1010+ foreach ( $edit->orig as $i => $l ) {
 1011+ $retval[] = array(
 1012+ 'action' => 'change',
 1013+ 'old' => $l,
 1014+ 'new' => @$edit->closing[$i],
 1015+ 'oldline' => $oldline++,
 1016+ 'newline' => $newline++,
 1017+ );
 1018+ }
 1019+ break;
 1020+ case 'copy':
 1021+ $oldline += count( $edit->orig );
 1022+ $newline += count( $edit->orig );
 1023+ }
 1024+ }
 1025+ return $retval;
 1026+ }
 1027+}
 1028+
 1029+/**
 1030+ * Additions by Axel Boldt follow, partly taken from diff.php, phpwiki-1.3.3
 1031+ *
 1032+ */
 1033+
 1034+define( 'NBSP', '&#160;' ); // iso-8859-x non-breaking space.
 1035+
 1036+/**
 1037+ * @todo document
 1038+ * @private
 1039+ * @ingroup DifferenceEngine
 1040+ */
 1041+class _HWLDF_WordAccumulator {
 1042+ function __construct () {
 1043+ $this->_lines = array();
 1044+ $this->_line = '';
 1045+ $this->_group = '';
 1046+ $this->_tag = '';
 1047+ }
 1048+
 1049+ function _flushGroup ( $new_tag ) {
 1050+ if ( $this->_group !== '' ) {
 1051+ if ( $this->_tag == 'ins' ) {
 1052+ $this->_line .= '<ins class="diffchange diffchange-inline">' .
 1053+ htmlspecialchars ( $this->_group ) . '</ins>';
 1054+ } elseif ( $this->_tag == 'del' ) {
 1055+ $this->_line .= '<del class="diffchange diffchange-inline">' .
 1056+ htmlspecialchars ( $this->_group ) . '</del>';
 1057+ } else {
 1058+ $this->_line .= htmlspecialchars ( $this->_group );
 1059+ }
 1060+ }
 1061+ $this->_group = '';
 1062+ $this->_tag = $new_tag;
 1063+ }
 1064+
 1065+ function _flushLine ( $new_tag ) {
 1066+ $this->_flushGroup( $new_tag );
 1067+ if ( $this->_line != '' ) {
 1068+ array_push ( $this->_lines, $this->_line );
 1069+ } else {
 1070+ # make empty lines visible by inserting an NBSP
 1071+ array_push ( $this->_lines, NBSP );
 1072+ }
 1073+ $this->_line = '';
 1074+ }
 1075+
 1076+ function addWords ( $words, $tag = '' ) {
 1077+ if ( $tag != $this->_tag ) {
 1078+ $this->_flushGroup( $tag );
 1079+ }
 1080+
 1081+ foreach ( $words as $word ) {
 1082+ // new-line should only come as first char of word.
 1083+ if ( $word == '' ) {
 1084+ continue;
 1085+ }
 1086+ if ( $word[0] == "\n" ) {
 1087+ $this->_flushLine( $tag );
 1088+ $word = substr( $word, 1 );
 1089+ }
 1090+ assert( !strstr( $word, "\n" ) );
 1091+ $this->_group .= $word;
 1092+ }
 1093+ }
 1094+
 1095+ function getLines() {
 1096+ $this->_flushLine( '~done' );
 1097+ return $this->_lines;
 1098+ }
 1099+}
 1100+
 1101+/**
 1102+ * @todo document
 1103+ * @private
 1104+ * @ingroup DifferenceEngine
 1105+ */
 1106+class WordLevelDiff extends MappedDiff {
 1107+ const MAX_LINE_LENGTH = 10000;
 1108+
 1109+ function __construct ( $orig_lines, $closing_lines ) {
 1110+ wfProfileIn( __METHOD__ );
 1111+
 1112+ list ( $orig_words, $orig_stripped ) = $this->_split( $orig_lines );
 1113+ list ( $closing_words, $closing_stripped ) = $this->_split( $closing_lines );
 1114+
 1115+ parent::__construct( $orig_words, $closing_words,
 1116+ $orig_stripped, $closing_stripped );
 1117+ wfProfileOut( __METHOD__ );
 1118+ }
 1119+
 1120+ function _split( $lines ) {
 1121+ wfProfileIn( __METHOD__ );
 1122+
 1123+ $words = array();
 1124+ $stripped = array();
 1125+ $first = true;
 1126+ foreach ( $lines as $line ) {
 1127+ # If the line is too long, just pretend the entire line is one big word
 1128+ # This prevents resource exhaustion problems
 1129+ if ( $first ) {
 1130+ $first = false;
 1131+ } else {
 1132+ $words[] = "\n";
 1133+ $stripped[] = "\n";
 1134+ }
 1135+ if ( strlen( $line ) > self::MAX_LINE_LENGTH ) {
 1136+ $words[] = $line;
 1137+ $stripped[] = $line;
 1138+ } else {
 1139+ $m = array();
 1140+ if ( preg_match_all( '/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
 1141+ $line, $m ) )
 1142+ {
 1143+ $words = array_merge( $words, $m[0] );
 1144+ $stripped = array_merge( $stripped, $m[1] );
 1145+ }
 1146+ }
 1147+ }
 1148+ wfProfileOut( __METHOD__ );
 1149+ return array( $words, $stripped );
 1150+ }
 1151+
 1152+ function orig () {
 1153+ wfProfileIn( __METHOD__ );
 1154+ $orig = new _HWLDF_WordAccumulator;
 1155+
 1156+ foreach ( $this->edits as $edit ) {
 1157+ if ( $edit->type == 'copy' ) {
 1158+ $orig->addWords( $edit->orig );
 1159+ } elseif ( $edit->orig ) {
 1160+ $orig->addWords( $edit->orig, 'del' );
 1161+ }
 1162+ }
 1163+ $lines = $orig->getLines();
 1164+ wfProfileOut( __METHOD__ );
 1165+ return $lines;
 1166+ }
 1167+
 1168+ function closing () {
 1169+ wfProfileIn( __METHOD__ );
 1170+ $closing = new _HWLDF_WordAccumulator;
 1171+
 1172+ foreach ( $this->edits as $edit ) {
 1173+ if ( $edit->type == 'copy' ) {
 1174+ $closing->addWords( $edit->closing );
 1175+ } elseif ( $edit->closing ) {
 1176+ $closing->addWords( $edit->closing, 'ins' );
 1177+ }
 1178+ }
 1179+ $lines = $closing->getLines();
 1180+ wfProfileOut( __METHOD__ );
 1181+ return $lines;
 1182+ }
 1183+}
 1184+
 1185+/**
 1186+ * Wikipedia Table style diff formatter.
 1187+ * @todo document
 1188+ * @private
 1189+ * @ingroup DifferenceEngine
 1190+ */
 1191+class TableDiffFormatter extends DiffFormatter {
 1192+ function __construct() {
 1193+ $this->leading_context_lines = 2;
 1194+ $this->trailing_context_lines = 2;
 1195+ }
 1196+
 1197+ public static function escapeWhiteSpace( $msg ) {
 1198+ $msg = preg_replace( '/^ /m', '&#160; ', $msg );
 1199+ $msg = preg_replace( '/ $/m', ' &#160;', $msg );
 1200+ $msg = preg_replace( '/ /', '&#160; ', $msg );
 1201+ return $msg;
 1202+ }
 1203+
 1204+ function _block_header( $xbeg, $xlen, $ybeg, $ylen ) {
 1205+ $r = '<tr><td colspan="2" class="diff-lineno"><!--LINE ' . $xbeg . "--></td>\n" .
 1206+ '<td colspan="2" class="diff-lineno"><!--LINE ' . $ybeg . "--></td></tr>\n";
 1207+ return $r;
 1208+ }
 1209+
 1210+ function _start_block( $header ) {
 1211+ echo $header;
 1212+ }
 1213+
 1214+ function _end_block() {
 1215+ }
 1216+
 1217+ function _lines( $lines, $prefix = ' ', $color = 'white' ) {
 1218+ }
 1219+
 1220+ # HTML-escape parameter before calling this
 1221+ function addedLine( $line ) {
 1222+ return $this->wrapLine( '+', 'diff-addedline', $line );
 1223+ }
 1224+
 1225+ # HTML-escape parameter before calling this
 1226+ function deletedLine( $line ) {
 1227+ return $this->wrapLine( '−', 'diff-deletedline', $line );
 1228+ }
 1229+
 1230+ # HTML-escape parameter before calling this
 1231+ function contextLine( $line ) {
 1232+ return $this->wrapLine( '&#160;', 'diff-context', $line );
 1233+ }
 1234+
 1235+ private function wrapLine( $marker, $class, $line ) {
 1236+ if ( $line !== '' ) {
 1237+ // The <div> wrapper is needed for 'overflow: auto' style to scroll properly
 1238+ $line = Xml::tags( 'div', null, $this->escapeWhiteSpace( $line ) );
 1239+ }
 1240+ return "<td class='diff-marker'>$marker</td><td class='$class'>$line</td>";
 1241+ }
 1242+
 1243+ function emptyLine() {
 1244+ return '<td colspan="2">&#160;</td>';
 1245+ }
 1246+
 1247+ function _added( $lines ) {
 1248+ foreach ( $lines as $line ) {
 1249+ echo '<tr>' . $this->emptyLine() .
 1250+ $this->addedLine( '<ins class="diffchange">' .
 1251+ htmlspecialchars ( $line ) . '</ins>' ) . "</tr>\n";
 1252+ }
 1253+ }
 1254+
 1255+ function _deleted( $lines ) {
 1256+ foreach ( $lines as $line ) {
 1257+ echo '<tr>' . $this->deletedLine( '<del class="diffchange">' .
 1258+ htmlspecialchars ( $line ) . '</del>' ) .
 1259+ $this->emptyLine() . "</tr>\n";
 1260+ }
 1261+ }
 1262+
 1263+ function _context( $lines ) {
 1264+ foreach ( $lines as $line ) {
 1265+ echo '<tr>' .
 1266+ $this->contextLine( htmlspecialchars ( $line ) ) .
 1267+ $this->contextLine( htmlspecialchars ( $line ) ) . "</tr>\n";
 1268+ }
 1269+ }
 1270+
 1271+ function _changed( $orig, $closing ) {
 1272+ wfProfileIn( __METHOD__ );
 1273+
 1274+ $diff = new WordLevelDiff( $orig, $closing );
 1275+ $del = $diff->orig();
 1276+ $add = $diff->closing();
 1277+
 1278+ # Notice that WordLevelDiff returns HTML-escaped output.
 1279+ # Hence, we will be calling addedLine/deletedLine without HTML-escaping.
 1280+
 1281+ while ( $line = array_shift( $del ) ) {
 1282+ $aline = array_shift( $add );
 1283+ echo '<tr>' . $this->deletedLine( $line ) .
 1284+ $this->addedLine( $aline ) . "</tr>\n";
 1285+ }
 1286+ foreach ( $add as $line ) { # If any leftovers
 1287+ echo '<tr>' . $this->emptyLine() .
 1288+ $this->addedLine( $line ) . "</tr>\n";
 1289+ }
 1290+ wfProfileOut( __METHOD__ );
 1291+ }
 1292+}
Property changes on: trunk/phase3/includes/diff/DairikiDiff.php
___________________________________________________________________
Added: svn:eol-style
11293 + native
Added: svn:keywords
21294 + Author Date Id Revision
Index: trunk/phase3/includes/AutoLoader.php
@@ -416,23 +416,23 @@
417417 'IBM_DB2Field' => 'includes/db/DatabaseIbm_db2.php',
418418
419419 # includes/diff
420 - 'ArrayDiffFormatter' => 'includes/diff/WikiDiff.php',
421 - '_DiffEngine' => 'includes/diff/WikiDiff.php',
 420+ 'ArrayDiffFormatter' => 'includes/diff/DairikiDiff.php',
 421+ '_DiffEngine' => 'includes/diff/DairikiDiff.php',
422422 'DifferenceEngine' => 'includes/diff/DifferenceEngine.php',
423 - 'DiffFormatter' => 'includes/diff/WikiDiff.php',
424 - 'Diff' => 'includes/diff/WikiDiff.php',
425 - '_DiffOp_Add' => 'includes/diff/WikiDiff.php',
426 - '_DiffOp_Change' => 'includes/diff/WikiDiff.php',
427 - '_DiffOp_Copy' => 'includes/diff/WikiDiff.php',
428 - '_DiffOp_Delete' => 'includes/diff/WikiDiff.php',
429 - '_DiffOp' => 'includes/diff/WikiDiff.php',
430 - '_HWLDF_WordAccumulator' => 'includes/diff/WikiDiff.php',
431 - 'MappedDiff' => 'includes/diff/WikiDiff.php',
 423+ 'DiffFormatter' => 'includes/diff/DairikiDiff.php',
 424+ 'Diff' => 'includes/diff/DairikiDiff.php',
 425+ '_DiffOp_Add' => 'includes/diff/DairikiDiff.php',
 426+ '_DiffOp_Change' => 'includes/diff/DairikiDiff.php',
 427+ '_DiffOp_Copy' => 'includes/diff/DairikiDiff.php',
 428+ '_DiffOp_Delete' => 'includes/diff/DairikiDiff.php',
 429+ '_DiffOp' => 'includes/diff/DairikiDiff.php',
 430+ '_HWLDF_WordAccumulator' => 'includes/diff/DairikiDiff.php',
 431+ 'MappedDiff' => 'includes/diff/DairikiDiff.php',
432432 'RangeDifference' => 'includes/diff/WikiDiff3.php',
433 - 'TableDiffFormatter' => 'includes/diff/WikiDiff.php',
434 - 'UnifiedDiffFormatter' => 'includes/diff/WikiDiff.php',
 433+ 'TableDiffFormatter' => 'includes/diff/DairikiDiff.php',
 434+ 'UnifiedDiffFormatter' => 'includes/diff/DairikiDiff.php',
435435 'WikiDiff3' => 'includes/diff/WikiDiff3.php',
436 - 'WordLevelDiff' => 'includes/diff/WikiDiff.php',
 436+ 'WordLevelDiff' => 'includes/diff/DairikiDiff.php',
437437
438438 # includes/filerepo
439439 'ArchivedFile' => 'includes/filerepo/ArchivedFile.php',

Past revisions this follows-up on

RevisionCommit summaryAuthorDate
r76252* (bug 24833) Files name in includes/diff/ are now less confusing...ialex16:28, 7 November 2010

Status & tagging log