r38653 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r38652‎ | r38653 | r38654 >
Date:19:40, 5 August 2008
Author:guyvdb
Status:old
Tags:
Comment:
Alternative experimental diff implementation. Enable by setting = 'wikidiff3'.
Modified paths:
  • /trunk/phase3/includes/Diff.php (added) (history)
  • /trunk/phase3/includes/DifferenceEngine.php (modified) (history)

Diff [purge]

Index: trunk/phase3/includes/Diff.php
@@ -0,0 +1,472 @@
 2+<?php
 3+/* Copyright (C) 2008 Guy Van den Broeck <guy@guyvdb.eu>
 4+ *
 5+ * This program is free software; you can redistribute it and/or modify
 6+ * it under the terms of the GNU General Public License as published by
 7+ * the Free Software Foundation; either version 2 of the License, or
 8+ * (at your option) any later version.
 9+ *
 10+ * This program is distributed in the hope that it will be useful,
 11+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 13+ * GNU General Public License for more details.
 14+ *
 15+ * You should have received a copy of the GNU General Public License
 16+ * along with this program; if not, write to the Free Software
 17+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 18+ * or see http://www.gnu.org/
 19+ */
 20+
 21+/**
 22+ * Implementation of the Diff algorithm.
 23+ *
 24+ * DO NOT USE ON PRODUCTION SYSTEMS
 25+ *
 26+ * The algorithm is based on the O(NP) LCS algorithm as descibed by Wu et al. in "An O(NP) Sequence Comparison Algorithm"
 27+ * and extended with my own ideas.
 28+ *
 29+ * @return array($from_removed, $to_added)
 30+ * TRUE if the token was removed or added.
 31+ *
 32+ * @author Guy Van den Broeck
 33+ */
 34+function wikidiff3_diff(array $from, array $to, $boundRunningTime=FALSE, $max_NP_before_bound = 800000){
 35+ wfProfileIn( __METHOD__ );
 36+
 37+ $m = sizeof($from);
 38+ $n = sizeof($to);
 39+
 40+ $result_from = array_fill(0,$m,TRUE);
 41+ $result_to = array_fill(0,$n,TRUE);
 42+
 43+ //reduce the complexity for the next step (intentionally done twice)
 44+ //remove common tokens at the start
 45+ $i=0;
 46+ while($i<$m && $i<$n && $from[$i]===$to[$i]){
 47+ $result_from[$i] = $result_to[$i] = FALSE;
 48+ unset($from[$i],$to[$i]);
 49+ ++$i;
 50+ }
 51+
 52+ //remove common tokens at the end
 53+ $j=1;
 54+ while($i+$j<=$m && $i+$j<=$n && $from[$m-$j]===$to[$n-$j]){
 55+ $result_from[$m-$j] = $result_to[$n-$j] = FALSE;
 56+ unset($from[$m-$j],$to[$n-$j]);
 57+ ++$j;
 58+ }
 59+
 60+ $newFrom = $newFromIndex = $newTo = $newToIndex = array();
 61+
 62+ //remove tokens not in both sequences
 63+ $shared = array_fill_keys($from,FALSE);
 64+ foreach($to as $index => $el){
 65+ if(array_key_exists($el,$shared)){
 66+ //keep it
 67+ $shared[$el] = TRUE;
 68+ $newTo[] = $el;
 69+ $newToIndex[] = $index;
 70+ }
 71+ }
 72+ foreach($from as $index => $el){
 73+ if($shared[$el]){
 74+ //keep it
 75+ $newFrom[] = $el;
 76+ $newFromIndex[] = $index;
 77+ }
 78+ }
 79+
 80+ unset($from, $to, $shared);
 81+
 82+ $m2 = sizeof($newFrom);
 83+ $n2 = sizeof($newTo);
 84+ $offsetx = $offsety = 0;
 85+ $from_inLcs = new InLcs($m2);
 86+ $to_inLcs = new InLcs($n2);
 87+
 88+ wikidiff3_diffPart($newFrom, $newTo, $from_inLcs, $to_inLcs, $m2, $n2, $offsetx, $offsety, $m2, $boundRunningTime, $max_NP_before_bound);
 89+
 90+ foreach($from_inLcs->inLcs as $key => $in){
 91+ if($in){
 92+ $result_from[$newFromIndex[$key]] = FALSE;
 93+ }
 94+ }
 95+ foreach($to_inLcs->inLcs as $key => $in){
 96+ if($in){
 97+ $result_to[$newToIndex[$key]] = FALSE;
 98+ }
 99+ }
 100+
 101+ wfProfileOut( __METHOD__ );
 102+ return array($result_from, $result_to);
 103+}
 104+
 105+function wikidiff3_diffPart(array $a, array $b, InLcs $a_inLcs, InLcs $b_inLcs, $m, $n, $offsetx, $offsety, $bestKnownLcs, $boundRunningTime=FALSE, $max_NP_before_bound = 800000){
 106+ if($bestKnownLcs==0 || $m==0 || $n==0){
 107+ return;
 108+ }
 109+
 110+ if($m>$n){
 111+ return wikidiff3_diffPart($b, $a, $b_inLcs, $a_inLcs, $n, $m, $offsety, $offsetx, $bestKnownLcs, $boundRunningTime, $max_NP_before_bound);
 112+ }
 113+
 114+ $a_inLcs_sym = &$a_inLcs->inLcs;
 115+ $b_inLcs_sym = &$b_inLcs->inLcs;
 116+
 117+ //remove common tokens at the start
 118+ while($m>0 && $n>0 && $a[0]===$b[0]){
 119+ $a_inLcs_sym[$offsetx] = $b_inLcs_sym[$offsety] = TRUE;
 120+ ++$offsetx; ++$offsety;
 121+ --$m; --$n;
 122+ --$bestKnownLcs;
 123+ array_shift($a);
 124+ array_shift($b);
 125+ }
 126+
 127+ //remove common tokens at the end
 128+ while($m>0 && $n>0 && $a[$m-1]===$b[$n-1]){
 129+ $a_inLcs_sym[$offsetx+$m-1] = $b_inLcs_sym[$offsety+$n-1] = TRUE;
 130+ --$m; --$n;
 131+ --$bestKnownLcs;
 132+ array_pop($a);
 133+ array_pop($b);
 134+ }
 135+
 136+ if($bestKnownLcs==0 || $m==0 || $n==0){
 137+ return;
 138+ }
 139+ if($m>$n){
 140+ return wikidiff3_diffPart($b, $a, $b_inLcs, $a_inLcs, $n, $m, $offsety, $offsetx, $bestKnownLcs, $boundRunningTime, $max_NP_before_bound);
 141+ }
 142+
 143+ $delta = $n-$m;
 144+ $delta_plus_1 = $delta+1;
 145+ $delta_min_1 = $delta-1;
 146+
 147+ $fpForw = $snakeBeginForw = $snakeEndForw = array_fill( 0, $delta_plus_1, -1);
 148+ $lcsSizeForw = $snakekForw = $lcsSizeBack = $snakekBack = array_fill( 0, $delta_plus_1, 0);
 149+ $fpBack = $snakeBeginBack = $snakeEndBack = array_fill( 0, $delta_plus_1, $n);
 150+ $overlap = $delta>1 ? array_fill( 1, $delta_min_1, FALSE) : array();
 151+ $bestLcsLength = $bestLcsLengthTop = $bestLcsLengthBottom = -1;
 152+
 153+ if($boundRunningTime){
 154+ $maxp_before_bound = max((-$delta+sqrt($delta*$delta+($max_NP_before_bound<<2)))>>1,2);
 155+ if($maxp_before_bound>=$m){
 156+ $boundRunningTime = false;
 157+ unset($maxp_before_bound);
 158+ }
 159+ }
 160+
 161+ $p=-1;
 162+ $m_min_1 = $m-1;
 163+ $maxp=$m_min_1-$bestLcsLength;
 164+
 165+ while($p<$maxp){
 166+
 167+ if($boundRunningTime && $p>$maxp_before_bound){
 168+ // bound the running time by stopping early
 169+ if($bestLcsLength>=0){
 170+ // accept the best LCS thusfar
 171+ break;
 172+ }else{
 173+ $bestLcsProgressForw = $bestkForw = 0;
 174+ foreach($lcsSizeForw as $k => $localLcsProgress){
 175+ if($localLcsProgress>$bestLcsProgressForw || ($localLcsProgress==$bestLcsProgressForw && $bestkForw > $k)){
 176+ $bestLcsProgressForw = $localLcsProgress;
 177+ $bestkForw = $k;
 178+ }
 179+ }
 180+ $bestLcsProgressBack = $bestkBack = 0;
 181+ foreach($lcsSizeBack as $k => $localLcsProgress){
 182+ if($localLcsProgress>$bestLcsProgressBack || ($localLcsProgress==$bestLcsProgressBack && $bestkBack < $k)){
 183+ $bestLcsProgressBack = $localLcsProgress;
 184+ $bestkBack = $k;
 185+ }
 186+ }
 187+ if($fpForw[$bestkForw]-$bestkForw>$fpBack[$bestkBack]-$bestkBack){
 188+ // This is hard, maybe try some more? Can this even happen? A solution will be found soon anyway.
 189+ }else{
 190+ // lets do this
 191+ $topSnakeStart = $snakeBeginForw[$bestkForw];
 192+ $topSnakeEnd = $snakeEndForw[$bestkForw];
 193+ $topSnakek = $snakekForw[$bestkForw];
 194+ $bestLcsLengthTop = $lcsSizeForw[$bestkBack] + $topSnakeStart - $topSnakeEnd;
 195+
 196+ $bottomSnakeStart = $snakeEndBack[$bestkBack]+1;
 197+ $bottomSnakeEnd = $snakeBeginBack[$bestkBack]+1;
 198+ $bottomSnakek = $snakekBack[$bestkBack];
 199+ $bestLcsLengthBottom = $lcsSizeBack[$bestkBack] + $bottomSnakeStart - $bottomSnakeEnd;
 200+
 201+ if($bottomSnakeStart-$topSnakeEnd>$n*0.6 && ($bottomSnakeStart-$bottomSnakek)-($topSnakeEnd-$topSnakek)>$m*0.6){
 202+ // cut the sequence from both sides (there isn't enough progress, 60% of the sequence is not covered)
 203+ // also diff the middle now
 204+ if($bottomSnakeEnd>($fpForw[$bestkForw]>>1)){
 205+ // do the middle diff between the snakes
 206+ wfDebug(" Limiting diff run time from both sides, middle between snakes\n");
 207+ $m_t = ($bottomSnakeStart-$bottomSnakek)-($topSnakeEnd-$topSnakek);
 208+ $n_t = $bottomSnakeStart-$topSnakeEnd;
 209+ $a_t = array_slice($a, $topSnakeEnd-$topSnakek, $m_t);
 210+ $b_t = array_slice($b, $topSnakeEnd, $n_t);
 211+ $offsetx_t = $offsetx+($topSnakeEnd-$topSnakek);
 212+ $offsety_t = $offsety+$topSnakeEnd;
 213+ }else{
 214+ // the snake is too early in the sequence, do the middle diff between endpoints of progress
 215+ wfDebug(" Limiting diff run time from both sides, middle between endpoints\n");
 216+ $m_t = ($fpBack[$bestkBack]+1-$bestkBack)-($fpForw[$bestkForw]-$bestkForw);
 217+ $n_t = $fpBack[$bestkBack]+1-$fpForw[$bestkForw];
 218+ $a_t = array_slice($a, $fpForw[$bestkForw]-$bestkForw, $m_t);
 219+ $b_t = array_slice($b, $fpForw[$bestkForw], $n_t);
 220+ $offsetx_t = $offsetx+($fpForw[$bestkForw]-$bestkForw);
 221+ $offsety_t = $offsety+$fpForw[$bestkForw];
 222+ }
 223+ wikidiff3_diffPart($a_t, $b_t, $a_inLcs, $b_inLcs, $m_t, $n_t, $offsetx_t, $offsety_t, $m_t, $boundRunningTime, $max_NP_before_bound);
 224+ }else if(min($n-$bottomSnakeStart, $m-($bottomSnakeStart-$bottomSnakek))>min($topSnakeEnd, $topSnakeEnd-$topSnakek)){
 225+ // the bottom snake is more interesting
 226+ wfDebug("Limiting diff run time from bottom side\n");
 227+ $topSnakeStart = $bottomSnakeStart;
 228+ $topSnakeEnd = $bottomSnakeEnd;
 229+ $topSnakek = $bottomSnakek;
 230+ $bestLcsLengthTop = $topSnakeEnd-$topSnakek;
 231+ $bottomSnakeStart = $bottomSnakeEnd;
 232+ }else{
 233+ // the top snake is more interesting
 234+ wfDebug(" Limiting diff run time from top side\n");
 235+ $bottomSnakeStart = $topSnakeEnd;
 236+ $bottomSnakeEnd = $topSnakeEnd;
 237+ $bottomSnakek = $topSnakek;
 238+ $bestLcsLengthBottom = $m-($bottomSnakeEnd-$bottomSnakek);
 239+ }
 240+ break;
 241+ }
 242+ }
 243+ }
 244+ ++$p;
 245+ $overlap[-$p] = $overlap[$delta+$p] = FALSE;
 246+
 247+ $min_p_min_1 = -$p-1;
 248+ $delta_plus_1_plus_p = $delta_plus_1+$p;
 249+
 250+ // forward
 251+ $fpForw[$min_p_min_1] = $snakeEndForw[$min_p_min_1] = $snakekForw[$min_p_min_1] = $snakeBeginForw[$min_p_min_1] = $fpForw[$delta_plus_1_plus_p] =
 252+ $snakeBeginForw[$delta_plus_1_plus_p] = $snakeEndForw[$delta_plus_1_plus_p] = $snakekForw[$delta_plus_1_plus_p] = -1;
 253+ $lcsSizeForw[$min_p_min_1] = $lcsSizeForw[$delta_plus_1_plus_p] = 0;
 254+
 255+ $k=-$p;
 256+ do {
 257+ $k_plus_1 = $k+1;
 258+ $k_min_1 = $k-1;
 259+
 260+ $fpBelow = $fpForw[$k_min_1]+1;
 261+ $fpAbove = $fpForw[$k_plus_1];
 262+ $y = &$fpForw[$k];
 263+ if($fpBelow>$fpAbove){
 264+ $oldy = $y = $fpBelow;
 265+ $lcsSizeForw[$k] = $lcsSizeForw[$k_min_1];
 266+ $snakeBeginForw[$k] = $snakeBeginForw[$k_min_1];
 267+ $snakeEndForw[$k] = $snakeEndForw[$k_min_1];
 268+ $snakekForw[$k] = $snakekForw[$k_min_1];
 269+ }else{
 270+ $oldy = $y = $fpAbove;
 271+ $lcsSizeForw[$k] = $lcsSizeForw[$k_plus_1];
 272+ $snakeBeginForw[$k] = $snakeBeginForw[$k_plus_1];
 273+ $snakeEndForw[$k] = $snakeEndForw[$k_plus_1];
 274+ $snakekForw[$k] = $snakekForw[$k_plus_1];
 275+ }
 276+ $x = $y-$k;
 277+ while($x < $m && $y < $n && $a[$x]===$b[$y]){
 278+ ++$x;
 279+ ++$y;
 280+ }
 281+ if($y-$oldy>0){
 282+ $lcsSizeForw[$k] += $y-$oldy;
 283+ $snakeBeginForw[$k] = $oldy;
 284+ $snakeEndForw[$k]= $y;
 285+ $snakekForw[$k] = $k;
 286+ }
 287+ if($y>=$fpBack[$k] && !$overlap[$k]){
 288+ // there is overlap
 289+ $overlap[$k]=TRUE;
 290+ $lcsLength = $lcsSizeForw[$k]+$lcsSizeBack[$k];
 291+ if($y>$fpBack[$k]+1){
 292+ $snakeoverlap = $y-$fpBack[$k]-1;
 293+ $lcsLength -= $snakeoverlap;
 294+ }
 295+ if($lcsLength>$bestLcsLength){
 296+ // a better sequence found!
 297+ $bestLcsLength = $lcsLength;
 298+
 299+ $topSnakeStart = $snakeBeginForw[$k];
 300+ $topSnakeEnd = $snakeEndForw[$k];
 301+ $topSnakek = $snakekForw[$k];
 302+
 303+ // aligned snakes bite (\ )
 304+ // ( \ \)
 305+ $bottomSnakeStart = max($snakeEndBack[$k]+1, $topSnakeEnd+max(0,$snakekBack[$k]-$snakekForw[$k]));
 306+ $bottomSnakeEnd = max($snakeBeginBack[$k]+1, $bottomSnakeStart);
 307+ $bottomSnakek = $snakekBack[$k];
 308+
 309+ if($bottomSnakeEnd<$y){
 310+ $bottomSnakeStart = $y;
 311+ $bottomSnakeEnd = $y;
 312+ $bottomSnakek = $k;
 313+ }
 314+
 315+ $bestLcsLengthTop = $lcsSizeForw[$k]-($snakeEndForw[$k]-$snakeBeginForw[$k]);
 316+ $bestLcsLengthBottom = $lcsSizeBack[$k]-($snakeBeginBack[$k]-$snakeEndBack[$k]);
 317+ if($bestKnownLcs==$lcsLength){
 318+ $fpForw[$k]=$y;
 319+ break 2;
 320+ }
 321+ $maxp=$m_min_1-$bestLcsLength;
 322+ }
 323+ }
 324+ if($k<$delta_min_1){
 325+ ++$k;
 326+ }else if($k>$delta){
 327+ --$k;
 328+ }else if($k==$delta_min_1){
 329+ $k = $delta+$p;
 330+ }else{
 331+ break;
 332+ }
 333+ } while(TRUE);
 334+
 335+ // backward
 336+ $fpBack[$min_p_min_1] = $snakeBeginBack[$min_p_min_1] = $snakeEndBack[$min_p_min_1] = $snakekBack[$min_p_min_1] =
 337+ $fpBack[$delta_plus_1_plus_p] = $snakeBeginBack[$delta_plus_1_plus_p] = $snakeEndBack[$delta_plus_1_plus_p] = $snakekBack[$delta_plus_1_plus_p] = $n;
 338+ $lcsSizeBack[$min_p_min_1] = $lcsSizeBack[$delta_plus_1_plus_p] = 0;
 339+
 340+ $k=$delta+$p;
 341+ do {
 342+ $k_plus_1 = $k+1;
 343+ $k_min_1 = $k-1;
 344+
 345+ $fpBelow = $fpBack[$k_min_1];
 346+ $fpAbove = $fpBack[$k_plus_1]-1;
 347+ $y = &$fpBack[$k];
 348+ if($fpBelow<$fpAbove){
 349+ $oldy = $y = $fpBelow;
 350+ $lcsSizeBack[$k] = $lcsSizeBack[$k_min_1];
 351+ $snakeBeginBack[$k] = $snakeBeginBack[$k_min_1];
 352+ $snakeEndBack[$k] = $snakeEndBack[$k_min_1];
 353+ $snakekBack[$k] = $snakekBack[$k_min_1];
 354+ }else{
 355+ $oldy = $y = $fpAbove;
 356+ $lcsSizeBack[$k] = $lcsSizeBack[$k_plus_1];
 357+ $snakeBeginBack[$k] = $snakeBeginBack[$k_plus_1];
 358+ $snakeEndBack[$k] = $snakeEndBack[$k_plus_1];
 359+ $snakekBack[$k] = $snakekBack[$k_plus_1];
 360+ }
 361+ $x = $y-$k;
 362+ while($x > -1 && $y > -1 && $a[$x]===$b[$y]){
 363+ --$x;
 364+ --$y;
 365+ }
 366+ if($oldy-$y>0){
 367+ $lcsSizeBack[$k] += $oldy-$y;
 368+ $snakeBeginBack[$k] = $oldy;
 369+ $snakeEndBack[$k] = $y;
 370+ $snakekBack[$k] = $k;
 371+ }
 372+ if($fpForw[$k]>=$y && !$overlap[$k]){
 373+ // there is overlap
 374+ $overlap[$k]=TRUE;
 375+ $lcsLength = $lcsSizeForw[$k]+$lcsSizeBack[$k];
 376+ if($fpForw[$k]>$y+1){
 377+ $snakeoverlap = $fpForw[$k]-$y-1;
 378+ $lcsLength -= $snakeoverlap;
 379+ }
 380+ if($lcsLength>$bestLcsLength){
 381+ // a better sequence found!
 382+ $bestLcsLength = $lcsLength;
 383+
 384+ $topSnakeStart = $snakeBeginForw[$k];
 385+ $topSnakeEnd = $snakeEndForw[$k];
 386+ $topSnakek = $snakekForw[$k];
 387+
 388+ // aligned snakes bite (\ )
 389+ // ( \ \)
 390+ $bottomSnakeStart = max($snakeEndBack[$k]+1, $topSnakeEnd+max(0,$snakekBack[$k]-$snakekForw[$k]));
 391+ $bottomSnakeEnd = max($snakeBeginBack[$k]+1, $bottomSnakeStart);
 392+ $bottomSnakek = $snakekBack[$k];
 393+
 394+ if($bottomSnakeEnd<$fpForw[$k]){
 395+ $bottomSnakeStart = $fpForw[$k];
 396+ $bottomSnakeEnd = $fpForw[$k];
 397+ $bottomSnakek = $k;
 398+ }
 399+
 400+ $bestLcsLengthTop = $lcsSizeForw[$k]-($snakeEndForw[$k]-$snakeBeginForw[$k]);
 401+ $bestLcsLengthBottom = $lcsSizeBack[$k]-($snakeBeginBack[$k]-$snakeEndBack[$k]);
 402+ if($bestKnownLcs==$lcsLength){
 403+ $fpBack[$k] = $y;
 404+ break 2;
 405+ }
 406+ $maxp=$m_min_1-$bestLcsLength;
 407+ }
 408+ }
 409+ if($k>1){
 410+ --$k;
 411+ }else if($k<0){
 412+ ++$k;
 413+ }else if($k==1){
 414+ $k = -$p;
 415+ }else{
 416+ break;
 417+ }
 418+ } while(TRUE);
 419+ }
 420+
 421+ unset($fpForw, $fpBack, $lcsSizeForw, $lcsSizeBack);
 422+ unset($snakeBeginForw, $snakeBeginBack, $snakeEndForw, $snakeEndBack, $snakekForw, $snakekBack);
 423+ unset($overlap);
 424+
 425+ // Mark snakes as in LCS
 426+ $maxi = $offsetx+$topSnakeEnd-$topSnakek;
 427+ for($i=$offsetx+$topSnakeStart-$topSnakek;$i<$maxi;++$i){
 428+ $a_inLcs_sym[$i] = TRUE;
 429+ }
 430+ $maxi = $offsety+$topSnakeEnd;
 431+ for($i=$offsety+$topSnakeStart;$i<$maxi;++$i){
 432+ $b_inLcs_sym[$i] = TRUE;
 433+ }
 434+ $maxi = $offsetx+$bottomSnakeEnd-$bottomSnakek;
 435+ for($i=$offsetx+$bottomSnakeStart-$bottomSnakek;$i<$maxi;++$i){
 436+ $a_inLcs_sym[$i] = TRUE;
 437+ }
 438+ $maxi = $offsety+$bottomSnakeEnd;
 439+ for($i=$offsety+$bottomSnakeStart;$i<$maxi;++$i){
 440+ $b_inLcs_sym[$i] = TRUE;
 441+ }
 442+
 443+ $m_t = $topSnakeStart-$topSnakek;
 444+ $a_t = array_slice($a, 0, $m_t);
 445+ $b_t = array_slice($b, 0, $topSnakeStart);
 446+
 447+ $m_b = $m-($bottomSnakeEnd-$bottomSnakek);
 448+ $n_b = $n-$bottomSnakeEnd;
 449+ $a_b = array_slice($a, $bottomSnakeEnd-$bottomSnakek, $m_b);
 450+ $b_b = array_slice($b, $bottomSnakeEnd, $n_b);
 451+
 452+ wikidiff3_diffPart($a_t, $b_t, $a_inLcs, $b_inLcs, $m_t, $topSnakeStart, $offsetx, $offsety, $bestLcsLengthTop, $boundRunningTime, $max_NP_before_bound);
 453+
 454+ wikidiff3_diffPart($a_b, $b_b, $a_inLcs, $b_inLcs, $m_b, $n_b, $offsetx+($bottomSnakeEnd-$bottomSnakek), $offsety+$bottomSnakeEnd, $bestLcsLengthBottom, $boundRunningTime, $max_NP_before_bound);
 455+}
 456+
 457+class InLcs {
 458+
 459+ public $inLcs;
 460+
 461+ function __construct($length){
 462+ $this->inLcs = $length>0 ? array_fill(0,$length,FALSE): array();
 463+ }
 464+
 465+ /**
 466+ * Get the length of the Longest Common Subsequence (computed)
 467+ */
 468+ public function getLcsLength(){
 469+ return array_sum($this->inLcs);
 470+ }
 471+
 472+}
 473+?>
\ No newline at end of file
Index: trunk/phase3/includes/DifferenceEngine.php
@@ -3,6 +3,8 @@
44 * @defgroup DifferenceEngine DifferenceEngine
55 */
66
 7+require_once( 'Diff.php' );
 8+
79 /**
810 * Constant to indicate diff cache compatibility.
911 * Bump this when changing the diff formatting in a way that
@@ -77,7 +79,7 @@
7880 global $wgUser, $wgOut, $wgUseExternalEditor, $wgUseRCPatrol;
7981 wfProfileIn( __METHOD__ );
8082
81 - # If external diffs are enabled both globally and for the user,
 83+ # If external diffs are enabled both globally and for the user,
8284 # we'll use the application/x-external-editor interface to call
8385 # an external diff tool like kompare, kdiff3, etc.
8486 if($wgUseExternalEditor && $wgUser->getOption('externaldiff')) {
@@ -88,19 +90,19 @@
8991 $url2=$this->mTitle->getFullURL("action=raw&oldid=".$this->mNewid);
9092 $special=$wgLang->getNsText(NS_SPECIAL);
9193 $control=<<<CONTROL
92 -[Process]
93 -Type=Diff text
94 -Engine=MediaWiki
95 -Script={$wgServer}{$wgScript}
96 -Special namespace={$special}
 94+ [Process]
 95+ Type=Diff text
 96+ Engine=MediaWiki
 97+ Script={$wgServer}{$wgScript}
 98+ Special namespace={$special}
9799
98 -[File]
99 -Extension=wiki
100 -URL=$url1
 100+ [File]
 101+ Extension=wiki
 102+ URL=$url1
101103
102 -[File 2]
103 -Extension=wiki
104 -URL=$url2
 104+ [File 2]
 105+ Extension=wiki
 106+ URL=$url2
105107 CONTROL;
106108 echo($control);
107109 return;
@@ -170,15 +172,15 @@
171173 // Look for an unpatrolled change corresponding to this diff
172174 $db = wfGetDB( DB_SLAVE );
173175 $change = RecentChange::newFromConds(
174 - array(
175 - // Add redundant user,timestamp condition so we can use the existing index
 176+ array(
 177+ // Add redundant user,timestamp condition so we can use the existing index
176178 'rc_user_text' => $this->mNewRev->getRawUserText(),
177179 'rc_timestamp' => $db->timestamp( $this->mNewRev->getTimestamp() ),
178180 'rc_this_oldid' => $this->mNewid,
179181 'rc_last_oldid' => $this->mOldid,
180182 'rc_patrolled' => 0
181 - ),
182 - __METHOD__
 183+ ),
 184+ __METHOD__
183185 );
184186 if( $change instanceof RecentChange ) {
185187 $rcid = $change->mAttribs['rc_id'];
@@ -190,8 +192,8 @@
191193 // Build the link
192194 if( $rcid ) {
193195 $patrol = ' <span class="patrollink">[' . $sk->makeKnownLinkObj(
194 - $this->mTitle,
195 - wfMsgHtml( 'markaspatrolleddiff' ),
 196+ $this->mTitle,
 197+ wfMsgHtml( 'markaspatrolleddiff' ),
196198 "action=markpatrolled&rcid={$rcid}"
197199 ) . ']</span>';
198200 } else {
@@ -225,33 +227,33 @@
226228 if( $wgUser->isAllowed( 'deleterevision' ) ) {
227229 $revdel = SpecialPage::getTitleFor( 'Revisiondelete' );
228230 if( !$this->mOldRev->userCan( Revision::DELETED_RESTRICTED ) ) {
229 - // If revision was hidden from sysops
 231+ // If revision was hidden from sysops
230232 $ldel = wfMsgHtml('rev-delundel');
231233 } else {
232234 $ldel = $sk->makeKnownLinkObj( $revdel,
233 - wfMsgHtml('rev-delundel'),
 235+ wfMsgHtml('rev-delundel'),
234236 'target=' . urlencode( $this->mOldRev->mTitle->getPrefixedDbkey() ) .
235237 '&oldid=' . urlencode( $this->mOldRev->getId() ) );
236238 // Bolden oversighted content
237239 if( $this->mOldRev->isDeleted( Revision::DELETED_RESTRICTED ) )
238 - $ldel = "<strong>$ldel</strong>";
 240+ $ldel = "<strong>$ldel</strong>";
239241 }
240242 $ldel = "&nbsp;&nbsp;&nbsp;<tt>(<small>$ldel</small>)</tt> ";
241243 // We don't currently handle well changing the top revision's settings
242244 if( $this->mNewRev->isCurrent() ) {
243 - // If revision was hidden from sysops
 245+ // If revision was hidden from sysops
244246 $rdel = wfMsgHtml('rev-delundel');
245247 } else if( !$this->mNewRev->userCan( Revision::DELETED_RESTRICTED ) ) {
246 - // If revision was hidden from sysops
 248+ // If revision was hidden from sysops
247249 $rdel = wfMsgHtml('rev-delundel');
248250 } else {
249251 $rdel = $sk->makeKnownLinkObj( $revdel,
250 - wfMsgHtml('rev-delundel'),
 252+ wfMsgHtml('rev-delundel'),
251253 'target=' . urlencode( $this->mNewRev->mTitle->getPrefixedDbkey() ) .
252254 '&oldid=' . urlencode( $this->mNewRev->getId() ) );
253255 // Bolden oversighted content
254256 if( $this->mNewRev->isDeleted( Revision::DELETED_RESTRICTED ) )
255 - $rdel = "<strong>$rdel</strong>";
 257+ $rdel = "<strong>$rdel</strong>";
256258 }
257259 $rdel = "&nbsp;&nbsp;&nbsp;<tt>(<small>$rdel</small>)</tt> ";
258260 }
@@ -268,7 +270,7 @@
269271 $this->showDiff( $oldHeader, $newHeader );
270272
271273 if ( !$diffOnly )
272 - $this->renderNewRevision();
 274+ $this->renderNewRevision();
273275
274276 wfProfileOut( __METHOD__ );
275277 }
@@ -283,9 +285,9 @@
284286 $wgOut->addHTML( "<hr /><h2>{$this->mPagetitle}</h2>\n" );
285287 #add deleted rev tag if needed
286288 if( !$this->mNewRev->userCan(Revision::DELETED_TEXT) ) {
287 - $wgOut->addWikiMsg( 'rev-deleted-text-permission' );
 289+ $wgOut->addWikiMsg( 'rev-deleted-text-permission' );
288290 } else if( $this->mNewRev->isDeleted(Revision::DELETED_TEXT) ) {
289 - $wgOut->addWikiMsg( 'rev-deleted-text-view' );
 291+ $wgOut->addWikiMsg( 'rev-deleted-text-view' );
290292 }
291293
292294 if( !$this->mNewRev->isCurrent() ) {
@@ -310,7 +312,7 @@
311313 $wgOut->addHtml( "\n</pre>\n" );
312314 }
313315 } else
314 - $wgOut->addWikiTextTidy( $this->mNewtext );
 316+ $wgOut->addWikiTextTidy( $this->mNewtext );
315317
316318 if( !$this->mNewRev->isCurrent() ) {
317319 $wgOut->parserOptions()->setEditSection( $oldEditSectionSetting );
@@ -356,9 +358,9 @@
357359
358360 $nextlink = $sk->makeKnownLinkObj( $this->mTitle, wfMsgHtml( 'nextdiff' ), 'diff=next&oldid='.$this->mNewid, '', '', 'id="differences-nextlink"' );
359361 $header = "<div class=\"firstrevisionheader\" style=\"text-align: center\"><strong>{$this->mOldtitle}</strong><br />" .
360 - $sk->revUserTools( $this->mNewRev ) . "<br />" .
361 - $sk->revComment( $this->mNewRev ) . "<br />" .
362 - $nextlink . "</div>\n";
 362+ $sk->revUserTools( $this->mNewRev ) . "<br />" .
 363+ $sk->revComment( $this->mNewRev ) . "<br />" .
 364+ $nextlink . "</div>\n";
363365
364366 $wgOut->addHTML( $header );
365367
@@ -423,9 +425,9 @@
424426 wfProfileIn( __METHOD__ );
425427 // Check if the diff should be hidden from this user
426428 if ( $this->mOldRev && !$this->mOldRev->userCan(Revision::DELETED_TEXT) ) {
427 - return '';
 429+ return '';
428430 } else if ( $this->mNewRev && !$this->mNewRev->userCan(Revision::DELETED_TEXT) ) {
429 - return '';
 431+ return '';
430432 }
431433 // Cacheable?
432434 $key = false;
@@ -486,7 +488,7 @@
487489 dl('php_wikidiff.so');
488490 }
489491 return $wgContLang->unsegementForDiff( wikidiff_do_diff( $otext, $ntext, 2 ) ) .
490 - $this->debug( 'wikidiff1' );
 492+ $this->debug( 'wikidiff1' );
491493 }
492494
493495 if ( $wgExternalDiffEngine == 'wikidiff2' ) {
@@ -505,7 +507,7 @@
506508 return $text;
507509 }
508510 }
509 - if ( $wgExternalDiffEngine !== false ) {
 511+ if ( $wgExternalDiffEngine != 'wikidiff3' && $wgExternalDiffEngine !== false ) {
510512 # Diff via the shell
511513 global $wgTmpDirectory;
512514 $tempName1 = tempnam( $wgTmpDirectory, 'diff_' );
@@ -541,9 +543,9 @@
542544 $diffs = new Diff( $ota, $nta );
543545 $formatter = new TableDiffFormatter();
544546 return $wgContLang->unsegmentForDiff( $formatter->format( $diffs ) ) .
545 - $this->debug();
 547+ $this->debug();
546548 }
547 -
 549+
548550 /**
549551 * Generate a debug comment indicating diff generating time,
550552 * server node, and generator backend.
@@ -556,10 +558,10 @@
557559 }
558560 $data[] = wfTimestamp( TS_DB );
559561 return "<!-- diff generator: " .
560 - implode( " ",
561 - array_map(
 562+ implode( " ",
 563+ array_map(
562564 "htmlspecialchars",
563 - $data ) ) .
 565+ $data ) ) .
564566 " -->\n";
565567 }
566568
@@ -568,7 +570,7 @@
569571 */
570572 function localiseLineNumbers( $text ) {
571573 return preg_replace_callback( '/<!--LINE (\d+)-->/',
572 - array( &$this, 'localiseLineNumbersCb' ), $text );
 574+ array( &$this, 'localiseLineNumbersCb' ), $text );
573575 }
574576
575577 function localiseLineNumbersCb( $matches ) {
@@ -582,7 +584,7 @@
583585 */
584586 function getMultiNotice() {
585587 if ( !is_object($this->mOldRev) || !is_object($this->mNewRev) )
586 - return '';
 588+ return '';
587589
588590 if( !$this->mOldPage->equals( $this->mNewPage ) ) {
589591 // Comparing two different pages? Count would be meaningless.
@@ -597,7 +599,7 @@
598600
599601 $n = $this->mTitle->countRevisionsBetween( $oldid, $newid );
600602 if ( !$n )
601 - return '';
 603+ return '';
602604
603605 return wfMsgExt( 'diff-multi', array( 'parseinline' ), $n );
604606 }
@@ -610,19 +612,19 @@
611613 global $wgOut;
612614
613615 $header = "
614 - <table class='diff'>
615 - <col class='diff-marker' />
616 - <col class='diff-content' />
617 - <col class='diff-marker' />
618 - <col class='diff-content' />
619 - <tr valign='top'>
620 - <td colspan='2' class='diff-otitle'>{$otitle}</td>
621 - <td colspan='2' class='diff-ntitle'>{$ntitle}</td>
622 - </tr>
 616+ <table class='diff'>
 617+ <col class='diff-marker' />
 618+ <col class='diff-content' />
 619+ <col class='diff-marker' />
 620+ <col class='diff-content' />
 621+ <tr valign='top'>
 622+ <td colspan='2' class='diff-otitle'>{$otitle}</td>
 623+ <td colspan='2' class='diff-ntitle'>{$ntitle}</td>
 624+ </tr>
623625 ";
624626
625627 if ( $multi != '' )
626 - $header .= "<tr><td colspan='4' align='center' class='diff-multi'>{$multi}</td></tr>";
 628+ $header .= "<tr><td colspan='4' align='center' class='diff-multi'>{$multi}</td></tr>";
627629
628630 return $header . $diff . "</table>";
629631 }
@@ -657,10 +659,10 @@
658660
659661 // Load the new revision object
660662 $this->mNewRev = $this->mNewid
661 - ? Revision::newFromId( $this->mNewid )
662 - : Revision::newFromTitle( $this->mTitle );
 663+ ? Revision::newFromId( $this->mNewid )
 664+ : Revision::newFromTitle( $this->mTitle );
663665 if( !$this->mNewRev instanceof Revision )
664 - return false;
 666+ return false;
665667
666668 // Update the new revision ID in case it was 0 (makes life easier doing UI stuff)
667669 $this->mNewid = $this->mNewRev->getId();
@@ -688,9 +690,9 @@
689691 $this->mNewtitle .= " (<a href='$newEdit'>" . wfMsgHtml( $editable ? 'editold' : 'viewsourceold' ) . "</a>)";
690692 }
691693 if ( !$this->mNewRev->userCan(Revision::DELETED_TEXT) ) {
692 - $this->mNewtitle = "<span class='history-deleted'>{$this->mPagetitle}</span>";
 694+ $this->mNewtitle = "<span class='history-deleted'>{$this->mPagetitle}</span>";
693695 } else if ( $this->mNewRev->isDeleted(Revision::DELETED_TEXT) ) {
694 - $this->mNewtitle = '<span class="history-deleted">'.$this->mNewtitle.'</span>';
 696+ $this->mNewtitle = '<span class="history-deleted">'.$this->mNewtitle.'</span>';
695697 }
696698
697699 // Load the old revision object
@@ -722,7 +724,7 @@
723725 $this->mOldPagetitle = htmlspecialchars( wfMsg( 'revisionasof', $t ) );
724726
725727 $this->mOldtitle = "<a href='$oldLink'>{$this->mOldPagetitle}</a>"
726 - . " (<a href='$oldEdit'>" . wfMsgHtml( $editable ? 'editold' : 'viewsourceold' ) . "</a>)";
 728+ . " (<a href='$oldEdit'>" . wfMsgHtml( $editable ? 'editold' : 'viewsourceold' ) . "</a>)";
727729 // Add an "undo" link
728730 $newUndo = $this->mNewPage->escapeLocalUrl( 'action=edit&undoafter=' . $this->mOldid . '&undo=' . $this->mNewid);
729731 if( $editable && !$this->mOldRev->isDeleted( Revision::DELETED_TEXT ) && !$this->mNewRev->isDeleted( Revision::DELETED_TEXT ) ) {
@@ -730,9 +732,9 @@
731733 }
732734
733735 if( !$this->mOldRev->userCan( Revision::DELETED_TEXT ) ) {
734 - $this->mOldtitle = '<span class="history-deleted">' . $this->mOldPagetitle . '</span>';
 736+ $this->mOldtitle = '<span class="history-deleted">' . $this->mOldPagetitle . '</span>';
735737 } else if( $this->mOldRev->isDeleted( Revision::DELETED_TEXT ) ) {
736 - $this->mOldtitle = '<span class="history-deleted">' . $this->mOldtitle . '</span>';
 738+ $this->mOldtitle = '<span class="history-deleted">' . $this->mOldtitle . '</span>';
737739 }
738740 }
739741
@@ -828,7 +830,7 @@
829831
830832 function _DiffOp_Copy ($orig, $closing = false) {
831833 if (!is_array($closing))
832 - $closing = $orig;
 834+ $closing = $orig;
833835 $this->orig = $orig;
834836 $this->closing = $closing;
835837 }
@@ -892,7 +894,6 @@
893895 }
894896 }
895897
896 -
897898 /**
898899 * Class used internally by Diff to actually compute the diffs.
899900 *
@@ -911,65 +912,77 @@
912913 * are my own.
913914 *
914915 * Line length limits for robustness added by Tim Starling, 2005-08-31
 916+ * Alternative implementation added by Guy Van den Broeck, 2008-07-30
915917 *
916 - * @author Geoffrey T. Dairiki, Tim Starling
 918+ * @author Geoffrey T. Dairiki, Tim Starling, Guy Van den Broeck
917919 * @private
918920 * @ingroup DifferenceEngine
919921 */
920922 class _DiffEngine {
 923+
921924 const MAX_XREF_LENGTH = 10000;
922925
923926 function diff ($from_lines, $to_lines) {
924927 wfProfileIn( __METHOD__ );
925928
 929+ global $wgExternalDiffEngine;
 930+
926931 $n_from = sizeof($from_lines);
927932 $n_to = sizeof($to_lines);
928933
929 - $this->xchanged = $this->ychanged = array();
930 - $this->xv = $this->yv = array();
931 - $this->xind = $this->yind = array();
932 - unset($this->seq);
933 - unset($this->in_seq);
934 - unset($this->lcs);
 934+ if($wgExternalDiffEngine == 'wikidiff3'){
 935+ // wikidiff3
 936+ list($this->xchanged, $this->ychanged) = wikidiff3_diff($from_lines, $to_lines, TRUE, 100000);
 937+ }else{
 938+ // old diff
 939+ wfProfileIn( __METHOD__ ." - basicdiff");
 940+ $this->xchanged = $this->ychanged = array();
 941+ $this->xv = $this->yv = array();
 942+ $this->xind = $this->yind = array();
 943+ unset($this->seq);
 944+ unset($this->in_seq);
 945+ unset($this->lcs);
935946
936 - // Skip leading common lines.
937 - for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) {
938 - if ($from_lines[$skip] !== $to_lines[$skip])
 947+ // Skip leading common lines.
 948+ for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) {
 949+ if ($from_lines[$skip] !== $to_lines[$skip])
939950 break;
940 - $this->xchanged[$skip] = $this->ychanged[$skip] = false;
941 - }
942 - // Skip trailing common lines.
943 - $xi = $n_from; $yi = $n_to;
944 - for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) {
945 - if ($from_lines[$xi] !== $to_lines[$yi])
 951+ $this->xchanged[$skip] = $this->ychanged[$skip] = false;
 952+ }
 953+ // Skip trailing common lines.
 954+ $xi = $n_from; $yi = $n_to;
 955+ for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) {
 956+ if ($from_lines[$xi] !== $to_lines[$yi])
946957 break;
947 - $this->xchanged[$xi] = $this->ychanged[$yi] = false;
948 - }
 958+ $this->xchanged[$xi] = $this->ychanged[$yi] = false;
 959+ }
949960
950 - // Ignore lines which do not exist in both files.
951 - for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
952 - $xhash[$this->_line_hash($from_lines[$xi])] = 1;
953 - }
 961+ // Ignore lines which do not exist in both files.
 962+ for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
 963+ $xhash[$this->_line_hash($from_lines[$xi])] = 1;
 964+ }
954965
955 - for ($yi = $skip; $yi < $n_to - $endskip; $yi++) {
956 - $line = $to_lines[$yi];
957 - if ( ($this->ychanged[$yi] = empty($xhash[$this->_line_hash($line)])) )
 966+ for ($yi = $skip; $yi < $n_to - $endskip; $yi++) {
 967+ $line = $to_lines[$yi];
 968+ if ( ($this->ychanged[$yi] = empty($xhash[$this->_line_hash($line)])) )
958969 continue;
959 - $yhash[$this->_line_hash($line)] = 1;
960 - $this->yv[] = $line;
961 - $this->yind[] = $yi;
962 - }
963 - for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
964 - $line = $from_lines[$xi];
965 - if ( ($this->xchanged[$xi] = empty($yhash[$this->_line_hash($line)])) )
 970+ $yhash[$this->_line_hash($line)] = 1;
 971+ $this->yv[] = $line;
 972+ $this->yind[] = $yi;
 973+ }
 974+ for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
 975+ $line = $from_lines[$xi];
 976+ if ( ($this->xchanged[$xi] = empty($yhash[$this->_line_hash($line)])) )
966977 continue;
967 - $this->xv[] = $line;
968 - $this->xind[] = $xi;
 978+ $this->xv[] = $line;
 979+ $this->xind[] = $xi;
 980+ }
 981+
 982+ // Find the LCS.
 983+ $this->_compareseq(0, sizeof($this->xv), 0, sizeof($this->yv));
 984+ wfProfileOut( __METHOD__ ." - basicdiff");
969985 }
970986
971 - // Find the LCS.
972 - $this->_compareseq(0, sizeof($this->xv), 0, sizeof($this->yv));
973 -
974987 // Merge edits when possible
975988 $this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged);
976989 $this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged);
@@ -984,28 +997,28 @@
985998 // Skip matching "snake".
986999 $copy = array();
9871000 while ( $xi < $n_from && $yi < $n_to
988 - && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
 1001+ && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
9891002 $copy[] = $from_lines[$xi++];
9901003 ++$yi;
9911004 }
9921005 if ($copy)
993 - $edits[] = new _DiffOp_Copy($copy);
 1006+ $edits[] = new _DiffOp_Copy($copy);
9941007
9951008 // Find deletes & adds.
9961009 $delete = array();
9971010 while ($xi < $n_from && $this->xchanged[$xi])
998 - $delete[] = $from_lines[$xi++];
 1011+ $delete[] = $from_lines[$xi++];
9991012
10001013 $add = array();
10011014 while ($yi < $n_to && $this->ychanged[$yi])
1002 - $add[] = $to_lines[$yi++];
 1015+ $add[] = $to_lines[$yi++];
10031016
10041017 if ($delete && $add)
1005 - $edits[] = new _DiffOp_Change($delete, $add);
 1018+ $edits[] = new _DiffOp_Change($delete, $add);
10061019 elseif ($delete)
1007 - $edits[] = new _DiffOp_Delete($delete);
 1020+ $edits[] = new _DiffOp_Delete($delete);
10081021 elseif ($add)
1009 - $edits[] = new _DiffOp_Add($add);
 1022+ $edits[] = new _DiffOp_Add($add);
10101023 }
10111024 wfProfileOut( __METHOD__ );
10121025 return $edits;
@@ -1022,7 +1035,6 @@
10231036 }
10241037 }
10251038
1026 -
10271039 /* Divide the Largest Common Subsequence (LCS) of the sequences
10281040 * [XOFF, XLIM) and [YOFF, YLIM) into NCHUNKS approximately equally
10291041 * sized segments.
@@ -1040,23 +1052,22 @@
10411053 * of the portions it is going to specify.
10421054 */
10431055 function _diag ($xoff, $xlim, $yoff, $ylim, $nchunks) {
1044 - wfProfileIn( __METHOD__ );
10451056 $flip = false;
10461057
10471058 if ($xlim - $xoff > $ylim - $yoff) {
10481059 // Things seems faster (I'm not sure I understand why)
1049 - // when the shortest sequence in X.
1050 - $flip = true;
 1060+ // when the shortest sequence in X.
 1061+ $flip = true;
10511062 list ($xoff, $xlim, $yoff, $ylim)
10521063 = array( $yoff, $ylim, $xoff, $xlim);
10531064 }
10541065
10551066 if ($flip)
1056 - for ($i = $ylim - 1; $i >= $yoff; $i--)
1057 - $ymatches[$this->xv[$i]][] = $i;
 1067+ for ($i = $ylim - 1; $i >= $yoff; $i--)
 1068+ $ymatches[$this->xv[$i]][] = $i;
10581069 else
1059 - for ($i = $ylim - 1; $i >= $yoff; $i--)
1060 - $ymatches[$this->yv[$i]][] = $i;
 1070+ for ($i = $ylim - 1; $i >= $yoff; $i--)
 1071+ $ymatches[$this->yv[$i]][] = $i;
10611072
10621073 $this->lcs = 0;
10631074 $this->seq[0]= $yoff - 1;
@@ -1068,23 +1079,23 @@
10691080 for ($chunk = 0; $chunk < $nchunks; $chunk++) {
10701081 wfProfileIn( __METHOD__ . "-chunk" );
10711082 if ($chunk > 0)
1072 - for ($i = 0; $i <= $this->lcs; $i++)
1073 - $ymids[$i][$chunk-1] = $this->seq[$i];
 1083+ for ($i = 0; $i <= $this->lcs; $i++)
 1084+ $ymids[$i][$chunk-1] = $this->seq[$i];
10741085
10751086 $x1 = $xoff + (int)(($numer + ($xlim-$xoff)*$chunk) / $nchunks);
10761087 for ( ; $x < $x1; $x++) {
10771088 $line = $flip ? $this->yv[$x] : $this->xv[$x];
1078 - if (empty($ymatches[$line]))
1079 - continue;
 1089+ if (empty($ymatches[$line]))
 1090+ continue;
10801091 $matches = $ymatches[$line];
10811092 reset($matches);
10821093 while (list ($junk, $y) = each($matches))
1083 - if (empty($this->in_seq[$y])) {
1084 - $k = $this->_lcs_pos($y);
1085 - USE_ASSERTS && assert($k > 0);
1086 - $ymids[$k] = $ymids[$k-1];
1087 - break;
1088 - }
 1094+ if (empty($this->in_seq[$y])) {
 1095+ $k = $this->_lcs_pos($y);
 1096+ USE_ASSERTS && assert($k > 0);
 1097+ $ymids[$k] = $ymids[$k-1];
 1098+ break;
 1099+ }
10891100 while (list ( /* $junk */, $y) = each($matches)) {
10901101 if ($y > $this->seq[$k-1]) {
10911102 USE_ASSERTS && assert($y < $this->seq[$k]);
@@ -1100,7 +1111,6 @@
11011112 }
11021113 }
11031114 }
1104 - wfProfileOut( __METHOD__ . "-chunk" );
11051115 }
11061116
11071117 $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
@@ -1112,13 +1122,10 @@
11131123 }
11141124 $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);
11151125
1116 - wfProfileOut( __METHOD__ );
11171126 return array($this->lcs, $seps);
11181127 }
11191128
11201129 function _lcs_pos ($ypos) {
1121 - wfProfileIn( __METHOD__ );
1122 -
11231130 $end = $this->lcs;
11241131 if ($end == 0 || $ypos > $this->seq[$end]) {
11251132 $this->seq[++$this->lcs] = $ypos;
@@ -1131,9 +1138,9 @@
11321139 while ($beg < $end) {
11331140 $mid = (int)(($beg + $end) / 2);
11341141 if ( $ypos > $this->seq[$mid] )
1135 - $beg = $mid + 1;
 1142+ $beg = $mid + 1;
11361143 else
1137 - $end = $mid;
 1144+ $end = $mid;
11381145 }
11391146
11401147 USE_ASSERTS && assert($ypos != $this->seq[$end]);
@@ -1141,7 +1148,6 @@
11421149 $this->in_seq[$this->seq[$end]] = false;
11431150 $this->seq[$end] = $ypos;
11441151 $this->in_seq[$ypos] = 1;
1145 - wfProfileOut( __METHOD__ );
11461152 return $end;
11471153 }
11481154
@@ -1157,24 +1163,22 @@
11581164 * All line numbers are origin-0 and discarded lines are not counted.
11591165 */
11601166 function _compareseq ($xoff, $xlim, $yoff, $ylim) {
1161 - wfProfileIn( __METHOD__ );
1162 -
11631167 // Slide down the bottom initial diagonal.
11641168 while ($xoff < $xlim && $yoff < $ylim
1165 - && $this->xv[$xoff] == $this->yv[$yoff]) {
 1169+ && $this->xv[$xoff] == $this->yv[$yoff]) {
11661170 ++$xoff;
11671171 ++$yoff;
11681172 }
11691173
11701174 // Slide up the top initial diagonal.
11711175 while ($xlim > $xoff && $ylim > $yoff
1172 - && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) {
 1176+ && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) {
11731177 --$xlim;
11741178 --$ylim;
11751179 }
11761180
11771181 if ($xoff == $xlim || $yoff == $ylim)
1178 - $lcs = 0;
 1182+ $lcs = 0;
11791183 else {
11801184 // This is ad hoc but seems to work well.
11811185 //$nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5);
@@ -1188,9 +1192,9 @@
11891193 // X and Y sequences have no common subsequence:
11901194 // mark all changed.
11911195 while ($yoff < $ylim)
1192 - $this->ychanged[$this->yind[$yoff++]] = 1;
 1196+ $this->ychanged[$this->yind[$yoff++]] = 1;
11931197 while ($xoff < $xlim)
1194 - $this->xchanged[$this->xind[$xoff++]] = 1;
 1198+ $this->xchanged[$this->xind[$xoff++]] = 1;
11951199 } else {
11961200 // Use the partitions to split this problem into subproblems.
11971201 reset($seps);
@@ -1200,7 +1204,6 @@
12011205 $pt1 = $pt2;
12021206 }
12031207 }
1204 - wfProfileOut( __METHOD__ );
12051208 }
12061209
12071210 /* Adjust inserts/deletes of identical lines to join changes
@@ -1237,23 +1240,23 @@
12381241 * $other_changed[$j] == false.
12391242 */
12401243 while ($j < $other_len && $other_changed[$j])
1241 - $j++;
 1244+ $j++;
12421245
12431246 while ($i < $len && ! $changed[$i]) {
12441247 USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]');
12451248 $i++; $j++;
12461249 while ($j < $other_len && $other_changed[$j])
1247 - $j++;
 1250+ $j++;
12481251 }
12491252
12501253 if ($i == $len)
1251 - break;
 1254+ break;
12521255
12531256 $start = $i;
12541257
12551258 // Find the end of this run of changes.
12561259 while (++$i < $len && $changed[$i])
1257 - continue;
 1260+ continue;
12581261
12591262 do {
12601263 /*
@@ -1271,10 +1274,10 @@
12721275 $changed[--$start] = 1;
12731276 $changed[--$i] = false;
12741277 while ($start > 0 && $changed[$start - 1])
1275 - $start--;
 1278+ $start--;
12761279 USE_ASSERTS && assert('$j > 0');
12771280 while ($other_changed[--$j])
1278 - continue;
 1281+ continue;
12791282 USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]');
12801283 }
12811284
@@ -1296,14 +1299,14 @@
12971300 $changed[$start++] = false;
12981301 $changed[$i++] = 1;
12991302 while ($i < $len && $changed[$i])
1300 - $i++;
 1303+ $i++;
13011304
13021305 USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]');
13031306 $j++;
13041307 if ($j < $other_len && $other_changed[$j]) {
13051308 $corresponding = $i;
13061309 while ($j < $other_len && $other_changed[$j])
1307 - $j++;
 1310+ $j++;
13081311 }
13091312 }
13101313 } while ($runlength != $i - $start);
@@ -1317,7 +1320,7 @@
13181321 $changed[--$i] = 0;
13191322 USE_ASSERTS && assert('$j > 0');
13201323 while ($other_changed[--$j])
1321 - continue;
 1324+ continue;
13221325 USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]');
13231326 }
13241327 }
@@ -1376,7 +1379,7 @@
13771380 function isEmpty () {
13781381 foreach ($this->edits as $edit) {
13791382 if ($edit->type != 'copy')
1380 - return false;
 1383+ return false;
13811384 }
13821385 return true;
13831386 }
@@ -1392,7 +1395,7 @@
13931396 $lcs = 0;
13941397 foreach ($this->edits as $edit) {
13951398 if ($edit->type == 'copy')
1396 - $lcs += sizeof($edit->orig);
 1399+ $lcs += sizeof($edit->orig);
13971400 }
13981401 return $lcs;
13991402 }
@@ -1410,7 +1413,7 @@
14111414
14121415 foreach ($this->edits as $edit) {
14131416 if ($edit->orig)
1414 - array_splice($lines, sizeof($lines), 0, $edit->orig);
 1417+ array_splice($lines, sizeof($lines), 0, $edit->orig);
14151418 }
14161419 return $lines;
14171420 }
@@ -1428,7 +1431,7 @@
14291432
14301433 foreach ($this->edits as $edit) {
14311434 if ($edit->closing)
1432 - array_splice($lines, sizeof($lines), 0, $edit->closing);
 1435+ array_splice($lines, sizeof($lines), 0, $edit->closing);
14331436 }
14341437 return $lines;
14351438 }
@@ -1441,21 +1444,21 @@
14421445 function _check ($from_lines, $to_lines) {
14431446 wfProfileIn( __METHOD__ );
14441447 if (serialize($from_lines) != serialize($this->orig()))
1445 - trigger_error("Reconstructed original doesn't match", E_USER_ERROR);
 1448+ trigger_error("Reconstructed original doesn't match", E_USER_ERROR);
14461449 if (serialize($to_lines) != serialize($this->closing()))
1447 - trigger_error("Reconstructed closing doesn't match", E_USER_ERROR);
 1450+ trigger_error("Reconstructed closing doesn't match", E_USER_ERROR);
14481451
14491452 $rev = $this->reverse();
14501453 if (serialize($to_lines) != serialize($rev->orig()))
1451 - trigger_error("Reversed original doesn't match", E_USER_ERROR);
 1454+ trigger_error("Reversed original doesn't match", E_USER_ERROR);
14521455 if (serialize($from_lines) != serialize($rev->closing()))
1453 - trigger_error("Reversed closing doesn't match", E_USER_ERROR);
 1456+ trigger_error("Reversed closing doesn't match", E_USER_ERROR);
14541457
14551458
14561459 $prevtype = 'none';
14571460 foreach ($this->edits as $edit) {
14581461 if ( $prevtype == $edit->type )
1459 - trigger_error("Edit sequence is non-optimal", E_USER_ERROR);
 1462+ trigger_error("Edit sequence is non-optimal", E_USER_ERROR);
14601463 $prevtype = $edit->type;
14611464 }
14621465
@@ -1496,7 +1499,7 @@
14971500 * have the same number of elements as $to_lines.
14981501 */
14991502 function MappedDiff($from_lines, $to_lines,
1500 - $mapped_from_lines, $mapped_to_lines) {
 1503+ $mapped_from_lines, $mapped_to_lines) {
15011504 wfProfileIn( __METHOD__ );
15021505
15031506 assert(sizeof($from_lines) == sizeof($mapped_from_lines));
@@ -1579,8 +1582,8 @@
15801583 $block[] = new _DiffOp_Copy($context);
15811584 }
15821585 $this->_block($x0, $ntrail + $xi - $x0,
1583 - $y0, $ntrail + $yi - $y0,
1584 - $block);
 1586+ $y0, $ntrail + $yi - $y0,
 1587+ $block);
15851588 $block = false;
15861589 }
15871590 }
@@ -1593,21 +1596,21 @@
15941597 $y0 = $yi - sizeof($context);
15951598 $block = array();
15961599 if ($context)
1597 - $block[] = new _DiffOp_Copy($context);
 1600+ $block[] = new _DiffOp_Copy($context);
15981601 }
15991602 $block[] = $edit;
16001603 }
16011604
16021605 if ($edit->orig)
1603 - $xi += sizeof($edit->orig);
 1606+ $xi += sizeof($edit->orig);
16041607 if ($edit->closing)
1605 - $yi += sizeof($edit->closing);
 1608+ $yi += sizeof($edit->closing);
16061609 }
16071610
16081611 if (is_array($block))
1609 - $this->_block($x0, $xi - $x0,
1610 - $y0, $yi - $y0,
1611 - $block);
 1612+ $this->_block($x0, $xi - $x0,
 1613+ $y0, $yi - $y0,
 1614+ $block);
16121615
16131616 $end = $this->_end_diff();
16141617 wfProfileOut( __METHOD__ );
@@ -1619,15 +1622,15 @@
16201623 $this->_start_block($this->_block_header($xbeg, $xlen, $ybeg, $ylen));
16211624 foreach ($edits as $edit) {
16221625 if ($edit->type == 'copy')
1623 - $this->_context($edit->orig);
 1626+ $this->_context($edit->orig);
16241627 elseif ($edit->type == 'add')
1625 - $this->_added($edit->closing);
 1628+ $this->_added($edit->closing);
16261629 elseif ($edit->type == 'delete')
1627 - $this->_deleted($edit->orig);
 1630+ $this->_deleted($edit->orig);
16281631 elseif ($edit->type == 'change')
1629 - $this->_changed($edit->orig, $edit->closing);
 1632+ $this->_changed($edit->orig, $edit->closing);
16301633 else
1631 - trigger_error('Unknown edit type', E_USER_ERROR);
 1634+ trigger_error('Unknown edit type', E_USER_ERROR);
16321635 }
16331636 $this->_end_block();
16341637 wfProfileOut( __METHOD__ );
@@ -1645,9 +1648,9 @@
16461649
16471650 function _block_header($xbeg, $xlen, $ybeg, $ylen) {
16481651 if ($xlen > 1)
1649 - $xbeg .= "," . ($xbeg + $xlen - 1);
 1652+ $xbeg .= "," . ($xbeg + $xlen - 1);
16501653 if ($ylen > 1)
1651 - $ybeg .= "," . ($ybeg + $ylen - 1);
 1654+ $ybeg .= "," . ($ybeg + $ylen - 1);
16521655
16531656 return $xbeg . ($xlen ? ($ylen ? 'c' : 'd') : 'a') . $ybeg;
16541657 }
@@ -1661,7 +1664,7 @@
16621665
16631666 function _lines($lines, $prefix = ' ') {
16641667 foreach ($lines as $line)
1665 - echo "$prefix $line\n";
 1668+ echo "$prefix $line\n";
16661669 }
16671670
16681671 function _context($lines) {
@@ -1716,40 +1719,40 @@
17171720 $newline = 1;
17181721 $retval = array();
17191722 foreach($diff->edits as $edit)
1720 - switch($edit->type) {
1721 - case 'add':
1722 - foreach($edit->closing as $l) {
1723 - $retval[] = array(
 1723+ switch($edit->type) {
 1724+ case 'add':
 1725+ foreach($edit->closing as $l) {
 1726+ $retval[] = array(
17241727 'action' => 'add',
17251728 'new'=> $l,
17261729 'newline' => $newline++
1727 - );
1728 - }
1729 - break;
1730 - case 'delete':
1731 - foreach($edit->orig as $l) {
1732 - $retval[] = array(
 1730+ );
 1731+ }
 1732+ break;
 1733+ case 'delete':
 1734+ foreach($edit->orig as $l) {
 1735+ $retval[] = array(
17331736 'action' => 'delete',
17341737 'old' => $l,
17351738 'oldline' => $oldline++,
1736 - );
1737 - }
1738 - break;
1739 - case 'change':
1740 - foreach($edit->orig as $i => $l) {
1741 - $retval[] = array(
 1739+ );
 1740+ }
 1741+ break;
 1742+ case 'change':
 1743+ foreach($edit->orig as $i => $l) {
 1744+ $retval[] = array(
17421745 'action' => 'change',
17431746 'old' => $l,
17441747 'new' => @$edit->closing[$i],
17451748 'oldline' => $oldline++,
17461749 'newline' => $newline++,
1747 - );
1748 - }
1749 - break;
1750 - case 'copy':
1751 - $oldline += count($edit->orig);
1752 - $newline += count($edit->orig);
1753 - }
 1750+ );
 1751+ }
 1752+ break;
 1753+ case 'copy':
 1754+ $oldline += count($edit->orig);
 1755+ $newline += count($edit->orig);
 1756+ }
17541757 return $retval;
17551758 }
17561759 }
@@ -1777,13 +1780,13 @@
17781781 function _flushGroup ($new_tag) {
17791782 if ($this->_group !== '') {
17801783 if ($this->_tag == 'ins')
1781 - $this->_line .= '<ins class="diffchange diffchange-inline">' .
1782 - htmlspecialchars ( $this->_group ) . '</ins>';
 1784+ $this->_line .= '<ins class="diffchange diffchange-inline">' .
 1785+ htmlspecialchars ( $this->_group ) . '</ins>';
17831786 elseif ($this->_tag == 'del')
1784 - $this->_line .= '<del class="diffchange diffchange-inline">' .
1785 - htmlspecialchars ( $this->_group ) . '</del>';
 1787+ $this->_line .= '<del class="diffchange diffchange-inline">' .
 1788+ htmlspecialchars ( $this->_group ) . '</del>';
17861789 else
1787 - $this->_line .= htmlspecialchars ( $this->_group );
 1790+ $this->_line .= htmlspecialchars ( $this->_group );
17881791 }
17891792 $this->_group = '';
17901793 $this->_tag = $new_tag;
@@ -1792,21 +1795,21 @@
17931796 function _flushLine ($new_tag) {
17941797 $this->_flushGroup($new_tag);
17951798 if ($this->_line != '')
1796 - array_push ( $this->_lines, $this->_line );
 1799+ array_push ( $this->_lines, $this->_line );
17971800 else
1798 - # make empty lines visible by inserting an NBSP
1799 - array_push ( $this->_lines, NBSP );
 1801+ # make empty lines visible by inserting an NBSP
 1802+ array_push ( $this->_lines, NBSP );
18001803 $this->_line = '';
18011804 }
18021805
18031806 function addWords ($words, $tag = '') {
18041807 if ($tag != $this->_tag)
1805 - $this->_flushGroup($tag);
 1808+ $this->_flushGroup($tag);
18061809
18071810 foreach ($words as $word) {
18081811 // new-line should only come as first char of word.
18091812 if ($word == '')
1810 - continue;
 1813+ continue;
18111814 if ($word[0] == "\n") {
18121815 $this->_flushLine($tag);
18131816 $word = substr($word, 1);
@@ -1837,7 +1840,7 @@
18381841 list ($closing_words, $closing_stripped) = $this->_split($closing_lines);
18391842
18401843 $this->MappedDiff($orig_words, $closing_words,
1841 - $orig_stripped, $closing_stripped);
 1844+ $orig_stripped, $closing_stripped);
18421845 wfProfileOut( __METHOD__ );
18431846 }
18441847
@@ -1862,7 +1865,7 @@
18631866 } else {
18641867 $m = array();
18651868 if (preg_match_all('/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
1866 - $line, $m))
 1869+ $line, $m))
18671870 {
18681871 $words = array_merge( $words, $m[0] );
18691872 $stripped = array_merge( $stripped, $m[1] );
@@ -1879,9 +1882,9 @@
18801883
18811884 foreach ($this->edits as $edit) {
18821885 if ($edit->type == 'copy')
1883 - $orig->addWords($edit->orig);
 1886+ $orig->addWords($edit->orig);
18841887 elseif ($edit->orig)
1885 - $orig->addWords($edit->orig, 'del');
 1888+ $orig->addWords($edit->orig, 'del');
18861889 }
18871890 $lines = $orig->getLines();
18881891 wfProfileOut( __METHOD__ );
@@ -1894,9 +1897,9 @@
18951898
18961899 foreach ($this->edits as $edit) {
18971900 if ($edit->type == 'copy')
1898 - $closing->addWords($edit->closing);
 1901+ $closing->addWords($edit->closing);
18991902 elseif ($edit->closing)
1900 - $closing->addWords($edit->closing, 'ins');
 1903+ $closing->addWords($edit->closing, 'ins');
19011904 }
19021905 $lines = $closing->getLines();
19031906 wfProfileOut( __METHOD__ );
@@ -1969,24 +1972,24 @@
19701973 function _added( $lines ) {
19711974 foreach ($lines as $line) {
19721975 echo '<tr>' . $this->emptyLine() .
1973 - $this->addedLine( '<ins class="diffchange">' .
1974 - htmlspecialchars ( $line ) . '</ins>' ) . "</tr>\n";
 1976+ $this->addedLine( '<ins class="diffchange">' .
 1977+ htmlspecialchars ( $line ) . '</ins>' ) . "</tr>\n";
19751978 }
19761979 }
19771980
19781981 function _deleted($lines) {
19791982 foreach ($lines as $line) {
19801983 echo '<tr>' . $this->deletedLine( '<del class="diffchange">' .
1981 - htmlspecialchars ( $line ) . '</del>' ) .
1982 - $this->emptyLine() . "</tr>\n";
 1984+ htmlspecialchars ( $line ) . '</del>' ) .
 1985+ $this->emptyLine() . "</tr>\n";
19831986 }
19841987 }
19851988
19861989 function _context( $lines ) {
19871990 foreach ($lines as $line) {
19881991 echo '<tr>' .
1989 - $this->contextLine( htmlspecialchars ( $line ) ) .
1990 - $this->contextLine( htmlspecialchars ( $line ) ) . "</tr>\n";
 1992+ $this->contextLine( htmlspecialchars ( $line ) ) .
 1993+ $this->contextLine( htmlspecialchars ( $line ) ) . "</tr>\n";
19911994 }
19921995 }
19931996
@@ -2003,11 +2006,11 @@
20042007 while ( $line = array_shift( $del ) ) {
20052008 $aline = array_shift( $add );
20062009 echo '<tr>' . $this->deletedLine( $line ) .
2007 - $this->addedLine( $aline ) . "</tr>\n";
 2010+ $this->addedLine( $aline ) . "</tr>\n";
20082011 }
20092012 foreach ($add as $line) { # If any leftovers
20102013 echo '<tr>' . $this->emptyLine() .
2011 - $this->addedLine( $line ) . "</tr>\n";
 2014+ $this->addedLine( $line ) . "</tr>\n";
20122015 }
20132016 wfProfileOut( __METHOD__ );
20142017 }

Status & tagging log