r37488 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r37487‎ | r37488 | r37489 >
Date:10:27, 10 July 2008
Author:guyvdb
Status:old
Tags:
Comment:
fast diff heuristic
Modified paths:
  • /branches/visual_diff/phase3/includes/Diff.php (modified) (history)

Diff [purge]

Index: branches/visual_diff/phase3/includes/Diff.php
@@ -7,64 +7,28 @@
88 */
99 class AbstractDiffAlgorithm {
1010
11 - protected $a;
12 - protected $m;
13 -
14 - protected $b;
15 - protected $n;
16 -
17 - protected $swapped;
18 -
1911 /**
20 - * The sequences need to be ordered by length.
21 - */
22 - protected function order(array $from, array $to){
23 - if(sizeof($from)>=sizeof($to)){
24 - $this->a = $to;
25 - $this->m = sizeof($to);
26 - $this->b = $from;
27 - $this->n = sizeof($from);
28 - $this->swapped = TRUE;
29 - }else{
30 - $this->a = $from;
31 - $this->m = sizeof($from);
32 - $this->b = $to;
33 - $this->n = sizeof($to);
34 - $this->swapped = FALSE;
35 - }
36 - }
37 -
38 - /**
3912 * Slide down the snake untill reaching an inequality.
4013 */
41 - protected function forward_snake( $k, $y, $maxx, $maxy, $offsetx=0, $offsety=0, $swapped=FALSE){
42 - if($swapped){
43 - return $this->forward_snake( $k, $y-$k, $maxy, $maxx, $offsety, $offsetx, $swapped );
44 - }else{
45 - $x = $y-$k;
46 - while($x < $maxx && $y < $maxy && $this->equals($this->a[$offsetx+$x],$this->b[$offsety+$y])){
47 - ++$x;
48 - ++$y;
49 - }
50 -
51 - return $y;
 14+ protected function forward_snake( $k, $y, $maxx, $maxy, &$a, &$b, $offsetx=0, $offsety=0){
 15+ $x = $y-$k;
 16+ while($x < $maxx && $y < $maxy && $this->equals($a[$offsetx+$x],$b[$offsety+$y])){
 17+ ++$x;
 18+ ++$y;
5219 }
 20+ return $y;
5321 }
5422
5523 /**
5624 * Slide up the snake until reaching an inequality.
5725 */
58 - protected function backward_snake( $k, $y, $maxx, $maxy, $offsetx=0, $offsety=0, $swapped=FALSE){
59 - if($swapped){
60 - return $this->backward_snake( $k, $y-$k, $maxy, $maxx, $offsety, $offsetx, $swapped );
61 - }else{
62 - $x = $y-$k;
63 - while($x > $maxx && $y > $maxy && $this->equals($this->a[$offsetx+$x],$this->b[$offsety+$y])){
64 - --$x;
65 - --$y;
66 - }
67 - return $y;
 26+ protected function backward_snake( $k, $y, $maxx, $maxy, &$a, &$b, $offsetx=0, $offsety=0){
 27+ $x = $y-$k;
 28+ while($x > $maxx && $y > $maxy && $this->equals($a[$offsetx+$x],$b[$offsety+$y])){
 29+ --$x;
 30+ --$y;
6831 }
 32+ return $y;
6933 }
7034
7135 /**
@@ -97,30 +61,38 @@
9862 * the arrays $from and $to.
9963 */
10064 public function ses (array $from, array $to) {
101 - $this->order($from, $to);
 65+ if(sizeof($from)>sizeof($to)){
 66+ swap($from, $to);
 67+ }
 68+ $m = sizeof($from);
 69+ $n = sizeof($to);
10270
103 - if($this->n==0){
104 - return $this->m;
105 - }else if($this->m==0){
106 - return $this->n;
 71+ if($n==0){
 72+ return $m;
 73+ }else if($m==0){
 74+ return $n;
10775 }
10876
109 - $delta = $this->n-$this->m;
110 - $fp = array_fill_keys( range( -($this->m+1), $this->n+1), $this->n);
 77+ $delta = $n-$m;
11178 $p = -1;
 79+ $fp = array_fill( 0, $delta+1, $n);
11280
11381 do{
11482 ++$p;
 83+
 84+ $fp[-$p-1] = $n;
 85+ $fp[$delta+$p+1] = $n;
 86+
11587 for($k=$delta+$p;$k>0;$k--){
116 - $fp[$k] = $this->backward_snake( $k, min( $fp[$k+1]-1, $fp[$k-1]), -1, -1);
 88+ $fp[$k] = $this->backward_snake( $k, min( $fp[$k+1]-1, $fp[$k-1]), -1, -1, $from, $to);
11789 }
11890 for($k=-$p;$k<0;$k++){
119 - $fp[$k] = $this->backward_snake( $k, min( $fp[$k+1]-1, $fp[$k-1]), -1, -1);
 91+ $fp[$k] = $this->backward_snake( $k, min( $fp[$k+1]-1, $fp[$k-1]), -1, -1, $from, $to);
12092 }
121 - $fp[0] = $this->backward_snake( 0, min( $fp[1]-1, $fp[-1]), -1, -1);
 93+ $fp[0] = $this->backward_snake( 0, min( $fp[1]-1, $fp[-1]), -1, -1, $from, $to);
12294 }while($fp[0]!=-1);
12395
124 - echo "<br>$delta, $p";
 96+ //echo "<br>$delta, $p";
12597 return $delta+2*$p;
12698 }
12799
@@ -148,30 +120,36 @@
149121 * the arrays $from and $to.
150122 */
151123 public function ses (array $from, array $to) {
152 - $this->order($from, $to);
 124+ if(sizeof($from)>sizeof($to)){
 125+ swap($from, $to);
 126+ }
 127+ $m = sizeof($from);
 128+ $n = sizeof($to);
153129
154 - if($this->n==0){
155 - return $this->m;
156 - }else if($this->m==0){
157 - return $this->n;
 130+ if($n==0){
 131+ return $m;
 132+ }else if($m==0){
 133+ return $n;
158134 }
159135
160 - $delta = $this->n-$this->m;
161 - $fp = array_fill_keys( range( -($this->m+1), $this->n+1), -1);
 136+ $delta = $n-$m;
162137 $p = -1;
163 -
 138+ $fp = array_fill( 0, $delta+1, -1);
164139 do{
165140 ++$p;
 141+
 142+ $fp[-$p-1] = -1;
 143+ $fp[$delta+$p+1] = -1;
 144+
166145 for($k=-$p;$k<$delta;$k++){
167 - $fp[$k] = $this->forward_snake( $k, max( $fp[$k-1]+1, $fp[$k+1]), $this->m, $this->n);
 146+ $fp[$k] = $this->forward_snake( $k, max( $fp[$k-1]+1, $fp[$k+1]), $m, $n, $from, $to);
168147 }
169148 for($k=$delta+$p;$k>$delta;$k--){
170 - $fp[$k] = $this->forward_snake( $k, max( $fp[$k-1]+1, $fp[$k+1]), $this->m, $this->n);
 149+ $fp[$k] = $this->forward_snake( $k, max( $fp[$k-1]+1, $fp[$k+1]), $m, $n, $from, $to);
171150 }
172 - $fp[$delta] = $this->forward_snake( $delta, max( $fp[$delta-1]+1, $fp[$delta+1]), $this->m, $this->n);
173 - }while($fp[$delta]!=$this->n);
 151+ $fp[$delta] = $this->forward_snake( $delta, max( $fp[$delta-1]+1, $fp[$delta+1]), $m, $n, $from, $to);
 152+ }while($fp[$delta]!=$n);
174153
175 - echo "<br>$delta, $p";
176154 return $delta+2*$p;
177155 }
178156
@@ -206,65 +184,69 @@
207185 * the arrays $from and $to.
208186 */
209187 public function ses (array $from, array $to) {
210 - $this->order($from, $to);
 188+ if(sizeof($from)>sizeof($to)){
 189+ swap($from, $to);
 190+ }
 191+ $m = sizeof($from);
 192+ $n = sizeof($to);
211193
212 - if($this->n==0){
213 - return $this->m;
214 - }else if($this->m==0){
215 - return $this->n;
 194+ if($n==0){
 195+ return $m;
 196+ }else if($m==0){
 197+ return $n;
216198 }
217199
218 - $delta = $this->n-$this->m;
 200+ $delta = $n-$m;
 201+ $p = -1;
 202+ $fpf = array_fill( 0, $delta+1, -1);
 203+ $fpb = array_fill( 0, $delta+1, $n);
219204
220 - $fpkeys = range( -($this->m+1), $this->n+1);
221 - $fpf = array_fill_keys( $fpkeys, -1);
222 - $fpb = array_fill_keys( $fpkeys, $this->n);
223205 $p = -1;
224206
225207 while(1){
226208 ++$p;
227209
228210 //forward
 211+ $fpf[-$p-1] = -1;
 212+ $fpf[$delta+$p+1] = -1;
 213+
229214 for($k=-$p;$k<$delta;$k++){
230 - $fpf[$k] = $this->forward_snake( $k, max( $fpf[$k-1]+1, $fpf[$k+1]), $this->m, $this->n);
 215+ $fpf[$k] = $this->forward_snake( $k, max( $fpf[$k-1]+1, $fpf[$k+1]), $m, $n, $from, $to);
231216 if($fpf[$k]>$fpb[$k]+1){
232 - echo "<br>0, $delta, $p";
233217 return $delta+4*$p-4;
234218 }
235219 }
236220 for($k=$delta+$p;$k>$delta;$k--){
237 - $fpf[$k] = $this->forward_snake( $k, max( $fpf[$k-1]+1, $fpf[$k+1]), $this->m, $this->n);
 221+ $fpf[$k] = $this->forward_snake( $k, max( $fpf[$k-1]+1, $fpf[$k+1]), $m, $n, $from, $to);
238222 if($fpf[$k]>$fpb[$k]+1){
239 - echo "<br>0, $delta, $p";
240223 return $delta+4*$p-4;
241224 }
242225 }
243 - $fpf[$delta] = $this->forward_snake( $delta, max( $fpf[$delta-1]+1, $fpf[$delta+1]), $this->m, $this->n);
 226+ $fpf[$delta] = $this->forward_snake( $delta, max( $fpf[$delta-1]+1, $fpf[$delta+1]), $m, $n, $from, $to);
244227
245228 if($fpf[$delta]>$fpb[$delta]+1){
246 - echo "<br>0, $delta, $p";
247229 return $delta+4*$p-4;
248230 }
249231
250232 //backward
 233+ $fpb[-$p-1] = $n;
 234+ $fpb[$delta+$p+1] = $n;
 235+
251236 for($k=$delta+$p;$k>0;$k--){
252 - $fpb[$k] = $this->backward_snake( $k, min( $fpb[$k+1]-1, $fpb[$k-1]), -1, -1);
 237+ $fpb[$k] = $this->backward_snake( $k, min( $fpb[$k+1]-1, $fpb[$k-1]), -1, -1, $from, $to);
253238 if($fpf[$k]>$fpb[$k]+1){
254 - echo "<br>1, $delta, $p";
255239 return $delta+4*$p;
256240 }
257241 }
258242 for($k=-$p;$k<0;$k++){
259 - $fpb[$k] = $this->backward_snake( $k, min( $fpb[$k+1]-1, $fpb[$k-1]), -1, -1);
 243+ $fpb[$k] = $this->backward_snake( $k, min( $fpb[$k+1]-1, $fpb[$k-1]), -1, -1, $from, $to);
260244 if($fpf[$k]>$fpb[$k]+1){
261 - echo "<br>1, $delta, $p";
262245 return $delta+4*$p;
263246 }
264247 }
265 - $fpb[0] = $this->backward_snake( 0, min( $fpb[1]-1, $fpb[-1]), -1, -1);
 248+ $fpb[0] = $this->backward_snake( 0, min( $fpb[1]-1, $fpb[-1]), -1, -1, $from, $to);
266249
267250 if($fpf[0]>$fpb[0]+1){
268 - echo "<br>1, $delta, $p";
269251 return $delta+4*$p;
270252 }
271253
@@ -279,8 +261,10 @@
280262 * !!!!!!!!!!!!!!!!!!!!!!
281263 * BEWARE HERE BE DRAGONS
282264 * This algorithm is not guaranteed to find the exact size of the LCS.
283 - * There are border cases where it will be 1 (or more) off.
284 - * This implmentation is useful because it is very fast.
 265+ * There are border cases for n>5 where it will be 1 (or more) off recursively.
 266+ * This can add up to a difference from the optimal LCS of 5%-10%.
 267+ * The common subsequence will be correct though.
 268+ * This implementation is useful because it is very fast.
285269 * !!!!!!!!!!!!!!!!!!!!!!
286270 *
287271 * This is the O(NP) variant as descibed by Wu et al. in "An O(NP) Sequence Comparison Algorithm"
@@ -289,32 +273,26 @@
290274 */
291275 class FastDiffAlgorithm extends AbstractDiffAlgorithm {
292276
293 - private $a_inLcs;
294 -
295 - private $b_inLcs;
296 -
297 - private $lcsLength;
298 -
299277 /*
300278 * Diffs two arrays and returns array($from_inLcs, $to_inLcs)
301279 * where {from,to}_inLcs is FALSE if the token was {removed,added}
302280 * and TRUE if it is in the longest common subsequence.
303281 */
304282 public function diff (array $from, array $to) {
305 - $this->order($from, $to);
306 - $this->a_inLcs = array_fill(0,$this->m,FALSE);
307 - $this->b_inLcs = array_fill(0,$this->n,FALSE);
308283
309 - //There is no need to remove common tokens at the beginning and end, the first snake will catch them.
310 - //Hashing the tokens is not generally applicable.
 284+ $from_inLcs = sizeof($from)>0 ? array_fill(0,sizeof($from),FALSE): array();
 285+ $to_inLcs = sizeof($to)>0 ? array_fill(0,sizeof($to),FALSE): array();
 286+
311287 //Removing tokens that are not in both sequences is O(N*M)>=O(N*P). There is little to gain when most edits are small.
312288
313 - $this->diff_recursive(0,$this->m,0,$this->n);
314 -
315 - if($this->swapped){
316 - return array($this->b_inLcs,$this->a_inLcs);
 289+ $nbSwaps=0;
 290+ $from2 = $from;
 291+ $to2 = $to;
 292+ $this->diff_recursive(0,sizeof($from),0,sizeof($to), $from2, $to2, $from_inLcs, $to_inLcs, $nbSwaps);
 293+ if($nbSwaps==0){
 294+ return array($from_inLcs, $to_inLcs);
317295 }else{
318 - return array($this->a_inLcs,$this->b_inLcs);
 296+ return array($to_inLcs, $from_inLcs);
319297 }
320298 }
321299
@@ -323,20 +301,35 @@
324302 *
325303 * Ends are exclusive
326304 */
327 - protected function diff_recursive($x_start, $x_end, $y_start, $y_end){
328 - echo "<br>$x_start-$x_end , $y_start-$y_end";
 305+ protected function diff_recursive($x_start, $x_end, $y_start, $y_end, &$a, &$b, &$a_inLcs, &$b_inLcs, &$nbSwaps){
 306+ //echo "<br>$x_start-$x_end , $y_start-$y_end";
329307 if($x_end>$x_start && $y_end>$y_start){
 308+ //echo "-pass";
330309
 310+// //echo "<br>";
 311+// for($i = $x_start;$i<$x_end;$i++){
 312+// //echo " ".$a[$i];
 313+// }
 314+// //echo "<br>";
 315+// for($i = $y_start;$i<$y_end;$i++){
 316+// //echo " ".$b[$i];
 317+// }
 318+
331319 if($x_end-$x_start>$y_end-$y_start){
332 - $temp = $x_end;
333 - $x_end = $y_end;
334 - $y_end = $temp;
335 - $temp = $x_start;
336 - $x_start = $y_start;
337 - $y_start = $temp;
338 - $swapped = TRUE;
339 - }else{
340 - $swapped = FALSE;
 320+ swap($x_end, $y_end);
 321+ swap($x_start, $y_start);
 322+ swap($a, $b);
 323+ swap($a_inLcs, $b_inLcs);
 324+ $nbSwaps = ($nbSwaps+1)%2;
 325+ //echo "<br>nbswaps=$nbSwaps";
 326+ //echo "<br>";
 327+// for($i = $x_start;$i<$x_end;$i++){
 328+// //echo " ".$a[$i];
 329+// }
 330+// //echo "<br>";
 331+// for($i = $y_start;$i<$y_end;$i++){
 332+// //echo " ".$b[$i];
 333+// }
341334 }
342335
343336 $m = $x_end-$x_start;
@@ -344,121 +337,301 @@
345338
346339 $delta = $n-$m;
347340
348 - $fpkeys = range( -($m+1), $n+1);
349 - $fpf = array_fill_keys( $fpkeys, -1);
350 - $fpb = array_fill_keys( $fpkeys, $n);
 341+ $fpf_start = array_fill( 0, $delta+1, -1);
 342+ $fpb_start = array_fill( 0, $delta+1, $n);
 343+ $fpf_end = array_fill( 0, $delta+1, -1);
 344+ $fpb_end = array_fill( 0, $delta+1, $n);
 345+
 346+ $no_middle_found = TRUE;
 347+ $endgame = FALSE;
 348+ $middle_found = array_fill( 0, $delta+1, FALSE);
 349+ $best_middle_score = -$n-3;
 350+ $best_middle_start;
 351+ $best_middle_end;
 352+ $best_middle_k;
 353+
351354 $p = -1;
352355
353356 while(1){
354357 ++$p;
355358
356 - echo "$p\n";
357 - echo "forward:\n";
358 - print_r($fpf);
359 - echo "backward:\n";
360 - print_r($fpb);
 359+ //echo "<br>".microtime(true)." p:".$p;
361360
362 - //forward
 361+ $middle_found[-$p-1] = FALSE;
 362+ $middle_found[$delta+$p+1] = FALSE;
 363+
 364+ // forward
 365+ $fpf_start[-$p-1] = -1;
 366+ $fpf_start[$delta+$p+1] = -1;
 367+ $fpf_end[-$p-1] = -1;
 368+ $fpf_end[$delta+$p+1] = -1;
 369+
363370 for($k=-$p;$k<$delta;$k++){
364 - $fpf[$k] = $this->forward_snake( $k, max( $fpf[$k-1]+1, $fpf[$k+1]), $m, $n, $x_start, $y_start, $swapped);
365 - if($fpf[$k]>$fpb[$k]+1){
366 - $this->foundSnake($x_start, $x_end, $y_start, $y_end, $x_start+$fpb[$k]-$k+1, $x_start+$fpf[$k]-$k, $y_start+$fpb[$k]+1, $y_start+$fpf[$k], $swapped);
367 - return;
 371+ $fpf_start[$k] = max( $fpf_end[$k-1]+1, $fpf_end[$k+1]);
 372+ //echo "<br>p=$p, fpf_start[$k]=$fpf_start[$k]";
 373+ if($fpf_start[$k]>$fpb_end[$k] && $fpb_end[$k]-$fpb_start[$k]!=0 && !$middle_found[$k]){
 374+ $no_middle_found = FALSE;
 375+ $middle_found[$k] = TRUE;
 376+ //echo "<br>middle found";
 377+ if($fpb_start[$k]-$fpb_end[$k]>$best_middle_score){
 378+ //echo " and best! ($fpb_start[$k]-$fpb_end[$k]>$best_middle_score)";
 379+ $best_middle_start = $fpb_end[$k]+1;
 380+ $best_middle_end = $fpb_start[$k]+1;
 381+ $best_middle_k = $k;
 382+ $best_middle_score = $best_middle_end-$best_middle_start;
 383+ }
368384 }
 385+ $fpf_end[$k] = $this->forward_snake( $k, $fpf_start[$k], $m, $n, $a, $b, $x_start, $y_start);
 386+ //echo ", fpf_end[$k]=$fpf_end[$k]";
 387+ if($fpf_end[$k]>$fpb_end[$k] && !$middle_found[$k]){
 388+ $no_middle_found = FALSE;
 389+ $middle_found[$k] = TRUE;
 390+ //echo "<br>middle found";
 391+ if($fpb_end[$k]-$fpf_end[$k]>$best_middle_score){
 392+ //echo " and best ($fpb_end[$k]-$fpf_end[$k]>$best_middle_score)!";
 393+ $best_middle_start = $fpf_start[$k];
 394+ $best_middle_end = $fpf_end[$k];
 395+ $best_middle_k = $k;
 396+ $best_middle_score = $fpb_end[$k]-$fpf_end[$k];
 397+ }
 398+ }
 399+
369400 }
370 - echo "forward:\n";
371 - print_r($fpf);
372 - echo "backward:\n";
373 - print_r($fpb);
 401+
374402 for($k=$delta+$p;$k>$delta;$k--){
375 - $fpf[$k] = $this->forward_snake( $k, max( $fpf[$k-1]+1, $fpf[$k+1]), $m, $n, $x_start, $y_start, $swapped);
376 - if($fpf[$k]>$fpb[$k]+1){
377 - $this->foundSnake($x_start, $x_end, $y_start, $y_end, $x_start+$fpb[$k]-$k+1, $x_start+$fpf[$k]-$k, $y_start+$fpb[$k]+1, $y_start+$fpf[$k], $swapped);
378 - return;
 403+ $fpf_start[$k] = max( $fpf_end[$k-1]+1, $fpf_end[$k+1]);
 404+ //echo "<br>p=$p, fpf_start[$k]=$fpf_start[$k]";
 405+ if($fpf_start[$k]>$fpb_end[$k] && $fpb_end[$k]-$fpb_start[$k]!=0 && !$middle_found[$k]){
 406+ $no_middle_found = FALSE;
 407+ $middle_found[$k] = TRUE;
 408+ //echo "<br>middle found";
 409+ if($fpb_start[$k]-$fpb_end[$k]>$best_middle_score){
 410+ //echo " and best! ($fpb_start[$k]-$fpb_end[$k]>$best_middle_score)";
 411+ $best_middle_start = $fpb_end[$k]+1;
 412+ $best_middle_end = $fpb_start[$k]+1;
 413+ $best_middle_k = $k;
 414+ $best_middle_score = $best_middle_end-$best_middle_start;
 415+ }
379416 }
 417+ $fpf_end[$k] = $this->forward_snake( $k, $fpf_start[$k], $m, $n, $a, $b, $x_start, $y_start);
 418+ //echo ", fpf_end[$k]=$fpf_end[$k]";
 419+ if($fpf_end[$k]>$fpb_end[$k] && !$middle_found[$k]){
 420+ $no_middle_found = FALSE;
 421+ $middle_found[$k] = TRUE;
 422+ //echo "<br>middle found";
 423+ if($fpb_end[$k]-$fpf_end[$k]>$best_middle_score){
 424+ //echo " and best! ($fpb_end[$k]-$fpf_end[$k]>$best_middle_score)";
 425+ $best_middle_start = $fpf_start[$k];
 426+ $best_middle_end = $fpf_end[$k];
 427+ $best_middle_k = $k;
 428+ $best_middle_score = $fpb_end[$k]-$fpf_end[$k];
 429+ }
 430+ }
380431 }
381 - echo "forward:\n";
382 - print_r($fpf);
383 - echo "backward:\n";
384 - print_r($fpb);
385 - $fpf[$delta] = $this->forward_snake( $delta, max( $fpf[$delta-1]+1, $fpf[$delta+1]), $m, $n, $x_start, $y_start, $swapped);
386 - if($fpf[$delta]>$fpb[$delta]+1){
387 - $this->foundSnake($x_start, $x_end, $y_start, $y_end, $x_start+$fpb[$delta]-$delta+1, $x_start+$fpf[$delta]-$k, $y_start+$fpb[$delta]+1, $y_start+$fpf[$delta], $swapped);
388 - return;
 432+
 433+ // delta
 434+ $fpf_start[$delta] = max( $fpf_end[$delta-1]+1, $fpf_end[$delta+1]);
 435+ //echo "<br>p=$p, fpf_start[$delta]=$fpf_start[$delta]";
 436+ if($fpf_start[$delta]>$fpb_end[$delta] && $fpb_end[$delta]-$fpb_start[$delta]!=0 && !$middle_found[$delta]){
 437+ if($fpb_start[$delta]+1==$n){
 438+ return $this->foundMiddleSnake($x_start, $x_end, $y_start, $y_end, $fpb_end[$delta]+1,$fpb_start[$delta]+1, $delta, $a, $b, $a_inLcs, $b_inLcs, $nbSwaps);
 439+ }
 440+ $no_middle_found = FALSE;
 441+ $middle_found[$delta] = TRUE;
 442+ //echo "<br>middle found";
 443+ if($fpb_start[$delta]-$fpb_end[$delta]>$best_middle_score){
 444+ //echo " and best! ($fpb_start[$delta]-$fpb_end[$delta]>$best_middle_score)";
 445+ $best_middle_start = $fpb_end[$delta]+1;
 446+ $best_middle_end = $fpb_start[$delta]+1;
 447+ $best_middle_k = $delta;
 448+ $best_middle_score = $best_middle_end-$best_middle_start;
 449+ }
389450 }
390 - echo "forward:\n";
391 - print_r($fpf);
392 - echo "backward:\n";
393 - print_r($fpb);
 451+ $fpf_end[$delta] = $this->forward_snake( $delta, $fpf_start[$delta], $m, $n, $a, $b, $x_start, $y_start);
 452+ //echo ", fpf_end[$delta]=$fpf_end[$delta]";
 453+ if($fpf_end[$delta]==$n){
 454+ if($fpf_start[$delta]==$fpf_end[$delta]){
 455+ if($fpf_start[$delta]==$fpf_end[$delta-1]+1){
 456+ //last edge was horizontal
 457+ return $this->reachedEndHorizontal($x_start, $x_end, $y_start, $y_end, $a, $b, $a_inLcs, $b_inLcs, $nbSwaps);
 458+ }else{
 459+ //last edge was vertical
 460+ return $this->reachedEndVertical($x_start, $x_end, $y_start, $y_end, $a, $b, $a_inLcs, $b_inLcs, $nbSwaps);
 461+ }
 462+ }else{
 463+ return $this->foundMiddleSnake($x_start, $x_end, $y_start, $y_end, $fpf_start[$delta],$fpf_end[$delta], $delta, $a, $b, $a_inLcs, $b_inLcs, $nbSwaps);
 464+ }
 465+ }
 466+ if($fpf_end[$delta]>$fpb_end[$delta] && !$middle_found[$delta]){
 467+ $no_middle_found = FALSE;
 468+ $middle_found[$delta] = TRUE;
 469+ //echo "<br>middle found";
 470+ if($fpb_end[$delta]-$fpf_end[$delta]>$best_middle_score){
 471+ //echo " and best! ($fpb_end[$delta]-$fpf_end[$delta]>$best_middle_score)";
 472+ $best_middle_start = $fpf_start[$delta];
 473+ $best_middle_end = $fpf_end[$delta];
 474+ $best_middle_k = $delta;
 475+ $best_middle_score = $fpb_end[$delta]-$fpf_end[$delta];
 476+ }
 477+ }
394478
395 - //backward
 479+ // backward
 480+ $fpb_start[-$p-1] = $n;
 481+ $fpb_start[$delta+$p+1] = $n;
 482+ $fpb_end[-$p-1] = $n;
 483+ $fpb_end[$delta+$p+1] = $n;
 484+
 485+ for($k=-$p;$k<0;$k++){
 486+ $fpb_start[$k] = min( $fpb_end[$k+1]-1, $fpb_end[$k-1]);
 487+ //echo "<br>p=$p, fpb_start[$k]=$fpb_start[$k]";
 488+ if($fpf_end[$k]>$fpb_start[$k] && $fpf_end[$k]-$fpf_start[$k]!=0 && !$middle_found[$k]){
 489+ $no_middle_found = FALSE;
 490+ $middle_found[$k] = TRUE;
 491+ //echo "<br>middle found";
 492+ if($fpf_end[$k]-$fpf_start[$k]>$best_middle_score){
 493+ //echo " and best! ($fpf_end[$k]-$fpf_start[$k]>$best_middle_score)";
 494+ $best_middle_start = $fpf_start[$k];
 495+ $best_middle_end = $fpf_end[$k];
 496+ $best_middle_k = $k;
 497+ $best_middle_score = $best_middle_end-$best_middle_start;
 498+ }
 499+ }
 500+ $fpb_end[$k] = $this->backward_snake( $k, $fpb_start[$k], -1, -1, $a, $b, $x_start, $y_start);
 501+ //echo ", fpb_end[$k]=$fpb_end[$k]";
 502+ if($fpf_end[$k]>$fpb_end[$k] && !$middle_found[$k]){
 503+ $no_middle_found = FALSE;
 504+ $middle_found[$k] = TRUE;
 505+ //echo "<br>middle found";
 506+ if($fpb_end[$k]-$fpf_end[$k]>$best_middle_score){
 507+ //echo " and best! ($fpb_end[$k]-$fpf_end[$k]>$best_middle_score)";
 508+ $best_middle_start = $fpb_end[$k]+1;
 509+ $best_middle_end = $fpb_start[$k]+1;
 510+ $best_middle_k = $k;
 511+ $best_middle_score = $fpb_end[$k]-$fpf_end[$k];
 512+ }
 513+
 514+ }
 515+ }
396516 for($k=$delta+$p;$k>0;$k--){
397 - $fpb[$k] = $this->backward_snake( $k, min( $fpb[$k+1]-1, $fpb[$k-1]), -1, -1, $x_start, $y_start, $swapped);
398 - if($fpf[$k]>$fpb[$k]+1){
399 - $this->foundSnake($x_start, $x_end, $y_start, $y_end, $x_start+$fpb[$k]-$k+1, $x_start+$fpf[$k]-$k, $y_start+$fpb[$k]+1, $y_start+$fpf[$k], $swapped);
400 - return;
 517+ $fpb_start[$k] = min( $fpb_end[$k+1]-1, $fpb_end[$k-1]);
 518+ //echo "<br>p=$p, fpb_start[$k]=$fpb_start[$k]";
 519+ if($fpf_end[$k]>$fpb_start[$k] && $fpf_end[$k]-$fpf_start[$k]!=0 && !$middle_found[$k]){
 520+ $no_middle_found = FALSE;
 521+ $middle_found[$k] = TRUE;
 522+ //echo "<br>middle found";
 523+ if($fpf_end[$k]-$fpf_start[$k]>$best_middle_score){
 524+ //echo " and best! ($fpf_end[$k]-$fpf_start[$k]>$best_middle_score)";
 525+ $best_middle_start = $fpf_start[$k];
 526+ $best_middle_end = $fpf_end[$k];
 527+ $best_middle_k = $k;
 528+ $best_middle_score = $best_middle_end-$best_middle_start;
 529+ }
401530 }
 531+ $fpb_end[$k] = $this->backward_snake( $k, $fpb_start[$k], -1, -1, $a, $b, $x_start, $y_start);
 532+ //echo ", fpb_end[$k]=$fpb_end[$k]";
 533+ if($fpf_end[$k]>$fpb_end[$k] && !$middle_found[$k]){
 534+ $no_middle_found = FALSE;
 535+ $middle_found[$k] = TRUE;
 536+ //echo "<br>middle found";
 537+ if($fpb_end[$k]-$fpf_end[$k]>$best_middle_score){
 538+ //echo " and best! ($fpb_end[$k]-$fpf_end[$k]>$best_middle_score)";
 539+ $best_middle_start = $fpb_end[$k]+1;
 540+ $best_middle_end = $fpb_start[$k]+1;
 541+ $best_middle_k = $k;
 542+ $best_middle_score = $fpb_end[$k]-$fpf_end[$k];
 543+ }
 544+ }
402545 }
403 - echo "forward:\n";
404 - print_r($fpf);
405 - echo "backward:\n";
406 - print_r($fpb);
407 - for($k=-$p;$k<0;$k++){
408 - $fpb[$k] = $this->backward_snake( $k, min( $fpb[$k+1]-1, $fpb[$k-1]), -1, -1, $x_start, $y_start, $swapped);
409 - if($fpf[$k]>$fpb[$k]+1){
410 - $this->foundSnake($x_start, $x_end, $y_start, $y_end, $x_start+$fpb[$k]-$k+1, $x_start+$fpf[$k]-$k, $y_start+$fpb[$k]+1, $y_start+$fpf[$k], $swapped);
411 - return;
 546+
 547+
 548+ // 0
 549+ $fpb_start[0] = min( $fpb_end[1]-1, $fpb_end[-1]);
 550+ //echo "<br>p=$p, fpb_start[0]=$fpb_start[0]";
 551+ if($fpf_end[0]>$fpb_start[0] && $fpf_end[0]-$fpf_start[0]!=0 && !$middle_found[0]){
 552+ $no_middle_found = FALSE;
 553+ $middle_found[0] = TRUE;
 554+ //echo "<br>middle found";
 555+ if($fpf_end[0]-$fpf_start[0]>$best_middle_score){
 556+ //echo " and best! ($fpf_end[0]-$fpf_start[0]>$best_middle_score)";
 557+ $best_middle_start = $fpf_start[0];
 558+ $best_middle_end = $fpf_end[0];
 559+ $best_middle_k = 0;
 560+ $best_middle_score = $best_middle_end-$best_middle_start;
412561 }
413562 }
414 - echo "forward:\n";
415 - print_r($fpf);
416 - echo "backward:\n";
417 - print_r($fpb);
418 - $fpb[0] = $this->backward_snake( 0, min( $fpb[1]-1, $fpb[-1]), -1, -1, $x_start, $y_start, $swapped);
419 - if($fpf[0]>$fpb[0]+1){
420 - $this->foundSnake($x_start, $x_end, $y_start, $y_end, $x_start+$fpb[0]+1, $x_start+$fpf[0], $y_start+$fpb[0]+1, $y_start+$fpf[0], $swapped);
421 - return;
 563+ $fpb_end[0] = $this->backward_snake( 0, $fpb_start[0], -1, -1, $a, $b, $x_start, $y_start);
 564+ //echo ", fpb_end[0]=$fpb_end[0]";
 565+ if($fpf_end[0]>$fpb_end[0] && !$middle_found[0]){
 566+ $no_middle_found = FALSE;
 567+ $middle_found[$k] = TRUE;
 568+ //echo "<br>middle found";
 569+ if($fpb_end[0]-$fpf_end[0]>$best_middle_score){
 570+ //echo " and best! ($fpb_end[0]-$fpf_end[0]>$best_middle_score)";
 571+ $best_middle_start = $fpb_end[0]+1;
 572+ $best_middle_end = $fpb_start[0]+1;
 573+ $best_middle_k = 0;
 574+ $best_middle_score = $fpb_end[0]-$fpf_end[0];
 575+ }
422576 }
423 - echo "forward:\n";
424 - print_r($fpf);
425 - echo "backward:\n";
426 - print_r($fpb);
427577
 578+ if(!$no_middle_found){
 579+ if($endgame || $p == $m){
 580+ //echo "<br>endgame: $best_middle_start->$best_middle_end, k=$best_middle_k";
 581+ return $this->foundMiddleSnake($x_start, $x_end, $y_start, $y_end, $best_middle_start, $best_middle_end, $best_middle_k, $a, $b, $a_inLcs, $b_inLcs, $nbSwaps);
 582+ }
 583+ $endgame = TRUE;
 584+ }
428585 }
429586 }
430587 }
431588
432589 /**
433 - * Mark tokens $start until $end (exclusive) as in the LCS
434 - * Diff the part before and after the snake.
 590+ * Mark tokens as in the LCS if it is a snake.
 591+ * Diff the part before and after the middle.
435592 */
436 - protected function foundSnake($x_start, $x_end, $y_start, $y_end, $x_snakestart, $x_snakeend, $y_snakestart, $y_snakeend, $swapped){
437 - echo "<br>$x_start, $x_end, $y_start, $y_end, $x_snakestart, $x_snakeend, $y_snakestart, $y_snakeend, $swapped";
438 - if($swapped){
439 - $temp = $x_snakestart;
440 - $x_snakestart = $y_snakestart;
441 - $y_snakestart = $temp;
442 - $temp = $x_snakeend;
443 - $x_snakeend = $y_snakeend;
444 - $y_snakeend = $temp;
 593+ protected function foundMiddleSnake($x_start, $x_end, $y_start, $y_end, $fp_start, $fp_end, $k, &$a, &$b, &$a_inLcs, &$b_inLcs, &$nbSwaps){
 594+ //echo "<br><b>foundmiddle= $x_start, $x_end, $y_start, $y_end, $fp_start->$fp_end, $k</b>";
445595
446 - $temp = $x_start;
447 - $x_start = $y_start;
448 - $y_start = $temp;
449 - $temp = $x_end;
450 - $x_end = $y_end;
451 - $y_end = $temp;
452 - }
 596+ $x_snakestart = $x_start+$fp_start-$k;
 597+ $x_snakeend = $x_start+$fp_end-$k;
 598+ $y_snakestart = $y_start+$fp_start;
 599+ $y_snakeend = $y_start+$fp_end;
 600+
453601 for($x=$x_snakestart;$x<$x_snakeend;$x++){
454 - $this->a_inLcs[$x] = TRUE;
 602+ $a_inLcs[$x] = TRUE;
455603 }
456604 for($y=$y_snakestart;$y<$y_snakeend;$y++){
457 - $this->b_inLcs[$y] = TRUE;
 605+ $b_inLcs[$y] = TRUE;
458606 }
459 - echo "<br>left:";
460 - $this->diff_recursive($x_start, $x_snakestart, $y_start, $y_snakestart);
461 - echo "<br>right:";
462 - $this->diff_recursive($x_snakeend, $x_end, $y_snakeend, $y_end);
 607+ //echo "<br>left:";
 608+ $swaps = $nbSwaps;
 609+ $this->diff_recursive($x_start, $x_snakestart, $y_start, $y_snakestart, $a, $b, $a_inLcs, $b_inLcs, $nbSwaps);
 610+ //echo "<br>right:";
 611+ if($swaps==$nbSwaps){
 612+ $this->diff_recursive($x_snakeend, $x_end, $y_snakeend, $y_end, $a, $b, $a_inLcs, $b_inLcs, $nbSwaps);
 613+ }else{
 614+ $this->diff_recursive($y_snakeend, $y_end, $x_snakeend, $x_end, $a, $b, $a_inLcs, $b_inLcs, $nbSwaps);
 615+ }
463616 }
 617+
 618+ protected function reachedEndHorizontal($x_start, $x_end, $y_start, $y_end, &$a, &$b, &$a_inLcs, &$b_inLcs, &$nbSwaps){
 619+ //echo "<br><b>reached end horizontally! $x_start, $x_end, $y_start, $y_end</b>";
 620+ $y_end = $y_end-1;
 621+ $this->diff_recursive($x_start, $x_end, $y_start, $y_end, $a, $b, $a_inLcs, $b_inLcs, $nbSwaps);
 622+ }
 623+
 624+ protected function reachedEndVertical($x_start, $x_end, $y_start, $y_end, &$a, &$b, &$a_inLcs, &$b_inLcs, &$nbSwaps){
 625+ //echo "<br><b>reached end vertically! $x_start, $x_end, $y_start, $y_end</b>";
 626+ $x_end = $x_end-1;
 627+ $this->diff_recursive($x_start, $x_end, $y_start, $y_end, $a, $b, $a_inLcs, $b_inLcs, $nbSwaps);
 628+ }
464629 }
 630+
 631+function swap(&$a, &$b){
 632+ //echo "<br>swapping $a and $b";
 633+ $temp = $b;
 634+ $b = $a;
 635+ $a = $temp;
 636+}
 637+
465638 ?>
\ No newline at end of file

Status & tagging log