Index: branches/visual_diff/phase3/includes/Diff.php |
— | — | @@ -7,64 +7,28 @@ |
8 | 8 | */ |
9 | 9 | class AbstractDiffAlgorithm { |
10 | 10 | |
11 | | - protected $a; |
12 | | - protected $m; |
13 | | - |
14 | | - protected $b; |
15 | | - protected $n; |
16 | | - |
17 | | - protected $swapped; |
18 | | - |
19 | 11 | /** |
20 | | - * The sequences need to be ordered by length. |
21 | | - */ |
22 | | - protected function order(array $from, array $to){ |
23 | | - if(sizeof($from)>=sizeof($to)){ |
24 | | - $this->a = $to; |
25 | | - $this->m = sizeof($to); |
26 | | - $this->b = $from; |
27 | | - $this->n = sizeof($from); |
28 | | - $this->swapped = TRUE; |
29 | | - }else{ |
30 | | - $this->a = $from; |
31 | | - $this->m = sizeof($from); |
32 | | - $this->b = $to; |
33 | | - $this->n = sizeof($to); |
34 | | - $this->swapped = FALSE; |
35 | | - } |
36 | | - } |
37 | | - |
38 | | - /** |
39 | 12 | * Slide down the snake untill reaching an inequality. |
40 | 13 | */ |
41 | | - protected function forward_snake( $k, $y, $maxx, $maxy, $offsetx=0, $offsety=0, $swapped=FALSE){ |
42 | | - if($swapped){ |
43 | | - return $this->forward_snake( $k, $y-$k, $maxy, $maxx, $offsety, $offsetx, $swapped ); |
44 | | - }else{ |
45 | | - $x = $y-$k; |
46 | | - while($x < $maxx && $y < $maxy && $this->equals($this->a[$offsetx+$x],$this->b[$offsety+$y])){ |
47 | | - ++$x; |
48 | | - ++$y; |
49 | | - } |
50 | | - |
51 | | - return $y; |
| 14 | + protected function forward_snake( $k, $y, $maxx, $maxy, &$a, &$b, $offsetx=0, $offsety=0){ |
| 15 | + $x = $y-$k; |
| 16 | + while($x < $maxx && $y < $maxy && $this->equals($a[$offsetx+$x],$b[$offsety+$y])){ |
| 17 | + ++$x; |
| 18 | + ++$y; |
52 | 19 | } |
| 20 | + return $y; |
53 | 21 | } |
54 | 22 | |
55 | 23 | /** |
56 | 24 | * Slide up the snake until reaching an inequality. |
57 | 25 | */ |
58 | | - protected function backward_snake( $k, $y, $maxx, $maxy, $offsetx=0, $offsety=0, $swapped=FALSE){ |
59 | | - if($swapped){ |
60 | | - return $this->backward_snake( $k, $y-$k, $maxy, $maxx, $offsety, $offsetx, $swapped ); |
61 | | - }else{ |
62 | | - $x = $y-$k; |
63 | | - while($x > $maxx && $y > $maxy && $this->equals($this->a[$offsetx+$x],$this->b[$offsety+$y])){ |
64 | | - --$x; |
65 | | - --$y; |
66 | | - } |
67 | | - return $y; |
| 26 | + protected function backward_snake( $k, $y, $maxx, $maxy, &$a, &$b, $offsetx=0, $offsety=0){ |
| 27 | + $x = $y-$k; |
| 28 | + while($x > $maxx && $y > $maxy && $this->equals($a[$offsetx+$x],$b[$offsety+$y])){ |
| 29 | + --$x; |
| 30 | + --$y; |
68 | 31 | } |
| 32 | + return $y; |
69 | 33 | } |
70 | 34 | |
71 | 35 | /** |
— | — | @@ -97,30 +61,38 @@ |
98 | 62 | * the arrays $from and $to. |
99 | 63 | */ |
100 | 64 | public function ses (array $from, array $to) { |
101 | | - $this->order($from, $to); |
| 65 | + if(sizeof($from)>sizeof($to)){ |
| 66 | + swap($from, $to); |
| 67 | + } |
| 68 | + $m = sizeof($from); |
| 69 | + $n = sizeof($to); |
102 | 70 | |
103 | | - if($this->n==0){ |
104 | | - return $this->m; |
105 | | - }else if($this->m==0){ |
106 | | - return $this->n; |
| 71 | + if($n==0){ |
| 72 | + return $m; |
| 73 | + }else if($m==0){ |
| 74 | + return $n; |
107 | 75 | } |
108 | 76 | |
109 | | - $delta = $this->n-$this->m; |
110 | | - $fp = array_fill_keys( range( -($this->m+1), $this->n+1), $this->n); |
| 77 | + $delta = $n-$m; |
111 | 78 | $p = -1; |
| 79 | + $fp = array_fill( 0, $delta+1, $n); |
112 | 80 | |
113 | 81 | do{ |
114 | 82 | ++$p; |
| 83 | + |
| 84 | + $fp[-$p-1] = $n; |
| 85 | + $fp[$delta+$p+1] = $n; |
| 86 | + |
115 | 87 | for($k=$delta+$p;$k>0;$k--){ |
116 | | - $fp[$k] = $this->backward_snake( $k, min( $fp[$k+1]-1, $fp[$k-1]), -1, -1); |
| 88 | + $fp[$k] = $this->backward_snake( $k, min( $fp[$k+1]-1, $fp[$k-1]), -1, -1, $from, $to); |
117 | 89 | } |
118 | 90 | for($k=-$p;$k<0;$k++){ |
119 | | - $fp[$k] = $this->backward_snake( $k, min( $fp[$k+1]-1, $fp[$k-1]), -1, -1); |
| 91 | + $fp[$k] = $this->backward_snake( $k, min( $fp[$k+1]-1, $fp[$k-1]), -1, -1, $from, $to); |
120 | 92 | } |
121 | | - $fp[0] = $this->backward_snake( 0, min( $fp[1]-1, $fp[-1]), -1, -1); |
| 93 | + $fp[0] = $this->backward_snake( 0, min( $fp[1]-1, $fp[-1]), -1, -1, $from, $to); |
122 | 94 | }while($fp[0]!=-1); |
123 | 95 | |
124 | | - echo "<br>$delta, $p"; |
| 96 | + //echo "<br>$delta, $p"; |
125 | 97 | return $delta+2*$p; |
126 | 98 | } |
127 | 99 | |
— | — | @@ -148,30 +120,36 @@ |
149 | 121 | * the arrays $from and $to. |
150 | 122 | */ |
151 | 123 | public function ses (array $from, array $to) { |
152 | | - $this->order($from, $to); |
| 124 | + if(sizeof($from)>sizeof($to)){ |
| 125 | + swap($from, $to); |
| 126 | + } |
| 127 | + $m = sizeof($from); |
| 128 | + $n = sizeof($to); |
153 | 129 | |
154 | | - if($this->n==0){ |
155 | | - return $this->m; |
156 | | - }else if($this->m==0){ |
157 | | - return $this->n; |
| 130 | + if($n==0){ |
| 131 | + return $m; |
| 132 | + }else if($m==0){ |
| 133 | + return $n; |
158 | 134 | } |
159 | 135 | |
160 | | - $delta = $this->n-$this->m; |
161 | | - $fp = array_fill_keys( range( -($this->m+1), $this->n+1), -1); |
| 136 | + $delta = $n-$m; |
162 | 137 | $p = -1; |
163 | | - |
| 138 | + $fp = array_fill( 0, $delta+1, -1); |
164 | 139 | do{ |
165 | 140 | ++$p; |
| 141 | + |
| 142 | + $fp[-$p-1] = -1; |
| 143 | + $fp[$delta+$p+1] = -1; |
| 144 | + |
166 | 145 | for($k=-$p;$k<$delta;$k++){ |
167 | | - $fp[$k] = $this->forward_snake( $k, max( $fp[$k-1]+1, $fp[$k+1]), $this->m, $this->n); |
| 146 | + $fp[$k] = $this->forward_snake( $k, max( $fp[$k-1]+1, $fp[$k+1]), $m, $n, $from, $to); |
168 | 147 | } |
169 | 148 | for($k=$delta+$p;$k>$delta;$k--){ |
170 | | - $fp[$k] = $this->forward_snake( $k, max( $fp[$k-1]+1, $fp[$k+1]), $this->m, $this->n); |
| 149 | + $fp[$k] = $this->forward_snake( $k, max( $fp[$k-1]+1, $fp[$k+1]), $m, $n, $from, $to); |
171 | 150 | } |
172 | | - $fp[$delta] = $this->forward_snake( $delta, max( $fp[$delta-1]+1, $fp[$delta+1]), $this->m, $this->n); |
173 | | - }while($fp[$delta]!=$this->n); |
| 151 | + $fp[$delta] = $this->forward_snake( $delta, max( $fp[$delta-1]+1, $fp[$delta+1]), $m, $n, $from, $to); |
| 152 | + }while($fp[$delta]!=$n); |
174 | 153 | |
175 | | - echo "<br>$delta, $p"; |
176 | 154 | return $delta+2*$p; |
177 | 155 | } |
178 | 156 | |
— | — | @@ -206,65 +184,69 @@ |
207 | 185 | * the arrays $from and $to. |
208 | 186 | */ |
209 | 187 | public function ses (array $from, array $to) { |
210 | | - $this->order($from, $to); |
| 188 | + if(sizeof($from)>sizeof($to)){ |
| 189 | + swap($from, $to); |
| 190 | + } |
| 191 | + $m = sizeof($from); |
| 192 | + $n = sizeof($to); |
211 | 193 | |
212 | | - if($this->n==0){ |
213 | | - return $this->m; |
214 | | - }else if($this->m==0){ |
215 | | - return $this->n; |
| 194 | + if($n==0){ |
| 195 | + return $m; |
| 196 | + }else if($m==0){ |
| 197 | + return $n; |
216 | 198 | } |
217 | 199 | |
218 | | - $delta = $this->n-$this->m; |
| 200 | + $delta = $n-$m; |
| 201 | + $p = -1; |
| 202 | + $fpf = array_fill( 0, $delta+1, -1); |
| 203 | + $fpb = array_fill( 0, $delta+1, $n); |
219 | 204 | |
220 | | - $fpkeys = range( -($this->m+1), $this->n+1); |
221 | | - $fpf = array_fill_keys( $fpkeys, -1); |
222 | | - $fpb = array_fill_keys( $fpkeys, $this->n); |
223 | 205 | $p = -1; |
224 | 206 | |
225 | 207 | while(1){ |
226 | 208 | ++$p; |
227 | 209 | |
228 | 210 | //forward |
| 211 | + $fpf[-$p-1] = -1; |
| 212 | + $fpf[$delta+$p+1] = -1; |
| 213 | + |
229 | 214 | for($k=-$p;$k<$delta;$k++){ |
230 | | - $fpf[$k] = $this->forward_snake( $k, max( $fpf[$k-1]+1, $fpf[$k+1]), $this->m, $this->n); |
| 215 | + $fpf[$k] = $this->forward_snake( $k, max( $fpf[$k-1]+1, $fpf[$k+1]), $m, $n, $from, $to); |
231 | 216 | if($fpf[$k]>$fpb[$k]+1){ |
232 | | - echo "<br>0, $delta, $p"; |
233 | 217 | return $delta+4*$p-4; |
234 | 218 | } |
235 | 219 | } |
236 | 220 | for($k=$delta+$p;$k>$delta;$k--){ |
237 | | - $fpf[$k] = $this->forward_snake( $k, max( $fpf[$k-1]+1, $fpf[$k+1]), $this->m, $this->n); |
| 221 | + $fpf[$k] = $this->forward_snake( $k, max( $fpf[$k-1]+1, $fpf[$k+1]), $m, $n, $from, $to); |
238 | 222 | if($fpf[$k]>$fpb[$k]+1){ |
239 | | - echo "<br>0, $delta, $p"; |
240 | 223 | return $delta+4*$p-4; |
241 | 224 | } |
242 | 225 | } |
243 | | - $fpf[$delta] = $this->forward_snake( $delta, max( $fpf[$delta-1]+1, $fpf[$delta+1]), $this->m, $this->n); |
| 226 | + $fpf[$delta] = $this->forward_snake( $delta, max( $fpf[$delta-1]+1, $fpf[$delta+1]), $m, $n, $from, $to); |
244 | 227 | |
245 | 228 | if($fpf[$delta]>$fpb[$delta]+1){ |
246 | | - echo "<br>0, $delta, $p"; |
247 | 229 | return $delta+4*$p-4; |
248 | 230 | } |
249 | 231 | |
250 | 232 | //backward |
| 233 | + $fpb[-$p-1] = $n; |
| 234 | + $fpb[$delta+$p+1] = $n; |
| 235 | + |
251 | 236 | for($k=$delta+$p;$k>0;$k--){ |
252 | | - $fpb[$k] = $this->backward_snake( $k, min( $fpb[$k+1]-1, $fpb[$k-1]), -1, -1); |
| 237 | + $fpb[$k] = $this->backward_snake( $k, min( $fpb[$k+1]-1, $fpb[$k-1]), -1, -1, $from, $to); |
253 | 238 | if($fpf[$k]>$fpb[$k]+1){ |
254 | | - echo "<br>1, $delta, $p"; |
255 | 239 | return $delta+4*$p; |
256 | 240 | } |
257 | 241 | } |
258 | 242 | for($k=-$p;$k<0;$k++){ |
259 | | - $fpb[$k] = $this->backward_snake( $k, min( $fpb[$k+1]-1, $fpb[$k-1]), -1, -1); |
| 243 | + $fpb[$k] = $this->backward_snake( $k, min( $fpb[$k+1]-1, $fpb[$k-1]), -1, -1, $from, $to); |
260 | 244 | if($fpf[$k]>$fpb[$k]+1){ |
261 | | - echo "<br>1, $delta, $p"; |
262 | 245 | return $delta+4*$p; |
263 | 246 | } |
264 | 247 | } |
265 | | - $fpb[0] = $this->backward_snake( 0, min( $fpb[1]-1, $fpb[-1]), -1, -1); |
| 248 | + $fpb[0] = $this->backward_snake( 0, min( $fpb[1]-1, $fpb[-1]), -1, -1, $from, $to); |
266 | 249 | |
267 | 250 | if($fpf[0]>$fpb[0]+1){ |
268 | | - echo "<br>1, $delta, $p"; |
269 | 251 | return $delta+4*$p; |
270 | 252 | } |
271 | 253 | |
— | — | @@ -279,8 +261,10 @@ |
280 | 262 | * !!!!!!!!!!!!!!!!!!!!!! |
281 | 263 | * BEWARE HERE BE DRAGONS |
282 | 264 | * This algorithm is not guaranteed to find the exact size of the LCS. |
283 | | - * There are border cases where it will be 1 (or more) off. |
284 | | - * This implmentation is useful because it is very fast. |
| 265 | + * There are border cases for n>5 where it will be 1 (or more) off recursively. |
| 266 | + * This can add up to a difference from the optimal LCS of 5%-10%. |
| 267 | + * The common subsequence will be correct though. |
| 268 | + * This implementation is useful because it is very fast. |
285 | 269 | * !!!!!!!!!!!!!!!!!!!!!! |
286 | 270 | * |
287 | 271 | * This is the O(NP) variant as descibed by Wu et al. in "An O(NP) Sequence Comparison Algorithm" |
— | — | @@ -289,32 +273,26 @@ |
290 | 274 | */ |
291 | 275 | class FastDiffAlgorithm extends AbstractDiffAlgorithm { |
292 | 276 | |
293 | | - private $a_inLcs; |
294 | | - |
295 | | - private $b_inLcs; |
296 | | - |
297 | | - private $lcsLength; |
298 | | - |
299 | 277 | /* |
300 | 278 | * Diffs two arrays and returns array($from_inLcs, $to_inLcs) |
301 | 279 | * where {from,to}_inLcs is FALSE if the token was {removed,added} |
302 | 280 | * and TRUE if it is in the longest common subsequence. |
303 | 281 | */ |
304 | 282 | public function diff (array $from, array $to) { |
305 | | - $this->order($from, $to); |
306 | | - $this->a_inLcs = array_fill(0,$this->m,FALSE); |
307 | | - $this->b_inLcs = array_fill(0,$this->n,FALSE); |
308 | 283 | |
309 | | - //There is no need to remove common tokens at the beginning and end, the first snake will catch them. |
310 | | - //Hashing the tokens is not generally applicable. |
| 284 | + $from_inLcs = sizeof($from)>0 ? array_fill(0,sizeof($from),FALSE): array(); |
| 285 | + $to_inLcs = sizeof($to)>0 ? array_fill(0,sizeof($to),FALSE): array(); |
| 286 | + |
311 | 287 | //Removing tokens that are not in both sequences is O(N*M)>=O(N*P). There is little to gain when most edits are small. |
312 | 288 | |
313 | | - $this->diff_recursive(0,$this->m,0,$this->n); |
314 | | - |
315 | | - if($this->swapped){ |
316 | | - return array($this->b_inLcs,$this->a_inLcs); |
| 289 | + $nbSwaps=0; |
| 290 | + $from2 = $from; |
| 291 | + $to2 = $to; |
| 292 | + $this->diff_recursive(0,sizeof($from),0,sizeof($to), $from2, $to2, $from_inLcs, $to_inLcs, $nbSwaps); |
| 293 | + if($nbSwaps==0){ |
| 294 | + return array($from_inLcs, $to_inLcs); |
317 | 295 | }else{ |
318 | | - return array($this->a_inLcs,$this->b_inLcs); |
| 296 | + return array($to_inLcs, $from_inLcs); |
319 | 297 | } |
320 | 298 | } |
321 | 299 | |
— | — | @@ -323,20 +301,35 @@ |
324 | 302 | * |
325 | 303 | * Ends are exclusive |
326 | 304 | */ |
327 | | - protected function diff_recursive($x_start, $x_end, $y_start, $y_end){ |
328 | | - echo "<br>$x_start-$x_end , $y_start-$y_end"; |
| 305 | + protected function diff_recursive($x_start, $x_end, $y_start, $y_end, &$a, &$b, &$a_inLcs, &$b_inLcs, &$nbSwaps){ |
| 306 | + //echo "<br>$x_start-$x_end , $y_start-$y_end"; |
329 | 307 | if($x_end>$x_start && $y_end>$y_start){ |
| 308 | + //echo "-pass"; |
330 | 309 | |
| 310 | +// //echo "<br>"; |
| 311 | +// for($i = $x_start;$i<$x_end;$i++){ |
| 312 | +// //echo " ".$a[$i]; |
| 313 | +// } |
| 314 | +// //echo "<br>"; |
| 315 | +// for($i = $y_start;$i<$y_end;$i++){ |
| 316 | +// //echo " ".$b[$i]; |
| 317 | +// } |
| 318 | + |
331 | 319 | if($x_end-$x_start>$y_end-$y_start){ |
332 | | - $temp = $x_end; |
333 | | - $x_end = $y_end; |
334 | | - $y_end = $temp; |
335 | | - $temp = $x_start; |
336 | | - $x_start = $y_start; |
337 | | - $y_start = $temp; |
338 | | - $swapped = TRUE; |
339 | | - }else{ |
340 | | - $swapped = FALSE; |
| 320 | + swap($x_end, $y_end); |
| 321 | + swap($x_start, $y_start); |
| 322 | + swap($a, $b); |
| 323 | + swap($a_inLcs, $b_inLcs); |
| 324 | + $nbSwaps = ($nbSwaps+1)%2; |
| 325 | + //echo "<br>nbswaps=$nbSwaps"; |
| 326 | + //echo "<br>"; |
| 327 | +// for($i = $x_start;$i<$x_end;$i++){ |
| 328 | +// //echo " ".$a[$i]; |
| 329 | +// } |
| 330 | +// //echo "<br>"; |
| 331 | +// for($i = $y_start;$i<$y_end;$i++){ |
| 332 | +// //echo " ".$b[$i]; |
| 333 | +// } |
341 | 334 | } |
342 | 335 | |
343 | 336 | $m = $x_end-$x_start; |
— | — | @@ -344,121 +337,301 @@ |
345 | 338 | |
346 | 339 | $delta = $n-$m; |
347 | 340 | |
348 | | - $fpkeys = range( -($m+1), $n+1); |
349 | | - $fpf = array_fill_keys( $fpkeys, -1); |
350 | | - $fpb = array_fill_keys( $fpkeys, $n); |
| 341 | + $fpf_start = array_fill( 0, $delta+1, -1); |
| 342 | + $fpb_start = array_fill( 0, $delta+1, $n); |
| 343 | + $fpf_end = array_fill( 0, $delta+1, -1); |
| 344 | + $fpb_end = array_fill( 0, $delta+1, $n); |
| 345 | + |
| 346 | + $no_middle_found = TRUE; |
| 347 | + $endgame = FALSE; |
| 348 | + $middle_found = array_fill( 0, $delta+1, FALSE); |
| 349 | + $best_middle_score = -$n-3; |
| 350 | + $best_middle_start; |
| 351 | + $best_middle_end; |
| 352 | + $best_middle_k; |
| 353 | + |
351 | 354 | $p = -1; |
352 | 355 | |
353 | 356 | while(1){ |
354 | 357 | ++$p; |
355 | 358 | |
356 | | - echo "$p\n"; |
357 | | - echo "forward:\n"; |
358 | | - print_r($fpf); |
359 | | - echo "backward:\n"; |
360 | | - print_r($fpb); |
| 359 | + //echo "<br>".microtime(true)." p:".$p; |
361 | 360 | |
362 | | - //forward |
| 361 | + $middle_found[-$p-1] = FALSE; |
| 362 | + $middle_found[$delta+$p+1] = FALSE; |
| 363 | + |
| 364 | + // forward |
| 365 | + $fpf_start[-$p-1] = -1; |
| 366 | + $fpf_start[$delta+$p+1] = -1; |
| 367 | + $fpf_end[-$p-1] = -1; |
| 368 | + $fpf_end[$delta+$p+1] = -1; |
| 369 | + |
363 | 370 | for($k=-$p;$k<$delta;$k++){ |
364 | | - $fpf[$k] = $this->forward_snake( $k, max( $fpf[$k-1]+1, $fpf[$k+1]), $m, $n, $x_start, $y_start, $swapped); |
365 | | - if($fpf[$k]>$fpb[$k]+1){ |
366 | | - $this->foundSnake($x_start, $x_end, $y_start, $y_end, $x_start+$fpb[$k]-$k+1, $x_start+$fpf[$k]-$k, $y_start+$fpb[$k]+1, $y_start+$fpf[$k], $swapped); |
367 | | - return; |
| 371 | + $fpf_start[$k] = max( $fpf_end[$k-1]+1, $fpf_end[$k+1]); |
| 372 | + //echo "<br>p=$p, fpf_start[$k]=$fpf_start[$k]"; |
| 373 | + if($fpf_start[$k]>$fpb_end[$k] && $fpb_end[$k]-$fpb_start[$k]!=0 && !$middle_found[$k]){ |
| 374 | + $no_middle_found = FALSE; |
| 375 | + $middle_found[$k] = TRUE; |
| 376 | + //echo "<br>middle found"; |
| 377 | + if($fpb_start[$k]-$fpb_end[$k]>$best_middle_score){ |
| 378 | + //echo " and best! ($fpb_start[$k]-$fpb_end[$k]>$best_middle_score)"; |
| 379 | + $best_middle_start = $fpb_end[$k]+1; |
| 380 | + $best_middle_end = $fpb_start[$k]+1; |
| 381 | + $best_middle_k = $k; |
| 382 | + $best_middle_score = $best_middle_end-$best_middle_start; |
| 383 | + } |
368 | 384 | } |
| 385 | + $fpf_end[$k] = $this->forward_snake( $k, $fpf_start[$k], $m, $n, $a, $b, $x_start, $y_start); |
| 386 | + //echo ", fpf_end[$k]=$fpf_end[$k]"; |
| 387 | + if($fpf_end[$k]>$fpb_end[$k] && !$middle_found[$k]){ |
| 388 | + $no_middle_found = FALSE; |
| 389 | + $middle_found[$k] = TRUE; |
| 390 | + //echo "<br>middle found"; |
| 391 | + if($fpb_end[$k]-$fpf_end[$k]>$best_middle_score){ |
| 392 | + //echo " and best ($fpb_end[$k]-$fpf_end[$k]>$best_middle_score)!"; |
| 393 | + $best_middle_start = $fpf_start[$k]; |
| 394 | + $best_middle_end = $fpf_end[$k]; |
| 395 | + $best_middle_k = $k; |
| 396 | + $best_middle_score = $fpb_end[$k]-$fpf_end[$k]; |
| 397 | + } |
| 398 | + } |
| 399 | + |
369 | 400 | } |
370 | | - echo "forward:\n"; |
371 | | - print_r($fpf); |
372 | | - echo "backward:\n"; |
373 | | - print_r($fpb); |
| 401 | + |
374 | 402 | for($k=$delta+$p;$k>$delta;$k--){ |
375 | | - $fpf[$k] = $this->forward_snake( $k, max( $fpf[$k-1]+1, $fpf[$k+1]), $m, $n, $x_start, $y_start, $swapped); |
376 | | - if($fpf[$k]>$fpb[$k]+1){ |
377 | | - $this->foundSnake($x_start, $x_end, $y_start, $y_end, $x_start+$fpb[$k]-$k+1, $x_start+$fpf[$k]-$k, $y_start+$fpb[$k]+1, $y_start+$fpf[$k], $swapped); |
378 | | - return; |
| 403 | + $fpf_start[$k] = max( $fpf_end[$k-1]+1, $fpf_end[$k+1]); |
| 404 | + //echo "<br>p=$p, fpf_start[$k]=$fpf_start[$k]"; |
| 405 | + if($fpf_start[$k]>$fpb_end[$k] && $fpb_end[$k]-$fpb_start[$k]!=0 && !$middle_found[$k]){ |
| 406 | + $no_middle_found = FALSE; |
| 407 | + $middle_found[$k] = TRUE; |
| 408 | + //echo "<br>middle found"; |
| 409 | + if($fpb_start[$k]-$fpb_end[$k]>$best_middle_score){ |
| 410 | + //echo " and best! ($fpb_start[$k]-$fpb_end[$k]>$best_middle_score)"; |
| 411 | + $best_middle_start = $fpb_end[$k]+1; |
| 412 | + $best_middle_end = $fpb_start[$k]+1; |
| 413 | + $best_middle_k = $k; |
| 414 | + $best_middle_score = $best_middle_end-$best_middle_start; |
| 415 | + } |
379 | 416 | } |
| 417 | + $fpf_end[$k] = $this->forward_snake( $k, $fpf_start[$k], $m, $n, $a, $b, $x_start, $y_start); |
| 418 | + //echo ", fpf_end[$k]=$fpf_end[$k]"; |
| 419 | + if($fpf_end[$k]>$fpb_end[$k] && !$middle_found[$k]){ |
| 420 | + $no_middle_found = FALSE; |
| 421 | + $middle_found[$k] = TRUE; |
| 422 | + //echo "<br>middle found"; |
| 423 | + if($fpb_end[$k]-$fpf_end[$k]>$best_middle_score){ |
| 424 | + //echo " and best! ($fpb_end[$k]-$fpf_end[$k]>$best_middle_score)"; |
| 425 | + $best_middle_start = $fpf_start[$k]; |
| 426 | + $best_middle_end = $fpf_end[$k]; |
| 427 | + $best_middle_k = $k; |
| 428 | + $best_middle_score = $fpb_end[$k]-$fpf_end[$k]; |
| 429 | + } |
| 430 | + } |
380 | 431 | } |
381 | | - echo "forward:\n"; |
382 | | - print_r($fpf); |
383 | | - echo "backward:\n"; |
384 | | - print_r($fpb); |
385 | | - $fpf[$delta] = $this->forward_snake( $delta, max( $fpf[$delta-1]+1, $fpf[$delta+1]), $m, $n, $x_start, $y_start, $swapped); |
386 | | - if($fpf[$delta]>$fpb[$delta]+1){ |
387 | | - $this->foundSnake($x_start, $x_end, $y_start, $y_end, $x_start+$fpb[$delta]-$delta+1, $x_start+$fpf[$delta]-$k, $y_start+$fpb[$delta]+1, $y_start+$fpf[$delta], $swapped); |
388 | | - return; |
| 432 | + |
| 433 | + // delta |
| 434 | + $fpf_start[$delta] = max( $fpf_end[$delta-1]+1, $fpf_end[$delta+1]); |
| 435 | + //echo "<br>p=$p, fpf_start[$delta]=$fpf_start[$delta]"; |
| 436 | + if($fpf_start[$delta]>$fpb_end[$delta] && $fpb_end[$delta]-$fpb_start[$delta]!=0 && !$middle_found[$delta]){ |
| 437 | + if($fpb_start[$delta]+1==$n){ |
| 438 | + return $this->foundMiddleSnake($x_start, $x_end, $y_start, $y_end, $fpb_end[$delta]+1,$fpb_start[$delta]+1, $delta, $a, $b, $a_inLcs, $b_inLcs, $nbSwaps); |
| 439 | + } |
| 440 | + $no_middle_found = FALSE; |
| 441 | + $middle_found[$delta] = TRUE; |
| 442 | + //echo "<br>middle found"; |
| 443 | + if($fpb_start[$delta]-$fpb_end[$delta]>$best_middle_score){ |
| 444 | + //echo " and best! ($fpb_start[$delta]-$fpb_end[$delta]>$best_middle_score)"; |
| 445 | + $best_middle_start = $fpb_end[$delta]+1; |
| 446 | + $best_middle_end = $fpb_start[$delta]+1; |
| 447 | + $best_middle_k = $delta; |
| 448 | + $best_middle_score = $best_middle_end-$best_middle_start; |
| 449 | + } |
389 | 450 | } |
390 | | - echo "forward:\n"; |
391 | | - print_r($fpf); |
392 | | - echo "backward:\n"; |
393 | | - print_r($fpb); |
| 451 | + $fpf_end[$delta] = $this->forward_snake( $delta, $fpf_start[$delta], $m, $n, $a, $b, $x_start, $y_start); |
| 452 | + //echo ", fpf_end[$delta]=$fpf_end[$delta]"; |
| 453 | + if($fpf_end[$delta]==$n){ |
| 454 | + if($fpf_start[$delta]==$fpf_end[$delta]){ |
| 455 | + if($fpf_start[$delta]==$fpf_end[$delta-1]+1){ |
| 456 | + //last edge was horizontal |
| 457 | + return $this->reachedEndHorizontal($x_start, $x_end, $y_start, $y_end, $a, $b, $a_inLcs, $b_inLcs, $nbSwaps); |
| 458 | + }else{ |
| 459 | + //last edge was vertical |
| 460 | + return $this->reachedEndVertical($x_start, $x_end, $y_start, $y_end, $a, $b, $a_inLcs, $b_inLcs, $nbSwaps); |
| 461 | + } |
| 462 | + }else{ |
| 463 | + return $this->foundMiddleSnake($x_start, $x_end, $y_start, $y_end, $fpf_start[$delta],$fpf_end[$delta], $delta, $a, $b, $a_inLcs, $b_inLcs, $nbSwaps); |
| 464 | + } |
| 465 | + } |
| 466 | + if($fpf_end[$delta]>$fpb_end[$delta] && !$middle_found[$delta]){ |
| 467 | + $no_middle_found = FALSE; |
| 468 | + $middle_found[$delta] = TRUE; |
| 469 | + //echo "<br>middle found"; |
| 470 | + if($fpb_end[$delta]-$fpf_end[$delta]>$best_middle_score){ |
| 471 | + //echo " and best! ($fpb_end[$delta]-$fpf_end[$delta]>$best_middle_score)"; |
| 472 | + $best_middle_start = $fpf_start[$delta]; |
| 473 | + $best_middle_end = $fpf_end[$delta]; |
| 474 | + $best_middle_k = $delta; |
| 475 | + $best_middle_score = $fpb_end[$delta]-$fpf_end[$delta]; |
| 476 | + } |
| 477 | + } |
394 | 478 | |
395 | | - //backward |
| 479 | + // backward |
| 480 | + $fpb_start[-$p-1] = $n; |
| 481 | + $fpb_start[$delta+$p+1] = $n; |
| 482 | + $fpb_end[-$p-1] = $n; |
| 483 | + $fpb_end[$delta+$p+1] = $n; |
| 484 | + |
| 485 | + for($k=-$p;$k<0;$k++){ |
| 486 | + $fpb_start[$k] = min( $fpb_end[$k+1]-1, $fpb_end[$k-1]); |
| 487 | + //echo "<br>p=$p, fpb_start[$k]=$fpb_start[$k]"; |
| 488 | + if($fpf_end[$k]>$fpb_start[$k] && $fpf_end[$k]-$fpf_start[$k]!=0 && !$middle_found[$k]){ |
| 489 | + $no_middle_found = FALSE; |
| 490 | + $middle_found[$k] = TRUE; |
| 491 | + //echo "<br>middle found"; |
| 492 | + if($fpf_end[$k]-$fpf_start[$k]>$best_middle_score){ |
| 493 | + //echo " and best! ($fpf_end[$k]-$fpf_start[$k]>$best_middle_score)"; |
| 494 | + $best_middle_start = $fpf_start[$k]; |
| 495 | + $best_middle_end = $fpf_end[$k]; |
| 496 | + $best_middle_k = $k; |
| 497 | + $best_middle_score = $best_middle_end-$best_middle_start; |
| 498 | + } |
| 499 | + } |
| 500 | + $fpb_end[$k] = $this->backward_snake( $k, $fpb_start[$k], -1, -1, $a, $b, $x_start, $y_start); |
| 501 | + //echo ", fpb_end[$k]=$fpb_end[$k]"; |
| 502 | + if($fpf_end[$k]>$fpb_end[$k] && !$middle_found[$k]){ |
| 503 | + $no_middle_found = FALSE; |
| 504 | + $middle_found[$k] = TRUE; |
| 505 | + //echo "<br>middle found"; |
| 506 | + if($fpb_end[$k]-$fpf_end[$k]>$best_middle_score){ |
| 507 | + //echo " and best! ($fpb_end[$k]-$fpf_end[$k]>$best_middle_score)"; |
| 508 | + $best_middle_start = $fpb_end[$k]+1; |
| 509 | + $best_middle_end = $fpb_start[$k]+1; |
| 510 | + $best_middle_k = $k; |
| 511 | + $best_middle_score = $fpb_end[$k]-$fpf_end[$k]; |
| 512 | + } |
| 513 | + |
| 514 | + } |
| 515 | + } |
396 | 516 | for($k=$delta+$p;$k>0;$k--){ |
397 | | - $fpb[$k] = $this->backward_snake( $k, min( $fpb[$k+1]-1, $fpb[$k-1]), -1, -1, $x_start, $y_start, $swapped); |
398 | | - if($fpf[$k]>$fpb[$k]+1){ |
399 | | - $this->foundSnake($x_start, $x_end, $y_start, $y_end, $x_start+$fpb[$k]-$k+1, $x_start+$fpf[$k]-$k, $y_start+$fpb[$k]+1, $y_start+$fpf[$k], $swapped); |
400 | | - return; |
| 517 | + $fpb_start[$k] = min( $fpb_end[$k+1]-1, $fpb_end[$k-1]); |
| 518 | + //echo "<br>p=$p, fpb_start[$k]=$fpb_start[$k]"; |
| 519 | + if($fpf_end[$k]>$fpb_start[$k] && $fpf_end[$k]-$fpf_start[$k]!=0 && !$middle_found[$k]){ |
| 520 | + $no_middle_found = FALSE; |
| 521 | + $middle_found[$k] = TRUE; |
| 522 | + //echo "<br>middle found"; |
| 523 | + if($fpf_end[$k]-$fpf_start[$k]>$best_middle_score){ |
| 524 | + //echo " and best! ($fpf_end[$k]-$fpf_start[$k]>$best_middle_score)"; |
| 525 | + $best_middle_start = $fpf_start[$k]; |
| 526 | + $best_middle_end = $fpf_end[$k]; |
| 527 | + $best_middle_k = $k; |
| 528 | + $best_middle_score = $best_middle_end-$best_middle_start; |
| 529 | + } |
401 | 530 | } |
| 531 | + $fpb_end[$k] = $this->backward_snake( $k, $fpb_start[$k], -1, -1, $a, $b, $x_start, $y_start); |
| 532 | + //echo ", fpb_end[$k]=$fpb_end[$k]"; |
| 533 | + if($fpf_end[$k]>$fpb_end[$k] && !$middle_found[$k]){ |
| 534 | + $no_middle_found = FALSE; |
| 535 | + $middle_found[$k] = TRUE; |
| 536 | + //echo "<br>middle found"; |
| 537 | + if($fpb_end[$k]-$fpf_end[$k]>$best_middle_score){ |
| 538 | + //echo " and best! ($fpb_end[$k]-$fpf_end[$k]>$best_middle_score)"; |
| 539 | + $best_middle_start = $fpb_end[$k]+1; |
| 540 | + $best_middle_end = $fpb_start[$k]+1; |
| 541 | + $best_middle_k = $k; |
| 542 | + $best_middle_score = $fpb_end[$k]-$fpf_end[$k]; |
| 543 | + } |
| 544 | + } |
402 | 545 | } |
403 | | - echo "forward:\n"; |
404 | | - print_r($fpf); |
405 | | - echo "backward:\n"; |
406 | | - print_r($fpb); |
407 | | - for($k=-$p;$k<0;$k++){ |
408 | | - $fpb[$k] = $this->backward_snake( $k, min( $fpb[$k+1]-1, $fpb[$k-1]), -1, -1, $x_start, $y_start, $swapped); |
409 | | - if($fpf[$k]>$fpb[$k]+1){ |
410 | | - $this->foundSnake($x_start, $x_end, $y_start, $y_end, $x_start+$fpb[$k]-$k+1, $x_start+$fpf[$k]-$k, $y_start+$fpb[$k]+1, $y_start+$fpf[$k], $swapped); |
411 | | - return; |
| 546 | + |
| 547 | + |
| 548 | + // 0 |
| 549 | + $fpb_start[0] = min( $fpb_end[1]-1, $fpb_end[-1]); |
| 550 | + //echo "<br>p=$p, fpb_start[0]=$fpb_start[0]"; |
| 551 | + if($fpf_end[0]>$fpb_start[0] && $fpf_end[0]-$fpf_start[0]!=0 && !$middle_found[0]){ |
| 552 | + $no_middle_found = FALSE; |
| 553 | + $middle_found[0] = TRUE; |
| 554 | + //echo "<br>middle found"; |
| 555 | + if($fpf_end[0]-$fpf_start[0]>$best_middle_score){ |
| 556 | + //echo " and best! ($fpf_end[0]-$fpf_start[0]>$best_middle_score)"; |
| 557 | + $best_middle_start = $fpf_start[0]; |
| 558 | + $best_middle_end = $fpf_end[0]; |
| 559 | + $best_middle_k = 0; |
| 560 | + $best_middle_score = $best_middle_end-$best_middle_start; |
412 | 561 | } |
413 | 562 | } |
414 | | - echo "forward:\n"; |
415 | | - print_r($fpf); |
416 | | - echo "backward:\n"; |
417 | | - print_r($fpb); |
418 | | - $fpb[0] = $this->backward_snake( 0, min( $fpb[1]-1, $fpb[-1]), -1, -1, $x_start, $y_start, $swapped); |
419 | | - if($fpf[0]>$fpb[0]+1){ |
420 | | - $this->foundSnake($x_start, $x_end, $y_start, $y_end, $x_start+$fpb[0]+1, $x_start+$fpf[0], $y_start+$fpb[0]+1, $y_start+$fpf[0], $swapped); |
421 | | - return; |
| 563 | + $fpb_end[0] = $this->backward_snake( 0, $fpb_start[0], -1, -1, $a, $b, $x_start, $y_start); |
| 564 | + //echo ", fpb_end[0]=$fpb_end[0]"; |
| 565 | + if($fpf_end[0]>$fpb_end[0] && !$middle_found[0]){ |
| 566 | + $no_middle_found = FALSE; |
| 567 | + $middle_found[$k] = TRUE; |
| 568 | + //echo "<br>middle found"; |
| 569 | + if($fpb_end[0]-$fpf_end[0]>$best_middle_score){ |
| 570 | + //echo " and best! ($fpb_end[0]-$fpf_end[0]>$best_middle_score)"; |
| 571 | + $best_middle_start = $fpb_end[0]+1; |
| 572 | + $best_middle_end = $fpb_start[0]+1; |
| 573 | + $best_middle_k = 0; |
| 574 | + $best_middle_score = $fpb_end[0]-$fpf_end[0]; |
| 575 | + } |
422 | 576 | } |
423 | | - echo "forward:\n"; |
424 | | - print_r($fpf); |
425 | | - echo "backward:\n"; |
426 | | - print_r($fpb); |
427 | 577 | |
| 578 | + if(!$no_middle_found){ |
| 579 | + if($endgame || $p == $m){ |
| 580 | + //echo "<br>endgame: $best_middle_start->$best_middle_end, k=$best_middle_k"; |
| 581 | + return $this->foundMiddleSnake($x_start, $x_end, $y_start, $y_end, $best_middle_start, $best_middle_end, $best_middle_k, $a, $b, $a_inLcs, $b_inLcs, $nbSwaps); |
| 582 | + } |
| 583 | + $endgame = TRUE; |
| 584 | + } |
428 | 585 | } |
429 | 586 | } |
430 | 587 | } |
431 | 588 | |
432 | 589 | /** |
433 | | - * Mark tokens $start until $end (exclusive) as in the LCS |
434 | | - * Diff the part before and after the snake. |
| 590 | + * Mark tokens as in the LCS if it is a snake. |
| 591 | + * Diff the part before and after the middle. |
435 | 592 | */ |
436 | | - protected function foundSnake($x_start, $x_end, $y_start, $y_end, $x_snakestart, $x_snakeend, $y_snakestart, $y_snakeend, $swapped){ |
437 | | - echo "<br>$x_start, $x_end, $y_start, $y_end, $x_snakestart, $x_snakeend, $y_snakestart, $y_snakeend, $swapped"; |
438 | | - if($swapped){ |
439 | | - $temp = $x_snakestart; |
440 | | - $x_snakestart = $y_snakestart; |
441 | | - $y_snakestart = $temp; |
442 | | - $temp = $x_snakeend; |
443 | | - $x_snakeend = $y_snakeend; |
444 | | - $y_snakeend = $temp; |
| 593 | + protected function foundMiddleSnake($x_start, $x_end, $y_start, $y_end, $fp_start, $fp_end, $k, &$a, &$b, &$a_inLcs, &$b_inLcs, &$nbSwaps){ |
| 594 | + //echo "<br><b>foundmiddle= $x_start, $x_end, $y_start, $y_end, $fp_start->$fp_end, $k</b>"; |
445 | 595 | |
446 | | - $temp = $x_start; |
447 | | - $x_start = $y_start; |
448 | | - $y_start = $temp; |
449 | | - $temp = $x_end; |
450 | | - $x_end = $y_end; |
451 | | - $y_end = $temp; |
452 | | - } |
| 596 | + $x_snakestart = $x_start+$fp_start-$k; |
| 597 | + $x_snakeend = $x_start+$fp_end-$k; |
| 598 | + $y_snakestart = $y_start+$fp_start; |
| 599 | + $y_snakeend = $y_start+$fp_end; |
| 600 | + |
453 | 601 | for($x=$x_snakestart;$x<$x_snakeend;$x++){ |
454 | | - $this->a_inLcs[$x] = TRUE; |
| 602 | + $a_inLcs[$x] = TRUE; |
455 | 603 | } |
456 | 604 | for($y=$y_snakestart;$y<$y_snakeend;$y++){ |
457 | | - $this->b_inLcs[$y] = TRUE; |
| 605 | + $b_inLcs[$y] = TRUE; |
458 | 606 | } |
459 | | - echo "<br>left:"; |
460 | | - $this->diff_recursive($x_start, $x_snakestart, $y_start, $y_snakestart); |
461 | | - echo "<br>right:"; |
462 | | - $this->diff_recursive($x_snakeend, $x_end, $y_snakeend, $y_end); |
| 607 | + //echo "<br>left:"; |
| 608 | + $swaps = $nbSwaps; |
| 609 | + $this->diff_recursive($x_start, $x_snakestart, $y_start, $y_snakestart, $a, $b, $a_inLcs, $b_inLcs, $nbSwaps); |
| 610 | + //echo "<br>right:"; |
| 611 | + if($swaps==$nbSwaps){ |
| 612 | + $this->diff_recursive($x_snakeend, $x_end, $y_snakeend, $y_end, $a, $b, $a_inLcs, $b_inLcs, $nbSwaps); |
| 613 | + }else{ |
| 614 | + $this->diff_recursive($y_snakeend, $y_end, $x_snakeend, $x_end, $a, $b, $a_inLcs, $b_inLcs, $nbSwaps); |
| 615 | + } |
463 | 616 | } |
| 617 | + |
| 618 | + protected function reachedEndHorizontal($x_start, $x_end, $y_start, $y_end, &$a, &$b, &$a_inLcs, &$b_inLcs, &$nbSwaps){ |
| 619 | + //echo "<br><b>reached end horizontally! $x_start, $x_end, $y_start, $y_end</b>"; |
| 620 | + $y_end = $y_end-1; |
| 621 | + $this->diff_recursive($x_start, $x_end, $y_start, $y_end, $a, $b, $a_inLcs, $b_inLcs, $nbSwaps); |
| 622 | + } |
| 623 | + |
| 624 | + protected function reachedEndVertical($x_start, $x_end, $y_start, $y_end, &$a, &$b, &$a_inLcs, &$b_inLcs, &$nbSwaps){ |
| 625 | + //echo "<br><b>reached end vertically! $x_start, $x_end, $y_start, $y_end</b>"; |
| 626 | + $x_end = $x_end-1; |
| 627 | + $this->diff_recursive($x_start, $x_end, $y_start, $y_end, $a, $b, $a_inLcs, $b_inLcs, $nbSwaps); |
| 628 | + } |
464 | 629 | } |
| 630 | + |
| 631 | +function swap(&$a, &$b){ |
| 632 | + //echo "<br>swapping $a and $b"; |
| 633 | + $temp = $b; |
| 634 | + $b = $a; |
| 635 | + $a = $temp; |
| 636 | +} |
| 637 | + |
465 | 638 | ?> |
\ No newline at end of file |