Index: branches/visual_diff/phase3/includes/Diff.php |
— | — | @@ -13,6 +13,8 @@ |
14 | 14 | protected $b; |
15 | 15 | protected $n; |
16 | 16 | |
| 17 | + protected $swapped; |
| 18 | + |
17 | 19 | /** |
18 | 20 | * The sequences need to be ordered by length. |
19 | 21 | */ |
— | — | @@ -22,27 +24,50 @@ |
23 | 25 | $this->m = sizeof($to); |
24 | 26 | $this->b = $from; |
25 | 27 | $this->n = sizeof($from); |
| 28 | + $this->swapped = TRUE; |
26 | 29 | }else{ |
27 | 30 | $this->a = $from; |
28 | 31 | $this->m = sizeof($from); |
29 | 32 | $this->b = $to; |
30 | 33 | $this->n = sizeof($to); |
| 34 | + $this->swapped = FALSE; |
31 | 35 | } |
32 | 36 | } |
33 | 37 | |
34 | 38 | /** |
35 | 39 | * Slide down the snake untill reaching an inequality. |
36 | 40 | */ |
37 | | - protected function snake($k,$y){ |
38 | | - $x = $y-$k; |
39 | | - while($x < $this->m && $y < $this->n && $this->equals($this->a[$x],$this->b[$y])){ |
40 | | - ++$x; |
41 | | - ++$y; |
| 41 | + protected function forward_snake( $k, $y, $maxx, $maxy, $offsetx=0, $offsety=0, $swapped=FALSE){ |
| 42 | + if($swapped){ |
| 43 | + return $this->forward_snake( $k, $y-$k, $maxy, $maxx, $offsety, $offsetx, $swapped ); |
| 44 | + }else{ |
| 45 | + $x = $y-$k; |
| 46 | + while($x < $maxx && $y < $maxy && $this->equals($this->a[$offsetx+$x],$this->b[$offsety+$y])){ |
| 47 | + ++$x; |
| 48 | + ++$y; |
| 49 | + } |
| 50 | + |
| 51 | + return $y; |
42 | 52 | } |
43 | | - return $y; |
44 | 53 | } |
45 | 54 | |
46 | 55 | /** |
| 56 | + * Slide up the snake until reaching an inequality. |
| 57 | + */ |
| 58 | + protected function backward_snake( $k, $y, $maxx, $maxy, $offsetx=0, $offsety=0, $swapped=FALSE){ |
| 59 | + if($swapped){ |
| 60 | + return $this->backward_snake( $k, $y-$k, $maxy, $maxx, $offsety, $offsetx, $swapped ); |
| 61 | + }else{ |
| 62 | + $x = $y-$k; |
| 63 | + while($x > $maxx && $y > $maxy && $this->equals($this->a[$offsetx+$x],$this->b[$offsety+$y])){ |
| 64 | + --$x; |
| 65 | + --$y; |
| 66 | + } |
| 67 | + return $y; |
| 68 | + } |
| 69 | + } |
| 70 | + |
| 71 | + /** |
47 | 72 | * Override this method to compute the LCS with different equality measures. |
48 | 73 | */ |
49 | 74 | public function equals($left, $right){ |
— | — | @@ -51,12 +76,14 @@ |
52 | 77 | } |
53 | 78 | |
54 | 79 | /** |
55 | | - * Class implementing the Diff algorithm. |
| 80 | + * Class implementing the LCS algorithm by backward propagation. |
56 | 81 | * |
57 | 82 | * This is the O(NP) variant as descibed by Wu et al. in "An O(NP) Sequence Comparison Algorithm" |
| 83 | + * |
| 84 | + * @author Guy Van den Broeck |
58 | 85 | */ |
59 | | -class LcsAlgorithm extends AbstractDiffAlgorithm { |
60 | | - |
| 86 | +class BackwardLcs extends AbstractDiffAlgorithm { |
| 87 | + |
61 | 88 | /* |
62 | 89 | * Computers the length of the longest common subsequence of |
63 | 90 | * the arrays $from and $to. |
— | — | @@ -64,7 +91,7 @@ |
65 | 92 | public function lcs (array $from, array $to) { |
66 | 93 | return (sizeof($from)+sizeof($to)-$this->ses( $from, $to))/2; |
67 | 94 | } |
68 | | - |
| 95 | + |
69 | 96 | /* |
70 | 97 | * Computers the length of the shortest edit script of |
71 | 98 | * the arrays $from and $to. |
— | — | @@ -72,34 +99,195 @@ |
73 | 100 | public function ses (array $from, array $to) { |
74 | 101 | $this->order($from, $to); |
75 | 102 | |
| 103 | + if($this->n==0){ |
| 104 | + return $this->m; |
| 105 | + }else if($this->m==0){ |
| 106 | + return $this->n; |
| 107 | + } |
| 108 | + |
76 | 109 | $delta = $this->n-$this->m; |
| 110 | + $fp = array_fill_keys( range( -($this->m+1), $this->n+1), $this->n); |
| 111 | + $p = -1; |
| 112 | + |
| 113 | + do{ |
| 114 | + ++$p; |
| 115 | + for($k=$delta+$p;$k>0;$k--){ |
| 116 | + $fp[$k] = $this->backward_snake( $k, min( $fp[$k+1]-1, $fp[$k-1]), -1, -1); |
| 117 | + } |
| 118 | + for($k=-$p;$k<0;$k++){ |
| 119 | + $fp[$k] = $this->backward_snake( $k, min( $fp[$k+1]-1, $fp[$k-1]), -1, -1); |
| 120 | + } |
| 121 | + $fp[0] = $this->backward_snake( 0, min( $fp[1]-1, $fp[-1]), -1, -1); |
| 122 | + }while($fp[0]!=-1); |
| 123 | + |
| 124 | + echo "<br>$delta, $p"; |
| 125 | + return $delta+2*$p; |
| 126 | + } |
| 127 | + |
| 128 | +} |
| 129 | + |
| 130 | +/** |
| 131 | + * Class implementing the LCS algorithm by forward propagation. |
| 132 | + * |
| 133 | + * This is the O(NP) variant as descibed by Wu et al. in "An O(NP) Sequence Comparison Algorithm" |
| 134 | + * |
| 135 | + * @author Guy Van den Broeck |
| 136 | + */ |
| 137 | +class ForwardLcs extends AbstractDiffAlgorithm { |
| 138 | + |
| 139 | + /* |
| 140 | + * Computers the length of the longest common subsequence of |
| 141 | + * the arrays $from and $to. |
| 142 | + */ |
| 143 | + public function lcs (array $from, array $to) { |
| 144 | + return (sizeof($from)+sizeof($to)-$this->ses( $from, $to))/2; |
| 145 | + } |
| 146 | + |
| 147 | + /* |
| 148 | + * Computers the length of the shortest edit script of |
| 149 | + * the arrays $from and $to. |
| 150 | + */ |
| 151 | + public function ses (array $from, array $to) { |
| 152 | + $this->order($from, $to); |
| 153 | + |
| 154 | + if($this->n==0){ |
| 155 | + return $this->m; |
| 156 | + }else if($this->m==0){ |
| 157 | + return $this->n; |
| 158 | + } |
| 159 | + |
| 160 | + $delta = $this->n-$this->m; |
77 | 161 | $fp = array_fill_keys( range( -($this->m+1), $this->n+1), -1); |
78 | 162 | $p = -1; |
79 | 163 | |
80 | 164 | do{ |
81 | 165 | ++$p; |
82 | 166 | for($k=-$p;$k<$delta;$k++){ |
83 | | - $fp[$k] = $this->snake( $k, max( $fp[$k-1]+1, $fp[$k+1])); |
| 167 | + $fp[$k] = $this->forward_snake( $k, max( $fp[$k-1]+1, $fp[$k+1]), $this->m, $this->n); |
84 | 168 | } |
85 | | - |
86 | 169 | for($k=$delta+$p;$k>$delta;$k--){ |
87 | | - $fp[$k] = $this->snake( $k, max( $fp[$k-1]+1, $fp[$k+1])); |
| 170 | + $fp[$k] = $this->forward_snake( $k, max( $fp[$k-1]+1, $fp[$k+1]), $this->m, $this->n); |
88 | 171 | } |
89 | | - |
90 | | - $fp[$k] = $this->snake( $delta, max( $fp[$delta-1]+1, $fp[$delta+1])); |
| 172 | + $fp[$delta] = $this->forward_snake( $delta, max( $fp[$delta-1]+1, $fp[$delta+1]), $this->m, $this->n); |
91 | 173 | }while($fp[$delta]!=$this->n); |
92 | 174 | |
| 175 | + echo "<br>$delta, $p"; |
93 | 176 | return $delta+2*$p; |
94 | 177 | } |
95 | | - |
| 178 | + |
96 | 179 | } |
97 | 180 | |
98 | 181 | /** |
| 182 | + * Class implementing the LCS algorithm by bidirectional propagation. |
| 183 | + * |
| 184 | + * !!!!!!!!!!!!!!!!!!!!!! |
| 185 | + * BEWARE HERE BE DRAGONS |
| 186 | + * This algorithm is not guaranteed to find the exact size of the LCS. |
| 187 | + * There are border cases where it will be 1 (or more) off. |
| 188 | + * This implmentation is useful because it is very fast. |
| 189 | + * !!!!!!!!!!!!!!!!!!!!!! |
| 190 | + * |
| 191 | + * This is the O(NP) variant as descibed by Wu et al. in "An O(NP) Sequence Comparison Algorithm" |
| 192 | + * |
| 193 | + * @author Guy Van den Broeck |
| 194 | + */ |
| 195 | +class BidirectionalLcs extends AbstractDiffAlgorithm { |
| 196 | + |
| 197 | + /* |
| 198 | + * Computers the length of the longest common subsequence of |
| 199 | + * the arrays $from and $to. |
| 200 | + */ |
| 201 | + public function lcs (array $from, array $to) { |
| 202 | + return (sizeof($from)+sizeof($to)-$this->ses( $from, $to))/2; |
| 203 | + } |
| 204 | + |
| 205 | + /* |
| 206 | + * Computers the length of the shortest edit script of |
| 207 | + * the arrays $from and $to. |
| 208 | + */ |
| 209 | + public function ses (array $from, array $to) { |
| 210 | + $this->order($from, $to); |
| 211 | + |
| 212 | + if($this->n==0){ |
| 213 | + return $this->m; |
| 214 | + }else if($this->m==0){ |
| 215 | + return $this->n; |
| 216 | + } |
| 217 | + |
| 218 | + $delta = $this->n-$this->m; |
| 219 | + |
| 220 | + $fpkeys = range( -($this->m+1), $this->n+1); |
| 221 | + $fpf = array_fill_keys( $fpkeys, -1); |
| 222 | + $fpb = array_fill_keys( $fpkeys, $this->n); |
| 223 | + $p = -1; |
| 224 | + |
| 225 | + while(1){ |
| 226 | + ++$p; |
| 227 | + |
| 228 | + //forward |
| 229 | + for($k=-$p;$k<$delta;$k++){ |
| 230 | + $fpf[$k] = $this->forward_snake( $k, max( $fpf[$k-1]+1, $fpf[$k+1]), $this->m, $this->n); |
| 231 | + if($fpf[$k]>$fpb[$k]+1){ |
| 232 | + echo "<br>0, $delta, $p"; |
| 233 | + return $delta+4*$p-4; |
| 234 | + } |
| 235 | + } |
| 236 | + for($k=$delta+$p;$k>$delta;$k--){ |
| 237 | + $fpf[$k] = $this->forward_snake( $k, max( $fpf[$k-1]+1, $fpf[$k+1]), $this->m, $this->n); |
| 238 | + if($fpf[$k]>$fpb[$k]+1){ |
| 239 | + echo "<br>0, $delta, $p"; |
| 240 | + return $delta+4*$p-4; |
| 241 | + } |
| 242 | + } |
| 243 | + $fpf[$delta] = $this->forward_snake( $delta, max( $fpf[$delta-1]+1, $fpf[$delta+1]), $this->m, $this->n); |
| 244 | + |
| 245 | + if($fpf[$delta]>$fpb[$delta]+1){ |
| 246 | + echo "<br>0, $delta, $p"; |
| 247 | + return $delta+4*$p-4; |
| 248 | + } |
| 249 | + |
| 250 | + //backward |
| 251 | + for($k=$delta+$p;$k>0;$k--){ |
| 252 | + $fpb[$k] = $this->backward_snake( $k, min( $fpb[$k+1]-1, $fpb[$k-1]), -1, -1); |
| 253 | + if($fpf[$k]>$fpb[$k]+1){ |
| 254 | + echo "<br>1, $delta, $p"; |
| 255 | + return $delta+4*$p; |
| 256 | + } |
| 257 | + } |
| 258 | + for($k=-$p;$k<0;$k++){ |
| 259 | + $fpb[$k] = $this->backward_snake( $k, min( $fpb[$k+1]-1, $fpb[$k-1]), -1, -1); |
| 260 | + if($fpf[$k]>$fpb[$k]+1){ |
| 261 | + echo "<br>1, $delta, $p"; |
| 262 | + return $delta+4*$p; |
| 263 | + } |
| 264 | + } |
| 265 | + $fpb[0] = $this->backward_snake( 0, min( $fpb[1]-1, $fpb[-1]), -1, -1); |
| 266 | + |
| 267 | + if($fpf[0]>$fpb[0]+1){ |
| 268 | + echo "<br>1, $delta, $p"; |
| 269 | + return $delta+4*$p; |
| 270 | + } |
| 271 | + |
| 272 | + } |
| 273 | + } |
| 274 | + |
| 275 | +} |
| 276 | + |
| 277 | +/** |
99 | 278 | * Class implementing the Diff algorithm. |
100 | 279 | * |
| 280 | + * !!!!!!!!!!!!!!!!!!!!!! |
| 281 | + * BEWARE HERE BE DRAGONS |
| 282 | + * This algorithm is not guaranteed to find the exact size of the LCS. |
| 283 | + * There are border cases where it will be 1 (or more) off. |
| 284 | + * This implmentation is useful because it is very fast. |
| 285 | + * !!!!!!!!!!!!!!!!!!!!!! |
| 286 | + * |
101 | 287 | * This is the O(NP) variant as descibed by Wu et al. in "An O(NP) Sequence Comparison Algorithm" |
| 288 | + * |
| 289 | + * @author Guy Van den Broeck |
102 | 290 | */ |
103 | | -class DiffAlgorithm extends AbstractDiffAlgorithm { |
| 291 | +class FastDiffAlgorithm extends AbstractDiffAlgorithm { |
104 | 292 | |
105 | 293 | private $a_inLcs; |
106 | 294 | |
— | — | @@ -114,12 +302,163 @@ |
115 | 303 | */ |
116 | 304 | public function diff (array $from, array $to) { |
117 | 305 | $this->order($from, $to); |
| 306 | + $this->a_inLcs = array_fill(0,$this->m,FALSE); |
| 307 | + $this->b_inLcs = array_fill(0,$this->n,FALSE); |
118 | 308 | |
119 | 309 | //There is no need to remove common tokens at the beginning and end, the first snake will catch them. |
120 | 310 | //Hashing the tokens is not generally applicable. |
121 | 311 | //Removing tokens that are not in both sequences is O(N*M)>=O(N*P). There is little to gain when most edits are small. |
122 | 312 | |
123 | | - //TODO |
| 313 | + $this->diff_recursive(0,$this->m,0,$this->n); |
| 314 | + |
| 315 | + if($this->swapped){ |
| 316 | + return array($this->b_inLcs,$this->a_inLcs); |
| 317 | + }else{ |
| 318 | + return array($this->a_inLcs,$this->b_inLcs); |
| 319 | + } |
124 | 320 | } |
| 321 | + |
| 322 | + /** |
| 323 | + * Calculate the diff with a divide and conquer method. |
| 324 | + * |
| 325 | + * Ends are exclusive |
| 326 | + */ |
| 327 | + protected function diff_recursive($x_start, $x_end, $y_start, $y_end){ |
| 328 | + echo "<br>$x_start-$x_end , $y_start-$y_end"; |
| 329 | + if($x_end>$x_start && $y_end>$y_start){ |
| 330 | + |
| 331 | + if($x_end-$x_start>$y_end-$y_start){ |
| 332 | + $temp = $x_end; |
| 333 | + $x_end = $y_end; |
| 334 | + $y_end = $temp; |
| 335 | + $temp = $x_start; |
| 336 | + $x_start = $y_start; |
| 337 | + $y_start = $temp; |
| 338 | + $swapped = TRUE; |
| 339 | + }else{ |
| 340 | + $swapped = FALSE; |
| 341 | + } |
| 342 | + |
| 343 | + $m = $x_end-$x_start; |
| 344 | + $n = $y_end-$y_start; |
| 345 | + |
| 346 | + $delta = $n-$m; |
| 347 | + |
| 348 | + $fpkeys = range( -($m+1), $n+1); |
| 349 | + $fpf = array_fill_keys( $fpkeys, -1); |
| 350 | + $fpb = array_fill_keys( $fpkeys, $n); |
| 351 | + $p = -1; |
| 352 | + |
| 353 | + while(1){ |
| 354 | + ++$p; |
| 355 | + |
| 356 | + echo "$p\n"; |
| 357 | + echo "forward:\n"; |
| 358 | + print_r($fpf); |
| 359 | + echo "backward:\n"; |
| 360 | + print_r($fpb); |
| 361 | + |
| 362 | + //forward |
| 363 | + for($k=-$p;$k<$delta;$k++){ |
| 364 | + $fpf[$k] = $this->forward_snake( $k, max( $fpf[$k-1]+1, $fpf[$k+1]), $m, $n, $x_start, $y_start, $swapped); |
| 365 | + if($fpf[$k]>$fpb[$k]+1){ |
| 366 | + $this->foundSnake($x_start, $x_end, $y_start, $y_end, $x_start+$fpb[$k]-$k+1, $x_start+$fpf[$k]-$k, $y_start+$fpb[$k]+1, $y_start+$fpf[$k], $swapped); |
| 367 | + return; |
| 368 | + } |
| 369 | + } |
| 370 | + echo "forward:\n"; |
| 371 | + print_r($fpf); |
| 372 | + echo "backward:\n"; |
| 373 | + print_r($fpb); |
| 374 | + for($k=$delta+$p;$k>$delta;$k--){ |
| 375 | + $fpf[$k] = $this->forward_snake( $k, max( $fpf[$k-1]+1, $fpf[$k+1]), $m, $n, $x_start, $y_start, $swapped); |
| 376 | + if($fpf[$k]>$fpb[$k]+1){ |
| 377 | + $this->foundSnake($x_start, $x_end, $y_start, $y_end, $x_start+$fpb[$k]-$k+1, $x_start+$fpf[$k]-$k, $y_start+$fpb[$k]+1, $y_start+$fpf[$k], $swapped); |
| 378 | + return; |
| 379 | + } |
| 380 | + } |
| 381 | + echo "forward:\n"; |
| 382 | + print_r($fpf); |
| 383 | + echo "backward:\n"; |
| 384 | + print_r($fpb); |
| 385 | + $fpf[$delta] = $this->forward_snake( $delta, max( $fpf[$delta-1]+1, $fpf[$delta+1]), $m, $n, $x_start, $y_start, $swapped); |
| 386 | + if($fpf[$delta]>$fpb[$delta]+1){ |
| 387 | + $this->foundSnake($x_start, $x_end, $y_start, $y_end, $x_start+$fpb[$delta]-$delta+1, $x_start+$fpf[$delta]-$k, $y_start+$fpb[$delta]+1, $y_start+$fpf[$delta], $swapped); |
| 388 | + return; |
| 389 | + } |
| 390 | + echo "forward:\n"; |
| 391 | + print_r($fpf); |
| 392 | + echo "backward:\n"; |
| 393 | + print_r($fpb); |
| 394 | + |
| 395 | + //backward |
| 396 | + for($k=$delta+$p;$k>0;$k--){ |
| 397 | + $fpb[$k] = $this->backward_snake( $k, min( $fpb[$k+1]-1, $fpb[$k-1]), -1, -1, $x_start, $y_start, $swapped); |
| 398 | + if($fpf[$k]>$fpb[$k]+1){ |
| 399 | + $this->foundSnake($x_start, $x_end, $y_start, $y_end, $x_start+$fpb[$k]-$k+1, $x_start+$fpf[$k]-$k, $y_start+$fpb[$k]+1, $y_start+$fpf[$k], $swapped); |
| 400 | + return; |
| 401 | + } |
| 402 | + } |
| 403 | + echo "forward:\n"; |
| 404 | + print_r($fpf); |
| 405 | + echo "backward:\n"; |
| 406 | + print_r($fpb); |
| 407 | + for($k=-$p;$k<0;$k++){ |
| 408 | + $fpb[$k] = $this->backward_snake( $k, min( $fpb[$k+1]-1, $fpb[$k-1]), -1, -1, $x_start, $y_start, $swapped); |
| 409 | + if($fpf[$k]>$fpb[$k]+1){ |
| 410 | + $this->foundSnake($x_start, $x_end, $y_start, $y_end, $x_start+$fpb[$k]-$k+1, $x_start+$fpf[$k]-$k, $y_start+$fpb[$k]+1, $y_start+$fpf[$k], $swapped); |
| 411 | + return; |
| 412 | + } |
| 413 | + } |
| 414 | + echo "forward:\n"; |
| 415 | + print_r($fpf); |
| 416 | + echo "backward:\n"; |
| 417 | + print_r($fpb); |
| 418 | + $fpb[0] = $this->backward_snake( 0, min( $fpb[1]-1, $fpb[-1]), -1, -1, $x_start, $y_start, $swapped); |
| 419 | + if($fpf[0]>$fpb[0]+1){ |
| 420 | + $this->foundSnake($x_start, $x_end, $y_start, $y_end, $x_start+$fpb[0]+1, $x_start+$fpf[0], $y_start+$fpb[0]+1, $y_start+$fpf[0], $swapped); |
| 421 | + return; |
| 422 | + } |
| 423 | + echo "forward:\n"; |
| 424 | + print_r($fpf); |
| 425 | + echo "backward:\n"; |
| 426 | + print_r($fpb); |
| 427 | + |
| 428 | + } |
| 429 | + } |
| 430 | + } |
| 431 | + |
| 432 | + /** |
| 433 | + * Mark tokens $start until $end (exclusive) as in the LCS |
| 434 | + * Diff the part before and after the snake. |
| 435 | + */ |
| 436 | + protected function foundSnake($x_start, $x_end, $y_start, $y_end, $x_snakestart, $x_snakeend, $y_snakestart, $y_snakeend, $swapped){ |
| 437 | + echo "<br>$x_start, $x_end, $y_start, $y_end, $x_snakestart, $x_snakeend, $y_snakestart, $y_snakeend, $swapped"; |
| 438 | + if($swapped){ |
| 439 | + $temp = $x_snakestart; |
| 440 | + $x_snakestart = $y_snakestart; |
| 441 | + $y_snakestart = $temp; |
| 442 | + $temp = $x_snakeend; |
| 443 | + $x_snakeend = $y_snakeend; |
| 444 | + $y_snakeend = $temp; |
| 445 | + |
| 446 | + $temp = $x_start; |
| 447 | + $x_start = $y_start; |
| 448 | + $y_start = $temp; |
| 449 | + $temp = $x_end; |
| 450 | + $x_end = $y_end; |
| 451 | + $y_end = $temp; |
| 452 | + } |
| 453 | + for($x=$x_snakestart;$x<$x_snakeend;$x++){ |
| 454 | + $this->a_inLcs[$x] = TRUE; |
| 455 | + } |
| 456 | + for($y=$y_snakestart;$y<$y_snakeend;$y++){ |
| 457 | + $this->b_inLcs[$y] = TRUE; |
| 458 | + } |
| 459 | + echo "<br>left:"; |
| 460 | + $this->diff_recursive($x_start, $x_snakestart, $y_start, $y_snakestart); |
| 461 | + echo "<br>right:"; |
| 462 | + $this->diff_recursive($x_snakeend, $x_end, $y_snakeend, $y_end); |
| 463 | + } |
125 | 464 | } |
126 | 465 | ?> |
\ No newline at end of file |