Index: branches/visual_diff/phase3/includes/Diff.php |
— | — | @@ -18,454 +18,487 @@ |
19 | 19 | */ |
20 | 20 | |
21 | 21 | /** |
22 | | - * Implementation of the Diff algorithm. |
23 | | - * |
24 | | - * DO NOT USE ON PRODUCTION SYSTEMS |
25 | | - * |
26 | | - * The algorithm is based on the O(NP) LCS algorithm as descibed by Wu et al. in "An O(NP) Sequence Comparison Algorithm" |
27 | | - * and extended with my own ideas. |
| 22 | + * This diff implementation is mainly lifted from the LCS algorithm of the Eclipse project which |
| 23 | + * in turn is based on Myers' "An O(ND) difference algorithm and its variations" |
| 24 | + * (http://citeseer.ist.psu.edu/myers86ond.html) with range compression (see Wu et al.'s |
| 25 | + * "An O(NP) Sequence Comparison Algorithm"). |
28 | 26 | * |
29 | | - * @return array($from_removed, $to_added) |
30 | | - * TRUE if the token was removed or added. |
| 27 | + * This implementation supports an upper bound on the excution time. |
31 | 28 | * |
| 29 | + * Complexity: O((M + N)D) worst case time, O(M + N + D^2) expected time, O(M + N) space |
| 30 | + * |
32 | 31 | * @author Guy Van den Broeck |
| 32 | + * |
33 | 33 | */ |
34 | | -function wikidiff3_diff(/*array*/ $from, /*array*/ $to, $boundRunningTime=FALSE, $max_NP_before_bound = 800000){ |
35 | | - wfProfileIn( __METHOD__ ); |
| 34 | +class WikiDiff3{ |
36 | 35 | |
37 | | - $m = sizeof($from); |
38 | | - $n = sizeof($to); |
| 36 | + //Input variables |
| 37 | + private $from; |
| 38 | + private $to; |
| 39 | + private $m; |
| 40 | + private $n; |
39 | 41 | |
40 | | - $result_from = array_fill(0,$m,TRUE); |
41 | | - $result_to = array_fill(0,$n,TRUE); |
| 42 | + private $tooLong; |
| 43 | + private $powLimit; |
42 | 44 | |
43 | | - //reduce the complexity for the next step (intentionally done twice) |
44 | | - //remove common tokens at the start |
45 | | - $i=0; |
46 | | - while($i<$m && $i<$n && $from[$i]===$to[$i]){ |
47 | | - $result_from[$i] = $result_to[$i] = FALSE; |
48 | | - unset($from[$i],$to[$i]); |
49 | | - ++$i; |
| 45 | + //State variables |
| 46 | + private $maxDifferences; |
| 47 | + |
| 48 | + //Output variables |
| 49 | + public $length; |
| 50 | + public $added; |
| 51 | + public $removed; |
| 52 | + public $heuristicUsed; |
| 53 | + |
| 54 | + function __construct($tooLong = 2000000, $powLimit = 1.45){ |
| 55 | + $this->tooLong = $tooLong; |
| 56 | + $this->powLimit = $powLimit; |
50 | 57 | } |
51 | 58 | |
52 | | - //remove common tokens at the end |
53 | | - $j=1; |
54 | | - while($i+$j<=$m && $i+$j<=$n && $from[$m-$j]===$to[$n-$j]){ |
55 | | - $result_from[$m-$j] = $result_to[$n-$j] = FALSE; |
56 | | - unset($from[$m-$j],$to[$n-$j]); |
57 | | - ++$j; |
58 | | - } |
| 59 | + public function diff(/*array*/ $from, /*array*/ $to){ |
| 60 | + $m = sizeof($from); |
| 61 | + $n = sizeof($to); |
| 62 | + |
| 63 | + $this->heuristicUsed = FALSE; |
59 | 64 | |
60 | | - $newFrom = $newFromIndex = $newTo = $newToIndex = array(); |
| 65 | + $result_from = $m>0?array_fill(0,$m,true):array(); |
| 66 | + $result_to = $n>0?array_fill(0,$n,true):array(); |
61 | 67 | |
62 | | - //remove tokens not in both sequences |
63 | | - $shared = array_fill_keys($from,FALSE); |
64 | | - foreach($to as $index => $el){ |
65 | | - if(array_key_exists($el,$shared)){ |
66 | | - //keep it |
67 | | - $shared[$el] = TRUE; |
68 | | - $newTo[] = $el; |
69 | | - $newToIndex[] = $index; |
| 68 | + //reduce the complexity for the next step (intentionally done twice) |
| 69 | + //remove common tokens at the start |
| 70 | + $i=0; |
| 71 | + while($i<$m && $i<$n && $from[$i]===$to[$i]){ |
| 72 | + $result_from[$i] = $result_to[$i] = false; |
| 73 | + unset($from[$i],$to[$i]); |
| 74 | + ++$i; |
70 | 75 | } |
71 | | - } |
72 | | - foreach($from as $index => $el){ |
73 | | - if($shared[$el]){ |
74 | | - //keep it |
75 | | - $newFrom[] = $el; |
76 | | - $newFromIndex[] = $index; |
| 76 | + |
| 77 | + //remove common tokens at the end |
| 78 | + $j=1; |
| 79 | + while($i+$j<=$m && $i+$j<=$n && $from[$m-$j]===$to[$n-$j]){ |
| 80 | + $result_from[$m-$j] = $result_to[$n-$j] = false; |
| 81 | + unset($from[$m-$j],$to[$n-$j]); |
| 82 | + ++$j; |
77 | 83 | } |
78 | | - } |
79 | 84 | |
80 | | - unset($from, $to, $shared); |
| 85 | + $newFrom = $newFromIndex = $newTo = $newToIndex = array(); |
81 | 86 | |
82 | | - $m2 = sizeof($newFrom); |
83 | | - $n2 = sizeof($newTo); |
84 | | - $offsetx = $offsety = 0; |
85 | | - $from_inLcs = new InLcs($m2); |
86 | | - $to_inLcs = new InLcs($n2); |
| 87 | + //remove tokens not in both sequences |
| 88 | + $shared = array_fill_keys($from,false); |
| 89 | + foreach($to as $index => $el){ |
| 90 | + if(array_key_exists($el,$shared)){ |
| 91 | + //keep it |
| 92 | + $this->to[] = $el; |
| 93 | + $shared[$el] = true; |
| 94 | + $newToIndex[] = $index; |
| 95 | + } |
| 96 | + } |
| 97 | + foreach($from as $index => $el){ |
| 98 | + if($shared[$el]){ |
| 99 | + //keep it |
| 100 | + $this->from[] = $el; |
| 101 | + $newFromIndex[] = $index; |
| 102 | + } |
| 103 | + } |
87 | 104 | |
88 | | - wikidiff3_diffPart($newFrom, $newTo, $from_inLcs, $to_inLcs, $m2, $n2, $offsetx, $offsety, $m2, $boundRunningTime, $max_NP_before_bound); |
| 105 | + unset($shared); |
89 | 106 | |
90 | | - foreach($from_inLcs->inLcs as $key => $in){ |
91 | | - if($in){ |
92 | | - $result_from[$newFromIndex[$key]] = FALSE; |
| 107 | + $this->m = sizeof($this->from); |
| 108 | + $this->n = sizeof($this->to); |
| 109 | + $this->removed = $this->m>0?array_fill(0,$this->m,true):array(); |
| 110 | + $this->added = $this->n>0?array_fill(0,$this->n,true):array(); |
| 111 | + |
| 112 | + $this->longestCommonSubsequence(); |
| 113 | + |
| 114 | + $this->m = $m; |
| 115 | + $this->n = $n; |
| 116 | + $this->length += $i+$j-1; |
| 117 | + |
| 118 | + foreach($this->removed as $key => $removed){ |
| 119 | + if(!$removed){ |
| 120 | + $result_from[$newFromIndex[$key]] = false; |
| 121 | + } |
93 | 122 | } |
94 | | - } |
95 | | - foreach($to_inLcs->inLcs as $key => $in){ |
96 | | - if($in){ |
97 | | - $result_to[$newToIndex[$key]] = FALSE; |
| 123 | + foreach($this->added as $key => $added){ |
| 124 | + if(!$added){ |
| 125 | + $result_to[$newToIndex[$key]] = false; |
| 126 | + } |
98 | 127 | } |
| 128 | + unset($this->added, $this->removed); |
| 129 | + return array($result_from, $result_to); |
99 | 130 | } |
100 | | - |
101 | | - wfProfileOut( __METHOD__ ); |
102 | | - return array($result_from, $result_to); |
103 | | -} |
104 | 131 | |
105 | | -function wikidiff3_diffPart(array $a, array $b, InLcs $a_inLcs, InLcs $b_inLcs, $m, $n, $offsetx, $offsety, $bestKnownLcs, $boundRunningTime=FALSE, $max_NP_before_bound = 800000){ |
106 | | - if($bestKnownLcs==0 || $m==0 || $n==0){ |
107 | | - return; |
108 | | - } |
109 | | - |
110 | | - if($m>$n){ |
111 | | - return wikidiff3_diffPart($b, $a, $b_inLcs, $a_inLcs, $n, $m, $offsety, $offsetx, $bestKnownLcs, $boundRunningTime, $max_NP_before_bound); |
112 | | - } |
| 132 | + public function longestCommonSubsequence() { |
| 133 | + if ($this->m == 0 || $this->n == 0) { |
| 134 | + $this->length = 0; |
| 135 | + return; |
| 136 | + } |
113 | 137 | |
114 | | - $a_inLcs_sym = &$a_inLcs->inLcs; |
115 | | - $b_inLcs_sym = &$b_inLcs->inLcs; |
| 138 | + $this->maxDifferences = ceil(($this->m + $this->n) / 2.0); |
| 139 | + if ($this->m * $this->n > $this->tooLong) { |
| 140 | + // limit complexity to D^POW_LIMIT for long sequences |
| 141 | + $this->maxDifferences = floor(pow($this->maxDifferences, $this->powLimit - 1.0)); |
| 142 | + wfDebug("Limiting max number of differences to $this->maxDifferences"); |
| 143 | + } |
116 | 144 | |
117 | | - //remove common tokens at the start |
118 | | - while($m>0 && $n>0 && $a[0]===$b[0]){ |
119 | | - $a_inLcs_sym[$offsetx] = $b_inLcs_sym[$offsety] = TRUE; |
120 | | - ++$offsetx; ++$offsety; |
121 | | - --$m; --$n; |
122 | | - --$bestKnownLcs; |
123 | | - array_shift($a); |
124 | | - array_shift($b); |
125 | | - } |
| 145 | + /* |
| 146 | + * The common prefixes and suffixes are always part of some LCS, include |
| 147 | + * them now to reduce our search space |
| 148 | + */ |
| 149 | + $max = min($this->m, $this->n); |
| 150 | + for ($forwardBound = 0; $forwardBound < $max |
| 151 | + && $this->from[$forwardBound]===$this->to[$forwardBound]; ++$forwardBound) { |
| 152 | + $this->removed[$forwardBound] = $this->added[$forwardBound] = false; |
| 153 | + } |
126 | 154 | |
127 | | - //remove common tokens at the end |
128 | | - while($m>0 && $n>0 && $a[$m-1]===$b[$n-1]){ |
129 | | - $a_inLcs_sym[$offsetx+$m-1] = $b_inLcs_sym[$offsety+$n-1] = TRUE; |
130 | | - --$m; --$n; |
131 | | - --$bestKnownLcs; |
132 | | - array_pop($a); |
133 | | - array_pop($b); |
134 | | - } |
| 155 | + $backBoundL1 = $this->m - 1; |
| 156 | + $backBoundL2 = $this->n - 1; |
135 | 157 | |
136 | | - if($bestKnownLcs==0 || $m==0 || $n==0){ |
137 | | - return; |
| 158 | + while ($backBoundL1 >= $forwardBound && $backBoundL2 >= $forwardBound |
| 159 | + && $this->from[$backBoundL1] === $this->to[$backBoundL2]) { |
| 160 | + $this->removed[$backBoundL1--] = $this->added[$backBoundL2--] = false; |
| 161 | + } |
| 162 | + |
| 163 | + $temp = array_fill(0,$this->m + $this->n + 1, 0); |
| 164 | + $V = array($temp,$temp); |
| 165 | + $snake = array(0,0,0); |
| 166 | + |
| 167 | + $this->length = $forwardBound + $this->m - $backBoundL1 - 1 |
| 168 | + + $this->lcs_rec($forwardBound, $backBoundL1, $forwardBound, $backBoundL2, |
| 169 | + $V, $snake); |
138 | 170 | } |
139 | | - if($m>$n){ |
140 | | - return wikidiff3_diffPart($b, $a, $b_inLcs, $a_inLcs, $n, $m, $offsety, $offsetx, $bestKnownLcs, $boundRunningTime, $max_NP_before_bound); |
141 | | - } |
142 | 171 | |
143 | | - $delta = $n-$m; |
144 | | - $delta_plus_1 = $delta+1; |
145 | | - $delta_min_1 = $delta-1; |
| 172 | + private function lcs_rec($bottoml1, $topl1, $bottoml2, $topl2, &$V, &$snake) { |
| 173 | + // check that both sequences are non-empty |
| 174 | + if ($bottoml1 > $topl1 || $bottoml2 > $topl2) { |
| 175 | + return 0; |
| 176 | + } |
146 | 177 | |
147 | | - $fpForw = $snakeBeginForw = $snakeEndForw = array_fill( 0, $delta_plus_1, -1); |
148 | | - $lcsSizeForw = $snakekForw = $lcsSizeBack = $snakekBack = array_fill( 0, $delta_plus_1, 0); |
149 | | - $fpBack = $snakeBeginBack = $snakeEndBack = array_fill( 0, $delta_plus_1, $n); |
150 | | - $overlap = $delta>1 ? array_fill( 1, $delta_min_1, FALSE) : array(); |
151 | | - $bestLcsLength = $bestLcsLengthTop = $bestLcsLengthBottom = -1; |
| 178 | + $d = $this->find_middle_snake($bottoml1, $topl1, $bottoml2, $topl2, $V, $snake); |
152 | 179 | |
153 | | - if($boundRunningTime){ |
154 | | - $maxp_before_bound = max((-$delta+sqrt($delta*$delta+($max_NP_before_bound<<2)))>>1,2); |
155 | | - if($maxp_before_bound>=$m){ |
156 | | - $boundRunningTime = false; |
157 | | - unset($maxp_before_bound); |
| 180 | + // need to store these so we don't lose them when they're overwritten by |
| 181 | + // the recursion |
| 182 | + $len = $snake[2]; |
| 183 | + $startx = $snake[0]; |
| 184 | + $starty = $snake[1]; |
| 185 | + |
| 186 | + // the middle snake is part of the LCS, store it |
| 187 | + for ($i = 0; $i < $len; ++$i) { |
| 188 | + $this->removed[$startx + $i] = $this->added[$starty + $i] = false; |
158 | 189 | } |
| 190 | + |
| 191 | + if ($d > 1) { |
| 192 | + return $len |
| 193 | + + $this->lcs_rec($bottoml1, $startx - 1, $bottoml2, $starty - 1, $V, $snake) |
| 194 | + + $this->lcs_rec($startx + $len, $topl1, $starty + $len, $topl2, $V, $snake); |
| 195 | + } else if ($d == 1) { |
| 196 | + /* |
| 197 | + * In this case the sequences differ by exactly 1 line. We have |
| 198 | + * already saved all the lines after the difference in the for loop |
| 199 | + * above, now we need to save all the lines before the difference. |
| 200 | + */ |
| 201 | + $max = min($startx - $bottoml1, $starty - $bottoml2); |
| 202 | + for ($i = 0; $i < $max; ++$i) { |
| 203 | + $this->removed[$bottoml1 + $i] = $this->added[$bottoml2 + $i] = false; |
| 204 | + } |
| 205 | + return $max + $len; |
| 206 | + } |
| 207 | + return $len; |
159 | 208 | } |
160 | 209 | |
161 | | - $p=-1; |
162 | | - $m_min_1 = $m-1; |
163 | | - $maxp=$m_min_1-$bestLcsLength; |
| 210 | + private function find_middle_snake($bottoml1, $topl1, $bottoml2,$topl2, &$V, &$snake) { |
| 211 | + $from = &$this->from; |
| 212 | + $to = &$this->to; |
| 213 | + $V0 = &$V[0]; |
| 214 | + $V1 = &$V[1]; |
| 215 | + $snake0 = &$snake[0]; |
| 216 | + $snake1 = &$snake[1]; |
| 217 | + $snake2 = &$snake[2]; |
| 218 | + $bottoml1_min_1 = $bottoml1-1; |
| 219 | + $bottoml2_min_1 = $bottoml2-1; |
| 220 | + $N = $topl1 - $bottoml1_min_1; |
| 221 | + $M = $topl2 - $bottoml2_min_1; |
| 222 | + $delta = $N - $M; |
| 223 | + $maxabsx = $N+$bottoml1; |
| 224 | + $maxabsy = $M+$bottoml2; |
| 225 | + $limit = min($this->maxDifferences, ceil(($N + $M ) / 2)); // ceil((N+M)/2) |
164 | 226 | |
165 | | - while($p<$maxp){ |
| 227 | + //value_to_add_forward: a 0 or 1 that we add to the start |
| 228 | + // offset |
| 229 | + // to make it odd/even |
| 230 | + if (($M & 1) == 1) { |
| 231 | + $value_to_add_forward = 1; |
| 232 | + } else { |
| 233 | + $value_to_add_forward = 0; |
| 234 | + } |
166 | 235 | |
167 | | - if($boundRunningTime && $p>$maxp_before_bound){ |
168 | | - // bound the running time by stopping early |
169 | | - if($bestLcsLength>=0){ |
170 | | - // accept the best LCS thusfar |
171 | | - break; |
172 | | - }else{ |
173 | | - $bestLcsProgressForw = $bestkForw = 0; |
174 | | - foreach($lcsSizeForw as $k => $localLcsProgress){ |
175 | | - if($localLcsProgress>$bestLcsProgressForw || ($localLcsProgress==$bestLcsProgressForw && $bestkForw > $k)){ |
176 | | - $bestLcsProgressForw = $localLcsProgress; |
177 | | - $bestkForw = $k; |
178 | | - } |
179 | | - } |
180 | | - $bestLcsProgressBack = $bestkBack = 0; |
181 | | - foreach($lcsSizeBack as $k => $localLcsProgress){ |
182 | | - if($localLcsProgress>$bestLcsProgressBack || ($localLcsProgress==$bestLcsProgressBack && $bestkBack < $k)){ |
183 | | - $bestLcsProgressBack = $localLcsProgress; |
184 | | - $bestkBack = $k; |
185 | | - } |
186 | | - } |
187 | | - if($fpForw[$bestkForw]-$bestkForw>$fpBack[$bestkBack]-$bestkBack){ |
188 | | - // This is hard, maybe try some more? Can this even happen? A solution will be found soon anyway. |
189 | | - }else{ |
190 | | - // lets do this |
191 | | - $topSnakeStart = $snakeBeginForw[$bestkForw]; |
192 | | - $topSnakeEnd = $snakeEndForw[$bestkForw]; |
193 | | - $topSnakek = $snakekForw[$bestkForw]; |
194 | | - $bestLcsLengthTop = $lcsSizeForw[$bestkBack] + $topSnakeStart - $topSnakeEnd; |
| 236 | + if (($N & 1) == 1) { |
| 237 | + $value_to_add_backward = 1; |
| 238 | + } else { |
| 239 | + $value_to_add_backward = 0; |
| 240 | + } |
195 | 241 | |
196 | | - $bottomSnakeStart = $snakeEndBack[$bestkBack]+1; |
197 | | - $bottomSnakeEnd = $snakeBeginBack[$bestkBack]+1; |
198 | | - $bottomSnakek = $snakekBack[$bestkBack]; |
199 | | - $bestLcsLengthBottom = $lcsSizeBack[$bestkBack] + $bottomSnakeStart - $bottomSnakeEnd; |
| 242 | + $start_forward = -$M; |
| 243 | + $end_forward = $N; |
| 244 | + $start_backward = -$N; |
| 245 | + $end_backward = $M; |
200 | 246 | |
201 | | - if($bottomSnakeStart-$topSnakeEnd>$n*0.6 && ($bottomSnakeStart-$bottomSnakek)-($topSnakeEnd-$topSnakek)>$m*0.6){ |
202 | | - // cut the sequence from both sides (there isn't enough progress, 60% of the sequence is not covered) |
203 | | - // also diff the middle now |
204 | | - if($bottomSnakeEnd>($fpForw[$bestkForw]>>1)){ |
205 | | - // do the middle diff between the snakes |
206 | | - wfDebug(" Limiting diff run time from both sides, middle between snakes\n"); |
207 | | - $m_t = ($bottomSnakeStart-$bottomSnakek)-($topSnakeEnd-$topSnakek); |
208 | | - $n_t = $bottomSnakeStart-$topSnakeEnd; |
209 | | - $a_t = array_slice($a, $topSnakeEnd-$topSnakek, $m_t); |
210 | | - $b_t = array_slice($b, $topSnakeEnd, $n_t); |
211 | | - $offsetx_t = $offsetx+($topSnakeEnd-$topSnakek); |
212 | | - $offsety_t = $offsety+$topSnakeEnd; |
213 | | - }else{ |
214 | | - // the snake is too early in the sequence, do the middle diff between endpoints of progress |
215 | | - wfDebug(" Limiting diff run time from both sides, middle between endpoints\n"); |
216 | | - $m_t = ($fpBack[$bestkBack]+1-$bestkBack)-($fpForw[$bestkForw]-$bestkForw); |
217 | | - $n_t = $fpBack[$bestkBack]+1-$fpForw[$bestkForw]; |
218 | | - $a_t = array_slice($a, $fpForw[$bestkForw]-$bestkForw, $m_t); |
219 | | - $b_t = array_slice($b, $fpForw[$bestkForw], $n_t); |
220 | | - $offsetx_t = $offsetx+($fpForw[$bestkForw]-$bestkForw); |
221 | | - $offsety_t = $offsety+$fpForw[$bestkForw]; |
222 | | - } |
223 | | - wikidiff3_diffPart($a_t, $b_t, $a_inLcs, $b_inLcs, $m_t, $n_t, $offsetx_t, $offsety_t, $m_t, $boundRunningTime, $max_NP_before_bound); |
224 | | - }else if(min($n-$bottomSnakeStart, $m-($bottomSnakeStart-$bottomSnakek))>min($topSnakeEnd, $topSnakeEnd-$topSnakek)){ |
225 | | - // the bottom snake is more interesting |
226 | | - wfDebug("Limiting diff run time from bottom side\n"); |
227 | | - $topSnakeStart = $bottomSnakeStart; |
228 | | - $topSnakeEnd = $bottomSnakeEnd; |
229 | | - $topSnakek = $bottomSnakek; |
230 | | - $bestLcsLengthTop = $topSnakeEnd-$topSnakek; |
231 | | - $bottomSnakeStart = $bottomSnakeEnd; |
232 | | - }else{ |
233 | | - // the top snake is more interesting |
234 | | - wfDebug(" Limiting diff run time from top side\n"); |
235 | | - $bottomSnakeStart = $topSnakeEnd; |
236 | | - $bottomSnakeEnd = $topSnakeEnd; |
237 | | - $bottomSnakek = $topSnakek; |
238 | | - $bestLcsLengthBottom = $m-($bottomSnakeEnd-$bottomSnakek); |
239 | | - } |
240 | | - break; |
241 | | - } |
242 | | - } |
243 | | - } |
244 | | - ++$p; |
245 | | - $overlap[-$p] = $overlap[$delta+$p] = FALSE; |
| 247 | + $limit_min_1 = $limit-1; |
| 248 | + $limit_plus_1 = $limit+1; |
246 | 249 | |
247 | | - $min_p_min_1 = -$p-1; |
248 | | - $delta_plus_1_plus_p = $delta_plus_1+$p; |
| 250 | + $V0[$limit_plus_1] = 0; |
| 251 | + $V1[$limit_min_1] = $N; |
| 252 | + $limit = min($this->maxDifferences, ceil(($N + $M ) / 2)); // ceil((N+M)/2) |
| 253 | + |
| 254 | + if (($delta & 1) == 1) { |
| 255 | + for ($d = 0; $d <= $limit; ++$d) { |
| 256 | + $start_diag = max($value_to_add_forward + $start_forward, -$d); |
| 257 | + $end_diag = min($end_forward, $d); |
| 258 | + $value_to_add_forward = 1 - $value_to_add_forward; |
249 | 259 | |
250 | | - // forward |
251 | | - $fpForw[$min_p_min_1] = $snakeEndForw[$min_p_min_1] = $snakekForw[$min_p_min_1] = $snakeBeginForw[$min_p_min_1] = $fpForw[$delta_plus_1_plus_p] = |
252 | | - $snakeBeginForw[$delta_plus_1_plus_p] = $snakeEndForw[$delta_plus_1_plus_p] = $snakekForw[$delta_plus_1_plus_p] = -1; |
253 | | - $lcsSizeForw[$min_p_min_1] = $lcsSizeForw[$delta_plus_1_plus_p] = 0; |
| 260 | + // compute forward furthest reaching paths |
| 261 | + for ($k = $start_diag; $k <= $end_diag; $k += 2) { |
| 262 | + if ($k == -$d |
| 263 | + || ($k < $d && $V0[$limit_min_1 + $k] < $V0[$limit_plus_1 + $k])) { |
| 264 | + $x = $V0[$limit_plus_1 + $k]; |
| 265 | + } else { |
| 266 | + $x = $V0[$limit_min_1 + $k] + 1; |
| 267 | + } |
254 | 268 | |
255 | | - $k=-$p; |
256 | | - do { |
257 | | - $k_plus_1 = $k+1; |
258 | | - $k_min_1 = $k-1; |
| 269 | + $absx = $snake0 = $x + $bottoml1; |
| 270 | + $absy = $snake1 = $x - $k + $bottoml2; |
| 271 | + |
| 272 | + while ($absx < $maxabsx && $absy < $maxabsy && $from[$absx] === $to[$absy]) { |
| 273 | + ++$absx; |
| 274 | + ++$absy; |
| 275 | + } |
| 276 | + $x = $absx-$bottoml1; |
| 277 | + |
| 278 | + $snake2 = $absx -$snake0; |
| 279 | + $V0[$limit + $k] = $x; |
| 280 | + if ($k >= $delta - $d + 1 && $k <= $delta + $d - 1 |
| 281 | + && $x >= $V1[$limit + $k - $delta]) { |
| 282 | + return 2 * $d - 1; |
| 283 | + } |
259 | 284 | |
260 | | - $fpBelow = $fpForw[$k_min_1]+1; |
261 | | - $fpAbove = $fpForw[$k_plus_1]; |
262 | | - $y = &$fpForw[$k]; |
263 | | - if($fpBelow>$fpAbove){ |
264 | | - $oldy = $y = $fpBelow; |
265 | | - $lcsSizeForw[$k] = $lcsSizeForw[$k_min_1]; |
266 | | - $snakeBeginForw[$k] = $snakeBeginForw[$k_min_1]; |
267 | | - $snakeEndForw[$k] = $snakeEndForw[$k_min_1]; |
268 | | - $snakekForw[$k] = $snakekForw[$k_min_1]; |
269 | | - }else{ |
270 | | - $oldy = $y = $fpAbove; |
271 | | - $lcsSizeForw[$k] = $lcsSizeForw[$k_plus_1]; |
272 | | - $snakeBeginForw[$k] = $snakeBeginForw[$k_plus_1]; |
273 | | - $snakeEndForw[$k] = $snakeEndForw[$k_plus_1]; |
274 | | - $snakekForw[$k] = $snakekForw[$k_plus_1]; |
275 | | - } |
276 | | - $x = $y-$k; |
277 | | - while($x < $m && $y < $n && $a[$x]===$b[$y]){ |
278 | | - ++$x; |
279 | | - ++$y; |
280 | | - } |
281 | | - if($y-$oldy>0){ |
282 | | - $lcsSizeForw[$k] += $y-$oldy; |
283 | | - $snakeBeginForw[$k] = $oldy; |
284 | | - $snakeEndForw[$k]= $y; |
285 | | - $snakekForw[$k] = $k; |
286 | | - } |
287 | | - if($y>=$fpBack[$k] && !$overlap[$k]){ |
288 | | - // there is overlap |
289 | | - $overlap[$k]=TRUE; |
290 | | - $lcsLength = $lcsSizeForw[$k]+$lcsSizeBack[$k]; |
291 | | - if($y>$fpBack[$k]+1){ |
292 | | - $snakeoverlap = $y-$fpBack[$k]-1; |
293 | | - $lcsLength -= $snakeoverlap; |
| 285 | + // check to see if we can cut down the diagonal range |
| 286 | + if ($x >= $N && $end_forward > $k - 1) { |
| 287 | + $end_forward = $k - 1; |
| 288 | + } else if ($absy-$bottoml2 >= $M) { |
| 289 | + $start_forward = $k + 1; |
| 290 | + $value_to_add_forward = 0; |
| 291 | + } |
294 | 292 | } |
295 | | - if($lcsLength>$bestLcsLength){ |
296 | | - // a better sequence found! |
297 | | - $bestLcsLength = $lcsLength; |
298 | 293 | |
299 | | - $topSnakeStart = $snakeBeginForw[$k]; |
300 | | - $topSnakeEnd = $snakeEndForw[$k]; |
301 | | - $topSnakek = $snakekForw[$k]; |
| 294 | + $start_diag = max($value_to_add_backward + $start_backward, -$d); |
| 295 | + $end_diag = min($end_backward, $d); |
| 296 | + $value_to_add_backward = 1 - $value_to_add_backward; |
302 | 297 | |
303 | | - // aligned snakes bite (\ ) |
304 | | - // ( \ \) |
305 | | - $bottomSnakeStart = max($snakeEndBack[$k]+1, $topSnakeEnd+max(0,$snakekBack[$k]-$snakekForw[$k])); |
306 | | - $bottomSnakeEnd = max($snakeBeginBack[$k]+1, $bottomSnakeStart); |
307 | | - $bottomSnakek = $snakekBack[$k]; |
| 298 | + // compute backward furthest reaching paths |
| 299 | + for ($k = $start_diag; $k <= $end_diag; $k += 2) { |
| 300 | + if ($k == $d |
| 301 | + || ($k != -$d && $V1[$limit_min_1 + $k] < $V1[$limit_plus_1 + $k])) { |
| 302 | + $x = $V1[$limit_min_1 + $k]; |
| 303 | + } else { |
| 304 | + $x = $V1[$limit_plus_1 + $k] - 1; |
| 305 | + } |
308 | 306 | |
309 | | - if($bottomSnakeEnd<$y){ |
310 | | - $bottomSnakeStart = $y; |
311 | | - $bottomSnakeEnd = $y; |
312 | | - $bottomSnakek = $k; |
| 307 | + $y = $x - $k - $delta; |
| 308 | + |
| 309 | + $snake2 = 0; |
| 310 | + while ($x > 0 && $y > 0 |
| 311 | + && $from[$x +$bottoml1_min_1] === $to[$y + $bottoml2_min_1]) { |
| 312 | + --$x; |
| 313 | + --$y; |
| 314 | + ++$snake2; |
313 | 315 | } |
| 316 | + $V1[$limit + $k] = $x; |
314 | 317 | |
315 | | - $bestLcsLengthTop = $lcsSizeForw[$k]-($snakeEndForw[$k]-$snakeBeginForw[$k]); |
316 | | - $bestLcsLengthBottom = $lcsSizeBack[$k]-($snakeBeginBack[$k]-$snakeEndBack[$k]); |
317 | | - if($bestKnownLcs==$lcsLength){ |
318 | | - $fpForw[$k]=$y; |
319 | | - break 2; |
| 318 | + // check to see if we can cut down our diagonal range |
| 319 | + if ($x <= 0) { |
| 320 | + $start_backward = $k + 1; |
| 321 | + $value_to_add_backward = 0; |
| 322 | + } else if ($y <= 0 && $end_backward > $k - 1) { |
| 323 | + $end_backward = $k - 1; |
320 | 324 | } |
321 | | - $maxp=$m_min_1-$bestLcsLength; |
322 | 325 | } |
323 | 326 | } |
324 | | - if($k<$delta_min_1){ |
325 | | - ++$k; |
326 | | - }else if($k>$delta){ |
327 | | - --$k; |
328 | | - }else if($k==$delta_min_1){ |
329 | | - $k = $delta+$p; |
330 | | - }else{ |
331 | | - break; |
332 | | - } |
333 | | - } while(TRUE); |
| 327 | + } else { |
| 328 | + for ($d = 0; $d <= $limit; ++$d) { |
| 329 | + $start_diag = max($value_to_add_forward + $start_forward, -$d); |
| 330 | + $end_diag = min($end_forward, $d); |
| 331 | + $value_to_add_forward = 1 - $value_to_add_forward; |
334 | 332 | |
335 | | - // backward |
336 | | - $fpBack[$min_p_min_1] = $snakeBeginBack[$min_p_min_1] = $snakeEndBack[$min_p_min_1] = $snakekBack[$min_p_min_1] = |
337 | | - $fpBack[$delta_plus_1_plus_p] = $snakeBeginBack[$delta_plus_1_plus_p] = $snakeEndBack[$delta_plus_1_plus_p] = $snakekBack[$delta_plus_1_plus_p] = $n; |
338 | | - $lcsSizeBack[$min_p_min_1] = $lcsSizeBack[$delta_plus_1_plus_p] = 0; |
| 333 | + // compute forward furthest reaching paths |
| 334 | + for ($k = $start_diag; $k <= $end_diag; $k += 2) { |
| 335 | + if ($k == -$d |
| 336 | + || ($k < $d && $V0[$limit_min_1 + $k] < $V0[$limit_plus_1 + $k])) { |
| 337 | + $x = $V0[$limit_plus_1 + $k]; |
| 338 | + } else { |
| 339 | + $x = $V0[$limit_min_1 + $k] + 1; |
| 340 | + } |
339 | 341 | |
340 | | - $k=$delta+$p; |
341 | | - do { |
342 | | - $k_plus_1 = $k+1; |
343 | | - $k_min_1 = $k-1; |
| 342 | + $absx = $snake0 = $x + $bottoml1; |
| 343 | + $absy = $snake1 = $x - $k + $bottoml2; |
| 344 | + |
| 345 | + while ($absx < $maxabsx && $absy < $maxabsy && $from[$absx] === $to[$absy]) { |
| 346 | + ++$absx; |
| 347 | + ++$absy; |
| 348 | + } |
| 349 | + $x = $absx-$bottoml1; |
| 350 | + $snake2 = $absx -$snake0; |
| 351 | + $V0[$limit + $k] = $x; |
344 | 352 | |
345 | | - $fpBelow = $fpBack[$k_min_1]; |
346 | | - $fpAbove = $fpBack[$k_plus_1]-1; |
347 | | - $y = &$fpBack[$k]; |
348 | | - if($fpBelow<$fpAbove){ |
349 | | - $oldy = $y = $fpBelow; |
350 | | - $lcsSizeBack[$k] = $lcsSizeBack[$k_min_1]; |
351 | | - $snakeBeginBack[$k] = $snakeBeginBack[$k_min_1]; |
352 | | - $snakeEndBack[$k] = $snakeEndBack[$k_min_1]; |
353 | | - $snakekBack[$k] = $snakekBack[$k_min_1]; |
354 | | - }else{ |
355 | | - $oldy = $y = $fpAbove; |
356 | | - $lcsSizeBack[$k] = $lcsSizeBack[$k_plus_1]; |
357 | | - $snakeBeginBack[$k] = $snakeBeginBack[$k_plus_1]; |
358 | | - $snakeEndBack[$k] = $snakeEndBack[$k_plus_1]; |
359 | | - $snakekBack[$k] = $snakekBack[$k_plus_1]; |
360 | | - } |
361 | | - $x = $y-$k; |
362 | | - while($x > -1 && $y > -1 && $a[$x]===$b[$y]){ |
363 | | - --$x; |
364 | | - --$y; |
365 | | - } |
366 | | - if($oldy-$y>0){ |
367 | | - $lcsSizeBack[$k] += $oldy-$y; |
368 | | - $snakeBeginBack[$k] = $oldy; |
369 | | - $snakeEndBack[$k] = $y; |
370 | | - $snakekBack[$k] = $k; |
371 | | - } |
372 | | - if($fpForw[$k]>=$y && !$overlap[$k]){ |
373 | | - // there is overlap |
374 | | - $overlap[$k]=TRUE; |
375 | | - $lcsLength = $lcsSizeForw[$k]+$lcsSizeBack[$k]; |
376 | | - if($fpForw[$k]>$y+1){ |
377 | | - $snakeoverlap = $fpForw[$k]-$y-1; |
378 | | - $lcsLength -= $snakeoverlap; |
| 353 | + // check to see if we can cut down the diagonal range |
| 354 | + if ($x >= $N && $end_forward > $k - 1) { |
| 355 | + $end_forward = $k - 1; |
| 356 | + } else if ($absy-$bottoml2 >= $M) { |
| 357 | + $start_forward = $k + 1; |
| 358 | + $value_to_add_forward = 0; |
| 359 | + } |
379 | 360 | } |
380 | | - if($lcsLength>$bestLcsLength){ |
381 | | - // a better sequence found! |
382 | | - $bestLcsLength = $lcsLength; |
383 | 361 | |
384 | | - $topSnakeStart = $snakeBeginForw[$k]; |
385 | | - $topSnakeEnd = $snakeEndForw[$k]; |
386 | | - $topSnakek = $snakekForw[$k]; |
| 362 | + $start_diag = max($value_to_add_backward + $start_backward, -$d); |
| 363 | + $end_diag = min($end_backward, $d); |
| 364 | + $value_to_add_backward = 1 - $value_to_add_backward; |
387 | 365 | |
388 | | - // aligned snakes bite (\ ) |
389 | | - // ( \ \) |
390 | | - $bottomSnakeStart = max($snakeEndBack[$k]+1, $topSnakeEnd+max(0,$snakekBack[$k]-$snakekForw[$k])); |
391 | | - $bottomSnakeEnd = max($snakeBeginBack[$k]+1, $bottomSnakeStart); |
392 | | - $bottomSnakek = $snakekBack[$k]; |
| 366 | + // compute backward furthest reaching paths |
| 367 | + for ($k = $start_diag; $k <= $end_diag; $k += 2) { |
| 368 | + if ($k == $d |
| 369 | + || ($k != -$d && $V1[$limit_min_1 + $k] < $V1[$limit_plus_1 + $k])) { |
| 370 | + $x = $V1[$limit_min_1 + $k]; |
| 371 | + } else { |
| 372 | + $x = $V1[$limit_plus_1 + $k] - 1; |
| 373 | + } |
393 | 374 | |
394 | | - if($bottomSnakeEnd<$fpForw[$k]){ |
395 | | - $bottomSnakeStart = $fpForw[$k]; |
396 | | - $bottomSnakeEnd = $fpForw[$k]; |
397 | | - $bottomSnakek = $k; |
| 375 | + $y = $x - $k - $delta; |
| 376 | + |
| 377 | + $snake2 = 0; |
| 378 | + while ($x > 0 && $y > 0 |
| 379 | + && $from[$x +$bottoml1_min_1] === $to[$y + $bottoml2_min_1]) { |
| 380 | + --$x; |
| 381 | + --$y; |
| 382 | + ++$snake2; |
398 | 383 | } |
| 384 | + $V1[$limit + $k] = $x; |
399 | 385 | |
400 | | - $bestLcsLengthTop = $lcsSizeForw[$k]-($snakeEndForw[$k]-$snakeBeginForw[$k]); |
401 | | - $bestLcsLengthBottom = $lcsSizeBack[$k]-($snakeBeginBack[$k]-$snakeEndBack[$k]); |
402 | | - if($bestKnownLcs==$lcsLength){ |
403 | | - $fpBack[$k] = $y; |
404 | | - break 2; |
| 386 | + if ($k >= -$delta - $d && $k <= $d - $delta |
| 387 | + && $x <= $V0[$limit + $k + $delta]) { |
| 388 | + $snake0 = $bottoml1 + $x; |
| 389 | + $snake1 = $bottoml2 + $y; |
| 390 | + return 2 * $d; |
405 | 391 | } |
406 | | - $maxp=$m_min_1-$bestLcsLength; |
| 392 | + |
| 393 | + // check to see if we can cut down our diagonal range |
| 394 | + if ($x <= 0) { |
| 395 | + $start_backward = $k + 1; |
| 396 | + $value_to_add_backward = 0; |
| 397 | + } else if ($y <= 0 && $end_backward > $k - 1) { |
| 398 | + $end_backward = $k - 1; |
| 399 | + } |
407 | 400 | } |
408 | 401 | } |
409 | | - if($k>1){ |
410 | | - --$k; |
411 | | - }else if($k<0){ |
412 | | - ++$k; |
413 | | - }else if($k==1){ |
414 | | - $k = -$p; |
415 | | - }else{ |
416 | | - break; |
417 | | - } |
418 | | - } while(TRUE); |
419 | | - } |
| 402 | + } |
| 403 | + /* |
| 404 | + * computing the true LCS is too expensive, instead find the diagonal |
| 405 | + * with the most progress and pretend a midle snake of length 0 occurs |
| 406 | + * there. |
| 407 | + */ |
420 | 408 | |
421 | | - unset($fpForw, $fpBack, $lcsSizeForw, $lcsSizeBack); |
422 | | - unset($snakeBeginForw, $snakeBeginBack, $snakeEndForw, $snakeEndBack, $snakekForw, $snakekBack); |
423 | | - unset($overlap); |
| 409 | + $most_progress = self::findMostProgress($M, $N, $limit, $V); |
424 | 410 | |
425 | | - // Mark snakes as in LCS |
426 | | - $maxi = $offsetx+$topSnakeEnd-$topSnakek; |
427 | | - for($i=$offsetx+$topSnakeStart-$topSnakek;$i<$maxi;++$i){ |
428 | | - $a_inLcs_sym[$i] = TRUE; |
| 411 | + $snake0 = $bottoml1 + $most_progress[0]; |
| 412 | + $snake1 = $bottoml2 + $most_progress[1]; |
| 413 | + $snake2 = 0; |
| 414 | + wfDebug('Computing the LCS is too expensive. Using a heuristic.'); |
| 415 | + $this->heuristicUsed = true; |
| 416 | + return 5; /* |
| 417 | + * HACK: since we didn't really finish the LCS computation |
| 418 | + * we don't really know the length of the SES. We don't do |
| 419 | + * anything with the result anyway, unless it's <=1. We know |
| 420 | + * for a fact SES > 1 so 5 is as good a number as any to |
| 421 | + * return here |
| 422 | + */ |
429 | 423 | } |
430 | | - $maxi = $offsety+$topSnakeEnd; |
431 | | - for($i=$offsety+$topSnakeStart;$i<$maxi;++$i){ |
432 | | - $b_inLcs_sym[$i] = TRUE; |
433 | | - } |
434 | | - $maxi = $offsetx+$bottomSnakeEnd-$bottomSnakek; |
435 | | - for($i=$offsetx+$bottomSnakeStart-$bottomSnakek;$i<$maxi;++$i){ |
436 | | - $a_inLcs_sym[$i] = TRUE; |
437 | | - } |
438 | | - $maxi = $offsety+$bottomSnakeEnd; |
439 | | - for($i=$offsety+$bottomSnakeStart;$i<$maxi;++$i){ |
440 | | - $b_inLcs_sym[$i] = TRUE; |
441 | | - } |
442 | 424 | |
443 | | - $m_t = $topSnakeStart-$topSnakek; |
444 | | - $a_t = array_slice($a, 0, $m_t); |
445 | | - $b_t = array_slice($b, 0, $topSnakeStart); |
| 425 | + private static function findMostProgress($M, $N, $limit, $V) { |
| 426 | + $delta = $N - $M; |
446 | 427 | |
447 | | - $m_b = $m-($bottomSnakeEnd-$bottomSnakek); |
448 | | - $n_b = $n-$bottomSnakeEnd; |
449 | | - $a_b = array_slice($a, $bottomSnakeEnd-$bottomSnakek, $m_b); |
450 | | - $b_b = array_slice($b, $bottomSnakeEnd, $n_b); |
| 428 | + if (($M & 1) == ($limit & 1)) { |
| 429 | + $forward_start_diag = max(-$M, -$limit); |
| 430 | + } else { |
| 431 | + $forward_start_diag = max(1 - $M, -$limit); |
| 432 | + } |
451 | 433 | |
452 | | - wikidiff3_diffPart($a_t, $b_t, $a_inLcs, $b_inLcs, $m_t, $topSnakeStart, $offsetx, $offsety, $bestLcsLengthTop, $boundRunningTime, $max_NP_before_bound); |
| 434 | + $forward_end_diag = min($N, $limit); |
453 | 435 | |
454 | | - wikidiff3_diffPart($a_b, $b_b, $a_inLcs, $b_inLcs, $m_b, $n_b, $offsetx+($bottomSnakeEnd-$bottomSnakek), $offsety+$bottomSnakeEnd, $bestLcsLengthBottom, $boundRunningTime, $max_NP_before_bound); |
455 | | -} |
| 436 | + if (($N & 1) == ($limit & 1)) { |
| 437 | + $backward_start_diag = max(-$N, -$limit); |
| 438 | + } else { |
| 439 | + $backward_start_diag = max(1 - $N, -$limit); |
| 440 | + } |
456 | 441 | |
457 | | -class InLcs { |
| 442 | + $backward_end_diag = -min($M, $limit); |
458 | 443 | |
459 | | - public $inLcs; |
| 444 | + $temp = array(0,0,0); |
460 | 445 | |
461 | | - function __construct($length){ |
462 | | - $this->inLcs = $length>0 ? array_fill(0,$length,FALSE): array(); |
463 | | - } |
464 | 446 | |
465 | | - /** |
466 | | - * Get the length of the Longest Common Subsequence (computed) |
467 | | - */ |
468 | | - public function getLcsLength(){ |
469 | | - return array_sum($this->inLcs); |
| 447 | + $max_progress = array_fill(0,ceil(max($forward_end_diag |
| 448 | + - $forward_start_diag, $backward_end_diag - $backward_start_diag) / 2), $temp); |
| 449 | + $num_progress = 0; // the 1st entry is current, it is initialized |
| 450 | + // with 0s |
| 451 | + |
| 452 | + // first search the forward diagonals |
| 453 | + for ($k = $forward_start_diag; $k <= $forward_end_diag; $k += 2) { |
| 454 | + $x = $V[0][$limit + $k]; |
| 455 | + $y = $x - $k; |
| 456 | + if ($x > $N || $y > $M) { |
| 457 | + continue; |
| 458 | + } |
| 459 | + |
| 460 | + $progress = $x + $y; |
| 461 | + if ($progress > $max_progress[0][2]) { |
| 462 | + $num_progress = 0; |
| 463 | + $max_progress[0][0] = $x; |
| 464 | + $max_progress[0][1] = $y; |
| 465 | + $max_progress[0][2] = $progress; |
| 466 | + } else if ($progress == $max_progress[0][2]) { |
| 467 | + ++$num_progress; |
| 468 | + $max_progress[$num_progress][0] = $x; |
| 469 | + $max_progress[$num_progress][1] = $y; |
| 470 | + $max_progress[$num_progress][2] = $progress; |
| 471 | + } |
| 472 | + } |
| 473 | + |
| 474 | + $max_progress_forward = true; // initially the maximum |
| 475 | + // progress is in the forward |
| 476 | + // direction |
| 477 | + |
| 478 | + // now search the backward diagonals |
| 479 | + for ($k = $backward_start_diag; $k <= $backward_end_diag; $k += 2) { |
| 480 | + $x = $V[1][$limit + $k]; |
| 481 | + $y = $x - $k - $delta; |
| 482 | + if ($x < 0 || $y < 0) { |
| 483 | + continue; |
| 484 | + } |
| 485 | + |
| 486 | + $progress = $N - $x + $M - $y; |
| 487 | + if ($progress > $max_progress[0][2]) { |
| 488 | + $num_progress = 0; |
| 489 | + $max_progress_forward = false; |
| 490 | + $max_progress[0][0] = $x; |
| 491 | + $max_progress[0][1] = $y; |
| 492 | + $max_progress[0][2] = $progress; |
| 493 | + } else if ($progress == $max_progress[0][2] && !$max_progress_forward) { |
| 494 | + ++$num_progress; |
| 495 | + $max_progress[$num_progress][0] = $x; |
| 496 | + $max_progress[$num_progress][1] = $y; |
| 497 | + $max_progress[$num_progress][2] = $progress; |
| 498 | + } |
| 499 | + } |
| 500 | + |
| 501 | + // return the middle diagonal with maximal progress. |
| 502 | + return $max_progress[floor($num_progress / 2)]; |
470 | 503 | } |
471 | 504 | |
472 | 505 | } |
Index: branches/visual_diff/phase3/includes/DifferenceEngine.php |
— | — | @@ -74,9 +74,10 @@ |
75 | 75 | } |
76 | 76 | |
77 | 77 | function showDiffPage( $diffOnly = false ) { |
78 | | - global $wgUser, $wgOut, $wgUseExternalEditor, $wgUseRCPatrol; |
| 78 | + global $wgUser, $wgOut, $wgUseExternalEditor, $wgUseRCPatrol, $wgEnableHtmlDiff; |
79 | 79 | wfProfileIn( __METHOD__ ); |
80 | 80 | |
| 81 | + |
81 | 82 | # If external diffs are enabled both globally and for the user, |
82 | 83 | # we'll use the application/x-external-editor interface to call |
83 | 84 | # an external diff tool like kompare, kdiff3, etc. |
— | — | @@ -265,17 +266,16 @@ |
266 | 267 | '<div id="mw-diff-ntitle3">' . $newminor . $sk->revComment( $this->mNewRev, !$diffOnly, true ) . $rdel . "</div>" . |
267 | 268 | '<div id="mw-diff-ntitle4">' . $nextlink . $patrol . '</div>'; |
268 | 269 | |
269 | | - $this->showDiff( $oldHeader, $newHeader ); |
| 270 | + if($wgEnableHtmlDiff){ |
| 271 | + $this->renderHtmlDiff(); |
| 272 | + }else{ |
270 | 273 | |
271 | | - if ( !$diffOnly ){ |
272 | | - global $wgEnableHtmlDiff; |
273 | | - if($wgEnableHtmlDiff){ |
274 | | - $this->renderHtmlDiff(); |
275 | | - }else{ |
| 274 | + $this->showDiff( $oldHeader, $newHeader ); |
| 275 | + |
| 276 | + if ( !$diffOnly ){ |
276 | 277 | $this->renderNewRevision(); |
277 | 278 | } |
278 | 279 | } |
279 | | - |
280 | 280 | wfProfileOut( __METHOD__ ); |
281 | 281 | } |
282 | 282 | |
— | — | @@ -329,10 +329,11 @@ |
330 | 330 | function renderHtmlDiff() { |
331 | 331 | global $wgOut, $IP; |
332 | 332 | wfProfileIn( __METHOD__ ); |
333 | | - |
| 333 | + |
| 334 | + $this->showDiffStyle(); |
| 335 | + |
334 | 336 | require_once( "$IP/includes/HTMLDiff.php" ); |
335 | 337 | |
336 | | - $wgOut->addHTML( "<hr /><h2>HTML Diff</h2>\n" ); |
337 | 338 | #add deleted rev tag if needed |
338 | 339 | if( !$this->mNewRev->userCan(Revision::DELETED_TEXT) ) { |
339 | 340 | $wgOut->addWikiMsg( 'rev-deleted-text-permission' ); |
— | — | @@ -1064,10 +1065,6 @@ |
1065 | 1066 | // Diff and store locally |
1066 | 1067 | $this->diff_local($from_lines, $to_lines); |
1067 | 1068 | |
1068 | | - // Merge edits when possible |
1069 | | - $this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged); |
1070 | | - $this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged); |
1071 | | - |
1072 | 1069 | // Compute the ranges operations. |
1073 | 1070 | $n_from = sizeof($from_lines); |
1074 | 1071 | $n_to = sizeof($to_lines); |
— | — | @@ -1102,18 +1099,18 @@ |
1103 | 1100 | |
1104 | 1101 | function diff_local ($from_lines, $to_lines) { |
1105 | 1102 | global $wgExternalDiffEngine; |
| 1103 | + wfProfileIn( __METHOD__); |
1106 | 1104 | |
1107 | | - $n_from = sizeof($from_lines); |
1108 | | - $n_to = sizeof($to_lines); |
1109 | | - |
1110 | 1105 | if($wgExternalDiffEngine == 'wikidiff3'){ |
1111 | 1106 | // wikidiff3 |
1112 | 1107 | global $IP; |
1113 | 1108 | require_once( "$IP/includes/Diff.php" ); |
1114 | | - list($this->xchanged, $this->ychanged) = wikidiff3_diff($from_lines, $to_lines, TRUE, 100000); |
| 1109 | + $wikidiff3 = new WikiDiff3(); |
| 1110 | + list($this->xchanged, $this->ychanged) = $wikidiff3->diff($from_lines, $to_lines); |
1115 | 1111 | }else{ |
1116 | 1112 | // old diff |
1117 | | - wfProfileIn( __METHOD__ ." - basicdiff"); |
| 1113 | + $n_from = sizeof($from_lines); |
| 1114 | + $n_to = sizeof($to_lines); |
1118 | 1115 | $this->xchanged = $this->ychanged = array(); |
1119 | 1116 | $this->xv = $this->yv = array(); |
1120 | 1117 | $this->xind = $this->yind = array(); |
— | — | @@ -1158,8 +1155,8 @@ |
1159 | 1156 | |
1160 | 1157 | // Find the LCS. |
1161 | 1158 | $this->_compareseq(0, sizeof($this->xv), 0, sizeof($this->yv)); |
1162 | | - wfProfileOut( __METHOD__ ." - basicdiff"); |
1163 | 1159 | } |
| 1160 | + wfProfileOut( __METHOD__ ); |
1164 | 1161 | } |
1165 | 1162 | |
1166 | 1163 | /** |
— | — | @@ -1215,7 +1212,6 @@ |
1216 | 1213 | $numer = $xlim - $xoff + $nchunks - 1; |
1217 | 1214 | $x = $xoff; |
1218 | 1215 | for ($chunk = 0; $chunk < $nchunks; $chunk++) { |
1219 | | - wfProfileIn( __METHOD__ . "-chunk" ); |
1220 | 1216 | if ($chunk > 0) |
1221 | 1217 | for ($i = 0; $i <= $this->lcs; $i++) |
1222 | 1218 | $ymids[$i][$chunk-1] = $this->seq[$i]; |
— | — | @@ -1268,7 +1264,6 @@ |
1269 | 1265 | if ($end == 0 || $ypos > $this->seq[$end]) { |
1270 | 1266 | $this->seq[++$this->lcs] = $ypos; |
1271 | 1267 | $this->in_seq[$ypos] = 1; |
1272 | | - wfProfileOut( __METHOD__ ); |
1273 | 1268 | return $this->lcs; |
1274 | 1269 | } |
1275 | 1270 | |