Index: branches/visual_diff/phase3/includes/HTMLDiff.php |
— | — | @@ -17,6 +17,11 @@ |
18 | 18 | * or see http://www.gnu.org/ |
19 | 19 | */ |
20 | 20 | |
| 21 | +/** |
| 22 | + * The HTML differ depends on WikiDiff3 |
| 23 | + */ |
| 24 | +global $IP; |
| 25 | +require_once( "$IP/includes/Diff.php" ); |
21 | 26 | |
22 | 27 | /** |
23 | 28 | * Any element in the DOM tree of an HTML document. |
— | — | @@ -24,8 +29,8 @@ |
25 | 30 | abstract class Node{ |
26 | 31 | |
27 | 32 | protected $parent; |
| 33 | + protected $parentTree; |
28 | 34 | |
29 | | - |
30 | 35 | function __construct($parent){ |
31 | 36 | $this->parent = $parent; |
32 | 37 | if(!is_null($parent)){ |
— | — | @@ -38,16 +43,18 @@ |
39 | 44 | } |
40 | 45 | |
41 | 46 | public function getParentTree(){ |
42 | | - if(!is_null($this->parent)){ |
43 | | - $parentTree = $this->parent->getParentTree(); |
44 | | - $parentTree[] = $this->parent; |
45 | | - return $parentTree; |
46 | | - }else{ |
47 | | - return array(); |
| 47 | + if(!isset($this->parentTree)){ |
| 48 | + if(!is_null($this->parent)){ |
| 49 | + $this->parentTree = $this->parent->getParentTree(); |
| 50 | + $this->parentTree[] = $this->parent; |
| 51 | + }else{ |
| 52 | + $this->parentTree = array(); |
| 53 | + } |
48 | 54 | } |
| 55 | + return $this->parentTree; |
49 | 56 | } |
50 | 57 | |
51 | | - public abstract function getMinimalDeletedSet($id); |
| 58 | + public abstract function getMinimalDeletedSet($id, &$allDeleted, &$somethingDeleted); |
52 | 59 | |
53 | 60 | public function detectIgnorableWhiteSpace(){ |
54 | 61 | // no op |
— | — | @@ -95,6 +102,7 @@ |
96 | 103 | |
97 | 104 | public function setParent($parent) { |
98 | 105 | $this->parent = $parent; |
| 106 | + unset($this->parentTree); |
99 | 107 | } |
100 | 108 | |
101 | 109 | public abstract function copyTree(); |
— | — | @@ -208,25 +216,29 @@ |
209 | 217 | return '</' . $this->getQName() + '>"'; |
210 | 218 | } |
211 | 219 | |
212 | | - public function getMinimalDeletedSet($id) { |
| 220 | + public function getMinimalDeletedSet($id, &$allDeleted, &$somethingDeleted) { |
213 | 221 | $nodes = array(); |
| 222 | + if ($this->getNbChildren() == 0){ |
| 223 | + $allDeleted = false; |
| 224 | + $somethingDeleted = false; |
| 225 | + return $nodes; |
| 226 | + } |
214 | 227 | |
215 | | - if ($this->getNbChildren() == 0) |
216 | | - return $nodes; |
217 | | - |
| 228 | + $allDeleted = false; |
| 229 | + $somethingDeleted = false; |
218 | 230 | $hasNotDeletedDescendant = false; |
219 | 231 | |
220 | 232 | foreach ($this->children as $child) { |
221 | | - $childrenChildren = $child->getMinimalDeletedSet($id); |
222 | | - $nodes = array_merge($nodes, $childrenChildren); |
223 | | - if (!$hasNotDeletedDescendant |
224 | | - && !(count($childrenChildren) == 1 && $childrenChildren[0]===$child)) { |
225 | | - // This child is not entirely deleted |
226 | | - $hasNotDeletedDescendant = true; |
| 233 | + $childrenChildren = $child->getMinimalDeletedSet($id, $allDeleted_local, $somethingDeleted_local); |
| 234 | + if($somethingDeleted_local){ |
| 235 | + $nodes = array_merge($nodes, $childrenChildren); |
| 236 | + $somethingDeleted = true; |
227 | 237 | } |
| 238 | + $hasNotDeletedDescendant |= !$allDeleted_local; |
228 | 239 | } |
229 | 240 | if (!$hasNotDeletedDescendant) { |
230 | 241 | $nodes = array($this); |
| 242 | + $allDeleted = true; |
231 | 243 | } |
232 | 244 | return $nodes; |
233 | 245 | } |
— | — | @@ -420,9 +432,11 @@ |
421 | 433 | return $this; |
422 | 434 | } |
423 | 435 | |
424 | | - public function getMinimalDeletedSet($id) { |
425 | | - if ($this->getModification()->getType() == Modification::REMOVED |
426 | | - && $this->getModification()->getID() == $id){ |
| 436 | + public function getMinimalDeletedSet($id, &$allDeleted, &$somethingDeleted) { |
| 437 | + if ($this->modification->getType() == Modification::REMOVED |
| 438 | + && $this->modification->getID() == $id){ |
| 439 | + $somethingDeleted = true; |
| 440 | + $allDeleted = true; |
427 | 441 | return array($this); |
428 | 442 | } |
429 | 443 | return array(); |
— | — | @@ -500,10 +514,10 @@ |
501 | 515 | return $newThis; |
502 | 516 | } |
503 | 517 | |
504 | | - public function getMinimalDeletedSet($id) { |
| 518 | + public function getMinimalDeletedSet($id, &$allDeleted, &$somethingDeleted) { |
505 | 519 | $nodes = array(); |
506 | 520 | foreach ($this->children as $child) { |
507 | | - $childrenChildren = $child->getMinimalDeletedSet($id); |
| 521 | + $childrenChildren = $child->getMinimalDeletedSet($id,$allDeleted,$somethingDeleted); |
508 | 522 | $nodes = array_merge($nodes, $childrenChildren); |
509 | 523 | } |
510 | 524 | return $nodes; |
— | — | @@ -758,29 +772,33 @@ |
759 | 773 | } |
760 | 774 | } |
761 | 775 | |
| 776 | + public static $regex = '/([\s\.\,\"\\\'\(\)\?\:\;\!\{\}\-\+\*\=\_\[\]\&\|]{1})/'; |
| 777 | + |
762 | 778 | public function characters($parser, $data){ |
763 | | - //wfDebug('Parsing '. strlen($data).' characters.'); |
764 | | - $array = str_split($data); |
765 | | - foreach($array as $c) { |
766 | | - if (self::isDelimiter($c)) { |
| 779 | + $matches = preg_split(self::$regex,$data,-1,PREG_SPLIT_DELIM_CAPTURE); |
| 780 | + |
| 781 | + $notInPre = !$this->currentParent->isPre(); |
| 782 | + $max = sizeof($matches)-1; |
| 783 | + |
| 784 | + foreach($matches as $word){ |
| 785 | + if(preg_match(self::$regex, $word)){ |
| 786 | + //whitespace |
767 | 787 | $this->endWord(); |
768 | | - if (WhiteSpaceNode::isWhiteSpace($c) && !$this->currentParent->isPre() |
769 | | - && !$this->currentParent->inPre()) { |
| 788 | + if (WhiteSpaceNode::isWhiteSpace($word) && $notInPre) { |
770 | 789 | if (!is_null($this->lastSibling)){ |
771 | 790 | $this->lastSibling->setWhiteAfter(true); |
772 | 791 | } |
773 | 792 | $this->whiteSpaceBeforeThis = true; |
774 | 793 | } else { |
775 | | - $textNode = new TextNode($this->currentParent, $c); |
| 794 | + $textNode = new TextNode($this->currentParent, $word); |
776 | 795 | $textNode->setWhiteBefore($this->whiteSpaceBeforeThis); |
777 | 796 | $this->whiteSpaceBeforeThis = false; |
778 | 797 | $this->lastSibling = $textNode; |
779 | 798 | $this->textNodes[] = $textNode; |
780 | 799 | } |
781 | | - } else { |
782 | | - $this->newWord .= $c; |
| 800 | + }else{ |
| 801 | + $this->newWord .= $word; |
783 | 802 | } |
784 | | - |
785 | 803 | } |
786 | 804 | } |
787 | 805 | |
— | — | @@ -993,8 +1011,10 @@ |
994 | 1012 | } |
995 | 1013 | $this->oldTextNodes[$start]->getModification()->setFirstOfID(true); |
996 | 1014 | |
997 | | - $deletedNodes = $this->oldBodyNode->getMinimalDeletedSet($this->deletedID); |
| 1015 | + $root = $this->oldTextNodes[$start]->getLastCommonParent($this->oldTextNodes[$end-1])->getLastCommonParent(); |
998 | 1016 | |
| 1017 | + $deletedNodes = $root->getMinimalDeletedSet($this->deletedID, $junk1, $junk2); |
| 1018 | + |
999 | 1019 | //wfDebug("Minimal set of deleted nodes of size " . sizeof($deletedNodes)); |
1000 | 1020 | |
1001 | 1021 | // Set prevLeaf to the leaf after which the old HTML needs to be |
— | — | @@ -1091,14 +1111,15 @@ |
1092 | 1112 | } |
1093 | 1113 | |
1094 | 1114 | class HTMLDiffer{ |
1095 | | - |
| 1115 | + |
1096 | 1116 | private $output; |
1097 | | - |
| 1117 | + |
1098 | 1118 | function __construct($output){ |
1099 | 1119 | $this->output = $output; |
1100 | 1120 | } |
1101 | 1121 | |
1102 | 1122 | function htmlDiff($from, $to){ |
| 1123 | + wfProfileIn( __METHOD__ ); |
1103 | 1124 | // Create an XML parser |
1104 | 1125 | $xml_parser = xml_parser_create(''); |
1105 | 1126 | |
— | — | @@ -1110,12 +1131,11 @@ |
1111 | 1132 | // Set the function to handle blocks of character data |
1112 | 1133 | xml_set_character_data_handler($xml_parser, array($domfrom,"characters")); |
1113 | 1134 | |
1114 | | - ; |
1115 | | - //wfDebug('Parsing '.strlen($from)." characters worth of HTML"); |
| 1135 | + //wfDebug('Parsing '.strlen($from)." characters worth of HTML\n"); |
1116 | 1136 | if (!xml_parse($xml_parser, '<?xml version="1.0" encoding="UTF-8"?>'.Sanitizer::hackDocType().'<body>', FALSE) |
1117 | 1137 | || !xml_parse($xml_parser, $from, FALSE) |
1118 | 1138 | || !xml_parse($xml_parser, '</body>', TRUE)){ |
1119 | | - wfDebug(sprintf("XML error: %s at line %d",xml_error_string(xml_get_error_code($xml_parser)),xml_get_current_line_number($xml_parser))); |
| 1139 | + wfDebug(sprintf("XML error: %s at line %d\n",xml_error_string(xml_get_error_code($xml_parser)),xml_get_current_line_number($xml_parser))); |
1120 | 1140 | } |
1121 | 1141 | xml_parser_free($xml_parser); |
1122 | 1142 | unset($from); |
— | — | @@ -1130,19 +1150,20 @@ |
1131 | 1151 | // Set the function to handle blocks of character data |
1132 | 1152 | xml_set_character_data_handler($xml_parser, array($domto,"characters")); |
1133 | 1153 | |
1134 | | - //wfDebug('Parsing '.strlen($to)." characters worth of HTML"); |
| 1154 | + //wfDebug('Parsing '.strlen($to)." characters worth of HTML\n"); |
1135 | 1155 | if (!xml_parse($xml_parser, '<?xml version="1.0" encoding="UTF-8"?>'.Sanitizer::hackDocType().'<body>', FALSE) |
1136 | 1156 | || !xml_parse($xml_parser, $to, FALSE) |
1137 | 1157 | || !xml_parse($xml_parser, '</body>', TRUE)){ |
1138 | | - wfDebug(sprintf("XML error in HTML diff: %s at line %d",xml_error_string(xml_get_error_code($xml_parser)),xml_get_current_line_number($xml_parser))); |
| 1158 | + wfDebug(sprintf("XML error in HTML diff: %s at line %d\n",xml_error_string(xml_get_error_code($xml_parser)),xml_get_current_line_number($xml_parser))); |
1139 | 1159 | } |
1140 | 1160 | xml_parser_free($xml_parser); |
1141 | 1161 | unset($to); |
1142 | 1162 | |
1143 | | - $diffengine = new _DiffEngine(); |
| 1163 | + $diffengine = new WikiDiff3(); |
1144 | 1164 | $differences = $this->preProcess($diffengine->diff_range($domfrom->getDiffLines(), $domto->getDiffLines())); |
1145 | 1165 | unset($xml_parser,$diffengine); |
1146 | 1166 | |
| 1167 | + |
1147 | 1168 | $domdiffer = new TextNodeDiffer($domto, $domfrom); |
1148 | 1169 | |
1149 | 1170 | $currentIndexLeft = 0; |
— | — | @@ -1163,10 +1184,10 @@ |
1164 | 1185 | if ($currentIndexLeft < $domdiffer->lengthOld()) { |
1165 | 1186 | $domdiffer->handlePossibleChangedPart($currentIndexLeft,$domdiffer->lengthOld(), $currentIndexRight,$domdiffer->lengthNew()); |
1166 | 1187 | } |
1167 | | - |
1168 | 1188 | $domdiffer->expandWhiteSpace(); |
1169 | 1189 | $output = new HTMLOutput('htmldiff', $this->output); |
1170 | 1190 | $output->parse($domdiffer->getBodyNode()); |
| 1191 | + wfProfileOut( __METHOD__ ); |
1171 | 1192 | } |
1172 | 1193 | |
1173 | 1194 | private function preProcess(/*array*/ $differences){ |
— | — | @@ -1244,15 +1265,15 @@ |
1245 | 1266 | if($nbOthers == 0 || $nbThis == 0){ |
1246 | 1267 | return -log(0); |
1247 | 1268 | } |
1248 | | - |
1249 | | - $diffengine = new _DiffEngine(); |
1250 | | - $diffengine->diff_local($this->leafs, $other->leafs); |
1251 | 1269 | |
1252 | | - $distanceThis = array_sum($diffengine->xchanged); |
1253 | | - $distanceOther = array_sum($diffengine->ychanged); |
| 1270 | + $diffengine = new WikiDiff3(25000,1.35); |
| 1271 | + $diffengine->diff($this->leafs, $other->leafs); |
1254 | 1272 | |
1255 | | - return ((0.0 + $distanceOther) / $nbOthers + (0.0 + $distanceThis) |
1256 | | - / $nbThis) / 2.0; |
| 1273 | + $lcsLength = $diffengine->getLcsLength(); |
| 1274 | + |
| 1275 | + $distanceThis = $nbThis-$lcsLength; |
| 1276 | + |
| 1277 | + return (2.0 - $lcsLength/$nbOthers - $lcsLength/$nbThis) / 2.0; |
1257 | 1278 | } |
1258 | 1279 | } |
1259 | 1280 | |
— | — | @@ -1301,7 +1322,7 @@ |
1302 | 1323 | public function getResult(AncestorComparator $other) { |
1303 | 1324 | $result = new AncestorComparatorResult(); |
1304 | 1325 | |
1305 | | - $diffengine = new _DiffEngine(); |
| 1326 | + $diffengine = new WikiDiff3(10000,1.35); |
1306 | 1327 | $differences = $diffengine->diff_range($this->ancestorsText, $other->ancestorsText); |
1307 | 1328 | |
1308 | 1329 | if (sizeof($differences) == 0){ |
— | — | @@ -1528,7 +1549,6 @@ |
1529 | 1550 | } |
1530 | 1551 | |
1531 | 1552 | public function getRemovedDescription(ChangeText $txt) { |
1532 | | - |
1533 | 1553 | if ($this->sem == TagToStringFactory::MOVED) { |
1534 | 1554 | $txt->addText($this->getMovedOutOf() . ' ' . strtolower($this->getArticle()) . ' '); |
1535 | 1555 | $txt->addHtml('<b>'); |
— | — | @@ -1550,7 +1570,6 @@ |
1551 | 1571 | } |
1552 | 1572 | |
1553 | 1573 | public function getAddedDescription(ChangeText $txt) { |
1554 | | - |
1555 | 1574 | if ($this->sem == TagToStringFactory::MOVED) { |
1556 | 1575 | $txt->addText($this->getMovedTo() . ' ' . strtolower($this->getArticle()) . ' '); |
1557 | 1576 | $txt->addHtml('<b>'); |
— | — | @@ -1869,12 +1888,12 @@ |
1870 | 1889 | $this->addAttributes($mod, $attrs); |
1871 | 1890 | $attrs['onclick'] = 'return tipC(constructToolTipC(this));'; |
1872 | 1891 | $this->handler->startElement('span', $attrs); |
1873 | | - |
| 1892 | + |
1874 | 1893 | //tooltip |
1875 | 1894 | $this->handler->startElement('span', array('class'=>'tip')); |
1876 | 1895 | $this->handler->characters($mod->getChanges()); |
1877 | 1896 | $this->handler->endElement('span'); |
1878 | | - |
| 1897 | + |
1879 | 1898 | $changeStarted = true; |
1880 | 1899 | $changeTXT = $mod->getChanges(); |
1881 | 1900 | } else if (!$remStarted && $mod->getType() == Modification::REMOVED) { |
— | — | @@ -1971,9 +1990,9 @@ |
1972 | 1991 | } |
1973 | 1992 | |
1974 | 1993 | class DelegatingContentHandler{ |
1975 | | - |
| 1994 | + |
1976 | 1995 | private $delegate; |
1977 | | - |
| 1996 | + |
1978 | 1997 | function __construct($delegate){ |
1979 | 1998 | $this->delegate=$delegate; |
1980 | 1999 | } |
Index: branches/visual_diff/phase3/includes/Diff.php |
— | — | @@ -43,32 +43,35 @@ |
44 | 44 | |
45 | 45 | //State variables |
46 | 46 | private $maxDifferences; |
47 | | - |
| 47 | + private $lcsLengthCorrectedForHeuristic = false; |
| 48 | + |
48 | 49 | //Output variables |
49 | 50 | public $length; |
| 51 | + public $removed; |
50 | 52 | public $added; |
51 | | - public $removed; |
52 | 53 | public $heuristicUsed; |
53 | | - |
| 54 | + |
54 | 55 | function __construct($tooLong = 2000000, $powLimit = 1.45){ |
55 | 56 | $this->tooLong = $tooLong; |
56 | 57 | $this->powLimit = $powLimit; |
57 | 58 | } |
58 | 59 | |
59 | 60 | public function diff(/*array*/ $from, /*array*/ $to){ |
| 61 | + //remember initial lengths |
60 | 62 | $m = sizeof($from); |
61 | 63 | $n = sizeof($to); |
62 | | - |
| 64 | + |
63 | 65 | $this->heuristicUsed = FALSE; |
64 | 66 | |
65 | | - $result_from = $m>0?array_fill(0,$m,true):array(); |
66 | | - $result_to = $n>0?array_fill(0,$n,true):array(); |
| 67 | + //output |
| 68 | + $removed = $m>0?array_fill(0,$m,true):array(); |
| 69 | + $added = $n>0?array_fill(0,$n,true):array(); |
67 | 70 | |
68 | 71 | //reduce the complexity for the next step (intentionally done twice) |
69 | 72 | //remove common tokens at the start |
70 | 73 | $i=0; |
71 | 74 | while($i<$m && $i<$n && $from[$i]===$to[$i]){ |
72 | | - $result_from[$i] = $result_to[$i] = false; |
| 75 | + $removed[$i] = $added[$i] = false; |
73 | 76 | unset($from[$i],$to[$i]); |
74 | 77 | ++$i; |
75 | 78 | } |
— | — | @@ -76,12 +79,12 @@ |
77 | 80 | //remove common tokens at the end |
78 | 81 | $j=1; |
79 | 82 | while($i+$j<=$m && $i+$j<=$n && $from[$m-$j]===$to[$n-$j]){ |
80 | | - $result_from[$m-$j] = $result_to[$n-$j] = false; |
| 83 | + $removed[$m-$j] = $added[$n-$j] = false; |
81 | 84 | unset($from[$m-$j],$to[$n-$j]); |
82 | 85 | ++$j; |
83 | 86 | } |
84 | 87 | |
85 | | - $newFrom = $newFromIndex = $newTo = $newToIndex = array(); |
| 88 | + $this->from = $newFromIndex = $this->to = $newToIndex = array(); |
86 | 89 | |
87 | 90 | //remove tokens not in both sequences |
88 | 91 | $shared = array_fill_keys($from,false); |
— | — | @@ -101,71 +104,100 @@ |
102 | 105 | } |
103 | 106 | } |
104 | 107 | |
105 | | - unset($shared); |
| 108 | + unset($shared, $from, $to); |
106 | 109 | |
107 | 110 | $this->m = sizeof($this->from); |
108 | 111 | $this->n = sizeof($this->to); |
| 112 | + |
109 | 113 | $this->removed = $this->m>0?array_fill(0,$this->m,true):array(); |
110 | 114 | $this->added = $this->n>0?array_fill(0,$this->n,true):array(); |
111 | 115 | |
112 | | - $this->longestCommonSubsequence(); |
| 116 | + if ($this->m == 0 || $this->n == 0) { |
| 117 | + $this->length = 0; |
| 118 | + }else{ |
| 119 | + $this->maxDifferences = ceil(($this->m + $this->n) / 2.0); |
| 120 | + if ($this->m * $this->n > $this->tooLong) { |
| 121 | + // limit complexity to D^POW_LIMIT for long sequences |
| 122 | + $this->maxDifferences = floor(pow($this->maxDifferences, $this->powLimit - 1.0)); |
| 123 | + wfDebug("Limiting max number of differences to $this->maxDifferences\n"); |
| 124 | + } |
113 | 125 | |
| 126 | + /* |
| 127 | + * The common prefixes and suffixes are always part of some LCS, include |
| 128 | + * them now to reduce our search space |
| 129 | + */ |
| 130 | + $max = min($this->m, $this->n); |
| 131 | + for ($forwardBound = 0; $forwardBound < $max |
| 132 | + && $this->from[$forwardBound]===$this->to[$forwardBound]; ++$forwardBound) { |
| 133 | + $this->removed[$forwardBound] = $this->added[$forwardBound] = false; |
| 134 | + } |
| 135 | + |
| 136 | + $backBoundL1 = $this->m - 1; |
| 137 | + $backBoundL2 = $this->n - 1; |
| 138 | + |
| 139 | + while ($backBoundL1 >= $forwardBound && $backBoundL2 >= $forwardBound |
| 140 | + && $this->from[$backBoundL1] === $this->to[$backBoundL2]) { |
| 141 | + $this->removed[$backBoundL1--] = $this->added[$backBoundL2--] = false; |
| 142 | + } |
| 143 | + |
| 144 | + $temp = array_fill(0,$this->m + $this->n + 1, 0); |
| 145 | + $V = array($temp,$temp); |
| 146 | + $snake = array(0,0,0); |
| 147 | + |
| 148 | + $this->length = $forwardBound + $this->m - $backBoundL1 - 1 |
| 149 | + + $this->lcs_rec($forwardBound, $backBoundL1, $forwardBound, $backBoundL2, |
| 150 | + $V, $snake); |
| 151 | + } |
| 152 | + |
114 | 153 | $this->m = $m; |
115 | 154 | $this->n = $n; |
| 155 | + |
116 | 156 | $this->length += $i+$j-1; |
117 | 157 | |
118 | | - foreach($this->removed as $key => $removed){ |
119 | | - if(!$removed){ |
120 | | - $result_from[$newFromIndex[$key]] = false; |
| 158 | + foreach($this->removed as $key => $removed_elem){ |
| 159 | + if(!$removed_elem){ |
| 160 | + $removed[$newFromIndex[$key]] = false; |
121 | 161 | } |
122 | 162 | } |
123 | | - foreach($this->added as $key => $added){ |
124 | | - if(!$added){ |
125 | | - $result_to[$newToIndex[$key]] = false; |
| 163 | + foreach($this->added as $key => $added_elem){ |
| 164 | + if(!$added_elem){ |
| 165 | + $added[$newToIndex[$key]] = false; |
126 | 166 | } |
127 | 167 | } |
128 | | - unset($this->added, $this->removed); |
129 | | - return array($result_from, $result_to); |
| 168 | + $this->removed = $removed; |
| 169 | + $this->added = $added; |
130 | 170 | } |
131 | 171 | |
132 | | - public function longestCommonSubsequence() { |
133 | | - if ($this->m == 0 || $this->n == 0) { |
134 | | - $this->length = 0; |
135 | | - return; |
136 | | - } |
| 172 | + function diff_range ($from_lines, $to_lines){ |
| 173 | + // Diff and store locally |
| 174 | + $this->diff($from_lines, $to_lines); |
| 175 | + unset($from_lines, $to_lines); |
137 | 176 | |
138 | | - $this->maxDifferences = ceil(($this->m + $this->n) / 2.0); |
139 | | - if ($this->m * $this->n > $this->tooLong) { |
140 | | - // limit complexity to D^POW_LIMIT for long sequences |
141 | | - $this->maxDifferences = floor(pow($this->maxDifferences, $this->powLimit - 1.0)); |
142 | | - wfDebug("Limiting max number of differences to $this->maxDifferences"); |
143 | | - } |
| 177 | + $ranges = array(); |
| 178 | + $xi = $yi = 0; |
| 179 | + while ($xi < $this->m || $yi < $this->n) { |
| 180 | + // Matching "snake". |
| 181 | + while ( $xi < $this->m && $yi < $this->n |
| 182 | + && !$this->removed[$xi] && !$this->added[$yi]) { |
| 183 | + ++$xi; |
| 184 | + ++$yi; |
| 185 | + } |
| 186 | + // Find deletes & adds. |
| 187 | + $xstart = $xi; |
| 188 | + while ($xi < $this->m && $this->removed[$xi]){ |
| 189 | + ++$xi; |
| 190 | + } |
144 | 191 | |
145 | | - /* |
146 | | - * The common prefixes and suffixes are always part of some LCS, include |
147 | | - * them now to reduce our search space |
148 | | - */ |
149 | | - $max = min($this->m, $this->n); |
150 | | - for ($forwardBound = 0; $forwardBound < $max |
151 | | - && $this->from[$forwardBound]===$this->to[$forwardBound]; ++$forwardBound) { |
152 | | - $this->removed[$forwardBound] = $this->added[$forwardBound] = false; |
153 | | - } |
| 192 | + $ystart = $yi; |
| 193 | + while ($yi < $this->n && $this->added[$yi]){ |
| 194 | + ++$yi; |
| 195 | + } |
154 | 196 | |
155 | | - $backBoundL1 = $this->m - 1; |
156 | | - $backBoundL2 = $this->n - 1; |
157 | | - |
158 | | - while ($backBoundL1 >= $forwardBound && $backBoundL2 >= $forwardBound |
159 | | - && $this->from[$backBoundL1] === $this->to[$backBoundL2]) { |
160 | | - $this->removed[$backBoundL1--] = $this->added[$backBoundL2--] = false; |
| 197 | + if ($xi>$xstart || $yi>$ystart){ |
| 198 | + $ranges[] = new RangeDifference($xstart,$xi,$ystart,$yi); |
| 199 | + } |
161 | 200 | } |
162 | | - |
163 | | - $temp = array_fill(0,$this->m + $this->n + 1, 0); |
164 | | - $V = array($temp,$temp); |
165 | | - $snake = array(0,0,0); |
166 | | - |
167 | | - $this->length = $forwardBound + $this->m - $backBoundL1 - 1 |
168 | | - + $this->lcs_rec($forwardBound, $backBoundL1, $forwardBound, $backBoundL2, |
169 | | - $V, $snake); |
| 201 | + return $ranges; |
170 | 202 | } |
171 | 203 | |
172 | 204 | private function lcs_rec($bottoml1, $topl1, $bottoml2, $topl2, &$V, &$snake) { |
— | — | @@ -267,13 +299,13 @@ |
268 | 300 | |
269 | 301 | $absx = $snake0 = $x + $bottoml1; |
270 | 302 | $absy = $snake1 = $x - $k + $bottoml2; |
271 | | - |
| 303 | + |
272 | 304 | while ($absx < $maxabsx && $absy < $maxabsy && $from[$absx] === $to[$absy]) { |
273 | 305 | ++$absx; |
274 | 306 | ++$absy; |
275 | 307 | } |
276 | 308 | $x = $absx-$bottoml1; |
277 | | - |
| 309 | + |
278 | 310 | $snake2 = $absx -$snake0; |
279 | 311 | $V0[$limit + $k] = $x; |
280 | 312 | if ($k >= $delta - $d + 1 && $k <= $delta + $d - 1 |
— | — | @@ -340,7 +372,7 @@ |
341 | 373 | |
342 | 374 | $absx = $snake0 = $x + $bottoml1; |
343 | 375 | $absy = $snake1 = $x - $k + $bottoml2; |
344 | | - |
| 376 | + |
345 | 377 | while ($absx < $maxabsx && $absy < $maxabsy && $from[$absx] === $to[$absy]) { |
346 | 378 | ++$absx; |
347 | 379 | ++$absy; |
— | — | @@ -410,7 +442,7 @@ |
411 | 443 | $snake0 = $bottoml1 + $most_progress[0]; |
412 | 444 | $snake1 = $bottoml2 + $most_progress[1]; |
413 | 445 | $snake2 = 0; |
414 | | - wfDebug('Computing the LCS is too expensive. Using a heuristic.'); |
| 446 | + wfDebug('Computing the LCS is too expensive. Using a heuristic.\n'); |
415 | 447 | $this->heuristicUsed = true; |
416 | 448 | return 5; /* |
417 | 449 | * HACK: since we didn't really finish the LCS computation |
— | — | @@ -501,5 +533,37 @@ |
502 | 534 | return $max_progress[floor($num_progress / 2)]; |
503 | 535 | } |
504 | 536 | |
| 537 | + public function getLcsLength(){ |
| 538 | + if($this->heuristicUsed && !$this->lcsLengthCorrectedForHeuristic){ |
| 539 | + $this->lcsLengthCorrectedForHeuristic = true; |
| 540 | + $this->length = $this->m-array_sum($this->added); |
| 541 | + } |
| 542 | + return $this->length; |
| 543 | + } |
| 544 | + |
505 | 545 | } |
506 | 546 | |
| 547 | +/** |
| 548 | + * Alternative representation of a set of changes, by the index |
| 549 | + * ranges that are changed. |
| 550 | + */ |
| 551 | +class RangeDifference { |
| 552 | + |
| 553 | + public $leftstart; |
| 554 | + public $leftend; |
| 555 | + public $leftlength; |
| 556 | + |
| 557 | + public $rightstart; |
| 558 | + public $rightend; |
| 559 | + public $rightlength; |
| 560 | + |
| 561 | + function __construct($leftstart, $leftend, $rightstart, $rightend){ |
| 562 | + $this->leftstart = $leftstart; |
| 563 | + $this->leftend = $leftend; |
| 564 | + $this->leftlength = $leftend-$leftstart; |
| 565 | + $this->rightstart = $rightstart; |
| 566 | + $this->rightend = $rightend; |
| 567 | + $this->rightlength = $rightend-$rightstart; |
| 568 | + } |
| 569 | +} |
| 570 | + |
Index: branches/visual_diff/phase3/includes/DifferenceEngine.php |
— | — | @@ -332,8 +332,6 @@ |
333 | 333 | |
334 | 334 | $this->showDiffStyle(); |
335 | 335 | |
336 | | - require_once( "$IP/includes/HTMLDiff.php" ); |
337 | | - |
338 | 336 | #add deleted rev tag if needed |
339 | 337 | if( !$this->mNewRev->userCan(Revision::DELETED_TEXT) ) { |
340 | 338 | $wgOut->addWikiMsg( 'rev-deleted-text-permission' ); |
— | — | @@ -378,6 +376,9 @@ |
379 | 377 | $newHtml = $parserOutput->getText(); |
380 | 378 | wfRunHooks( 'OutputPageBeforeHTML',array( &$wgOut, &$newHtml ) ); |
381 | 379 | |
| 380 | + unset($parserOutput,$popts); |
| 381 | + |
| 382 | + require_once( "$IP/includes/HTMLDiff.php" ); |
382 | 383 | $differ = new HTMLDiffer(new DelegatingContentHandler($wgOut)); |
383 | 384 | $differ->htmlDiff($oldHtml, $newHtml); |
384 | 385 | |
— | — | @@ -958,30 +959,6 @@ |
959 | 960 | } |
960 | 961 | |
961 | 962 | /** |
962 | | - * Alternative representation of a set of changes, by the index |
963 | | - * ranges that are changed. |
964 | | - */ |
965 | | -class RangeDifference { |
966 | | - |
967 | | - public $leftstart; |
968 | | - public $leftend; |
969 | | - public $leftlength; |
970 | | - |
971 | | - public $rightstart; |
972 | | - public $rightend; |
973 | | - public $rightlength; |
974 | | - |
975 | | - function __construct($leftstart, $leftend, $rightstart, $rightend){ |
976 | | - $this->leftstart = $leftstart; |
977 | | - $this->leftend = $leftend; |
978 | | - $this->leftlength = $leftend-$leftstart; |
979 | | - $this->rightstart = $rightstart; |
980 | | - $this->rightend = $rightend; |
981 | | - $this->rightlength = $rightend-$rightstart; |
982 | | - } |
983 | | -} |
984 | | - |
985 | | -/** |
986 | 963 | * Class used internally by Diff to actually compute the diffs. |
987 | 964 | * |
988 | 965 | * The algorithm used here is mostly lifted from the perl module |
— | — | @@ -1059,44 +1036,6 @@ |
1060 | 1037 | return $edits; |
1061 | 1038 | } |
1062 | 1039 | |
1063 | | - function diff_range ($from_lines, $to_lines){ |
1064 | | - wfProfileIn( __METHOD__ ); |
1065 | | - |
1066 | | - // Diff and store locally |
1067 | | - $this->diff_local($from_lines, $to_lines); |
1068 | | - |
1069 | | - // Compute the ranges operations. |
1070 | | - $n_from = sizeof($from_lines); |
1071 | | - $n_to = sizeof($to_lines); |
1072 | | - |
1073 | | - $ranges = array(); |
1074 | | - $xi = $yi = 0; |
1075 | | - while ($xi < $n_from || $yi < $n_to) { |
1076 | | - // Matching "snake". |
1077 | | - while ( $xi < $n_from && $yi < $n_to |
1078 | | - && !$this->xchanged[$xi] && !$this->ychanged[$yi]) { |
1079 | | - ++$xi; |
1080 | | - ++$yi; |
1081 | | - } |
1082 | | - // Find deletes & adds. |
1083 | | - $xstart = $xi; |
1084 | | - while ($xi < $n_from && $this->xchanged[$xi]){ |
1085 | | - ++$xi; |
1086 | | - } |
1087 | | - |
1088 | | - $ystart = $yi; |
1089 | | - while ($yi < $n_to && $this->ychanged[$yi]){ |
1090 | | - ++$yi; |
1091 | | - } |
1092 | | - |
1093 | | - if ($xi>$xstart || $yi>$ystart){ |
1094 | | - $ranges[] = new RangeDifference($xstart,$xi,$ystart,$yi); |
1095 | | - } |
1096 | | - } |
1097 | | - wfProfileOut( __METHOD__ ); |
1098 | | - return $ranges; |
1099 | | - } |
1100 | | - |
1101 | 1040 | function diff_local ($from_lines, $to_lines) { |
1102 | 1041 | global $wgExternalDiffEngine; |
1103 | 1042 | wfProfileIn( __METHOD__); |
— | — | @@ -1106,7 +1045,10 @@ |
1107 | 1046 | global $IP; |
1108 | 1047 | require_once( "$IP/includes/Diff.php" ); |
1109 | 1048 | $wikidiff3 = new WikiDiff3(); |
1110 | | - list($this->xchanged, $this->ychanged) = $wikidiff3->diff($from_lines, $to_lines); |
| 1049 | + $wikidiff3->diff($from_lines, $to_lines); |
| 1050 | + $this->xchanged = $wikidiff3->removed; |
| 1051 | + $this->ychanged = $wikidiff3->added; |
| 1052 | + unset($wikidiff3); |
1111 | 1053 | }else{ |
1112 | 1054 | // old diff |
1113 | 1055 | $n_from = sizeof($from_lines); |