r39511 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r39510‎ | r39511 | r39512 >
Date:23:12, 16 August 2008
Author:guyvdb
Status:old
Tags:
Comment:
Performance improvements to diff code
Modified paths:
  • /branches/visual_diff/phase3/includes/Diff.php (modified) (history)
  • /branches/visual_diff/phase3/includes/DifferenceEngine.php (modified) (history)
  • /branches/visual_diff/phase3/includes/HTMLDiff.php (modified) (history)

Diff [purge]

Index: branches/visual_diff/phase3/includes/HTMLDiff.php
@@ -17,6 +17,11 @@
1818 * or see http://www.gnu.org/
1919 */
2020
 21+/**
 22+ * The HTML differ depends on WikiDiff3
 23+ */
 24+global $IP;
 25+require_once( "$IP/includes/Diff.php" );
2126
2227 /**
2328 * Any element in the DOM tree of an HTML document.
@@ -24,8 +29,8 @@
2530 abstract class Node{
2631
2732 protected $parent;
 33+ protected $parentTree;
2834
29 -
3035 function __construct($parent){
3136 $this->parent = $parent;
3237 if(!is_null($parent)){
@@ -38,16 +43,18 @@
3944 }
4045
4146 public function getParentTree(){
42 - if(!is_null($this->parent)){
43 - $parentTree = $this->parent->getParentTree();
44 - $parentTree[] = $this->parent;
45 - return $parentTree;
46 - }else{
47 - return array();
 47+ if(!isset($this->parentTree)){
 48+ if(!is_null($this->parent)){
 49+ $this->parentTree = $this->parent->getParentTree();
 50+ $this->parentTree[] = $this->parent;
 51+ }else{
 52+ $this->parentTree = array();
 53+ }
4854 }
 55+ return $this->parentTree;
4956 }
5057
51 - public abstract function getMinimalDeletedSet($id);
 58+ public abstract function getMinimalDeletedSet($id, &$allDeleted, &$somethingDeleted);
5259
5360 public function detectIgnorableWhiteSpace(){
5461 // no op
@@ -95,6 +102,7 @@
96103
97104 public function setParent($parent) {
98105 $this->parent = $parent;
 106+ unset($this->parentTree);
99107 }
100108
101109 public abstract function copyTree();
@@ -208,25 +216,29 @@
209217 return '</' . $this->getQName() + '>"';
210218 }
211219
212 - public function getMinimalDeletedSet($id) {
 220+ public function getMinimalDeletedSet($id, &$allDeleted, &$somethingDeleted) {
213221 $nodes = array();
 222+ if ($this->getNbChildren() == 0){
 223+ $allDeleted = false;
 224+ $somethingDeleted = false;
 225+ return $nodes;
 226+ }
214227
215 - if ($this->getNbChildren() == 0)
216 - return $nodes;
217 -
 228+ $allDeleted = false;
 229+ $somethingDeleted = false;
218230 $hasNotDeletedDescendant = false;
219231
220232 foreach ($this->children as $child) {
221 - $childrenChildren = $child->getMinimalDeletedSet($id);
222 - $nodes = array_merge($nodes, $childrenChildren);
223 - if (!$hasNotDeletedDescendant
224 - && !(count($childrenChildren) == 1 && $childrenChildren[0]===$child)) {
225 - // This child is not entirely deleted
226 - $hasNotDeletedDescendant = true;
 233+ $childrenChildren = $child->getMinimalDeletedSet($id, $allDeleted_local, $somethingDeleted_local);
 234+ if($somethingDeleted_local){
 235+ $nodes = array_merge($nodes, $childrenChildren);
 236+ $somethingDeleted = true;
227237 }
 238+ $hasNotDeletedDescendant |= !$allDeleted_local;
228239 }
229240 if (!$hasNotDeletedDescendant) {
230241 $nodes = array($this);
 242+ $allDeleted = true;
231243 }
232244 return $nodes;
233245 }
@@ -420,9 +432,11 @@
421433 return $this;
422434 }
423435
424 - public function getMinimalDeletedSet($id) {
425 - if ($this->getModification()->getType() == Modification::REMOVED
426 - && $this->getModification()->getID() == $id){
 436+ public function getMinimalDeletedSet($id, &$allDeleted, &$somethingDeleted) {
 437+ if ($this->modification->getType() == Modification::REMOVED
 438+ && $this->modification->getID() == $id){
 439+ $somethingDeleted = true;
 440+ $allDeleted = true;
427441 return array($this);
428442 }
429443 return array();
@@ -500,10 +514,10 @@
501515 return $newThis;
502516 }
503517
504 - public function getMinimalDeletedSet($id) {
 518+ public function getMinimalDeletedSet($id, &$allDeleted, &$somethingDeleted) {
505519 $nodes = array();
506520 foreach ($this->children as $child) {
507 - $childrenChildren = $child->getMinimalDeletedSet($id);
 521+ $childrenChildren = $child->getMinimalDeletedSet($id,$allDeleted,$somethingDeleted);
508522 $nodes = array_merge($nodes, $childrenChildren);
509523 }
510524 return $nodes;
@@ -758,29 +772,33 @@
759773 }
760774 }
761775
 776+ public static $regex = '/([\s\.\,\"\\\'\(\)\?\:\;\!\{\}\-\+\*\=\_\[\]\&\|]{1})/';
 777+
762778 public function characters($parser, $data){
763 - //wfDebug('Parsing '. strlen($data).' characters.');
764 - $array = str_split($data);
765 - foreach($array as $c) {
766 - if (self::isDelimiter($c)) {
 779+ $matches = preg_split(self::$regex,$data,-1,PREG_SPLIT_DELIM_CAPTURE);
 780+
 781+ $notInPre = !$this->currentParent->isPre();
 782+ $max = sizeof($matches)-1;
 783+
 784+ foreach($matches as $word){
 785+ if(preg_match(self::$regex, $word)){
 786+ //whitespace
767787 $this->endWord();
768 - if (WhiteSpaceNode::isWhiteSpace($c) && !$this->currentParent->isPre()
769 - && !$this->currentParent->inPre()) {
 788+ if (WhiteSpaceNode::isWhiteSpace($word) && $notInPre) {
770789 if (!is_null($this->lastSibling)){
771790 $this->lastSibling->setWhiteAfter(true);
772791 }
773792 $this->whiteSpaceBeforeThis = true;
774793 } else {
775 - $textNode = new TextNode($this->currentParent, $c);
 794+ $textNode = new TextNode($this->currentParent, $word);
776795 $textNode->setWhiteBefore($this->whiteSpaceBeforeThis);
777796 $this->whiteSpaceBeforeThis = false;
778797 $this->lastSibling = $textNode;
779798 $this->textNodes[] = $textNode;
780799 }
781 - } else {
782 - $this->newWord .= $c;
 800+ }else{
 801+ $this->newWord .= $word;
783802 }
784 -
785803 }
786804 }
787805
@@ -993,8 +1011,10 @@
9941012 }
9951013 $this->oldTextNodes[$start]->getModification()->setFirstOfID(true);
9961014
997 - $deletedNodes = $this->oldBodyNode->getMinimalDeletedSet($this->deletedID);
 1015+ $root = $this->oldTextNodes[$start]->getLastCommonParent($this->oldTextNodes[$end-1])->getLastCommonParent();
9981016
 1017+ $deletedNodes = $root->getMinimalDeletedSet($this->deletedID, $junk1, $junk2);
 1018+
9991019 //wfDebug("Minimal set of deleted nodes of size " . sizeof($deletedNodes));
10001020
10011021 // Set prevLeaf to the leaf after which the old HTML needs to be
@@ -1091,14 +1111,15 @@
10921112 }
10931113
10941114 class HTMLDiffer{
1095 -
 1115+
10961116 private $output;
1097 -
 1117+
10981118 function __construct($output){
10991119 $this->output = $output;
11001120 }
11011121
11021122 function htmlDiff($from, $to){
 1123+ wfProfileIn( __METHOD__ );
11031124 // Create an XML parser
11041125 $xml_parser = xml_parser_create('');
11051126
@@ -1110,12 +1131,11 @@
11111132 // Set the function to handle blocks of character data
11121133 xml_set_character_data_handler($xml_parser, array($domfrom,"characters"));
11131134
1114 - ;
1115 - //wfDebug('Parsing '.strlen($from)." characters worth of HTML");
 1135+ //wfDebug('Parsing '.strlen($from)." characters worth of HTML\n");
11161136 if (!xml_parse($xml_parser, '<?xml version="1.0" encoding="UTF-8"?>'.Sanitizer::hackDocType().'<body>', FALSE)
11171137 || !xml_parse($xml_parser, $from, FALSE)
11181138 || !xml_parse($xml_parser, '</body>', TRUE)){
1119 - wfDebug(sprintf("XML error: %s at line %d",xml_error_string(xml_get_error_code($xml_parser)),xml_get_current_line_number($xml_parser)));
 1139+ wfDebug(sprintf("XML error: %s at line %d\n",xml_error_string(xml_get_error_code($xml_parser)),xml_get_current_line_number($xml_parser)));
11201140 }
11211141 xml_parser_free($xml_parser);
11221142 unset($from);
@@ -1130,19 +1150,20 @@
11311151 // Set the function to handle blocks of character data
11321152 xml_set_character_data_handler($xml_parser, array($domto,"characters"));
11331153
1134 - //wfDebug('Parsing '.strlen($to)." characters worth of HTML");
 1154+ //wfDebug('Parsing '.strlen($to)." characters worth of HTML\n");
11351155 if (!xml_parse($xml_parser, '<?xml version="1.0" encoding="UTF-8"?>'.Sanitizer::hackDocType().'<body>', FALSE)
11361156 || !xml_parse($xml_parser, $to, FALSE)
11371157 || !xml_parse($xml_parser, '</body>', TRUE)){
1138 - wfDebug(sprintf("XML error in HTML diff: %s at line %d",xml_error_string(xml_get_error_code($xml_parser)),xml_get_current_line_number($xml_parser)));
 1158+ wfDebug(sprintf("XML error in HTML diff: %s at line %d\n",xml_error_string(xml_get_error_code($xml_parser)),xml_get_current_line_number($xml_parser)));
11391159 }
11401160 xml_parser_free($xml_parser);
11411161 unset($to);
11421162
1143 - $diffengine = new _DiffEngine();
 1163+ $diffengine = new WikiDiff3();
11441164 $differences = $this->preProcess($diffengine->diff_range($domfrom->getDiffLines(), $domto->getDiffLines()));
11451165 unset($xml_parser,$diffengine);
11461166
 1167+
11471168 $domdiffer = new TextNodeDiffer($domto, $domfrom);
11481169
11491170 $currentIndexLeft = 0;
@@ -1163,10 +1184,10 @@
11641185 if ($currentIndexLeft < $domdiffer->lengthOld()) {
11651186 $domdiffer->handlePossibleChangedPart($currentIndexLeft,$domdiffer->lengthOld(), $currentIndexRight,$domdiffer->lengthNew());
11661187 }
1167 -
11681188 $domdiffer->expandWhiteSpace();
11691189 $output = new HTMLOutput('htmldiff', $this->output);
11701190 $output->parse($domdiffer->getBodyNode());
 1191+ wfProfileOut( __METHOD__ );
11711192 }
11721193
11731194 private function preProcess(/*array*/ $differences){
@@ -1244,15 +1265,15 @@
12451266 if($nbOthers == 0 || $nbThis == 0){
12461267 return -log(0);
12471268 }
1248 -
1249 - $diffengine = new _DiffEngine();
1250 - $diffengine->diff_local($this->leafs, $other->leafs);
12511269
1252 - $distanceThis = array_sum($diffengine->xchanged);
1253 - $distanceOther = array_sum($diffengine->ychanged);
 1270+ $diffengine = new WikiDiff3(25000,1.35);
 1271+ $diffengine->diff($this->leafs, $other->leafs);
12541272
1255 - return ((0.0 + $distanceOther) / $nbOthers + (0.0 + $distanceThis)
1256 - / $nbThis) / 2.0;
 1273+ $lcsLength = $diffengine->getLcsLength();
 1274+
 1275+ $distanceThis = $nbThis-$lcsLength;
 1276+
 1277+ return (2.0 - $lcsLength/$nbOthers - $lcsLength/$nbThis) / 2.0;
12571278 }
12581279 }
12591280
@@ -1301,7 +1322,7 @@
13021323 public function getResult(AncestorComparator $other) {
13031324 $result = new AncestorComparatorResult();
13041325
1305 - $diffengine = new _DiffEngine();
 1326+ $diffengine = new WikiDiff3(10000,1.35);
13061327 $differences = $diffengine->diff_range($this->ancestorsText, $other->ancestorsText);
13071328
13081329 if (sizeof($differences) == 0){
@@ -1528,7 +1549,6 @@
15291550 }
15301551
15311552 public function getRemovedDescription(ChangeText $txt) {
1532 -
15331553 if ($this->sem == TagToStringFactory::MOVED) {
15341554 $txt->addText($this->getMovedOutOf() . ' ' . strtolower($this->getArticle()) . ' ');
15351555 $txt->addHtml('<b>');
@@ -1550,7 +1570,6 @@
15511571 }
15521572
15531573 public function getAddedDescription(ChangeText $txt) {
1554 -
15551574 if ($this->sem == TagToStringFactory::MOVED) {
15561575 $txt->addText($this->getMovedTo() . ' ' . strtolower($this->getArticle()) . ' ');
15571576 $txt->addHtml('<b>');
@@ -1869,12 +1888,12 @@
18701889 $this->addAttributes($mod, $attrs);
18711890 $attrs['onclick'] = 'return tipC(constructToolTipC(this));';
18721891 $this->handler->startElement('span', $attrs);
1873 -
 1892+
18741893 //tooltip
18751894 $this->handler->startElement('span', array('class'=>'tip'));
18761895 $this->handler->characters($mod->getChanges());
18771896 $this->handler->endElement('span');
1878 -
 1897+
18791898 $changeStarted = true;
18801899 $changeTXT = $mod->getChanges();
18811900 } else if (!$remStarted && $mod->getType() == Modification::REMOVED) {
@@ -1971,9 +1990,9 @@
19721991 }
19731992
19741993 class DelegatingContentHandler{
1975 -
 1994+
19761995 private $delegate;
1977 -
 1996+
19781997 function __construct($delegate){
19791998 $this->delegate=$delegate;
19801999 }
Index: branches/visual_diff/phase3/includes/Diff.php
@@ -43,32 +43,35 @@
4444
4545 //State variables
4646 private $maxDifferences;
47 -
 47+ private $lcsLengthCorrectedForHeuristic = false;
 48+
4849 //Output variables
4950 public $length;
 51+ public $removed;
5052 public $added;
51 - public $removed;
5253 public $heuristicUsed;
53 -
 54+
5455 function __construct($tooLong = 2000000, $powLimit = 1.45){
5556 $this->tooLong = $tooLong;
5657 $this->powLimit = $powLimit;
5758 }
5859
5960 public function diff(/*array*/ $from, /*array*/ $to){
 61+ //remember initial lengths
6062 $m = sizeof($from);
6163 $n = sizeof($to);
62 -
 64+
6365 $this->heuristicUsed = FALSE;
6466
65 - $result_from = $m>0?array_fill(0,$m,true):array();
66 - $result_to = $n>0?array_fill(0,$n,true):array();
 67+ //output
 68+ $removed = $m>0?array_fill(0,$m,true):array();
 69+ $added = $n>0?array_fill(0,$n,true):array();
6770
6871 //reduce the complexity for the next step (intentionally done twice)
6972 //remove common tokens at the start
7073 $i=0;
7174 while($i<$m && $i<$n && $from[$i]===$to[$i]){
72 - $result_from[$i] = $result_to[$i] = false;
 75+ $removed[$i] = $added[$i] = false;
7376 unset($from[$i],$to[$i]);
7477 ++$i;
7578 }
@@ -76,12 +79,12 @@
7780 //remove common tokens at the end
7881 $j=1;
7982 while($i+$j<=$m && $i+$j<=$n && $from[$m-$j]===$to[$n-$j]){
80 - $result_from[$m-$j] = $result_to[$n-$j] = false;
 83+ $removed[$m-$j] = $added[$n-$j] = false;
8184 unset($from[$m-$j],$to[$n-$j]);
8285 ++$j;
8386 }
8487
85 - $newFrom = $newFromIndex = $newTo = $newToIndex = array();
 88+ $this->from = $newFromIndex = $this->to = $newToIndex = array();
8689
8790 //remove tokens not in both sequences
8891 $shared = array_fill_keys($from,false);
@@ -101,71 +104,100 @@
102105 }
103106 }
104107
105 - unset($shared);
 108+ unset($shared, $from, $to);
106109
107110 $this->m = sizeof($this->from);
108111 $this->n = sizeof($this->to);
 112+
109113 $this->removed = $this->m>0?array_fill(0,$this->m,true):array();
110114 $this->added = $this->n>0?array_fill(0,$this->n,true):array();
111115
112 - $this->longestCommonSubsequence();
 116+ if ($this->m == 0 || $this->n == 0) {
 117+ $this->length = 0;
 118+ }else{
 119+ $this->maxDifferences = ceil(($this->m + $this->n) / 2.0);
 120+ if ($this->m * $this->n > $this->tooLong) {
 121+ // limit complexity to D^POW_LIMIT for long sequences
 122+ $this->maxDifferences = floor(pow($this->maxDifferences, $this->powLimit - 1.0));
 123+ wfDebug("Limiting max number of differences to $this->maxDifferences\n");
 124+ }
113125
 126+ /*
 127+ * The common prefixes and suffixes are always part of some LCS, include
 128+ * them now to reduce our search space
 129+ */
 130+ $max = min($this->m, $this->n);
 131+ for ($forwardBound = 0; $forwardBound < $max
 132+ && $this->from[$forwardBound]===$this->to[$forwardBound]; ++$forwardBound) {
 133+ $this->removed[$forwardBound] = $this->added[$forwardBound] = false;
 134+ }
 135+
 136+ $backBoundL1 = $this->m - 1;
 137+ $backBoundL2 = $this->n - 1;
 138+
 139+ while ($backBoundL1 >= $forwardBound && $backBoundL2 >= $forwardBound
 140+ && $this->from[$backBoundL1] === $this->to[$backBoundL2]) {
 141+ $this->removed[$backBoundL1--] = $this->added[$backBoundL2--] = false;
 142+ }
 143+
 144+ $temp = array_fill(0,$this->m + $this->n + 1, 0);
 145+ $V = array($temp,$temp);
 146+ $snake = array(0,0,0);
 147+
 148+ $this->length = $forwardBound + $this->m - $backBoundL1 - 1
 149+ + $this->lcs_rec($forwardBound, $backBoundL1, $forwardBound, $backBoundL2,
 150+ $V, $snake);
 151+ }
 152+
114153 $this->m = $m;
115154 $this->n = $n;
 155+
116156 $this->length += $i+$j-1;
117157
118 - foreach($this->removed as $key => $removed){
119 - if(!$removed){
120 - $result_from[$newFromIndex[$key]] = false;
 158+ foreach($this->removed as $key => $removed_elem){
 159+ if(!$removed_elem){
 160+ $removed[$newFromIndex[$key]] = false;
121161 }
122162 }
123 - foreach($this->added as $key => $added){
124 - if(!$added){
125 - $result_to[$newToIndex[$key]] = false;
 163+ foreach($this->added as $key => $added_elem){
 164+ if(!$added_elem){
 165+ $added[$newToIndex[$key]] = false;
126166 }
127167 }
128 - unset($this->added, $this->removed);
129 - return array($result_from, $result_to);
 168+ $this->removed = $removed;
 169+ $this->added = $added;
130170 }
131171
132 - public function longestCommonSubsequence() {
133 - if ($this->m == 0 || $this->n == 0) {
134 - $this->length = 0;
135 - return;
136 - }
 172+ function diff_range ($from_lines, $to_lines){
 173+ // Diff and store locally
 174+ $this->diff($from_lines, $to_lines);
 175+ unset($from_lines, $to_lines);
137176
138 - $this->maxDifferences = ceil(($this->m + $this->n) / 2.0);
139 - if ($this->m * $this->n > $this->tooLong) {
140 - // limit complexity to D^POW_LIMIT for long sequences
141 - $this->maxDifferences = floor(pow($this->maxDifferences, $this->powLimit - 1.0));
142 - wfDebug("Limiting max number of differences to $this->maxDifferences");
143 - }
 177+ $ranges = array();
 178+ $xi = $yi = 0;
 179+ while ($xi < $this->m || $yi < $this->n) {
 180+ // Matching "snake".
 181+ while ( $xi < $this->m && $yi < $this->n
 182+ && !$this->removed[$xi] && !$this->added[$yi]) {
 183+ ++$xi;
 184+ ++$yi;
 185+ }
 186+ // Find deletes & adds.
 187+ $xstart = $xi;
 188+ while ($xi < $this->m && $this->removed[$xi]){
 189+ ++$xi;
 190+ }
144191
145 - /*
146 - * The common prefixes and suffixes are always part of some LCS, include
147 - * them now to reduce our search space
148 - */
149 - $max = min($this->m, $this->n);
150 - for ($forwardBound = 0; $forwardBound < $max
151 - && $this->from[$forwardBound]===$this->to[$forwardBound]; ++$forwardBound) {
152 - $this->removed[$forwardBound] = $this->added[$forwardBound] = false;
153 - }
 192+ $ystart = $yi;
 193+ while ($yi < $this->n && $this->added[$yi]){
 194+ ++$yi;
 195+ }
154196
155 - $backBoundL1 = $this->m - 1;
156 - $backBoundL2 = $this->n - 1;
157 -
158 - while ($backBoundL1 >= $forwardBound && $backBoundL2 >= $forwardBound
159 - && $this->from[$backBoundL1] === $this->to[$backBoundL2]) {
160 - $this->removed[$backBoundL1--] = $this->added[$backBoundL2--] = false;
 197+ if ($xi>$xstart || $yi>$ystart){
 198+ $ranges[] = new RangeDifference($xstart,$xi,$ystart,$yi);
 199+ }
161200 }
162 -
163 - $temp = array_fill(0,$this->m + $this->n + 1, 0);
164 - $V = array($temp,$temp);
165 - $snake = array(0,0,0);
166 -
167 - $this->length = $forwardBound + $this->m - $backBoundL1 - 1
168 - + $this->lcs_rec($forwardBound, $backBoundL1, $forwardBound, $backBoundL2,
169 - $V, $snake);
 201+ return $ranges;
170202 }
171203
172204 private function lcs_rec($bottoml1, $topl1, $bottoml2, $topl2, &$V, &$snake) {
@@ -267,13 +299,13 @@
268300
269301 $absx = $snake0 = $x + $bottoml1;
270302 $absy = $snake1 = $x - $k + $bottoml2;
271 -
 303+
272304 while ($absx < $maxabsx && $absy < $maxabsy && $from[$absx] === $to[$absy]) {
273305 ++$absx;
274306 ++$absy;
275307 }
276308 $x = $absx-$bottoml1;
277 -
 309+
278310 $snake2 = $absx -$snake0;
279311 $V0[$limit + $k] = $x;
280312 if ($k >= $delta - $d + 1 && $k <= $delta + $d - 1
@@ -340,7 +372,7 @@
341373
342374 $absx = $snake0 = $x + $bottoml1;
343375 $absy = $snake1 = $x - $k + $bottoml2;
344 -
 376+
345377 while ($absx < $maxabsx && $absy < $maxabsy && $from[$absx] === $to[$absy]) {
346378 ++$absx;
347379 ++$absy;
@@ -410,7 +442,7 @@
411443 $snake0 = $bottoml1 + $most_progress[0];
412444 $snake1 = $bottoml2 + $most_progress[1];
413445 $snake2 = 0;
414 - wfDebug('Computing the LCS is too expensive. Using a heuristic.');
 446+ wfDebug('Computing the LCS is too expensive. Using a heuristic.\n');
415447 $this->heuristicUsed = true;
416448 return 5; /*
417449 * HACK: since we didn't really finish the LCS computation
@@ -501,5 +533,37 @@
502534 return $max_progress[floor($num_progress / 2)];
503535 }
504536
 537+ public function getLcsLength(){
 538+ if($this->heuristicUsed && !$this->lcsLengthCorrectedForHeuristic){
 539+ $this->lcsLengthCorrectedForHeuristic = true;
 540+ $this->length = $this->m-array_sum($this->added);
 541+ }
 542+ return $this->length;
 543+ }
 544+
505545 }
506546
 547+/**
 548+ * Alternative representation of a set of changes, by the index
 549+ * ranges that are changed.
 550+ */
 551+class RangeDifference {
 552+
 553+ public $leftstart;
 554+ public $leftend;
 555+ public $leftlength;
 556+
 557+ public $rightstart;
 558+ public $rightend;
 559+ public $rightlength;
 560+
 561+ function __construct($leftstart, $leftend, $rightstart, $rightend){
 562+ $this->leftstart = $leftstart;
 563+ $this->leftend = $leftend;
 564+ $this->leftlength = $leftend-$leftstart;
 565+ $this->rightstart = $rightstart;
 566+ $this->rightend = $rightend;
 567+ $this->rightlength = $rightend-$rightstart;
 568+ }
 569+}
 570+
Index: branches/visual_diff/phase3/includes/DifferenceEngine.php
@@ -332,8 +332,6 @@
333333
334334 $this->showDiffStyle();
335335
336 - require_once( "$IP/includes/HTMLDiff.php" );
337 -
338336 #add deleted rev tag if needed
339337 if( !$this->mNewRev->userCan(Revision::DELETED_TEXT) ) {
340338 $wgOut->addWikiMsg( 'rev-deleted-text-permission' );
@@ -378,6 +376,9 @@
379377 $newHtml = $parserOutput->getText();
380378 wfRunHooks( 'OutputPageBeforeHTML',array( &$wgOut, &$newHtml ) );
381379
 380+ unset($parserOutput,$popts);
 381+
 382+ require_once( "$IP/includes/HTMLDiff.php" );
382383 $differ = new HTMLDiffer(new DelegatingContentHandler($wgOut));
383384 $differ->htmlDiff($oldHtml, $newHtml);
384385
@@ -958,30 +959,6 @@
959960 }
960961
961962 /**
962 - * Alternative representation of a set of changes, by the index
963 - * ranges that are changed.
964 - */
965 -class RangeDifference {
966 -
967 - public $leftstart;
968 - public $leftend;
969 - public $leftlength;
970 -
971 - public $rightstart;
972 - public $rightend;
973 - public $rightlength;
974 -
975 - function __construct($leftstart, $leftend, $rightstart, $rightend){
976 - $this->leftstart = $leftstart;
977 - $this->leftend = $leftend;
978 - $this->leftlength = $leftend-$leftstart;
979 - $this->rightstart = $rightstart;
980 - $this->rightend = $rightend;
981 - $this->rightlength = $rightend-$rightstart;
982 - }
983 -}
984 -
985 -/**
986963 * Class used internally by Diff to actually compute the diffs.
987964 *
988965 * The algorithm used here is mostly lifted from the perl module
@@ -1059,44 +1036,6 @@
10601037 return $edits;
10611038 }
10621039
1063 - function diff_range ($from_lines, $to_lines){
1064 - wfProfileIn( __METHOD__ );
1065 -
1066 - // Diff and store locally
1067 - $this->diff_local($from_lines, $to_lines);
1068 -
1069 - // Compute the ranges operations.
1070 - $n_from = sizeof($from_lines);
1071 - $n_to = sizeof($to_lines);
1072 -
1073 - $ranges = array();
1074 - $xi = $yi = 0;
1075 - while ($xi < $n_from || $yi < $n_to) {
1076 - // Matching "snake".
1077 - while ( $xi < $n_from && $yi < $n_to
1078 - && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
1079 - ++$xi;
1080 - ++$yi;
1081 - }
1082 - // Find deletes & adds.
1083 - $xstart = $xi;
1084 - while ($xi < $n_from && $this->xchanged[$xi]){
1085 - ++$xi;
1086 - }
1087 -
1088 - $ystart = $yi;
1089 - while ($yi < $n_to && $this->ychanged[$yi]){
1090 - ++$yi;
1091 - }
1092 -
1093 - if ($xi>$xstart || $yi>$ystart){
1094 - $ranges[] = new RangeDifference($xstart,$xi,$ystart,$yi);
1095 - }
1096 - }
1097 - wfProfileOut( __METHOD__ );
1098 - return $ranges;
1099 - }
1100 -
11011040 function diff_local ($from_lines, $to_lines) {
11021041 global $wgExternalDiffEngine;
11031042 wfProfileIn( __METHOD__);
@@ -1106,7 +1045,10 @@
11071046 global $IP;
11081047 require_once( "$IP/includes/Diff.php" );
11091048 $wikidiff3 = new WikiDiff3();
1110 - list($this->xchanged, $this->ychanged) = $wikidiff3->diff($from_lines, $to_lines);
 1049+ $wikidiff3->diff($from_lines, $to_lines);
 1050+ $this->xchanged = $wikidiff3->removed;
 1051+ $this->ychanged = $wikidiff3->added;
 1052+ unset($wikidiff3);
11111053 }else{
11121054 // old diff
11131055 $n_from = sizeof($from_lines);

Status & tagging log