Index: trunk/phase3/skins/common/diff.js |
— | — | @@ -17,4 +17,16 @@ |
18 | 18 | lastSheet.insertRule( |
19 | 19 | "table.diff td div { overflow: visible; }", |
20 | 20 | lastSheet.cssRules.length); |
21 | | -} |
\ No newline at end of file |
| 21 | +} |
| 22 | + |
| 23 | +function tipA(content){ |
| 24 | + return false; |
| 25 | +} |
| 26 | + |
| 27 | +function tipR(content){ |
| 28 | + return false; |
| 29 | +} |
| 30 | + |
| 31 | +function tipC(content){ |
| 32 | + return false; |
| 33 | +} |
Index: trunk/phase3/skins/common/diff.css |
— | — | @@ -74,3 +74,66 @@ |
75 | 75 | table: */ |
76 | 76 | /* overflow: visible; */ |
77 | 77 | } |
| 78 | + |
| 79 | +/* |
| 80 | + * Styles for the HTML Diff |
| 81 | + */ |
| 82 | +span.diff-html-added { |
| 83 | + font-size: 100%; |
| 84 | + background-color: #20ff20 |
| 85 | +} |
| 86 | + |
| 87 | +span.diff-html-removed { |
| 88 | + font-size: 100%; |
| 89 | + text-decoration: line-through; |
| 90 | + background-color: #ff2020 |
| 91 | +} |
| 92 | + |
| 93 | +span.diff-html-changed { |
| 94 | + background: url(images/diffunderline.gif) bottom repeat-x; |
| 95 | + *background-color: #c6c6fd; /* light blue */ |
| 96 | +} |
| 97 | + |
| 98 | +span.diff-html-added img{ |
| 99 | + border: 5px solid #ccffcc; |
| 100 | +} |
| 101 | + |
| 102 | +span.diff-html-removed img{ |
| 103 | + border: 5px solid #fdc6c6; |
| 104 | +} |
| 105 | + |
| 106 | +span.diff-html-changed img{ |
| 107 | + border: 5px dotted #000099; |
| 108 | + |
| 109 | +} |
| 110 | + |
| 111 | +span.diff-html-changed { |
| 112 | + position: relative; /* this is key */ |
| 113 | + cursor: help; |
| 114 | +} |
| 115 | + |
| 116 | +span.diff-html-changed span.tip { |
| 117 | + display: none; /* so is this */ |
| 118 | +} |
| 119 | + |
| 120 | +/* tooltip will display on :hover event */ |
| 121 | + |
| 122 | +span.diff-html-changed:hover span.tip { |
| 123 | + display: block; |
| 124 | + z-index: 95; |
| 125 | + position: absolute; |
| 126 | + top: 2.5em; |
| 127 | + left: 0; |
| 128 | + width: auto; |
| 129 | + line-height: 1.2em; |
| 130 | + padding: 3px 7px 4px 6px; |
| 131 | + border: 1px solid #336; |
| 132 | + background-color: #f7f7ee; |
| 133 | + font-family: arial, helvetica, sans-serif; |
| 134 | + font-size: 10px; |
| 135 | + font-weight: normal; |
| 136 | + color: #000; |
| 137 | + text-align: left; |
| 138 | +} |
| 139 | + |
| 140 | + |
Index: trunk/phase3/skins/common/images/diffunderline.gif |
— | — | @@ -0,0 +1 @@ |
| 2 | +GIF89a � ������ � !� � , �%!g ; |
\ No newline at end of file |
Index: trunk/phase3/includes/HTMLDiff.php |
— | — | @@ -0,0 +1,1997 @@ |
| 2 | +<?php |
| 3 | +/* Copyright (C) 2008 Guy Van den Broeck <guy@guyvdb.eu> |
| 4 | + * |
| 5 | + * This program is free software; you can redistribute it and/or modify |
| 6 | + * it under the terms of the GNU General Public License as published by |
| 7 | + * the Free Software Foundation; either version 2 of the License, or |
| 8 | + * (at your option) any later version. |
| 9 | + * |
| 10 | + * This program is distributed in the hope that it will be useful, |
| 11 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 12 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 13 | + * GNU General Public License for more details. |
| 14 | + * |
| 15 | + * You should have received a copy of the GNU General Public License |
| 16 | + * along with this program; if not, write to the Free Software |
| 17 | + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
| 18 | + * or see http://www.gnu.org/ |
| 19 | + */ |
| 20 | + |
| 21 | + |
| 22 | +/** |
| 23 | + * Any element in the DOM tree of an HTML document. |
| 24 | + */ |
| 25 | +abstract class Node{ |
| 26 | + |
| 27 | + protected $parent; |
| 28 | + |
| 29 | + |
| 30 | + function __construct($parent){ |
| 31 | + $this->parent = $parent; |
| 32 | + if(!is_null($parent)){ |
| 33 | + $parent->addChild($this); |
| 34 | + } |
| 35 | + } |
| 36 | + |
| 37 | + public function getParent(){ |
| 38 | + return $this->parent; |
| 39 | + } |
| 40 | + |
| 41 | + public function getParentTree(){ |
| 42 | + if(!is_null($this->parent)){ |
| 43 | + $parentTree = $this->parent->getParentTree(); |
| 44 | + $parentTree[] = $this->parent; |
| 45 | + return $parentTree; |
| 46 | + }else{ |
| 47 | + return array(); |
| 48 | + } |
| 49 | + } |
| 50 | + |
| 51 | + public abstract function getMinimalDeletedSet($id); |
| 52 | + |
| 53 | + public function detectIgnorableWhiteSpace(){ |
| 54 | + // no op |
| 55 | + } |
| 56 | + |
| 57 | + public function getLastCommonParent(Node $other){ |
| 58 | + if(is_null($other)) |
| 59 | + throw new Exception('The given node is NULL'); |
| 60 | + |
| 61 | + $result = new LastCommonParentResult(); |
| 62 | + |
| 63 | + $myParents = $this->getParentTree(); |
| 64 | + $otherParents = $other->getParentTree(); |
| 65 | + |
| 66 | + $i = 1; |
| 67 | + $isSame = true; |
| 68 | + while ($isSame && $i < sizeof($myParents) && $i < sizeof($otherParents)) { |
| 69 | + if (!$myParents[$i]->isSameTag($otherParents[$i])) { |
| 70 | + $isSame = false; |
| 71 | + } else { |
| 72 | + // After the while, the index i-1 must be the last common parent |
| 73 | + $i++; |
| 74 | + } |
| 75 | + } |
| 76 | + |
| 77 | + $result->setLastCommonParentDepth($i - 1); |
| 78 | + $result->setLastCommonParent($myParents[$i - 1]); |
| 79 | + |
| 80 | + if (!$isSame) { |
| 81 | + $result->setIndexInLastCommonParent($myParents[$i - 1]->getIndexOf($myParents[$i])); |
| 82 | + $result->setSplittingNeeded(); |
| 83 | + } else if (sizeof($myParents) < sizeof($otherParents)) { |
| 84 | + $result->setIndexInLastCommonParent($myParents[$i - 1]->getIndexOf($this)); |
| 85 | + } else if (sizeof($myParents) > sizeof($otherParents)) { |
| 86 | + // All tags matched but there are tags left in this tree |
| 87 | + $result->setIndexInLastCommonParent($myParents[$i - 1]->getIndexOf($myParents[$i])); |
| 88 | + $result->setSplittingNeeded(); |
| 89 | + } else { |
| 90 | + // All tags matched untill the very last one in both trees |
| 91 | + // or there were no tags besides the BODY |
| 92 | + $result->setIndexInLastCommonParent($myParents[$i - 1]->getIndexOf($this)); |
| 93 | + } |
| 94 | + return $result; |
| 95 | + } |
| 96 | + |
| 97 | + public function setParent($parent) { |
| 98 | + $this->parent = $parent; |
| 99 | + } |
| 100 | + |
| 101 | + public abstract function copyTree(); |
| 102 | + |
| 103 | + public function inPre() { |
| 104 | + $tree = $this->getParentTree(); |
| 105 | + foreach ($tree as $ancestor) { |
| 106 | + if ($ancestor->isPre()) { |
| 107 | + return true; |
| 108 | + } |
| 109 | + } |
| 110 | + return false; |
| 111 | + } |
| 112 | + |
| 113 | + private $whiteBefore = false; |
| 114 | + |
| 115 | + private $whiteAfter = false; |
| 116 | + |
| 117 | + public function isWhiteBefore() { |
| 118 | + return $this->whiteBefore; |
| 119 | + } |
| 120 | + |
| 121 | + public function setWhiteBefore($whiteBefore) { |
| 122 | + $this->whiteBefore = $whiteBefore; |
| 123 | + } |
| 124 | + |
| 125 | + public function isWhiteAfter() { |
| 126 | + return $this->whiteAfter; |
| 127 | + } |
| 128 | + |
| 129 | + public function setWhiteAfter($whiteAfter) { |
| 130 | + $this->whiteAfter = $whiteAfter; |
| 131 | + } |
| 132 | + |
| 133 | + public abstract function getLeftMostChild(); |
| 134 | + |
| 135 | + public abstract function getRightMostChild(); |
| 136 | +} |
| 137 | + |
| 138 | +/** |
| 139 | + * Node that can contain other nodes. Represents an HTML tag. |
| 140 | + */ |
| 141 | +class TagNode extends Node{ |
| 142 | + |
| 143 | + public $children = array(); |
| 144 | + |
| 145 | + protected $qName; |
| 146 | + |
| 147 | + protected $attributes = array(); |
| 148 | + |
| 149 | + function __construct($parent, $qName, /*array*/ $attributes) { |
| 150 | + parent::__construct($parent); |
| 151 | + $this->qName = strtolower($qName); |
| 152 | + foreach($attributes as $key => $value){ |
| 153 | + $this->attributes[strtolower($key)] = $value; |
| 154 | + } |
| 155 | + } |
| 156 | + |
| 157 | + public function addChild(Node $node, $index=-1) { |
| 158 | + if ($node->getParent() !== $this) |
| 159 | + throw new Exception( |
| 160 | + 'The new child must have this node as a parent.'); |
| 161 | + if($index == -1){ |
| 162 | + $this->children[] = $node; |
| 163 | + }else{ |
| 164 | + array_splice($this->children,$index,0,array($node)); |
| 165 | + } |
| 166 | + } |
| 167 | + |
| 168 | + public function getIndexOf(Node $child) { |
| 169 | + // don't trust array_search with objects |
| 170 | + foreach($this->children as $key=>$value){ |
| 171 | + if($value === $child){ |
| 172 | + return $key; |
| 173 | + } |
| 174 | + } |
| 175 | + return NULL; |
| 176 | + } |
| 177 | + |
| 178 | + public function getChild($i) { |
| 179 | + return $this->children[$i]; |
| 180 | + } |
| 181 | + |
| 182 | + public function getNbChildren() { |
| 183 | + return count($this->children); |
| 184 | + } |
| 185 | + |
| 186 | + public function getQName() { |
| 187 | + return $this->qName; |
| 188 | + } |
| 189 | + |
| 190 | + public function getAttributes() { |
| 191 | + return $this->attributes; |
| 192 | + } |
| 193 | + |
| 194 | + public function isSameTag(TagNode $other) { |
| 195 | + if (is_null($other)) |
| 196 | + return false; |
| 197 | + return $this->getOpeningTag() === $other->getOpeningTag(); |
| 198 | + } |
| 199 | + |
| 200 | + public function getOpeningTag() { |
| 201 | + $s = '<'.$this->getQName(); |
| 202 | + foreach ($this->attributes as $attribute => $value) { |
| 203 | + $s .= ' ' . $attribute . '="' . $value . '"'; |
| 204 | + } |
| 205 | + return $s .= '>'; |
| 206 | + } |
| 207 | + |
| 208 | + public function getEndTag() { |
| 209 | + return '</' . $this->getQName() + '>"'; |
| 210 | + } |
| 211 | + |
| 212 | + public function getMinimalDeletedSet($id) { |
| 213 | + $nodes = array(); |
| 214 | + |
| 215 | + if ($this->getNbChildren() == 0) |
| 216 | + return $nodes; |
| 217 | + |
| 218 | + $hasNotDeletedDescendant = false; |
| 219 | + |
| 220 | + foreach ($this->children as $child) { |
| 221 | + $childrenChildren = $child->getMinimalDeletedSet($id); |
| 222 | + $nodes = array_merge($nodes, $childrenChildren); |
| 223 | + if (!$hasNotDeletedDescendant |
| 224 | + && !(count($childrenChildren) == 1 && $childrenChildren[0]===$child)) { |
| 225 | + // This child is not entirely deleted |
| 226 | + $hasNotDeletedDescendant = true; |
| 227 | + } |
| 228 | + } |
| 229 | + if (!$hasNotDeletedDescendant) { |
| 230 | + $nodes = array($this); |
| 231 | + } |
| 232 | + return $nodes; |
| 233 | + } |
| 234 | + |
| 235 | + public function toString() { |
| 236 | + return $this->getOpeningTag(); |
| 237 | + } |
| 238 | + |
| 239 | + public function splitUntill(TagNode $parent, Node $split, $includeLeft) { |
| 240 | + $splitOccured = false; |
| 241 | + if ($parent !== $this) { |
| 242 | + $part1 = new TagNode(NULL, $this->getQName(), $this->getAttributes()); |
| 243 | + $part2 = new TagNode(NULL, $this->getQName(), $this->getAttributes()); |
| 244 | + $part1->setParent($this->getParent()); |
| 245 | + $part2->setParent($this->getParent()); |
| 246 | + |
| 247 | + $i = 0; |
| 248 | + $nbChildren = $this->getNbChildren(); |
| 249 | + while ($i < $nbChildren && $this->children[$i] !== $split) { |
| 250 | + $this->children[$i]->setParent($part1); |
| 251 | + $part1->addChild($this->children[$i]); |
| 252 | + ++$i; |
| 253 | + } |
| 254 | + if ($i < $nbChildren) { |
| 255 | + if ($includeLeft) { |
| 256 | + $this->children[$i]->setParent($part1); |
| 257 | + $part1->addChild($this->children[$i]); |
| 258 | + } else { |
| 259 | + $this->children[$i]->setParent($part2); |
| 260 | + $part2->addChild($this->children[$i]); |
| 261 | + } |
| 262 | + ++$i; |
| 263 | + } |
| 264 | + while ($i < $nbChildren) { |
| 265 | + $this->children[$i]->setParent($part2); |
| 266 | + $part2->addChild($this->children[$i]); |
| 267 | + ++$i; |
| 268 | + } |
| 269 | + $myindexinparent = $this->parent->getIndexOf($this); |
| 270 | + if ($part1->getNbChildren() > 0) |
| 271 | + $this->parent->addChild($part1,$myindexinparent); |
| 272 | + |
| 273 | + if ($part2->getNbChildren() > 0) |
| 274 | + $this->parent->addChild($part2,$myindexinparent); |
| 275 | + |
| 276 | + if ($part1->getNbChildren() > 0 && $part2->getNbChildren() > 0) { |
| 277 | + $splitOccured = true; |
| 278 | + } |
| 279 | + |
| 280 | + $this->parent->removeChild($myindexinparent); |
| 281 | + |
| 282 | + if ($includeLeft) |
| 283 | + $this->parent->splitUntill($parent, $part1, $includeLeft); |
| 284 | + else |
| 285 | + $this->parent->splitUntill($parent, $part2, $includeLeft); |
| 286 | + } |
| 287 | + return $splitOccured; |
| 288 | + |
| 289 | + } |
| 290 | + |
| 291 | + private function removeChild($index) { |
| 292 | + unset($this->children[$index]); |
| 293 | + $this->children = array_values($this->children); |
| 294 | + } |
| 295 | + |
| 296 | + public static $blocks = array('html'=>TRUE,'body'=>TRUE,'p'=>TRUE,'blockquote'=>TRUE, |
| 297 | + 'h1'=>TRUE,'h2'=>TRUE,'h3'=>TRUE,'h4'=>TRUE,'h5'=>TRUE,'pre'=>TRUE,'div'=>TRUE,'ul'=>TRUE,'ol'=>TRUE,'li'=>TRUE, |
| 298 | + 'table'=>TRUE,'tbody'=>TRUE,'tr'=>TRUE,'td'=>TRUE,'th'=>TRUE,'br'=>TRUE); |
| 299 | + |
| 300 | + public static function isBlockLevelName($qName) { |
| 301 | + return array_key_exists(strtolower($qName),self::$blocks); |
| 302 | + } |
| 303 | + |
| 304 | + public static function isBlockLevelNode(Node $node) { |
| 305 | + if(! $node instanceof TagNode) |
| 306 | + return false; |
| 307 | + return self::isBlockLevelName($node->getQName()); |
| 308 | + } |
| 309 | + |
| 310 | + public function isBlockLevel() { |
| 311 | + return self::isBlockLevelNode($this); |
| 312 | + } |
| 313 | + |
| 314 | + public static function isInlineName($qName) { |
| 315 | + return !self::isBlockLevelName($qName); |
| 316 | + } |
| 317 | + |
| 318 | + public static function isInlineNode(Node $node) { |
| 319 | + return !self::isBlockLevelNode($node); |
| 320 | + } |
| 321 | + |
| 322 | + public function isInline() { |
| 323 | + return self::isInlineNode($this); |
| 324 | + } |
| 325 | + |
| 326 | + public function copyTree() { |
| 327 | + $newThis = new TagNode(NULL, $this->getQName(), $this->getAttributes()); |
| 328 | + $newThis->setWhiteBefore($this->isWhiteBefore()); |
| 329 | + $newThis->setWhiteAfter($this->isWhiteAfter()); |
| 330 | + foreach($this->children as $child) { |
| 331 | + $newChild = $child->copyTree(); |
| 332 | + $newChild->setParent($newThis); |
| 333 | + $newThis->addChild($newChild); |
| 334 | + } |
| 335 | + return $newThis; |
| 336 | + } |
| 337 | + |
| 338 | + public function getMatchRatio(TagNode $other) { |
| 339 | + $txtComp = new TextOnlyComparator($other); |
| 340 | + return $txtComp->getMatchRatio(new TextOnlyComparator($this)); |
| 341 | + } |
| 342 | + |
| 343 | + public function expandWhiteSpace() { |
| 344 | + $shift = 0; |
| 345 | + $spaceAdded = false; |
| 346 | + |
| 347 | + $nbOriginalChildren = $this->getNbChildren(); |
| 348 | + for ($i = 0; $i < $nbOriginalChildren; ++$i) { |
| 349 | + $child = $this->getChild($i + $shift); |
| 350 | + |
| 351 | + if($child instanceof TagNode){ |
| 352 | + if (!$child->isPre()) { |
| 353 | + $child->expandWhiteSpace(); |
| 354 | + } |
| 355 | + } |
| 356 | + if (!$spaceAdded && $child->isWhiteBefore()) { |
| 357 | + $ws = new WhiteSpaceNode(NULL, ' ', $child->getLeftMostChild()); |
| 358 | + $ws->setParent($this); |
| 359 | + $this->addChild($ws,$i + ($shift++)); |
| 360 | + } |
| 361 | + if ($child->isWhiteAfter()) { |
| 362 | + $ws = new WhiteSpaceNode(NULL, ' ', $child->getRightMostChild()); |
| 363 | + $ws->setParent($this); |
| 364 | + $this->addChild($ws,$i + 1 + ($shift++)); |
| 365 | + $spaceAdded = true; |
| 366 | + } else { |
| 367 | + $spaceAdded = false; |
| 368 | + } |
| 369 | + |
| 370 | + } |
| 371 | + } |
| 372 | + |
| 373 | + public function getLeftMostChild() { |
| 374 | + if ($this->getNbChildren() < 1) |
| 375 | + return $this; |
| 376 | + return $this->getChild(0)->getLeftMostChild(); |
| 377 | + |
| 378 | + } |
| 379 | + |
| 380 | + public function getRightMostChild() { |
| 381 | + if ($this->getNbChildren() < 1) |
| 382 | + return $this; |
| 383 | + return $this->getChild($this->getNbChildren() - 1)->getRightMostChild(); |
| 384 | + } |
| 385 | + |
| 386 | + public function isPre() { |
| 387 | + return 0 == strcasecmp($this->getQName(),'pre'); |
| 388 | + } |
| 389 | + |
| 390 | + public static function toDiffLine(TagNode $node){ |
| 391 | + return $node->getOpeningTag(); |
| 392 | + } |
| 393 | +} |
| 394 | + |
| 395 | +/** |
| 396 | + * Represents a piece of text in the HTML file. |
| 397 | + */ |
| 398 | +class TextNode extends Node{ |
| 399 | + |
| 400 | + private $s; |
| 401 | + |
| 402 | + private $modification; |
| 403 | + |
| 404 | + function __construct($parent, $s) { |
| 405 | + parent::__construct($parent); |
| 406 | + $this->modification = new Modification(Modification::NONE); |
| 407 | + $this->s = $s; |
| 408 | + } |
| 409 | + |
| 410 | + public function copyTree() { |
| 411 | + $clone = clone $this; |
| 412 | + $clone->setParent(NULL); |
| 413 | + return $clone; |
| 414 | + } |
| 415 | + |
| 416 | + public function getLeftMostChild() { |
| 417 | + return $this; |
| 418 | + } |
| 419 | + |
| 420 | + public function getRightMostChild() { |
| 421 | + return $this; |
| 422 | + } |
| 423 | + |
| 424 | + public function getMinimalDeletedSet($id) { |
| 425 | + if ($this->getModification()->getType() == Modification::REMOVED |
| 426 | + && $this->getModification()->getID() == $id){ |
| 427 | + return array($this); |
| 428 | + } |
| 429 | + return array(); |
| 430 | + } |
| 431 | + |
| 432 | + public function getModification() { |
| 433 | + return $this->modification; |
| 434 | + } |
| 435 | + |
| 436 | + public function getText() { |
| 437 | + return $this->s; |
| 438 | + } |
| 439 | + |
| 440 | + public function isSameText($other) { |
| 441 | + if (is_null($other) || ! $other instanceof TextNode){ |
| 442 | + return false; |
| 443 | + } |
| 444 | + return str_replace('\n', ' ',$this->getText()) === str_replace('\n', ' ',$other->getText()); |
| 445 | + } |
| 446 | + |
| 447 | + public function setModification(Modification $m) { |
| 448 | + //wfDebug("Registered modification for node '$this->s' as ".Modification::typeToString($m->getType())); |
| 449 | + $this->modification = $m; |
| 450 | + } |
| 451 | + |
| 452 | + public function toString() { |
| 453 | + return $this->getText(); |
| 454 | + } |
| 455 | + |
| 456 | + public static function toDiffLine(TextNode $node){ |
| 457 | + return str_replace('\n', ' ',$node->getText()); |
| 458 | + } |
| 459 | +} |
| 460 | + |
| 461 | +class WhiteSpaceNode extends TextNode { |
| 462 | + |
| 463 | + function __construct($parent, $s, Node $like = NULL) { |
| 464 | + parent::__construct($parent, $s); |
| 465 | + if(!is_null($like) && $like instanceof TextNode){ |
| 466 | + $newModification = clone $like->getModification(); |
| 467 | + $newModification->setFirstOfID(false); |
| 468 | + $this->setModification($newModification); |
| 469 | + } |
| 470 | + } |
| 471 | + |
| 472 | + public static function isWhiteSpace($c) { |
| 473 | + switch ($c) { |
| 474 | + case ' ': |
| 475 | + case '\t': |
| 476 | + case '\r': |
| 477 | + case '\n': |
| 478 | + return true; |
| 479 | + default: |
| 480 | + return false; |
| 481 | + } |
| 482 | + } |
| 483 | +} |
| 484 | + |
| 485 | +/** |
| 486 | + * Represents the root of a HTML document. |
| 487 | + */ |
| 488 | +class BodyNode extends TagNode { |
| 489 | + |
| 490 | + function __construct() { |
| 491 | + parent::__construct(NULL, 'body', array()); |
| 492 | + } |
| 493 | + |
| 494 | + public function copyTree() { |
| 495 | + $newThis = new BodyNode(); |
| 496 | + foreach ($this->children as $child) { |
| 497 | + $newChild = $child->copyTree(); |
| 498 | + $newChild->setParent($newThis); |
| 499 | + $newThis->addChild($newChild); |
| 500 | + } |
| 501 | + return $newThis; |
| 502 | + } |
| 503 | + |
| 504 | + public function getMinimalDeletedSet($id) { |
| 505 | + $nodes = array(); |
| 506 | + foreach ($this->children as $child) { |
| 507 | + $childrenChildren = $child->getMinimalDeletedSet($id); |
| 508 | + $nodes = array_merge($nodes, $childrenChildren); |
| 509 | + } |
| 510 | + return $nodes; |
| 511 | + } |
| 512 | + |
| 513 | +} |
| 514 | + |
| 515 | +/** |
| 516 | + * Represents an image in HTML. Even though images do not contain any text they |
| 517 | + * are independent visible objects on the page. They are logically a TextNode. |
| 518 | + */ |
| 519 | +class ImageNode extends TextNode { |
| 520 | + |
| 521 | + private $attributes; |
| 522 | + |
| 523 | + function __construct(TagNode $parent, /*array*/ $attrs) { |
| 524 | + if(!array_key_exists('src',$attrs)){ |
| 525 | + //wfDebug('Image without a source:'); |
| 526 | + foreach($attrs as $key => $value){ |
| 527 | + //wfDebug("$key = $value"); |
| 528 | + } |
| 529 | + parent::__construct($parent, '<img></img>'); |
| 530 | + }else{ |
| 531 | + parent::__construct($parent, '<img>' . strtolower($attrs['src']) . '</img>'); |
| 532 | + } |
| 533 | + $this->attributes = $attrs; |
| 534 | + } |
| 535 | + |
| 536 | + public function isSameText($other) { |
| 537 | + if (is_null($other) || ! $other instanceof ImageNode) |
| 538 | + return false; |
| 539 | + return $this->getText() === $other->getText(); |
| 540 | + } |
| 541 | + |
| 542 | + public function getAttributes() { |
| 543 | + return $this->attributes; |
| 544 | + } |
| 545 | + |
| 546 | +} |
| 547 | + |
| 548 | +/** |
| 549 | + * When detecting the last common parent of two nodes, all results are stored as |
| 550 | + * a LastCommonParentResult. |
| 551 | + */ |
| 552 | +class LastCommonParentResult { |
| 553 | + |
| 554 | + // Parent |
| 555 | + private $parent; |
| 556 | + |
| 557 | + public function getLastCommonParent() { |
| 558 | + return $this->parent; |
| 559 | + } |
| 560 | + |
| 561 | + public function setLastCommonParent(TagNode $parent) { |
| 562 | + $this->parent = $parent; |
| 563 | + } |
| 564 | + |
| 565 | + // Splitting |
| 566 | + private $splittingNeeded = false; |
| 567 | + |
| 568 | + public function isSplittingNeeded() { |
| 569 | + return $this->splittingNeeded; |
| 570 | + } |
| 571 | + |
| 572 | + public function setSplittingNeeded() { |
| 573 | + $this->splittingNeeded = true; |
| 574 | + } |
| 575 | + |
| 576 | + // Depth |
| 577 | + private $lastCommonParentDepth = -1; |
| 578 | + |
| 579 | + public function getLastCommonParentDepth() { |
| 580 | + return $this->lastCommonParentDepth; |
| 581 | + } |
| 582 | + |
| 583 | + public function setLastCommonParentDepth($depth) { |
| 584 | + $this->lastCommonParentDepth = $depth; |
| 585 | + } |
| 586 | + |
| 587 | + // Index |
| 588 | + private $indexInLastCommonParent = -1; |
| 589 | + |
| 590 | + public function getIndexInLastCommonParent() { |
| 591 | + return $this->indexInLastCommonParent; |
| 592 | + } |
| 593 | + |
| 594 | + public function setIndexInLastCommonParent($index) { |
| 595 | + $this->indexInLastCommonParent = $index; |
| 596 | + } |
| 597 | +} |
| 598 | + |
| 599 | +class Modification{ |
| 600 | + |
| 601 | + const NONE = 1; |
| 602 | + const REMOVED = 2; |
| 603 | + const ADDED = 4; |
| 604 | + const CHANGED = 8; |
| 605 | + |
| 606 | + private $type; |
| 607 | + |
| 608 | + private $id = -1; |
| 609 | + |
| 610 | + private $prevMod; |
| 611 | + |
| 612 | + private $nextMod; |
| 613 | + |
| 614 | + private $firstOfID = false; |
| 615 | + |
| 616 | + function __construct($type) { |
| 617 | + $this->type = $type; |
| 618 | + } |
| 619 | + |
| 620 | + public function copy() { |
| 621 | + $newM = new Modification($this->getType()); |
| 622 | + $newM->setID($this->getID()); |
| 623 | + $newM->setChanges($this->getChanges()); |
| 624 | + $newM->setFirstOfID($this->isFirstOfID()); |
| 625 | + $newM->setNext($this->getNext()); |
| 626 | + $newM->setPrevious($this->getPrevious()); |
| 627 | + return $newM; |
| 628 | + } |
| 629 | + |
| 630 | + public function getType() { |
| 631 | + return $this->type; |
| 632 | + } |
| 633 | + |
| 634 | + public function setID($id) { |
| 635 | + $this->id = $id; |
| 636 | + } |
| 637 | + |
| 638 | + public function getID() { |
| 639 | + return $this->id; |
| 640 | + } |
| 641 | + |
| 642 | + public function setPrevious($m) { |
| 643 | + $this->prevMod = $m; |
| 644 | + } |
| 645 | + |
| 646 | + public function getPrevious() { |
| 647 | + return $this->prevMod; |
| 648 | + } |
| 649 | + |
| 650 | + public function setNext($m) { |
| 651 | + $this->nextMod = $m; |
| 652 | + } |
| 653 | + |
| 654 | + public function getNext() { |
| 655 | + return $this->nextMod; |
| 656 | + } |
| 657 | + |
| 658 | + private $changes; |
| 659 | + |
| 660 | + public function setChanges($changes) { |
| 661 | + $this->changes = $changes; |
| 662 | + } |
| 663 | + |
| 664 | + public function getChanges() { |
| 665 | + return $this->changes; |
| 666 | + } |
| 667 | + |
| 668 | + public function isFirstOfID() { |
| 669 | + return $this->firstOfID; |
| 670 | + } |
| 671 | + |
| 672 | + public function setFirstOfID($firstOfID) { |
| 673 | + $this->firstOfID = $firstOfID; |
| 674 | + } |
| 675 | + |
| 676 | + public static function typeToString($type){ |
| 677 | + switch($type){ |
| 678 | + case self::NONE: return 'none'; |
| 679 | + case self::REMOVED: return 'removed'; |
| 680 | + case self::ADDED: return 'added'; |
| 681 | + case self::CHANGED: return 'changed'; |
| 682 | + } |
| 683 | + } |
| 684 | +} |
| 685 | + |
| 686 | +class DomTreeBuilder { |
| 687 | + |
| 688 | + private $textNodes = array(); |
| 689 | + |
| 690 | + private $bodyNode; |
| 691 | + |
| 692 | + private $currentParent; |
| 693 | + |
| 694 | + private $newWord = ""; |
| 695 | + |
| 696 | + protected $bodyStarted = false; |
| 697 | + |
| 698 | + protected $bodyEnded = false; |
| 699 | + |
| 700 | + private $whiteSpaceBeforeThis = false; |
| 701 | + |
| 702 | + private $lastSibling; |
| 703 | + |
| 704 | + function __construct(){ |
| 705 | + $this->bodyNode = $this->currentParent = new BodyNode(); |
| 706 | + } |
| 707 | + |
| 708 | + public function getBodyNode() { |
| 709 | + return $this->bodyNode; |
| 710 | + } |
| 711 | + |
| 712 | + public function getTextNodes() { |
| 713 | + return $this->textNodes; |
| 714 | + } |
| 715 | + |
| 716 | + /** |
| 717 | + * Must be called manually |
| 718 | + */ |
| 719 | + public function endDocument() { |
| 720 | + $this->endWord(); |
| 721 | + //wfDebug(sizeof($this->textNodes) . ' text nodes in document.'); |
| 722 | + } |
| 723 | + |
| 724 | + public function startElement($parser, $name, /*array*/ $attributes) { |
| 725 | + if(!strcasecmp($name, 'body')==0){ |
| 726 | + //wfDebug("Starting $name node."); |
| 727 | + $this->endWord(); |
| 728 | + |
| 729 | + $newTagNode = new TagNode($this->currentParent, $name, $attributes); |
| 730 | + $this->currentParent = $newTagNode; |
| 731 | + $this->lastSibling = NULL; |
| 732 | + if ($this->whiteSpaceBeforeThis && $newTagNode->isInline()) { |
| 733 | + $newTagNode->setWhiteBefore(true); |
| 734 | + } |
| 735 | + $this->whiteSpaceBeforeThis = false; |
| 736 | + } |
| 737 | + } |
| 738 | + |
| 739 | + public function endElement($parser, $name) { |
| 740 | + if(!strcasecmp($name, 'body')==0){ |
| 741 | + //wfDebug("Ending $name node."); |
| 742 | + if (0 == strcasecmp($name,'img')) { |
| 743 | + // Insert a dummy leaf for the image |
| 744 | + $img = new ImageNode($this->currentParent, $this->currentParent->getAttributes()); |
| 745 | + $img->setWhiteBefore($this->whiteSpaceBeforeThis); |
| 746 | + $this->lastSibling = $img; |
| 747 | + $this->textNodes[] = $img; |
| 748 | + } |
| 749 | + $this->endWord(); |
| 750 | + if ($this->currentParent->isInline()) { |
| 751 | + $this->lastSibling = $this->currentParent; |
| 752 | + } else { |
| 753 | + $this->lastSibling = NULL; |
| 754 | + } |
| 755 | + $this->currentParent = $this->currentParent->getParent(); |
| 756 | + $this->whiteSpaceBeforeThis = false; |
| 757 | + }else{ |
| 758 | + $this->endDocument(); |
| 759 | + } |
| 760 | + } |
| 761 | + |
| 762 | + public function characters($parser, $data){ |
| 763 | + //wfDebug('Parsing '. strlen($data).' characters.'); |
| 764 | + $array = str_split($data); |
| 765 | + foreach($array as $c) { |
| 766 | + if (self::isDelimiter($c)) { |
| 767 | + $this->endWord(); |
| 768 | + if (WhiteSpaceNode::isWhiteSpace($c) && !$this->currentParent->isPre() |
| 769 | + && !$this->currentParent->inPre()) { |
| 770 | + if (!is_null($this->lastSibling)){ |
| 771 | + $this->lastSibling->setWhiteAfter(true); |
| 772 | + } |
| 773 | + $this->whiteSpaceBeforeThis = true; |
| 774 | + } else { |
| 775 | + $textNode = new TextNode($this->currentParent, $c); |
| 776 | + $textNode->setWhiteBefore($this->whiteSpaceBeforeThis); |
| 777 | + $this->whiteSpaceBeforeThis = false; |
| 778 | + $this->lastSibling = $textNode; |
| 779 | + $this->textNodes[] = $textNode; |
| 780 | + } |
| 781 | + } else { |
| 782 | + $this->newWord .= $c; |
| 783 | + } |
| 784 | + |
| 785 | + } |
| 786 | + } |
| 787 | + |
| 788 | + private function endWord() { |
| 789 | + if (strlen($this->newWord) > 0) { |
| 790 | + $node = new TextNode($this->currentParent, $this->newWord); |
| 791 | + $node->setWhiteBefore($this->whiteSpaceBeforeThis); |
| 792 | + $this->whiteSpaceBeforeThis = false; |
| 793 | + $this->lastSibling = $node; |
| 794 | + $this->textNodes[] = $node; |
| 795 | + $this->newWord = ""; |
| 796 | + } |
| 797 | + } |
| 798 | + |
| 799 | + public static function isDelimiter($c) { |
| 800 | + switch ($c) { |
| 801 | + // Basic Delimiters |
| 802 | + case '/': |
| 803 | + case '.': |
| 804 | + case '!': |
| 805 | + case ',': |
| 806 | + case ';': |
| 807 | + case '?': |
| 808 | + case '=': |
| 809 | + case '\'': |
| 810 | + case '"': |
| 811 | + // Extra Delimiters |
| 812 | + case '[': |
| 813 | + case ']': |
| 814 | + case '{': |
| 815 | + case '}': |
| 816 | + case '(': |
| 817 | + case ')': |
| 818 | + case '&': |
| 819 | + case '|': |
| 820 | + case '\\': |
| 821 | + case '-': |
| 822 | + case '_': |
| 823 | + case '+': |
| 824 | + case '*': |
| 825 | + case ':': |
| 826 | + return true; |
| 827 | + default: |
| 828 | + return WhiteSpaceNode::isWhiteSpace($c); |
| 829 | + } |
| 830 | + } |
| 831 | + |
| 832 | + public function getDiffLines(){ |
| 833 | + return array_map(array('TextNode','toDiffLine'), $this->textNodes); |
| 834 | + } |
| 835 | +} |
| 836 | + |
| 837 | +class TextNodeDiffer{ |
| 838 | + |
| 839 | + private $textNodes; |
| 840 | + private $bodyNode; |
| 841 | + |
| 842 | + private $oldTextNodes; |
| 843 | + private $oldBodyNode; |
| 844 | + |
| 845 | + private $lastModified = array(); |
| 846 | + |
| 847 | + function __construct(DomTreeBuilder $tree, DomTreeBuilder $oldTree) { |
| 848 | + $this->textNodes = $tree->getTextNodes(); |
| 849 | + $this->bodyNode = $tree->getBodyNode(); |
| 850 | + $this->oldTextNodes = $oldTree->getTextNodes(); |
| 851 | + $this->oldBodyNode = $oldTree->getBodyNode(); |
| 852 | + } |
| 853 | + |
| 854 | + public function getBodyNode() { |
| 855 | + return $this->bodyNode; |
| 856 | + } |
| 857 | + |
| 858 | + private $newID = 0; |
| 859 | + |
| 860 | + public function markAsNew($start, $end) { |
| 861 | + if ($end <= $start) |
| 862 | + return; |
| 863 | + |
| 864 | + if ($this->whiteAfterLastChangedPart) |
| 865 | + $this->textNodes[$start]->setWhiteBefore(false); |
| 866 | + |
| 867 | + $nextLastModified = array(); |
| 868 | + |
| 869 | + for ($i = $start; $i < $end; ++$i) { |
| 870 | + $mod = new Modification(Modification::ADDED); |
| 871 | + $mod->setID($this->newID); |
| 872 | + if (sizeof($this->lastModified) > 0) { |
| 873 | + $mod->setPrevious($this->lastModified[0]); |
| 874 | + if (is_null($this->lastModified[0]->getNext())) { |
| 875 | + foreach ($this->lastModified as $lastMod) { |
| 876 | + $lastMod->setNext($mod); |
| 877 | + } |
| 878 | + } |
| 879 | + } |
| 880 | + $nextLastModified[] = $mod; |
| 881 | + $this->textNodes[$i]->setModification($mod); |
| 882 | + } |
| 883 | + if ($start < $end) { |
| 884 | + $this->textNodes[$start]->getModification()->setFirstOfID(true); |
| 885 | + } |
| 886 | + ++$this->newID; |
| 887 | + $this->lastModified = $nextLastModified; |
| 888 | + } |
| 889 | + |
| 890 | + private $changedID = 0; |
| 891 | + |
| 892 | + private $changedIDUsed = false; |
| 893 | + |
| 894 | + public function handlePossibleChangedPart($leftstart, $leftend, $rightstart, $rightend) { |
| 895 | + $i = $rightstart; |
| 896 | + $j = $leftstart; |
| 897 | + |
| 898 | + if ($this->changedIDUsed) { |
| 899 | + ++$this->changedID; |
| 900 | + $this->changedIDUsed = false; |
| 901 | + } |
| 902 | + |
| 903 | + $nextLastModified = array(); |
| 904 | + |
| 905 | + $changes; |
| 906 | + while ($i < $rightend) { |
| 907 | + $acthis = new AncestorComparator($this->textNodes[$i]->getParentTree()); |
| 908 | + $acother = new AncestorComparator($this->oldTextNodes[$j]->getParentTree()); |
| 909 | + $result = $acthis->getResult($acother); |
| 910 | + unset($acthis, $acother); |
| 911 | + |
| 912 | + $nbLastModified = sizeof($this->lastModified); |
| 913 | + if ($result->isChanged()) { |
| 914 | + $mod = new Modification(Modification::CHANGED); |
| 915 | + |
| 916 | + if (!$this->changedIDUsed) { |
| 917 | + $mod->setFirstOfID(true); |
| 918 | + if (sizeof($nextLastModified) > 0) { |
| 919 | + $this->lastModified = $nextLastModified; |
| 920 | + $nextLastModified = array(); |
| 921 | + } |
| 922 | + } else if (!is_null($result->getChanges()) && $result->getChanges() != $this->changes) { |
| 923 | + ++$this->changedID; |
| 924 | + $mod->setFirstOfID(true); |
| 925 | + if (sizeof($nextLastModified) > 0) { |
| 926 | + $this->lastModified = $nextLastModified; |
| 927 | + $nextLastModified = array(); |
| 928 | + } |
| 929 | + } |
| 930 | + |
| 931 | + if ($nbLastModified > 0) { |
| 932 | + $mod->setPrevious($this->lastModified[0]); |
| 933 | + if (is_null($this->lastModified[0]->getNext())) { |
| 934 | + foreach ($this->lastModified as $lastMod) { |
| 935 | + $lastMod->setNext($mod); |
| 936 | + } |
| 937 | + } |
| 938 | + } |
| 939 | + $nextLastModified[] = $mod; |
| 940 | + |
| 941 | + $mod->setChanges($result->getChanges()); |
| 942 | + $mod->setID($this->changedID); |
| 943 | + |
| 944 | + $this->textNodes[$i]->setModification($mod); |
| 945 | + $this->changes = $result->getChanges(); |
| 946 | + $this->changedIDUsed = true; |
| 947 | + } else if ($this->changedIDUsed) { |
| 948 | + ++$this->changedID; |
| 949 | + $this->changedIDUsed = false; |
| 950 | + } |
| 951 | + ++$i; |
| 952 | + ++$j; |
| 953 | + } |
| 954 | + if (sizeof($nextLastModified) > 0){ |
| 955 | + $this->lastModified = $nextLastModified; |
| 956 | + } |
| 957 | + } |
| 958 | + |
| 959 | + // used to remove the whitespace between a red and green block |
| 960 | + private $whiteAfterLastChangedPart = false; |
| 961 | + |
| 962 | + private $deletedID = 0; |
| 963 | + |
| 964 | + public function markAsDeleted($start, $end, $before) { |
| 965 | + |
| 966 | + if ($end <= $start) |
| 967 | + return; |
| 968 | + |
| 969 | + if ($before > 0 && $this->textNodes[$before - 1]->isWhiteAfter()) { |
| 970 | + $this->whiteAfterLastChangedPart = true; |
| 971 | + } else { |
| 972 | + $this->whiteAfterLastChangedPart = false; |
| 973 | + } |
| 974 | + |
| 975 | + $nextLastModified = array(); |
| 976 | + |
| 977 | + for ($i = $start; $i < $end; ++$i) { |
| 978 | + $mod = new Modification(Modification::REMOVED); |
| 979 | + $mod->setID($this->deletedID); |
| 980 | + if (sizeof($this->lastModified) > 0) { |
| 981 | + $mod->setPrevious($this->lastModified[0]); |
| 982 | + if (is_null($this->lastModified[0]->getNext())) { |
| 983 | + foreach ($this->lastModified as $lastMod) { |
| 984 | + $lastMod->setNext($mod); |
| 985 | + } |
| 986 | + } |
| 987 | + } |
| 988 | + $nextLastModified[] = $mod; |
| 989 | + |
| 990 | + // oldTextNodes is used here because we're going to move its deleted |
| 991 | + // elements |
| 992 | + // to this tree! |
| 993 | + $this->oldTextNodes[$i]->setModification($mod); |
| 994 | + } |
| 995 | + $this->oldTextNodes[$start]->getModification()->setFirstOfID(true); |
| 996 | + |
| 997 | + $deletedNodes = $this->oldBodyNode->getMinimalDeletedSet($this->deletedID); |
| 998 | + |
| 999 | + //wfDebug("Minimal set of deleted nodes of size " . sizeof($deletedNodes)); |
| 1000 | + |
| 1001 | + // Set prevLeaf to the leaf after which the old HTML needs to be |
| 1002 | + // inserted |
| 1003 | + if ($before > 0){ |
| 1004 | + $prevLeaf = $this->textNodes[$before - 1]; |
| 1005 | + } |
| 1006 | + // Set nextLeaf to the leaf before which the old HTML needs to be |
| 1007 | + // inserted |
| 1008 | + if ($before < sizeof($this->textNodes)){ |
| 1009 | + $nextLeaf = $this->textNodes[$before]; |
| 1010 | + } |
| 1011 | + |
| 1012 | + while (sizeof($deletedNodes) > 0) { |
| 1013 | + if (isset($prevLeaf)) { |
| 1014 | + $prevResult = $prevLeaf->getLastCommonParent($deletedNodes[0]); |
| 1015 | + } else { |
| 1016 | + $prevResult = new LastCommonParentResult(); |
| 1017 | + $prevResult->setLastCommonParent($this->getBodyNode()); |
| 1018 | + $prevResult->setIndexInLastCommonParent(0); |
| 1019 | + } |
| 1020 | + if (isset($nextleaf)) { |
| 1021 | + $nextResult = $nextLeaf->getLastCommonParent($deletedNodes[sizeof($deletedNodes) - 1]); |
| 1022 | + } else { |
| 1023 | + $nextResult = new LastCommonParentResult(); |
| 1024 | + $nextResult->setLastCommonParent($this->getBodyNode()); |
| 1025 | + $nextResult->setIndexInLastCommonParent($this->getBodyNode()->getNbChildren()); |
| 1026 | + } |
| 1027 | + |
| 1028 | + if ($prevResult->getLastCommonParentDepth() == $nextResult->getLastCommonParentDepth()) { |
| 1029 | + // We need some metric to choose which way to add-... |
| 1030 | + if ($deletedNodes[0]->getParent() === $deletedNodes[sizeof($deletedNodes) - 1]->getParent() |
| 1031 | + && $prevResult->getLastCommonParent() === $nextResult->getLastCommonParent()) { |
| 1032 | + // The difference is not in the parent |
| 1033 | + $prevResult->setLastCommonParentDepth($prevResult->getLastCommonParentDepth() + 1); |
| 1034 | + } else { |
| 1035 | + // The difference is in the parent, so compare them |
| 1036 | + // now THIS is tricky |
| 1037 | + $distancePrev = $deletedNodes[0]->getParent()->getMatchRatio($prevResult->getLastCommonParent()); |
| 1038 | + $distanceNext = $deletedNodes[sizeof($deletedNodes) - 1]->getParent()->getMatchRatio($nextResult->getLastCommonParent()); |
| 1039 | + |
| 1040 | + if ($distancePrev <= $distanceNext) { |
| 1041 | + $prevResult->setLastCommonParentDepth($prevResult->getLastCommonParentDepth() + 1); |
| 1042 | + } else { |
| 1043 | + $nextResult->setLastCommonParentDepth($nextResult->getLastCommonParentDepth() + 1); |
| 1044 | + } |
| 1045 | + } |
| 1046 | + |
| 1047 | + } |
| 1048 | + |
| 1049 | + if ($prevResult->getLastCommonParentDepth() > $nextResult->getLastCommonParentDepth()) { |
| 1050 | + // Inserting at the front |
| 1051 | + if ($prevResult->isSplittingNeeded()) { |
| 1052 | + $prevLeaf->getParent()->splitUntill($prevResult->getLastCommonParent(), $prevLeaf, true); |
| 1053 | + } |
| 1054 | + $prevLeaf = $deletedNodes[0]->copyTree(); |
| 1055 | + unset($deletedNodes[0]); |
| 1056 | + $deletedNodes = array_values($deletedNodes); |
| 1057 | + $prevLeaf->setParent($prevResult->getLastCommonParent()); |
| 1058 | + $prevResult->getLastCommonParent()->addChild($prevLeaf,$prevResult->getIndexInLastCommonParent() + 1); |
| 1059 | + } else if ($prevResult->getLastCommonParentDepth() < $nextResult->getLastCommonParentDepth()) { |
| 1060 | + // Inserting at the back |
| 1061 | + if ($nextResult->isSplittingNeeded()) { |
| 1062 | + $splitOccured = $nextLeaf->getParent()->splitUntill($nextResult->getLastCommonParent(), $nextLeaf, false); |
| 1063 | + if ($splitOccured) { |
| 1064 | + // The place where to insert is shifted one place to the |
| 1065 | + // right |
| 1066 | + $nextResult->setIndexInLastCommonParent($nextResult->getIndexInLastCommonParent() + 1); |
| 1067 | + } |
| 1068 | + } |
| 1069 | + $nextLeaf = $deletedNodes[sizeof(deletedNodes) - 1]->copyTree(); |
| 1070 | + unset($deletedNodes[sizeof(deletedNodes) - 1]); |
| 1071 | + $deletedNodes = array_values($deletedNodes); |
| 1072 | + $nextLeaf->setParent($nextResult->getLastCommonParent()); |
| 1073 | + $nextResult->getLastCommonParent()->addChild($nextLeaf,$nextResult->getIndexInLastCommonParent()); |
| 1074 | + } else |
| 1075 | + throw new Exception("Uh?"); |
| 1076 | + } |
| 1077 | + $this->lastModified = $nextLastModified; |
| 1078 | + ++$this->deletedID; |
| 1079 | + } |
| 1080 | + |
| 1081 | + public function expandWhiteSpace() { |
| 1082 | + $this->getBodyNode()->expandWhiteSpace(); |
| 1083 | + } |
| 1084 | + |
| 1085 | + public function lengthNew(){ |
| 1086 | + return sizeof($this->textNodes); |
| 1087 | + } |
| 1088 | + |
| 1089 | + public function lengthOld(){ |
| 1090 | + return sizeof($this->oldTextNodes); |
| 1091 | + } |
| 1092 | +} |
| 1093 | + |
| 1094 | +class HTMLDiffer{ |
| 1095 | + |
| 1096 | + private $output; |
| 1097 | + |
| 1098 | + function __construct($output){ |
| 1099 | + $this->output = $output; |
| 1100 | + } |
| 1101 | + |
| 1102 | + function htmlDiff($from, $to){ |
| 1103 | + // Create an XML parser |
| 1104 | + $xml_parser = xml_parser_create(''); |
| 1105 | + |
| 1106 | + $domfrom = new DomTreeBuilder(); |
| 1107 | + |
| 1108 | + // Set the functions to handle opening and closing tags |
| 1109 | + xml_set_element_handler($xml_parser, array($domfrom,"startElement"), array($domfrom,"endElement")); |
| 1110 | + |
| 1111 | + // Set the function to handle blocks of character data |
| 1112 | + xml_set_character_data_handler($xml_parser, array($domfrom,"characters")); |
| 1113 | + |
| 1114 | + ; |
| 1115 | + //wfDebug('Parsing '.strlen($from)." characters worth of HTML"); |
| 1116 | + if (!xml_parse($xml_parser, '<?xml version="1.0" encoding="UTF-8"?>'.Sanitizer::hackDocType().'<body>', FALSE) |
| 1117 | + || !xml_parse($xml_parser, $from, FALSE) |
| 1118 | + || !xml_parse($xml_parser, '</body>', TRUE)){ |
| 1119 | + wfDebug(sprintf("XML error: %s at line %d",xml_error_string(xml_get_error_code($xml_parser)),xml_get_current_line_number($xml_parser))); |
| 1120 | + } |
| 1121 | + xml_parser_free($xml_parser); |
| 1122 | + unset($from); |
| 1123 | + |
| 1124 | + $xml_parser = xml_parser_create(''); |
| 1125 | + |
| 1126 | + $domto = new DomTreeBuilder(); |
| 1127 | + |
| 1128 | + // Set the functions to handle opening and closing tags |
| 1129 | + xml_set_element_handler($xml_parser, array($domto,"startElement"), array($domto,"endElement")); |
| 1130 | + |
| 1131 | + // Set the function to handle blocks of character data |
| 1132 | + xml_set_character_data_handler($xml_parser, array($domto,"characters")); |
| 1133 | + |
| 1134 | + //wfDebug('Parsing '.strlen($to)." characters worth of HTML"); |
| 1135 | + if (!xml_parse($xml_parser, '<?xml version="1.0" encoding="UTF-8"?>'.Sanitizer::hackDocType().'<body>', FALSE) |
| 1136 | + || !xml_parse($xml_parser, $to, FALSE) |
| 1137 | + || !xml_parse($xml_parser, '</body>', TRUE)){ |
| 1138 | + wfDebug(sprintf("XML error in HTML diff: %s at line %d",xml_error_string(xml_get_error_code($xml_parser)),xml_get_current_line_number($xml_parser))); |
| 1139 | + } |
| 1140 | + xml_parser_free($xml_parser); |
| 1141 | + unset($to); |
| 1142 | + |
| 1143 | + $diffengine = new _DiffEngine(); |
| 1144 | + $differences = $this->preProcess($diffengine->diff_range($domfrom->getDiffLines(), $domto->getDiffLines())); |
| 1145 | + unset($xml_parser,$diffengine); |
| 1146 | + |
| 1147 | + $domdiffer = new TextNodeDiffer($domto, $domfrom); |
| 1148 | + |
| 1149 | + $currentIndexLeft = 0; |
| 1150 | + $currentIndexRight = 0; |
| 1151 | + foreach ($differences as $d) { |
| 1152 | + if ($d->leftstart > $currentIndexLeft) { |
| 1153 | + $domdiffer->handlePossibleChangedPart($currentIndexLeft, $d->leftstart, |
| 1154 | + $currentIndexRight, $d->rightstart); |
| 1155 | + } |
| 1156 | + if ($d->leftlength > 0) { |
| 1157 | + $domdiffer->markAsDeleted($d->leftstart, $d->leftend, $d->rightstart); |
| 1158 | + } |
| 1159 | + $domdiffer->markAsNew($d->rightstart, $d->rightend); |
| 1160 | + |
| 1161 | + $currentIndexLeft = $d->leftend; |
| 1162 | + $currentIndexRight = $d->rightend; |
| 1163 | + } |
| 1164 | + if ($currentIndexLeft < $domdiffer->lengthOld()) { |
| 1165 | + $domdiffer->handlePossibleChangedPart($currentIndexLeft,$domdiffer->lengthOld(), $currentIndexRight,$domdiffer->lengthNew()); |
| 1166 | + } |
| 1167 | + |
| 1168 | + $domdiffer->expandWhiteSpace(); |
| 1169 | + $output = new HTMLOutput('htmldiff', $this->output); |
| 1170 | + $output->parse($domdiffer->getBodyNode()); |
| 1171 | + } |
| 1172 | + |
| 1173 | + private function preProcess(/*array*/ $differences){ |
| 1174 | + $newRanges = array(); |
| 1175 | + |
| 1176 | + $nbDifferences = sizeof($differences); |
| 1177 | + for ($i = 0; $i < $nbDifferences; ++$i) { |
| 1178 | + $leftStart = $differences[$i]->leftstart; |
| 1179 | + $leftEnd = $differences[$i]->leftend; |
| 1180 | + $rightStart = $differences[$i]->rightstart; |
| 1181 | + $rightEnd = $differences[$i]->rightend; |
| 1182 | + |
| 1183 | + $leftLength = $leftEnd - $leftStart; |
| 1184 | + $rightLength = $rightEnd - $rightStart; |
| 1185 | + |
| 1186 | + while ($i + 1 < $nbDifferences && self::score($leftLength, $differences[$i + 1]->leftlength, |
| 1187 | + $rightLength, $differences[$i + 1]->rightlength) > ($differences[$i + 1]->leftstart - $leftEnd)) { |
| 1188 | + $leftEnd = $differences[$i + 1]->leftend; |
| 1189 | + $rightEnd = $differences[$i + 1]->rightend; |
| 1190 | + $leftLength = $leftEnd - $leftStart; |
| 1191 | + $rightLength = $rightEnd - $rightStart; |
| 1192 | + ++$i; |
| 1193 | + } |
| 1194 | + $newRanges[] = new RangeDifference($leftStart, $leftEnd, $rightStart, $rightEnd); |
| 1195 | + } |
| 1196 | + return $newRanges; |
| 1197 | + } |
| 1198 | + |
| 1199 | + /** |
| 1200 | + * Heuristic to merge differences for readability. |
| 1201 | + */ |
| 1202 | + public static function score($ll, $nll, $rl, $nrl) { |
| 1203 | + if (($ll == 0 && $nll == 0) |
| 1204 | + || ($rl == 0 && $nrl == 0)){ |
| 1205 | + return 0; |
| 1206 | + } |
| 1207 | + $numbers = array($ll, $nll, $rl, $nrl); |
| 1208 | + $d = 0; |
| 1209 | + foreach ($numbers as $number) { |
| 1210 | + while ($number > 3) { |
| 1211 | + $d += 3; |
| 1212 | + $number -= 3; |
| 1213 | + $number *= 0.5; |
| 1214 | + } |
| 1215 | + $d += $number; |
| 1216 | + |
| 1217 | + } |
| 1218 | + return $d / (1.5 * sizeof($numbers)); |
| 1219 | + } |
| 1220 | + |
| 1221 | +} |
| 1222 | + |
| 1223 | +class TextOnlyComparator{ |
| 1224 | + |
| 1225 | + public $leafs = array(); |
| 1226 | + |
| 1227 | + function _construct(TagNode $tree) { |
| 1228 | + $this->addRecursive($tree); |
| 1229 | + $this->leafs = array_map(array('TextNode','toDiffLine'), $this->leafs); |
| 1230 | + } |
| 1231 | + |
| 1232 | + private function addRecursive(TagNode $tree) { |
| 1233 | + foreach ($tree->children as $child) { |
| 1234 | + if ($child instanceof TagNode) { |
| 1235 | + $this->addRecursive($child); |
| 1236 | + } else if ($child instanceof TextNode) { |
| 1237 | + $this->leafs[] = $node; |
| 1238 | + } |
| 1239 | + } |
| 1240 | + } |
| 1241 | + |
| 1242 | + public function getMatchRatio(TextOnlyComparator $other) { |
| 1243 | + $nbOthers = sizeof($other->leafs); |
| 1244 | + $nbThis = sizeof($this->leafs); |
| 1245 | + if($nbOthers == 0 || $nbThis == 0){ |
| 1246 | + return -log(0); |
| 1247 | + } |
| 1248 | + |
| 1249 | + $diffengine = new _DiffEngine(); |
| 1250 | + $diffengine->diff_local($this->leafs, $other->leafs); |
| 1251 | + |
| 1252 | + $distanceThis = array_sum($diffengine->xchanged); |
| 1253 | + $distanceOther = array_sum($diffengine->ychanged); |
| 1254 | + |
| 1255 | + return ((0.0 + $distanceOther) / $nbOthers + (0.0 + $distanceThis) |
| 1256 | + / $nbThis) / 2.0; |
| 1257 | + } |
| 1258 | +} |
| 1259 | + |
| 1260 | +class AncestorComparatorResult { |
| 1261 | + |
| 1262 | + private $changed = false; |
| 1263 | + |
| 1264 | + private $changes = ""; |
| 1265 | + |
| 1266 | + public function isChanged() { |
| 1267 | + return $this->changed; |
| 1268 | + } |
| 1269 | + |
| 1270 | + public function setChanged($changed) { |
| 1271 | + $this->changed = $changed; |
| 1272 | + } |
| 1273 | + |
| 1274 | + public function getChanges() { |
| 1275 | + return $this->changes; |
| 1276 | + } |
| 1277 | + |
| 1278 | + public function setChanges($changes) { |
| 1279 | + $this->changes = $changes; |
| 1280 | + } |
| 1281 | +} |
| 1282 | + |
| 1283 | +/** |
| 1284 | + * A comparator used when calculating the difference in ancestry of two Nodes. |
| 1285 | + */ |
| 1286 | +class AncestorComparator{ |
| 1287 | + |
| 1288 | + public $ancestors; |
| 1289 | + public $ancestorsText; |
| 1290 | + |
| 1291 | + function __construct(/*array*/ $ancestors) { |
| 1292 | + $this->ancestors = $ancestors; |
| 1293 | + $this->ancestorsText = array_map(array('TagNode','toDiffLine'), $ancestors); |
| 1294 | + } |
| 1295 | + |
| 1296 | + private $compareTxt = ""; |
| 1297 | + |
| 1298 | + public function getCompareTxt() { |
| 1299 | + return $this->compareTxt; |
| 1300 | + } |
| 1301 | + |
| 1302 | + public function getResult(AncestorComparator $other) { |
| 1303 | + $result = new AncestorComparatorResult(); |
| 1304 | + |
| 1305 | + $diffengine = new _DiffEngine(); |
| 1306 | + $differences = $diffengine->diff_range($this->ancestorsText, $other->ancestorsText); |
| 1307 | + |
| 1308 | + if (sizeof($differences) == 0){ |
| 1309 | + return $result; |
| 1310 | + } |
| 1311 | + $changeTxt = new ChangeTextGenerator($this, $other); |
| 1312 | + |
| 1313 | + $result->setChanged(true); |
| 1314 | + $result->setChanges($changeTxt->getChanged($differences)->toString()); |
| 1315 | + |
| 1316 | + return $result; |
| 1317 | + } |
| 1318 | +} |
| 1319 | + |
| 1320 | +class ChangeTextGenerator { |
| 1321 | + |
| 1322 | + private $new; |
| 1323 | + private $old; |
| 1324 | + |
| 1325 | + private $factory; |
| 1326 | + |
| 1327 | + function __construct(AncestorComparator $old, AncestorComparator $new) { |
| 1328 | + $this->new = $new; |
| 1329 | + $this->old = $old; |
| 1330 | + $this->factory = new TagToStringFactory(); |
| 1331 | + } |
| 1332 | + |
| 1333 | + public function getChanged(/*array*/ $differences) { |
| 1334 | + $txt = new ChangeText; |
| 1335 | + |
| 1336 | + $rootlistopened = false; |
| 1337 | + |
| 1338 | + if (sizeof($differences) > 1) { |
| 1339 | + $txt->addHtml('<ul class="changelist">'); |
| 1340 | + $rootlistopened = true; |
| 1341 | + } |
| 1342 | + |
| 1343 | + $nbDifferences = sizeof($differences); |
| 1344 | + for ($j = 0; $j < $nbDifferences; ++$j) { |
| 1345 | + $d = $differences[$j]; |
| 1346 | + |
| 1347 | + $lvl1listopened = false; |
| 1348 | + |
| 1349 | + if ($rootlistopened) { |
| 1350 | + $txt->addHtml('<li>'); |
| 1351 | + } |
| 1352 | + |
| 1353 | + if ($d->leftlength + $d->rightlength > 1) { |
| 1354 | + $txt->addHtml('<ul class="changelist">'); |
| 1355 | + $lvl1listopened = true; |
| 1356 | + } |
| 1357 | + |
| 1358 | + // left are the old ones |
| 1359 | + for ($i = $d->leftstart; $i < $d->leftend; ++$i) { |
| 1360 | + if ($lvl1listopened){ |
| 1361 | + $txt->addHtml('<li>'); |
| 1362 | + } |
| 1363 | + // add a bullet for a old tag |
| 1364 | + $this->addTagOld($txt, $this->old->ancestors[$i]); |
| 1365 | + |
| 1366 | + if ($lvl1listopened){ |
| 1367 | + $txt->addHtml('</li>'); |
| 1368 | + } |
| 1369 | + } |
| 1370 | + |
| 1371 | + // right are the new ones |
| 1372 | + for ($i = $d->rightstart; $i < $d->rightend; ++$i) { |
| 1373 | + if ($lvl1listopened){ |
| 1374 | + $txt->addHtml('<li>'); |
| 1375 | + } |
| 1376 | + |
| 1377 | + // add a bullet for a new tag |
| 1378 | + $this->addTagNew($txt, $this->new->ancestors[$i]); |
| 1379 | + |
| 1380 | + if ($lvl1listopened){ |
| 1381 | + $txt->addHtml('</li>'); |
| 1382 | + } |
| 1383 | + |
| 1384 | + } |
| 1385 | + |
| 1386 | + if ($lvl1listopened) { |
| 1387 | + $txt->addHtml('</ul>'); |
| 1388 | + } |
| 1389 | + |
| 1390 | + if ($rootlistopened) { |
| 1391 | + $txt->addHtml('</li>'); |
| 1392 | + } |
| 1393 | + } |
| 1394 | + |
| 1395 | + if ($rootlistopened) { |
| 1396 | + $txt->addHtml('</ul>'); |
| 1397 | + } |
| 1398 | + |
| 1399 | + return $txt; |
| 1400 | + |
| 1401 | + } |
| 1402 | + |
| 1403 | + private function addTagOld(ChangeText $txt, TagNode $ancestor) { |
| 1404 | + $this->factory->create($ancestor)->getRemovedDescription($txt); |
| 1405 | + } |
| 1406 | + |
| 1407 | + private function addTagNew(ChangeText $txt, TagNode $ancestor) { |
| 1408 | + $this->factory->create($ancestor)->getAddedDescription($txt); |
| 1409 | + } |
| 1410 | +} |
| 1411 | + |
| 1412 | +class ChangeText { |
| 1413 | + |
| 1414 | + private $txt = ""; |
| 1415 | + |
| 1416 | + const newLine = "<br/>"; |
| 1417 | + |
| 1418 | + public function addText($s) { |
| 1419 | + $s = $this->clean($s); |
| 1420 | + $this->txt .= $s; |
| 1421 | + } |
| 1422 | + |
| 1423 | + public function addHtml($s) { |
| 1424 | + $this->txt .= $s; |
| 1425 | + } |
| 1426 | + |
| 1427 | + public function addNewLine() { |
| 1428 | + $this->addHtml(self::newLine); |
| 1429 | + } |
| 1430 | + |
| 1431 | + public function toString() { |
| 1432 | + return $this->txt; |
| 1433 | + } |
| 1434 | + |
| 1435 | + private function clean($s) { |
| 1436 | + return htmlspecialchars($s); |
| 1437 | + } |
| 1438 | +} |
| 1439 | + |
| 1440 | +class TagToStringFactory { |
| 1441 | + |
| 1442 | + private static $containerTags = array( |
| 1443 | + 'html' => TRUE, |
| 1444 | + 'body' => TRUE, |
| 1445 | + 'p' => TRUE, |
| 1446 | + 'blockquote' => TRUE, |
| 1447 | + 'h1' => TRUE, |
| 1448 | + 'h2' => TRUE, |
| 1449 | + 'h3' => TRUE, |
| 1450 | + 'h4' => TRUE, |
| 1451 | + 'h5' => TRUE, |
| 1452 | + 'pre' => TRUE, |
| 1453 | + 'div' => TRUE, |
| 1454 | + 'ul' => TRUE, |
| 1455 | + 'ol' => TRUE, |
| 1456 | + 'li' => TRUE, |
| 1457 | + 'table' => TRUE, |
| 1458 | + 'tbody' => TRUE, |
| 1459 | + 'tr' => TRUE, |
| 1460 | + 'td' => TRUE, |
| 1461 | + 'th' => TRUE, |
| 1462 | + 'br' => TRUE, |
| 1463 | + 'hr' => TRUE, |
| 1464 | + 'code' => TRUE, |
| 1465 | + 'dl' => TRUE, |
| 1466 | + 'dt' => TRUE, |
| 1467 | + 'dd' => TRUE, |
| 1468 | + 'input' => TRUE, |
| 1469 | + 'form' => TRUE, |
| 1470 | + 'img' => TRUE, |
| 1471 | + // in-line tags that can be considered containers not styles |
| 1472 | + 'span' => TRUE, |
| 1473 | + 'a' => TRUE |
| 1474 | + ); |
| 1475 | + |
| 1476 | + private static $styleTags = array( |
| 1477 | + 'i' => TRUE, |
| 1478 | + 'b' => TRUE, |
| 1479 | + 'strong' => TRUE, |
| 1480 | + 'em' => TRUE, |
| 1481 | + 'font' => TRUE, |
| 1482 | + 'big' => TRUE, |
| 1483 | + 'del' => TRUE, |
| 1484 | + 'tt' => TRUE, |
| 1485 | + 'sub' => TRUE, |
| 1486 | + 'sup' => TRUE, |
| 1487 | + 'strike' => TRUE |
| 1488 | + ); |
| 1489 | + |
| 1490 | + const MOVED = 1; |
| 1491 | + const STYLE = 2; |
| 1492 | + const UNKNOWN = 4; |
| 1493 | + |
| 1494 | + public function create(TagNode $node) { |
| 1495 | + $sem = $this->getChangeSemantic($node->getQName()); |
| 1496 | + if (0 == strcasecmp($node->getQName(),'a')){ |
| 1497 | + return new AnchorToString($node, $sem); |
| 1498 | + } |
| 1499 | + if (0 == strcasecmp($node->getQName(),'img')){ |
| 1500 | + return new NoContentTagToString($node, $sem); |
| 1501 | + } |
| 1502 | + return new TagToString($node, $sem); |
| 1503 | + } |
| 1504 | + |
| 1505 | + protected function getChangeSemantic($qname) { |
| 1506 | + if (array_key_exists(strtolower($qname),self::$containerTags)){ |
| 1507 | + return self::MOVED; |
| 1508 | + } |
| 1509 | + if (array_key_exists(strtolower($qname),self::$styleTags)){ |
| 1510 | + return self::STYLE; |
| 1511 | + } |
| 1512 | + return self::UNKNOWN; |
| 1513 | + } |
| 1514 | +} |
| 1515 | + |
| 1516 | +class TagToString { |
| 1517 | + |
| 1518 | + protected $node; |
| 1519 | + |
| 1520 | + protected $sem; |
| 1521 | + |
| 1522 | + function __construct(TagNode $node, $sem) { |
| 1523 | + $this->node = $node; |
| 1524 | + $this->sem = $sem; |
| 1525 | + } |
| 1526 | + |
| 1527 | + public function getDescription() { |
| 1528 | + return $this->getString('diff-' . $this->node->getQName()); |
| 1529 | + } |
| 1530 | + |
| 1531 | + public function getRemovedDescription(ChangeText $txt) { |
| 1532 | + |
| 1533 | + if ($this->sem == TagToStringFactory::MOVED) { |
| 1534 | + $txt->addText($this->getMovedOutOf() . ' ' . strtolower($this->getArticle()) . ' '); |
| 1535 | + $txt->addHtml('<b>'); |
| 1536 | + $txt->addText(strtolower($this->getDescription())); |
| 1537 | + $txt->addHtml('</b>'); |
| 1538 | + } else if ($this->sem == TagToStringFactory::STYLE) { |
| 1539 | + $txt->addHtml('<b>'); |
| 1540 | + $txt->addText($this->getDescription()); |
| 1541 | + $txt->addHtml('</b>'); |
| 1542 | + $txt->addText(' ' . strtolower($this->getStyleRemoved())); |
| 1543 | + } else { |
| 1544 | + $txt->addHtml('<b>'); |
| 1545 | + $txt->addText($this->getDescription()); |
| 1546 | + $txt->addHtml('</b>'); |
| 1547 | + $txt->addText(' ' . strtolower($this->getRemoved())); |
| 1548 | + } |
| 1549 | + $this->addAttributes($txt, $this->node->getAttributes()); |
| 1550 | + $txt->addText('.'); |
| 1551 | + } |
| 1552 | + |
| 1553 | + public function getAddedDescription(ChangeText $txt) { |
| 1554 | + |
| 1555 | + if ($this->sem == TagToStringFactory::MOVED) { |
| 1556 | + $txt->addText($this->getMovedTo() . ' ' . strtolower($this->getArticle()) . ' '); |
| 1557 | + $txt->addHtml('<b>'); |
| 1558 | + $txt->addText(strtolower($this->getDescription())); |
| 1559 | + $txt->addHtml('</b>'); |
| 1560 | + } else if ($this->sem == TagToStringFactory::STYLE) { |
| 1561 | + $txt->addHtml('<b>'); |
| 1562 | + $txt->addText($this->getDescription()); |
| 1563 | + $txt->addHtml('</b>'); |
| 1564 | + $txt->addText(' ' . strtolower($this->getStyleAdded())); |
| 1565 | + } else { |
| 1566 | + $txt->addHtml('<b>'); |
| 1567 | + $txt->addText($this->getDescription()); |
| 1568 | + $txt->addHtml('</b>'); |
| 1569 | + $txt->addText(' ' . strtolower($this->getAdded())); |
| 1570 | + } |
| 1571 | + $this->addAttributes($txt, $this->node->getAttributes()); |
| 1572 | + $txt->addText('.'); |
| 1573 | + } |
| 1574 | + |
| 1575 | + protected function getMovedTo() { |
| 1576 | + return $this->getString('diff-movedto'); |
| 1577 | + } |
| 1578 | + |
| 1579 | + protected function getStyleAdded() { |
| 1580 | + return $this->getString('diff-styleadded'); |
| 1581 | + } |
| 1582 | + |
| 1583 | + protected function getAdded() { |
| 1584 | + return $this->getString('diff-added'); |
| 1585 | + } |
| 1586 | + |
| 1587 | + protected function getMovedOutOf() { |
| 1588 | + return $this->getString('diff-movedoutof'); |
| 1589 | + } |
| 1590 | + |
| 1591 | + protected function getStyleRemoved() { |
| 1592 | + return $this->getString('diff-styleremoved'); |
| 1593 | + } |
| 1594 | + |
| 1595 | + protected function getRemoved() { |
| 1596 | + return $this->getString('diff-removed'); |
| 1597 | + } |
| 1598 | + |
| 1599 | + protected function addAttributes(ChangeText $txt, array $attributes) { |
| 1600 | + if (sizeof($attributes) < 1) |
| 1601 | + return; |
| 1602 | + |
| 1603 | + $keys = array_keys($attributes); |
| 1604 | + |
| 1605 | + $txt->addText(' ' . strtolower($this->getWith()) . ' ' |
| 1606 | + . $this->translateArgument($keys[0]) . ' ' |
| 1607 | + . $attributes[$keys[0]]); |
| 1608 | + for ($i = 1; $i < sizeof($attributes) - 1; $i++) { |
| 1609 | + $txt->addText(', ' . $this->translateArgument($keys[$i]) . ' ' |
| 1610 | + . $attributes[$keys[$i]]); |
| 1611 | + } |
| 1612 | + if (sizeof($attributes) > 1) { |
| 1613 | + $txt->addText(' ' |
| 1614 | + . strtolower($this->getAnd()) |
| 1615 | + . ' ' |
| 1616 | + . $this->translateArgument($keys[sizeof($attributes) - 1]) . ' ' |
| 1617 | + . $attributes[$keys[sizeof($attributes) - 1]]); |
| 1618 | + } |
| 1619 | + } |
| 1620 | + |
| 1621 | + private function getAnd() { |
| 1622 | + return $this->getString('diff-and'); |
| 1623 | + } |
| 1624 | + |
| 1625 | + private function getWith() { |
| 1626 | + return $this->getString('diff-with'); |
| 1627 | + } |
| 1628 | + |
| 1629 | + protected function translateArgument($name) { |
| 1630 | + if (0 == strcasecmp($name,'src')) |
| 1631 | + return strtolower($this->getSource()); |
| 1632 | + if (0 == strcasecmp($name,'width')) |
| 1633 | + return strtolower($this->getWidth()); |
| 1634 | + if (0 == strcasecmp($name,'height')) |
| 1635 | + return strtolower($this->getHeight()); |
| 1636 | + return $name; |
| 1637 | + } |
| 1638 | + |
| 1639 | + private function getHeight() { |
| 1640 | + return $this->getString('diff-height'); |
| 1641 | + } |
| 1642 | + |
| 1643 | + private function getWidth() { |
| 1644 | + return $this->getString('diff-width'); |
| 1645 | + } |
| 1646 | + |
| 1647 | + protected function getSource() { |
| 1648 | + return $this->getString('diff-source'); |
| 1649 | + } |
| 1650 | + |
| 1651 | + protected function getArticle() { |
| 1652 | + return $this->getString('diff-' . $this->node->getQName() . '-article'); |
| 1653 | + } |
| 1654 | + |
| 1655 | + public static $bundle = array( |
| 1656 | + 'diff-movedto' => 'Moved to', |
| 1657 | + 'diff-styleadded' => 'Style added', |
| 1658 | + 'diff-added' => 'Added', |
| 1659 | + 'diff-changedto' => 'Changed to', |
| 1660 | + 'diff-movedoutof' => 'Moved out of', |
| 1661 | + 'diff-styleremoved' => 'Style removed', |
| 1662 | + 'diff-removed' => 'Removed', |
| 1663 | + 'diff-changedfrom' => 'Changed from', |
| 1664 | + 'diff-source' => 'Source', |
| 1665 | + 'diff-withdestination' => 'With destination', |
| 1666 | + 'diff-and' => 'And', |
| 1667 | + 'diff-with' => 'With', |
| 1668 | + 'diff-width' => 'Width', |
| 1669 | + 'diff-height' => 'Height', |
| 1670 | + 'diff-html-article' => 'A', |
| 1671 | + 'diff-html' => 'Html page', |
| 1672 | + 'diff-body-article' => 'A', |
| 1673 | + 'diff-body' => 'Html document', |
| 1674 | + 'diff-p-article' => 'A', |
| 1675 | + 'diff-p' => 'Paragraph', |
| 1676 | + 'diff-blockquote-article' => 'A', |
| 1677 | + 'diff-blockquote' => 'Quote', |
| 1678 | + 'diff-h1-article' => 'A', |
| 1679 | + 'diff-h1' => 'Heading (level 1)', |
| 1680 | + 'diff-h2-article' => 'A', |
| 1681 | + 'diff-h2' => 'Heading (level 2)', |
| 1682 | + 'diff-h3-article' => 'A', |
| 1683 | + 'diff-h3' => 'Heading (level 3)', |
| 1684 | + 'diff-h4-article' => 'A', |
| 1685 | + 'diff-h4' => 'Heading (level 4)', |
| 1686 | + 'diff-h5-article' => 'A', |
| 1687 | + 'diff-h5' => 'Heading (level 5)', |
| 1688 | + 'diff-pre-article' => 'A', |
| 1689 | + 'diff-pre' => 'Preformatted block', |
| 1690 | + 'diff-div-article' => 'A', |
| 1691 | + 'diff-div' => 'Division', |
| 1692 | + 'diff-ul-article' => 'An', |
| 1693 | + 'diff-ul' => 'Unordered list', |
| 1694 | + 'diff-ol-article' => 'An', |
| 1695 | + 'diff-ol' => 'Ordered list', |
| 1696 | + 'diff-li-article' => 'A', |
| 1697 | + 'diff-li' => 'List item', |
| 1698 | + 'diff-table-article' => 'A', |
| 1699 | + 'diff-table' => 'Table', |
| 1700 | + 'diff-tbody-article' => 'A', |
| 1701 | + 'diff-tbody' => "Table's content", |
| 1702 | + 'diff-tr-article' => 'A', |
| 1703 | + 'diff-tr' => 'Row', |
| 1704 | + 'diff-td-article' => 'A', |
| 1705 | + 'diff-td' => 'Cell', |
| 1706 | + 'diff-th-article' => 'A', |
| 1707 | + 'diff-th' => 'Header', |
| 1708 | + 'diff-br-article' => 'A', |
| 1709 | + 'diff-br' => 'Break', |
| 1710 | + 'diff-hr-article' => 'A', |
| 1711 | + 'diff-hr' => 'Horizontal rule', |
| 1712 | + 'diff-code-article' => 'A', |
| 1713 | + 'diff-code' => 'Computer code block', |
| 1714 | + 'diff-dl-article' => 'A', |
| 1715 | + 'diff-dl' => 'Definition list', |
| 1716 | + 'diff-dt-article' => 'A', |
| 1717 | + 'diff-dt' => 'Definition term', |
| 1718 | + 'diff-dd-article' => 'A', |
| 1719 | + 'diff-dd' => 'Definition', |
| 1720 | + 'diff-input-article' => 'An', |
| 1721 | + 'diff-input' => 'Input', |
| 1722 | + 'diff-form-article' => 'A', |
| 1723 | + 'diff-form' => 'Form', |
| 1724 | + 'diff-img-article' => 'An', |
| 1725 | + 'diff-img' => 'Image', |
| 1726 | + 'diff-span-article' => 'A', |
| 1727 | + 'diff-span' => 'Span', |
| 1728 | + 'diff-a-article' => 'A', |
| 1729 | + 'diff-a' => 'Link', |
| 1730 | + 'diff-i' => 'Italics', |
| 1731 | + 'diff-b' => 'Bold', |
| 1732 | + 'diff-strong' => 'Strong', |
| 1733 | + 'diff-em' => 'Emphasis', |
| 1734 | + 'diff-font' => 'Font', |
| 1735 | + 'diff-big' => 'Big', |
| 1736 | + 'diff-del' => 'Deleted', |
| 1737 | + 'diff-tt' => 'Fixed width', |
| 1738 | + 'diff-sub' => 'Subscript', |
| 1739 | + 'diff-sup' => 'Superscript', |
| 1740 | + 'diff-strike' => 'Strikethrough' |
| 1741 | + ); |
| 1742 | + |
| 1743 | + public function getString($key) { |
| 1744 | + return self::$bundle[$key]; |
| 1745 | + } |
| 1746 | +} |
| 1747 | + |
| 1748 | +class NoContentTagToString extends TagToString { |
| 1749 | + |
| 1750 | + function __construct(TagNode $node, $sem) { |
| 1751 | + parent::__construct($node, $sem); |
| 1752 | + } |
| 1753 | + |
| 1754 | + public function getAddedDescription(ChangeText $txt) { |
| 1755 | + $txt.addText($this->getChangedTo() . ' ' + strtolower($this->getArticle()) . ' '); |
| 1756 | + $txt.addHtml('<b>'); |
| 1757 | + $txt.addText(strtolower($this->getDescription())); |
| 1758 | + $txt.addHtml('</b>'); |
| 1759 | + |
| 1760 | + $this->addAttributes($txt, $this->node->getAttributes()); |
| 1761 | + $txt.addText('.'); |
| 1762 | + } |
| 1763 | + |
| 1764 | + private function getChangedTo() { |
| 1765 | + return $this->getString('diff-changedto'); |
| 1766 | + } |
| 1767 | + |
| 1768 | + public function getRemovedDescription(ChangeText $txt) { |
| 1769 | + $txt.addText($this->getChangedFrom() . ' ' + strtolower($this->getArticle()) . ' '); |
| 1770 | + $txt.addHtml('<b>'); |
| 1771 | + $txt.addText(strtolower($this->getDescription())); |
| 1772 | + $txt.addHtml('</b>'); |
| 1773 | + |
| 1774 | + $this->addAttributes($txt, $this->node->getAttributes()); |
| 1775 | + $txt.addText('.'); |
| 1776 | + } |
| 1777 | + |
| 1778 | + private function getChangedFrom() { |
| 1779 | + return $this->getString('diff-changedfrom'); |
| 1780 | + } |
| 1781 | +} |
| 1782 | + |
| 1783 | +class AnchorToString extends TagToString { |
| 1784 | + |
| 1785 | + function __construct(TagNode $node, $sem) { |
| 1786 | + parent::__construct($node, $sem); |
| 1787 | + } |
| 1788 | + |
| 1789 | + protected function addAttributes(ChangeText $txt, array $attributes) { |
| 1790 | + if (array_key_exists('href',$attributes)) { |
| 1791 | + $txt->addText(' ' . strtolower($this->getWithDestination()) . ' ' . $attributes['href']); |
| 1792 | + unset($attributes['href']); |
| 1793 | + } |
| 1794 | + parent::addAttributes($txt, $attributes); |
| 1795 | + } |
| 1796 | + |
| 1797 | + private function getWithDestination() { |
| 1798 | + return $this->getString('diff-withdestination'); |
| 1799 | + } |
| 1800 | + |
| 1801 | +} |
| 1802 | + |
| 1803 | +/** |
| 1804 | + * Takes a branch root and creates an HTML file for it. |
| 1805 | + */ |
| 1806 | +class HTMLOutput{ |
| 1807 | + |
| 1808 | + private $prefix; |
| 1809 | + private $handler; |
| 1810 | + |
| 1811 | + function __construct($prefix, $handler) { |
| 1812 | + $this->prefix = $prefix; |
| 1813 | + $this->handler = $handler; |
| 1814 | + } |
| 1815 | + |
| 1816 | + public function parse(TagNode $node) { |
| 1817 | + |
| 1818 | + if (0 != strcasecmp($node->getQName(),'img') && 0 != strcasecmp($node->getQName(),'body')) { |
| 1819 | + $this->handler->startElement($node->getQName(), $node->getAttributes()); |
| 1820 | + } |
| 1821 | + |
| 1822 | + $newStarted = false; |
| 1823 | + $remStarted = false; |
| 1824 | + $changeStarted = false; |
| 1825 | + $changeTXT = ''; |
| 1826 | + |
| 1827 | + foreach ($node->children as $child) { |
| 1828 | + if ($child instanceof TagNode) { |
| 1829 | + if ($newStarted) { |
| 1830 | + $this->handler->endElement('span'); |
| 1831 | + $newStarted = false; |
| 1832 | + } else if ($changeStarted) { |
| 1833 | + $this->handler->endElement('span'); |
| 1834 | + $changeStarted = false; |
| 1835 | + } else if ($remStarted) { |
| 1836 | + $this->handler->endElement('span'); |
| 1837 | + $remStarted = false; |
| 1838 | + } |
| 1839 | + $this->parse($child); |
| 1840 | + } else if ($child instanceof TextNode) { |
| 1841 | + $mod = $child->getModification(); |
| 1842 | + |
| 1843 | + if ($newStarted && ($mod->getType() != Modification::ADDED || $mod->isFirstOfID())) { |
| 1844 | + $this->handler->endElement('span'); |
| 1845 | + $newStarted = false; |
| 1846 | + } else if ($changeStarted && ($mod->getType() != Modification::CHANGED || $mod->getChanges() != $changeTXT || $mod->isFirstOfID())) { |
| 1847 | + $this->handler->endElement('span'); |
| 1848 | + $changeStarted = false; |
| 1849 | + } else if ($remStarted && ($mod->getType() != Modification::REMOVED || $mod ->isFirstOfID())) { |
| 1850 | + $this->handler->endElement('span'); |
| 1851 | + $remStarted = false; |
| 1852 | + } |
| 1853 | + |
| 1854 | + // no else because a removed part can just be closed and a new |
| 1855 | + // part can start |
| 1856 | + if (!$newStarted && $mod->getType() == Modification::ADDED) { |
| 1857 | + $attrs = array('class'=>'diff-html-added'); |
| 1858 | + if ($mod->isFirstOfID()) { |
| 1859 | + $attrs['id'] = 'added-' . $this->prefix . '-' . $mod->getID(); |
| 1860 | + } |
| 1861 | + $this->addAttributes($mod, $attrs); |
| 1862 | + $attrs['onclick'] = 'return tipA(constructToolTipA(this));'; |
| 1863 | + $this->handler->startElement('span', $attrs); |
| 1864 | + $newStarted = true; |
| 1865 | + } else if (!$changeStarted && $mod->getType() == Modification::CHANGED) { |
| 1866 | + $attrs = array('class'=>'diff-html-changed'); |
| 1867 | + if ($mod->isFirstOfID()) { |
| 1868 | + $attrs['id'] = 'changed-' . $this->prefix . '-' . $mod->getID(); |
| 1869 | + } |
| 1870 | + $this->addAttributes($mod, $attrs); |
| 1871 | + $attrs['onclick'] = 'return tipC(constructToolTipC(this));'; |
| 1872 | + $this->handler->startElement('span', $attrs); |
| 1873 | + |
| 1874 | + //tooltip |
| 1875 | + $this->handler->startElement('span', array('class'=>'tip')); |
| 1876 | + $this->handler->characters($mod->getChanges()); |
| 1877 | + $this->handler->endElement('span'); |
| 1878 | + |
| 1879 | + $changeStarted = true; |
| 1880 | + $changeTXT = $mod->getChanges(); |
| 1881 | + } else if (!$remStarted && $mod->getType() == Modification::REMOVED) { |
| 1882 | + $attrs = array('class'=>'diff-html-removed'); |
| 1883 | + if ($mod->isFirstOfID()) { |
| 1884 | + $attrs['id'] = 'removed-' . $this->prefix . '-' . $mod->getID(); |
| 1885 | + } |
| 1886 | + $this->addAttributes($mod, $attrs); |
| 1887 | + $attrs['onclick'] = 'return tipR(constructToolTipR(this));'; |
| 1888 | + $this->handler->startElement('span', $attrs); |
| 1889 | + $remStarted = true; |
| 1890 | + } |
| 1891 | + |
| 1892 | + $chars = $child->getText(); |
| 1893 | + |
| 1894 | + if ($child instanceof ImageNode) { |
| 1895 | + $this->writeImage($child); |
| 1896 | + } else { |
| 1897 | + $this->handler->characters($chars); |
| 1898 | + } |
| 1899 | + |
| 1900 | + } |
| 1901 | + } |
| 1902 | + |
| 1903 | + if ($newStarted) { |
| 1904 | + $this->handler->endElement('span'); |
| 1905 | + $newStarted = false; |
| 1906 | + } else if ($changeStarted) { |
| 1907 | + $this->handler->endElement('span'); |
| 1908 | + $changeStarted = false; |
| 1909 | + } else if ($remStarted) { |
| 1910 | + $this->handler->endElement('span'); |
| 1911 | + $remStarted = false; |
| 1912 | + } |
| 1913 | + |
| 1914 | + if (0 != strcasecmp($node->getQName(),'img') |
| 1915 | + && 0 != strcasecmp($node->getQName(),'body')) |
| 1916 | + $this->handler->endElement($node->getQName()); |
| 1917 | + } |
| 1918 | + |
| 1919 | + private function writeImage(ImageNode $imgNode){ |
| 1920 | + $attrs = $imgNode->getAttributes(); |
| 1921 | + if ($imgNode->getModification()->getType() == Modification::REMOVED) |
| 1922 | + $attrs['changeType']='diff-removed-image'; |
| 1923 | + else if ($imgNode->getModification()->getType() == Modification::ADDED) |
| 1924 | + $attrs['changeType'] = 'diff-added-image'; |
| 1925 | + $attrs['onload'] = 'updateOverlays()'; |
| 1926 | + $attrs['onError'] = 'updateOverlays()'; |
| 1927 | + $attrs['onAbort'] = 'updateOverlays()'; |
| 1928 | + $this->handler->startElement('img', $attrs); |
| 1929 | + $this->handler->endElement('img'); |
| 1930 | + } |
| 1931 | + |
| 1932 | + private function addAttributes(Modification $mod, /*array*/ &$attrs) { |
| 1933 | + if (is_null($mod->getPrevious())) { |
| 1934 | + $previous = 'first-' . $this->prefix; |
| 1935 | + } else { |
| 1936 | + $previous = Modification::typeToString($mod->getPrevious()->getType()) . '-' . $this->prefix . '-' |
| 1937 | + . $mod->getPrevious()->getID(); |
| 1938 | + } |
| 1939 | + $attrs['previous'] = $previous; |
| 1940 | + |
| 1941 | + $changeId = Modification::typeToString($mod->getType()) . '-' + $this->prefix . '-' . $mod->getID(); |
| 1942 | + $attrs['changeId'] = $changeId; |
| 1943 | + |
| 1944 | + if (is_null($mod->getNext())) { |
| 1945 | + $next = 'last-' . $this->prefix; |
| 1946 | + } else { |
| 1947 | + $next = Modification::typeToString($mod->getNext()->getType()) . '-' . $this->prefix . '-' |
| 1948 | + . $mod->getNext()->getID(); |
| 1949 | + } |
| 1950 | + $attrs['next'] = $next; |
| 1951 | + } |
| 1952 | +} |
| 1953 | + |
| 1954 | +class EchoingContentHandler{ |
| 1955 | + |
| 1956 | + function startElement($qname, /*array*/ $arguments){ |
| 1957 | + echo '<'.$qname; |
| 1958 | + foreach($arguments as $key => $value){ |
| 1959 | + echo ' '.$key.'="'.Sanitizer::encodeAttribute($value).'"'; |
| 1960 | + } |
| 1961 | + echo '>'; |
| 1962 | + } |
| 1963 | + |
| 1964 | + function endElement($qname){ |
| 1965 | + echo '</'.$qname.'>'; |
| 1966 | + } |
| 1967 | + |
| 1968 | + function characters($chars){ |
| 1969 | + echo $chars; |
| 1970 | + } |
| 1971 | + |
| 1972 | +} |
| 1973 | + |
| 1974 | +class DelegatingContentHandler{ |
| 1975 | + |
| 1976 | + private $delegate; |
| 1977 | + |
| 1978 | + function __construct($delegate){ |
| 1979 | + $this->delegate=$delegate; |
| 1980 | + } |
| 1981 | + |
| 1982 | + function startElement($qname, /*array*/ $arguments){ |
| 1983 | + $this->delegate->addHtml('<'.$qname) ; |
| 1984 | + foreach($arguments as $key => $value){ |
| 1985 | + $this->delegate->addHtml(' '.$key.'="'. Sanitizer::encodeAttribute($value) .'"'); |
| 1986 | + } |
| 1987 | + $this->delegate->addHtml('>'); |
| 1988 | + } |
| 1989 | + |
| 1990 | + function endElement($qname){ |
| 1991 | + $this->delegate->addHtml('</'.$qname.'>'); |
| 1992 | + } |
| 1993 | + |
| 1994 | + function characters($chars){ |
| 1995 | + $this->delegate->addHtml( $chars ); |
| 1996 | + } |
| 1997 | + |
| 1998 | +} |
Index: trunk/phase3/includes/Diff.php |
— | — | @@ -18,454 +18,488 @@ |
19 | 19 | */ |
20 | 20 | |
21 | 21 | /** |
22 | | - * Implementation of the Diff algorithm. |
23 | | - * |
24 | | - * DO NOT USE ON PRODUCTION SYSTEMS |
25 | | - * |
26 | | - * The algorithm is based on the O(NP) LCS algorithm as descibed by Wu et al. in "An O(NP) Sequence Comparison Algorithm" |
27 | | - * and extended with my own ideas. |
| 22 | + * This diff implementation is mainly lifted from the LCS algorithm of the Eclipse project which |
| 23 | + * in turn is based on Myers' "An O(ND) difference algorithm and its variations" |
| 24 | + * (http://citeseer.ist.psu.edu/myers86ond.html) with range compression (see Wu et al.'s |
| 25 | + * "An O(NP) Sequence Comparison Algorithm"). |
28 | 26 | * |
29 | | - * @return array($from_removed, $to_added) |
30 | | - * TRUE if the token was removed or added. |
| 27 | + * This implementation supports an upper bound on the excution time. |
31 | 28 | * |
| 29 | + * Complexity: O((M + N)D) worst case time, O(M + N + D^2) expected time, O(M + N) space |
| 30 | + * |
32 | 31 | * @author Guy Van den Broeck |
| 32 | + * |
33 | 33 | */ |
34 | | -function wikidiff3_diff( /*array*/ $from, /*array*/ $to, $boundRunningTime=FALSE, $max_NP_before_bound = 800000){ |
35 | | - wfProfileIn( __METHOD__ ); |
| 34 | +class WikiDiff3{ |
36 | 35 | |
37 | | - $m = sizeof($from); |
38 | | - $n = sizeof($to); |
| 36 | + //Input variables |
| 37 | + private $from; |
| 38 | + private $to; |
| 39 | + private $m; |
| 40 | + private $n; |
39 | 41 | |
40 | | - $result_from = array_fill(0,$m,TRUE); |
41 | | - $result_to = array_fill(0,$n,TRUE); |
| 42 | + private $tooLong; |
| 43 | + private $powLimit; |
42 | 44 | |
43 | | - //reduce the complexity for the next step (intentionally done twice) |
44 | | - //remove common tokens at the start |
45 | | - $i=0; |
46 | | - while($i<$m && $i<$n && $from[$i]===$to[$i]){ |
47 | | - $result_from[$i] = $result_to[$i] = FALSE; |
48 | | - unset($from[$i],$to[$i]); |
49 | | - ++$i; |
| 45 | + //State variables |
| 46 | + private $maxDifferences; |
| 47 | + |
| 48 | + //Output variables |
| 49 | + public $length; |
| 50 | + public $added; |
| 51 | + public $removed; |
| 52 | + public $heuristicUsed; |
| 53 | + |
| 54 | + function __construct($tooLong = 2000000, $powLimit = 1.45){ |
| 55 | + $this->tooLong = $tooLong; |
| 56 | + $this->powLimit = $powLimit; |
50 | 57 | } |
51 | 58 | |
52 | | - //remove common tokens at the end |
53 | | - $j=1; |
54 | | - while($i+$j<=$m && $i+$j<=$n && $from[$m-$j]===$to[$n-$j]){ |
55 | | - $result_from[$m-$j] = $result_to[$n-$j] = FALSE; |
56 | | - unset($from[$m-$j],$to[$n-$j]); |
57 | | - ++$j; |
58 | | - } |
| 59 | + public function diff(/*array*/ $from, /*array*/ $to){ |
| 60 | + $m = sizeof($from); |
| 61 | + $n = sizeof($to); |
| 62 | + |
| 63 | + $this->heuristicUsed = FALSE; |
59 | 64 | |
60 | | - $newFrom = $newFromIndex = $newTo = $newToIndex = array(); |
| 65 | + $result_from = $m>0?array_fill(0,$m,true):array(); |
| 66 | + $result_to = $n>0?array_fill(0,$n,true):array(); |
61 | 67 | |
62 | | - //remove tokens not in both sequences |
63 | | - $shared = array_fill_keys($from,FALSE); |
64 | | - foreach($to as $index => $el){ |
65 | | - if(array_key_exists($el,$shared)){ |
66 | | - //keep it |
67 | | - $shared[$el] = TRUE; |
68 | | - $newTo[] = $el; |
69 | | - $newToIndex[] = $index; |
| 68 | + //reduce the complexity for the next step (intentionally done twice) |
| 69 | + //remove common tokens at the start |
| 70 | + $i=0; |
| 71 | + while($i<$m && $i<$n && $from[$i]===$to[$i]){ |
| 72 | + $result_from[$i] = $result_to[$i] = false; |
| 73 | + unset($from[$i],$to[$i]); |
| 74 | + ++$i; |
70 | 75 | } |
71 | | - } |
72 | | - foreach($from as $index => $el){ |
73 | | - if($shared[$el]){ |
74 | | - //keep it |
75 | | - $newFrom[] = $el; |
76 | | - $newFromIndex[] = $index; |
| 76 | + |
| 77 | + //remove common tokens at the end |
| 78 | + $j=1; |
| 79 | + while($i+$j<=$m && $i+$j<=$n && $from[$m-$j]===$to[$n-$j]){ |
| 80 | + $result_from[$m-$j] = $result_to[$n-$j] = false; |
| 81 | + unset($from[$m-$j],$to[$n-$j]); |
| 82 | + ++$j; |
77 | 83 | } |
78 | | - } |
79 | 84 | |
80 | | - unset($from, $to, $shared); |
| 85 | + $newFrom = $newFromIndex = $newTo = $newToIndex = array(); |
81 | 86 | |
82 | | - $m2 = sizeof($newFrom); |
83 | | - $n2 = sizeof($newTo); |
84 | | - $offsetx = $offsety = 0; |
85 | | - $from_inLcs = new InLcs($m2); |
86 | | - $to_inLcs = new InLcs($n2); |
| 87 | + //remove tokens not in both sequences |
| 88 | + $shared = array_fill_keys($from,false); |
| 89 | + foreach($to as $index => $el){ |
| 90 | + if(array_key_exists($el,$shared)){ |
| 91 | + //keep it |
| 92 | + $this->to[] = $el; |
| 93 | + $shared[$el] = true; |
| 94 | + $newToIndex[] = $index; |
| 95 | + } |
| 96 | + } |
| 97 | + foreach($from as $index => $el){ |
| 98 | + if($shared[$el]){ |
| 99 | + //keep it |
| 100 | + $this->from[] = $el; |
| 101 | + $newFromIndex[] = $index; |
| 102 | + } |
| 103 | + } |
87 | 104 | |
88 | | - wikidiff3_diffPart($newFrom, $newTo, $from_inLcs, $to_inLcs, $m2, $n2, $offsetx, $offsety, $m2, $boundRunningTime, $max_NP_before_bound); |
| 105 | + unset($shared); |
89 | 106 | |
90 | | - foreach($from_inLcs->inLcs as $key => $in){ |
91 | | - if($in){ |
92 | | - $result_from[$newFromIndex[$key]] = FALSE; |
| 107 | + $this->m = sizeof($this->from); |
| 108 | + $this->n = sizeof($this->to); |
| 109 | + $this->removed = $this->m>0?array_fill(0,$this->m,true):array(); |
| 110 | + $this->added = $this->n>0?array_fill(0,$this->n,true):array(); |
| 111 | + |
| 112 | + $this->longestCommonSubsequence(); |
| 113 | + |
| 114 | + $this->m = $m; |
| 115 | + $this->n = $n; |
| 116 | + $this->length += $i+$j-1; |
| 117 | + |
| 118 | + foreach($this->removed as $key => $removed){ |
| 119 | + if(!$removed){ |
| 120 | + $result_from[$newFromIndex[$key]] = false; |
| 121 | + } |
93 | 122 | } |
94 | | - } |
95 | | - foreach($to_inLcs->inLcs as $key => $in){ |
96 | | - if($in){ |
97 | | - $result_to[$newToIndex[$key]] = FALSE; |
| 123 | + foreach($this->added as $key => $added){ |
| 124 | + if(!$added){ |
| 125 | + $result_to[$newToIndex[$key]] = false; |
| 126 | + } |
98 | 127 | } |
| 128 | + unset($this->added, $this->removed); |
| 129 | + return array($result_from, $result_to); |
99 | 130 | } |
100 | | - |
101 | | - wfProfileOut( __METHOD__ ); |
102 | | - return array($result_from, $result_to); |
103 | | -} |
104 | 131 | |
105 | | -function wikidiff3_diffPart( /*array*/ $a, /*array*/ $b, InLcs $a_inLcs, InLcs $b_inLcs, $m, $n, $offsetx, $offsety, $bestKnownLcs, $boundRunningTime=FALSE, $max_NP_before_bound = 800000){ |
106 | | - if($bestKnownLcs==0 || $m==0 || $n==0){ |
107 | | - return; |
108 | | - } |
109 | | - |
110 | | - if($m>$n){ |
111 | | - return wikidiff3_diffPart($b, $a, $b_inLcs, $a_inLcs, $n, $m, $offsety, $offsetx, $bestKnownLcs, $boundRunningTime, $max_NP_before_bound); |
112 | | - } |
| 132 | + public function longestCommonSubsequence() { |
| 133 | + if ($this->m == 0 || $this->n == 0) { |
| 134 | + $this->length = 0; |
| 135 | + return; |
| 136 | + } |
113 | 137 | |
114 | | - $a_inLcs_sym = &$a_inLcs->inLcs; |
115 | | - $b_inLcs_sym = &$b_inLcs->inLcs; |
| 138 | + $this->maxDifferences = ceil(($this->m + $this->n) / 2.0); |
| 139 | + if ($this->m * $this->n > $this->tooLong) { |
| 140 | + // limit complexity to D^POW_LIMIT for long sequences |
| 141 | + $this->maxDifferences = floor(pow($this->maxDifferences, $this->powLimit - 1.0)); |
| 142 | + wfDebug("Limiting max number of differences to $this->maxDifferences"); |
| 143 | + } |
116 | 144 | |
117 | | - //remove common tokens at the start |
118 | | - while($m>0 && $n>0 && $a[0]===$b[0]){ |
119 | | - $a_inLcs_sym[$offsetx] = $b_inLcs_sym[$offsety] = TRUE; |
120 | | - ++$offsetx; ++$offsety; |
121 | | - --$m; --$n; |
122 | | - --$bestKnownLcs; |
123 | | - array_shift($a); |
124 | | - array_shift($b); |
125 | | - } |
| 145 | + /* |
| 146 | + * The common prefixes and suffixes are always part of some LCS, include |
| 147 | + * them now to reduce our search space |
| 148 | + */ |
| 149 | + $max = min($this->m, $this->n); |
| 150 | + for ($forwardBound = 0; $forwardBound < $max |
| 151 | + && $this->from[$forwardBound]===$this->to[$forwardBound]; ++$forwardBound) { |
| 152 | + $this->removed[$forwardBound] = $this->added[$forwardBound] = false; |
| 153 | + } |
126 | 154 | |
127 | | - //remove common tokens at the end |
128 | | - while($m>0 && $n>0 && $a[$m-1]===$b[$n-1]){ |
129 | | - $a_inLcs_sym[$offsetx+$m-1] = $b_inLcs_sym[$offsety+$n-1] = TRUE; |
130 | | - --$m; --$n; |
131 | | - --$bestKnownLcs; |
132 | | - array_pop($a); |
133 | | - array_pop($b); |
134 | | - } |
| 155 | + $backBoundL1 = $this->m - 1; |
| 156 | + $backBoundL2 = $this->n - 1; |
135 | 157 | |
136 | | - if($bestKnownLcs==0 || $m==0 || $n==0){ |
137 | | - return; |
| 158 | + while ($backBoundL1 >= $forwardBound && $backBoundL2 >= $forwardBound |
| 159 | + && $this->from[$backBoundL1] === $this->to[$backBoundL2]) { |
| 160 | + $this->removed[$backBoundL1--] = $this->added[$backBoundL2--] = false; |
| 161 | + } |
| 162 | + |
| 163 | + $temp = array_fill(0,$this->m + $this->n + 1, 0); |
| 164 | + $V = array($temp,$temp); |
| 165 | + $snake = array(0,0,0); |
| 166 | + |
| 167 | + $this->length = $forwardBound + $this->m - $backBoundL1 - 1 |
| 168 | + + $this->lcs_rec($forwardBound, $backBoundL1, $forwardBound, $backBoundL2, |
| 169 | + $V, $snake); |
138 | 170 | } |
139 | | - if($m>$n){ |
140 | | - return wikidiff3_diffPart($b, $a, $b_inLcs, $a_inLcs, $n, $m, $offsety, $offsetx, $bestKnownLcs, $boundRunningTime, $max_NP_before_bound); |
141 | | - } |
142 | 171 | |
143 | | - $delta = $n-$m; |
144 | | - $delta_plus_1 = $delta+1; |
145 | | - $delta_min_1 = $delta-1; |
| 172 | + private function lcs_rec($bottoml1, $topl1, $bottoml2, $topl2, &$V, &$snake) { |
| 173 | + // check that both sequences are non-empty |
| 174 | + if ($bottoml1 > $topl1 || $bottoml2 > $topl2) { |
| 175 | + return 0; |
| 176 | + } |
146 | 177 | |
147 | | - $fpForw = $snakeBeginForw = $snakeEndForw = array_fill( 0, $delta_plus_1, -1); |
148 | | - $lcsSizeForw = $snakekForw = $lcsSizeBack = $snakekBack = array_fill( 0, $delta_plus_1, 0); |
149 | | - $fpBack = $snakeBeginBack = $snakeEndBack = array_fill( 0, $delta_plus_1, $n); |
150 | | - $overlap = $delta>1 ? array_fill( 1, $delta_min_1, FALSE) : array(); |
151 | | - $bestLcsLength = $bestLcsLengthTop = $bestLcsLengthBottom = -1; |
| 178 | + $d = $this->find_middle_snake($bottoml1, $topl1, $bottoml2, $topl2, $V, $snake); |
152 | 179 | |
153 | | - if($boundRunningTime){ |
154 | | - $maxp_before_bound = max((-$delta+sqrt($delta*$delta+($max_NP_before_bound<<2)))>>1,2); |
155 | | - if($maxp_before_bound>=$m){ |
156 | | - $boundRunningTime = false; |
157 | | - unset($maxp_before_bound); |
| 180 | + // need to store these so we don't lose them when they're overwritten by |
| 181 | + // the recursion |
| 182 | + $len = $snake[2]; |
| 183 | + $startx = $snake[0]; |
| 184 | + $starty = $snake[1]; |
| 185 | + |
| 186 | + // the middle snake is part of the LCS, store it |
| 187 | + for ($i = 0; $i < $len; ++$i) { |
| 188 | + $this->removed[$startx + $i] = $this->added[$starty + $i] = false; |
158 | 189 | } |
| 190 | + |
| 191 | + if ($d > 1) { |
| 192 | + return $len |
| 193 | + + $this->lcs_rec($bottoml1, $startx - 1, $bottoml2, $starty - 1, $V, $snake) |
| 194 | + + $this->lcs_rec($startx + $len, $topl1, $starty + $len, $topl2, $V, $snake); |
| 195 | + } else if ($d == 1) { |
| 196 | + /* |
| 197 | + * In this case the sequences differ by exactly 1 line. We have |
| 198 | + * already saved all the lines after the difference in the for loop |
| 199 | + * above, now we need to save all the lines before the difference. |
| 200 | + */ |
| 201 | + $max = min($startx - $bottoml1, $starty - $bottoml2); |
| 202 | + for ($i = 0; $i < $max; ++$i) { |
| 203 | + $this->removed[$bottoml1 + $i] = $this->added[$bottoml2 + $i] = false; |
| 204 | + } |
| 205 | + return $max + $len; |
| 206 | + } |
| 207 | + return $len; |
159 | 208 | } |
160 | 209 | |
161 | | - $p=-1; |
162 | | - $m_min_1 = $m-1; |
163 | | - $maxp=$m_min_1-$bestLcsLength; |
| 210 | + private function find_middle_snake($bottoml1, $topl1, $bottoml2,$topl2, &$V, &$snake) { |
| 211 | + $from = &$this->from; |
| 212 | + $to = &$this->to; |
| 213 | + $V0 = &$V[0]; |
| 214 | + $V1 = &$V[1]; |
| 215 | + $snake0 = &$snake[0]; |
| 216 | + $snake1 = &$snake[1]; |
| 217 | + $snake2 = &$snake[2]; |
| 218 | + $bottoml1_min_1 = $bottoml1-1; |
| 219 | + $bottoml2_min_1 = $bottoml2-1; |
| 220 | + $N = $topl1 - $bottoml1_min_1; |
| 221 | + $M = $topl2 - $bottoml2_min_1; |
| 222 | + $delta = $N - $M; |
| 223 | + $maxabsx = $N+$bottoml1; |
| 224 | + $maxabsy = $M+$bottoml2; |
| 225 | + $limit = min($this->maxDifferences, ceil(($N + $M ) / 2)); // ceil((N+M)/2) |
164 | 226 | |
165 | | - while($p<$maxp){ |
| 227 | + //value_to_add_forward: a 0 or 1 that we add to the start |
| 228 | + // offset |
| 229 | + // to make it odd/even |
| 230 | + if (($M & 1) == 1) { |
| 231 | + $value_to_add_forward = 1; |
| 232 | + } else { |
| 233 | + $value_to_add_forward = 0; |
| 234 | + } |
166 | 235 | |
167 | | - if($boundRunningTime && $p>$maxp_before_bound){ |
168 | | - // bound the running time by stopping early |
169 | | - if($bestLcsLength>=0){ |
170 | | - // accept the best LCS thusfar |
171 | | - break; |
172 | | - }else{ |
173 | | - $bestLcsProgressForw = $bestkForw = 0; |
174 | | - foreach($lcsSizeForw as $k => $localLcsProgress){ |
175 | | - if($localLcsProgress>$bestLcsProgressForw || ($localLcsProgress==$bestLcsProgressForw && $bestkForw > $k)){ |
176 | | - $bestLcsProgressForw = $localLcsProgress; |
177 | | - $bestkForw = $k; |
178 | | - } |
179 | | - } |
180 | | - $bestLcsProgressBack = $bestkBack = 0; |
181 | | - foreach($lcsSizeBack as $k => $localLcsProgress){ |
182 | | - if($localLcsProgress>$bestLcsProgressBack || ($localLcsProgress==$bestLcsProgressBack && $bestkBack < $k)){ |
183 | | - $bestLcsProgressBack = $localLcsProgress; |
184 | | - $bestkBack = $k; |
185 | | - } |
186 | | - } |
187 | | - if($fpForw[$bestkForw]-$bestkForw>$fpBack[$bestkBack]-$bestkBack){ |
188 | | - // This is hard, maybe try some more? Can this even happen? A solution will be found soon anyway. |
189 | | - }else{ |
190 | | - // lets do this |
191 | | - $topSnakeStart = $snakeBeginForw[$bestkForw]; |
192 | | - $topSnakeEnd = $snakeEndForw[$bestkForw]; |
193 | | - $topSnakek = $snakekForw[$bestkForw]; |
194 | | - $bestLcsLengthTop = $lcsSizeForw[$bestkBack] + $topSnakeStart - $topSnakeEnd; |
| 236 | + if (($N & 1) == 1) { |
| 237 | + $value_to_add_backward = 1; |
| 238 | + } else { |
| 239 | + $value_to_add_backward = 0; |
| 240 | + } |
195 | 241 | |
196 | | - $bottomSnakeStart = $snakeEndBack[$bestkBack]+1; |
197 | | - $bottomSnakeEnd = $snakeBeginBack[$bestkBack]+1; |
198 | | - $bottomSnakek = $snakekBack[$bestkBack]; |
199 | | - $bestLcsLengthBottom = $lcsSizeBack[$bestkBack] + $bottomSnakeStart - $bottomSnakeEnd; |
| 242 | + $start_forward = -$M; |
| 243 | + $end_forward = $N; |
| 244 | + $start_backward = -$N; |
| 245 | + $end_backward = $M; |
200 | 246 | |
201 | | - if($bottomSnakeStart-$topSnakeEnd>$n*0.6 && ($bottomSnakeStart-$bottomSnakek)-($topSnakeEnd-$topSnakek)>$m*0.6){ |
202 | | - // cut the sequence from both sides (there isn't enough progress, 60% of the sequence is not covered) |
203 | | - // also diff the middle now |
204 | | - if($bottomSnakeEnd>($fpForw[$bestkForw]>>1)){ |
205 | | - // do the middle diff between the snakes |
206 | | - wfDebug(" Limiting diff run time from both sides, middle between snakes\n"); |
207 | | - $m_t = ($bottomSnakeStart-$bottomSnakek)-($topSnakeEnd-$topSnakek); |
208 | | - $n_t = $bottomSnakeStart-$topSnakeEnd; |
209 | | - $a_t = array_slice($a, $topSnakeEnd-$topSnakek, $m_t); |
210 | | - $b_t = array_slice($b, $topSnakeEnd, $n_t); |
211 | | - $offsetx_t = $offsetx+($topSnakeEnd-$topSnakek); |
212 | | - $offsety_t = $offsety+$topSnakeEnd; |
213 | | - }else{ |
214 | | - // the snake is too early in the sequence, do the middle diff between endpoints of progress |
215 | | - wfDebug(" Limiting diff run time from both sides, middle between endpoints\n"); |
216 | | - $m_t = ($fpBack[$bestkBack]+1-$bestkBack)-($fpForw[$bestkForw]-$bestkForw); |
217 | | - $n_t = $fpBack[$bestkBack]+1-$fpForw[$bestkForw]; |
218 | | - $a_t = array_slice($a, $fpForw[$bestkForw]-$bestkForw, $m_t); |
219 | | - $b_t = array_slice($b, $fpForw[$bestkForw], $n_t); |
220 | | - $offsetx_t = $offsetx+($fpForw[$bestkForw]-$bestkForw); |
221 | | - $offsety_t = $offsety+$fpForw[$bestkForw]; |
222 | | - } |
223 | | - wikidiff3_diffPart($a_t, $b_t, $a_inLcs, $b_inLcs, $m_t, $n_t, $offsetx_t, $offsety_t, $m_t, $boundRunningTime, $max_NP_before_bound); |
224 | | - }else if(min($n-$bottomSnakeStart, $m-($bottomSnakeStart-$bottomSnakek))>min($topSnakeEnd, $topSnakeEnd-$topSnakek)){ |
225 | | - // the bottom snake is more interesting |
226 | | - wfDebug("Limiting diff run time from bottom side\n"); |
227 | | - $topSnakeStart = $bottomSnakeStart; |
228 | | - $topSnakeEnd = $bottomSnakeEnd; |
229 | | - $topSnakek = $bottomSnakek; |
230 | | - $bestLcsLengthTop = $topSnakeEnd-$topSnakek; |
231 | | - $bottomSnakeStart = $bottomSnakeEnd; |
232 | | - }else{ |
233 | | - // the top snake is more interesting |
234 | | - wfDebug(" Limiting diff run time from top side\n"); |
235 | | - $bottomSnakeStart = $topSnakeEnd; |
236 | | - $bottomSnakeEnd = $topSnakeEnd; |
237 | | - $bottomSnakek = $topSnakek; |
238 | | - $bestLcsLengthBottom = $m-($bottomSnakeEnd-$bottomSnakek); |
239 | | - } |
240 | | - break; |
241 | | - } |
242 | | - } |
243 | | - } |
244 | | - ++$p; |
245 | | - $overlap[-$p] = $overlap[$delta+$p] = FALSE; |
| 247 | + $limit_min_1 = $limit-1; |
| 248 | + $limit_plus_1 = $limit+1; |
246 | 249 | |
247 | | - $min_p_min_1 = -$p-1; |
248 | | - $delta_plus_1_plus_p = $delta_plus_1+$p; |
| 250 | + $V0[$limit_plus_1] = 0; |
| 251 | + $V1[$limit_min_1] = $N; |
| 252 | + $limit = min($this->maxDifferences, ceil(($N + $M ) / 2)); // ceil((N+M)/2) |
| 253 | + |
| 254 | + if (($delta & 1) == 1) { |
| 255 | + for ($d = 0; $d <= $limit; ++$d) { |
| 256 | + $start_diag = max($value_to_add_forward + $start_forward, -$d); |
| 257 | + $end_diag = min($end_forward, $d); |
| 258 | + $value_to_add_forward = 1 - $value_to_add_forward; |
249 | 259 | |
250 | | - // forward |
251 | | - $fpForw[$min_p_min_1] = $snakeEndForw[$min_p_min_1] = $snakekForw[$min_p_min_1] = $snakeBeginForw[$min_p_min_1] = $fpForw[$delta_plus_1_plus_p] = |
252 | | - $snakeBeginForw[$delta_plus_1_plus_p] = $snakeEndForw[$delta_plus_1_plus_p] = $snakekForw[$delta_plus_1_plus_p] = -1; |
253 | | - $lcsSizeForw[$min_p_min_1] = $lcsSizeForw[$delta_plus_1_plus_p] = 0; |
| 260 | + // compute forward furthest reaching paths |
| 261 | + for ($k = $start_diag; $k <= $end_diag; $k += 2) { |
| 262 | + if ($k == -$d |
| 263 | + || ($k < $d && $V0[$limit_min_1 + $k] < $V0[$limit_plus_1 + $k])) { |
| 264 | + $x = $V0[$limit_plus_1 + $k]; |
| 265 | + } else { |
| 266 | + $x = $V0[$limit_min_1 + $k] + 1; |
| 267 | + } |
254 | 268 | |
255 | | - $k=-$p; |
256 | | - do { |
257 | | - $k_plus_1 = $k+1; |
258 | | - $k_min_1 = $k-1; |
| 269 | + $absx = $snake0 = $x + $bottoml1; |
| 270 | + $absy = $snake1 = $x - $k + $bottoml2; |
| 271 | + |
| 272 | + while ($absx < $maxabsx && $absy < $maxabsy && $from[$absx] === $to[$absy]) { |
| 273 | + ++$absx; |
| 274 | + ++$absy; |
| 275 | + } |
| 276 | + $x = $absx-$bottoml1; |
| 277 | + |
| 278 | + $snake2 = $absx -$snake0; |
| 279 | + $V0[$limit + $k] = $x; |
| 280 | + if ($k >= $delta - $d + 1 && $k <= $delta + $d - 1 |
| 281 | + && $x >= $V1[$limit + $k - $delta]) { |
| 282 | + return 2 * $d - 1; |
| 283 | + } |
259 | 284 | |
260 | | - $fpBelow = $fpForw[$k_min_1]+1; |
261 | | - $fpAbove = $fpForw[$k_plus_1]; |
262 | | - $y = &$fpForw[$k]; |
263 | | - if($fpBelow>$fpAbove){ |
264 | | - $oldy = $y = $fpBelow; |
265 | | - $lcsSizeForw[$k] = $lcsSizeForw[$k_min_1]; |
266 | | - $snakeBeginForw[$k] = $snakeBeginForw[$k_min_1]; |
267 | | - $snakeEndForw[$k] = $snakeEndForw[$k_min_1]; |
268 | | - $snakekForw[$k] = $snakekForw[$k_min_1]; |
269 | | - }else{ |
270 | | - $oldy = $y = $fpAbove; |
271 | | - $lcsSizeForw[$k] = $lcsSizeForw[$k_plus_1]; |
272 | | - $snakeBeginForw[$k] = $snakeBeginForw[$k_plus_1]; |
273 | | - $snakeEndForw[$k] = $snakeEndForw[$k_plus_1]; |
274 | | - $snakekForw[$k] = $snakekForw[$k_plus_1]; |
275 | | - } |
276 | | - $x = $y-$k; |
277 | | - while($x < $m && $y < $n && $a[$x]===$b[$y]){ |
278 | | - ++$x; |
279 | | - ++$y; |
280 | | - } |
281 | | - if($y-$oldy>0){ |
282 | | - $lcsSizeForw[$k] += $y-$oldy; |
283 | | - $snakeBeginForw[$k] = $oldy; |
284 | | - $snakeEndForw[$k]= $y; |
285 | | - $snakekForw[$k] = $k; |
286 | | - } |
287 | | - if($y>=$fpBack[$k] && !$overlap[$k]){ |
288 | | - // there is overlap |
289 | | - $overlap[$k]=TRUE; |
290 | | - $lcsLength = $lcsSizeForw[$k]+$lcsSizeBack[$k]; |
291 | | - if($y>$fpBack[$k]+1){ |
292 | | - $snakeoverlap = $y-$fpBack[$k]-1; |
293 | | - $lcsLength -= $snakeoverlap; |
| 285 | + // check to see if we can cut down the diagonal range |
| 286 | + if ($x >= $N && $end_forward > $k - 1) { |
| 287 | + $end_forward = $k - 1; |
| 288 | + } else if ($absy-$bottoml2 >= $M) { |
| 289 | + $start_forward = $k + 1; |
| 290 | + $value_to_add_forward = 0; |
| 291 | + } |
294 | 292 | } |
295 | | - if($lcsLength>$bestLcsLength){ |
296 | | - // a better sequence found! |
297 | | - $bestLcsLength = $lcsLength; |
298 | 293 | |
299 | | - $topSnakeStart = $snakeBeginForw[$k]; |
300 | | - $topSnakeEnd = $snakeEndForw[$k]; |
301 | | - $topSnakek = $snakekForw[$k]; |
| 294 | + $start_diag = max($value_to_add_backward + $start_backward, -$d); |
| 295 | + $end_diag = min($end_backward, $d); |
| 296 | + $value_to_add_backward = 1 - $value_to_add_backward; |
302 | 297 | |
303 | | - // aligned snakes bite (\ ) |
304 | | - // ( \ \) |
305 | | - $bottomSnakeStart = max($snakeEndBack[$k]+1, $topSnakeEnd+max(0,$snakekBack[$k]-$snakekForw[$k])); |
306 | | - $bottomSnakeEnd = max($snakeBeginBack[$k]+1, $bottomSnakeStart); |
307 | | - $bottomSnakek = $snakekBack[$k]; |
| 298 | + // compute backward furthest reaching paths |
| 299 | + for ($k = $start_diag; $k <= $end_diag; $k += 2) { |
| 300 | + if ($k == $d |
| 301 | + || ($k != -$d && $V1[$limit_min_1 + $k] < $V1[$limit_plus_1 + $k])) { |
| 302 | + $x = $V1[$limit_min_1 + $k]; |
| 303 | + } else { |
| 304 | + $x = $V1[$limit_plus_1 + $k] - 1; |
| 305 | + } |
308 | 306 | |
309 | | - if($bottomSnakeEnd<$y){ |
310 | | - $bottomSnakeStart = $y; |
311 | | - $bottomSnakeEnd = $y; |
312 | | - $bottomSnakek = $k; |
| 307 | + $y = $x - $k - $delta; |
| 308 | + |
| 309 | + $snake2 = 0; |
| 310 | + while ($x > 0 && $y > 0 |
| 311 | + && $from[$x +$bottoml1_min_1] === $to[$y + $bottoml2_min_1]) { |
| 312 | + --$x; |
| 313 | + --$y; |
| 314 | + ++$snake2; |
313 | 315 | } |
| 316 | + $V1[$limit + $k] = $x; |
314 | 317 | |
315 | | - $bestLcsLengthTop = $lcsSizeForw[$k]-($snakeEndForw[$k]-$snakeBeginForw[$k]); |
316 | | - $bestLcsLengthBottom = $lcsSizeBack[$k]-($snakeBeginBack[$k]-$snakeEndBack[$k]); |
317 | | - if($bestKnownLcs==$lcsLength){ |
318 | | - $fpForw[$k]=$y; |
319 | | - break 2; |
| 318 | + // check to see if we can cut down our diagonal range |
| 319 | + if ($x <= 0) { |
| 320 | + $start_backward = $k + 1; |
| 321 | + $value_to_add_backward = 0; |
| 322 | + } else if ($y <= 0 && $end_backward > $k - 1) { |
| 323 | + $end_backward = $k - 1; |
320 | 324 | } |
321 | | - $maxp=$m_min_1-$bestLcsLength; |
322 | 325 | } |
323 | 326 | } |
324 | | - if($k<$delta_min_1){ |
325 | | - ++$k; |
326 | | - }else if($k>$delta){ |
327 | | - --$k; |
328 | | - }else if($k==$delta_min_1){ |
329 | | - $k = $delta+$p; |
330 | | - }else{ |
331 | | - break; |
332 | | - } |
333 | | - } while(TRUE); |
| 327 | + } else { |
| 328 | + for ($d = 0; $d <= $limit; ++$d) { |
| 329 | + $start_diag = max($value_to_add_forward + $start_forward, -$d); |
| 330 | + $end_diag = min($end_forward, $d); |
| 331 | + $value_to_add_forward = 1 - $value_to_add_forward; |
334 | 332 | |
335 | | - // backward |
336 | | - $fpBack[$min_p_min_1] = $snakeBeginBack[$min_p_min_1] = $snakeEndBack[$min_p_min_1] = $snakekBack[$min_p_min_1] = |
337 | | - $fpBack[$delta_plus_1_plus_p] = $snakeBeginBack[$delta_plus_1_plus_p] = $snakeEndBack[$delta_plus_1_plus_p] = $snakekBack[$delta_plus_1_plus_p] = $n; |
338 | | - $lcsSizeBack[$min_p_min_1] = $lcsSizeBack[$delta_plus_1_plus_p] = 0; |
| 333 | + // compute forward furthest reaching paths |
| 334 | + for ($k = $start_diag; $k <= $end_diag; $k += 2) { |
| 335 | + if ($k == -$d |
| 336 | + || ($k < $d && $V0[$limit_min_1 + $k] < $V0[$limit_plus_1 + $k])) { |
| 337 | + $x = $V0[$limit_plus_1 + $k]; |
| 338 | + } else { |
| 339 | + $x = $V0[$limit_min_1 + $k] + 1; |
| 340 | + } |
339 | 341 | |
340 | | - $k=$delta+$p; |
341 | | - do { |
342 | | - $k_plus_1 = $k+1; |
343 | | - $k_min_1 = $k-1; |
| 342 | + $absx = $snake0 = $x + $bottoml1; |
| 343 | + $absy = $snake1 = $x - $k + $bottoml2; |
| 344 | + |
| 345 | + while ($absx < $maxabsx && $absy < $maxabsy && $from[$absx] === $to[$absy]) { |
| 346 | + ++$absx; |
| 347 | + ++$absy; |
| 348 | + } |
| 349 | + $x = $absx-$bottoml1; |
| 350 | + $snake2 = $absx -$snake0; |
| 351 | + $V0[$limit + $k] = $x; |
344 | 352 | |
345 | | - $fpBelow = $fpBack[$k_min_1]; |
346 | | - $fpAbove = $fpBack[$k_plus_1]-1; |
347 | | - $y = &$fpBack[$k]; |
348 | | - if($fpBelow<$fpAbove){ |
349 | | - $oldy = $y = $fpBelow; |
350 | | - $lcsSizeBack[$k] = $lcsSizeBack[$k_min_1]; |
351 | | - $snakeBeginBack[$k] = $snakeBeginBack[$k_min_1]; |
352 | | - $snakeEndBack[$k] = $snakeEndBack[$k_min_1]; |
353 | | - $snakekBack[$k] = $snakekBack[$k_min_1]; |
354 | | - }else{ |
355 | | - $oldy = $y = $fpAbove; |
356 | | - $lcsSizeBack[$k] = $lcsSizeBack[$k_plus_1]; |
357 | | - $snakeBeginBack[$k] = $snakeBeginBack[$k_plus_1]; |
358 | | - $snakeEndBack[$k] = $snakeEndBack[$k_plus_1]; |
359 | | - $snakekBack[$k] = $snakekBack[$k_plus_1]; |
360 | | - } |
361 | | - $x = $y-$k; |
362 | | - while($x > -1 && $y > -1 && $a[$x]===$b[$y]){ |
363 | | - --$x; |
364 | | - --$y; |
365 | | - } |
366 | | - if($oldy-$y>0){ |
367 | | - $lcsSizeBack[$k] += $oldy-$y; |
368 | | - $snakeBeginBack[$k] = $oldy; |
369 | | - $snakeEndBack[$k] = $y; |
370 | | - $snakekBack[$k] = $k; |
371 | | - } |
372 | | - if($fpForw[$k]>=$y && !$overlap[$k]){ |
373 | | - // there is overlap |
374 | | - $overlap[$k]=TRUE; |
375 | | - $lcsLength = $lcsSizeForw[$k]+$lcsSizeBack[$k]; |
376 | | - if($fpForw[$k]>$y+1){ |
377 | | - $snakeoverlap = $fpForw[$k]-$y-1; |
378 | | - $lcsLength -= $snakeoverlap; |
| 353 | + // check to see if we can cut down the diagonal range |
| 354 | + if ($x >= $N && $end_forward > $k - 1) { |
| 355 | + $end_forward = $k - 1; |
| 356 | + } else if ($absy-$bottoml2 >= $M) { |
| 357 | + $start_forward = $k + 1; |
| 358 | + $value_to_add_forward = 0; |
| 359 | + } |
379 | 360 | } |
380 | | - if($lcsLength>$bestLcsLength){ |
381 | | - // a better sequence found! |
382 | | - $bestLcsLength = $lcsLength; |
383 | 361 | |
384 | | - $topSnakeStart = $snakeBeginForw[$k]; |
385 | | - $topSnakeEnd = $snakeEndForw[$k]; |
386 | | - $topSnakek = $snakekForw[$k]; |
| 362 | + $start_diag = max($value_to_add_backward + $start_backward, -$d); |
| 363 | + $end_diag = min($end_backward, $d); |
| 364 | + $value_to_add_backward = 1 - $value_to_add_backward; |
387 | 365 | |
388 | | - // aligned snakes bite (\ ) |
389 | | - // ( \ \) |
390 | | - $bottomSnakeStart = max($snakeEndBack[$k]+1, $topSnakeEnd+max(0,$snakekBack[$k]-$snakekForw[$k])); |
391 | | - $bottomSnakeEnd = max($snakeBeginBack[$k]+1, $bottomSnakeStart); |
392 | | - $bottomSnakek = $snakekBack[$k]; |
| 366 | + // compute backward furthest reaching paths |
| 367 | + for ($k = $start_diag; $k <= $end_diag; $k += 2) { |
| 368 | + if ($k == $d |
| 369 | + || ($k != -$d && $V1[$limit_min_1 + $k] < $V1[$limit_plus_1 + $k])) { |
| 370 | + $x = $V1[$limit_min_1 + $k]; |
| 371 | + } else { |
| 372 | + $x = $V1[$limit_plus_1 + $k] - 1; |
| 373 | + } |
393 | 374 | |
394 | | - if($bottomSnakeEnd<$fpForw[$k]){ |
395 | | - $bottomSnakeStart = $fpForw[$k]; |
396 | | - $bottomSnakeEnd = $fpForw[$k]; |
397 | | - $bottomSnakek = $k; |
| 375 | + $y = $x - $k - $delta; |
| 376 | + |
| 377 | + $snake2 = 0; |
| 378 | + while ($x > 0 && $y > 0 |
| 379 | + && $from[$x +$bottoml1_min_1] === $to[$y + $bottoml2_min_1]) { |
| 380 | + --$x; |
| 381 | + --$y; |
| 382 | + ++$snake2; |
398 | 383 | } |
| 384 | + $V1[$limit + $k] = $x; |
399 | 385 | |
400 | | - $bestLcsLengthTop = $lcsSizeForw[$k]-($snakeEndForw[$k]-$snakeBeginForw[$k]); |
401 | | - $bestLcsLengthBottom = $lcsSizeBack[$k]-($snakeBeginBack[$k]-$snakeEndBack[$k]); |
402 | | - if($bestKnownLcs==$lcsLength){ |
403 | | - $fpBack[$k] = $y; |
404 | | - break 2; |
| 386 | + if ($k >= -$delta - $d && $k <= $d - $delta |
| 387 | + && $x <= $V0[$limit + $k + $delta]) { |
| 388 | + $snake0 = $bottoml1 + $x; |
| 389 | + $snake1 = $bottoml2 + $y; |
| 390 | + return 2 * $d; |
405 | 391 | } |
406 | | - $maxp=$m_min_1-$bestLcsLength; |
| 392 | + |
| 393 | + // check to see if we can cut down our diagonal range |
| 394 | + if ($x <= 0) { |
| 395 | + $start_backward = $k + 1; |
| 396 | + $value_to_add_backward = 0; |
| 397 | + } else if ($y <= 0 && $end_backward > $k - 1) { |
| 398 | + $end_backward = $k - 1; |
| 399 | + } |
407 | 400 | } |
408 | 401 | } |
409 | | - if($k>1){ |
410 | | - --$k; |
411 | | - }else if($k<0){ |
412 | | - ++$k; |
413 | | - }else if($k==1){ |
414 | | - $k = -$p; |
415 | | - }else{ |
416 | | - break; |
417 | | - } |
418 | | - } while(TRUE); |
419 | | - } |
| 402 | + } |
| 403 | + /* |
| 404 | + * computing the true LCS is too expensive, instead find the diagonal |
| 405 | + * with the most progress and pretend a midle snake of length 0 occurs |
| 406 | + * there. |
| 407 | + */ |
420 | 408 | |
421 | | - unset($fpForw, $fpBack, $lcsSizeForw, $lcsSizeBack); |
422 | | - unset($snakeBeginForw, $snakeBeginBack, $snakeEndForw, $snakeEndBack, $snakekForw, $snakekBack); |
423 | | - unset($overlap); |
| 409 | + $most_progress = self::findMostProgress($M, $N, $limit, $V); |
424 | 410 | |
425 | | - // Mark snakes as in LCS |
426 | | - $maxi = $offsetx+$topSnakeEnd-$topSnakek; |
427 | | - for($i=$offsetx+$topSnakeStart-$topSnakek;$i<$maxi;++$i){ |
428 | | - $a_inLcs_sym[$i] = TRUE; |
| 411 | + $snake0 = $bottoml1 + $most_progress[0]; |
| 412 | + $snake1 = $bottoml2 + $most_progress[1]; |
| 413 | + $snake2 = 0; |
| 414 | + wfDebug('Computing the LCS is too expensive. Using a heuristic.'); |
| 415 | + $this->heuristicUsed = true; |
| 416 | + return 5; /* |
| 417 | + * HACK: since we didn't really finish the LCS computation |
| 418 | + * we don't really know the length of the SES. We don't do |
| 419 | + * anything with the result anyway, unless it's <=1. We know |
| 420 | + * for a fact SES > 1 so 5 is as good a number as any to |
| 421 | + * return here |
| 422 | + */ |
429 | 423 | } |
430 | | - $maxi = $offsety+$topSnakeEnd; |
431 | | - for($i=$offsety+$topSnakeStart;$i<$maxi;++$i){ |
432 | | - $b_inLcs_sym[$i] = TRUE; |
433 | | - } |
434 | | - $maxi = $offsetx+$bottomSnakeEnd-$bottomSnakek; |
435 | | - for($i=$offsetx+$bottomSnakeStart-$bottomSnakek;$i<$maxi;++$i){ |
436 | | - $a_inLcs_sym[$i] = TRUE; |
437 | | - } |
438 | | - $maxi = $offsety+$bottomSnakeEnd; |
439 | | - for($i=$offsety+$bottomSnakeStart;$i<$maxi;++$i){ |
440 | | - $b_inLcs_sym[$i] = TRUE; |
441 | | - } |
442 | 424 | |
443 | | - $m_t = $topSnakeStart-$topSnakek; |
444 | | - $a_t = array_slice($a, 0, $m_t); |
445 | | - $b_t = array_slice($b, 0, $topSnakeStart); |
| 425 | + private static function findMostProgress($M, $N, $limit, $V) { |
| 426 | + $delta = $N - $M; |
446 | 427 | |
447 | | - $m_b = $m-($bottomSnakeEnd-$bottomSnakek); |
448 | | - $n_b = $n-$bottomSnakeEnd; |
449 | | - $a_b = array_slice($a, $bottomSnakeEnd-$bottomSnakek, $m_b); |
450 | | - $b_b = array_slice($b, $bottomSnakeEnd, $n_b); |
| 428 | + if (($M & 1) == ($limit & 1)) { |
| 429 | + $forward_start_diag = max(-$M, -$limit); |
| 430 | + } else { |
| 431 | + $forward_start_diag = max(1 - $M, -$limit); |
| 432 | + } |
451 | 433 | |
452 | | - wikidiff3_diffPart($a_t, $b_t, $a_inLcs, $b_inLcs, $m_t, $topSnakeStart, $offsetx, $offsety, $bestLcsLengthTop, $boundRunningTime, $max_NP_before_bound); |
| 434 | + $forward_end_diag = min($N, $limit); |
453 | 435 | |
454 | | - wikidiff3_diffPart($a_b, $b_b, $a_inLcs, $b_inLcs, $m_b, $n_b, $offsetx+($bottomSnakeEnd-$bottomSnakek), $offsety+$bottomSnakeEnd, $bestLcsLengthBottom, $boundRunningTime, $max_NP_before_bound); |
455 | | -} |
| 436 | + if (($N & 1) == ($limit & 1)) { |
| 437 | + $backward_start_diag = max(-$N, -$limit); |
| 438 | + } else { |
| 439 | + $backward_start_diag = max(1 - $N, -$limit); |
| 440 | + } |
456 | 441 | |
457 | | -class InLcs { |
| 442 | + $backward_end_diag = -min($M, $limit); |
458 | 443 | |
459 | | - public $inLcs; |
| 444 | + $temp = array(0,0,0); |
460 | 445 | |
461 | | - function __construct($length){ |
462 | | - $this->inLcs = $length>0 ? array_fill(0,$length,FALSE): array(); |
463 | | - } |
464 | 446 | |
465 | | - /** |
466 | | - * Get the length of the Longest Common Subsequence (computed) |
467 | | - */ |
468 | | - public function getLcsLength(){ |
469 | | - return array_sum($this->inLcs); |
| 447 | + $max_progress = array_fill(0,ceil(max($forward_end_diag |
| 448 | + - $forward_start_diag, $backward_end_diag - $backward_start_diag) / 2), $temp); |
| 449 | + $num_progress = 0; // the 1st entry is current, it is initialized |
| 450 | + // with 0s |
| 451 | + |
| 452 | + // first search the forward diagonals |
| 453 | + for ($k = $forward_start_diag; $k <= $forward_end_diag; $k += 2) { |
| 454 | + $x = $V[0][$limit + $k]; |
| 455 | + $y = $x - $k; |
| 456 | + if ($x > $N || $y > $M) { |
| 457 | + continue; |
| 458 | + } |
| 459 | + |
| 460 | + $progress = $x + $y; |
| 461 | + if ($progress > $max_progress[0][2]) { |
| 462 | + $num_progress = 0; |
| 463 | + $max_progress[0][0] = $x; |
| 464 | + $max_progress[0][1] = $y; |
| 465 | + $max_progress[0][2] = $progress; |
| 466 | + } else if ($progress == $max_progress[0][2]) { |
| 467 | + ++$num_progress; |
| 468 | + $max_progress[$num_progress][0] = $x; |
| 469 | + $max_progress[$num_progress][1] = $y; |
| 470 | + $max_progress[$num_progress][2] = $progress; |
| 471 | + } |
| 472 | + } |
| 473 | + |
| 474 | + $max_progress_forward = true; // initially the maximum |
| 475 | + // progress is in the forward |
| 476 | + // direction |
| 477 | + |
| 478 | + // now search the backward diagonals |
| 479 | + for ($k = $backward_start_diag; $k <= $backward_end_diag; $k += 2) { |
| 480 | + $x = $V[1][$limit + $k]; |
| 481 | + $y = $x - $k - $delta; |
| 482 | + if ($x < 0 || $y < 0) { |
| 483 | + continue; |
| 484 | + } |
| 485 | + |
| 486 | + $progress = $N - $x + $M - $y; |
| 487 | + if ($progress > $max_progress[0][2]) { |
| 488 | + $num_progress = 0; |
| 489 | + $max_progress_forward = false; |
| 490 | + $max_progress[0][0] = $x; |
| 491 | + $max_progress[0][1] = $y; |
| 492 | + $max_progress[0][2] = $progress; |
| 493 | + } else if ($progress == $max_progress[0][2] && !$max_progress_forward) { |
| 494 | + ++$num_progress; |
| 495 | + $max_progress[$num_progress][0] = $x; |
| 496 | + $max_progress[$num_progress][1] = $y; |
| 497 | + $max_progress[$num_progress][2] = $progress; |
| 498 | + } |
| 499 | + } |
| 500 | + |
| 501 | + // return the middle diagonal with maximal progress. |
| 502 | + return $max_progress[floor($num_progress / 2)]; |
470 | 503 | } |
471 | 504 | |
472 | 505 | } |
| 506 | + |
Index: trunk/phase3/includes/DifferenceEngine.php |
— | — | @@ -74,9 +74,10 @@ |
75 | 75 | } |
76 | 76 | |
77 | 77 | function showDiffPage( $diffOnly = false ) { |
78 | | - global $wgUser, $wgOut, $wgUseExternalEditor, $wgUseRCPatrol; |
| 78 | + global $wgUser, $wgOut, $wgUseExternalEditor, $wgUseRCPatrol, $wgEnableHtmlDiff; |
79 | 79 | wfProfileIn( __METHOD__ ); |
80 | 80 | |
| 81 | + |
81 | 82 | # If external diffs are enabled both globally and for the user, |
82 | 83 | # we'll use the application/x-external-editor interface to call |
83 | 84 | # an external diff tool like kompare, kdiff3, etc. |
— | — | @@ -265,11 +266,16 @@ |
266 | 267 | '<div id="mw-diff-ntitle3">' . $newminor . $sk->revComment( $this->mNewRev, !$diffOnly, true ) . $rdel . "</div>" . |
267 | 268 | '<div id="mw-diff-ntitle4">' . $nextlink . $patrol . '</div>'; |
268 | 269 | |
269 | | - $this->showDiff( $oldHeader, $newHeader ); |
| 270 | + if($wgEnableHtmlDiff){ |
| 271 | + $this->renderHtmlDiff(); |
| 272 | + }else{ |
270 | 273 | |
271 | | - if ( !$diffOnly ) |
272 | | - $this->renderNewRevision(); |
| 274 | + $this->showDiff( $oldHeader, $newHeader ); |
273 | 275 | |
| 276 | + if ( !$diffOnly ){ |
| 277 | + $this->renderNewRevision(); |
| 278 | + } |
| 279 | + } |
274 | 280 | wfProfileOut( __METHOD__ ); |
275 | 281 | } |
276 | 282 | |
— | — | @@ -319,6 +325,65 @@ |
320 | 326 | wfProfileOut( __METHOD__ ); |
321 | 327 | } |
322 | 328 | |
| 329 | + |
| 330 | + function renderHtmlDiff() { |
| 331 | + global $wgOut, $IP; |
| 332 | + wfProfileIn( __METHOD__ ); |
| 333 | + |
| 334 | + $this->showDiffStyle(); |
| 335 | + |
| 336 | + require_once( "$IP/includes/HTMLDiff.php" ); |
| 337 | + |
| 338 | + #add deleted rev tag if needed |
| 339 | + if( !$this->mNewRev->userCan(Revision::DELETED_TEXT) ) { |
| 340 | + $wgOut->addWikiMsg( 'rev-deleted-text-permission' ); |
| 341 | + } else if( $this->mNewRev->isDeleted(Revision::DELETED_TEXT) ) { |
| 342 | + $wgOut->addWikiMsg( 'rev-deleted-text-view' ); |
| 343 | + } |
| 344 | + |
| 345 | + if( !$this->mNewRev->isCurrent() ) { |
| 346 | + $oldEditSectionSetting = $wgOut->parserOptions()->setEditSection( false ); |
| 347 | + } |
| 348 | + |
| 349 | + $this->loadText(); |
| 350 | + |
| 351 | + if( is_object( $this->mOldRev ) ) { |
| 352 | + $wgOut->setRevisionId( $this->mOldRev->getId() ); |
| 353 | + } |
| 354 | + |
| 355 | + global $wgTitle, $wgParser, $wgTitle; |
| 356 | + $popts = $wgOut->parserOptions(); |
| 357 | + $oldTidy = $popts->setTidy( TRUE ); |
| 358 | + |
| 359 | + $parserOutput = $wgParser->parse($this->mOldtext, $wgTitle, $popts, TRUE, TRUE, $wgOut->mRevisionId ); |
| 360 | + $popts->setTidy( $oldTidy ); |
| 361 | + |
| 362 | + //only for new? |
| 363 | + //$wgOut->addParserOutputNoText( $parserOutput ); |
| 364 | + $oldHtml = $parserOutput->getText(); |
| 365 | + wfRunHooks( 'OutputPageBeforeHTML',array( &$wgOut, &$oldHtml ) ); |
| 366 | + |
| 367 | + if( is_object( $this->mNewRev ) ) { |
| 368 | + $wgOut->setRevisionId( $this->mNewRev->getId() ); |
| 369 | + } |
| 370 | + |
| 371 | + $popts = $wgOut->parserOptions(); |
| 372 | + $oldTidy = $popts->setTidy( TRUE ); |
| 373 | + |
| 374 | + $parserOutput = $wgParser->parse($this->mNewtext, $wgTitle, $popts, TRUE, TRUE, $wgOut->mRevisionId ); |
| 375 | + $popts->setTidy( $oldTidy ); |
| 376 | + |
| 377 | + //only for new? |
| 378 | + $wgOut->addParserOutputNoText( $parserOutput ); |
| 379 | + $newHtml = $parserOutput->getText(); |
| 380 | + wfRunHooks( 'OutputPageBeforeHTML',array( &$wgOut, &$newHtml ) ); |
| 381 | + |
| 382 | + $differ = new HTMLDiffer(new DelegatingContentHandler($wgOut)); |
| 383 | + $differ->htmlDiff($oldHtml, $newHtml); |
| 384 | + |
| 385 | + wfProfileOut( __METHOD__ ); |
| 386 | + } |
| 387 | + |
323 | 388 | /** |
324 | 389 | * Show the first revision of an article. Uses normal diff headers in |
325 | 390 | * contrast to normal "old revision" display style. |
— | — | @@ -893,6 +958,30 @@ |
894 | 959 | } |
895 | 960 | |
896 | 961 | /** |
| 962 | + * Alternative representation of a set of changes, by the index |
| 963 | + * ranges that are changed. |
| 964 | + */ |
| 965 | +class RangeDifference { |
| 966 | + |
| 967 | + public $leftstart; |
| 968 | + public $leftend; |
| 969 | + public $leftlength; |
| 970 | + |
| 971 | + public $rightstart; |
| 972 | + public $rightend; |
| 973 | + public $rightlength; |
| 974 | + |
| 975 | + function __construct($leftstart, $leftend, $rightstart, $rightend){ |
| 976 | + $this->leftstart = $leftstart; |
| 977 | + $this->leftend = $leftend; |
| 978 | + $this->leftlength = $leftend-$leftstart; |
| 979 | + $this->rightstart = $rightstart; |
| 980 | + $this->rightend = $rightend; |
| 981 | + $this->rightlength = $rightend-$rightstart; |
| 982 | + } |
| 983 | +} |
| 984 | + |
| 985 | +/** |
897 | 986 | * Class used internally by Diff to actually compute the diffs. |
898 | 987 | * |
899 | 988 | * The algorithm used here is mostly lifted from the perl module |
— | — | @@ -920,22 +1009,108 @@ |
921 | 1010 | |
922 | 1011 | const MAX_XREF_LENGTH = 10000; |
923 | 1012 | |
924 | | - function diff ($from_lines, $to_lines) { |
| 1013 | + function diff ($from_lines, $to_lines){ |
925 | 1014 | wfProfileIn( __METHOD__ ); |
926 | 1015 | |
927 | | - global $wgExternalDiffEngine; |
| 1016 | + // Diff and store locally |
| 1017 | + $this->diff_local($from_lines, $to_lines); |
928 | 1018 | |
| 1019 | + // Merge edits when possible |
| 1020 | + $this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged); |
| 1021 | + $this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged); |
| 1022 | + |
| 1023 | + // Compute the edit operations. |
929 | 1024 | $n_from = sizeof($from_lines); |
930 | 1025 | $n_to = sizeof($to_lines); |
931 | 1026 | |
| 1027 | + $edits = array(); |
| 1028 | + $xi = $yi = 0; |
| 1029 | + while ($xi < $n_from || $yi < $n_to) { |
| 1030 | + USE_ASSERTS && assert($yi < $n_to || $this->xchanged[$xi]); |
| 1031 | + USE_ASSERTS && assert($xi < $n_from || $this->ychanged[$yi]); |
| 1032 | + |
| 1033 | + // Skip matching "snake". |
| 1034 | + $copy = array(); |
| 1035 | + while ( $xi < $n_from && $yi < $n_to |
| 1036 | + && !$this->xchanged[$xi] && !$this->ychanged[$yi]) { |
| 1037 | + $copy[] = $from_lines[$xi++]; |
| 1038 | + ++$yi; |
| 1039 | + } |
| 1040 | + if ($copy) |
| 1041 | + $edits[] = new _DiffOp_Copy($copy); |
| 1042 | + |
| 1043 | + // Find deletes & adds. |
| 1044 | + $delete = array(); |
| 1045 | + while ($xi < $n_from && $this->xchanged[$xi]) |
| 1046 | + $delete[] = $from_lines[$xi++]; |
| 1047 | + |
| 1048 | + $add = array(); |
| 1049 | + while ($yi < $n_to && $this->ychanged[$yi]) |
| 1050 | + $add[] = $to_lines[$yi++]; |
| 1051 | + |
| 1052 | + if ($delete && $add) |
| 1053 | + $edits[] = new _DiffOp_Change($delete, $add); |
| 1054 | + elseif ($delete) |
| 1055 | + $edits[] = new _DiffOp_Delete($delete); |
| 1056 | + elseif ($add) |
| 1057 | + $edits[] = new _DiffOp_Add($add); |
| 1058 | + } |
| 1059 | + wfProfileOut( __METHOD__ ); |
| 1060 | + return $edits; |
| 1061 | + } |
| 1062 | + |
| 1063 | + function diff_range ($from_lines, $to_lines){ |
| 1064 | + wfProfileIn( __METHOD__ ); |
| 1065 | + |
| 1066 | + // Diff and store locally |
| 1067 | + $this->diff_local($from_lines, $to_lines); |
| 1068 | + |
| 1069 | + // Compute the ranges operations. |
| 1070 | + $n_from = sizeof($from_lines); |
| 1071 | + $n_to = sizeof($to_lines); |
| 1072 | + |
| 1073 | + $ranges = array(); |
| 1074 | + $xi = $yi = 0; |
| 1075 | + while ($xi < $n_from || $yi < $n_to) { |
| 1076 | + // Matching "snake". |
| 1077 | + while ( $xi < $n_from && $yi < $n_to |
| 1078 | + && !$this->xchanged[$xi] && !$this->ychanged[$yi]) { |
| 1079 | + ++$xi; |
| 1080 | + ++$yi; |
| 1081 | + } |
| 1082 | + // Find deletes & adds. |
| 1083 | + $xstart = $xi; |
| 1084 | + while ($xi < $n_from && $this->xchanged[$xi]){ |
| 1085 | + ++$xi; |
| 1086 | + } |
| 1087 | + |
| 1088 | + $ystart = $yi; |
| 1089 | + while ($yi < $n_to && $this->ychanged[$yi]){ |
| 1090 | + ++$yi; |
| 1091 | + } |
| 1092 | + |
| 1093 | + if ($xi>$xstart || $yi>$ystart){ |
| 1094 | + $ranges[] = new RangeDifference($xstart,$xi,$ystart,$yi); |
| 1095 | + } |
| 1096 | + } |
| 1097 | + wfProfileOut( __METHOD__ ); |
| 1098 | + return $ranges; |
| 1099 | + } |
| 1100 | + |
| 1101 | + function diff_local ($from_lines, $to_lines) { |
| 1102 | + global $wgExternalDiffEngine; |
| 1103 | + wfProfileIn( __METHOD__); |
| 1104 | + |
932 | 1105 | if($wgExternalDiffEngine == 'wikidiff3'){ |
933 | 1106 | // wikidiff3 |
934 | 1107 | global $IP; |
935 | 1108 | require_once( "$IP/includes/Diff.php" ); |
936 | | - list($this->xchanged, $this->ychanged) = wikidiff3_diff($from_lines, $to_lines, TRUE, 100000); |
| 1109 | + $wikidiff3 = new WikiDiff3(); |
| 1110 | + list($this->xchanged, $this->ychanged) = $wikidiff3->diff($from_lines, $to_lines); |
937 | 1111 | }else{ |
938 | 1112 | // old diff |
939 | | - wfProfileIn( __METHOD__ ." - basicdiff"); |
| 1113 | + $n_from = sizeof($from_lines); |
| 1114 | + $n_to = sizeof($to_lines); |
940 | 1115 | $this->xchanged = $this->ychanged = array(); |
941 | 1116 | $this->xv = $this->yv = array(); |
942 | 1117 | $this->xind = $this->yind = array(); |
— | — | @@ -980,48 +1155,8 @@ |
981 | 1156 | |
982 | 1157 | // Find the LCS. |
983 | 1158 | $this->_compareseq(0, sizeof($this->xv), 0, sizeof($this->yv)); |
984 | | - wfProfileOut( __METHOD__ ." - basicdiff"); |
985 | 1159 | } |
986 | | - |
987 | | - // Merge edits when possible |
988 | | - $this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged); |
989 | | - $this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged); |
990 | | - |
991 | | - // Compute the edit operations. |
992 | | - $edits = array(); |
993 | | - $xi = $yi = 0; |
994 | | - while ($xi < $n_from || $yi < $n_to) { |
995 | | - USE_ASSERTS && assert($yi < $n_to || $this->xchanged[$xi]); |
996 | | - USE_ASSERTS && assert($xi < $n_from || $this->ychanged[$yi]); |
997 | | - |
998 | | - // Skip matching "snake". |
999 | | - $copy = array(); |
1000 | | - while ( $xi < $n_from && $yi < $n_to |
1001 | | - && !$this->xchanged[$xi] && !$this->ychanged[$yi]) { |
1002 | | - $copy[] = $from_lines[$xi++]; |
1003 | | - ++$yi; |
1004 | | - } |
1005 | | - if ($copy) |
1006 | | - $edits[] = new _DiffOp_Copy($copy); |
1007 | | - |
1008 | | - // Find deletes & adds. |
1009 | | - $delete = array(); |
1010 | | - while ($xi < $n_from && $this->xchanged[$xi]) |
1011 | | - $delete[] = $from_lines[$xi++]; |
1012 | | - |
1013 | | - $add = array(); |
1014 | | - while ($yi < $n_to && $this->ychanged[$yi]) |
1015 | | - $add[] = $to_lines[$yi++]; |
1016 | | - |
1017 | | - if ($delete && $add) |
1018 | | - $edits[] = new _DiffOp_Change($delete, $add); |
1019 | | - elseif ($delete) |
1020 | | - $edits[] = new _DiffOp_Delete($delete); |
1021 | | - elseif ($add) |
1022 | | - $edits[] = new _DiffOp_Add($add); |
1023 | | - } |
1024 | 1160 | wfProfileOut( __METHOD__ ); |
1025 | | - return $edits; |
1026 | 1161 | } |
1027 | 1162 | |
1028 | 1163 | /** |
— | — | @@ -1077,7 +1212,6 @@ |
1078 | 1213 | $numer = $xlim - $xoff + $nchunks - 1; |
1079 | 1214 | $x = $xoff; |
1080 | 1215 | for ($chunk = 0; $chunk < $nchunks; $chunk++) { |
1081 | | - wfProfileIn( __METHOD__ . "-chunk" ); |
1082 | 1216 | if ($chunk > 0) |
1083 | 1217 | for ($i = 0; $i <= $this->lcs; $i++) |
1084 | 1218 | $ymids[$i][$chunk-1] = $this->seq[$i]; |
— | — | @@ -1130,7 +1264,6 @@ |
1131 | 1265 | if ($end == 0 || $ypos > $this->seq[$end]) { |
1132 | 1266 | $this->seq[++$this->lcs] = $ypos; |
1133 | 1267 | $this->in_seq[$ypos] = 1; |
1134 | | - wfProfileOut( __METHOD__ ); |
1135 | 1268 | return $this->lcs; |
1136 | 1269 | } |
1137 | 1270 | |