r39406 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r39405‎ | r39406 | r39407 >
Date:11:46, 15 August 2008
Author:guyvdb
Status:old
Tags:
Comment:
New implementation of wikidiff3 algorithm replacing the previous one and basic HTML diffing support, both integrated with DifferenceEngine
Modified paths:
  • /trunk/phase3/includes/Diff.php (modified) (history)
  • /trunk/phase3/includes/DifferenceEngine.php (modified) (history)
  • /trunk/phase3/includes/HTMLDiff.php (added) (history)
  • /trunk/phase3/skins/common/diff.css (modified) (history)
  • /trunk/phase3/skins/common/diff.js (modified) (history)
  • /trunk/phase3/skins/common/images/diffunderline.gif (added) (history)

Diff [purge]

Index: trunk/phase3/skins/common/diff.js
@@ -17,4 +17,16 @@
1818 lastSheet.insertRule(
1919 "table.diff td div { overflow: visible; }",
2020 lastSheet.cssRules.length);
21 -}
\ No newline at end of file
 21+}
 22+
 23+function tipA(content){
 24+ return false;
 25+}
 26+
 27+function tipR(content){
 28+ return false;
 29+}
 30+
 31+function tipC(content){
 32+ return false;
 33+}
Index: trunk/phase3/skins/common/diff.css
@@ -74,3 +74,66 @@
7575 table: */
7676 /* overflow: visible; */
7777 }
 78+
 79+/*
 80+ * Styles for the HTML Diff
 81+ */
 82+span.diff-html-added {
 83+ font-size: 100%;
 84+ background-color: #20ff20
 85+}
 86+
 87+span.diff-html-removed {
 88+ font-size: 100%;
 89+ text-decoration: line-through;
 90+ background-color: #ff2020
 91+}
 92+
 93+span.diff-html-changed {
 94+ background: url(images/diffunderline.gif) bottom repeat-x;
 95+ *background-color: #c6c6fd; /* light blue */
 96+}
 97+
 98+span.diff-html-added img{
 99+ border: 5px solid #ccffcc;
 100+}
 101+
 102+span.diff-html-removed img{
 103+ border: 5px solid #fdc6c6;
 104+}
 105+
 106+span.diff-html-changed img{
 107+ border: 5px dotted #000099;
 108+
 109+}
 110+
 111+span.diff-html-changed {
 112+ position: relative; /* this is key */
 113+ cursor: help;
 114+}
 115+
 116+span.diff-html-changed span.tip {
 117+ display: none; /* so is this */
 118+}
 119+
 120+/* tooltip will display on :hover event */
 121+
 122+span.diff-html-changed:hover span.tip {
 123+ display: block;
 124+ z-index: 95;
 125+ position: absolute;
 126+ top: 2.5em;
 127+ left: 0;
 128+ width: auto;
 129+ line-height: 1.2em;
 130+ padding: 3px 7px 4px 6px;
 131+ border: 1px solid #336;
 132+ background-color: #f7f7ee;
 133+ font-family: arial, helvetica, sans-serif;
 134+ font-size: 10px;
 135+ font-weight: normal;
 136+ color: #000;
 137+ text-align: left;
 138+}
 139+
 140+
Index: trunk/phase3/skins/common/images/diffunderline.gif
@@ -0,0 +1 @@
 2+GIF89a��������!��,�%!g;
\ No newline at end of file
Index: trunk/phase3/includes/HTMLDiff.php
@@ -0,0 +1,1997 @@
 2+<?php
 3+/* Copyright (C) 2008 Guy Van den Broeck <guy@guyvdb.eu>
 4+ *
 5+ * This program is free software; you can redistribute it and/or modify
 6+ * it under the terms of the GNU General Public License as published by
 7+ * the Free Software Foundation; either version 2 of the License, or
 8+ * (at your option) any later version.
 9+ *
 10+ * This program is distributed in the hope that it will be useful,
 11+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 13+ * GNU General Public License for more details.
 14+ *
 15+ * You should have received a copy of the GNU General Public License
 16+ * along with this program; if not, write to the Free Software
 17+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 18+ * or see http://www.gnu.org/
 19+ */
 20+
 21+
 22+/**
 23+ * Any element in the DOM tree of an HTML document.
 24+ */
 25+abstract class Node{
 26+
 27+ protected $parent;
 28+
 29+
 30+ function __construct($parent){
 31+ $this->parent = $parent;
 32+ if(!is_null($parent)){
 33+ $parent->addChild($this);
 34+ }
 35+ }
 36+
 37+ public function getParent(){
 38+ return $this->parent;
 39+ }
 40+
 41+ public function getParentTree(){
 42+ if(!is_null($this->parent)){
 43+ $parentTree = $this->parent->getParentTree();
 44+ $parentTree[] = $this->parent;
 45+ return $parentTree;
 46+ }else{
 47+ return array();
 48+ }
 49+ }
 50+
 51+ public abstract function getMinimalDeletedSet($id);
 52+
 53+ public function detectIgnorableWhiteSpace(){
 54+ // no op
 55+ }
 56+
 57+ public function getLastCommonParent(Node $other){
 58+ if(is_null($other))
 59+ throw new Exception('The given node is NULL');
 60+
 61+ $result = new LastCommonParentResult();
 62+
 63+ $myParents = $this->getParentTree();
 64+ $otherParents = $other->getParentTree();
 65+
 66+ $i = 1;
 67+ $isSame = true;
 68+ while ($isSame && $i < sizeof($myParents) && $i < sizeof($otherParents)) {
 69+ if (!$myParents[$i]->isSameTag($otherParents[$i])) {
 70+ $isSame = false;
 71+ } else {
 72+ // After the while, the index i-1 must be the last common parent
 73+ $i++;
 74+ }
 75+ }
 76+
 77+ $result->setLastCommonParentDepth($i - 1);
 78+ $result->setLastCommonParent($myParents[$i - 1]);
 79+
 80+ if (!$isSame) {
 81+ $result->setIndexInLastCommonParent($myParents[$i - 1]->getIndexOf($myParents[$i]));
 82+ $result->setSplittingNeeded();
 83+ } else if (sizeof($myParents) < sizeof($otherParents)) {
 84+ $result->setIndexInLastCommonParent($myParents[$i - 1]->getIndexOf($this));
 85+ } else if (sizeof($myParents) > sizeof($otherParents)) {
 86+ // All tags matched but there are tags left in this tree
 87+ $result->setIndexInLastCommonParent($myParents[$i - 1]->getIndexOf($myParents[$i]));
 88+ $result->setSplittingNeeded();
 89+ } else {
 90+ // All tags matched untill the very last one in both trees
 91+ // or there were no tags besides the BODY
 92+ $result->setIndexInLastCommonParent($myParents[$i - 1]->getIndexOf($this));
 93+ }
 94+ return $result;
 95+ }
 96+
 97+ public function setParent($parent) {
 98+ $this->parent = $parent;
 99+ }
 100+
 101+ public abstract function copyTree();
 102+
 103+ public function inPre() {
 104+ $tree = $this->getParentTree();
 105+ foreach ($tree as $ancestor) {
 106+ if ($ancestor->isPre()) {
 107+ return true;
 108+ }
 109+ }
 110+ return false;
 111+ }
 112+
 113+ private $whiteBefore = false;
 114+
 115+ private $whiteAfter = false;
 116+
 117+ public function isWhiteBefore() {
 118+ return $this->whiteBefore;
 119+ }
 120+
 121+ public function setWhiteBefore($whiteBefore) {
 122+ $this->whiteBefore = $whiteBefore;
 123+ }
 124+
 125+ public function isWhiteAfter() {
 126+ return $this->whiteAfter;
 127+ }
 128+
 129+ public function setWhiteAfter($whiteAfter) {
 130+ $this->whiteAfter = $whiteAfter;
 131+ }
 132+
 133+ public abstract function getLeftMostChild();
 134+
 135+ public abstract function getRightMostChild();
 136+}
 137+
 138+/**
 139+ * Node that can contain other nodes. Represents an HTML tag.
 140+ */
 141+class TagNode extends Node{
 142+
 143+ public $children = array();
 144+
 145+ protected $qName;
 146+
 147+ protected $attributes = array();
 148+
 149+ function __construct($parent, $qName, /*array*/ $attributes) {
 150+ parent::__construct($parent);
 151+ $this->qName = strtolower($qName);
 152+ foreach($attributes as $key => $value){
 153+ $this->attributes[strtolower($key)] = $value;
 154+ }
 155+ }
 156+
 157+ public function addChild(Node $node, $index=-1) {
 158+ if ($node->getParent() !== $this)
 159+ throw new Exception(
 160+ 'The new child must have this node as a parent.');
 161+ if($index == -1){
 162+ $this->children[] = $node;
 163+ }else{
 164+ array_splice($this->children,$index,0,array($node));
 165+ }
 166+ }
 167+
 168+ public function getIndexOf(Node $child) {
 169+ // don't trust array_search with objects
 170+ foreach($this->children as $key=>$value){
 171+ if($value === $child){
 172+ return $key;
 173+ }
 174+ }
 175+ return NULL;
 176+ }
 177+
 178+ public function getChild($i) {
 179+ return $this->children[$i];
 180+ }
 181+
 182+ public function getNbChildren() {
 183+ return count($this->children);
 184+ }
 185+
 186+ public function getQName() {
 187+ return $this->qName;
 188+ }
 189+
 190+ public function getAttributes() {
 191+ return $this->attributes;
 192+ }
 193+
 194+ public function isSameTag(TagNode $other) {
 195+ if (is_null($other))
 196+ return false;
 197+ return $this->getOpeningTag() === $other->getOpeningTag();
 198+ }
 199+
 200+ public function getOpeningTag() {
 201+ $s = '<'.$this->getQName();
 202+ foreach ($this->attributes as $attribute => $value) {
 203+ $s .= ' ' . $attribute . '="' . $value . '"';
 204+ }
 205+ return $s .= '>';
 206+ }
 207+
 208+ public function getEndTag() {
 209+ return '</' . $this->getQName() + '>"';
 210+ }
 211+
 212+ public function getMinimalDeletedSet($id) {
 213+ $nodes = array();
 214+
 215+ if ($this->getNbChildren() == 0)
 216+ return $nodes;
 217+
 218+ $hasNotDeletedDescendant = false;
 219+
 220+ foreach ($this->children as $child) {
 221+ $childrenChildren = $child->getMinimalDeletedSet($id);
 222+ $nodes = array_merge($nodes, $childrenChildren);
 223+ if (!$hasNotDeletedDescendant
 224+ && !(count($childrenChildren) == 1 && $childrenChildren[0]===$child)) {
 225+ // This child is not entirely deleted
 226+ $hasNotDeletedDescendant = true;
 227+ }
 228+ }
 229+ if (!$hasNotDeletedDescendant) {
 230+ $nodes = array($this);
 231+ }
 232+ return $nodes;
 233+ }
 234+
 235+ public function toString() {
 236+ return $this->getOpeningTag();
 237+ }
 238+
 239+ public function splitUntill(TagNode $parent, Node $split, $includeLeft) {
 240+ $splitOccured = false;
 241+ if ($parent !== $this) {
 242+ $part1 = new TagNode(NULL, $this->getQName(), $this->getAttributes());
 243+ $part2 = new TagNode(NULL, $this->getQName(), $this->getAttributes());
 244+ $part1->setParent($this->getParent());
 245+ $part2->setParent($this->getParent());
 246+
 247+ $i = 0;
 248+ $nbChildren = $this->getNbChildren();
 249+ while ($i < $nbChildren && $this->children[$i] !== $split) {
 250+ $this->children[$i]->setParent($part1);
 251+ $part1->addChild($this->children[$i]);
 252+ ++$i;
 253+ }
 254+ if ($i < $nbChildren) {
 255+ if ($includeLeft) {
 256+ $this->children[$i]->setParent($part1);
 257+ $part1->addChild($this->children[$i]);
 258+ } else {
 259+ $this->children[$i]->setParent($part2);
 260+ $part2->addChild($this->children[$i]);
 261+ }
 262+ ++$i;
 263+ }
 264+ while ($i < $nbChildren) {
 265+ $this->children[$i]->setParent($part2);
 266+ $part2->addChild($this->children[$i]);
 267+ ++$i;
 268+ }
 269+ $myindexinparent = $this->parent->getIndexOf($this);
 270+ if ($part1->getNbChildren() > 0)
 271+ $this->parent->addChild($part1,$myindexinparent);
 272+
 273+ if ($part2->getNbChildren() > 0)
 274+ $this->parent->addChild($part2,$myindexinparent);
 275+
 276+ if ($part1->getNbChildren() > 0 && $part2->getNbChildren() > 0) {
 277+ $splitOccured = true;
 278+ }
 279+
 280+ $this->parent->removeChild($myindexinparent);
 281+
 282+ if ($includeLeft)
 283+ $this->parent->splitUntill($parent, $part1, $includeLeft);
 284+ else
 285+ $this->parent->splitUntill($parent, $part2, $includeLeft);
 286+ }
 287+ return $splitOccured;
 288+
 289+ }
 290+
 291+ private function removeChild($index) {
 292+ unset($this->children[$index]);
 293+ $this->children = array_values($this->children);
 294+ }
 295+
 296+ public static $blocks = array('html'=>TRUE,'body'=>TRUE,'p'=>TRUE,'blockquote'=>TRUE,
 297+ 'h1'=>TRUE,'h2'=>TRUE,'h3'=>TRUE,'h4'=>TRUE,'h5'=>TRUE,'pre'=>TRUE,'div'=>TRUE,'ul'=>TRUE,'ol'=>TRUE,'li'=>TRUE,
 298+ 'table'=>TRUE,'tbody'=>TRUE,'tr'=>TRUE,'td'=>TRUE,'th'=>TRUE,'br'=>TRUE);
 299+
 300+ public static function isBlockLevelName($qName) {
 301+ return array_key_exists(strtolower($qName),self::$blocks);
 302+ }
 303+
 304+ public static function isBlockLevelNode(Node $node) {
 305+ if(! $node instanceof TagNode)
 306+ return false;
 307+ return self::isBlockLevelName($node->getQName());
 308+ }
 309+
 310+ public function isBlockLevel() {
 311+ return  self::isBlockLevelNode($this);
 312+ }
 313+
 314+ public static function isInlineName($qName) {
 315+ return !self::isBlockLevelName($qName);
 316+ }
 317+
 318+ public static function isInlineNode(Node $node) {
 319+ return !self::isBlockLevelNode($node);
 320+ }
 321+
 322+ public function isInline() {
 323+ return self::isInlineNode($this);
 324+ }
 325+
 326+ public function copyTree() {
 327+ $newThis = new TagNode(NULL, $this->getQName(), $this->getAttributes());
 328+ $newThis->setWhiteBefore($this->isWhiteBefore());
 329+ $newThis->setWhiteAfter($this->isWhiteAfter());
 330+ foreach($this->children as $child) {
 331+ $newChild = $child->copyTree();
 332+ $newChild->setParent($newThis);
 333+ $newThis->addChild($newChild);
 334+ }
 335+ return $newThis;
 336+ }
 337+
 338+ public function getMatchRatio(TagNode $other) {
 339+ $txtComp = new TextOnlyComparator($other);
 340+ return $txtComp->getMatchRatio(new TextOnlyComparator($this));
 341+ }
 342+
 343+ public function expandWhiteSpace() {
 344+ $shift = 0;
 345+ $spaceAdded = false;
 346+
 347+ $nbOriginalChildren = $this->getNbChildren();
 348+ for ($i = 0; $i < $nbOriginalChildren; ++$i) {
 349+ $child = $this->getChild($i + $shift);
 350+
 351+ if($child instanceof TagNode){
 352+ if (!$child->isPre()) {
 353+ $child->expandWhiteSpace();
 354+ }
 355+ }
 356+ if (!$spaceAdded && $child->isWhiteBefore()) {
 357+ $ws = new WhiteSpaceNode(NULL, ' ', $child->getLeftMostChild());
 358+ $ws->setParent($this);
 359+ $this->addChild($ws,$i + ($shift++));
 360+ }
 361+ if ($child->isWhiteAfter()) {
 362+ $ws = new WhiteSpaceNode(NULL, ' ', $child->getRightMostChild());
 363+ $ws->setParent($this);
 364+ $this->addChild($ws,$i + 1 + ($shift++));
 365+ $spaceAdded = true;
 366+ } else {
 367+ $spaceAdded = false;
 368+ }
 369+
 370+ }
 371+ }
 372+
 373+ public function getLeftMostChild() {
 374+ if ($this->getNbChildren() < 1)
 375+ return $this;
 376+ return $this->getChild(0)->getLeftMostChild();
 377+
 378+ }
 379+
 380+ public function getRightMostChild() {
 381+ if ($this->getNbChildren() < 1)
 382+ return $this;
 383+ return $this->getChild($this->getNbChildren() - 1)->getRightMostChild();
 384+ }
 385+
 386+ public function isPre() {
 387+ return 0 == strcasecmp($this->getQName(),'pre');
 388+ }
 389+
 390+ public static function toDiffLine(TagNode $node){
 391+ return $node->getOpeningTag();
 392+ }
 393+}
 394+
 395+/**
 396+ * Represents a piece of text in the HTML file.
 397+ */
 398+class TextNode extends Node{
 399+
 400+ private $s;
 401+
 402+ private $modification;
 403+
 404+ function __construct($parent, $s) {
 405+ parent::__construct($parent);
 406+ $this->modification = new Modification(Modification::NONE);
 407+ $this->s = $s;
 408+ }
 409+
 410+ public function copyTree() {
 411+ $clone = clone $this;
 412+ $clone->setParent(NULL);
 413+ return $clone;
 414+ }
 415+
 416+ public function getLeftMostChild() {
 417+ return $this;
 418+ }
 419+
 420+ public function getRightMostChild() {
 421+ return $this;
 422+ }
 423+
 424+ public function getMinimalDeletedSet($id) {
 425+ if ($this->getModification()->getType() == Modification::REMOVED
 426+ && $this->getModification()->getID() == $id){
 427+ return array($this);
 428+ }
 429+ return array();
 430+ }
 431+
 432+ public function getModification() {
 433+ return $this->modification;
 434+ }
 435+
 436+ public function getText() {
 437+ return $this->s;
 438+ }
 439+
 440+ public function isSameText($other) {
 441+ if (is_null($other) || ! $other instanceof TextNode){
 442+ return false;
 443+ }
 444+ return str_replace('\n', ' ',$this->getText()) === str_replace('\n', ' ',$other->getText());
 445+ }
 446+
 447+ public function setModification(Modification $m) {
 448+ //wfDebug("Registered modification for node '$this->s' as ".Modification::typeToString($m->getType()));
 449+ $this->modification = $m;
 450+ }
 451+
 452+ public function toString() {
 453+ return $this->getText();
 454+ }
 455+
 456+ public static function toDiffLine(TextNode $node){
 457+ return str_replace('\n', ' ',$node->getText());
 458+ }
 459+}
 460+
 461+class WhiteSpaceNode extends TextNode {
 462+
 463+ function __construct($parent, $s, Node $like = NULL) {
 464+ parent::__construct($parent, $s);
 465+ if(!is_null($like) && $like instanceof TextNode){
 466+ $newModification = clone $like->getModification();
 467+ $newModification->setFirstOfID(false);
 468+ $this->setModification($newModification);
 469+ }
 470+ }
 471+
 472+ public static function isWhiteSpace($c) {
 473+ switch ($c) {
 474+ case ' ':
 475+ case '\t':
 476+ case '\r':
 477+ case '\n':
 478+ return true;
 479+ default:
 480+ return false;
 481+ }
 482+ }
 483+}
 484+
 485+/**
 486+ * Represents the root of a HTML document.
 487+ */
 488+class BodyNode extends TagNode {
 489+
 490+ function __construct() {
 491+ parent::__construct(NULL, 'body', array());
 492+ }
 493+
 494+ public function copyTree() {
 495+ $newThis = new BodyNode();
 496+ foreach ($this->children as $child) {
 497+ $newChild = $child->copyTree();
 498+ $newChild->setParent($newThis);
 499+ $newThis->addChild($newChild);
 500+ }
 501+ return $newThis;
 502+ }
 503+
 504+ public function getMinimalDeletedSet($id) {
 505+ $nodes = array();
 506+ foreach ($this->children as $child) {
 507+ $childrenChildren = $child->getMinimalDeletedSet($id);
 508+ $nodes = array_merge($nodes, $childrenChildren);
 509+ }
 510+ return $nodes;
 511+ }
 512+
 513+}
 514+
 515+/**
 516+ * Represents an image in HTML. Even though images do not contain any text they
 517+ * are independent visible objects on the page. They are logically a TextNode.
 518+ */
 519+class ImageNode extends TextNode {
 520+
 521+ private $attributes;
 522+
 523+ function __construct(TagNode $parent, /*array*/ $attrs) {
 524+ if(!array_key_exists('src',$attrs)){
 525+ //wfDebug('Image without a source:');
 526+ foreach($attrs as $key => $value){
 527+ //wfDebug("$key = $value");
 528+ }
 529+ parent::__construct($parent, '<img></img>');
 530+ }else{
 531+ parent::__construct($parent, '<img>' . strtolower($attrs['src']) . '</img>');
 532+ }
 533+ $this->attributes = $attrs;
 534+ }
 535+
 536+ public function isSameText($other) {
 537+ if (is_null($other) || ! $other instanceof ImageNode)
 538+ return false;
 539+ return $this->getText() === $other->getText();
 540+ }
 541+
 542+ public function getAttributes() {
 543+ return $this->attributes;
 544+ }
 545+
 546+}
 547+
 548+/**
 549+ * When detecting the last common parent of two nodes, all results are stored as
 550+ * a LastCommonParentResult.
 551+ */
 552+class LastCommonParentResult {
 553+
 554+ // Parent
 555+ private $parent;
 556+
 557+ public function getLastCommonParent() {
 558+ return $this->parent;
 559+ }
 560+
 561+ public function setLastCommonParent(TagNode $parent) {
 562+ $this->parent = $parent;
 563+ }
 564+
 565+ // Splitting
 566+ private $splittingNeeded = false;
 567+
 568+ public function isSplittingNeeded() {
 569+ return $this->splittingNeeded;
 570+ }
 571+
 572+ public function setSplittingNeeded() {
 573+ $this->splittingNeeded = true;
 574+ }
 575+
 576+ // Depth
 577+ private $lastCommonParentDepth = -1;
 578+
 579+ public function getLastCommonParentDepth() {
 580+ return $this->lastCommonParentDepth;
 581+ }
 582+
 583+ public function setLastCommonParentDepth($depth) {
 584+ $this->lastCommonParentDepth = $depth;
 585+ }
 586+
 587+ // Index
 588+ private $indexInLastCommonParent = -1;
 589+
 590+ public function getIndexInLastCommonParent() {
 591+ return $this->indexInLastCommonParent;
 592+ }
 593+
 594+ public function setIndexInLastCommonParent($index) {
 595+ $this->indexInLastCommonParent = $index;
 596+ }
 597+}
 598+
 599+class Modification{
 600+
 601+ const NONE = 1;
 602+ const REMOVED = 2;
 603+ const ADDED = 4;
 604+ const CHANGED = 8;
 605+
 606+ private $type;
 607+
 608+ private $id = -1;
 609+
 610+ private $prevMod;
 611+
 612+ private $nextMod;
 613+
 614+ private $firstOfID = false;
 615+
 616+ function __construct($type) {
 617+ $this->type = $type;
 618+ }
 619+
 620+ public function copy() {
 621+ $newM = new Modification($this->getType());
 622+ $newM->setID($this->getID());
 623+ $newM->setChanges($this->getChanges());
 624+ $newM->setFirstOfID($this->isFirstOfID());
 625+ $newM->setNext($this->getNext());
 626+ $newM->setPrevious($this->getPrevious());
 627+ return $newM;
 628+ }
 629+
 630+ public function getType() {
 631+ return $this->type;
 632+ }
 633+
 634+ public function setID($id) {
 635+ $this->id = $id;
 636+ }
 637+
 638+ public function getID() {
 639+ return $this->id;
 640+ }
 641+
 642+ public function setPrevious($m) {
 643+ $this->prevMod = $m;
 644+ }
 645+
 646+ public function getPrevious() {
 647+ return $this->prevMod;
 648+ }
 649+
 650+ public function setNext($m) {
 651+ $this->nextMod = $m;
 652+ }
 653+
 654+ public function getNext() {
 655+ return $this->nextMod;
 656+ }
 657+
 658+ private $changes;
 659+
 660+ public function setChanges($changes) {
 661+ $this->changes = $changes;
 662+ }
 663+
 664+ public function getChanges() {
 665+ return $this->changes;
 666+ }
 667+
 668+ public function isFirstOfID() {
 669+ return $this->firstOfID;
 670+ }
 671+
 672+ public function setFirstOfID($firstOfID) {
 673+ $this->firstOfID = $firstOfID;
 674+ }
 675+
 676+ public static function typeToString($type){
 677+ switch($type){
 678+ case self::NONE: return 'none';
 679+ case self::REMOVED: return 'removed';
 680+ case self::ADDED: return 'added';
 681+ case self::CHANGED: return 'changed';
 682+ }
 683+ }
 684+}
 685+
 686+class DomTreeBuilder {
 687+
 688+ private $textNodes = array();
 689+
 690+ private $bodyNode;
 691+
 692+ private $currentParent;
 693+
 694+ private $newWord = "";
 695+
 696+ protected $bodyStarted = false;
 697+
 698+ protected $bodyEnded = false;
 699+
 700+ private $whiteSpaceBeforeThis = false;
 701+
 702+ private $lastSibling;
 703+
 704+ function __construct(){
 705+ $this->bodyNode = $this->currentParent = new BodyNode();
 706+ }
 707+
 708+ public function getBodyNode() {
 709+ return $this->bodyNode;
 710+ }
 711+
 712+ public function getTextNodes() {
 713+ return $this->textNodes;
 714+ }
 715+
 716+ /**
 717+ * Must be called manually
 718+ */
 719+ public function endDocument() {
 720+ $this->endWord();
 721+ //wfDebug(sizeof($this->textNodes) . ' text nodes in document.');
 722+ }
 723+
 724+ public function startElement($parser, $name, /*array*/ $attributes) {
 725+ if(!strcasecmp($name, 'body')==0){
 726+ //wfDebug("Starting $name node.");
 727+ $this->endWord();
 728+
 729+ $newTagNode = new TagNode($this->currentParent, $name, $attributes);
 730+ $this->currentParent = $newTagNode;
 731+ $this->lastSibling = NULL;
 732+ if ($this->whiteSpaceBeforeThis && $newTagNode->isInline()) {
 733+ $newTagNode->setWhiteBefore(true);
 734+ }
 735+ $this->whiteSpaceBeforeThis = false;
 736+ }
 737+ }
 738+
 739+ public function endElement($parser, $name) {
 740+ if(!strcasecmp($name, 'body')==0){
 741+ //wfDebug("Ending $name node.");
 742+ if (0 == strcasecmp($name,'img')) {
 743+ // Insert a dummy leaf for the image
 744+ $img = new ImageNode($this->currentParent, $this->currentParent->getAttributes());
 745+ $img->setWhiteBefore($this->whiteSpaceBeforeThis);
 746+ $this->lastSibling = $img;
 747+ $this->textNodes[] = $img;
 748+ }
 749+ $this->endWord();
 750+ if ($this->currentParent->isInline()) {
 751+ $this->lastSibling = $this->currentParent;
 752+ } else {
 753+ $this->lastSibling = NULL;
 754+ }
 755+ $this->currentParent = $this->currentParent->getParent();
 756+ $this->whiteSpaceBeforeThis = false;
 757+ }else{
 758+ $this->endDocument();
 759+ }
 760+ }
 761+
 762+ public function characters($parser, $data){
 763+ //wfDebug('Parsing '. strlen($data).' characters.');
 764+ $array = str_split($data);
 765+ foreach($array as $c) {
 766+ if (self::isDelimiter($c)) {
 767+ $this->endWord();
 768+ if (WhiteSpaceNode::isWhiteSpace($c) && !$this->currentParent->isPre()
 769+ && !$this->currentParent->inPre()) {
 770+ if (!is_null($this->lastSibling)){
 771+ $this->lastSibling->setWhiteAfter(true);
 772+ }
 773+ $this->whiteSpaceBeforeThis = true;
 774+ } else {
 775+ $textNode = new TextNode($this->currentParent, $c);
 776+ $textNode->setWhiteBefore($this->whiteSpaceBeforeThis);
 777+ $this->whiteSpaceBeforeThis = false;
 778+ $this->lastSibling = $textNode;
 779+ $this->textNodes[] = $textNode;
 780+ }
 781+ } else {
 782+ $this->newWord .= $c;
 783+ }
 784+
 785+ }
 786+ }
 787+
 788+ private function endWord() {
 789+ if (strlen($this->newWord) > 0) {
 790+ $node = new TextNode($this->currentParent, $this->newWord);
 791+ $node->setWhiteBefore($this->whiteSpaceBeforeThis);
 792+ $this->whiteSpaceBeforeThis = false;
 793+ $this->lastSibling = $node;
 794+ $this->textNodes[] = $node;
 795+ $this->newWord = "";
 796+ }
 797+ }
 798+
 799+ public static function isDelimiter($c) {
 800+ switch ($c) {
 801+ // Basic Delimiters
 802+ case '/':
 803+ case '.':
 804+ case '!':
 805+ case ',':
 806+ case ';':
 807+ case '?':
 808+ case '=':
 809+ case '\'':
 810+ case '"':
 811+ // Extra Delimiters
 812+ case '[':
 813+ case ']':
 814+ case '{':
 815+ case '}':
 816+ case '(':
 817+ case ')':
 818+ case '&':
 819+ case '|':
 820+ case '\\':
 821+ case '-':
 822+ case '_':
 823+ case '+':
 824+ case '*':
 825+ case ':':
 826+ return true;
 827+ default:
 828+ return WhiteSpaceNode::isWhiteSpace($c);
 829+ }
 830+ }
 831+
 832+ public function getDiffLines(){
 833+ return array_map(array('TextNode','toDiffLine'), $this->textNodes);
 834+ }
 835+}
 836+
 837+class TextNodeDiffer{
 838+
 839+ private $textNodes;
 840+ private $bodyNode;
 841+
 842+ private $oldTextNodes;
 843+ private $oldBodyNode;
 844+
 845+ private $lastModified = array();
 846+
 847+ function __construct(DomTreeBuilder $tree, DomTreeBuilder $oldTree) {
 848+ $this->textNodes = $tree->getTextNodes();
 849+ $this->bodyNode = $tree->getBodyNode();
 850+ $this->oldTextNodes = $oldTree->getTextNodes();
 851+ $this->oldBodyNode = $oldTree->getBodyNode();
 852+ }
 853+
 854+ public function getBodyNode() {
 855+ return $this->bodyNode;
 856+ }
 857+
 858+ private $newID = 0;
 859+
 860+ public function markAsNew($start, $end) {
 861+ if ($end <= $start)
 862+ return;
 863+
 864+ if ($this->whiteAfterLastChangedPart)
 865+ $this->textNodes[$start]->setWhiteBefore(false);
 866+
 867+ $nextLastModified = array();
 868+
 869+ for ($i = $start; $i < $end; ++$i) {
 870+ $mod = new Modification(Modification::ADDED);
 871+ $mod->setID($this->newID);
 872+ if (sizeof($this->lastModified) > 0) {
 873+ $mod->setPrevious($this->lastModified[0]);
 874+ if (is_null($this->lastModified[0]->getNext())) {
 875+ foreach ($this->lastModified as $lastMod) {
 876+ $lastMod->setNext($mod);
 877+ }
 878+ }
 879+ }
 880+ $nextLastModified[] = $mod;
 881+ $this->textNodes[$i]->setModification($mod);
 882+ }
 883+ if ($start < $end) {
 884+ $this->textNodes[$start]->getModification()->setFirstOfID(true);
 885+ }
 886+ ++$this->newID;
 887+ $this->lastModified = $nextLastModified;
 888+ }
 889+
 890+ private $changedID = 0;
 891+
 892+ private $changedIDUsed = false;
 893+
 894+ public function handlePossibleChangedPart($leftstart, $leftend, $rightstart, $rightend) {
 895+ $i = $rightstart;
 896+ $j = $leftstart;
 897+
 898+ if ($this->changedIDUsed) {
 899+ ++$this->changedID;
 900+ $this->changedIDUsed = false;
 901+ }
 902+
 903+ $nextLastModified = array();
 904+
 905+ $changes;
 906+ while ($i < $rightend) {
 907+ $acthis = new AncestorComparator($this->textNodes[$i]->getParentTree());
 908+ $acother = new AncestorComparator($this->oldTextNodes[$j]->getParentTree());
 909+ $result = $acthis->getResult($acother);
 910+ unset($acthis, $acother);
 911+
 912+ $nbLastModified = sizeof($this->lastModified);
 913+ if ($result->isChanged()) {
 914+ $mod = new Modification(Modification::CHANGED);
 915+
 916+ if (!$this->changedIDUsed) {
 917+ $mod->setFirstOfID(true);
 918+ if (sizeof($nextLastModified) > 0) {
 919+ $this->lastModified = $nextLastModified;
 920+ $nextLastModified = array();
 921+ }
 922+ } else if (!is_null($result->getChanges()) && $result->getChanges() != $this->changes) {
 923+ ++$this->changedID;
 924+ $mod->setFirstOfID(true);
 925+ if (sizeof($nextLastModified) > 0) {
 926+ $this->lastModified = $nextLastModified;
 927+ $nextLastModified = array();
 928+ }
 929+ }
 930+
 931+ if ($nbLastModified > 0) {
 932+ $mod->setPrevious($this->lastModified[0]);
 933+ if (is_null($this->lastModified[0]->getNext())) {
 934+ foreach ($this->lastModified as $lastMod) {
 935+ $lastMod->setNext($mod);
 936+ }
 937+ }
 938+ }
 939+ $nextLastModified[] = $mod;
 940+
 941+ $mod->setChanges($result->getChanges());
 942+ $mod->setID($this->changedID);
 943+
 944+ $this->textNodes[$i]->setModification($mod);
 945+ $this->changes = $result->getChanges();
 946+ $this->changedIDUsed = true;
 947+ } else if ($this->changedIDUsed) {
 948+ ++$this->changedID;
 949+ $this->changedIDUsed = false;
 950+ }
 951+ ++$i;
 952+ ++$j;
 953+ }
 954+ if (sizeof($nextLastModified) > 0){
 955+ $this->lastModified = $nextLastModified;
 956+ }
 957+ }
 958+
 959+ // used to remove the whitespace between a red and green block
 960+ private $whiteAfterLastChangedPart = false;
 961+
 962+ private $deletedID = 0;
 963+
 964+ public function markAsDeleted($start, $end, $before) {
 965+
 966+ if ($end <= $start)
 967+ return;
 968+
 969+ if ($before > 0 && $this->textNodes[$before - 1]->isWhiteAfter()) {
 970+ $this->whiteAfterLastChangedPart = true;
 971+ } else {
 972+ $this->whiteAfterLastChangedPart = false;
 973+ }
 974+
 975+ $nextLastModified = array();
 976+
 977+ for ($i = $start; $i < $end; ++$i) {
 978+ $mod = new Modification(Modification::REMOVED);
 979+ $mod->setID($this->deletedID);
 980+ if (sizeof($this->lastModified) > 0) {
 981+ $mod->setPrevious($this->lastModified[0]);
 982+ if (is_null($this->lastModified[0]->getNext())) {
 983+ foreach ($this->lastModified as $lastMod) {
 984+ $lastMod->setNext($mod);
 985+ }
 986+ }
 987+ }
 988+ $nextLastModified[] = $mod;
 989+
 990+ // oldTextNodes is used here because we're going to move its deleted
 991+ // elements
 992+ // to this tree!
 993+ $this->oldTextNodes[$i]->setModification($mod);
 994+ }
 995+ $this->oldTextNodes[$start]->getModification()->setFirstOfID(true);
 996+
 997+ $deletedNodes = $this->oldBodyNode->getMinimalDeletedSet($this->deletedID);
 998+
 999+ //wfDebug("Minimal set of deleted nodes of size " . sizeof($deletedNodes));
 1000+
 1001+ // Set prevLeaf to the leaf after which the old HTML needs to be
 1002+ // inserted
 1003+ if ($before > 0){
 1004+ $prevLeaf = $this->textNodes[$before - 1];
 1005+ }
 1006+ // Set nextLeaf to the leaf before which the old HTML needs to be
 1007+ // inserted
 1008+ if ($before < sizeof($this->textNodes)){
 1009+ $nextLeaf = $this->textNodes[$before];
 1010+ }
 1011+
 1012+ while (sizeof($deletedNodes) > 0) {
 1013+ if (isset($prevLeaf)) {
 1014+ $prevResult = $prevLeaf->getLastCommonParent($deletedNodes[0]);
 1015+ } else {
 1016+ $prevResult = new LastCommonParentResult();
 1017+ $prevResult->setLastCommonParent($this->getBodyNode());
 1018+ $prevResult->setIndexInLastCommonParent(0);
 1019+ }
 1020+ if (isset($nextleaf)) {
 1021+ $nextResult = $nextLeaf->getLastCommonParent($deletedNodes[sizeof($deletedNodes) - 1]);
 1022+ } else {
 1023+ $nextResult = new LastCommonParentResult();
 1024+ $nextResult->setLastCommonParent($this->getBodyNode());
 1025+ $nextResult->setIndexInLastCommonParent($this->getBodyNode()->getNbChildren());
 1026+ }
 1027+
 1028+ if ($prevResult->getLastCommonParentDepth() == $nextResult->getLastCommonParentDepth()) {
 1029+ // We need some metric to choose which way to add-...
 1030+ if ($deletedNodes[0]->getParent() === $deletedNodes[sizeof($deletedNodes) - 1]->getParent()
 1031+ && $prevResult->getLastCommonParent() === $nextResult->getLastCommonParent()) {
 1032+ // The difference is not in the parent
 1033+ $prevResult->setLastCommonParentDepth($prevResult->getLastCommonParentDepth() + 1);
 1034+ } else {
 1035+ // The difference is in the parent, so compare them
 1036+ // now THIS is tricky
 1037+ $distancePrev = $deletedNodes[0]->getParent()->getMatchRatio($prevResult->getLastCommonParent());
 1038+ $distanceNext = $deletedNodes[sizeof($deletedNodes) - 1]->getParent()->getMatchRatio($nextResult->getLastCommonParent());
 1039+
 1040+ if ($distancePrev <= $distanceNext) {
 1041+ $prevResult->setLastCommonParentDepth($prevResult->getLastCommonParentDepth() + 1);
 1042+ } else {
 1043+ $nextResult->setLastCommonParentDepth($nextResult->getLastCommonParentDepth() + 1);
 1044+ }
 1045+ }
 1046+
 1047+ }
 1048+
 1049+ if ($prevResult->getLastCommonParentDepth() > $nextResult->getLastCommonParentDepth()) {
 1050+ // Inserting at the front
 1051+ if ($prevResult->isSplittingNeeded()) {
 1052+ $prevLeaf->getParent()->splitUntill($prevResult->getLastCommonParent(), $prevLeaf, true);
 1053+ }
 1054+ $prevLeaf = $deletedNodes[0]->copyTree();
 1055+ unset($deletedNodes[0]);
 1056+ $deletedNodes = array_values($deletedNodes);
 1057+ $prevLeaf->setParent($prevResult->getLastCommonParent());
 1058+ $prevResult->getLastCommonParent()->addChild($prevLeaf,$prevResult->getIndexInLastCommonParent() + 1);
 1059+ } else if ($prevResult->getLastCommonParentDepth() < $nextResult->getLastCommonParentDepth()) {
 1060+ // Inserting at the back
 1061+ if ($nextResult->isSplittingNeeded()) {
 1062+ $splitOccured = $nextLeaf->getParent()->splitUntill($nextResult->getLastCommonParent(), $nextLeaf, false);
 1063+ if ($splitOccured) {
 1064+ // The place where to insert is shifted one place to the
 1065+ // right
 1066+ $nextResult->setIndexInLastCommonParent($nextResult->getIndexInLastCommonParent() + 1);
 1067+ }
 1068+ }
 1069+ $nextLeaf = $deletedNodes[sizeof(deletedNodes) - 1]->copyTree();
 1070+ unset($deletedNodes[sizeof(deletedNodes) - 1]);
 1071+ $deletedNodes = array_values($deletedNodes);
 1072+ $nextLeaf->setParent($nextResult->getLastCommonParent());
 1073+ $nextResult->getLastCommonParent()->addChild($nextLeaf,$nextResult->getIndexInLastCommonParent());
 1074+ } else
 1075+ throw new Exception("Uh?");
 1076+ }
 1077+ $this->lastModified = $nextLastModified;
 1078+ ++$this->deletedID;
 1079+ }
 1080+
 1081+ public function expandWhiteSpace() {
 1082+ $this->getBodyNode()->expandWhiteSpace();
 1083+ }
 1084+
 1085+ public function lengthNew(){
 1086+ return sizeof($this->textNodes);
 1087+ }
 1088+
 1089+ public function lengthOld(){
 1090+ return sizeof($this->oldTextNodes);
 1091+ }
 1092+}
 1093+
 1094+class HTMLDiffer{
 1095+
 1096+ private $output;
 1097+
 1098+ function __construct($output){
 1099+ $this->output = $output;
 1100+ }
 1101+
 1102+ function htmlDiff($from, $to){
 1103+ // Create an XML parser
 1104+ $xml_parser = xml_parser_create('');
 1105+
 1106+ $domfrom = new DomTreeBuilder();
 1107+
 1108+ // Set the functions to handle opening and closing tags
 1109+ xml_set_element_handler($xml_parser, array($domfrom,"startElement"), array($domfrom,"endElement"));
 1110+
 1111+ // Set the function to handle blocks of character data
 1112+ xml_set_character_data_handler($xml_parser, array($domfrom,"characters"));
 1113+
 1114+ ;
 1115+ //wfDebug('Parsing '.strlen($from)." characters worth of HTML");
 1116+ if (!xml_parse($xml_parser, '<?xml version="1.0" encoding="UTF-8"?>'.Sanitizer::hackDocType().'<body>', FALSE)
 1117+ || !xml_parse($xml_parser, $from, FALSE)
 1118+ || !xml_parse($xml_parser, '</body>', TRUE)){
 1119+ wfDebug(sprintf("XML error: %s at line %d",xml_error_string(xml_get_error_code($xml_parser)),xml_get_current_line_number($xml_parser)));
 1120+ }
 1121+ xml_parser_free($xml_parser);
 1122+ unset($from);
 1123+
 1124+ $xml_parser = xml_parser_create('');
 1125+
 1126+ $domto = new DomTreeBuilder();
 1127+
 1128+ // Set the functions to handle opening and closing tags
 1129+ xml_set_element_handler($xml_parser, array($domto,"startElement"), array($domto,"endElement"));
 1130+
 1131+ // Set the function to handle blocks of character data
 1132+ xml_set_character_data_handler($xml_parser, array($domto,"characters"));
 1133+
 1134+ //wfDebug('Parsing '.strlen($to)." characters worth of HTML");
 1135+ if (!xml_parse($xml_parser, '<?xml version="1.0" encoding="UTF-8"?>'.Sanitizer::hackDocType().'<body>', FALSE)
 1136+ || !xml_parse($xml_parser, $to, FALSE)
 1137+ || !xml_parse($xml_parser, '</body>', TRUE)){
 1138+ wfDebug(sprintf("XML error in HTML diff: %s at line %d",xml_error_string(xml_get_error_code($xml_parser)),xml_get_current_line_number($xml_parser)));
 1139+ }
 1140+ xml_parser_free($xml_parser);
 1141+ unset($to);
 1142+
 1143+ $diffengine = new _DiffEngine();
 1144+ $differences = $this->preProcess($diffengine->diff_range($domfrom->getDiffLines(), $domto->getDiffLines()));
 1145+ unset($xml_parser,$diffengine);
 1146+
 1147+ $domdiffer = new TextNodeDiffer($domto, $domfrom);
 1148+
 1149+ $currentIndexLeft = 0;
 1150+ $currentIndexRight = 0;
 1151+ foreach ($differences as $d) {
 1152+ if ($d->leftstart > $currentIndexLeft) {
 1153+ $domdiffer->handlePossibleChangedPart($currentIndexLeft, $d->leftstart,
 1154+ $currentIndexRight, $d->rightstart);
 1155+ }
 1156+ if ($d->leftlength > 0) {
 1157+ $domdiffer->markAsDeleted($d->leftstart, $d->leftend, $d->rightstart);
 1158+ }
 1159+ $domdiffer->markAsNew($d->rightstart, $d->rightend);
 1160+
 1161+ $currentIndexLeft = $d->leftend;
 1162+ $currentIndexRight = $d->rightend;
 1163+ }
 1164+ if ($currentIndexLeft < $domdiffer->lengthOld()) {
 1165+ $domdiffer->handlePossibleChangedPart($currentIndexLeft,$domdiffer->lengthOld(), $currentIndexRight,$domdiffer->lengthNew());
 1166+ }
 1167+
 1168+ $domdiffer->expandWhiteSpace();
 1169+ $output = new HTMLOutput('htmldiff', $this->output);
 1170+ $output->parse($domdiffer->getBodyNode());
 1171+ }
 1172+
 1173+ private function preProcess(/*array*/ $differences){
 1174+ $newRanges = array();
 1175+
 1176+ $nbDifferences = sizeof($differences);
 1177+ for ($i = 0; $i < $nbDifferences; ++$i) {
 1178+ $leftStart = $differences[$i]->leftstart;
 1179+ $leftEnd = $differences[$i]->leftend;
 1180+ $rightStart = $differences[$i]->rightstart;
 1181+ $rightEnd = $differences[$i]->rightend;
 1182+
 1183+ $leftLength = $leftEnd - $leftStart;
 1184+ $rightLength = $rightEnd - $rightStart;
 1185+
 1186+ while ($i + 1 < $nbDifferences && self::score($leftLength, $differences[$i + 1]->leftlength,
 1187+ $rightLength, $differences[$i + 1]->rightlength) > ($differences[$i + 1]->leftstart - $leftEnd)) {
 1188+ $leftEnd = $differences[$i + 1]->leftend;
 1189+ $rightEnd = $differences[$i + 1]->rightend;
 1190+ $leftLength = $leftEnd - $leftStart;
 1191+ $rightLength = $rightEnd - $rightStart;
 1192+ ++$i;
 1193+ }
 1194+ $newRanges[] = new RangeDifference($leftStart, $leftEnd, $rightStart, $rightEnd);
 1195+ }
 1196+ return $newRanges;
 1197+ }
 1198+
 1199+ /**
 1200+ * Heuristic to merge differences for readability.
 1201+ */
 1202+ public static function score($ll, $nll, $rl, $nrl) {
 1203+ if (($ll == 0 && $nll == 0)
 1204+ || ($rl == 0 && $nrl == 0)){
 1205+ return 0;
 1206+ }
 1207+ $numbers = array($ll, $nll, $rl, $nrl);
 1208+ $d = 0;
 1209+ foreach ($numbers as $number) {
 1210+ while ($number > 3) {
 1211+ $d += 3;
 1212+ $number -= 3;
 1213+ $number *= 0.5;
 1214+ }
 1215+ $d += $number;
 1216+
 1217+ }
 1218+ return $d / (1.5 * sizeof($numbers));
 1219+ }
 1220+
 1221+}
 1222+
 1223+class TextOnlyComparator{
 1224+
 1225+ public $leafs = array();
 1226+
 1227+ function _construct(TagNode $tree) {
 1228+ $this->addRecursive($tree);
 1229+ $this->leafs = array_map(array('TextNode','toDiffLine'), $this->leafs);
 1230+ }
 1231+
 1232+ private function addRecursive(TagNode $tree) {
 1233+ foreach ($tree->children as $child) {
 1234+ if ($child instanceof TagNode) {
 1235+ $this->addRecursive($child);
 1236+ } else if ($child instanceof TextNode) {
 1237+ $this->leafs[] = $node;
 1238+ }
 1239+ }
 1240+ }
 1241+
 1242+ public function getMatchRatio(TextOnlyComparator $other) {
 1243+ $nbOthers = sizeof($other->leafs);
 1244+ $nbThis = sizeof($this->leafs);
 1245+ if($nbOthers == 0 || $nbThis == 0){
 1246+ return -log(0);
 1247+ }
 1248+
 1249+ $diffengine = new _DiffEngine();
 1250+ $diffengine->diff_local($this->leafs, $other->leafs);
 1251+
 1252+ $distanceThis = array_sum($diffengine->xchanged);
 1253+ $distanceOther = array_sum($diffengine->ychanged);
 1254+
 1255+ return ((0.0 + $distanceOther) / $nbOthers + (0.0 + $distanceThis)
 1256+ / $nbThis) / 2.0;
 1257+ }
 1258+}
 1259+
 1260+class AncestorComparatorResult {
 1261+
 1262+ private $changed = false;
 1263+
 1264+ private $changes = "";
 1265+
 1266+ public function isChanged() {
 1267+ return $this->changed;
 1268+ }
 1269+
 1270+ public function setChanged($changed) {
 1271+ $this->changed = $changed;
 1272+ }
 1273+
 1274+ public function getChanges() {
 1275+ return $this->changes;
 1276+ }
 1277+
 1278+ public function setChanges($changes) {
 1279+ $this->changes = $changes;
 1280+ }
 1281+}
 1282+
 1283+/**
 1284+ * A comparator used when calculating the difference in ancestry of two Nodes.
 1285+ */
 1286+class AncestorComparator{
 1287+
 1288+ public $ancestors;
 1289+ public $ancestorsText;
 1290+
 1291+ function __construct(/*array*/ $ancestors) {
 1292+ $this->ancestors = $ancestors;
 1293+ $this->ancestorsText = array_map(array('TagNode','toDiffLine'), $ancestors);
 1294+ }
 1295+
 1296+ private $compareTxt = "";
 1297+
 1298+ public function getCompareTxt() {
 1299+ return $this->compareTxt;
 1300+ }
 1301+
 1302+ public function getResult(AncestorComparator $other) {
 1303+ $result = new AncestorComparatorResult();
 1304+
 1305+ $diffengine = new _DiffEngine();
 1306+ $differences = $diffengine->diff_range($this->ancestorsText, $other->ancestorsText);
 1307+
 1308+ if (sizeof($differences) == 0){
 1309+ return $result;
 1310+ }
 1311+ $changeTxt = new ChangeTextGenerator($this, $other);
 1312+
 1313+ $result->setChanged(true);
 1314+ $result->setChanges($changeTxt->getChanged($differences)->toString());
 1315+
 1316+ return $result;
 1317+ }
 1318+}
 1319+
 1320+class ChangeTextGenerator {
 1321+
 1322+ private $new;
 1323+ private $old;
 1324+
 1325+ private $factory;
 1326+
 1327+ function __construct(AncestorComparator $old, AncestorComparator $new) {
 1328+ $this->new = $new;
 1329+ $this->old = $old;
 1330+ $this->factory = new TagToStringFactory();
 1331+ }
 1332+
 1333+ public function getChanged(/*array*/ $differences) {
 1334+ $txt = new ChangeText;
 1335+
 1336+ $rootlistopened = false;
 1337+
 1338+ if (sizeof($differences) > 1) {
 1339+ $txt->addHtml('<ul class="changelist">');
 1340+ $rootlistopened = true;
 1341+ }
 1342+
 1343+ $nbDifferences = sizeof($differences);
 1344+ for ($j = 0; $j < $nbDifferences; ++$j) {
 1345+ $d = $differences[$j];
 1346+
 1347+ $lvl1listopened = false;
 1348+
 1349+ if ($rootlistopened) {
 1350+ $txt->addHtml('<li>');
 1351+ }
 1352+
 1353+ if ($d->leftlength + $d->rightlength > 1) {
 1354+ $txt->addHtml('<ul class="changelist">');
 1355+ $lvl1listopened = true;
 1356+ }
 1357+
 1358+ // left are the old ones
 1359+ for ($i = $d->leftstart; $i < $d->leftend; ++$i) {
 1360+ if ($lvl1listopened){
 1361+ $txt->addHtml('<li>');
 1362+ }
 1363+ // add a bullet for a old tag
 1364+ $this->addTagOld($txt, $this->old->ancestors[$i]);
 1365+
 1366+ if ($lvl1listopened){
 1367+ $txt->addHtml('</li>');
 1368+ }
 1369+ }
 1370+
 1371+ // right are the new ones
 1372+ for ($i = $d->rightstart; $i < $d->rightend; ++$i) {
 1373+ if ($lvl1listopened){
 1374+ $txt->addHtml('<li>');
 1375+ }
 1376+
 1377+ // add a bullet for a new tag
 1378+ $this->addTagNew($txt, $this->new->ancestors[$i]);
 1379+
 1380+ if ($lvl1listopened){
 1381+ $txt->addHtml('</li>');
 1382+ }
 1383+
 1384+ }
 1385+
 1386+ if ($lvl1listopened) {
 1387+ $txt->addHtml('</ul>');
 1388+ }
 1389+
 1390+ if ($rootlistopened) {
 1391+ $txt->addHtml('</li>');
 1392+ }
 1393+ }
 1394+
 1395+ if ($rootlistopened) {
 1396+ $txt->addHtml('</ul>');
 1397+ }
 1398+
 1399+ return $txt;
 1400+
 1401+ }
 1402+
 1403+ private function addTagOld(ChangeText $txt, TagNode $ancestor) {
 1404+ $this->factory->create($ancestor)->getRemovedDescription($txt);
 1405+ }
 1406+
 1407+ private function addTagNew(ChangeText $txt, TagNode $ancestor) {
 1408+ $this->factory->create($ancestor)->getAddedDescription($txt);
 1409+ }
 1410+}
 1411+
 1412+class ChangeText {
 1413+
 1414+ private $txt = "";
 1415+
 1416+ const newLine = "<br/>";
 1417+
 1418+ public function addText($s) {
 1419+ $s = $this->clean($s);
 1420+ $this->txt .= $s;
 1421+ }
 1422+
 1423+ public function addHtml($s) {
 1424+ $this->txt .= $s;
 1425+ }
 1426+
 1427+ public function addNewLine() {
 1428+ $this->addHtml(self::newLine);
 1429+ }
 1430+
 1431+ public function toString() {
 1432+ return $this->txt;
 1433+ }
 1434+
 1435+ private function clean($s) {
 1436+ return htmlspecialchars($s);
 1437+ }
 1438+}
 1439+
 1440+class TagToStringFactory {
 1441+
 1442+ private static $containerTags = array(
 1443+ 'html' => TRUE,
 1444+ 'body' => TRUE,
 1445+ 'p' => TRUE,
 1446+ 'blockquote' => TRUE,
 1447+ 'h1' => TRUE,
 1448+ 'h2' => TRUE,
 1449+ 'h3' => TRUE,
 1450+ 'h4' => TRUE,
 1451+ 'h5' => TRUE,
 1452+ 'pre' => TRUE,
 1453+ 'div' => TRUE,
 1454+ 'ul' => TRUE,
 1455+ 'ol' => TRUE,
 1456+ 'li' => TRUE,
 1457+ 'table' => TRUE,
 1458+ 'tbody' => TRUE,
 1459+ 'tr' => TRUE,
 1460+ 'td' => TRUE,
 1461+ 'th' => TRUE,
 1462+ 'br' => TRUE,
 1463+ 'hr' => TRUE,
 1464+ 'code' => TRUE,
 1465+ 'dl' => TRUE,
 1466+ 'dt' => TRUE,
 1467+ 'dd' => TRUE,
 1468+ 'input' => TRUE,
 1469+ 'form' => TRUE,
 1470+ 'img' => TRUE,
 1471+ // in-line tags that can be considered containers not styles
 1472+ 'span' => TRUE,
 1473+ 'a' => TRUE
 1474+ );
 1475+
 1476+ private static $styleTags = array(
 1477+ 'i' => TRUE,
 1478+ 'b' => TRUE,
 1479+ 'strong' => TRUE,
 1480+ 'em' => TRUE,
 1481+ 'font' => TRUE,
 1482+ 'big' => TRUE,
 1483+ 'del' => TRUE,
 1484+ 'tt' => TRUE,
 1485+ 'sub' => TRUE,
 1486+ 'sup' => TRUE,
 1487+ 'strike' => TRUE
 1488+ );
 1489+
 1490+ const MOVED = 1;
 1491+ const STYLE = 2;
 1492+ const UNKNOWN = 4;
 1493+
 1494+ public function create(TagNode $node) {
 1495+ $sem = $this->getChangeSemantic($node->getQName());
 1496+ if (0 == strcasecmp($node->getQName(),'a')){
 1497+ return new AnchorToString($node, $sem);
 1498+ }
 1499+ if (0 == strcasecmp($node->getQName(),'img')){
 1500+ return new NoContentTagToString($node, $sem);
 1501+ }
 1502+ return new TagToString($node, $sem);
 1503+ }
 1504+
 1505+ protected function getChangeSemantic($qname) {
 1506+ if (array_key_exists(strtolower($qname),self::$containerTags)){
 1507+ return self::MOVED;
 1508+ }
 1509+ if (array_key_exists(strtolower($qname),self::$styleTags)){
 1510+ return self::STYLE;
 1511+ }
 1512+ return self::UNKNOWN;
 1513+ }
 1514+}
 1515+
 1516+class TagToString {
 1517+
 1518+ protected $node;
 1519+
 1520+ protected $sem;
 1521+
 1522+ function __construct(TagNode $node, $sem) {
 1523+ $this->node = $node;
 1524+ $this->sem = $sem;
 1525+ }
 1526+
 1527+ public function getDescription() {
 1528+ return $this->getString('diff-' . $this->node->getQName());
 1529+ }
 1530+
 1531+ public function getRemovedDescription(ChangeText $txt) {
 1532+
 1533+ if ($this->sem == TagToStringFactory::MOVED) {
 1534+ $txt->addText($this->getMovedOutOf() . ' ' . strtolower($this->getArticle()) . ' ');
 1535+ $txt->addHtml('<b>');
 1536+ $txt->addText(strtolower($this->getDescription()));
 1537+ $txt->addHtml('</b>');
 1538+ } else if ($this->sem == TagToStringFactory::STYLE) {
 1539+ $txt->addHtml('<b>');
 1540+ $txt->addText($this->getDescription());
 1541+ $txt->addHtml('</b>');
 1542+ $txt->addText(' ' . strtolower($this->getStyleRemoved()));
 1543+ } else {
 1544+ $txt->addHtml('<b>');
 1545+ $txt->addText($this->getDescription());
 1546+ $txt->addHtml('</b>');
 1547+ $txt->addText(' ' . strtolower($this->getRemoved()));
 1548+ }
 1549+ $this->addAttributes($txt, $this->node->getAttributes());
 1550+ $txt->addText('.');
 1551+ }
 1552+
 1553+ public function getAddedDescription(ChangeText $txt) {
 1554+
 1555+ if ($this->sem == TagToStringFactory::MOVED) {
 1556+ $txt->addText($this->getMovedTo() . ' ' . strtolower($this->getArticle()) . ' ');
 1557+ $txt->addHtml('<b>');
 1558+ $txt->addText(strtolower($this->getDescription()));
 1559+ $txt->addHtml('</b>');
 1560+ } else if ($this->sem == TagToStringFactory::STYLE) {
 1561+ $txt->addHtml('<b>');
 1562+ $txt->addText($this->getDescription());
 1563+ $txt->addHtml('</b>');
 1564+ $txt->addText(' ' . strtolower($this->getStyleAdded()));
 1565+ } else {
 1566+ $txt->addHtml('<b>');
 1567+ $txt->addText($this->getDescription());
 1568+ $txt->addHtml('</b>');
 1569+ $txt->addText(' ' . strtolower($this->getAdded()));
 1570+ }
 1571+ $this->addAttributes($txt, $this->node->getAttributes());
 1572+ $txt->addText('.');
 1573+ }
 1574+
 1575+ protected function getMovedTo() {
 1576+ return $this->getString('diff-movedto');
 1577+ }
 1578+
 1579+ protected function getStyleAdded() {
 1580+ return $this->getString('diff-styleadded');
 1581+ }
 1582+
 1583+ protected function getAdded() {
 1584+ return $this->getString('diff-added');
 1585+ }
 1586+
 1587+ protected function getMovedOutOf() {
 1588+ return $this->getString('diff-movedoutof');
 1589+ }
 1590+
 1591+ protected function getStyleRemoved() {
 1592+ return $this->getString('diff-styleremoved');
 1593+ }
 1594+
 1595+ protected function getRemoved() {
 1596+ return $this->getString('diff-removed');
 1597+ }
 1598+
 1599+ protected function addAttributes(ChangeText $txt, array $attributes) {
 1600+ if (sizeof($attributes) < 1)
 1601+ return;
 1602+
 1603+ $keys = array_keys($attributes);
 1604+
 1605+ $txt->addText(' ' . strtolower($this->getWith()) . ' '
 1606+ . $this->translateArgument($keys[0]) . ' '
 1607+ . $attributes[$keys[0]]);
 1608+ for ($i = 1; $i < sizeof($attributes) - 1; $i++) {
 1609+ $txt->addText(', ' . $this->translateArgument($keys[$i]) . ' '
 1610+ . $attributes[$keys[$i]]);
 1611+ }
 1612+ if (sizeof($attributes) > 1) {
 1613+ $txt->addText(' '
 1614+ . strtolower($this->getAnd())
 1615+ . ' '
 1616+ . $this->translateArgument($keys[sizeof($attributes) - 1]) . ' '
 1617+ . $attributes[$keys[sizeof($attributes) - 1]]);
 1618+ }
 1619+ }
 1620+
 1621+ private function getAnd() {
 1622+ return $this->getString('diff-and');
 1623+ }
 1624+
 1625+ private function getWith() {
 1626+ return $this->getString('diff-with');
 1627+ }
 1628+
 1629+ protected function translateArgument($name) {
 1630+ if (0 == strcasecmp($name,'src'))
 1631+ return strtolower($this->getSource());
 1632+ if (0 == strcasecmp($name,'width'))
 1633+ return strtolower($this->getWidth());
 1634+ if (0 == strcasecmp($name,'height'))
 1635+ return strtolower($this->getHeight());
 1636+ return $name;
 1637+ }
 1638+
 1639+ private function getHeight() {
 1640+ return $this->getString('diff-height');
 1641+ }
 1642+
 1643+ private function getWidth() {
 1644+ return $this->getString('diff-width');
 1645+ }
 1646+
 1647+ protected function getSource() {
 1648+ return $this->getString('diff-source');
 1649+ }
 1650+
 1651+ protected function getArticle() {
 1652+ return $this->getString('diff-' . $this->node->getQName() . '-article');
 1653+ }
 1654+
 1655+ public static $bundle = array(
 1656+ 'diff-movedto' => 'Moved to',
 1657+ 'diff-styleadded' => 'Style added',
 1658+ 'diff-added' => 'Added',
 1659+ 'diff-changedto' => 'Changed to',
 1660+ 'diff-movedoutof' => 'Moved out of',
 1661+ 'diff-styleremoved' => 'Style removed',
 1662+ 'diff-removed' => 'Removed',
 1663+ 'diff-changedfrom' => 'Changed from',
 1664+ 'diff-source' => 'Source',
 1665+ 'diff-withdestination' => 'With destination',
 1666+ 'diff-and' => 'And',
 1667+ 'diff-with' => 'With',
 1668+ 'diff-width' => 'Width',
 1669+ 'diff-height' => 'Height',
 1670+ 'diff-html-article' => 'A',
 1671+ 'diff-html' => 'Html page',
 1672+ 'diff-body-article' => 'A',
 1673+ 'diff-body' => 'Html document',
 1674+ 'diff-p-article' => 'A',
 1675+ 'diff-p' => 'Paragraph',
 1676+ 'diff-blockquote-article' => 'A',
 1677+ 'diff-blockquote' => 'Quote',
 1678+ 'diff-h1-article' => 'A',
 1679+ 'diff-h1' => 'Heading (level 1)',
 1680+ 'diff-h2-article' => 'A',
 1681+ 'diff-h2' => 'Heading (level 2)',
 1682+ 'diff-h3-article' => 'A',
 1683+ 'diff-h3' => 'Heading (level 3)',
 1684+ 'diff-h4-article' => 'A',
 1685+ 'diff-h4' => 'Heading (level 4)',
 1686+ 'diff-h5-article' => 'A',
 1687+ 'diff-h5' => 'Heading (level 5)',
 1688+ 'diff-pre-article' => 'A',
 1689+ 'diff-pre' => 'Preformatted block',
 1690+ 'diff-div-article' => 'A',
 1691+ 'diff-div' => 'Division',
 1692+ 'diff-ul-article' => 'An',
 1693+ 'diff-ul' => 'Unordered list',
 1694+ 'diff-ol-article' => 'An',
 1695+ 'diff-ol' => 'Ordered list',
 1696+ 'diff-li-article' => 'A',
 1697+ 'diff-li' => 'List item',
 1698+ 'diff-table-article' => 'A',
 1699+ 'diff-table' => 'Table',
 1700+ 'diff-tbody-article' => 'A',
 1701+ 'diff-tbody' => "Table's content",
 1702+ 'diff-tr-article' => 'A',
 1703+ 'diff-tr' => 'Row',
 1704+ 'diff-td-article' => 'A',
 1705+ 'diff-td' => 'Cell',
 1706+ 'diff-th-article' => 'A',
 1707+ 'diff-th' => 'Header',
 1708+ 'diff-br-article' => 'A',
 1709+ 'diff-br' => 'Break',
 1710+ 'diff-hr-article' => 'A',
 1711+ 'diff-hr' => 'Horizontal rule',
 1712+ 'diff-code-article' => 'A',
 1713+ 'diff-code' => 'Computer code block',
 1714+ 'diff-dl-article' => 'A',
 1715+ 'diff-dl' => 'Definition list',
 1716+ 'diff-dt-article' => 'A',
 1717+ 'diff-dt' => 'Definition term',
 1718+ 'diff-dd-article' => 'A',
 1719+ 'diff-dd' => 'Definition',
 1720+ 'diff-input-article' => 'An',
 1721+ 'diff-input' => 'Input',
 1722+ 'diff-form-article' => 'A',
 1723+ 'diff-form' => 'Form',
 1724+ 'diff-img-article' => 'An',
 1725+ 'diff-img' => 'Image',
 1726+ 'diff-span-article' => 'A',
 1727+ 'diff-span' => 'Span',
 1728+ 'diff-a-article' => 'A',
 1729+ 'diff-a' => 'Link',
 1730+ 'diff-i' => 'Italics',
 1731+ 'diff-b' => 'Bold',
 1732+ 'diff-strong' => 'Strong',
 1733+ 'diff-em' => 'Emphasis',
 1734+ 'diff-font' => 'Font',
 1735+ 'diff-big' => 'Big',
 1736+ 'diff-del' => 'Deleted',
 1737+ 'diff-tt' => 'Fixed width',
 1738+ 'diff-sub' => 'Subscript',
 1739+ 'diff-sup' => 'Superscript',
 1740+ 'diff-strike' => 'Strikethrough'
 1741+ );
 1742+
 1743+ public function getString($key) {
 1744+ return self::$bundle[$key];
 1745+ }
 1746+}
 1747+
 1748+class NoContentTagToString extends TagToString {
 1749+
 1750+ function __construct(TagNode $node, $sem) {
 1751+ parent::__construct($node, $sem);
 1752+ }
 1753+
 1754+ public function getAddedDescription(ChangeText $txt) {
 1755+ $txt.addText($this->getChangedTo() . ' ' + strtolower($this->getArticle()) . ' ');
 1756+ $txt.addHtml('<b>');
 1757+ $txt.addText(strtolower($this->getDescription()));
 1758+ $txt.addHtml('</b>');
 1759+
 1760+ $this->addAttributes($txt, $this->node->getAttributes());
 1761+ $txt.addText('.');
 1762+ }
 1763+
 1764+ private function getChangedTo() {
 1765+ return $this->getString('diff-changedto');
 1766+ }
 1767+
 1768+ public function getRemovedDescription(ChangeText $txt) {
 1769+ $txt.addText($this->getChangedFrom() . ' ' + strtolower($this->getArticle()) . ' ');
 1770+ $txt.addHtml('<b>');
 1771+ $txt.addText(strtolower($this->getDescription()));
 1772+ $txt.addHtml('</b>');
 1773+
 1774+ $this->addAttributes($txt, $this->node->getAttributes());
 1775+ $txt.addText('.');
 1776+ }
 1777+
 1778+ private function getChangedFrom() {
 1779+ return $this->getString('diff-changedfrom');
 1780+ }
 1781+}
 1782+
 1783+class AnchorToString extends TagToString {
 1784+
 1785+ function __construct(TagNode $node, $sem) {
 1786+ parent::__construct($node, $sem);
 1787+ }
 1788+
 1789+ protected function addAttributes(ChangeText $txt, array $attributes) {
 1790+ if (array_key_exists('href',$attributes)) {
 1791+ $txt->addText(' ' . strtolower($this->getWithDestination()) . ' ' . $attributes['href']);
 1792+ unset($attributes['href']);
 1793+ }
 1794+ parent::addAttributes($txt, $attributes);
 1795+ }
 1796+
 1797+ private function getWithDestination() {
 1798+ return $this->getString('diff-withdestination');
 1799+ }
 1800+
 1801+}
 1802+
 1803+/**
 1804+ * Takes a branch root and creates an HTML file for it.
 1805+ */
 1806+class HTMLOutput{
 1807+
 1808+ private $prefix;
 1809+ private $handler;
 1810+
 1811+ function __construct($prefix, $handler) {
 1812+ $this->prefix = $prefix;
 1813+ $this->handler = $handler;
 1814+ }
 1815+
 1816+ public function parse(TagNode $node) {
 1817+
 1818+ if (0 != strcasecmp($node->getQName(),'img') && 0 != strcasecmp($node->getQName(),'body')) {
 1819+ $this->handler->startElement($node->getQName(), $node->getAttributes());
 1820+ }
 1821+
 1822+ $newStarted = false;
 1823+ $remStarted = false;
 1824+ $changeStarted = false;
 1825+ $changeTXT = '';
 1826+
 1827+ foreach ($node->children as $child) {
 1828+ if ($child instanceof TagNode) {
 1829+ if ($newStarted) {
 1830+ $this->handler->endElement('span');
 1831+ $newStarted = false;
 1832+ } else if ($changeStarted) {
 1833+ $this->handler->endElement('span');
 1834+ $changeStarted = false;
 1835+ } else if ($remStarted) {
 1836+ $this->handler->endElement('span');
 1837+ $remStarted = false;
 1838+ }
 1839+ $this->parse($child);
 1840+ } else if ($child instanceof TextNode) {
 1841+ $mod = $child->getModification();
 1842+
 1843+ if ($newStarted && ($mod->getType() != Modification::ADDED || $mod->isFirstOfID())) {
 1844+ $this->handler->endElement('span');
 1845+ $newStarted = false;
 1846+ } else if ($changeStarted && ($mod->getType() != Modification::CHANGED || $mod->getChanges() != $changeTXT || $mod->isFirstOfID())) {
 1847+ $this->handler->endElement('span');
 1848+ $changeStarted = false;
 1849+ } else if ($remStarted && ($mod->getType() != Modification::REMOVED || $mod ->isFirstOfID())) {
 1850+ $this->handler->endElement('span');
 1851+ $remStarted = false;
 1852+ }
 1853+
 1854+ // no else because a removed part can just be closed and a new
 1855+ // part can start
 1856+ if (!$newStarted && $mod->getType() == Modification::ADDED) {
 1857+ $attrs = array('class'=>'diff-html-added');
 1858+ if ($mod->isFirstOfID()) {
 1859+ $attrs['id'] = 'added-' . $this->prefix . '-' . $mod->getID();
 1860+ }
 1861+ $this->addAttributes($mod, $attrs);
 1862+ $attrs['onclick'] = 'return tipA(constructToolTipA(this));';
 1863+ $this->handler->startElement('span', $attrs);
 1864+ $newStarted = true;
 1865+ } else if (!$changeStarted && $mod->getType() == Modification::CHANGED) {
 1866+ $attrs = array('class'=>'diff-html-changed');
 1867+ if ($mod->isFirstOfID()) {
 1868+ $attrs['id'] = 'changed-' . $this->prefix . '-' . $mod->getID();
 1869+ }
 1870+ $this->addAttributes($mod, $attrs);
 1871+ $attrs['onclick'] = 'return tipC(constructToolTipC(this));';
 1872+ $this->handler->startElement('span', $attrs);
 1873+
 1874+ //tooltip
 1875+ $this->handler->startElement('span', array('class'=>'tip'));
 1876+ $this->handler->characters($mod->getChanges());
 1877+ $this->handler->endElement('span');
 1878+
 1879+ $changeStarted = true;
 1880+ $changeTXT = $mod->getChanges();
 1881+ } else if (!$remStarted && $mod->getType() == Modification::REMOVED) {
 1882+ $attrs = array('class'=>'diff-html-removed');
 1883+ if ($mod->isFirstOfID()) {
 1884+ $attrs['id'] = 'removed-' . $this->prefix . '-' . $mod->getID();
 1885+ }
 1886+ $this->addAttributes($mod, $attrs);
 1887+ $attrs['onclick'] = 'return tipR(constructToolTipR(this));';
 1888+ $this->handler->startElement('span', $attrs);
 1889+ $remStarted = true;
 1890+ }
 1891+
 1892+ $chars = $child->getText();
 1893+
 1894+ if ($child instanceof ImageNode) {
 1895+ $this->writeImage($child);
 1896+ } else {
 1897+ $this->handler->characters($chars);
 1898+ }
 1899+
 1900+ }
 1901+ }
 1902+
 1903+ if ($newStarted) {
 1904+ $this->handler->endElement('span');
 1905+ $newStarted = false;
 1906+ } else if ($changeStarted) {
 1907+ $this->handler->endElement('span');
 1908+ $changeStarted = false;
 1909+ } else if ($remStarted) {
 1910+ $this->handler->endElement('span');
 1911+ $remStarted = false;
 1912+ }
 1913+
 1914+ if (0 != strcasecmp($node->getQName(),'img')
 1915+ && 0 != strcasecmp($node->getQName(),'body'))
 1916+ $this->handler->endElement($node->getQName());
 1917+ }
 1918+
 1919+ private function writeImage(ImageNode $imgNode){
 1920+ $attrs = $imgNode->getAttributes();
 1921+ if ($imgNode->getModification()->getType() == Modification::REMOVED)
 1922+ $attrs['changeType']='diff-removed-image';
 1923+ else if ($imgNode->getModification()->getType() == Modification::ADDED)
 1924+ $attrs['changeType'] = 'diff-added-image';
 1925+ $attrs['onload'] = 'updateOverlays()';
 1926+ $attrs['onError'] = 'updateOverlays()';
 1927+ $attrs['onAbort'] = 'updateOverlays()';
 1928+ $this->handler->startElement('img', $attrs);
 1929+ $this->handler->endElement('img');
 1930+ }
 1931+
 1932+ private function addAttributes(Modification $mod, /*array*/ &$attrs) {
 1933+ if (is_null($mod->getPrevious())) {
 1934+ $previous = 'first-' . $this->prefix;
 1935+ } else {
 1936+ $previous = Modification::typeToString($mod->getPrevious()->getType()) . '-' . $this->prefix . '-'
 1937+ . $mod->getPrevious()->getID();
 1938+ }
 1939+ $attrs['previous'] = $previous;
 1940+
 1941+ $changeId = Modification::typeToString($mod->getType()) . '-' + $this->prefix . '-' . $mod->getID();
 1942+ $attrs['changeId'] = $changeId;
 1943+
 1944+ if (is_null($mod->getNext())) {
 1945+ $next = 'last-' . $this->prefix;
 1946+ } else {
 1947+ $next = Modification::typeToString($mod->getNext()->getType()) . '-' . $this->prefix . '-'
 1948+ . $mod->getNext()->getID();
 1949+ }
 1950+ $attrs['next'] = $next;
 1951+ }
 1952+}
 1953+
 1954+class EchoingContentHandler{
 1955+
 1956+ function startElement($qname, /*array*/ $arguments){
 1957+ echo '<'.$qname;
 1958+ foreach($arguments as $key => $value){
 1959+ echo ' '.$key.'="'.Sanitizer::encodeAttribute($value).'"';
 1960+ }
 1961+ echo '>';
 1962+ }
 1963+
 1964+ function endElement($qname){
 1965+ echo '</'.$qname.'>';
 1966+ }
 1967+
 1968+ function characters($chars){
 1969+ echo $chars;
 1970+ }
 1971+
 1972+}
 1973+
 1974+class DelegatingContentHandler{
 1975+
 1976+ private $delegate;
 1977+
 1978+ function __construct($delegate){
 1979+ $this->delegate=$delegate;
 1980+ }
 1981+
 1982+ function startElement($qname, /*array*/ $arguments){
 1983+ $this->delegate->addHtml('<'.$qname) ;
 1984+ foreach($arguments as $key => $value){
 1985+ $this->delegate->addHtml(' '.$key.'="'. Sanitizer::encodeAttribute($value) .'"');
 1986+ }
 1987+ $this->delegate->addHtml('>');
 1988+ }
 1989+
 1990+ function endElement($qname){
 1991+ $this->delegate->addHtml('</'.$qname.'>');
 1992+ }
 1993+
 1994+ function characters($chars){
 1995+ $this->delegate->addHtml( $chars );
 1996+ }
 1997+
 1998+}
Index: trunk/phase3/includes/Diff.php
@@ -18,454 +18,488 @@
1919 */
2020
2121 /**
22 - * Implementation of the Diff algorithm.
23 - *
24 - * DO NOT USE ON PRODUCTION SYSTEMS
25 - *
26 - * The algorithm is based on the O(NP) LCS algorithm as descibed by Wu et al. in "An O(NP) Sequence Comparison Algorithm"
27 - * and extended with my own ideas.
 22+ * This diff implementation is mainly lifted from the LCS algorithm of the Eclipse project which
 23+ * in turn is based on Myers' "An O(ND) difference algorithm and its variations"
 24+ * (http://citeseer.ist.psu.edu/myers86ond.html) with range compression (see Wu et al.'s
 25+ * "An O(NP) Sequence Comparison Algorithm").
2826 *
29 - * @return array($from_removed, $to_added)
30 - * TRUE if the token was removed or added.
 27+ * This implementation supports an upper bound on the excution time.
3128 *
 29+ * Complexity: O((M + N)D) worst case time, O(M + N + D^2) expected time, O(M + N) space
 30+ *
3231 * @author Guy Van den Broeck
 32+ *
3333 */
34 -function wikidiff3_diff( /*array*/ $from, /*array*/ $to, $boundRunningTime=FALSE, $max_NP_before_bound = 800000){
35 - wfProfileIn( __METHOD__ );
 34+class WikiDiff3{
3635
37 - $m = sizeof($from);
38 - $n = sizeof($to);
 36+ //Input variables
 37+ private $from;
 38+ private $to;
 39+ private $m;
 40+ private $n;
3941
40 - $result_from = array_fill(0,$m,TRUE);
41 - $result_to = array_fill(0,$n,TRUE);
 42+ private $tooLong;
 43+ private $powLimit;
4244
43 - //reduce the complexity for the next step (intentionally done twice)
44 - //remove common tokens at the start
45 - $i=0;
46 - while($i<$m && $i<$n && $from[$i]===$to[$i]){
47 - $result_from[$i] = $result_to[$i] = FALSE;
48 - unset($from[$i],$to[$i]);
49 - ++$i;
 45+ //State variables
 46+ private $maxDifferences;
 47+
 48+ //Output variables
 49+ public $length;
 50+ public $added;
 51+ public $removed;
 52+ public $heuristicUsed;
 53+
 54+ function __construct($tooLong = 2000000, $powLimit = 1.45){
 55+ $this->tooLong = $tooLong;
 56+ $this->powLimit = $powLimit;
5057 }
5158
52 - //remove common tokens at the end
53 - $j=1;
54 - while($i+$j<=$m && $i+$j<=$n && $from[$m-$j]===$to[$n-$j]){
55 - $result_from[$m-$j] = $result_to[$n-$j] = FALSE;
56 - unset($from[$m-$j],$to[$n-$j]);
57 - ++$j;
58 - }
 59+ public function diff(/*array*/ $from, /*array*/ $to){
 60+ $m = sizeof($from);
 61+ $n = sizeof($to);
 62+
 63+ $this->heuristicUsed = FALSE;
5964
60 - $newFrom = $newFromIndex = $newTo = $newToIndex = array();
 65+ $result_from = $m>0?array_fill(0,$m,true):array();
 66+ $result_to = $n>0?array_fill(0,$n,true):array();
6167
62 - //remove tokens not in both sequences
63 - $shared = array_fill_keys($from,FALSE);
64 - foreach($to as $index => $el){
65 - if(array_key_exists($el,$shared)){
66 - //keep it
67 - $shared[$el] = TRUE;
68 - $newTo[] = $el;
69 - $newToIndex[] = $index;
 68+ //reduce the complexity for the next step (intentionally done twice)
 69+ //remove common tokens at the start
 70+ $i=0;
 71+ while($i<$m && $i<$n && $from[$i]===$to[$i]){
 72+ $result_from[$i] = $result_to[$i] = false;
 73+ unset($from[$i],$to[$i]);
 74+ ++$i;
7075 }
71 - }
72 - foreach($from as $index => $el){
73 - if($shared[$el]){
74 - //keep it
75 - $newFrom[] = $el;
76 - $newFromIndex[] = $index;
 76+
 77+ //remove common tokens at the end
 78+ $j=1;
 79+ while($i+$j<=$m && $i+$j<=$n && $from[$m-$j]===$to[$n-$j]){
 80+ $result_from[$m-$j] = $result_to[$n-$j] = false;
 81+ unset($from[$m-$j],$to[$n-$j]);
 82+ ++$j;
7783 }
78 - }
7984
80 - unset($from, $to, $shared);
 85+ $newFrom = $newFromIndex = $newTo = $newToIndex = array();
8186
82 - $m2 = sizeof($newFrom);
83 - $n2 = sizeof($newTo);
84 - $offsetx = $offsety = 0;
85 - $from_inLcs = new InLcs($m2);
86 - $to_inLcs = new InLcs($n2);
 87+ //remove tokens not in both sequences
 88+ $shared = array_fill_keys($from,false);
 89+ foreach($to as $index => $el){
 90+ if(array_key_exists($el,$shared)){
 91+ //keep it
 92+ $this->to[] = $el;
 93+ $shared[$el] = true;
 94+ $newToIndex[] = $index;
 95+ }
 96+ }
 97+ foreach($from as $index => $el){
 98+ if($shared[$el]){
 99+ //keep it
 100+ $this->from[] = $el;
 101+ $newFromIndex[] = $index;
 102+ }
 103+ }
87104
88 - wikidiff3_diffPart($newFrom, $newTo, $from_inLcs, $to_inLcs, $m2, $n2, $offsetx, $offsety, $m2, $boundRunningTime, $max_NP_before_bound);
 105+ unset($shared);
89106
90 - foreach($from_inLcs->inLcs as $key => $in){
91 - if($in){
92 - $result_from[$newFromIndex[$key]] = FALSE;
 107+ $this->m = sizeof($this->from);
 108+ $this->n = sizeof($this->to);
 109+ $this->removed = $this->m>0?array_fill(0,$this->m,true):array();
 110+ $this->added = $this->n>0?array_fill(0,$this->n,true):array();
 111+
 112+ $this->longestCommonSubsequence();
 113+
 114+ $this->m = $m;
 115+ $this->n = $n;
 116+ $this->length += $i+$j-1;
 117+
 118+ foreach($this->removed as $key => $removed){
 119+ if(!$removed){
 120+ $result_from[$newFromIndex[$key]] = false;
 121+ }
93122 }
94 - }
95 - foreach($to_inLcs->inLcs as $key => $in){
96 - if($in){
97 - $result_to[$newToIndex[$key]] = FALSE;
 123+ foreach($this->added as $key => $added){
 124+ if(!$added){
 125+ $result_to[$newToIndex[$key]] = false;
 126+ }
98127 }
 128+ unset($this->added, $this->removed);
 129+ return array($result_from, $result_to);
99130 }
100 -
101 - wfProfileOut( __METHOD__ );
102 - return array($result_from, $result_to);
103 -}
104131
105 -function wikidiff3_diffPart( /*array*/ $a, /*array*/ $b, InLcs $a_inLcs, InLcs $b_inLcs, $m, $n, $offsetx, $offsety, $bestKnownLcs, $boundRunningTime=FALSE, $max_NP_before_bound = 800000){
106 - if($bestKnownLcs==0 || $m==0 || $n==0){
107 - return;
108 - }
109 -
110 - if($m>$n){
111 - return wikidiff3_diffPart($b, $a, $b_inLcs, $a_inLcs, $n, $m, $offsety, $offsetx, $bestKnownLcs, $boundRunningTime, $max_NP_before_bound);
112 - }
 132+ public function longestCommonSubsequence() {
 133+ if ($this->m == 0 || $this->n == 0) {
 134+ $this->length = 0;
 135+ return;
 136+ }
113137
114 - $a_inLcs_sym = &$a_inLcs->inLcs;
115 - $b_inLcs_sym = &$b_inLcs->inLcs;
 138+ $this->maxDifferences = ceil(($this->m + $this->n) / 2.0);
 139+ if ($this->m * $this->n > $this->tooLong) {
 140+ // limit complexity to D^POW_LIMIT for long sequences
 141+ $this->maxDifferences = floor(pow($this->maxDifferences, $this->powLimit - 1.0));
 142+ wfDebug("Limiting max number of differences to $this->maxDifferences");
 143+ }
116144
117 - //remove common tokens at the start
118 - while($m>0 && $n>0 && $a[0]===$b[0]){
119 - $a_inLcs_sym[$offsetx] = $b_inLcs_sym[$offsety] = TRUE;
120 - ++$offsetx; ++$offsety;
121 - --$m; --$n;
122 - --$bestKnownLcs;
123 - array_shift($a);
124 - array_shift($b);
125 - }
 145+ /*
 146+ * The common prefixes and suffixes are always part of some LCS, include
 147+ * them now to reduce our search space
 148+ */
 149+ $max = min($this->m, $this->n);
 150+ for ($forwardBound = 0; $forwardBound < $max
 151+ && $this->from[$forwardBound]===$this->to[$forwardBound]; ++$forwardBound) {
 152+ $this->removed[$forwardBound] = $this->added[$forwardBound] = false;
 153+ }
126154
127 - //remove common tokens at the end
128 - while($m>0 && $n>0 && $a[$m-1]===$b[$n-1]){
129 - $a_inLcs_sym[$offsetx+$m-1] = $b_inLcs_sym[$offsety+$n-1] = TRUE;
130 - --$m; --$n;
131 - --$bestKnownLcs;
132 - array_pop($a);
133 - array_pop($b);
134 - }
 155+ $backBoundL1 = $this->m - 1;
 156+ $backBoundL2 = $this->n - 1;
135157
136 - if($bestKnownLcs==0 || $m==0 || $n==0){
137 - return;
 158+ while ($backBoundL1 >= $forwardBound && $backBoundL2 >= $forwardBound
 159+ && $this->from[$backBoundL1] === $this->to[$backBoundL2]) {
 160+ $this->removed[$backBoundL1--] = $this->added[$backBoundL2--] = false;
 161+ }
 162+
 163+ $temp = array_fill(0,$this->m + $this->n + 1, 0);
 164+ $V = array($temp,$temp);
 165+ $snake = array(0,0,0);
 166+
 167+ $this->length = $forwardBound + $this->m - $backBoundL1 - 1
 168+ + $this->lcs_rec($forwardBound, $backBoundL1, $forwardBound, $backBoundL2,
 169+ $V, $snake);
138170 }
139 - if($m>$n){
140 - return wikidiff3_diffPart($b, $a, $b_inLcs, $a_inLcs, $n, $m, $offsety, $offsetx, $bestKnownLcs, $boundRunningTime, $max_NP_before_bound);
141 - }
142171
143 - $delta = $n-$m;
144 - $delta_plus_1 = $delta+1;
145 - $delta_min_1 = $delta-1;
 172+ private function lcs_rec($bottoml1, $topl1, $bottoml2, $topl2, &$V, &$snake) {
 173+ // check that both sequences are non-empty
 174+ if ($bottoml1 > $topl1 || $bottoml2 > $topl2) {
 175+ return 0;
 176+ }
146177
147 - $fpForw = $snakeBeginForw = $snakeEndForw = array_fill( 0, $delta_plus_1, -1);
148 - $lcsSizeForw = $snakekForw = $lcsSizeBack = $snakekBack = array_fill( 0, $delta_plus_1, 0);
149 - $fpBack = $snakeBeginBack = $snakeEndBack = array_fill( 0, $delta_plus_1, $n);
150 - $overlap = $delta>1 ? array_fill( 1, $delta_min_1, FALSE) : array();
151 - $bestLcsLength = $bestLcsLengthTop = $bestLcsLengthBottom = -1;
 178+ $d = $this->find_middle_snake($bottoml1, $topl1, $bottoml2, $topl2, $V, $snake);
152179
153 - if($boundRunningTime){
154 - $maxp_before_bound = max((-$delta+sqrt($delta*$delta+($max_NP_before_bound<<2)))>>1,2);
155 - if($maxp_before_bound>=$m){
156 - $boundRunningTime = false;
157 - unset($maxp_before_bound);
 180+ // need to store these so we don't lose them when they're overwritten by
 181+ // the recursion
 182+ $len = $snake[2];
 183+ $startx = $snake[0];
 184+ $starty = $snake[1];
 185+
 186+ // the middle snake is part of the LCS, store it
 187+ for ($i = 0; $i < $len; ++$i) {
 188+ $this->removed[$startx + $i] = $this->added[$starty + $i] = false;
158189 }
 190+
 191+ if ($d > 1) {
 192+ return $len
 193+ + $this->lcs_rec($bottoml1, $startx - 1, $bottoml2, $starty - 1, $V, $snake)
 194+ + $this->lcs_rec($startx + $len, $topl1, $starty + $len, $topl2, $V, $snake);
 195+ } else if ($d == 1) {
 196+ /*
 197+ * In this case the sequences differ by exactly 1 line. We have
 198+ * already saved all the lines after the difference in the for loop
 199+ * above, now we need to save all the lines before the difference.
 200+ */
 201+ $max = min($startx - $bottoml1, $starty - $bottoml2);
 202+ for ($i = 0; $i < $max; ++$i) {
 203+ $this->removed[$bottoml1 + $i] = $this->added[$bottoml2 + $i] = false;
 204+ }
 205+ return $max + $len;
 206+ }
 207+ return $len;
159208 }
160209
161 - $p=-1;
162 - $m_min_1 = $m-1;
163 - $maxp=$m_min_1-$bestLcsLength;
 210+ private function find_middle_snake($bottoml1, $topl1, $bottoml2,$topl2, &$V, &$snake) {
 211+ $from = &$this->from;
 212+ $to = &$this->to;
 213+ $V0 = &$V[0];
 214+ $V1 = &$V[1];
 215+ $snake0 = &$snake[0];
 216+ $snake1 = &$snake[1];
 217+ $snake2 = &$snake[2];
 218+ $bottoml1_min_1 = $bottoml1-1;
 219+ $bottoml2_min_1 = $bottoml2-1;
 220+ $N = $topl1 - $bottoml1_min_1;
 221+ $M = $topl2 - $bottoml2_min_1;
 222+ $delta = $N - $M;
 223+ $maxabsx = $N+$bottoml1;
 224+ $maxabsy = $M+$bottoml2;
 225+ $limit = min($this->maxDifferences, ceil(($N + $M ) / 2)); // ceil((N+M)/2)
164226
165 - while($p<$maxp){
 227+ //value_to_add_forward: a 0 or 1 that we add to the start
 228+ // offset
 229+ // to make it odd/even
 230+ if (($M & 1) == 1) {
 231+ $value_to_add_forward = 1;
 232+ } else {
 233+ $value_to_add_forward = 0;
 234+ }
166235
167 - if($boundRunningTime && $p>$maxp_before_bound){
168 - // bound the running time by stopping early
169 - if($bestLcsLength>=0){
170 - // accept the best LCS thusfar
171 - break;
172 - }else{
173 - $bestLcsProgressForw = $bestkForw = 0;
174 - foreach($lcsSizeForw as $k => $localLcsProgress){
175 - if($localLcsProgress>$bestLcsProgressForw || ($localLcsProgress==$bestLcsProgressForw && $bestkForw > $k)){
176 - $bestLcsProgressForw = $localLcsProgress;
177 - $bestkForw = $k;
178 - }
179 - }
180 - $bestLcsProgressBack = $bestkBack = 0;
181 - foreach($lcsSizeBack as $k => $localLcsProgress){
182 - if($localLcsProgress>$bestLcsProgressBack || ($localLcsProgress==$bestLcsProgressBack && $bestkBack < $k)){
183 - $bestLcsProgressBack = $localLcsProgress;
184 - $bestkBack = $k;
185 - }
186 - }
187 - if($fpForw[$bestkForw]-$bestkForw>$fpBack[$bestkBack]-$bestkBack){
188 - // This is hard, maybe try some more? Can this even happen? A solution will be found soon anyway.
189 - }else{
190 - // lets do this
191 - $topSnakeStart = $snakeBeginForw[$bestkForw];
192 - $topSnakeEnd = $snakeEndForw[$bestkForw];
193 - $topSnakek = $snakekForw[$bestkForw];
194 - $bestLcsLengthTop = $lcsSizeForw[$bestkBack] + $topSnakeStart - $topSnakeEnd;
 236+ if (($N & 1) == 1) {
 237+ $value_to_add_backward = 1;
 238+ } else {
 239+ $value_to_add_backward = 0;
 240+ }
195241
196 - $bottomSnakeStart = $snakeEndBack[$bestkBack]+1;
197 - $bottomSnakeEnd = $snakeBeginBack[$bestkBack]+1;
198 - $bottomSnakek = $snakekBack[$bestkBack];
199 - $bestLcsLengthBottom = $lcsSizeBack[$bestkBack] + $bottomSnakeStart - $bottomSnakeEnd;
 242+ $start_forward = -$M;
 243+ $end_forward = $N;
 244+ $start_backward = -$N;
 245+ $end_backward = $M;
200246
201 - if($bottomSnakeStart-$topSnakeEnd>$n*0.6 && ($bottomSnakeStart-$bottomSnakek)-($topSnakeEnd-$topSnakek)>$m*0.6){
202 - // cut the sequence from both sides (there isn't enough progress, 60% of the sequence is not covered)
203 - // also diff the middle now
204 - if($bottomSnakeEnd>($fpForw[$bestkForw]>>1)){
205 - // do the middle diff between the snakes
206 - wfDebug(" Limiting diff run time from both sides, middle between snakes\n");
207 - $m_t = ($bottomSnakeStart-$bottomSnakek)-($topSnakeEnd-$topSnakek);
208 - $n_t = $bottomSnakeStart-$topSnakeEnd;
209 - $a_t = array_slice($a, $topSnakeEnd-$topSnakek, $m_t);
210 - $b_t = array_slice($b, $topSnakeEnd, $n_t);
211 - $offsetx_t = $offsetx+($topSnakeEnd-$topSnakek);
212 - $offsety_t = $offsety+$topSnakeEnd;
213 - }else{
214 - // the snake is too early in the sequence, do the middle diff between endpoints of progress
215 - wfDebug(" Limiting diff run time from both sides, middle between endpoints\n");
216 - $m_t = ($fpBack[$bestkBack]+1-$bestkBack)-($fpForw[$bestkForw]-$bestkForw);
217 - $n_t = $fpBack[$bestkBack]+1-$fpForw[$bestkForw];
218 - $a_t = array_slice($a, $fpForw[$bestkForw]-$bestkForw, $m_t);
219 - $b_t = array_slice($b, $fpForw[$bestkForw], $n_t);
220 - $offsetx_t = $offsetx+($fpForw[$bestkForw]-$bestkForw);
221 - $offsety_t = $offsety+$fpForw[$bestkForw];
222 - }
223 - wikidiff3_diffPart($a_t, $b_t, $a_inLcs, $b_inLcs, $m_t, $n_t, $offsetx_t, $offsety_t, $m_t, $boundRunningTime, $max_NP_before_bound);
224 - }else if(min($n-$bottomSnakeStart, $m-($bottomSnakeStart-$bottomSnakek))>min($topSnakeEnd, $topSnakeEnd-$topSnakek)){
225 - // the bottom snake is more interesting
226 - wfDebug("Limiting diff run time from bottom side\n");
227 - $topSnakeStart = $bottomSnakeStart;
228 - $topSnakeEnd = $bottomSnakeEnd;
229 - $topSnakek = $bottomSnakek;
230 - $bestLcsLengthTop = $topSnakeEnd-$topSnakek;
231 - $bottomSnakeStart = $bottomSnakeEnd;
232 - }else{
233 - // the top snake is more interesting
234 - wfDebug(" Limiting diff run time from top side\n");
235 - $bottomSnakeStart = $topSnakeEnd;
236 - $bottomSnakeEnd = $topSnakeEnd;
237 - $bottomSnakek = $topSnakek;
238 - $bestLcsLengthBottom = $m-($bottomSnakeEnd-$bottomSnakek);
239 - }
240 - break;
241 - }
242 - }
243 - }
244 - ++$p;
245 - $overlap[-$p] = $overlap[$delta+$p] = FALSE;
 247+ $limit_min_1 = $limit-1;
 248+ $limit_plus_1 = $limit+1;
246249
247 - $min_p_min_1 = -$p-1;
248 - $delta_plus_1_plus_p = $delta_plus_1+$p;
 250+ $V0[$limit_plus_1] = 0;
 251+ $V1[$limit_min_1] = $N;
 252+ $limit = min($this->maxDifferences, ceil(($N + $M ) / 2)); // ceil((N+M)/2)
 253+
 254+ if (($delta & 1) == 1) {
 255+ for ($d = 0; $d <= $limit; ++$d) {
 256+ $start_diag = max($value_to_add_forward + $start_forward, -$d);
 257+ $end_diag = min($end_forward, $d);
 258+ $value_to_add_forward = 1 - $value_to_add_forward;
249259
250 - // forward
251 - $fpForw[$min_p_min_1] = $snakeEndForw[$min_p_min_1] = $snakekForw[$min_p_min_1] = $snakeBeginForw[$min_p_min_1] = $fpForw[$delta_plus_1_plus_p] =
252 - $snakeBeginForw[$delta_plus_1_plus_p] = $snakeEndForw[$delta_plus_1_plus_p] = $snakekForw[$delta_plus_1_plus_p] = -1;
253 - $lcsSizeForw[$min_p_min_1] = $lcsSizeForw[$delta_plus_1_plus_p] = 0;
 260+ // compute forward furthest reaching paths
 261+ for ($k = $start_diag; $k <= $end_diag; $k += 2) {
 262+ if ($k == -$d
 263+ || ($k < $d && $V0[$limit_min_1 + $k] < $V0[$limit_plus_1 + $k])) {
 264+ $x = $V0[$limit_plus_1 + $k];
 265+ } else {
 266+ $x = $V0[$limit_min_1 + $k] + 1;
 267+ }
254268
255 - $k=-$p;
256 - do {
257 - $k_plus_1 = $k+1;
258 - $k_min_1 = $k-1;
 269+ $absx = $snake0 = $x + $bottoml1;
 270+ $absy = $snake1 = $x - $k + $bottoml2;
 271+
 272+ while ($absx < $maxabsx && $absy < $maxabsy && $from[$absx] === $to[$absy]) {
 273+ ++$absx;
 274+ ++$absy;
 275+ }
 276+ $x = $absx-$bottoml1;
 277+
 278+ $snake2 = $absx -$snake0;
 279+ $V0[$limit + $k] = $x;
 280+ if ($k >= $delta - $d + 1 && $k <= $delta + $d - 1
 281+ && $x >= $V1[$limit + $k - $delta]) {
 282+ return 2 * $d - 1;
 283+ }
259284
260 - $fpBelow = $fpForw[$k_min_1]+1;
261 - $fpAbove = $fpForw[$k_plus_1];
262 - $y = &$fpForw[$k];
263 - if($fpBelow>$fpAbove){
264 - $oldy = $y = $fpBelow;
265 - $lcsSizeForw[$k] = $lcsSizeForw[$k_min_1];
266 - $snakeBeginForw[$k] = $snakeBeginForw[$k_min_1];
267 - $snakeEndForw[$k] = $snakeEndForw[$k_min_1];
268 - $snakekForw[$k] = $snakekForw[$k_min_1];
269 - }else{
270 - $oldy = $y = $fpAbove;
271 - $lcsSizeForw[$k] = $lcsSizeForw[$k_plus_1];
272 - $snakeBeginForw[$k] = $snakeBeginForw[$k_plus_1];
273 - $snakeEndForw[$k] = $snakeEndForw[$k_plus_1];
274 - $snakekForw[$k] = $snakekForw[$k_plus_1];
275 - }
276 - $x = $y-$k;
277 - while($x < $m && $y < $n && $a[$x]===$b[$y]){
278 - ++$x;
279 - ++$y;
280 - }
281 - if($y-$oldy>0){
282 - $lcsSizeForw[$k] += $y-$oldy;
283 - $snakeBeginForw[$k] = $oldy;
284 - $snakeEndForw[$k]= $y;
285 - $snakekForw[$k] = $k;
286 - }
287 - if($y>=$fpBack[$k] && !$overlap[$k]){
288 - // there is overlap
289 - $overlap[$k]=TRUE;
290 - $lcsLength = $lcsSizeForw[$k]+$lcsSizeBack[$k];
291 - if($y>$fpBack[$k]+1){
292 - $snakeoverlap = $y-$fpBack[$k]-1;
293 - $lcsLength -= $snakeoverlap;
 285+ // check to see if we can cut down the diagonal range
 286+ if ($x >= $N && $end_forward > $k - 1) {
 287+ $end_forward = $k - 1;
 288+ } else if ($absy-$bottoml2 >= $M) {
 289+ $start_forward = $k + 1;
 290+ $value_to_add_forward = 0;
 291+ }
294292 }
295 - if($lcsLength>$bestLcsLength){
296 - // a better sequence found!
297 - $bestLcsLength = $lcsLength;
298293
299 - $topSnakeStart = $snakeBeginForw[$k];
300 - $topSnakeEnd = $snakeEndForw[$k];
301 - $topSnakek = $snakekForw[$k];
 294+ $start_diag = max($value_to_add_backward + $start_backward, -$d);
 295+ $end_diag = min($end_backward, $d);
 296+ $value_to_add_backward = 1 - $value_to_add_backward;
302297
303 - // aligned snakes bite (\ )
304 - // ( \ \)
305 - $bottomSnakeStart = max($snakeEndBack[$k]+1, $topSnakeEnd+max(0,$snakekBack[$k]-$snakekForw[$k]));
306 - $bottomSnakeEnd = max($snakeBeginBack[$k]+1, $bottomSnakeStart);
307 - $bottomSnakek = $snakekBack[$k];
 298+ // compute backward furthest reaching paths
 299+ for ($k = $start_diag; $k <= $end_diag; $k += 2) {
 300+ if ($k == $d
 301+ || ($k != -$d && $V1[$limit_min_1 + $k] < $V1[$limit_plus_1 + $k])) {
 302+ $x = $V1[$limit_min_1 + $k];
 303+ } else {
 304+ $x = $V1[$limit_plus_1 + $k] - 1;
 305+ }
308306
309 - if($bottomSnakeEnd<$y){
310 - $bottomSnakeStart = $y;
311 - $bottomSnakeEnd = $y;
312 - $bottomSnakek = $k;
 307+ $y = $x - $k - $delta;
 308+
 309+ $snake2 = 0;
 310+ while ($x > 0 && $y > 0
 311+ && $from[$x +$bottoml1_min_1] === $to[$y + $bottoml2_min_1]) {
 312+ --$x;
 313+ --$y;
 314+ ++$snake2;
313315 }
 316+ $V1[$limit + $k] = $x;
314317
315 - $bestLcsLengthTop = $lcsSizeForw[$k]-($snakeEndForw[$k]-$snakeBeginForw[$k]);
316 - $bestLcsLengthBottom = $lcsSizeBack[$k]-($snakeBeginBack[$k]-$snakeEndBack[$k]);
317 - if($bestKnownLcs==$lcsLength){
318 - $fpForw[$k]=$y;
319 - break 2;
 318+ // check to see if we can cut down our diagonal range
 319+ if ($x <= 0) {
 320+ $start_backward = $k + 1;
 321+ $value_to_add_backward = 0;
 322+ } else if ($y <= 0 && $end_backward > $k - 1) {
 323+ $end_backward = $k - 1;
320324 }
321 - $maxp=$m_min_1-$bestLcsLength;
322325 }
323326 }
324 - if($k<$delta_min_1){
325 - ++$k;
326 - }else if($k>$delta){
327 - --$k;
328 - }else if($k==$delta_min_1){
329 - $k = $delta+$p;
330 - }else{
331 - break;
332 - }
333 - } while(TRUE);
 327+ } else {
 328+ for ($d = 0; $d <= $limit; ++$d) {
 329+ $start_diag = max($value_to_add_forward + $start_forward, -$d);
 330+ $end_diag = min($end_forward, $d);
 331+ $value_to_add_forward = 1 - $value_to_add_forward;
334332
335 - // backward
336 - $fpBack[$min_p_min_1] = $snakeBeginBack[$min_p_min_1] = $snakeEndBack[$min_p_min_1] = $snakekBack[$min_p_min_1] =
337 - $fpBack[$delta_plus_1_plus_p] = $snakeBeginBack[$delta_plus_1_plus_p] = $snakeEndBack[$delta_plus_1_plus_p] = $snakekBack[$delta_plus_1_plus_p] = $n;
338 - $lcsSizeBack[$min_p_min_1] = $lcsSizeBack[$delta_plus_1_plus_p] = 0;
 333+ // compute forward furthest reaching paths
 334+ for ($k = $start_diag; $k <= $end_diag; $k += 2) {
 335+ if ($k == -$d
 336+ || ($k < $d && $V0[$limit_min_1 + $k] < $V0[$limit_plus_1 + $k])) {
 337+ $x = $V0[$limit_plus_1 + $k];
 338+ } else {
 339+ $x = $V0[$limit_min_1 + $k] + 1;
 340+ }
339341
340 - $k=$delta+$p;
341 - do {
342 - $k_plus_1 = $k+1;
343 - $k_min_1 = $k-1;
 342+ $absx = $snake0 = $x + $bottoml1;
 343+ $absy = $snake1 = $x - $k + $bottoml2;
 344+
 345+ while ($absx < $maxabsx && $absy < $maxabsy && $from[$absx] === $to[$absy]) {
 346+ ++$absx;
 347+ ++$absy;
 348+ }
 349+ $x = $absx-$bottoml1;
 350+ $snake2 = $absx -$snake0;
 351+ $V0[$limit + $k] = $x;
344352
345 - $fpBelow = $fpBack[$k_min_1];
346 - $fpAbove = $fpBack[$k_plus_1]-1;
347 - $y = &$fpBack[$k];
348 - if($fpBelow<$fpAbove){
349 - $oldy = $y = $fpBelow;
350 - $lcsSizeBack[$k] = $lcsSizeBack[$k_min_1];
351 - $snakeBeginBack[$k] = $snakeBeginBack[$k_min_1];
352 - $snakeEndBack[$k] = $snakeEndBack[$k_min_1];
353 - $snakekBack[$k] = $snakekBack[$k_min_1];
354 - }else{
355 - $oldy = $y = $fpAbove;
356 - $lcsSizeBack[$k] = $lcsSizeBack[$k_plus_1];
357 - $snakeBeginBack[$k] = $snakeBeginBack[$k_plus_1];
358 - $snakeEndBack[$k] = $snakeEndBack[$k_plus_1];
359 - $snakekBack[$k] = $snakekBack[$k_plus_1];
360 - }
361 - $x = $y-$k;
362 - while($x > -1 && $y > -1 && $a[$x]===$b[$y]){
363 - --$x;
364 - --$y;
365 - }
366 - if($oldy-$y>0){
367 - $lcsSizeBack[$k] += $oldy-$y;
368 - $snakeBeginBack[$k] = $oldy;
369 - $snakeEndBack[$k] = $y;
370 - $snakekBack[$k] = $k;
371 - }
372 - if($fpForw[$k]>=$y && !$overlap[$k]){
373 - // there is overlap
374 - $overlap[$k]=TRUE;
375 - $lcsLength = $lcsSizeForw[$k]+$lcsSizeBack[$k];
376 - if($fpForw[$k]>$y+1){
377 - $snakeoverlap = $fpForw[$k]-$y-1;
378 - $lcsLength -= $snakeoverlap;
 353+ // check to see if we can cut down the diagonal range
 354+ if ($x >= $N && $end_forward > $k - 1) {
 355+ $end_forward = $k - 1;
 356+ } else if ($absy-$bottoml2 >= $M) {
 357+ $start_forward = $k + 1;
 358+ $value_to_add_forward = 0;
 359+ }
379360 }
380 - if($lcsLength>$bestLcsLength){
381 - // a better sequence found!
382 - $bestLcsLength = $lcsLength;
383361
384 - $topSnakeStart = $snakeBeginForw[$k];
385 - $topSnakeEnd = $snakeEndForw[$k];
386 - $topSnakek = $snakekForw[$k];
 362+ $start_diag = max($value_to_add_backward + $start_backward, -$d);
 363+ $end_diag = min($end_backward, $d);
 364+ $value_to_add_backward = 1 - $value_to_add_backward;
387365
388 - // aligned snakes bite (\ )
389 - // ( \ \)
390 - $bottomSnakeStart = max($snakeEndBack[$k]+1, $topSnakeEnd+max(0,$snakekBack[$k]-$snakekForw[$k]));
391 - $bottomSnakeEnd = max($snakeBeginBack[$k]+1, $bottomSnakeStart);
392 - $bottomSnakek = $snakekBack[$k];
 366+ // compute backward furthest reaching paths
 367+ for ($k = $start_diag; $k <= $end_diag; $k += 2) {
 368+ if ($k == $d
 369+ || ($k != -$d && $V1[$limit_min_1 + $k] < $V1[$limit_plus_1 + $k])) {
 370+ $x = $V1[$limit_min_1 + $k];
 371+ } else {
 372+ $x = $V1[$limit_plus_1 + $k] - 1;
 373+ }
393374
394 - if($bottomSnakeEnd<$fpForw[$k]){
395 - $bottomSnakeStart = $fpForw[$k];
396 - $bottomSnakeEnd = $fpForw[$k];
397 - $bottomSnakek = $k;
 375+ $y = $x - $k - $delta;
 376+
 377+ $snake2 = 0;
 378+ while ($x > 0 && $y > 0
 379+ && $from[$x +$bottoml1_min_1] === $to[$y + $bottoml2_min_1]) {
 380+ --$x;
 381+ --$y;
 382+ ++$snake2;
398383 }
 384+ $V1[$limit + $k] = $x;
399385
400 - $bestLcsLengthTop = $lcsSizeForw[$k]-($snakeEndForw[$k]-$snakeBeginForw[$k]);
401 - $bestLcsLengthBottom = $lcsSizeBack[$k]-($snakeBeginBack[$k]-$snakeEndBack[$k]);
402 - if($bestKnownLcs==$lcsLength){
403 - $fpBack[$k] = $y;
404 - break 2;
 386+ if ($k >= -$delta - $d && $k <= $d - $delta
 387+ && $x <= $V0[$limit + $k + $delta]) {
 388+ $snake0 = $bottoml1 + $x;
 389+ $snake1 = $bottoml2 + $y;
 390+ return 2 * $d;
405391 }
406 - $maxp=$m_min_1-$bestLcsLength;
 392+
 393+ // check to see if we can cut down our diagonal range
 394+ if ($x <= 0) {
 395+ $start_backward = $k + 1;
 396+ $value_to_add_backward = 0;
 397+ } else if ($y <= 0 && $end_backward > $k - 1) {
 398+ $end_backward = $k - 1;
 399+ }
407400 }
408401 }
409 - if($k>1){
410 - --$k;
411 - }else if($k<0){
412 - ++$k;
413 - }else if($k==1){
414 - $k = -$p;
415 - }else{
416 - break;
417 - }
418 - } while(TRUE);
419 - }
 402+ }
 403+ /*
 404+ * computing the true LCS is too expensive, instead find the diagonal
 405+ * with the most progress and pretend a midle snake of length 0 occurs
 406+ * there.
 407+ */
420408
421 - unset($fpForw, $fpBack, $lcsSizeForw, $lcsSizeBack);
422 - unset($snakeBeginForw, $snakeBeginBack, $snakeEndForw, $snakeEndBack, $snakekForw, $snakekBack);
423 - unset($overlap);
 409+ $most_progress = self::findMostProgress($M, $N, $limit, $V);
424410
425 - // Mark snakes as in LCS
426 - $maxi = $offsetx+$topSnakeEnd-$topSnakek;
427 - for($i=$offsetx+$topSnakeStart-$topSnakek;$i<$maxi;++$i){
428 - $a_inLcs_sym[$i] = TRUE;
 411+ $snake0 = $bottoml1 + $most_progress[0];
 412+ $snake1 = $bottoml2 + $most_progress[1];
 413+ $snake2 = 0;
 414+ wfDebug('Computing the LCS is too expensive. Using a heuristic.');
 415+ $this->heuristicUsed = true;
 416+ return 5; /*
 417+ * HACK: since we didn't really finish the LCS computation
 418+ * we don't really know the length of the SES. We don't do
 419+ * anything with the result anyway, unless it's <=1. We know
 420+ * for a fact SES > 1 so 5 is as good a number as any to
 421+ * return here
 422+ */
429423 }
430 - $maxi = $offsety+$topSnakeEnd;
431 - for($i=$offsety+$topSnakeStart;$i<$maxi;++$i){
432 - $b_inLcs_sym[$i] = TRUE;
433 - }
434 - $maxi = $offsetx+$bottomSnakeEnd-$bottomSnakek;
435 - for($i=$offsetx+$bottomSnakeStart-$bottomSnakek;$i<$maxi;++$i){
436 - $a_inLcs_sym[$i] = TRUE;
437 - }
438 - $maxi = $offsety+$bottomSnakeEnd;
439 - for($i=$offsety+$bottomSnakeStart;$i<$maxi;++$i){
440 - $b_inLcs_sym[$i] = TRUE;
441 - }
442424
443 - $m_t = $topSnakeStart-$topSnakek;
444 - $a_t = array_slice($a, 0, $m_t);
445 - $b_t = array_slice($b, 0, $topSnakeStart);
 425+ private static function findMostProgress($M, $N, $limit, $V) {
 426+ $delta = $N - $M;
446427
447 - $m_b = $m-($bottomSnakeEnd-$bottomSnakek);
448 - $n_b = $n-$bottomSnakeEnd;
449 - $a_b = array_slice($a, $bottomSnakeEnd-$bottomSnakek, $m_b);
450 - $b_b = array_slice($b, $bottomSnakeEnd, $n_b);
 428+ if (($M & 1) == ($limit & 1)) {
 429+ $forward_start_diag = max(-$M, -$limit);
 430+ } else {
 431+ $forward_start_diag = max(1 - $M, -$limit);
 432+ }
451433
452 - wikidiff3_diffPart($a_t, $b_t, $a_inLcs, $b_inLcs, $m_t, $topSnakeStart, $offsetx, $offsety, $bestLcsLengthTop, $boundRunningTime, $max_NP_before_bound);
 434+ $forward_end_diag = min($N, $limit);
453435
454 - wikidiff3_diffPart($a_b, $b_b, $a_inLcs, $b_inLcs, $m_b, $n_b, $offsetx+($bottomSnakeEnd-$bottomSnakek), $offsety+$bottomSnakeEnd, $bestLcsLengthBottom, $boundRunningTime, $max_NP_before_bound);
455 -}
 436+ if (($N & 1) == ($limit & 1)) {
 437+ $backward_start_diag = max(-$N, -$limit);
 438+ } else {
 439+ $backward_start_diag = max(1 - $N, -$limit);
 440+ }
456441
457 -class InLcs {
 442+ $backward_end_diag = -min($M, $limit);
458443
459 - public $inLcs;
 444+ $temp = array(0,0,0);
460445
461 - function __construct($length){
462 - $this->inLcs = $length>0 ? array_fill(0,$length,FALSE): array();
463 - }
464446
465 - /**
466 - * Get the length of the Longest Common Subsequence (computed)
467 - */
468 - public function getLcsLength(){
469 - return array_sum($this->inLcs);
 447+ $max_progress = array_fill(0,ceil(max($forward_end_diag
 448+ - $forward_start_diag, $backward_end_diag - $backward_start_diag) / 2), $temp);
 449+ $num_progress = 0; // the 1st entry is current, it is initialized
 450+ // with 0s
 451+
 452+ // first search the forward diagonals
 453+ for ($k = $forward_start_diag; $k <= $forward_end_diag; $k += 2) {
 454+ $x = $V[0][$limit + $k];
 455+ $y = $x - $k;
 456+ if ($x > $N || $y > $M) {
 457+ continue;
 458+ }
 459+
 460+ $progress = $x + $y;
 461+ if ($progress > $max_progress[0][2]) {
 462+ $num_progress = 0;
 463+ $max_progress[0][0] = $x;
 464+ $max_progress[0][1] = $y;
 465+ $max_progress[0][2] = $progress;
 466+ } else if ($progress == $max_progress[0][2]) {
 467+ ++$num_progress;
 468+ $max_progress[$num_progress][0] = $x;
 469+ $max_progress[$num_progress][1] = $y;
 470+ $max_progress[$num_progress][2] = $progress;
 471+ }
 472+ }
 473+
 474+ $max_progress_forward = true; // initially the maximum
 475+ // progress is in the forward
 476+ // direction
 477+
 478+ // now search the backward diagonals
 479+ for ($k = $backward_start_diag; $k <= $backward_end_diag; $k += 2) {
 480+ $x = $V[1][$limit + $k];
 481+ $y = $x - $k - $delta;
 482+ if ($x < 0 || $y < 0) {
 483+ continue;
 484+ }
 485+
 486+ $progress = $N - $x + $M - $y;
 487+ if ($progress > $max_progress[0][2]) {
 488+ $num_progress = 0;
 489+ $max_progress_forward = false;
 490+ $max_progress[0][0] = $x;
 491+ $max_progress[0][1] = $y;
 492+ $max_progress[0][2] = $progress;
 493+ } else if ($progress == $max_progress[0][2] && !$max_progress_forward) {
 494+ ++$num_progress;
 495+ $max_progress[$num_progress][0] = $x;
 496+ $max_progress[$num_progress][1] = $y;
 497+ $max_progress[$num_progress][2] = $progress;
 498+ }
 499+ }
 500+
 501+ // return the middle diagonal with maximal progress.
 502+ return $max_progress[floor($num_progress / 2)];
470503 }
471504
472505 }
 506+
Index: trunk/phase3/includes/DifferenceEngine.php
@@ -74,9 +74,10 @@
7575 }
7676
7777 function showDiffPage( $diffOnly = false ) {
78 - global $wgUser, $wgOut, $wgUseExternalEditor, $wgUseRCPatrol;
 78+ global $wgUser, $wgOut, $wgUseExternalEditor, $wgUseRCPatrol, $wgEnableHtmlDiff;
7979 wfProfileIn( __METHOD__ );
8080
 81+
8182 # If external diffs are enabled both globally and for the user,
8283 # we'll use the application/x-external-editor interface to call
8384 # an external diff tool like kompare, kdiff3, etc.
@@ -265,11 +266,16 @@
266267 '<div id="mw-diff-ntitle3">' . $newminor . $sk->revComment( $this->mNewRev, !$diffOnly, true ) . $rdel . "</div>" .
267268 '<div id="mw-diff-ntitle4">' . $nextlink . $patrol . '</div>';
268269
269 - $this->showDiff( $oldHeader, $newHeader );
 270+ if($wgEnableHtmlDiff){
 271+ $this->renderHtmlDiff();
 272+ }else{
270273
271 - if ( !$diffOnly )
272 - $this->renderNewRevision();
 274+ $this->showDiff( $oldHeader, $newHeader );
273275
 276+ if ( !$diffOnly ){
 277+ $this->renderNewRevision();
 278+ }
 279+ }
274280 wfProfileOut( __METHOD__ );
275281 }
276282
@@ -319,6 +325,65 @@
320326 wfProfileOut( __METHOD__ );
321327 }
322328
 329+
 330+ function renderHtmlDiff() {
 331+ global $wgOut, $IP;
 332+ wfProfileIn( __METHOD__ );
 333+
 334+ $this->showDiffStyle();
 335+
 336+ require_once( "$IP/includes/HTMLDiff.php" );
 337+
 338+ #add deleted rev tag if needed
 339+ if( !$this->mNewRev->userCan(Revision::DELETED_TEXT) ) {
 340+ $wgOut->addWikiMsg( 'rev-deleted-text-permission' );
 341+ } else if( $this->mNewRev->isDeleted(Revision::DELETED_TEXT) ) {
 342+ $wgOut->addWikiMsg( 'rev-deleted-text-view' );
 343+ }
 344+
 345+ if( !$this->mNewRev->isCurrent() ) {
 346+ $oldEditSectionSetting = $wgOut->parserOptions()->setEditSection( false );
 347+ }
 348+
 349+ $this->loadText();
 350+
 351+ if( is_object( $this->mOldRev ) ) {
 352+ $wgOut->setRevisionId( $this->mOldRev->getId() );
 353+ }
 354+
 355+ global $wgTitle, $wgParser, $wgTitle;
 356+ $popts = $wgOut->parserOptions();
 357+ $oldTidy = $popts->setTidy( TRUE );
 358+
 359+ $parserOutput = $wgParser->parse($this->mOldtext, $wgTitle, $popts, TRUE, TRUE, $wgOut->mRevisionId );
 360+ $popts->setTidy( $oldTidy );
 361+
 362+ //only for new?
 363+ //$wgOut->addParserOutputNoText( $parserOutput );
 364+ $oldHtml = $parserOutput->getText();
 365+ wfRunHooks( 'OutputPageBeforeHTML',array( &$wgOut, &$oldHtml ) );
 366+
 367+ if( is_object( $this->mNewRev ) ) {
 368+ $wgOut->setRevisionId( $this->mNewRev->getId() );
 369+ }
 370+
 371+ $popts = $wgOut->parserOptions();
 372+ $oldTidy = $popts->setTidy( TRUE );
 373+
 374+ $parserOutput = $wgParser->parse($this->mNewtext, $wgTitle, $popts, TRUE, TRUE, $wgOut->mRevisionId );
 375+ $popts->setTidy( $oldTidy );
 376+
 377+ //only for new?
 378+ $wgOut->addParserOutputNoText( $parserOutput );
 379+ $newHtml = $parserOutput->getText();
 380+ wfRunHooks( 'OutputPageBeforeHTML',array( &$wgOut, &$newHtml ) );
 381+
 382+ $differ = new HTMLDiffer(new DelegatingContentHandler($wgOut));
 383+ $differ->htmlDiff($oldHtml, $newHtml);
 384+
 385+ wfProfileOut( __METHOD__ );
 386+ }
 387+
323388 /**
324389 * Show the first revision of an article. Uses normal diff headers in
325390 * contrast to normal "old revision" display style.
@@ -893,6 +958,30 @@
894959 }
895960
896961 /**
 962+ * Alternative representation of a set of changes, by the index
 963+ * ranges that are changed.
 964+ */
 965+class RangeDifference {
 966+
 967+ public $leftstart;
 968+ public $leftend;
 969+ public $leftlength;
 970+
 971+ public $rightstart;
 972+ public $rightend;
 973+ public $rightlength;
 974+
 975+ function __construct($leftstart, $leftend, $rightstart, $rightend){
 976+ $this->leftstart = $leftstart;
 977+ $this->leftend = $leftend;
 978+ $this->leftlength = $leftend-$leftstart;
 979+ $this->rightstart = $rightstart;
 980+ $this->rightend = $rightend;
 981+ $this->rightlength = $rightend-$rightstart;
 982+ }
 983+}
 984+
 985+/**
897986 * Class used internally by Diff to actually compute the diffs.
898987 *
899988 * The algorithm used here is mostly lifted from the perl module
@@ -920,22 +1009,108 @@
9211010
9221011 const MAX_XREF_LENGTH = 10000;
9231012
924 - function diff ($from_lines, $to_lines) {
 1013+ function diff ($from_lines, $to_lines){
9251014 wfProfileIn( __METHOD__ );
9261015
927 - global $wgExternalDiffEngine;
 1016+ // Diff and store locally
 1017+ $this->diff_local($from_lines, $to_lines);
9281018
 1019+ // Merge edits when possible
 1020+ $this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged);
 1021+ $this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged);
 1022+
 1023+ // Compute the edit operations.
9291024 $n_from = sizeof($from_lines);
9301025 $n_to = sizeof($to_lines);
9311026
 1027+ $edits = array();
 1028+ $xi = $yi = 0;
 1029+ while ($xi < $n_from || $yi < $n_to) {
 1030+ USE_ASSERTS && assert($yi < $n_to || $this->xchanged[$xi]);
 1031+ USE_ASSERTS && assert($xi < $n_from || $this->ychanged[$yi]);
 1032+
 1033+ // Skip matching "snake".
 1034+ $copy = array();
 1035+ while ( $xi < $n_from && $yi < $n_to
 1036+ && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
 1037+ $copy[] = $from_lines[$xi++];
 1038+ ++$yi;
 1039+ }
 1040+ if ($copy)
 1041+ $edits[] = new _DiffOp_Copy($copy);
 1042+
 1043+ // Find deletes & adds.
 1044+ $delete = array();
 1045+ while ($xi < $n_from && $this->xchanged[$xi])
 1046+ $delete[] = $from_lines[$xi++];
 1047+
 1048+ $add = array();
 1049+ while ($yi < $n_to && $this->ychanged[$yi])
 1050+ $add[] = $to_lines[$yi++];
 1051+
 1052+ if ($delete && $add)
 1053+ $edits[] = new _DiffOp_Change($delete, $add);
 1054+ elseif ($delete)
 1055+ $edits[] = new _DiffOp_Delete($delete);
 1056+ elseif ($add)
 1057+ $edits[] = new _DiffOp_Add($add);
 1058+ }
 1059+ wfProfileOut( __METHOD__ );
 1060+ return $edits;
 1061+ }
 1062+
 1063+ function diff_range ($from_lines, $to_lines){
 1064+ wfProfileIn( __METHOD__ );
 1065+
 1066+ // Diff and store locally
 1067+ $this->diff_local($from_lines, $to_lines);
 1068+
 1069+ // Compute the ranges operations.
 1070+ $n_from = sizeof($from_lines);
 1071+ $n_to = sizeof($to_lines);
 1072+
 1073+ $ranges = array();
 1074+ $xi = $yi = 0;
 1075+ while ($xi < $n_from || $yi < $n_to) {
 1076+ // Matching "snake".
 1077+ while ( $xi < $n_from && $yi < $n_to
 1078+ && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
 1079+ ++$xi;
 1080+ ++$yi;
 1081+ }
 1082+ // Find deletes & adds.
 1083+ $xstart = $xi;
 1084+ while ($xi < $n_from && $this->xchanged[$xi]){
 1085+ ++$xi;
 1086+ }
 1087+
 1088+ $ystart = $yi;
 1089+ while ($yi < $n_to && $this->ychanged[$yi]){
 1090+ ++$yi;
 1091+ }
 1092+
 1093+ if ($xi>$xstart || $yi>$ystart){
 1094+ $ranges[] = new RangeDifference($xstart,$xi,$ystart,$yi);
 1095+ }
 1096+ }
 1097+ wfProfileOut( __METHOD__ );
 1098+ return $ranges;
 1099+ }
 1100+
 1101+ function diff_local ($from_lines, $to_lines) {
 1102+ global $wgExternalDiffEngine;
 1103+ wfProfileIn( __METHOD__);
 1104+
9321105 if($wgExternalDiffEngine == 'wikidiff3'){
9331106 // wikidiff3
9341107 global $IP;
9351108 require_once( "$IP/includes/Diff.php" );
936 - list($this->xchanged, $this->ychanged) = wikidiff3_diff($from_lines, $to_lines, TRUE, 100000);
 1109+ $wikidiff3 = new WikiDiff3();
 1110+ list($this->xchanged, $this->ychanged) = $wikidiff3->diff($from_lines, $to_lines);
9371111 }else{
9381112 // old diff
939 - wfProfileIn( __METHOD__ ." - basicdiff");
 1113+ $n_from = sizeof($from_lines);
 1114+ $n_to = sizeof($to_lines);
9401115 $this->xchanged = $this->ychanged = array();
9411116 $this->xv = $this->yv = array();
9421117 $this->xind = $this->yind = array();
@@ -980,48 +1155,8 @@
9811156
9821157 // Find the LCS.
9831158 $this->_compareseq(0, sizeof($this->xv), 0, sizeof($this->yv));
984 - wfProfileOut( __METHOD__ ." - basicdiff");
9851159 }
986 -
987 - // Merge edits when possible
988 - $this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged);
989 - $this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged);
990 -
991 - // Compute the edit operations.
992 - $edits = array();
993 - $xi = $yi = 0;
994 - while ($xi < $n_from || $yi < $n_to) {
995 - USE_ASSERTS && assert($yi < $n_to || $this->xchanged[$xi]);
996 - USE_ASSERTS && assert($xi < $n_from || $this->ychanged[$yi]);
997 -
998 - // Skip matching "snake".
999 - $copy = array();
1000 - while ( $xi < $n_from && $yi < $n_to
1001 - && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
1002 - $copy[] = $from_lines[$xi++];
1003 - ++$yi;
1004 - }
1005 - if ($copy)
1006 - $edits[] = new _DiffOp_Copy($copy);
1007 -
1008 - // Find deletes & adds.
1009 - $delete = array();
1010 - while ($xi < $n_from && $this->xchanged[$xi])
1011 - $delete[] = $from_lines[$xi++];
1012 -
1013 - $add = array();
1014 - while ($yi < $n_to && $this->ychanged[$yi])
1015 - $add[] = $to_lines[$yi++];
1016 -
1017 - if ($delete && $add)
1018 - $edits[] = new _DiffOp_Change($delete, $add);
1019 - elseif ($delete)
1020 - $edits[] = new _DiffOp_Delete($delete);
1021 - elseif ($add)
1022 - $edits[] = new _DiffOp_Add($add);
1023 - }
10241160 wfProfileOut( __METHOD__ );
1025 - return $edits;
10261161 }
10271162
10281163 /**
@@ -1077,7 +1212,6 @@
10781213 $numer = $xlim - $xoff + $nchunks - 1;
10791214 $x = $xoff;
10801215 for ($chunk = 0; $chunk < $nchunks; $chunk++) {
1081 - wfProfileIn( __METHOD__ . "-chunk" );
10821216 if ($chunk > 0)
10831217 for ($i = 0; $i <= $this->lcs; $i++)
10841218 $ymids[$i][$chunk-1] = $this->seq[$i];
@@ -1130,7 +1264,6 @@
11311265 if ($end == 0 || $ypos > $this->seq[$end]) {
11321266 $this->seq[++$this->lcs] = $ypos;
11331267 $this->in_seq[$ypos] = 1;
1134 - wfProfileOut( __METHOD__ );
11351268 return $this->lcs;
11361269 }
11371270

Follow-up revisions

RevisionCommit summaryAuthorDate
r39415Fixes for r39406:...ialex16:51, 15 August 2008

Status & tagging log