Index: trunk/extensions/DoubleWiki/DoubleWiki_body.php |
— | — | @@ -0,0 +1,335 @@ |
| 2 | +<?php |
| 3 | + |
| 4 | +# This program is free software; you can redistribute it and/or modify |
| 5 | +# it under the terms of the GNU General Public License as published by |
| 6 | +# the Free Software Foundation; either version 2 of the License, or |
| 7 | +# (at your option) any later version. |
| 8 | +# |
| 9 | +# This program is distributed in the hope that it will be useful, |
| 10 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 12 | +# GNU General Public License for more details. |
| 13 | +# |
| 14 | +# You should have received a copy of the GNU General Public License along |
| 15 | +# with this program; if not, write to the Free Software Foundation, Inc., |
| 16 | +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
| 17 | +# http://www.gnu.org/copyleft/gpl.html |
| 18 | + |
| 19 | + |
| 20 | + |
/**
 * Renders a page side by side with its translation from another wiki.
 *
 * When the request carries a "match=<interwiki prefix>" parameter and the
 * page has a language link with that prefix, the remote page is fetched with
 * action=render and both texts are laid out in a two-column table.
 */
class DoubleWiki {

	/**
	 * Constructor: registers addMatchedText() on the OutputPageBeforeHTML hook.
	 *
	 * Declared as __construct() because a method merely named after the class
	 * is no longer treated as a constructor by PHP 8, which would leave the
	 * hook unregistered.
	 */
	function __construct() {
		global $wgHooks;
		$wgHooks['OutputPageBeforeHTML'][] = array( $this, 'addMatchedText' );
	}


	/**
	 * Hook function called with &match=lang
	 * Transform $text into a bilingual version
	 *
	 * @param object $parserOutput object handed over by the hook; only its
	 *   mLanguageLinks member is read here
	 * @param string $text HTML of the local page; replaced in place by the
	 *   two-column table when a matching translation could be fetched
	 * @return bool always true, so that hook processing continues
	 */
	function addMatchedText ( &$parserOutput , &$text ) {

		global $wgContLang, $wgRequest, $wgLang, $wgContLanguageCode, $wgTitle;

		$match_request = $wgRequest->getText( 'match' );
		if ( $match_request === '' ) {
			return true;
		}

		foreach ( $parserOutput->mLanguageLinks as $l ) {
			$nt = Title::newFromText( $l );
			if ( !$nt ) {
				# not a valid title: newFromText() gave us nothing to work with
				continue;
			}

			$iw = $nt->getInterwiki();
			if ( $iw !== $match_request ) {
				continue;
			}

			$url = $nt->getFullURL();
			$myURL = $wgTitle->getLocalURL();
			$languageName = $wgContLang->getLanguageName( $iw );
			$myLanguage = $wgLang->getLanguageName( $wgContLanguageCode );

			$sep = ( strpos( $url, '?' ) !== false ) ? '&' : '?';
			$translation = Http::get( $url . $sep . 'action=render' );

			# Http::get() reports failure with false; in that case the page
			# is left untouched instead of showing an empty right column.
			if ( is_string( $translation ) ) {
				#first find all links that have no 'class' parameter.
				#these links are local so we add '?match=xx' to their url,
				#unless it already contains a '?'
				# NOTE(review): \2 (and \3 below) refer to a repeated group, so
				# only the last repetition is re-emitted -- confirm whether
				# dropping the other attributes is intended.
				$translation = preg_replace(
					"/<a href=\"http:\/\/([^\"\?]*)\"(([\s]+)(c(?!lass=)|[^c\>\s])([^\>\s]*))*\>/i",
					"<a href=\"http://\\1?match={$wgContLanguageCode}\"\\2>", $translation );
				#now add class='extiw' to these links
				$translation = preg_replace(
					"/<a href=\"http:\/\/([^\"]*)\"(([\s]+)(c(?!lass=)|[^c\>\s])([^\>\s]*))*\>/i",
					"<a href=\"http://\\1\" class=\"extiw\"\\3>", $translation );
				#use class='extiw' for images too
				$translation = preg_replace(
					"/<a href=\"http:\/\/([^\"]*)\"([^\>]*)class=\"image\"([^\>]*)\>/i",
					"<a href=\"http://\\1\"\\2class=\"extiw\"\\3>", $translation );

				#add prefixes to internal links, in order to prevent duplicates
				$translation = preg_replace( "/<a href=\"#(.*?)\"/i", "<a href=\"#l_\\1\"",
					$translation );
				$translation = preg_replace( "/<li id=\"(.*?)\"/i", "<li id=\"l_\\1\"",
					$translation );
				$text = preg_replace( "/<a href=\"#(.*?)\"/i", "<a href=\"#r_\\1\"", $text );
				$text = preg_replace( "/<li id=\"(.*?)\"/i", "<li id=\"r_\\1\"", $text );

				#add tags before h2 and h3 sections
				$translation = preg_replace( "/<h2>/i", "<div title=\"@@h2\"></div>\n<h2>",
					$translation );
				$translation = preg_replace( "/<h3>/i", "<div title=\"@@h3\"></div>\n<h3>",
					$translation );
				$text = preg_replace( "/<h2>/i", "<div title=\"@@h2\"></div>\n<h2>", $text );
				$text = preg_replace( "/<h3>/i", "<div title=\"@@h3\"></div>\n<h3>", $text );

				#add ?match= to local links of the local wiki
				$text = preg_replace( "/<a href=\"\/([^\"\?]*)\"/i",
					"<a href=\"/\\1?match={$match_request}\"", $text );

				#do the job
				$text = $this->matchColumns( $text, $myLanguage, $myURL, $wgContLanguageCode,
					$translation, $languageName, $url, $match_request );
			}

			# only the first language link with the requested prefix is used
			return true;
		}
		return true;
	}


	/**
	 * Return table with two columns of text
	 * Text is split into slices based on title tags
	 *
	 * @param string $left_text HTML of the left column
	 * @param string $left_title caption of the left column (inserted as is)
	 * @param string $left_url link target of the left caption (HTML-escaped here)
	 * @param string $left_lang_code value of the lang attribute of the left cells
	 * @param string $right_text HTML of the right column
	 * @param string $right_title caption of the right column (inserted as is)
	 * @param string $right_url link target of the right caption (HTML-escaped here)
	 * @param string $right_lang_code value of the lang attribute of the right cells
	 * @return string HTML of the complete table
	 */
	function matchColumns( $left_text, $left_title, $left_url, $left_lang_code,
			$right_text, $right_title, $right_url, $right_lang_code ) {

		# note about embedding:
		# text is split only at a single level.
		# initially we assume that this level is zero
		# if nesting is encountered before the
		# first paragraph, then this split level is increased
		# we keep track of the current nesting level during processing
		# if (current level != split level) then we do not split the text

		# the current level of embedding (stack depth)
		$left_nesting = 0;
		$right_nesting = 0;

		#the level of embedding where the text is split
		#initial value is -1 until actual value is known
		$left_splitlevel = -1;
		$right_splitlevel = -1;

		# opening/closing tag sequences of the split level; they are assigned
		# on the first paragraph processed and only initialised here so that
		# they are always defined
		$left_opening = '';
		$right_opening = '';
		$left_closure = '';
		$right_closure = '';

		# split text
		$tag_pattern = "/<div title=\"([^\"]*)\"><\/div>/i";
		$left_slices = preg_split( $tag_pattern, $left_text );
		$right_slices = preg_split( $tag_pattern, $right_text );
		preg_match_all( $tag_pattern, $left_text, $left_tags, PREG_PATTERN_ORDER );
		preg_match_all( $tag_pattern, $right_text, $right_tags, PREG_PATTERN_ORDER );

		/**
		 * Order slices in a two-column array.
		 * slices that are surrounded by the same tag belong in the same line
		 * $i indexes the left column, $j the right column.
		 */
		$body = '';
		$left_chunk = '';
		$right_chunk = '';
		$paragraph_tags = "/<(p|dl)>/i";

		$j = 0;
		$max_i = count( $left_slices );
		# slices are rewritten below but never added or removed
		$max_k = count( $right_slices );

		for ( $i = 0; $i < $max_i; $i++ ) {
			$found = false;
			$left_chunk .= $left_slices[$i];

			if ( $i == $max_i - 1 ) {
				# if we are at the end of the loop, finish quickly
				for ( $k = $j; $k < $max_k; $k++ ) {
					$right_chunk .= $right_slices[$k];
				}
				$found = true;
			} else {
				for ( $k = $j; $k < $max_k; $k++ ) {

					#look for requested tag in the text
					$a = strpos( $right_slices[$k], $left_tags[1][$i] );
					# Truthiness test kept on purpose: an occurrence at offset 0
					# counts as "not found". After the else branch below has cut
					# the slice so that it starts at the tag text, this is what
					# lets the following iteration move past that occurrence.
					# NOTE(review): confirm this was the author's intent.
					if ( $a ) {
						#go to beginning of paragraph
						#this regexp matches the rightmost delimiter
						$sub = substr( $right_slices[$k], 0, $a );
						if ( preg_match( "/(.*)<(p|dl)>/is", $sub, $matches ) ) {
							$right_chunk .= $matches[1];
							$right_slices[$k] = substr( $right_slices[$k], strlen( $matches[1] ) );
						} else {
							$right_chunk .= $sub;
							$right_slices[$k] = substr( $right_slices[$k], $a );
						}

						$found = true;
						$j = $k;
						break;
					}

					$right_chunk .= $right_slices[$k];

					# the last slice has no tag following it
					if ( $k < $max_k - 1 && $left_tags[0][$i] == $right_tags[0][$k] ) {
						$found = true;
						$j = $k + 1;
						break;
					}
				}
			}

			if ( !$found ) {
				# no counterpart yet: keep accumulating the left side and
				# rescan the right side from $j on the next turn
				$right_chunk = '';
				continue;
			}

			#split chunks into smaller units (paragraphs)
			$left_bits = preg_split( $paragraph_tags, $left_chunk );
			$right_bits = preg_split( $paragraph_tags, $right_chunk );
			preg_match_all( $paragraph_tags, $left_chunk, $left_seps, PREG_PATTERN_ORDER );
			preg_match_all( $paragraph_tags, $right_chunk, $right_seps, PREG_PATTERN_ORDER );

			$left_chunk = '';
			$right_chunk = '';

			# add separators that were cut off
			$left_count = count( $left_bits );
			for ( $l = 1; $l < $left_count; $l++ ) {
				$left_bits[$l] = $left_seps[0][$l - 1] . $left_bits[$l];
			}
			$right_count = count( $right_bits );
			for ( $l = 1; $l < $right_count; $l++ ) {
				$right_bits[$l] = $right_seps[0][$l - 1] . $right_bits[$l];
			}

			# initialize missing elements
			$max = max( $left_count, $right_count );
			$left_bits = array_pad( $left_bits, $max, '' );
			$right_bits = array_pad( $right_bits, $max, '' );

			for ( $l = 0; $l < $max; $l++ ) {

				list( $left_delta, $left_o, $left_c ) = $this->nesting_delta( $left_bits[$l] );
				list( $right_delta, $right_o, $right_c ) = $this->nesting_delta( $right_bits[$l] );

				$left_nesting = $left_nesting + $left_delta;
				$right_nesting = $right_nesting + $right_delta;

				#are we at the end?
				$the_end = ( $l == $max - 1 ) && ( $i == $max_i - 1 );

				if ( ( $left_splitlevel == -1 ) && ( $right_splitlevel == -1 ) ) {
					# first paragraph: it fixes the split level and the tags
					# that have to be reopened/closed around later rows
					$left_splitlevel = $left_nesting;
					$right_splitlevel = $right_nesting;
					$left_opening = $left_o;
					$right_opening = $right_o;
					$left_closure = $left_c;
					$right_closure = $right_c;

					$left_prefix = '';
					$right_prefix = '';
					$left_suffix = $left_closure;
					$right_suffix = $right_closure;
				} else if ( $the_end ) {
					$left_prefix = $left_opening;
					$right_prefix = $right_opening;
					$left_suffix = '';
					$right_suffix = '';
				} else {
					$left_prefix = $left_opening;
					$right_prefix = $right_opening;
					$left_suffix = $left_closure;
					$right_suffix = $right_closure;
				}

				if ( ( ( $left_nesting == $left_splitlevel )
					&& ( $right_nesting == $right_splitlevel ) ) || $the_end ) {
					$body .=
						"<tr><td valign=\"top\" style=\"padding-right: 0.5em\" lang=\"{$left_lang_code}\">"
						."<div style=\"width:35em; margin:0px auto\">\n"
						.$left_prefix.$left_bits[$l].$left_suffix
						."</div>"

						."</td>\n<td valign=\"top\" style=\"padding-left: 0.5em\" lang=\"{$right_lang_code}\">"
						."<div style=\"width:35em; margin:0px auto\">\n"
						.$right_prefix.$right_bits[$l].$right_suffix
						."</div>"
						."</td></tr>\n";
				} else {
					# procrastinate: undo the level change and carry this bit
					# over to the next one (or to the next chunk)
					$left_nesting = $left_nesting - $left_delta;
					$right_nesting = $right_nesting - $right_delta;
					if ( $l < $max - 1 ) {
						$left_bits[$l + 1] = $left_bits[$l] . $left_bits[$l + 1];
						$right_bits[$l + 1] = $right_bits[$l] . $right_bits[$l + 1];
					} else {
						$left_chunk = $left_bits[$l];
						$right_chunk = $right_bits[$l];
					}
				}
			}
		}


		# format table head and return results
		$left_url = htmlspecialchars( $left_url );
		$right_url = htmlspecialchars( $right_url );
		$head =
"<table width=\"100%\" border=\"0\" bgcolor=\"white\" rules=\"cols\" cellpadding=\"0\">
<colgroup><col width=\"50%\"/><col width=\"50%\"/></colgroup><thead>
<tr><td bgcolor=\"#cfcfff\" align=\"center\" lang=\"{$left_lang_code}\">
<a href=\"{$left_url}\">{$left_title}</a></td>
<td bgcolor=\"#cfcfff\" align=\"center\" lang=\"{$right_lang_code}\">
<a href=\"{$right_url}\" class='extiw'>{$right_title}</a>
</td></tr></thead>\n";
		return $head.$body."</table>";
	}


	/**
	 * returns how much the stack is changed
	 * also returns opening and closing sequences of tag
	 *
	 * @param string $text HTML fragment to scan
	 * @return array ( net number of tags left open, concatenated opening
	 *   tags of the first that-many stack entries, the closing tags for
	 *   those entries in reverse order )
	 */
	function nesting_delta ( $text ) {
		#tags that must be closed. (list copied from Sanitizer.php)
		$tags = "/<\/?(b|del|i|ins|u|font|big|small|sub|sup|h1|h2|h3|h4|h5|h6|"
			."cite|code|em|s|strike|strong|tt|tr|td|var|div|center|blockquote|ol|ul|dl|"
			."table|caption|pre|ruby|rt|rb|rp|p|span)([\s](.*?)>|>)/i";
		preg_match_all( $tags, $text, $m, PREG_SET_ORDER );

		$stack = array();
		$counter = 0;
		foreach ( $m as $t ) {
			if ( substr( $t[0], 0, 2 ) != "</" ) {
				$counter++;
				array_push( $stack, $t );
			} else {
				# a closing tag pops whatever is on top; it is not checked
				# against the tag being closed, and the counter may go
				# negative when the fragment closes tags opened before it
				array_pop( $stack );
				$counter--;
			}
		}

		$opening = '';
		$closure = '';
		for ( $i = 0; $i < $counter; $i++ ) {
			$opening .= $stack[$i][0];
			$closure = "</" . $stack[$i][1] . ">" . $closure;
		}

		return array( $counter, $opening, $closure );
	}

}
Index: trunk/extensions/DoubleWiki/DoubleWiki.php |
— | — | @@ -21,8 +21,11 @@ |
22 | 22 | # The translation comes from another wiki |
23 | 23 | # that can be accessed through interlanguage links |
24 | 24 | |
25 | | -$wgHooks['OutputPageBeforeHTML'][] = 'addMatchedText' ; |
26 | 25 | |
# Extension wiring: where the DoubleWiki class and the message file live,
# and the setup function to run on the ParserFirstCallInit hook.
$wgAutoloadClasses['DoubleWiki'] = dirname( __FILE__ ) . '/DoubleWiki_body.php';
$wgExtensionMessagesFiles['DoubleWiki'] = dirname( __FILE__ ) . '/DoubleWiki.i18n.php';
$wgHooks['ParserFirstCallInit'][] = 'wfDoubleWiki';
| 29 | + |
27 | 30 | $wgExtensionCredits['other'][] = array( |
28 | 31 | 'path' => __FILE__, |
29 | 32 | 'name' => 'DoubleWiki', |
— | — | @@ -31,303 +34,10 @@ |
32 | 35 | 'descriptionmsg' => 'doublewiki-desc', |
33 | 36 | ); |
34 | 37 | |
35 | | -$wgExtensionMessagesFiles['DoubleWiki'] = dirname(__FILE__) . '/DoubleWiki.i18n.php'; |
36 | 38 | |
37 | | -function addMatchedText ( &$parserOutput , &$text ) { |
38 | | - |
39 | | - global $wgContLang, $wgRequest, $wgLang, $wgContLanguageCode, $wgTitle; |
40 | | - |
41 | | - $match_request = $wgRequest->getText( 'match' ); |
42 | | - if ( $match_request === '' ) { |
43 | | - return true; |
44 | | - } |
45 | | - |
46 | | - foreach( $parserOutput->mLanguageLinks as $l ) { |
47 | | - $nt = Title::newFromText( $l ); |
48 | | - $iw = $nt->getInterwiki(); |
49 | | - if( $iw === $match_request ){ |
50 | | - $url = $nt->getFullURL(); |
51 | | - $myURL = $wgTitle -> getLocalURL() ; |
52 | | - $languageName = $wgContLang->getLanguageName( $nt->getInterwiki() ); |
53 | | - $myLanguage = $wgLang->getLanguageName( $wgContLanguageCode ); |
54 | | - |
55 | | - $sep = ( in_string( '?', $url ) ) ? '&' : '?'; |
56 | | - $translation = Http::get( $url.$sep.'action=render' ); |
57 | | - if ( $translation !== null ) { |
58 | | - #first find all links that have no 'class' parameter. |
59 | | - #these links are local so we add '?match=xx' to their url, |
60 | | - #unless it already contains a '?' |
61 | | - $translation = preg_replace( |
62 | | - "/<a href=\"http:\/\/([^\"\?]*)\"(([\s]+)(c(?!lass=)|[^c\>\s])([^\>\s]*))*\>/i", |
63 | | - "<a href=\"http://\\1?match={$wgContLanguageCode}\"\\2>", $translation ); |
64 | | - #now add class='extiw' to these links |
65 | | - $translation = preg_replace( |
66 | | - "/<a href=\"http:\/\/([^\"]*)\"(([\s]+)(c(?!lass=)|[^c\>\s])([^\>\s]*))*\>/i", |
67 | | - "<a href=\"http://\\1\" class=\"extiw\"\\3>", $translation ); |
68 | | - #use class='extiw' for images too |
69 | | - $translation = preg_replace( |
70 | | - "/<a href=\"http:\/\/([^\"]*)\"([^\>]*)class=\"image\"([^\>]*)\>/i", |
71 | | - "<a href=\"http://\\1\"\\2class=\"extiw\"\\3>", $translation ); |
72 | | - |
73 | | - #add prefixes to internal links, in order to prevent duplicates |
74 | | - $translation = preg_replace("/<a href=\"#(.*?)\"/i","<a href=\"#l_\\1\"", |
75 | | - $translation ); |
76 | | - $translation = preg_replace("/<li id=\"(.*?)\"/i","<li id=\"l_\\1\"", |
77 | | - $translation ); |
78 | | - $text = preg_replace("/<a href=\"#(.*?)\"/i","<a href=\"#r_\\1\"", $text ); |
79 | | - $text = preg_replace("/<li id=\"(.*?)\"/i","<li id=\"r_\\1\"", $text ); |
80 | | - |
81 | | - #add tags before h2 and h3 sections |
82 | | - $translation = preg_replace("/<h2>/i","<div title=\"@@h2\"></div>\n<h2>", |
83 | | - $translation ); |
84 | | - $translation = preg_replace("/<h3>/i","<div title=\"@@h3\"></div>\n<h3>", |
85 | | - $translation ); |
86 | | - $text = preg_replace("/<h2>/i","<div title=\"@@h2\"></div>\n<h2>", $text ); |
87 | | - $text = preg_replace("/<h3>/i","<div title=\"@@h3\"></div>\n<h3>", $text ); |
88 | | - |
89 | | - #add ?match= to local links of the local wiki |
90 | | - $text = preg_replace( "/<a href=\"\/([^\"\?]*)\"/i", |
91 | | - "<a href=\"/\\1?match={$match_request}\"", $text ); |
92 | | - |
93 | | - #do the job |
94 | | - $text = matchColumns ( $text, $myLanguage, $myURL , |
95 | | - $translation, $languageName, $url, $wgContLanguageCode, $match_request ); |
96 | | - } |
97 | | - return true; |
98 | | - } |
99 | | - } |
/**
 * Setup function registered on the ParserFirstCallInit hook.
 *
 * Creates a DoubleWiki object; the object itself is not kept here.
 *
 * @return bool always true
 */
function wfDoubleWiki() {
	new DoubleWiki();

	return true;
}
102 | 43 | |
103 | 44 | |
104 | | -/** |
105 | | - * Return table with two columns of text |
106 | | - * Text is split into slices based on title tags |
107 | | - */ |
108 | | - |
109 | | -function matchColumns( $left_text, $left_title, $left_url, $right_text, $right_title, $right_url, $left_lang_code, $right_lang_code ){ |
110 | | - |
111 | | - # note about emdedding: |
112 | | - # text is split only at a single level. |
113 | | - # initially we assume that this level is zero |
114 | | - # if nesting is encountered before the |
115 | | - # first paragraph, then this split level is increased |
116 | | - # we keep track of the current nesting level during processing |
117 | | - # if (current level != split level) then we do not split the text |
118 | | - |
119 | | - # the current level of embedding (stack depth) |
120 | | - $left_nesting = 0; |
121 | | - $right_nesting = 0; |
122 | | - |
123 | | - #the level of embedding where the text is split |
124 | | - #initial value is -1 until actual value is known |
125 | | - $left_splitlevel = -1; |
126 | | - $right_splitlevel = -1; |
127 | | - |
128 | | - # split text |
129 | | - $tag_pattern = "/<div title=\"([^\"]*)\"><\/div>/i"; |
130 | | - $left_slices = preg_split( $tag_pattern, $left_text ); |
131 | | - $right_slices = preg_split( $tag_pattern, $right_text ); |
132 | | - preg_match_all( $tag_pattern, $left_text, $left_tags, PREG_PATTERN_ORDER ); |
133 | | - preg_match_all( $tag_pattern, $right_text, $right_tags, PREG_PATTERN_ORDER ); |
134 | | - |
135 | | - /** |
136 | | - * Order slices in a two-column array. |
137 | | - * slices that are surrounded by the same tag belong in the same line |
138 | | - * $i indexes the left column, $j the right column. |
139 | | - */ |
140 | | - $body = ''; |
141 | | - $left_chunk = ''; |
142 | | - $right_chunk = ''; |
143 | | - |
144 | | - $j=0; |
145 | | - $max_i = count( $left_slices ); |
146 | | - for ( $i=0 ; $i < $max_i ; $i++ ) { |
147 | | - $found = false; |
148 | | - $left_chunk .= $left_slices[$i]; |
149 | | - |
150 | | - $max_k = count( $right_slices ); |
151 | | - |
152 | | - # if we are at the end of the loop, finish quickly |
153 | | - if ( $i==$max_i - 1 ) { |
154 | | - for ( $k=$j ; $k < $max_k ; $k++ ) $right_chunk .= $right_slices[$k]; |
155 | | - $found = true; |
156 | | - } |
157 | | - else for ( $k=$j ; $k < $max_k ; $k++ ) { |
158 | | - |
159 | | - #look for requested tag in the text |
160 | | - $a = strpos ( $right_slices[$k], $left_tags[1][$i] ); |
161 | | - if( $a ) { |
162 | | - #go to beginning of paragraph |
163 | | - #this regexp matches the rightmost delimiter |
164 | | - $sub = substr( $right_slices[$k], 0, $a); |
165 | | - if ( preg_match("/(.*)<(p|dl)>/is", $sub, $matches ) ){ |
166 | | - $right_chunk .= $matches[1]; |
167 | | - $right_slices[$k] = substr( $right_slices[$k], strlen($matches[1]) ); |
168 | | - } |
169 | | - else { |
170 | | - $right_chunk .= $sub; |
171 | | - $right_slices[$k] = substr( $right_slices[$k], $a ); |
172 | | - } |
173 | | - |
174 | | - $found = true; |
175 | | - $j = $k; |
176 | | - break; |
177 | | - } |
178 | | - |
179 | | - $right_chunk .= $right_slices[$k]; |
180 | | - |
181 | | - if( $k < $max_k - 1 ) { |
182 | | - if( $left_tags[0][$i] == $right_tags[0][$k] ) { |
183 | | - $found = true; |
184 | | - $j = $k+1; |
185 | | - break; |
186 | | - } |
187 | | - } |
188 | | - } |
189 | | - if( $found ) { |
190 | | - |
191 | | - #split chunks into smaller units (paragraphs) |
192 | | - $paragraph_tags = "/<(p|dl)>/i"; |
193 | | - $left_bits = preg_split( $paragraph_tags, $left_chunk ); |
194 | | - $right_bits = preg_split( $paragraph_tags, $right_chunk ); |
195 | | - preg_match_all( $paragraph_tags, $left_chunk, $left_seps, PREG_PATTERN_ORDER ); |
196 | | - preg_match_all( $paragraph_tags, $right_chunk, $right_seps, PREG_PATTERN_ORDER ); |
197 | | - |
198 | | - $left_chunk = ''; |
199 | | - $right_chunk = ''; |
200 | | - |
201 | | - # add separators that were cut off |
202 | | - for($l=1; $l < count( $left_bits ); $l++ ) { |
203 | | - $left_bits[$l] = $left_seps[0][$l-1].$left_bits[$l]; |
204 | | - } |
205 | | - for($l=1; $l < count( $right_bits ); $l++ ) { |
206 | | - $right_bits[$l] = $right_seps[0][$l-1].$right_bits[$l]; |
207 | | - } |
208 | | - |
209 | | - $max = max( count( $left_bits ) , count( $right_bits )); |
210 | | - # initialize missing elements |
211 | | - for($l= count( $left_bits ); $l<$max; $l++) $left_bits[$l]=''; |
212 | | - for($l= count( $right_bits ); $l<$max; $l++) $right_bits[$l]=''; |
213 | | - |
214 | | - for($l=0; $l < $max; $l++ ) { |
215 | | - |
216 | | - list($left_delta,$left_o,$left_c) = nesting_delta( $left_bits[$l] ); |
217 | | - list($right_delta,$right_o,$right_c) = nesting_delta( $right_bits[$l] ); |
218 | | - |
219 | | - $left_nesting = $left_nesting + $left_delta; |
220 | | - $right_nesting = $right_nesting + $right_delta; |
221 | | - |
222 | | - #are we at the end? |
223 | | - $the_end = ($l == $max-1) && ($i == $max_i -1 ); |
224 | | - |
225 | | - if(( $left_splitlevel == -1) && ($right_splitlevel == -1)) { |
226 | | - $left_splitlevel = $left_nesting; |
227 | | - $right_splitlevel = $right_nesting; |
228 | | - $left_opening = $left_o; |
229 | | - $right_opening = $right_o; |
230 | | - $left_closure = $left_c; |
231 | | - $right_closure = $right_c; |
232 | | - |
233 | | - $left_prefix = ''; |
234 | | - $right_prefix = ''; |
235 | | - $left_suffix = $left_closure; |
236 | | - $right_suffix = $right_closure; |
237 | | - } |
238 | | - else if($the_end) { |
239 | | - $left_prefix = $left_opening; |
240 | | - $right_prefix = $right_opening; |
241 | | - $left_suffix = ''; |
242 | | - $right_suffix = ''; |
243 | | - } |
244 | | - else { |
245 | | - $left_prefix = $left_opening; |
246 | | - $right_prefix = $right_opening; |
247 | | - $left_suffix = $left_closure; |
248 | | - $right_suffix = $right_closure; |
249 | | - } |
250 | | - |
251 | | - if( ( ($left_nesting == $left_splitlevel) |
252 | | - && ($right_nesting == $right_splitlevel) ) || $the_end) { |
253 | | - $body .= |
254 | | - "<tr><td valign=\"top\" style=\"padding-right: 0.5em\" lang=\"{$left_lang_code}\">" |
255 | | - ."<div style=\"width:35em; margin:0px auto\">\n" |
256 | | - .$left_prefix.$left_bits[$l].$left_suffix |
257 | | - ."</div>" |
258 | | - |
259 | | - ."</td>\n<td valign=\"top\" style=\"padding-left: 0.5em\" lang=\"{$right_lang_code}\">" |
260 | | - ."<div style=\"width:35em; margin:0px auto\">\n" |
261 | | - .$right_prefix.$right_bits[$l].$right_suffix |
262 | | - ."</div>" |
263 | | - ."</td></tr>\n"; |
264 | | - } |
265 | | - else { |
266 | | - # procrastinate |
267 | | - $left_nesting = $left_nesting - $left_delta; |
268 | | - $right_nesting = $right_nesting - $right_delta; |
269 | | - if ($l < $max-1) { |
270 | | - $left_bits[$l+1] = $left_bits[$l] . $left_bits[$l+1]; |
271 | | - $right_bits[$l+1] = $right_bits[$l] . $right_bits[$l+1]; |
272 | | - } else { |
273 | | - $left_chunk = $left_bits[$l] ; |
274 | | - $right_chunk = $right_bits[$l]; |
275 | | - } |
276 | | - } |
277 | | - } |
278 | | - } |
279 | | - else{ $right_chunk='';} |
280 | | - } |
281 | | - |
282 | | - |
283 | | - # format table head and return results |
284 | | - $left_url = htmlspecialchars( $left_url ); |
285 | | - $right_url = htmlspecialchars( $right_url ); |
286 | | - $head = |
287 | | -"<table width=\"100%\" border=\"0\" bgcolor=\"white\" rules=\"cols\" cellpadding=\"0\"> |
288 | | -<colgroup><col width=\"50%\"/><col width=\"50%\"/></colgroup><thead> |
289 | | -<tr><td bgcolor=\"#cfcfff\" align=\"center\" lang=\"{$left_lang_code}\"> |
290 | | -<a href=\"{$left_url}\">{$left_title}</a></td> |
291 | | -<td bgcolor=\"#cfcfff\" align=\"center\" lang=\"{$right_lang_code}\"> |
292 | | -<a href=\"{$right_url}\" class='extiw'>{$right_title}</a> |
293 | | -</td></tr></thead>\n"; |
294 | | - return $head.$body."</table>" ; |
295 | | -} |
296 | | - |
297 | | - |
298 | | -/* |
299 | | - * returns how much the stack is changed |
300 | | - * also returns opening and closing sequences of tag |
301 | | - */ |
302 | | -function nesting_delta ( $text ) { |
303 | | - #tags that must be closed. (list copied from Sanitizer.php) |
304 | | - $tags = "/<\/?(b|del|i|ins|u|font|big|small|sub|sup|h1|h2|h3|h4|h5|h6|" |
305 | | - ."cite|code|em|s|strike|strong|tt|tr|td|var|div|center|blockquote|ol|ul|dl|" |
306 | | - ."table|caption|pre|ruby|rt|rb|rp|p|span)([\s](.*?)>|>)/i"; |
307 | | - preg_match_all( $tags, $text, $m, PREG_SET_ORDER); |
308 | | - |
309 | | - $stack = array(); |
310 | | - $counter = 0; |
311 | | - $opening = ''; |
312 | | - $closure = ''; |
313 | | - for($i=0; $i < count($m); $i++){ |
314 | | - $t = $m[$i]; |
315 | | - if( substr( $t[0], 0, 2) != "</" ){ |
316 | | - $counter++; |
317 | | - array_push($stack, $t); |
318 | | - } else { |
319 | | - $tt = array_pop($stack); |
320 | | - $counter--; |
321 | | - #if( ($tt != null) && ($tt[1] != $t[1]) ) { |
322 | | - # #input html is buggy... |
323 | | - # echo "Warning: ".$t[1]." encountered, expected ".$tt[1]."<br />\n"; |
324 | | - #} |
325 | | - } |
326 | | - } |
327 | | - for($i=0; $i<$counter; $i++){ |
328 | | - $opening .= $stack[$i][0]; |
329 | | - $closure = "</".$stack[$i][1].">".$closure; |
330 | | - } |
331 | | - |
332 | | - return array($counter, $opening, $closure); |
333 | | - |
334 | | -} |