r71624 MediaWiki - Code Review archive

Repository:	MediaWiki
Revision:	< r71623 | r71624 | r71625 >
Date:	08:09, 25 August 2010
Author:	thomasv
Status:	resolved (Comments)
Tags:
Comment:	moving extension code into its own class; no functional change in this commit
Modified paths:	/trunk/extensions/DoubleWiki/DoubleWiki.php (modified) (history) /trunk/extensions/DoubleWiki/DoubleWiki_body.php (added) (history)

Diff [purge]

Index: trunk/extensions/DoubleWiki/DoubleWiki_body.php
—	—	@@ -0,0 +1,335 @@
	2	+<?php
	3	+
	4	+# This program is free software; you can redistribute it and/or modify
	5	+# it under the terms of the GNU General Public License as published by
	6	+# the Free Software Foundation; either version 2 of the License, or
	7	+# (at your option) any later version.
	8	+#
	9	+# This program is distributed in the hope that it will be useful,
	10	+# but WITHOUT ANY WARRANTY; without even the implied warranty of
	11	+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	12	+# GNU General Public License for more details.
	13	+#
	14	+# You should have received a copy of the GNU General Public License along
	15	+# with this program; if not, write to the Free Software Foundation, Inc.,
	16	+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
	17	+# http://www.gnu.org/copyleft/gpl.html
	18	+
	19	+
	20	+
	21	+class DoubleWiki {
	22	+
	23	+ /**
	24	+ * Constructor
	25	+ */
	26	+ function DoubleWiki() {
	27	+ global $wgParser, $wgHooks;
	28	+ $wgHooks['OutputPageBeforeHTML'][] = array( &$this, 'addMatchedText' );
	29	+ }
	30	+
	31	+
	32	+ /*
	33	+ * Hook function called with &match=lang
	34	+ * Transform $text into a bilingual version
	35	+ */
	36	+ function addMatchedText ( &$parserOutput , &$text ) {
	37	+
	38	+ global $wgContLang, $wgRequest, $wgLang, $wgContLanguageCode, $wgTitle;
	39	+
	40	+ $match_request = $wgRequest->getText( 'match' );
	41	+ if ( $match_request === '' ) {
	42	+ return true;
	43	+ }
	44	+
	45	+ foreach( $parserOutput->mLanguageLinks as $l ) {
	46	+ $nt = Title::newFromText( $l );
	47	+ $iw = $nt->getInterwiki();
	48	+ if( $iw === $match_request ){
	49	+ $url = $nt->getFullURL();
	50	+ $myURL = $wgTitle -> getLocalURL() ;
	51	+ $languageName = $wgContLang->getLanguageName( $nt->getInterwiki() );
	52	+ $myLanguage = $wgLang->getLanguageName( $wgContLanguageCode );
	53	+
	54	+ $sep = ( in_string( '?', $url ) ) ? '&' : '?';
	55	+ $translation = Http::get( $url.$sep.'action=render' );
	56	+ if ( $translation !== null ) {
	57	+ #first find all links that have no 'class' parameter.
	58	+ #these links are local so we add '?match=xx' to their url,
	59	+ #unless it already contains a '?'
	60	+ $translation = preg_replace(
	61	+ "/<a href=\"http:\/\/([^\"\?]*)\"(([\s]+)(c(?!lass=)|[^c\>\s])([^\>\s]*))*\>/i",
	62	+ "<a href=\"http://\\1?match={$wgContLanguageCode}\"\\2>", $translation );
	63	+ #now add class='extiw' to these links
	64	+ $translation = preg_replace(
	65	+ "/<a href=\"http:\/\/([^\"]*)\"(([\s]+)(c(?!lass=)|[^c\>\s])([^\>\s]*))*\>/i",
	66	+ "<a href=\"http://\\1\" class=\"extiw\"\\3>", $translation );
	67	+ #use class='extiw' for images too
	68	+ $translation = preg_replace(
	69	+ "/<a href=\"http:\/\/([^\"]*)\"([^\>]*)class=\"image\"([^\>]*)\>/i",
	70	+ "<a href=\"http://\\1\"\\2class=\"extiw\"\\3>", $translation );
	71	+
	72	+ #add prefixes to internal links, in order to prevent duplicates
	73	+ $translation = preg_replace("/<a href=\"#(.*?)\"/i","<a href=\"#l_\\1\"",
	74	+ $translation );
	75	+ $translation = preg_replace("/<li id=\"(.*?)\"/i","<li id=\"l_\\1\"",
	76	+ $translation );
	77	+ $text = preg_replace("/<a href=\"#(.*?)\"/i","<a href=\"#r_\\1\"", $text );
	78	+ $text = preg_replace("/<li id=\"(.*?)\"/i","<li id=\"r_\\1\"", $text );
	79	+
	80	+ #add tags before h2 and h3 sections
	81	+ $translation = preg_replace("/<h2>/i","<div title=\"@@h2\"></div>\n<h2>",
	82	+ $translation );
	83	+ $translation = preg_replace("/<h3>/i","<div title=\"@@h3\"></div>\n<h3>",
	84	+ $translation );
	85	+ $text = preg_replace("/<h2>/i","<div title=\"@@h2\"></div>\n<h2>", $text );
	86	+ $text = preg_replace("/<h3>/i","<div title=\"@@h3\"></div>\n<h3>", $text );
	87	+
	88	+ #add ?match= to local links of the local wiki
	89	+ $text = preg_replace( "/<a href=\"\/([^\"\?]*)\"/i",
	90	+ "<a href=\"/\\1?match={$match_request}\"", $text );
	91	+
	92	+ #do the job
	93	+ $text = $this->matchColumns ( $text, $myLanguage, $myURL, $wgContLanguageCode,
	94	+ $translation, $languageName, $url, $match_request );
	95	+ }
	96	+ return true;
	97	+ }
	98	+ }
	99	+ return true;
	100	+ }
	101	+
	102	+
	103	+ /**
	104	+ * Return table with two columns of text
	105	+ * Text is split into slices based on title tags
	106	+ */
	107	+
	108	+ function matchColumns( $left_text, $left_title, $left_url, $left_lang_code,
	109	+ $right_text, $right_title, $right_url, $right_lang_code ) {
	110	+
	111	+ # note about emdedding:
	112	+ # text is split only at a single level.
	113	+ # initially we assume that this level is zero
	114	+ # if nesting is encountered before the
	115	+ # first paragraph, then this split level is increased
	116	+ # we keep track of the current nesting level during processing
	117	+ # if (current level != split level) then we do not split the text
	118	+
	119	+ # the current level of embedding (stack depth)
	120	+ $left_nesting = 0;
	121	+ $right_nesting = 0;
	122	+
	123	+ #the level of embedding where the text is split
	124	+ #initial value is -1 until actual value is known
	125	+ $left_splitlevel = -1;
	126	+ $right_splitlevel = -1;
	127	+
	128	+ # split text
	129	+ $tag_pattern = "/<div title=\"([^\"]*)\"><\/div>/i";
	130	+ $left_slices = preg_split( $tag_pattern, $left_text );
	131	+ $right_slices = preg_split( $tag_pattern, $right_text );
	132	+ preg_match_all( $tag_pattern, $left_text, $left_tags, PREG_PATTERN_ORDER );
	133	+ preg_match_all( $tag_pattern, $right_text, $right_tags, PREG_PATTERN_ORDER );
	134	+
	135	+ /**
	136	+ * Order slices in a two-column array.
	137	+ * slices that are surrounded by the same tag belong in the same line
	138	+ * $i indexes the left column, $j the right column.
	139	+ */
	140	+ $body = '';
	141	+ $left_chunk = '';
	142	+ $right_chunk = '';
	143	+
	144	+ $j=0;
	145	+ $max_i = count( $left_slices );
	146	+ for ( $i=0 ; $i < $max_i ; $i++ ) {
	147	+ $found = false;
	148	+ $left_chunk .= $left_slices[$i];
	149	+
	150	+ $max_k = count( $right_slices );
	151	+
	152	+ # if we are at the end of the loop, finish quickly
	153	+ if ( $i==$max_i - 1 ) {
	154	+ for ( $k=$j ; $k < $max_k ; $k++ ) $right_chunk .= $right_slices[$k];
	155	+ $found = true;
	156	+ }
	157	+ else for ( $k=$j ; $k < $max_k ; $k++ ) {
	158	+
	159	+ #look for requested tag in the text
	160	+ $a = strpos ( $right_slices[$k], $left_tags[1][$i] );
	161	+ if( $a ) {
	162	+ #go to beginning of paragraph
	163	+ #this regexp matches the rightmost delimiter
	164	+ $sub = substr( $right_slices[$k], 0, $a);
	165	+ if ( preg_match("/(.*)<(p|dl)>/is", $sub, $matches ) ){
	166	+ $right_chunk .= $matches[1];
	167	+ $right_slices[$k] = substr( $right_slices[$k], strlen($matches[1]) );
	168	+ }
	169	+ else {
	170	+ $right_chunk .= $sub;
	171	+ $right_slices[$k] = substr( $right_slices[$k], $a );
	172	+ }
	173	+
	174	+ $found = true;
	175	+ $j = $k;
	176	+ break;
	177	+ }
	178	+
	179	+ $right_chunk .= $right_slices[$k];
	180	+
	181	+ if( $k < $max_k - 1 ) {
	182	+ if( $left_tags[0][$i] == $right_tags[0][$k] ) {
	183	+ $found = true;
	184	+ $j = $k+1;
	185	+ break;
	186	+ }
	187	+ }
	188	+ }
	189	+ if( $found ) {
	190	+
	191	+ #split chunks into smaller units (paragraphs)
	192	+ $paragraph_tags = "/<(p|dl)>/i";
	193	+ $left_bits = preg_split( $paragraph_tags, $left_chunk );
	194	+ $right_bits = preg_split( $paragraph_tags, $right_chunk );
	195	+ preg_match_all( $paragraph_tags, $left_chunk, $left_seps, PREG_PATTERN_ORDER );
	196	+ preg_match_all( $paragraph_tags, $right_chunk, $right_seps, PREG_PATTERN_ORDER );
	197	+
	198	+ $left_chunk = '';
	199	+ $right_chunk = '';
	200	+
	201	+ # add separators that were cut off
	202	+ for($l=1; $l < count( $left_bits ); $l++ ) {
	203	+ $left_bits[$l] = $left_seps[0][$l-1].$left_bits[$l];
	204	+ }
	205	+ for($l=1; $l < count( $right_bits ); $l++ ) {
	206	+ $right_bits[$l] = $right_seps[0][$l-1].$right_bits[$l];
	207	+ }
	208	+
	209	+ $max = max( count( $left_bits ) , count( $right_bits ));
	210	+ # initialize missing elements
	211	+ for($l= count( $left_bits ); $l<$max; $l++) $left_bits[$l]='';
	212	+ for($l= count( $right_bits ); $l<$max; $l++) $right_bits[$l]='';
	213	+
	214	+ for($l=0; $l < $max; $l++ ) {
	215	+
	216	+ list($left_delta,$left_o,$left_c) = $this->nesting_delta( $left_bits[$l] );
	217	+ list($right_delta,$right_o,$right_c) = $this->nesting_delta( $right_bits[$l] );
	218	+
	219	+ $left_nesting = $left_nesting + $left_delta;
	220	+ $right_nesting = $right_nesting + $right_delta;
	221	+
	222	+ #are we at the end?
	223	+ $the_end = ($l == $max-1) && ($i == $max_i -1 );
	224	+
	225	+ if(( $left_splitlevel == -1) && ($right_splitlevel == -1)) {
	226	+ $left_splitlevel = $left_nesting;
	227	+ $right_splitlevel = $right_nesting;
	228	+ $left_opening = $left_o;
	229	+ $right_opening = $right_o;
	230	+ $left_closure = $left_c;
	231	+ $right_closure = $right_c;
	232	+
	233	+ $left_prefix = '';
	234	+ $right_prefix = '';
	235	+ $left_suffix = $left_closure;
	236	+ $right_suffix = $right_closure;
	237	+ }
	238	+ else if($the_end) {
	239	+ $left_prefix = $left_opening;
	240	+ $right_prefix = $right_opening;
	241	+ $left_suffix = '';
	242	+ $right_suffix = '';
	243	+ }
	244	+ else {
	245	+ $left_prefix = $left_opening;
	246	+ $right_prefix = $right_opening;
	247	+ $left_suffix = $left_closure;
	248	+ $right_suffix = $right_closure;
	249	+ }
	250	+
	251	+ if( ( ($left_nesting == $left_splitlevel)
	252	+ && ($right_nesting == $right_splitlevel) ) || $the_end) {
	253	+ $body .=
	254	+ "<tr><td valign=\"top\" style=\"padding-right: 0.5em\" lang=\"{$left_lang_code}\">"
	255	+ ."<div style=\"width:35em; margin:0px auto\">\n"
	256	+ .$left_prefix.$left_bits[$l].$left_suffix
	257	+ ."</div>"
	258	+
	259	+ ."</td>\n<td valign=\"top\" style=\"padding-left: 0.5em\" lang=\"{$right_lang_code}\">"
	260	+ ."<div style=\"width:35em; margin:0px auto\">\n"
	261	+ .$right_prefix.$right_bits[$l].$right_suffix
	262	+ ."</div>"
	263	+ ."</td></tr>\n";
	264	+ }
	265	+ else {
	266	+ # procrastinate
	267	+ $left_nesting = $left_nesting - $left_delta;
	268	+ $right_nesting = $right_nesting - $right_delta;
	269	+ if ($l < $max-1) {
	270	+ $left_bits[$l+1] = $left_bits[$l] . $left_bits[$l+1];
	271	+ $right_bits[$l+1] = $right_bits[$l] . $right_bits[$l+1];
	272	+ } else {
	273	+ $left_chunk = $left_bits[$l] ;
	274	+ $right_chunk = $right_bits[$l];
	275	+ }
	276	+ }
	277	+ }
	278	+ }
	279	+ else{ $right_chunk='';}
	280	+ }
	281	+
	282	+
	283	+ # format table head and return results
	284	+ $left_url = htmlspecialchars( $left_url );
	285	+ $right_url = htmlspecialchars( $right_url );
	286	+ $head =
	287	+"<table width=\"100%\" border=\"0\" bgcolor=\"white\" rules=\"cols\" cellpadding=\"0\">
	288	+<colgroup><col width=\"50%\"/><col width=\"50%\"/></colgroup><thead>
	289	+<tr><td bgcolor=\"#cfcfff\" align=\"center\" lang=\"{$left_lang_code}\">
	290	+<a href=\"{$left_url}\">{$left_title}</a></td>
	291	+<td bgcolor=\"#cfcfff\" align=\"center\" lang=\"{$right_lang_code}\">
	292	+<a href=\"{$right_url}\" class='extiw'>{$right_title}</a>
	293	+</td></tr></thead>\n";
	294	+ return $head.$body."</table>" ;
	295	+ }
	296	+
	297	+
	298	+ /*
	299	+ * returns how much the stack is changed
	300	+ * also returns opening and closing sequences of tag
	301	+ */
	302	+ function nesting_delta ( $text ) {
	303	+ #tags that must be closed. (list copied from Sanitizer.php)
	304	+ $tags = "/<\/?(b|del|i|ins|u|font|big|small|sub|sup|h1|h2|h3|h4|h5|h6|"
	305	+ ."cite|code|em|s|strike|strong|tt|tr|td|var|div|center|blockquote|ol|ul|dl|"
	306	+ ."table|caption|pre|ruby|rt|rb|rp|p|span)([\s](.*?)>|>)/i";
	307	+ preg_match_all( $tags, $text, $m, PREG_SET_ORDER);
	308	+
	309	+ $stack = array();
	310	+ $counter = 0;
	311	+ $opening = '';
	312	+ $closure = '';
	313	+ for($i=0; $i < count($m); $i++){
	314	+ $t = $m[$i];
	315	+ if( substr( $t[0], 0, 2) != "</" ){
	316	+ $counter++;
	317	+ array_push($stack, $t);
	318	+ } else {
	319	+ $tt = array_pop($stack);
	320	+ $counter--;
	321	+ #if( ($tt != null) && ($tt[1] != $t[1]) ) {
	322	+ # #input html is buggy...
	323	+ # echo "Warning: ".$t[1]." encountered, expected ".$tt[1]."<br />\n";
	324	+ #}
	325	+ }
	326	+ }
	327	+ for($i=0; $i<$counter; $i++){
	328	+ $opening .= $stack[$i][0];
	329	+ $closure = "</".$stack[$i][1].">".$closure;
	330	+ }
	331	+
	332	+ return array($counter, $opening, $closure);
	333	+
	334	+ }
	335	+
	336	+}
Index: trunk/extensions/DoubleWiki/DoubleWiki.php
—	—	@@ -21,8 +21,11 @@
22	22	# The translation comes from another wiki
23	23	# that can be accessed through interlanguage links
24	24
25		~~-$wgHooks['OutputPageBeforeHTML'][] = 'addMatchedText' ;~~
26	25
	26	+$wgHooks['ParserFirstCallInit'][] = 'wfDoubleWiki';
	27	+$wgExtensionMessagesFiles['DoubleWiki'] = dirname(__FILE__) . '/DoubleWiki.i18n.php';
	28	+$wgAutoloadClasses['DoubleWiki'] = dirname( __FILE__ ) . "/DoubleWiki_body.php";
	29	+
27	30	$wgExtensionCredits['other'][] = array(
28	31	'path' => __FILE__,
29	32	'name' => 'DoubleWiki',
—	—	@@ -31,303 +34,10 @@
32	35	'descriptionmsg' => 'doublewiki-desc',
33	36	);
34	37
35		~~-$wgExtensionMessagesFiles['DoubleWiki'] = dirname(__FILE__) . '/DoubleWiki.i18n.php';~~
36	38
37		~~-function addMatchedText ( &$parserOutput , &$text ) {~~
38		-
39		~~- global $wgContLang, $wgRequest, $wgLang, $wgContLanguageCode, $wgTitle;~~
40		-
41		~~- $match_request = $wgRequest->getText( 'match' );~~
42		~~- if ( $match_request === '' ) {~~
43		~~- return true;~~
44		~~- }~~
45		-
46		~~- foreach( $parserOutput->mLanguageLinks as $l ) {~~
47		~~- $nt = Title::newFromText( $l );~~
48		~~- $iw = $nt->getInterwiki();~~
49		~~- if( $iw === $match_request ){~~
50		~~- $url = $nt->getFullURL();~~
51		~~- $myURL = $wgTitle -> getLocalURL() ;~~
52		~~- $languageName = $wgContLang->getLanguageName( $nt->getInterwiki() );~~
53		~~- $myLanguage = $wgLang->getLanguageName( $wgContLanguageCode );~~
54		-
55		~~- $sep = ( in_string( '?', $url ) ) ? '&' : '?';~~
56		~~- $translation = Http::get( $url.$sep.'action=render' );~~
57		~~- if ( $translation !== null ) {~~
58		~~- #first find all links that have no 'class' parameter.~~
59		~~- #these links are local so we add '?match=xx' to their url,~~
60		~~- #unless it already contains a '?'~~
61		~~- $translation = preg_replace(~~
62		~~- "/<a href=\"http:\/\/([^\"\?]*)\"(([\s]+)(c(?!lass=)|[^c\>\s])([^\>\s]*))*\>/i",~~
63		~~- "<a href=\"http://\\1?match={$wgContLanguageCode}\"\\2>", $translation );~~
64		~~- #now add class='extiw' to these links~~
65		~~- $translation = preg_replace(~~
66		~~- "/<a href=\"http:\/\/([^\"]*)\"(([\s]+)(c(?!lass=)|[^c\>\s])([^\>\s]*))*\>/i",~~
67		~~- "<a href=\"http://\\1\" class=\"extiw\"\\3>", $translation );~~
68		~~- #use class='extiw' for images too~~
69		~~- $translation = preg_replace(~~
70		~~- "/<a href=\"http:\/\/([^\"]*)\"([^\>]*)class=\"image\"([^\>]*)\>/i",~~
71		~~- "<a href=\"http://\\1\"\\2class=\"extiw\"\\3>", $translation );~~
72		-
73		~~- #add prefixes to internal links, in order to prevent duplicates~~
74		~~- $translation = preg_replace("/<a href=\"#(.*?)\"/i","<a href=\"#l_\\1\"",~~
75		~~- $translation );~~
76		~~- $translation = preg_replace("/<li id=\"(.*?)\"/i","<li id=\"l_\\1\"",~~
77		~~- $translation );~~
78		~~- $text = preg_replace("/<a href=\"#(.*?)\"/i","<a href=\"#r_\\1\"", $text );~~
79		~~- $text = preg_replace("/<li id=\"(.*?)\"/i","<li id=\"r_\\1\"", $text );~~
80		-
81		~~- #add tags before h2 and h3 sections~~
82		~~- $translation = preg_replace("/<h2>/i","<div title=\"@@h2\"></div>\n<h2>",~~
83		~~- $translation );~~
84		~~- $translation = preg_replace("/<h3>/i","<div title=\"@@h3\"></div>\n<h3>",~~
85		~~- $translation );~~
86		~~- $text = preg_replace("/<h2>/i","<div title=\"@@h2\"></div>\n<h2>", $text );~~
87		~~- $text = preg_replace("/<h3>/i","<div title=\"@@h3\"></div>\n<h3>", $text );~~
88		-
89		~~- #add ?match= to local links of the local wiki~~
90		~~- $text = preg_replace( "/<a href=\"\/([^\"\?]*)\"/i",~~
91		~~- "<a href=\"/\\1?match={$match_request}\"", $text );~~
92		-
93		~~- #do the job~~
94		~~- $text = matchColumns ( $text, $myLanguage, $myURL ,~~
95		~~- $translation, $languageName, $url, $wgContLanguageCode, $match_request );~~
96		~~- }~~
97		~~- return true;~~
98		~~- }~~
99		~~- }~~
	39	+function wfDoubleWiki() {
	40	+ new DoubleWiki;
100	41	return true;
101	42	}
102	43
103	44
104		-/**
105		~~- * Return table with two columns of text~~
106		~~- * Text is split into slices based on title tags~~
107		~~- */~~
108		-
109		~~-function matchColumns( $left_text, $left_title, $left_url, $right_text, $right_title, $right_url, $left_lang_code, $right_lang_code ){~~
110		-
111		~~- # note about emdedding:~~
112		~~- # text is split only at a single level.~~
113		~~- # initially we assume that this level is zero~~
114		~~- # if nesting is encountered before the~~
115		~~- # first paragraph, then this split level is increased~~
116		~~- # we keep track of the current nesting level during processing~~
117		~~- # if (current level != split level) then we do not split the text~~
118		-
119		~~- # the current level of embedding (stack depth)~~
120		~~- $left_nesting = 0;~~
121		~~- $right_nesting = 0;~~
122		-
123		~~- #the level of embedding where the text is split~~
124		~~- #initial value is -1 until actual value is known~~
125		~~- $left_splitlevel = -1;~~
126		~~- $right_splitlevel = -1;~~
127		-
128		~~- # split text~~
129		~~- $tag_pattern = "/<div title=\"([^\"]*)\"><\/div>/i";~~
130		~~- $left_slices = preg_split( $tag_pattern, $left_text );~~
131		~~- $right_slices = preg_split( $tag_pattern, $right_text );~~
132		~~- preg_match_all( $tag_pattern, $left_text, $left_tags, PREG_PATTERN_ORDER );~~
133		~~- preg_match_all( $tag_pattern, $right_text, $right_tags, PREG_PATTERN_ORDER );~~
134		-
135		- /**
136		~~- * Order slices in a two-column array.~~
137		~~- * slices that are surrounded by the same tag belong in the same line~~
138		~~- * $i indexes the left column, $j the right column.~~
139		~~- */~~
140		~~- $body = '';~~
141		~~- $left_chunk = '';~~
142		~~- $right_chunk = '';~~
143		-
144		~~- $j=0;~~
145		~~- $max_i = count( $left_slices );~~
146		~~- for ( $i=0 ; $i < $max_i ; $i++ ) {~~
147		~~- $found = false;~~
148		~~- $left_chunk .= $left_slices[$i];~~
149		-
150		~~- $max_k = count( $right_slices );~~
151		-
152		~~- # if we are at the end of the loop, finish quickly~~
153		~~- if ( $i==$max_i - 1 ) {~~
154		~~- for ( $k=$j ; $k < $max_k ; $k++ ) $right_chunk .= $right_slices[$k];~~
155		~~- $found = true;~~
156		~~- }~~
157		~~- else for ( $k=$j ; $k < $max_k ; $k++ ) {~~
158		-
159		~~- #look for requested tag in the text~~
160		~~- $a = strpos ( $right_slices[$k], $left_tags[1][$i] );~~
161		~~- if( $a ) {~~
162		~~- #go to beginning of paragraph~~
163		~~- #this regexp matches the rightmost delimiter~~
164		~~- $sub = substr( $right_slices[$k], 0, $a);~~
165		~~- if ( preg_match("/(.*)<(p|dl)>/is", $sub, $matches ) ){~~
166		~~- $right_chunk .= $matches[1];~~
167		~~- $right_slices[$k] = substr( $right_slices[$k], strlen($matches[1]) );~~
168		~~- }~~
169		~~- else {~~
170		~~- $right_chunk .= $sub;~~
171		~~- $right_slices[$k] = substr( $right_slices[$k], $a );~~
172		~~- }~~
173		-
174		~~- $found = true;~~
175		~~- $j = $k;~~
176		~~- break;~~
177		~~- }~~
178		-
179		~~- $right_chunk .= $right_slices[$k];~~
180		-
181		~~- if( $k < $max_k - 1 ) {~~
182		~~- if( $left_tags[0][$i] == $right_tags[0][$k] ) {~~
183		~~- $found = true;~~
184		~~- $j = $k+1;~~
185		~~- break;~~
186		~~- }~~
187		~~- }~~
188		~~- }~~
189		~~- if( $found ) {~~
190		-
191		~~- #split chunks into smaller units (paragraphs)~~
192		~~- $paragraph_tags = "/<(p|dl)>/i";~~
193		~~- $left_bits = preg_split( $paragraph_tags, $left_chunk );~~
194		~~- $right_bits = preg_split( $paragraph_tags, $right_chunk );~~
195		~~- preg_match_all( $paragraph_tags, $left_chunk, $left_seps, PREG_PATTERN_ORDER );~~
196		~~- preg_match_all( $paragraph_tags, $right_chunk, $right_seps, PREG_PATTERN_ORDER );~~
197		-
198		~~- $left_chunk = '';~~
199		~~- $right_chunk = '';~~
200		-
201		~~- # add separators that were cut off~~
202		~~- for($l=1; $l < count( $left_bits ); $l++ ) {~~
203		~~- $left_bits[$l] = $left_seps[0][$l-1].$left_bits[$l];~~
204		~~- }~~
205		~~- for($l=1; $l < count( $right_bits ); $l++ ) {~~
206		~~- $right_bits[$l] = $right_seps[0][$l-1].$right_bits[$l];~~
207		~~- }~~
208		-
209		~~- $max = max( count( $left_bits ) , count( $right_bits ));~~
210		~~- # initialize missing elements~~
211		~~- for($l= count( $left_bits ); $l<$max; $l++) $left_bits[$l]='';~~
212		~~- for($l= count( $right_bits ); $l<$max; $l++) $right_bits[$l]='';~~
213		-
214		~~- for($l=0; $l < $max; $l++ ) {~~
215		-
216		~~- list($left_delta,$left_o,$left_c) = nesting_delta( $left_bits[$l] );~~
217		~~- list($right_delta,$right_o,$right_c) = nesting_delta( $right_bits[$l] );~~
218		-
219		~~- $left_nesting = $left_nesting + $left_delta;~~
220		~~- $right_nesting = $right_nesting + $right_delta;~~
221		-
222		~~- #are we at the end?~~
223		~~- $the_end = ($l == $max-1) && ($i == $max_i -1 );~~
224		-
225		~~- if(( $left_splitlevel == -1) && ($right_splitlevel == -1)) {~~
226		~~- $left_splitlevel = $left_nesting;~~
227		~~- $right_splitlevel = $right_nesting;~~
228		~~- $left_opening = $left_o;~~
229		~~- $right_opening = $right_o;~~
230		~~- $left_closure = $left_c;~~
231		~~- $right_closure = $right_c;~~
232		-
233		~~- $left_prefix = '';~~
234		~~- $right_prefix = '';~~
235		~~- $left_suffix = $left_closure;~~
236		~~- $right_suffix = $right_closure;~~
237		~~- }~~
238		~~- else if($the_end) {~~
239		~~- $left_prefix = $left_opening;~~
240		~~- $right_prefix = $right_opening;~~
241		~~- $left_suffix = '';~~
242		~~- $right_suffix = '';~~
243		~~- }~~
244		~~- else {~~
245		~~- $left_prefix = $left_opening;~~
246		~~- $right_prefix = $right_opening;~~
247		~~- $left_suffix = $left_closure;~~
248		~~- $right_suffix = $right_closure;~~
249		~~- }~~
250		-
251		~~- if( ( ($left_nesting == $left_splitlevel)~~
252		~~- && ($right_nesting == $right_splitlevel) ) || $the_end) {~~
253		~~- $body .=~~
254		~~- "<tr><td valign=\"top\" style=\"padding-right: 0.5em\" lang=\"{$left_lang_code}\">"~~
255		~~- ."<div style=\"width:35em; margin:0px auto\">\n"~~
256		~~- .$left_prefix.$left_bits[$l].$left_suffix~~
257		~~- ."</div>"~~
258		-
259		~~- ."</td>\n<td valign=\"top\" style=\"padding-left: 0.5em\" lang=\"{$right_lang_code}\">"~~
260		~~- ."<div style=\"width:35em; margin:0px auto\">\n"~~
261		~~- .$right_prefix.$right_bits[$l].$right_suffix~~
262		~~- ."</div>"~~
263		~~- ."</td></tr>\n";~~
264		~~- }~~
265		~~- else {~~
266		~~- # procrastinate~~
267		~~- $left_nesting = $left_nesting - $left_delta;~~
268		~~- $right_nesting = $right_nesting - $right_delta;~~
269		~~- if ($l < $max-1) {~~
270		~~- $left_bits[$l+1] = $left_bits[$l] . $left_bits[$l+1];~~
271		~~- $right_bits[$l+1] = $right_bits[$l] . $right_bits[$l+1];~~
272		~~- } else {~~
273		~~- $left_chunk = $left_bits[$l] ;~~
274		~~- $right_chunk = $right_bits[$l];~~
275		~~- }~~
276		~~- }~~
277		~~- }~~
278		~~- }~~
279		~~- else{ $right_chunk='';}~~
280		~~- }~~
281		-
282		-
283		~~- # format table head and return results~~
284		~~- $left_url = htmlspecialchars( $left_url );~~
285		~~- $right_url = htmlspecialchars( $right_url );~~
286		~~- $head =~~
287		~~-"<table width=\"100%\" border=\"0\" bgcolor=\"white\" rules=\"cols\" cellpadding=\"0\">~~
288		~~-<colgroup><col width=\"50%\"/><col width=\"50%\"/></colgroup><thead>~~
289		~~-<tr><td bgcolor=\"#cfcfff\" align=\"center\" lang=\"{$left_lang_code}\">~~
290		~~-<a href=\"{$left_url}\">{$left_title}</a></td>~~
291		~~-<td bgcolor=\"#cfcfff\" align=\"center\" lang=\"{$right_lang_code}\">~~
292		~~-<a href=\"{$right_url}\" class='extiw'>{$right_title}</a>~~
293		~~-</td></tr></thead>\n";~~
294		~~- return $head.$body."</table>" ;~~
295		-}
296		-
297		-
298		-/*
299		~~- * returns how much the stack is changed~~
300		~~- * also returns opening and closing sequences of tag~~
301		~~- */~~
302		~~-function nesting_delta ( $text ) {~~
303		~~- #tags that must be closed. (list copied from Sanitizer.php)~~
304		~~- $tags = "/<\/?(b|del|i|ins|u|font|big|small|sub|sup|h1|h2|h3|h4|h5|h6|"~~
305		~~- ."cite|code|em|s|strike|strong|tt|tr|td|var|div|center|blockquote|ol|ul|dl|"~~
306		~~- ."table|caption|pre|ruby|rt|rb|rp|p|span)([\s](.*?)>|>)/i";~~
307		~~- preg_match_all( $tags, $text, $m, PREG_SET_ORDER);~~
308		-
309		~~- $stack = array();~~
310		~~- $counter = 0;~~
311		~~- $opening = '';~~
312		~~- $closure = '';~~
313		~~- for($i=0; $i < count($m); $i++){~~
314		~~- $t = $m[$i];~~
315		~~- if( substr( $t[0], 0, 2) != "</" ){~~
316		~~- $counter++;~~
317		~~- array_push($stack, $t);~~
318		~~- } else {~~
319		~~- $tt = array_pop($stack);~~
320		~~- $counter--;~~
321		~~- #if( ($tt != null) && ($tt[1] != $t[1]) ) {~~
322		~~- # #input html is buggy...~~
323		~~- # echo "Warning: ".$t[1]." encountered, expected ".$tt[1]."<br />\n";~~
324		~~- #}~~
325		~~- }~~
326		~~- }~~
327		~~- for($i=0; $i<$counter; $i++){~~
328		~~- $opening .= $stack[$i][0];~~
329		~~- $closure = "</".$stack[$i][1].">".$closure;~~
330		~~- }~~
331		-
332		~~- return array($counter, $opening, $closure);~~
333		-
334		-}

Follow-up revisions

Revision	Commit summary	Author	Date
r81643	Fix weird hook registration from r71624	demon	19:43, 7 February 2011

Comments

#Comment by ThomasV (talk | contribs) 09:33, 5 February 2011

Hmm, I still do not understand why this is marked as deferred. This extension is active on WMF sites. I already told Reedy about this (this is why he changed the status back to new on Nov 15), but now it's deferred again.

#Comment by RobLa-WMF (talk | contribs) 20:55, 6 February 2011

Hrm, yep, this does look like it's installed on en.wikisource.org, among other places. Marking as new.

#Comment by 😂 (talk | contribs) 19:43, 7 February 2011

I'm not really sure why you're deferring hook registration to ParserFirstCallInit. You could just move the $wgHooks['OutputPageBeforeHTML'] setting to the extension setup file.

#Comment by 😂 (talk | contribs) 19:43, 7 February 2011

Fixed in r81643.

Status & tagging log

19:48, 7 February 2011 Trevor Parscal (WMF) (talk | contribs) changed the tags for r71624 [removed: trevor]
19:43, 7 February 2011 😂 (talk | contribs) changed the status of r71624 [removed: new added: resolved]
18:29, 7 February 2011 MarkAHershberger (talk | contribs) changed the tags for r71624 [added: trevor]
20:55, 6 February 2011 RobLa-WMF (talk | contribs) changed the status of r71624 [removed: deferred added: new]
22:13, 3 December 2010 Reedy (talk | contribs) changed the status of r71624 [removed: new added: deferred]
14:56, 15 November 2010 Reedy (talk | contribs) changed the status of r71624 [removed: deferred added: new]
19:51, 23 October 2010 Reedy (talk | contribs) changed the status of r71624 [removed: new added: deferred]