r50026 MediaWiki - Code Review archive

Repository:	MediaWiki
Revision:	< r50025‎ \| r50026 \| r50027 >
Date:	22:54, 28 April 2009
Author:	brion
Status:	ok (Comments)
Tags:
Comment:	Revert r49669, r49670 "extract text layer from djvu file (see bug 18046)" I don't really like the use of sed here; it's an extra Unix dependency which seems unnecessary, when we could trivially do the escaping in the code. The ajax fetch added in r49670 also looks like a big security hole: * Manual use of curl should be avoided; it may not be installed, and 'localhost' may not do anything useful. * Further this appears to be a general "fetch any foreign URL and pass the data through" which is a serious security hole.
Modified paths:	/trunk/extensions/ProofreadPage/ProofreadPage.php (modified) (history) /trunk/extensions/ProofreadPage/proofread.js (modified) (history) /trunk/phase3/includes/DefaultSettings.php (modified) (history) /trunk/phase3/includes/media/DjVu.php (modified) (history)

Diff [purge]

Index: trunk/phase3/includes/media/DjVu.php
—	—	@@ -52,8 +52,6 @@
53	53	$m = false;
54	54	if ( preg_match( '/^page(\d+)-(\d+)px$/', $str, $m ) ) {
55	55	return array( 'width' => $m[2], 'page' => $m[1] );
56		~~- } else if ( preg_match( '/^page(\d+)-djvutxt$/', $str, $m ) ) {~~
57		~~- return array( 'djvutxt' => 1, 'page' => $m[1] );~~
58	56	} else {
59	57	return false;
60	58	}
—	—	@@ -66,22 +64,8 @@
67	65	);
68	66	}
69	67
70		-
71		~~- function normaliseParams( $image, &$params ) {~~
72		~~- global $wgDjvuTxt;~~
73		~~- if( $wgDjvuTxt && isset($params['djvutxt']) && $params['djvutxt']) {~~
74		~~- if ( !isset( $params['page'] ) ) {~~
75		~~- $params['page'] = 1;~~
76		~~- }~~
77		~~- $params['width'] = 0;~~
78		~~- $params['height'] = 0;~~
79		~~- return true;~~
80		~~- }~~
81		~~- else return parent::normaliseParams( $image, $params );~~
82		~~- }~~
83		-
84	68	function doTransform( $image, $dstPath, $dstUrl, $params, $flags = 0 ) {
85		~~- global $wgDjvuRenderer, $wgDjvuPostProcessor, $wgDjvuTxt, $wgSed;~~
	69	+ global $wgDjvuRenderer, $wgDjvuPostProcessor;
86	70
87	71	// Fetch XML and check it, to give a more informative error message than the one which
88	72	// normaliseParams will inevitably give.
—	—	@@ -112,22 +96,12 @@
113	97
114	98	# Use a subshell (brackets) to aggregate stderr from both pipeline commands
115	99	# before redirecting it to the overall stdout. This works in both Linux and Windows XP.
116		-
117		~~- if( $params['djvutxt'] && $wgDjvuTxt && $wgSed ) {~~
118		~~- #Read text from djvu~~
119		~~- $cmd = '(' . wfEscapeShellArg( $wgDjvuTxt ) . " --page={$page} " . wfEscapeShellArg( $srcPath );~~
120		~~- #Escape < > & characters~~
121		~~- $cmd .= ' \| ' . wfEscapeShellArg( $wgSed ) . ' "s/\&/\&/g ; s/</\</g ; s/>/\>/g ; s/\"/\"/g "';~~
122		~~- $cmd .= ' > ' . wfEscapeShellArg($dstPath) . ') 2>&1';~~
	100	+ $cmd = '(' . wfEscapeShellArg( $wgDjvuRenderer ) . " -format=ppm -page={$page} -size={$width}x{$height} " .
	101	+ wfEscapeShellArg( $srcPath );
	102	+ if ( $wgDjvuPostProcessor ) {
	103	+ $cmd .= " \| {$wgDjvuPostProcessor}";
123	104	}
124		~~- else {~~
125		~~- $cmd = '(' . wfEscapeShellArg( $wgDjvuRenderer ) . " -format=ppm -page={$page} -size={$width}x{$height} " .~~
126		~~- wfEscapeShellArg( $srcPath );~~
127		~~- if ( $wgDjvuPostProcessor ) {~~
128		~~- $cmd .= " \| {$wgDjvuPostProcessor}";~~
129		~~- }~~
130		~~- $cmd .= ' > ' . wfEscapeShellArg($dstPath) . ') 2>&1';~~
131		~~- }~~
	105	+ $cmd .= ' > ' . wfEscapeShellArg($dstPath) . ') 2>&1';
132	106	wfProfileIn( 'ddjvu' );
133	107	wfDebug( __METHOD__.": $cmd\n" );
134	108	$err = wfShellExec( $cmd, $retval );
Index: trunk/phase3/includes/DefaultSettings.php
—	—	@@ -1898,12 +1898,6 @@
1899	1899	$wgDiff = '/usr/bin/diff';
1900	1900
1901	1901	/**
1902		~~- * Path to the GNU sed utility. Required by $wgDjvuTxt.~~
1903		~~- */~~
1904		~~-#$wgSed = '/bin/sed';~~
1905		~~-$wgSed = null;~~
1906		-
1907		-/**
1908	1902	* We can also compress text stored in the 'text' table. If this is set on, new
1909	1903	* revisions will be compressed on page save if zlib support is available. Any
1910	1904	* compressed revisions will be decompressed on load regardless of this setting
—	—	@@ -3541,13 +3535,6 @@
3542	3536	$wgDjvuRenderer = null;
3543	3537
3544	3538	/**
3545		~~- * Path of the djvutxt DJVU text extraction utility~~
3546		~~- * Enable this and $wgSed, $wgDjvuDump to enable text layer extraction from djvu files~~
3547		~~- */~~
3548		~~-# $wgDjvuTxt = 'djvutxt';~~
3549		~~-$wgDjvuTxt = null;~~
3550		-
3551		-/**
3552	3539	* Path of the djvutoxml executable
3553	3540	* This works like djvudump except much, much slower as of version 3.5.
3554	3541	*
Index: trunk/extensions/ProofreadPage/ProofreadPage.php
—	—	@@ -31,8 +31,6 @@
32	32	);
33	33
34	34	$wgExtensionFunctions[] = "pr_main";
35		~~-$wgAjaxExportList[] = "pr_fetch_djvutxt";~~
36		-
37	35	function pr_main() {
38	36	global $wgParser;
39	37	$wgParser->setHook( "pagelist", "pr_renderPageList" );
—	—	@@ -40,29 +38,11 @@
41	39	}
42	40
43	41
44		-/*
45		~~- * Fetch Djvu text with curl~~
46		~~- */~~
47		~~-function pr_fetch_djvutxt( $url ) {~~
48	42
49		~~- if($url[0]=='/') $url = "http://localhost" . $url;~~
50	43
51		~~- $ch = curl_init( $url );~~
52		~~- curl_setopt( $ch, CURLOPT_HEADER, false );~~
53		~~- curl_setopt( $ch, CURLOPT_RETURNTRANSFER, true);~~
54		~~- $text = curl_exec( $ch );~~
	44	+# Bump the version number every time you change proofread.js
	45	+$wgProofreadPageVersion = 18;
55	46
56		~~- $errno = curl_errno( $ch );~~
57		~~- $httpCode = curl_getinfo( $ch, CURLINFO_HTTP_CODE );~~
58		~~- //$contentType = curl_getinfo( $ch, CURLINFO_CONTENT_TYPE );~~
59		-
60		~~- curl_close($ch);~~
61		~~- if($errno==0 && ( $httpCode==200 \|\| $httpCode==404 ) ) {~~
62		~~- return $text;~~
63		~~- }~~
64		~~- return "";~~
65		-}
66		-
67	47	/**
68	48	*
69	49	* Query the database to find if the current page is referred in an Index page.
Index: trunk/extensions/ProofreadPage/proofread.js
—	—	@@ -47,7 +47,6 @@
48	48
49	49
50	50
51		-
52	51	function pr_image_url(requested_width){
53	52	var image_url;
54	53
—	—	@@ -317,7 +316,7 @@
318	317	table.style.cssText = "width:100%;";
319	318
320	319	//fill table
321		~~- if(self.proofreadpage_default_layout=='horizontal')~~
	320	+ if(self.proofreadpage_default_layout=='horizontal')
322	321	pr_fill_table(true);
323	322	else
324	323	pr_fill_table(false);
—	—	@@ -494,41 +493,18 @@
495	494	addOnloadHook(pr_init_tabs);
496	495
497	496
498		~~-/fetch djvu content with ajax/~~
499		~~-function pr_fetch_djvutxt(){~~
500		~~- var text_url = proofreadPageThumbURL.replace('##WIDTH##px',"djvutxt").replace(".jpg",".txt");~~
501		~~- sajax_do_call( 'pr_fetch_djvutxt', [ text_url ], pr_init_textbox );~~
502		-}
503		-
504		-
505		~~-function pr_init_textbox(xmlhttp) {~~
506		~~- if (xmlhttp == null) return;~~
507		~~- if (xmlhttp.readyState == 4) {~~
508		~~- document.getElementById("wpTextbox1").value = xmlhttp.responseText;~~
509		~~- }~~
510		-}
511		-
512		-
513		~~-function pr_onload(){~~
514		-
515		~~- if(self.proofreadPageIsEdit){~~
516		~~- if(!document.getElementById("wpTextbox1") ) return;~~
	497	+function pr_initzoom(){
	498	+ if(document.getElementById("wpTextbox1")){
517	499	if(self.pr_horiz)
518	500	document.getElementById("wpTextbox1").style.cssText = "height:"+self.vertHeight+"px";
519	501	else
520	502	document.getElementById("wpTextbox1").style.cssText = "height:"+(self.TextBoxHeight-7)+"px";
521	503	pr_zoom(0);
522		-
523		~~- //to enable, set proofreadpage_djvutxt=1~~
524		~~- if(self.proofreadpage_djvutxt) {~~
525		~~- if( document.getElementById("wpTextbox1").value == "" ) pr_fetch_djvutxt();~~
526		~~- }~~
527	504	}
528	505	}
529		~~-hookEvent("load", pr_onload );~~
	506	+hookEvent("load", pr_initzoom );
530	507
531	508
532		-
533	509	/Quality buttons/
534	510
535	511	function pr_add_quality(form,value){

Follow-up revisions

Revision	Commit summary	Author	Date
r50050	extract djvu text (bug 18046); escape possible script with htmlspecialchars i...	thomasv	19:33, 29 April 2009
r50051	fetch djvu text (bug 18046); using parser hook instead of ajax, and Http::get...	thomasv	19:37, 29 April 2009

Past revisions this follows-up on

Revision	Commit summary	Author	Date
r49669	extract text layer from djvu file (see bug 18046)	thomasv	18:00, 20 April 2009
r49670	fetch djvu text	thomasv	18:01, 20 April 2009

Comments

#Comment by ThomasV (talk | contribs) 19:40, 29 April 2009

ok, I did it again, without sed and without ajax : see r50050 and r50051.

Status & tagging log

07:22, 27 May 2009 Tim Starling (talk | contribs) changed the status of r50026 [removed: new added: ok]
04:27, 29 April 2009 Aaron Schulz (talk | contribs) changed the status of r50026 [removed: new added: ok]