r91776 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r91775‎ | r91776 | r91777 >
Date:02:20, 9 July 2011
Author:kbrown
Status:deferred (Comments)
Tags:
Comment:
start special page for viewing archived content.
Modified paths:
  • /trunk/extensions/ArchiveLinks/ArchiveLinks.i18n.php (modified) (history)
  • /trunk/extensions/ArchiveLinks/ArchiveLinks.php (modified) (history)
  • /trunk/extensions/ArchiveLinks/SpecialModifyArchiveBlacklist.php (modified) (history)
  • /trunk/extensions/ArchiveLinks/SpecialViewArchive.php (added) (history)
  • /trunk/extensions/ArchiveLinks/spider.php (modified) (history)

Diff [purge]

Index: trunk/extensions/ArchiveLinks/SpecialViewArchive.php
@@ -0,0 +1,98 @@
 2+<?php
 3+/**
 4+ * This special page exists to serve the cached versions of the pages that have been archived.
 5+ */
 6+
 7+if (!defined('MEDIAWIKI')) {
 8+ echo( "This file is an extension to the MediaWiki software and cannot be used standalone.\n" );
 9+ die(1);
 10+}
 11+
 12+class SpecialViewArchive extends SpecialPage {
 13+ private $db_master;
 14+ private $db_slave;
 15+ private $db_result;
 16+
 17+ function __construct() {
 18+ parent::__construct( 'ViewArchive' );
 19+ }
 20+
 21+ public function execute( $par ) {
 22+ global $wgOut, $wgRequest;
 23+
 24+ if ( isset( $par ) || $url = $wgRequest->getText( 'archive_url' ) ) {
 25+ $this->db_master = wfGetDB( DB_MASTER );
 26+ $this->db_slave = wfGetDB( DB_SLAVE );
 27+ $db_result = array();
 28+
 29+ if( !isset( $url ) ) {
 30+ $url = $par;
 31+ }
 32+
 33+ $this->db_result['url_location'] = $this->db_slave->select( 'el_archive_resource', '*', array( 'resource_url' => $this->db_slave->strencode( $url ) ), __METHOD__ );
 34+
 35+ if ( $this->db_result['url_location']->numRows() < 1 ) {
 36+ //This URL doesn't exist in the archive, let's say so
 37+ $this->db_result['log_check'] = $this->db_slave->select( 'el_archive_log', '*', array( 'log_url' => $this->db_slave->strencode( $url ) ), __METHOD__ );
 38+ $this->db_result['queue_check'] = $this->db_slave->select( 'el_archive_queue', '*', array( 'url' => $this->db_slave->strencode( $url ) ), __METHOD__ );
 39+
 40+ if ( ( $num_rows = $this->db_result['queue_check']->numRows() ) === 1 ) {
 41+ $in_queue = true;
 42+ } elseif ( $num_rows > 1 ) {
 43+ //We found duplicates, delete them
 44+ $job = $this->db_result['queue_check']->fetchRow();
 45+ while( $row = $this->db_result['queue_check']->fetchRow() ) {
 46+ $this->db_master->delete( 'el_archive_queue', array ( 'queue_id' => $row['queue_id'] ) );
 47+ }
 48+ } else {
 49+ $in_queue = false;
 50+ }
 51+
 52+ if ( $this->db_result['log_check']->numRows() >= 1 ) {
 53+ $in_logs = true;
 54+ } else {
 55+ $in_logs = false;
 56+ }
 57+
 58+ $this->output_form();
 59+ $wgOut->addWikiMsg( 'archivelinks-view-archive-url-not-found' );
 60+ /*$wgOut->addHTML(
 61+ HTML::openElement( 'table' ) .
 62+ HTML::openElement('tr') .
 63+ HTML::openElement('td') .
 64+ HTML::closeElement('td') .
 65+ HTML::closeElement('tr') .
 66+ HTML::closeElement( 'table' )
 67+ );*/
 68+ } else {
 69+ //Disable the output so we don't get a skin around the archived content
 70+ $wgOut->disable();
 71+
 72+ ob_start();
 73+
 74+ echo HTML::htmlHeader();
 75+ }
 76+
 77+ } else {
 78+ //The user has not requested a URL, let's print a form so they can do so :D
 79+ $this->output_form();
 80+ }
 81+ }
 82+
 83+ private function output_form( ) {
 84+ global $wgOut;
 85+ $this->setHeaders();
 86+ $wgOut->addWikiMsg( 'archivelinks-view-archive-desc' );
 87+
 88+ $wgOut->addHTML(
 89+ HTML::openElement( 'form', array( 'method' => 'get', 'action' => SpecialPage::getTitleFor( 'ViewArchive' )->getLocalUrl() ) ) .
 90+ HTML::openElement( 'fieldset' ) .
 91+ HTML::element('legend', null, wfMsg('ViewArchive') ) .
 92+ XML::inputLabel( wfMsg( 'archivelinks-view-archive-url-field' ), 'archive_url', 'archive-links-archive-url', 120 ) .
 93+ HTML::element( 'br' ) .
 94+ XML::submitButton( wfMsg( 'archivelinks-view-archive-submit-button' ) ) .
 95+ HTML::closeElement( 'fieldset' ) .
 96+ HTML::closeElement( 'form' )
 97+ );
 98+ }
 99+}
\ No newline at end of file
Property changes on: trunk/extensions/ArchiveLinks/SpecialViewArchive.php
___________________________________________________________________
Added: svn:eol-style
1100 + native
Index: trunk/extensions/ArchiveLinks/ArchiveLinks.i18n.php
@@ -8,7 +8,8 @@
99 //English
1010 $messages['en'] = array(
1111 'archivelinks-cache-title' => 'cache',
12 - 'ModifyArchiveBlacklist' => 'Modify Blacklist',
 12+ 'ModifyArchiveBlacklist' => 'Modify Archive Blacklist',
 13+ 'ViewArchive' => 'View Archive',
1314 'archivelinks-modify-blacklist-desc' => 'This page allows you to blacklist or whitelist URLs for the ArchiveLinks extension.',
1415 //'archivelinks-archive-blacklist-fieldset-label' => 'Blacklist a URL',
1516 'archivelinks-modify-blacklist-url-field-label' => 'URL to Blacklist:',
@@ -20,4 +21,9 @@
2122 'archivelinks-modify-blacklist-whitelist-field-label' => 'Blacklist',
2223 'archivelinks-modify-blacklist-blacklist-field-label' => 'Whitelist',
2324 'archivelinks-modify-blacklist-blacklist-or-whitelist-field-label' => 'Which list would you like to modify?',
 25+ 'archivelinks-view-archive-desc' => 'This is the special page for viewing archived external links, please enter the URL of the archive you\'d like to view.',
 26+ 'archivelinks-view-archive-submit-button' => 'View Archive',
 27+ 'archivelinks-view-archive-url-field' => 'URL of page:',
 28+ 'archivelinks-view-archive-url-not-found' => 'We\'re sorry, the URL you requested was not found in the archive for the following reason.',
 29+
2430 );
\ No newline at end of file
Index: trunk/extensions/ArchiveLinks/ArchiveLinks.php
@@ -15,14 +15,17 @@
1616
1717 $wgExtensionMessagesFiles['ArchiveLinks'] = "$path/ArchiveLinks.i18n.php";
1818 $wgExtensionMessagesFiles['ModifyArchiveBlacklist'] = "$path/ArchiveLinks.i18n.php";
 19+$wgExtensionMessagesFiles['ViewArchive'] = "$path/ArchiveLinks.i18n.php";
1920
2021 $wgAutoloadClasses['ArchiveLinks'] = "$path/ArchiveLinks.class.php";
2122 $wgAutoloadClasses['SpecialModifyArchiveBlacklist'] = "$path/SpecialModifyArchiveBlacklist.php";
 23+$wgAutoloadClasses['SpecialViewArchive'] = "$path/SpecialViewArchive.php";
2224
2325 $wgHooks['ArticleSaveComplete'][] = 'ArchiveLinks::queueExternalLinks';
2426 $wgHooks['LinkerMakeExternalLink'][] = 'ArchiveLinks::rewriteLinks';
2527
2628 $wgSpecialPages['ModifyArchiveBlacklist'] = 'SpecialModifyArchiveBlacklist';
 29+$wgSpecialPages['ViewArchive'] = 'SpecialViewArchive';
2730
2831 $wgArchiveLinksConfig = array(
2932 'archive_service' => 'wikiwix',
Index: trunk/extensions/ArchiveLinks/SpecialModifyArchiveBlacklist.php
@@ -14,14 +14,14 @@
1515 public function execute($par) {
1616 global $wgOut, $wgRequest;
1717 $this->setHeaders();
18 - $this->outputHeader();
 18+ //$this->outputHeader();
1919
2020 $wgOut->addWikiMsg('archivelinks-modify-blacklist-desc');
2121
2222 $wgOut->addHTML(
2323 HTML::openElement('form', array('method' => 'post', 'action' => SpecialPage::getTitleFor('ModifyBlacklist')->getLocalUrl())) .
2424 HTML::openElement('fieldset') .
25 - HTML::element('legend', null, wfMsg('ModifyBlacklist')) .
 25+ HTML::element('legend', null, wfMsg('ModifyArchiveBlacklist')) .
2626 //HTML::hidden( 'title', SpecialPage::getTitleFor( 'ArchiveBlacklist' )->getPrefixedText() ) .
2727 HTML::openElement('table') .
2828 HTML::openElement('tr') .
Index: trunk/extensions/ArchiveLinks/spider.php
@@ -58,29 +58,29 @@
5959 }
6060
6161 private function call_wget( $url ) {
62 - global $wgArchiveLinksConfig;
63 - if ( array_key_exists( 'path_to_wget', $wgArchiveLinksConfig ) && file_exists( $wgArchiveLinksConfig['path_to_wget'] ) ) {
 62+ global $wgArchiveLinksConfig, $path;
 63+ if ( array_key_exists( 'wget_path', $wgArchiveLinksConfig ) && file_exists( $wgArchiveLinksConfig['wget_path'] ) ) {
6464 die ( 'Support is not yet added for wget in a different directory' );
65 - } elseif ( file_exists( "$path/wget.exe" ) ) {
66 - if ( $wgArchiveLinksConfig['file_types_to_archive'] ) {
67 - if ( is_array( $wgArchiveLinksConfig['file_types_to_archive']) ){
68 - $accept_file_types = '-A ' . implode( ',', $wgArchiveLinksConfig['filetypes_to_archive'] );
 65+ } elseif ( file_exists( "$path/extensions/ArchiveLinks/wget.exe" ) ) {
 66+ if ( array_key_exists( 'file_types', $wgArchiveLinksConfig ) ) {
 67+ if ( is_array( $wgArchiveLinksConfig['file_types']) ){
 68+ $accept_file_types = '-A ' . implode( ',', $wgArchiveLinksConfig['filetypes'] );
6969 } else {
70 - $accept_file_types = '-A ' . $wgArchiveLinksConfig['file_types_to_archive'];
 70+ $accept_file_types = '-A ' . $wgArchiveLinksConfig['file_types'];
7171 }
7272 } else {
7373 $accept_file_types = '';
7474 }
7575 //At the current time we are only adding support for the local filestore, but swift support is something that will be added later
76 - switch( $wgArchiveLinksConfig['filestore_to_use'] ) {
 76+ switch( $wgArchiveLinksConfig['filestore'] ) {
7777 case 'local':
7878 default:
79 - if ( $wgArchiveLinksConfig['subfolder_name'] ) {
 79+ if ( array_key_exists( 'subfolder_name', $wgArchiveLinksConfig ) ) {
8080 $content_dir = 'extensions/ArchiveLinks/' . $wgArchiveLinksConfig['subfolder_name'];
8181 } elseif ( $wgArchiveLinksConfig['content_path'] ) {
8282 $content_dir = realpath( $wgArchiveLinksConfig['content_path'] );
8383 if ( !$content_dir ) {
84 - die ( 'The path you have set for $wgArchiveLinksConfig[\'content_path\'] does not exist.' .
 84+ die ( 'The path you have set for $wgArchiveLinksConfig[\'content_path\'] does not exist. ' .
8585 'This makes the spider a very sad panda. Please either create it or use a different setting.');
8686 }
8787 } else {
@@ -90,9 +90,14 @@
9191 $dir = escapeshellarg( $dir );
9292 $sanitized_url = escapeshellarg( $url );
9393 }
94 -
95 - shell_exec( "cd $path" );
96 - shell_exec( "wget.exe -nH -p -H -E -k -o \"./log.txt\" -Q2m -P $dir $accept_file_types $sanitized_url" );
 94+ if ( array_key_exists( 'wget_quota', $wgArchiveLinksConfig ) ) {
 95+ $quota = $wgArchiveLinksConfig['wget_quota'];
 96+ } else {
 97+ //We'll set the default max quota for any specific web page for 8 mb, which is kind of a lot but should allow for large images
 98+ $quota = '8m';
 99+ }
 100+ shell_exec( "cd $path/extensions/ArchiveLinks/" );
 101+ shell_exec( "wget.exe -nH -p -H -E -k -Q$quota -P $dir $accept_file_types $sanitized_url" );
97102 } else {
98103 //this is primarily designed with windows in mind and no built in wget, so yeah, *nix support should be added, in other words note to self...
99104 die ( 'wget must be installed in order for the spider to function in wget mode' );
@@ -120,7 +125,7 @@
121126
122127 /**
123128 * This function checks a local file for a local block of jobs that is to be done
124 - * if there is none that exists it gets a block, create ones, and waits to avoid any replag problems
 129+ * if there is none that exists it gets a block, creates one, and waits to avoid any replag problems
125130 */
126131 private function replication_check_queue( ) {
127132 global $path, $wgArchiveLinksConfig;
@@ -171,7 +176,7 @@
172177 array_key_exists( 'in_progress_ignore_delay', $wgArchiveLinksConfig ) ? $ignore_in_prog_time = $wgArchiveLinksConfig['in_progress_ignore_delay'] :
173178 $ignore_in_prog_time = 7200;
174179
175 - if ( $reserve_time + $ingore_in_prog_time + $wait_time > $ignore_in_prog_time + $wait_time ) {
 180+ if ( $time - $reserve_time - $wait_time > $ignore_in_prog_time ) {
176181 $retval = $this->reserve_job( $row );
177182 }
178183 }

Sign-offs

UserFlagDate
Nikerabbitinspected07:44, 9 July 2011

Comments

#Comment by Nikerabbit (talk | contribs)   07:44, 9 July 2011

Inclusion protector is not needed for files which only contain a class.

+if (!defined('MEDIAWIKI')) {

MediaWiki doesn't use title case in messages.

+	'ViewArchive' => 'View Archive',

Was this intended?

-		$this->outputHeader();
+		//$this->outputHeader();

Wouldn't isset() be better here:

+	if ( array_key_exists( 'subfolder_name', $wgArchiveLinksConfig ) ) {

If you just want the title of the special page you are in, you can just use $this->getTitle(); Also, Html is usually written without all capitals.

+HTML::openElement( 'form', array( 'method' => 'get', 'action' => SpecialPage::getTitleFor( 'ViewArchive' )->getLocalUrl() ) ) .

Also, usually the action is given as $wgScript, and then hidden title value is included in the form. This is that people which use the default style urls example.com/foo/index.php?bar, where everything after question mark is dropped out by the browser (at least in POST requests).

#Comment by NeilK (talk | contribs)   23:42, 11 July 2011

FYI "Archive" is an overloaded term in MediaWiki, since old deleted pages are also called 'archive'. Suggest you stick to calling it Special:ArchiveLinks like the class.

I see you're thinking of including an output form as a default experience, but as we discussed I don't think you need this just yet. Still it would be neat if you could look up a URL.

wget.exe should be replaced by a configurable path to wget. Generally we use the default for Ubuntu Linux so that would be '/usr/bin/wget' -- of course you fix it to whatever for your local machine.

#Comment by NeilK (talk | contribs)   23:44, 11 July 2011

Also, in general, try to move configuration into its own block or method. It's much better if you figure out what the $quota is way before you need to involve it in computation.

#Comment by NeilK (talk | contribs)   18:57, 5 August 2011

Also, stop it with figuring out the path to wget.exe this late in the game... we should know if wget is present way, way before we even attempt to run the spider

Status & tagging log