Index: trunk/extensions/ArchiveLinks/SpecialViewArchive.php |
— | — | @@ -0,0 +1,98 @@ |
| 2 | +<?php |
| 3 | +/** |
| 4 | + * This special page exists to serve the cached versions of the pages that have been archived. |
| 5 | + */ |
| 6 | + |
| 7 | +if ( defined( 'MEDIAWIKI' ) === false ) { //Requested directly instead of being loaded by MediaWiki |
| 8 | +	print "This file is an extension to the MediaWiki software and cannot be used standalone.\n"; |
| 9 | +	exit( 1 ); //Non-zero exit status signals the failure to the caller |
| 10 | +} |
| 11 | + |
| 12 | +/** |
| 13 | + * Special page that looks a URL up in the link archive. |
| 14 | + * |
| 15 | + * The URL comes from the subpage part of the title or, when there is none, from the |
| 16 | + * "archive_url" request parameter. Without a URL only the lookup form is shown. |
| 17 | + */ |
| 18 | +class SpecialViewArchive extends SpecialPage { |
| 19 | +	//Master connection, only opened when duplicate queue rows have to be deleted |
| 20 | +	private $db_master; |
| 21 | +	//Slave connection used for all lookups |
| 22 | +	private $db_slave; |
| 23 | +	//Result sets of the lookups, keyed 'url_location', 'log_check' and 'queue_check' |
| 24 | +	private $db_result; |
| 25 | + |
| 26 | +	public function __construct() { |
| 27 | +		parent::__construct( 'ViewArchive' ); |
| 28 | +	} |
| 29 | + |
| 30 | +	/** |
| 31 | +	 * Looks the requested URL up in el_archive_resource. A missing URL is reported to the |
| 32 | +	 * user; for a known URL the skin is disabled and the HTML header is printed. |
| 33 | +	 * |
| 34 | +	 * @param $par String|null Subpage part of the title; used as the URL when set |
| 35 | +	 */ |
| 36 | +	public function execute( $par ) { |
| 37 | +		global $wgOut, $wgRequest; |
| 38 | + |
| 39 | +		//The subpage takes precedence over the request parameter |
| 40 | +		$url = isset( $par ) ? $par : $wgRequest->getText( 'archive_url' ); |
| 41 | + |
| 42 | +		if ( strlen( $url ) === 0 ) { |
| 43 | +			//The user has not requested a URL, let's print a form so they can do so |
| 44 | +			$this->output_form(); |
| 45 | +			return; |
| 46 | +		} |
| 47 | + |
| 48 | +		$this->db_slave = wfGetDB( DB_SLAVE ); |
| 49 | +		$this->db_result = array(); |
| 50 | + |
| 51 | +		//select() escapes and quotes the values of an array condition itself, so the raw URL is |
| 52 | +		//passed; escaping it beforehand would double-escape quotes and backslashes |
| 53 | +		$this->db_result['url_location'] = $this->db_slave->select( 'el_archive_resource', '*', array( 'resource_url' => $url ), __METHOD__ ); |
| 54 | + |
| 55 | +		if ( $this->db_result['url_location']->numRows() < 1 ) { |
| 56 | +			//This URL doesn't exist in the archive, let's say so |
| 57 | +			$this->output_not_found( $url ); |
| 58 | +			return; |
| 59 | +		} |
| 60 | + |
| 61 | +		//Disable the output so we don't get a skin around the archived content |
| 62 | +		$wgOut->disable(); |
| 63 | + |
| 64 | +		//Output is buffered from here on; nothing in this class ends the buffer explicitly |
| 65 | +		ob_start(); |
| 66 | + |
| 67 | +		echo HTML::htmlHeader(); |
| 68 | +	} |
| 69 | + |
| 70 | +	/** |
| 71 | +	 * Handles a URL without an archived copy: checks the archive log and the queue for it, |
| 72 | +	 * deletes duplicate queue rows and prints the form followed by the "not found" message. |
| 73 | +	 * |
| 74 | +	 * @param $url String URL that was looked up |
| 75 | +	 */ |
| 76 | +	private function output_not_found( $url ) { |
| 77 | +		global $wgOut; |
| 78 | + |
| 79 | +		$this->db_result['log_check'] = $this->db_slave->select( 'el_archive_log', '*', array( 'log_url' => $url ), __METHOD__ ); |
| 80 | +		$this->db_result['queue_check'] = $this->db_slave->select( 'el_archive_queue', '*', array( 'url' => $url ), __METHOD__ ); |
| 81 | + |
| 82 | +		$num_rows = $this->db_result['queue_check']->numRows(); |
| 83 | + |
| 84 | +		if ( $num_rows > 1 ) { |
| 85 | +			//We found duplicates: skip the first row so one job survives, then delete the rest |
| 86 | +			$this->db_master = wfGetDB( DB_MASTER ); |
| 87 | +			$this->db_result['queue_check']->fetchRow(); |
| 88 | +			while ( $row = $this->db_result['queue_check']->fetchRow() ) { |
| 89 | +				$this->db_master->delete( 'el_archive_queue', array( 'queue_id' => $row['queue_id'] ), __METHOD__ ); |
| 90 | +			} |
| 91 | +		} |
| 92 | + |
| 93 | +		//TODO: these flags are not shown yet; use them to tell the user why the URL is missing |
| 94 | +		$in_queue = $num_rows >= 1; |
| 95 | +		$in_logs = $this->db_result['log_check']->numRows() >= 1; |
| 96 | + |
| 97 | +		$this->output_form(); |
| 98 | +		$wgOut->addWikiMsg( 'archivelinks-view-archive-url-not-found' ); |
| 99 | +	} |
| 100 | + |
| 101 | +	/** |
| 102 | +	 * Prints the page headers, the description and the form asking for the URL to look up. |
| 103 | +	 */ |
| 104 | +	private function output_form( ) { |
| 105 | +		global $wgOut, $wgScript; |
| 106 | +		$this->setHeaders(); |
| 107 | +		$wgOut->addWikiMsg( 'archivelinks-view-archive-desc' ); |
| 108 | + |
| 109 | +		//A GET submission replaces the query string of the action, which would drop "title" on |
| 110 | +		//wikis without short URLs, so the form targets the script and sends the title as a field |
| 111 | +		$wgOut->addHTML( |
| 112 | +			HTML::openElement( 'form', array( 'method' => 'get', 'action' => $wgScript ) ) . |
| 113 | +			HTML::hidden( 'title', SpecialPage::getTitleFor( 'ViewArchive' )->getPrefixedText() ) . |
| 114 | +			HTML::openElement( 'fieldset' ) . |
| 115 | +			HTML::element( 'legend', null, wfMsg( 'ViewArchive' ) ) . |
| 116 | +			XML::inputLabel( wfMsg( 'archivelinks-view-archive-url-field' ), 'archive_url', 'archive-links-archive-url', 120 ) . |
| 117 | +			HTML::element( 'br' ) . |
| 118 | +			XML::submitButton( wfMsg( 'archivelinks-view-archive-submit-button' ) ) . |
| 119 | +			HTML::closeElement( 'fieldset' ) . |
| 120 | +			HTML::closeElement( 'form' ) |
| 121 | +		); |
| 122 | +	} |
| 123 | +} |
\ No newline at end of file |
Property changes on: trunk/extensions/ArchiveLinks/SpecialViewArchive.php |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 100 | + native |
Index: trunk/extensions/ArchiveLinks/ArchiveLinks.i18n.php |
— | — | @@ -8,7 +8,8 @@ |
9 | 9 | //English |
10 | 10 | $messages['en'] = array( |
11 | 11 | 'archivelinks-cache-title' => 'cache', |
12 | | - 'ModifyArchiveBlacklist' => 'Modify Blacklist', |
| 12 | + 'ModifyArchiveBlacklist' => 'Modify Archive Blacklist', |
| 13 | + 'ViewArchive' => 'View Archive', |
13 | 14 | 'archivelinks-modify-blacklist-desc' => 'This page allows you to blacklist or whitelist URLs for the ArchiveLinks extension.', |
14 | 15 | //'archivelinks-archive-blacklist-fieldset-label' => 'Blacklist a URL', |
15 | 16 | 'archivelinks-modify-blacklist-url-field-label' => 'URL to Blacklist:', |
— | — | @@ -20,4 +21,9 @@ |
21 | 22 | 'archivelinks-modify-blacklist-whitelist-field-label' => 'Blacklist', |
22 | 23 | 'archivelinks-modify-blacklist-blacklist-field-label' => 'Whitelist', |
23 | 24 | 'archivelinks-modify-blacklist-blacklist-or-whitelist-field-label' => 'Which list would you like to modify?', |
| 25 | + 'archivelinks-view-archive-desc' => 'This is the special page for viewing archived external links, please enter the URL of the archive you\'d like to view.', |
| 26 | + 'archivelinks-view-archive-submit-button' => 'View Archive', |
| 27 | + 'archivelinks-view-archive-url-field' => 'URL of page:', |
| 28 | + 'archivelinks-view-archive-url-not-found' => 'We\'re sorry, the URL you requested was not found in the archive for the following reason.', |
| 29 | + |
24 | 30 | ); |
\ No newline at end of file |
Index: trunk/extensions/ArchiveLinks/ArchiveLinks.php |
— | — | @@ -15,14 +15,17 @@ |
16 | 16 | |
17 | 17 | $wgExtensionMessagesFiles['ArchiveLinks'] = "$path/ArchiveLinks.i18n.php"; |
18 | 18 | $wgExtensionMessagesFiles['ModifyArchiveBlacklist'] = "$path/ArchiveLinks.i18n.php"; |
| 19 | +$wgExtensionMessagesFiles['ViewArchive'] = "$path/ArchiveLinks.i18n.php"; |
19 | 20 | |
20 | 21 | $wgAutoloadClasses['ArchiveLinks'] = "$path/ArchiveLinks.class.php"; |
21 | 22 | $wgAutoloadClasses['SpecialModifyArchiveBlacklist'] = "$path/SpecialModifyArchiveBlacklist.php"; |
| 23 | +$wgAutoloadClasses['SpecialViewArchive'] = "$path/SpecialViewArchive.php"; |
22 | 24 | |
23 | 25 | $wgHooks['ArticleSaveComplete'][] = 'ArchiveLinks::queueExternalLinks'; |
24 | 26 | $wgHooks['LinkerMakeExternalLink'][] = 'ArchiveLinks::rewriteLinks'; |
25 | 27 | |
26 | 28 | $wgSpecialPages['ModifyArchiveBlacklist'] = 'SpecialModifyArchiveBlacklist'; |
| 29 | +$wgSpecialPages['ViewArchive'] = 'SpecialViewArchive'; |
27 | 30 | |
28 | 31 | $wgArchiveLinksConfig = array( |
29 | 32 | 'archive_service' => 'wikiwix', |
Index: trunk/extensions/ArchiveLinks/SpecialModifyArchiveBlacklist.php |
— | — | @@ -14,14 +14,14 @@ |
15 | 15 | public function execute($par) { |
16 | 16 | global $wgOut, $wgRequest; |
17 | 17 | $this->setHeaders(); |
18 | | - $this->outputHeader(); |
| 18 | + //$this->outputHeader(); |
19 | 19 | |
20 | 20 | $wgOut->addWikiMsg('archivelinks-modify-blacklist-desc'); |
21 | 21 | |
22 | 22 | $wgOut->addHTML( |
23 | 23 | HTML::openElement('form', array('method' => 'post', 'action' => SpecialPage::getTitleFor('ModifyBlacklist')->getLocalUrl())) . |
24 | 24 | HTML::openElement('fieldset') . |
25 | | - HTML::element('legend', null, wfMsg('ModifyBlacklist')) . |
| 25 | + HTML::element('legend', null, wfMsg('ModifyArchiveBlacklist')) . |
26 | 26 | //HTML::hidden( 'title', SpecialPage::getTitleFor( 'ArchiveBlacklist' )->getPrefixedText() ) . |
27 | 27 | HTML::openElement('table') . |
28 | 28 | HTML::openElement('tr') . |
Index: trunk/extensions/ArchiveLinks/spider.php |
— | — | @@ -58,29 +58,29 @@ |
59 | 59 | } |
60 | 60 | |
61 | 61 | private function call_wget( $url ) { |
62 | | - global $wgArchiveLinksConfig; |
63 | | - if ( array_key_exists( 'path_to_wget', $wgArchiveLinksConfig ) && file_exists( $wgArchiveLinksConfig['path_to_wget'] ) ) { |
| 62 | + global $wgArchiveLinksConfig, $path; |
| 63 | + if ( array_key_exists( 'wget_path', $wgArchiveLinksConfig ) && file_exists( $wgArchiveLinksConfig['wget_path'] ) ) { |
64 | 64 | die ( 'Support is not yet added for wget in a different directory' ); |
65 | | - } elseif ( file_exists( "$path/wget.exe" ) ) { |
66 | | - if ( $wgArchiveLinksConfig['file_types_to_archive'] ) { |
67 | | - if ( is_array( $wgArchiveLinksConfig['file_types_to_archive']) ){ |
68 | | - $accept_file_types = '-A ' . implode( ',', $wgArchiveLinksConfig['filetypes_to_archive'] ); |
| 65 | + } elseif ( file_exists( "$path/extensions/ArchiveLinks/wget.exe" ) ) { |
| 66 | + if ( array_key_exists( 'file_types', $wgArchiveLinksConfig ) ) { |
| 67 | + if ( is_array( $wgArchiveLinksConfig['file_types']) ){ |
| 68 | + $accept_file_types = '-A ' . implode( ',', $wgArchiveLinksConfig['filetypes'] ); |
69 | 69 | } else { |
70 | | - $accept_file_types = '-A ' . $wgArchiveLinksConfig['file_types_to_archive']; |
| 70 | + $accept_file_types = '-A ' . $wgArchiveLinksConfig['file_types']; |
71 | 71 | } |
72 | 72 | } else { |
73 | 73 | $accept_file_types = ''; |
74 | 74 | } |
75 | 75 | //At the current time we are only adding support for the local filestore, but swift support is something that will be added later |
76 | | - switch( $wgArchiveLinksConfig['filestore_to_use'] ) { |
| 76 | + switch( $wgArchiveLinksConfig['filestore'] ) { |
77 | 77 | case 'local': |
78 | 78 | default: |
79 | | - if ( $wgArchiveLinksConfig['subfolder_name'] ) { |
| 79 | + if ( array_key_exists( 'subfolder_name', $wgArchiveLinksConfig ) ) { |
80 | 80 | $content_dir = 'extensions/ArchiveLinks/' . $wgArchiveLinksConfig['subfolder_name']; |
81 | 81 | } elseif ( $wgArchiveLinksConfig['content_path'] ) { |
82 | 82 | $content_dir = realpath( $wgArchiveLinksConfig['content_path'] ); |
83 | 83 | if ( !$content_dir ) { |
84 | | - die ( 'The path you have set for $wgArchiveLinksConfig[\'content_path\'] does not exist.' . |
| 84 | + die ( 'The path you have set for $wgArchiveLinksConfig[\'content_path\'] does not exist. ' . |
85 | 85 | 'This makes the spider a very sad panda. Please either create it or use a different setting.'); |
86 | 86 | } |
87 | 87 | } else { |
— | — | @@ -90,9 +90,14 @@ |
91 | 91 | $dir = escapeshellarg( $dir ); |
92 | 92 | $sanitized_url = escapeshellarg( $url ); |
93 | 93 | } |
94 | | - |
95 | | - shell_exec( "cd $path" ); |
96 | | - shell_exec( "wget.exe -nH -p -H -E -k -o \"./log.txt\" -Q2m -P $dir $accept_file_types $sanitized_url" ); |
| 94 | + if ( array_key_exists( 'wget_quota', $wgArchiveLinksConfig ) ) { |
| 95 | + $quota = $wgArchiveLinksConfig['wget_quota']; |
| 96 | + } else { |
| 97 | + //We'll set the default max quota for any specific web page for 8 mb, which is kind of a lot but should allow for large images |
| 98 | + $quota = '8m'; |
| 99 | + } |
| 100 | + shell_exec( "cd $path/extensions/ArchiveLinks/" ); |
| 101 | + shell_exec( "wget.exe -nH -p -H -E -k -Q$quota -P $dir $accept_file_types $sanitized_url" ); |
97 | 102 | } else { |
98 | 103 | //this is primarily designed with windows in mind and no built in wget, so yeah, *nix support should be added, in other words note to self... |
99 | 104 | die ( 'wget must be installed in order for the spider to function in wget mode' ); |
— | — | @@ -120,7 +125,7 @@ |
121 | 126 | |
122 | 127 | /** |
123 | 128 | * This function checks a local file for a local block of jobs that is to be done |
124 | | - * if there is none that exists it gets a block, create ones, and waits to avoid any replag problems |
| 129 | + * if there is none that exists it gets a block, creates one, and waits to avoid any replag problems |
125 | 130 | */ |
126 | 131 | private function replication_check_queue( ) { |
127 | 132 | global $path, $wgArchiveLinksConfig; |
— | — | @@ -171,7 +176,7 @@ |
172 | 177 | array_key_exists( 'in_progress_ignore_delay', $wgArchiveLinksConfig ) ? $ignore_in_prog_time = $wgArchiveLinksConfig['in_progress_ignore_delay'] : |
173 | 178 | $ignore_in_prog_time = 7200; |
174 | 179 | |
175 | | - if ( $reserve_time + $ingore_in_prog_time + $wait_time > $ignore_in_prog_time + $wait_time ) { |
| 180 | + if ( $time - $reserve_time - $wait_time > $ignore_in_prog_time ) { |
176 | 181 | $retval = $this->reserve_job( $row ); |
177 | 182 | } |
178 | 183 | } |