r99886 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r99885‎ | r99886 | r99887 >
Date:15:39, 15 October 2011
Author:reedy
Status:deferred
Tags:
Comment:
Fix spaces to tabs

Fix a couple of typos

svn:keywords Id for ApiQueryArchiveFeed

Some documentation
Modified paths:
  • /trunk/extensions/ArchiveLinks/ApiQueryArchiveFeed.php (modified) (history)
  • /trunk/extensions/ArchiveLinks/ArchiveLinks.class.php (modified) (history)
  • /trunk/extensions/ArchiveLinks/ArchiveLinks.php (modified) (history)
  • /trunk/extensions/ArchiveLinks/INSTALL (modified) (history)
  • /trunk/extensions/ArchiveLinks/README (modified) (history)
  • /trunk/extensions/ArchiveLinks/SpecialViewArchive.php (modified) (history)
  • /trunk/extensions/ArchiveLinks/spider.php (modified) (history)

Diff [purge]

Index: trunk/extensions/ArchiveLinks/SpecialViewArchive.php
@@ -1,11 +1,14 @@
22 <?php
33 /**
4 - * This special page exists to serve the cached versions of the pages that have been archived.
 4+ * This special page exists to serve the cached versions of the pages that have been archived.
55 */
66
77 class SpecialViewArchive extends SpecialPage {
8 - private $db_master;
9 - private $db_slave;
 8+
 9+ /**
 10+ * @var DatabaseBase
 11+ */
 12+ private $db_master, $db_slave;
1013 private $db_result;
1114
1215 function __construct() {
@@ -15,30 +18,30 @@
1619 /**
1720 * Main function for the view archive page. This queries the resource table, disables
1821 * output, and then displays the archived version of whichever page you'd like to view.
19 - *
20 - * @global $wgOut object
 22+ *
 23+ * @global $wgOut OutputPage
2124 * @global $wgRequest object
2225 * @param $par
2326 */
2427 public function execute( $par ) {
2528 global $wgOut, $wgRequest;
26 -
 29+
2730 if ( isset( $par ) || $url = $wgRequest->getText( 'archive_url' ) ) {
2831 $this->db_master = wfGetDB( DB_MASTER );
2932 $this->db_slave = wfGetDB( DB_SLAVE );
3033 $db_result = array();
31 -
 34+
3235 if( !isset( $url ) ) {
3336 $url = $par;
3437 }
35 -
 38+
3639 $this->db_result['url_location'] = $this->db_slave->select( 'el_archive_resource', '*', array( 'resource_url' => $this->db_slave->strencode( $url ) ), __METHOD__ );
37 -
 40+
3841 if ( $this->db_result['url_location']->numRows() < 1 ) {
3942 //This URL doesn't exist in the archive, let's say so
4043 $this->db_result['log_check'] = $this->db_slave->select( 'el_archive_log', '*', array( 'log_url' => $this->db_slave->strencode( $url ) ), __METHOD__ );
4144 $this->db_result['queue_check'] = $this->db_slave->select( 'el_archive_queue', '*', array( 'url' => $this->db_slave->strencode( $url ) ), __METHOD__ );
42 -
 45+
4346 if ( ( $num_rows = $this->db_result['queue_check']->numRows() ) === 1 ) {
4447 $in_queue = true;
4548 } elseif ( $num_rows > 1 ) {
@@ -50,13 +53,13 @@
5154 } else {
5255 $in_queue = false;
5356 }
54 -
 57+
5558 if ( $this->db_result['log_check']->numRows() >= 1 ) {
5659 $in_logs = true;
5760 } else {
5861 $in_logs = false;
5962 }
60 -
 63+
6164 $this->output_form();
6265 $wgOut->addWikiMsg( 'archivelinks-view-archive-url-not-found' );
6366 /*$wgOut->addHTML(
@@ -70,29 +73,29 @@
7174 } else {
7275 //Disable the output so we don't get a skin around the archived content
7376 $wgOut->disable();
74 -
 77+
7578 ob_start();
76 -
 79+
7780 echo HTML::htmlHeader();
7881 }
79 -
 82+
8083 } else {
8184 //The user has not requested a URL, let's print a form so they can do so :D
8285 $this->output_form();
8386 }
8487 }
85 -
 88+
8689 /**
8790 * Uses the HTML functions to output the appropriate form for the special page if no archived version
8891 * exists or if no query has been specified by the user yet.
89 - *
 92+ *
9093 * @global $wgOut object
9194 */
9295 private function output_form( ) {
9396 global $wgOut;
9497 $this->setHeaders();
95 - $wgOut->addWikiMsg( 'archivelinks-view-archive-desc' );
96 -
 98+ $wgOut->addWikiMsg( 'archivelinks-view-archive-desc' );
 99+
97100 $wgOut->addHTML(
98101 HTML::openElement( 'form', array( 'method' => 'get', 'action' => SpecialPage::getTitleFor( 'ViewArchive' )->getLocalUrl() ) ) .
99102 HTML::openElement( 'fieldset' ) .
@@ -104,4 +107,4 @@
105108 HTML::closeElement( 'form' )
106109 );
107110 }
108 -}
\ No newline at end of file
 111+}
Index: trunk/extensions/ArchiveLinks/INSTALL
@@ -5,9 +5,9 @@
66 Configuration settings are in the array $wgArchiveLinksConfig, which is currently defined in ArchiveLinks.php for testing purposes.
77
88 $wgArchiveLinksConfig = array (
9 - 'archive_service' => 'wikiwix',
10 - 'use_multiple_archives' => false,
11 - 'run_spider_in_loop' => false,
 9+ 'archive_service' => 'wikiwix',
 10+ 'use_multiple_archives' => false,
 11+ 'run_spider_in_loop' => false,
1212 );
1313
1414 archive_service has the following options:
Index: trunk/extensions/ArchiveLinks/ArchiveLinks.php
@@ -5,8 +5,6 @@
66 * in the event they go down a backup will be available.
77 */
88
9 -error_reporting( E_ALL | E_STRICT );
10 -
119 $path = dirname( __FILE__ );
1210
1311 $wgExtensionMessagesFiles['ArchiveLinks'] = "$path/ArchiveLinks.i18n.php";
@@ -44,10 +42,10 @@
4543 );
4644
4745 $wgExtensionCredits['other'][] = array(
48 - 'path' => __FILE__,
49 - 'name' => 'ArchiveLinks',
50 - 'description' => 'Enables archival of external links on the wiki to prevent linkrot.',
51 - 'version' => '0.1',
52 - 'author' => 'Kevin Brown',
53 - 'url' => '',
54 -);
\ No newline at end of file
 46+ 'path' => __FILE__,
 47+ 'name' => 'ArchiveLinks',
 48+ 'description' => 'Enables archival of external links on the wiki to prevent linkrot.',
 49+ 'version' => '0.1',
 50+ 'author' => 'Kevin Brown',
 51+ 'url' => '',
 52+);
Index: trunk/extensions/ArchiveLinks/ApiQueryArchiveFeed.php
@@ -4,52 +4,52 @@
55 function __construct ( $query, $moduleName ) {
66 parent::__construct( $query, $moduleName, 'arl' );
77 }
8 -
 8+
99 /**
1010 * This is the primary execute function for the API. It processes the query and returns
1111 * a valid API result.
1212 */
1313 public function execute ( ) {
1414 $params = $this->extractRequestParams();
15 -
 15+
1616 $this->addTables( 'el_archive_queue' );
1717 $this->addFields( '*' );
1818 $this->addWhereRange( 'queue_id', $params['dir'], $params['start'], $params['end'] );
1919 $this->addOption( 'LIMIT', $params['limit'] + 1 );
20 -
 20+
2121 $res = $this->select( __METHOD__ );
22 -
 22+
2323 $val = array( );
2424 $count = 0;
2525 $result = $this->getResult();
26 -
 26+
2727 foreach ( $res as $row ) {
2828 //much of this is stolen from ApiQueryRecentChanges
2929 if ( ++ $count > $params['limit'] ) {
3030 $this->setContinueEnumParameter( 'start', $row->queue_id );
3131 break;
3232 }
33 -
 33+
3434 $val['feed_id'] = $row->queue_id;
3535 $val['time'] = $row->insertion_time;
36 - $val['page_id'] = $row->page_id;
37 - $val['url'] = $row->url;
 36+ $val['page_id'] = $row->page_id;
 37+ $val['url'] = $row->url;
3838
3939 $fit = $result->addValue( array( 'query', $this->getModuleName() ), null, $val );
40 -
 40+
4141 if ( !$fit ) {
4242 $this->setContinueEnumParameter( 'start', $row->queue_id );
4343 break;
4444 }
4545 }
46 -
47 - $result = $result->setIndexedTagName_internal( array( 'query', $this->getModuleName() ), 'al' );
 46+
 47+ $result->setIndexedTagName_internal( array( 'query', $this->getModuleName() ), 'al' );
4848 }
49 -
 49+
5050 function getVersion() {
51 - return __CLASS__;
 51+ return __CLASS__ . ': $Id$';
5252 }
53 -
 53+
5454 function getAllowedParams() {
5555 return array(
5656 'limit' => array(
@@ -74,4 +74,4 @@
7575 )
7676 );
7777 }
78 -}
\ No newline at end of file
 78+}
Property changes on: trunk/extensions/ArchiveLinks/ApiQueryArchiveFeed.php
___________________________________________________________________
Added: svn:keywords
7979 + Id
Index: trunk/extensions/ArchiveLinks/ArchiveLinks.class.php
@@ -4,29 +4,29 @@
55 */
66
77 class ArchiveLinks {
8 - private $db_master;
9 - private $db_slave;
10 - private $db_result;
11 -
12 - /**
 8+ private $db_master;
 9+ private $db_slave;
 10+ private $db_result;
 11+
 12+ /**
1313 * This is the primary function for the Archive Links Extension
1414 * It fires off of the ArticleSaveComplete hook and is primarily responsible for updating
1515 * the appropriate tables to begin the process of archival
16 - *
17 - * @param $article object article object from ArticleSaveComplete hook
 16+ *
 17+ * @param $article Article object article object from ArticleSaveComplete hook
1818 * @return bool
1919 */
2020 public static function queueExternalLinks ( &$article ) {
2121 global $wgParser, $wgArchiveLinksConfig;
2222 $external_links = $wgParser->getOutput();
2323 $external_links = $external_links->mExternalLinks;
24 -
 24+
2525 $db_master = wfGetDB( DB_MASTER );
2626 $db_slave = wfGetDB( DB_SLAVE );
2727 $db_result = array();
28 -
 28+
2929 $db_master->begin();
30 -
 30+
3131 if ( !isset( $wgArchiveLinksConfig['global_rearchive_time'] ) ) {
3232 //30 days or 2,592,000 seconds...
3333 $wgArchiveLinksConfig['global_rearchive_time'] = 2592000;
@@ -36,24 +36,24 @@
3737 //200 days or 17,280,000 seconds
3838 $wgArchiveLinksConfig['page_rearchive_time'] = 1728000;
3939 }
40 -
 40+
4141 if( !isset( $wgArchiveLinksConfig['previous_archive_lockout_time'] ) ) {
4242 //2 days or 172,800 seconds
4343 $wgArchiveLinksConfig['previous_archive_lockout_time'] = 172800;
4444 }
45 -
 45+
4646 $page_id = $article->getID();
4747 $time = time();
48 -
 48+
4949 if ( $wgArchiveLinksConfig['generate_feed'] === true ) {
5050 $old_id = $article->getTitle();
5151 $old_id = $old_id->getPreviousRevisionID( $page_id );
52 -
 52+
5353 $db_result['links_on_page'] = $db_master->select( 'el_archive_link_history', '*', array( 'hist_page_id' => $page_id ), __METHOD__ );
54 -
 54+
5555 $old_external_links = array();
5656 $new_external_links = array();
57 -
 57+
5858 if ( $db_result['links_on_page']->numRows() > 0 ) {
5959 while( $row = $db_result['links_on_page']->fetchRow() ) {
6060 $old_external_links[] = $row['hist_url'];
@@ -66,14 +66,14 @@
6767 } elseif ( count( $external_links ) > 0 ) {
6868 $new_external_links = $external_links;
6969 }
70 -
 70+
7171 if ( !isset( $wgArchiveLinksConfig['link_insert_max'] ) ) {
7272 $wgArchiveLinksConfig['link_insert_max'] = 100;
7373 }
7474 die ( count( $new_external_links ));
7575 if ( count( $new_external_links ) <= $wgArchiveLinksConfig['link_insert_max'] ) {
7676 //insert the links into the queue now
77 - foreach( $new_external_links as $link ) {
 77+ foreach( $new_external_links as $link ) {
7878 $db_result['queue'] = $db_slave->select( 'el_archive_queue', '*', array( 'url' => $link ), __METHOD__, array( 'LIMIT' => '1', ) );
7979 $db_result['blacklist'] = $db_slave->select( 'el_archive_blacklist', '*', array( 'bl_url' => $link ), __METHOD__, array( 'LIMIT' => '1', ) );
8080
@@ -128,14 +128,14 @@
129129 }
130130
131131 $db_master->commit();
132 -
 132+
133133 return true;
134134 }
135 -
 135+
136136 /**
137137 * This is the function responsible for rewriting the link html to insert the [cache] link
138138 * after each external link on the page. This function will get called once for every external link.
139 - *
 139+ *
140140 * @global $wgArchiveLinksConfig array
141141 * @param $url string The url of the page (what would appear in href)
142142 * @param $text string The associated text of the URL (what would go between the anchor tags)
@@ -172,26 +172,26 @@
173173 break;
174174 }
175175 }
176 -
177 - $link = HTML::element('a', array ( 'rel' => 'nofollow', 'class' => $attributes['class'], 'href' => $url ), $text )
 176+
 177+ $link = HTML::element('a', array ( 'rel' => 'nofollow', 'class' => $attributes['class'], 'href' => $url ), $text )
178178 . HTML::openElement('sup')
179179 . HTML::openElement('small')
180180 . '&#160;'
181181 . HTML::element('a', array ( 'rel' => 'nofollow', 'href' => $link_to_archive ), '[' . wfMsg( 'archivelinks-cache-title') . ']')
182182 . HTML::closeElement('small')
183183 . HTML::closeElement('sup');
184 -
 184+
185185 return false;
186186 } else {
187187 return true;
188188 }
189189 }
190 -
 190+
191191 /**
192192 * This function is responsible for any database updates within the extension and hooks into
193193 * update.php
194 - *
195 - * @param $updater object Passed by the LoadExtensionSchemaUpdates hook
 194+ *
 195+ * @param $updater DatabaseUpdater object Passed by the LoadExtensionSchemaUpdates hook
196196 * @return bool
197197 */
198198 public static function schemaUpdates ( $updater = null ) {
@@ -202,42 +202,17 @@
203203 $path . '/setuptables.sql',
204204 true
205205 ));
206 - $updater->addExtensionUpdate( array(
207 - 'addTable',
208 - 'el_archive_queue',
209 - $path . '/setuptables.sql',
210 - true
211 - ));
212 - $updater->addExtensionUpdate( array(
213 - 'addTable',
214 - 'el_archive_log',
215 - $path . '/setuptables.sql',
216 - true
217 - ));
218 - $updater->addExtensionUpdate( array(
219 - 'addTable',
220 - 'el_archive_resource',
221 - $path . '/setuptables.sql',
222 - true
223 - ));
224 - $updater->addExtensionUpdate( array(
225 - 'addTable',
226 - 'el_archive_link_blacklist',
227 - $path . '/setuptables.sql',
228 - true
229 - ));
230206 return true;
231207 }
232208 }
233209
234210 class InsertURLsIntoQueue extends Job {
235 - public function __construct( $title, $params ) {
236 - // Replace synchroniseThreadArticleData with the an identifier for your job.
237 - parent::__construct( 'insertURLsIntoQueue', $title, $params );
238 - }
239 -
240 -
241 - public function run() {
242 -
243 - }
244 -}
\ No newline at end of file
 211+ public function __construct( $title, $params ) {
 212+ // Replace synchroniseThreadArticleData with the an identifier for your job.
 213+ parent::__construct( 'insertURLsIntoQueue', $title, $params );
 214+ }
 215+
 216+ public function run() {
 217+
 218+ }
 219+}
Index: trunk/extensions/ArchiveLinks/spider.php
@@ -12,31 +12,33 @@
1313
1414 class ArchiveLinksSpider extends Maintenance {
1515
16 - private $db_master;
17 - private $db_slave;
 16+ /**
 17+ * @var DatabaseBase
 18+ */
 19+ private $db_master, $db_slave;
1820 private $db_result;
1921 private $jobs;
2022 private $downloaded_files;
2123
2224 /**
23 - * Primary function called from Maintenance.php to run the actual spider.
 25+ * Primary function called from Maintenance.php to run the actual spider.
2426 * Queries the queue and then downloads and stores each link for which archival
2527 * has been requested
26 - *
 28+ *
2729 * @global $wgArchiveLinksConfig array
2830 * @global $wgLoadBalancer object
2931 * @global $path string Install path of mediawiki
3032 * @return bool
3133 */
3234 public function execute( ) {
33 - global $wgArchiveLinksConfig, $wgLoadBalancer, $path;
 35+ global $wgArchiveLinksConfig;
3436
3537 $this->db_master = $this->getDB(DB_MASTER);
3638 $this->db_slave = $this->getDB(DB_SLAVE);
3739 $this->db_result = array();
3840
3941 if ( $wgArchiveLinksConfig['run_spider_in_loop'] ) {
40 - /* while ( TRUE ) {
 42+ /* while ( TRUE ) {
4143 if ( ( $url = $this->check_queue() ) !== false ) {
4244
4345 }
@@ -47,11 +49,11 @@
4850 //for right now we will pipe everything through the replication_check_queue function just for testing purposes
4951 /*if ( $wgLoadBalancer->getServerCount() > 1 ) {
5052 if ( ( $url = $this->replication_check_queue() ) !== false ) {
51 -
 53+
5254 }
5355 } else {
5456 if ( ( $url = $this->check_queue() ) !== false ) {
55 -
 57+
5658 }
5759 }*/
5860
@@ -71,19 +73,19 @@
7274 }
7375 return null;
7476 }
75 -
 77+
7678 /**
7779 * This function goes and checks to make sure the configuration values are valid
7880 * Then calls wget, finds the result and updates the appropriate database tables to
79 - * record it.
80 - *
 81+ * record it.
 82+ *
8183 * @global $wgArchiveLinksConfig array
8284 * @global $path string
8385 * @param $url string the URL that is to be archived
8486 */
8587 private function call_wget( $url ) {
8688 global $wgArchiveLinksConfig, $path;
87 -
 89+
8890 //Check Configuration
8991 if ( isset( $wgArchiveLinksConfig['file_types'] ) ) {
9092 if ( is_array( $wgArchiveLinksConfig['file_types']) ){
@@ -105,8 +107,8 @@
106108 } elseif ( isset( $wgArchiveLinksConfig['content_path'] ) ) {
107109 $dir = realpath( $wgArchiveLinksConfig['content_path'] );
108110 if ( !$dir ) {
109 - die ( 'The path you have set for $wgArchiveLinksConfig[\'content_path\'] does not exist. ' .
110 - 'This makes the spider a very sad panda. Please either create it or use a different setting.');
 111+ $this->error ( 'The path you have set for $wgArchiveLinksConfig[\'content_path\'] does not exist. ' .
 112+ 'This makes the spider a very sad panda. Please either create it or use a different setting.');
111113 }
112114 } else {
113115 $dir = $path . '/archived_content/';
@@ -130,8 +132,8 @@
131133 //serveral minutes to go through all the retries which has the potential to stall the spider unnecessarily
132134 $wgArchiveLinksConfig['retry_times'] = '3';
133135 }
134 -
135 -
 136+
 137+
136138 //Do stuff with wget
137139 if ( isset( $wgArchiveLinksConfig['wget_path'] ) && file_exists( $wgArchiveLinksConfig['wget_path'] ) ) {
138140 die ( 'Support is not yet added for wget in a different directory' );
@@ -142,22 +144,22 @@
143145 $this->parse_wget_log( $log_dir, $url );
144146 /*foreach( $this->downloaded_files as $file ) {
145147 if ( $file['status'] === 'success' ) {
146 -
 148+
147149 } elseif ( $file['status'] === 'failure' ) {
148150 echo 'bar';
149151 }
150152 }*/
151 - $this->db_master->insert( $this->downloaded_files[0]['url'] );
 153+ $this->db_master->insert( $this->downloaded_files[0]['url'] ); // FIXME: Missing parameters
152154 } else {
153155 //this is primarily designed with windows in mind and no built in wget, so yeah, *nix support should be added, in other words note to self...
154 - die ( 'wget must be installed in order for the spider to function in wget mode' );
 156+ $this->error( 'wget must be installed in order for the spider to function in wget mode' );
155157 }
156158 }
157159
158160 /**
159161 * This function checks the archive queue without any attempt to work around replag.
160162 * Only one URL is taken at a time.
161 - *
 163+ *
162164 * @return mixed The URL to archive on success, False on failure
163165 */
164166 private function check_queue( ) {
@@ -166,12 +168,12 @@
167169 array( 'delay_time' => ' >=' . time(), 'in_progress' => '0'),
168170 __METHOD__,
169171 array( 'ORDER BY' => 'queue_id ASC', 'LIMIT' => '1' ));
170 -
 172+
171173 if ( $this->db_result['job-fetch']->numRows() > 0 ) {
172174 $row = $this->db_result['job-fetch']->fetchRow();
173 -
174 - //$this->delete_dups( $row['url'] );
175175
 176+ //$this->delete_dups( $row['url'] );
 177+
176178 return $row['url'];
177179 } else {
178180 //there are no jobs to do right now
@@ -181,10 +183,10 @@
182184
183185 /**
184186 * This function checks a local file for a local block of jobs that is to be done
185 - * if there is none that exists it gets a block, creates one, and waits for the
 187+ * if there is none that exists it gets a block, creates one, and waits for the
186188 * data to propagate to avoid any replag problems. All urls are not returned directly
187189 * but are put into $this->jobs.
188 - *
 190+ *
189191 * @return bool
190192 */
191193 private function replication_check_queue( ) {
@@ -194,28 +196,28 @@
195197 $file = unserialize( $file );
196198 } else {
197199 //we don't have any temp file, lets get a block of jobs to do and make one
198 - $this->db_result['job-fetch'] = $this->db_slave->select( 'el_archive_queue', '*',
 200+ $this->db_result['job-fetch'] = $this->db_slave->select( 'el_archive_queue', '*',
199201 array(
200202 'delay_time <= "' . time() . '"',
201203 'in_progress' => '0')
202 - , __METHOD__,
 204+ , __METHOD__,
203205 array(
204206 'LIMIT' => '15',
205207 'ORDER BY' => 'queue_id ASC'
206208 ));
207209 //echo $this->db_result['job-fetch'];
208 -
 210+
209211 $this->jobs = array();
210212
211213 $wait_time = wfGetLB()->safeGetLag( $this->db_slave ) * 3;
212214 $pid = (string) microtime() . ' - ' . getmypid();
213215 $time = time();
214 -
 216+
215217 //echo $pid;
216 -
 218+
217219 $this->jobs['pid'] = $pid;
218220 $this->jobs['execute_time'] = $wait_time + $time;
219 -
 221+
220222 if ($this->db_result['job-fetch']->numRows() > 0) {
221223 //$row = $this->db_result['job-fetch']->fetchRow();
222224 while ( $row = $this->db_result['job-fetch']->fetchRow() ) {
@@ -227,40 +229,40 @@
228230 } else {
229231 //in_progress is not equal to 0, this means that the job was reserved some time before
230232 //it could have been by a previous instance of this spider (assuming not running in a loop)
231 - //or a different spider entirely, since we don't have have a temp file to go on we have to assume
 233+ //or a different spider entirely, since we don't have a temp file to go on we have to assume
232234 //it was a different spider (it could have been deleted by a user), we will only ignore the in_progress
233235 //lock if it has been a long time (2 hours by default) since the job was initally reserved
234236 $reserve_time = explode( ' ', $row['in_progress'] );
235237 $reserve_time = $reserve_time[2];
236 -
 238+
237239 isset( $wgArchiveLinksConfig['in_progress_ignore_delay'] ) ? $ignore_in_prog_time = $wgArchiveLinksConfig['in_progress_ignore_delay'] :
238240 $ignore_in_prog_time = 7200;
239 -
 241+
240242 if ( $time - $reserve_time - $wait_time > $ignore_in_prog_time ) {
241243 $retval = $this->reserve_job( $row );
242244 }
243245 }
244 -
 246+
245247 } else {
246248 //let's wait for everything to replicate, add to temp file and check back later
247249 $this->jobs[] = $row;
248250 }
249251 }
250252 }
251 -
 253+
252254 //var_dump( $this->jobs );
253 -
 255+
254256 $this->jobs = serialize( $this->jobs );
255257 //file_put_contents( "$path/extensions/ArchiveLinks/spider-temp.txt", $this->jobs );
256258 }
257 -
 259+
258260 if ( $retval !== true ) {
259261 $retval = false;
260262 }
261263 return $retval;
262264 }
263 -
264 -
 265+
 266+
265267 /**
266268 * This function checks for duplicates in the queue table, if it finds one it keeps the oldest and deletes
267269 * everything else.
@@ -270,9 +272,9 @@
271273 private function delete_dups( $url ) {
272274 //Since we queried the slave to check for dups when we inserted instead of the master let's check
273275 //that the job isn't in the queue twice, we don't want to archive it twice
274 - $this->db_result['dup-check'] = $this->db_slave->select('el_archive_queue', '*', array( 'url' => $url ), __METHOD__,
 276+ $this->db_result['dup-check'] = $this->db_slave->select('el_archive_queue', '*', array( 'url' => $url ), __METHOD__,
275277 array( 'ORDER BY' => 'queue_id ASC' ) );
276 -
 278+
277279 if ( $this->db_result['dup-check']->numRows() > 1 ) {
278280 //keep only the first job and remove all duplicates
279281 $this->db_result['dup-check']->fetchRow();
@@ -281,16 +283,16 @@
282284 var_dump( $del_row );
283285 //this is commented for testing purposes, so I don't have to keep readding the duplicate to my test db
284286 //in other words this has a giant "remove before flight" ribbon hanging from it...
285 - //$this->db_master->delete( 'el_archive_queue', '`el_archive_queue`.`queue_id` = ' . $del_row['queue_id'] );
 287+ //$this->db_master->delete( 'el_archive_queue', '`el_archive_queue`.`queue_id` = ' . $del_row['queue_id'] );
286288 }
287289 }
288290 }
289 -
290 -
 291+
 292+
291293 /**
292294 * This function sets in_progress in the queue table to 1 so other instances of the spider know that
293295 * the job is in the process of being archived.
294 - *
 296+ *
295297 * @param $row array The row of the database result from the database object.
296298 * @return bool
297299 */
@@ -302,7 +304,7 @@
303305 $this->delete_dups( $row['url'] );
304306 return true;
305307 }
306 -
 308+
307309 /**
308310 * Uses regular expressions to parse the log file of wget in non-verbose mode
309311 * This is then returned to call_wget and updated in the db
@@ -313,10 +315,10 @@
314316 */
315317 private function parse_wget_log( $log_path, $url ) {
316318 $fp = fopen( $log_path, 'r' ) or die( 'can\'t find wget log file to parse' );
317 -
 319+
318320 $this->downloaded_files = array ( );
319 -
320 - $line_regexes = array (
 321+
 322+ $line_regexes = array (
321323 'url' => '%^\d{4}-(?:\d{2}(?:-|:| )?){5}URL:(http://.*?) \[.+?\] ->%',
322324 'finish' => '%^Downloaded: \d+ files, (\d(?:.\d)?+(?:K|M)).*%',
323325 'sole_url' => '%^(http://.*):%',
@@ -324,7 +326,7 @@
325327 'quota_exceed' => '%^Download quota of .*? EXCEEDED!%',
326328 'finish_line' => '%^FINISHED --(\d{4}-(?:\d{2}(?:-|:| )){5})-%',
327329 );
328 -
 330+
329331 while ( $line = fgets( $fp ) ) {
330332 foreach( $line_regexes as $line_type => $regex ) {
331333 if ( preg_match( $regex, $line, $matches ) ) {
@@ -365,7 +367,7 @@
366368 }
367369 }
368370 }
369 -
 371+
370372 return $this->downloaded_files;
371373 }
372374 }
Index: trunk/extensions/ArchiveLinks/README
@@ -1 +1 @@
2 -This a project currently under devolopment to add premementive archival to external links so that in the event that they go down a backup copy will exist. At the current time it is NOT stable and should not be used on any production wiki.
\ No newline at end of file
 2+This is a project currently under development to add preemptive archival to external links so that in the event that they go down a backup copy will exist. At the current time it is NOT stable and should not be used on any production wiki.

Status & tagging log