Index: trunk/extensions/ArchiveLinks/ArchiveLinks.class.php |
— | — | @@ -79,7 +79,7 @@ |
80 | 80 | . HTML::openElement('sup') |
81 | 81 | . HTML::openElement('small') |
82 | 82 | . ' ' |
83 | | - . HTML::element('a', array ( 'href' => $link_to_archive ), '[' . wfMsg( 'archivelinks-cache-title') . ']') |
| 83 | + . HTML::element('a', array ( 'rel' => 'nofollow', 'href' => $link_to_archive ), '[' . wfMsg( 'archivelinks-cache-title') . ']') |
84 | 84 | . HTML::closeElement('small') |
85 | 85 | . HTML::closeElement('sup'); |
86 | 86 | |
Index: trunk/extensions/ArchiveLinks/spider.php |
— | — | @@ -5,7 +5,7 @@ |
6 | 6 | */ |
7 | 7 | $path = getenv('MW_INSTALL_PATH'); |
8 | 8 | if (strval($path) === '') { |
9 | | - $path = dirname(__FILE__) . '/../..'; |
| 9 | + $path = realpath( dirname(__FILE__) . '/../..' ); |
10 | 10 | } |
11 | 11 | |
12 | 12 | require_once "$path/maintenance/Maintenance.php"; |
— | — | @@ -18,7 +18,7 @@ |
19 | 19 | private $jobs; |
20 | 20 | |
21 | 21 | public function execute( ) { |
22 | | - global $wgArchiveLinksConfig, $wgLoadBalancer; |
| 22 | + global $wgArchiveLinksConfig, $wgLoadBalancer, $path; |
23 | 23 | |
24 | 24 | $this->db_master = $this->getDB(DB_MASTER); |
25 | 25 | $this->db_slave = $this->getDB(DB_SLAVE); |
— | — | @@ -34,24 +34,72 @@ |
35 | 35 | die( 'Sorry, at the current time running the spider as a daemon isn\'t supported.' ); |
36 | 36 | } else { |
37 | 37 | //for right now we will pipe everything through the replication_check_queue function just for testing purposes |
38 | | - /* if ( $wgLoadBalancer->getServerCount() > 1 ) { |
39 | | - if ( ( $url = $this->replication_check_queue() ) !== false ) { |
40 | | - |
41 | | - } |
| 38 | + /*if ( $wgLoadBalancer->getServerCount() > 1 ) { |
| 39 | + if ( ( $url = $this->replication_check_queue() ) !== false ) { |
| 40 | + |
| 41 | + } |
42 | 42 | } else { |
43 | | - if ( ( $url = $this->check_queue() ) !== false ) { |
| 43 | + if ( ( $url = $this->check_queue() ) !== false ) { |
| 44 | + |
| 45 | + } |
| 46 | + }*/ |
44 | 47 | |
45 | | - } |
46 | | - } */ |
47 | | - |
48 | | - if ( ( $url = $this->replication_check_queue() ) !== false ) { |
49 | | - |
| 48 | + if ( ( $url = $this->check_queue() ) !== false ) { |
| 49 | + switch( $wgArchiveLinksConfig['download_lib'] ) { |
| 50 | + case 'curl': |
| 51 | + die( 'At the current time support for libcurl is not available.' ); |
| 52 | + case 'wget': |
| 53 | + default: |
| 54 | + $this->call_wget( $url ); |
| 55 | + } |
50 | 56 | } |
51 | 57 | } |
52 | 58 | return null; |
53 | 59 | } |
| 60 | + |
| 61 | + private function call_wget( $url ) { |
| 62 | + global $wgArchiveLinksConfig; |
| 63 | + if ( array_key_exists( 'path_to_wget', $wgArchiveLinksConfig ) && file_exists( $wgArchiveLinksConfig['path_to_wget'] ) ) { |
| 64 | + die ( 'Support is not yet added for wget in a different directory' ); |
| 65 | + } elseif ( file_exists( "$path/wget.exe" ) ) { |
| 66 | + if ( $wgArchiveLinksConfig['file_types_to_archive'] ) { |
| 67 | + if ( is_array( $wgArchiveLinksConfig['file_types_to_archive']) ){ |
| 68 | + $accept_file_types = '-A ' . implode( ',', $wgArchiveLinksConfig['filetypes_to_archive'] ); |
| 69 | + } else { |
| 70 | + $accept_file_types = '-A ' . $wgArchiveLinksConfig['file_types_to_archive']; |
| 71 | + } |
| 72 | + } else { |
| 73 | + $accept_file_types = ''; |
| 74 | + } |
| 75 | + //At the current time we are only adding support for the local filestore, but swift support is something that will be added later |
| 76 | + switch( $wgArchiveLinksConfig['filestore_to_use'] ) { |
| 77 | + case 'local': |
| 78 | + default: |
| 79 | + if ( $wgArchiveLinksConfig['subfolder_name'] ) { |
| 80 | + $content_dir = 'extensions/ArchiveLinks/' . $wgArchiveLinksConfig['subfolder_name']; |
| 81 | + } elseif ( $wgArchiveLinksConfig['content_path'] ) { |
| 82 | + $content_dir = realpath( $wgArchiveLinksConfig['content_path'] ); |
| 83 | + if ( !$content_dir ) { |
| 84 | + die ( 'The path you have set for $wgArchiveLinksConfig[\'content_path\'] does not exist.' . |
| 85 | + 'This makes the spider a very sad panda. Please either create it or use a different setting.'); |
| 86 | + } |
| 87 | + } else { |
| 88 | + $content_dir = 'extensions/ArchiveLinks/' . 'archived_content/'; |
| 89 | + } |
| 90 | + $dir = $path . $content_dir . sha1( time() . ' - ' . $url ); |
| 91 | + $dir = escapeshellarg( $dir ); |
| 92 | + $sanitized_url = escapeshellarg( $url ); |
| 93 | + } |
54 | 94 | |
55 | | - /*private function check_queue( ) { |
| 95 | + shell_exec( "cd $path" ); |
| 96 | + shell_exec( "wget.exe -nH -p -H -E -k -o \"./log.txt\" -Q2m -P $dir $accept_file_types $sanitized_url" ); |
| 97 | + } else { |
| 98 | + //this is primarily designed with windows in mind and no built in wget, so yeah, *nix support should be added, in other words note to self... |
| 99 | + die ( 'wget must be installed in order for the spider to function in wget mode' ); |
| 100 | + } |
| 101 | + } |
| 102 | + |
| 103 | + private function check_queue( ) { |
56 | 104 | //need to fix this to use arrays instead of what I'm doing now |
57 | 105 | $this->db_result['job-fetch'] = $this->db_slave->select('el_archive_queue', '*', '`el_archive_queue`.`delay_time` <= ' . time() |
58 | 106 | . ' AND `el_archive_queue`.`in_progress` = 0' |
— | — | @@ -61,14 +109,14 @@ |
62 | 110 | if ( $this->db_result['job-fetch']->numRows() > 0 ) { |
63 | 111 | $row = $this->db_result['job-fetch']->fetchRow(); |
64 | 112 | |
65 | | - $this->delete_dups( $row['url'] ); |
| 113 | + //$this->delete_dups( $row['url'] ); |
66 | 114 | |
67 | 115 | return $row['url']; |
68 | 116 | } else { |
69 | 117 | //there are no jobs to do right now |
70 | 118 | return false; |
71 | 119 | } |
72 | | - }*/ |
| 120 | + } |
73 | 121 | |
74 | 122 | /** |
75 | 123 | * This function checks a local file for a local block of jobs that is to be done |
— | — | @@ -123,7 +171,7 @@ |
124 | 172 | array_key_exists( 'in_progress_ignore_delay', $wgArchiveLinksConfig ) ? $ignore_in_prog_time = $wgArchiveLinksConfig['in_progress_ignore_delay'] : |
125 | 173 | $ignore_in_prog_time = 7200; |
126 | 174 | |
127 | | - if ( $reserve_time - $time > $ignore_in_prog_time ) { |
| 175 | + if ( $reserve_time + $ingore_in_prog_time + $wait_time > $ignore_in_prog_time + $wait_time ) { |
128 | 176 | $retval = $this->reserve_job( $row ); |
129 | 177 | } |
130 | 178 | } |