Index: trunk/extensions/ArchiveLinks/ArchiveLinks.php |
— | — | @@ -11,43 +11,12 @@ |
12 | 12 | |
13 | 13 | error_reporting ( E_ALL | E_STRICT ); |
14 | 14 | |
15 | | -//$wgJobClasses['synchroniseThreadArticleData'] = 'SynchroniseThreadArticleDataJob'; |
| 15 | +$path = dirname( __FILE__ ); |
16 | 16 | |
17 | | -// Hooks |
18 | | -/*$wgHooks['EditPage::attemptSave'][] = array ( 'getExternalLinks' ); |
| 17 | +$wgExtensionMessagesFiles['ArchiveLinks'] = "$path/ArchiveLinks.i18n.php"; |
19 | 18 | |
20 | | -function getExternalLinks ( $editpage ) { |
21 | | - //var_export ( $editpage , FALSE ); |
22 | | - //echo "hi"; |
23 | | - file_put_contents ( './stuff.txt', 'it works :D' . "\n" . var_export ( $editpage ) ); |
24 | | - return true; |
25 | | -}*/ |
26 | | - |
27 | | -//$wgHooks['LinkerMakeExternalLink'][] = 'findOutWhatTheHellThisHookGivesUs'; |
| 19 | +$wgAutoloadClasses['ArchiveLinks'] = "$path/ArchiveLinks.class.php"; |
28 | 20 | |
29 | | -function findOutWhatTheHellThisHookGivesUs ( &$url, &$text, &$link, &$attributes ) { |
30 | | - for ( $i = 0, $go = true ; $go !== false; ++$i ) { |
31 | | - if ( file_exists ( './extensions/ArchiveLinks/stuff-' . $i . '.txt' ) ) { |
32 | | - continue; |
33 | | - } else { |
34 | | - file_put_contents ( "./extensions/ArchiveLinks/stuff-$i.txt", |
35 | | - var_export ( $url , TRUE ) . "\n\n" . |
36 | | - var_export ( $text , TRUE) . "\n\n" . |
37 | | - var_export ( $link , TRUE) . "\n\n" . |
38 | | - var_export ( $attributes , TRUE) . "\n\n" |
39 | | - ); |
40 | | - $go = false; |
41 | | - } |
42 | | - } |
43 | | - //echo "hi"; |
44 | | - return true; |
45 | | -} |
46 | | - |
47 | | -//$wgHooks['LinkerMakeExternalLink'][] = 'getExternalLinks'; |
48 | | -//$wgHooks['EditPage::attemptSave'][] = 'getExternalLinks'; |
49 | | - |
50 | | -$wgExtensionMessagesFiles['ArchiveLinks'] = dirname( __FILE__ ) . '/ArchiveLinks.i18n.php'; |
51 | | - |
52 | 21 | $wgHooks['ArticleSaveComplete'][] = 'ArchiveLinks::queueExternalLinks'; |
53 | 22 | $wgHooks['LinkerMakeExternalLink'][] = 'ArchiveLinks::rewriteLinks'; |
54 | 23 | |
— | — | @@ -55,136 +24,4 @@ |
56 | 25 | 'archive_service' => 'wikiwix', |
57 | 26 | 'use_multiple_archives' => false, |
58 | 27 | 'run_spider_in_loop' => false, |
59 | | -); |
60 | | - |
61 | | -class ArchiveLinks { |
62 | | - public static function queueExternalLinks ( &$article ) { |
63 | | - global $wgParser; |
64 | | - $external_links = $wgParser->getOutput(); |
65 | | - $external_links = $external_links->mExternalLinks; |
66 | | - //echo "$stuff"; |
67 | | - //file_put_contents ( './extensions/ArchiveLinks/stuff0.txt', var_export( $external_links , TRUE )); |
68 | | - |
69 | | - $db_master = wfGetDB( DB_MASTER ); |
70 | | - $db_slave = wfGetDB( DB_SLAVE ); |
71 | | - $db_result = array(); |
72 | | - |
73 | | - $db_master->begin(); |
74 | | - |
75 | | - foreach ( $external_links as $link => $unused_value ) { |
76 | | - //$db_result['resource'] = $db_slave->select( 'el_archive_resource', '*', '`el_archive_resource`.`resource_url` = "' . $db_slave->strencode( $link ) . '"'); |
77 | | - $db_result['blacklist'] = $db_slave->select( 'el_archive_blacklist', '*', '`el_archive_blacklist`.`bl_url` = "' . $db_slave->strencode( $link ) . '"'); |
78 | | - $db_result['queue'] = $db_slave->select( 'el_archive_queue', '*', '`el_archive_queue`.`url` = "' . $db_slave->strencode( $link ) . '"' ); |
79 | | - |
80 | | - if ( $db_result['blacklist']->numRows() === 0 ) { |
81 | | - if ( $db_result['queue']->numRows() === 0 ) { |
82 | | - // this probably a first time job |
83 | | - // but we should check the logs and resource table |
84 | | - // to make sure |
85 | | - $db_master->insert( 'el_archive_queue', array ( |
86 | | - 'page_id' => $article->getID(), |
87 | | - 'url' => $link, |
88 | | - 'delay_time' => '0', |
89 | | - 'insertion_time' => time(), |
90 | | - 'in_progress' => '0', |
91 | | - )); |
92 | | - } else { |
93 | | - //this job is already in the queue, why? |
94 | | - // * most likely reason is it has already been inserted by another page |
95 | | - // * or we are checking it later because the site was down at last archival |
96 | | - // in either case we don't really need to do anything right now, so skip... |
97 | | - } |
98 | | - |
99 | | - } |
100 | | - |
101 | | - //file_put_contents ( './extensions/ArchiveLinks/stuff.txt', var_export( $db_result , TRUE )); |
102 | | - |
103 | | - //$db_master->insert('el_archive_queue', $array ); |
104 | | - } |
105 | | - |
106 | | - $db_master->commit(); |
107 | | - |
108 | | - return true; |
109 | | - } |
110 | | - |
111 | | - public static function rewriteLinks ( &$url, &$text, &$link, &$attributes ) { |
112 | | - if ( array_key_exists('rel', $attributes) && $attributes['rel'] === 'nofollow' ) { |
113 | | - global $wgArchiveLinksConfig; |
114 | | - if ( $wgArchiveLinksConfig['use_multiple_archives'] ) { |
115 | | - //need to add support for more than one archival service at once |
116 | | - // (a page where you can select one from a list of choices) |
117 | | - } else { |
118 | | - switch ( $wgArchiveLinksConfig['archive_service'] ) { |
119 | | - case 'local': |
120 | | - //We need to have something to figure out where the filestore is... |
121 | | - $link_to_archive = urlencode( substr_replace( $url, '', 0, 7 ) ); |
122 | | - break; |
123 | | - case 'wikiwix': |
124 | | - $link_to_archive = 'http://archive.wikiwix.com/cache/?url=' . $url; |
125 | | - break; |
126 | | - case 'webcitation': |
127 | | - $link_to_archive = 'http://webcitation.org/query?url=' . $url; |
128 | | - break; |
129 | | - case 'internet_archive': |
130 | | - default: |
131 | | - $link_to_archive = 'http://wayback.archive.org/web/*/' . $url; |
132 | | - break; |
133 | | - |
134 | | - } |
135 | | - } |
136 | | - //Note to self: need to fix this to use Html.php instead of direct html |
137 | | - $link = "<a rel=\"nofollow\" class=\"{$attributes['class']}\" href=\"{$url}\">{$text}</a> <sup><small><a href=\"" |
138 | | - . "{$link_to_archive}\">" . wfMsg( 'archive-links-cache-title' ) . '</a></small></sup> '; |
139 | | - return false; |
140 | | - } else { |
141 | | - return true; |
142 | | - } |
143 | | - } |
144 | | - |
145 | | - /*function retrieveLinks ( ) { |
146 | | - |
147 | | - }*/ |
148 | | - |
149 | | - /*function queueURL ( $url, &$db_master ) { |
150 | | - |
151 | | - }*/ |
152 | | -} |
153 | | - |
154 | | -//$wgHooks['ArticleSave'][] = 'test'; |
155 | | - |
156 | | -function test ( ) { |
157 | | - /*$db_master = wfGetDB( DB_MASTER ); |
158 | | - |
159 | | - $db_master->insert('el_archive_blacklist', array( |
160 | | - 'bl_type' => 0, |
161 | | - 'bl_url' => 'http://example.com', |
162 | | - 'bl_reason' => 'test' |
163 | | - ));*/ |
164 | | - |
165 | | - //$db_slave = wfGetDB( DB_SLAVE ); |
166 | | - |
167 | | - /*db_result = $db_slave->select( 'el_archive_blacklist', '*', |
168 | | - '`el_archive_blacklist`.`bl_url` = "' . $db_slave->strencode( 'http://example.com' ) . '"'); |
169 | | - */ |
170 | | - //$db_result['queue'] = $db_slave->select( 'el_archive_queue', '*', '`el_archive_queue`.`url` = "' . $db_slave->strencode( 'http://example.com' ) . '"' ); |
171 | | - |
172 | | - //file_put_contents ( './extensions/ArchiveLinks/stuff.txt', var_export( $db_result['queue']->numRows() , TRUE )); |
173 | | - //$add_quotes = 'http://example.com'; |
174 | | - //file_put_contents ( './extensions/ArchiveLinks/stuff.txt', var_export( $db_slave->addQuotes( $add_quotes ) , TRUE )); |
175 | | - |
176 | | - |
177 | | - |
178 | | - return false; |
179 | | -} |
180 | | -/* |
181 | | -class InsertURLsIntoQueue extends Job { |
182 | | - public function __construct( $title, $params ) { |
183 | | - // Replace synchroniseThreadArticleData with the an identifier for your job. |
184 | | - parent::__construct( 'insertURLsIntoQueue', $title, $params ); |
185 | | - } |
186 | | - |
187 | | - |
188 | | - public function run() { |
189 | | - |
190 | | - } |
191 | | -}*/ |
\ No newline at end of file |
| 28 | +); |
\ No newline at end of file |
Index: trunk/extensions/ArchiveLinks/ArchiveLinks.class.php |
— | — | @@ -0,0 +1,87 @@ |
| 2 | +<?php |
| 3 | +/** |
| 4 | + * Main Extension Class for Archive Links |
| 5 | + */ |
// Entry-point guard: when the MEDIAWIKI constant is not defined, print a
// notice and terminate with exit status 1 before the class is declared.
if ( !defined( 'MEDIAWIKI' ) ) {
	echo "This file is an extension to the MediaWiki software and cannot be used standalone.\n";
	exit( 1 );
}
| 10 | + |
/**
 * Hook handlers for the ArchiveLinks extension.
 *
 * queueExternalLinks() records the external links of a saved page in the
 * el_archive_queue table; rewriteLinks() appends a link to an archived copy
 * after every external link that is rendered with rel="nofollow".
 */
class ArchiveLinks {
	/**
	 * ArticleSaveComplete hook handler: queue the page's external links for archival.
	 *
	 * Every external link found in the current parser output is inserted into
	 * el_archive_queue unless the URL is listed in el_archive_blacklist or is
	 * already present in the queue.
	 *
	 * @param $article Object providing getID(); its value is stored as page_id.
	 * @return bool Always true, so that other hook handlers keep running.
	 */
	public static function queueExternalLinks ( &$article ) {
		global $wgParser;

		$parser_output = $wgParser->getOutput();
		if ( !is_object( $parser_output ) ) {
			// Nothing has been parsed, so there are no links to inspect.
			return true;
		}

		$external_links = $parser_output->mExternalLinks;
		if ( !is_array( $external_links ) || count( $external_links ) === 0 ) {
			// Do not open a transaction when there is nothing to write.
			return true;
		}

		$db_master = wfGetDB( DB_MASTER );
		$db_slave = wfGetDB( DB_SLAVE );
		$page_id = $article->getID();

		$db_master->begin();

		// The URLs are the array keys; the values are not used.
		foreach ( $external_links as $link => $unused_value ) {
			// Array conditions leave quoting to the database layer instead of
			// hand-building a WHERE clause with MySQL-only backticks and
			// double-quoted string literals.
			$blacklisted = $db_slave->select(
				'el_archive_blacklist',
				'*',
				array( 'bl_url' => $link ),
				__METHOD__
			);
			if ( $blacklisted->numRows() !== 0 ) {
				continue;
			}

			$queued = $db_slave->select(
				'el_archive_queue',
				'*',
				array( 'url' => $link ),
				__METHOD__
			);
			if ( $queued->numRows() !== 0 ) {
				// Already queued: most likely inserted by another page, or
				// scheduled for a re-check because the site was down at the
				// last archival attempt. Either way nothing to do right now.
				continue;
			}

			// Probably a first-time job.
			// TODO: also check the logs and the resource table to make sure.
			$db_master->insert(
				'el_archive_queue',
				array(
					'page_id' => $page_id,
					'url' => $link,
					'delay_time' => '0',
					'insertion_time' => time(),
					'in_progress' => '0',
				),
				__METHOD__
			);
		}

		$db_master->commit();

		return true;
	}

	/**
	 * LinkerMakeExternalLink hook handler: add a "cached copy" link after
	 * external links that carry rel="nofollow".
	 *
	 * @param $url String: target of the external link.
	 * @param $text String: link text; inserted into the markup unchanged.
	 *   NOTE(review): presumably the caller has already HTML-escaped it - confirm.
	 * @param $link String: receives the replacement HTML when the link is rewritten.
	 * @param $attributes Array: attributes of the link; 'rel' and 'class' are read.
	 * @return bool False when $link has been filled in, true when the link is
	 *   left for default rendering.
	 */
	public static function rewriteLinks ( &$url, &$text, &$link, &$attributes ) {
		global $wgArchiveLinksConfig;

		if ( !isset( $attributes['rel'] ) || $attributes['rel'] !== 'nofollow' ) {
			return true;
		}

		if ( !empty( $wgArchiveLinksConfig['use_multiple_archives'] ) ) {
			// Support for several archival services at once (a page where one
			// can be selected from a list) is not implemented yet. Leave the
			// link untouched rather than emitting an empty archive href.
			return true;
		}

		// A missing setting selects the same service as the switch default.
		$service = isset( $wgArchiveLinksConfig['archive_service'] )
			? $wgArchiveLinksConfig['archive_service']
			: 'internet_archive';
		$link_to_archive = self::getArchiveUrl( $url, $service );

		$class = isset( $attributes['class'] ) ? $attributes['class'] : '';

		// Note to self: need to fix this to use Html.php instead of direct html
		$link = '<a rel="nofollow" class="' . htmlspecialchars( $class, ENT_QUOTES )
			. '" href="' . htmlspecialchars( $url, ENT_QUOTES ) . '">' . $text
			. '</a> <sup><small><a href="' . htmlspecialchars( $link_to_archive, ENT_QUOTES ) . '">'
			. htmlspecialchars( wfMsg( 'archive-links-cache-title' ), ENT_QUOTES )
			. '</a></small></sup> ';

		return false;
	}

	/**
	 * Build the address of the archived copy of $url for one archival service.
	 *
	 * @param $url String: the original external URL.
	 * @param $service String: 'local', 'wikiwix', 'webcitation' or
	 *   'internet_archive'; any other value is treated as 'internet_archive'.
	 * @return String Unescaped URL (or, for 'local', an encoded path fragment).
	 */
	private static function getArchiveUrl ( $url, $service ) {
		switch ( $service ) {
			case 'local':
				// We need to have something to figure out where the filestore is...
				// Strip the scheme whatever its length, so that "https://" is
				// handled as well as "http://".
				return urlencode( preg_replace( '!^[a-z][a-z0-9+.\-]*://!i', '', $url ) );
			case 'wikiwix':
				// The target travels in a query string, so it must be encoded or
				// any "&" / "#" in it would cut the parameter short.
				return 'http://archive.wikiwix.com/cache/?url=' . urlencode( $url );
			case 'webcitation':
				return 'http://webcitation.org/query?url=' . urlencode( $url );
			case 'internet_archive':
			default:
				// Here the target is appended to the path, as in the original code.
				return 'http://wayback.archive.org/web/*/' . $url;
		}
	}
}
\ No newline at end of file |
Property changes on: trunk/extensions/ArchiveLinks/ArchiveLinks.class.php |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 89 | + native |