r90907 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r90906‎ | r90907 | r90908 >
Date:21:16, 27 June 2011
Author:kbrown
Status:deferred (Comments)
Tags:
Comment:
Cleanup ArchiveLinks.php and move the class into a separate file.
Modified paths:
  • /trunk/extensions/ArchiveLinks/ArchiveLinks.class.php (added) (history)
  • /trunk/extensions/ArchiveLinks/ArchiveLinks.php (modified) (history)

Diff [purge]

Index: trunk/extensions/ArchiveLinks/ArchiveLinks.php
@@ -11,43 +11,12 @@
1212
1313 error_reporting ( E_ALL | E_STRICT );
1414
15 -//$wgJobClasses['synchroniseThreadArticleData'] = 'SynchroniseThreadArticleDataJob';
 15+$path = dirname( __FILE__ );
1616
17 -// Hooks
18 -/*$wgHooks['EditPage::attemptSave'][] = array ( 'getExternalLinks' );
 17+$wgExtensionMessagesFiles['ArchiveLinks'] = "$path/ArchiveLinks.i18n.php";
1918
20 -function getExternalLinks ( $editpage ) {
21 - //var_export ( $editpage , FALSE );
22 - //echo "hi";
23 - file_put_contents ( './stuff.txt', 'it works :D' . "\n" . var_export ( $editpage ) );
24 - return true;
25 -}*/
26 -
27 -//$wgHooks['LinkerMakeExternalLink'][] = 'findOutWhatTheHellThisHookGivesUs';
 19+$wgAutoloadClasses['ArchiveLinks'] = "$path/ArchiveLinks.class.php";
2820
29 -function findOutWhatTheHellThisHookGivesUs ( &$url, &$text, &$link, &$attributes ) {
30 - for ( $i = 0, $go = true ; $go !== false; ++$i ) {
31 - if ( file_exists ( './extensions/ArchiveLinks/stuff-' . $i . '.txt' ) ) {
32 - continue;
33 - } else {
34 - file_put_contents ( "./extensions/ArchiveLinks/stuff-$i.txt",
35 - var_export ( $url , TRUE ) . "\n\n" .
36 - var_export ( $text , TRUE) . "\n\n" .
37 - var_export ( $link , TRUE) . "\n\n" .
38 - var_export ( $attributes , TRUE) . "\n\n"
39 - );
40 - $go = false;
41 - }
42 - }
43 - //echo "hi";
44 - return true;
45 -}
46 -
47 -//$wgHooks['LinkerMakeExternalLink'][] = 'getExternalLinks';
48 -//$wgHooks['EditPage::attemptSave'][] = 'getExternalLinks';
49 -
50 -$wgExtensionMessagesFiles['ArchiveLinks'] = dirname( __FILE__ ) . '/ArchiveLinks.i18n.php';
51 -
5221 $wgHooks['ArticleSaveComplete'][] = 'ArchiveLinks::queueExternalLinks';
5322 $wgHooks['LinkerMakeExternalLink'][] = 'ArchiveLinks::rewriteLinks';
5423
@@ -55,136 +24,4 @@
5625 'archive_service' => 'wikiwix',
5726 'use_multiple_archives' => false,
5827 'run_spider_in_loop' => false,
59 -);
60 -
61 -class ArchiveLinks {
62 - public static function queueExternalLinks ( &$article ) {
63 - global $wgParser;
64 - $external_links = $wgParser->getOutput();
65 - $external_links = $external_links->mExternalLinks;
66 - //echo "$stuff";
67 - //file_put_contents ( './extensions/ArchiveLinks/stuff0.txt', var_export( $external_links , TRUE ));
68 -
69 - $db_master = wfGetDB( DB_MASTER );
70 - $db_slave = wfGetDB( DB_SLAVE );
71 - $db_result = array();
72 -
73 - $db_master->begin();
74 -
75 - foreach ( $external_links as $link => $unused_value ) {
76 - //$db_result['resource'] = $db_slave->select( 'el_archive_resource', '*', '`el_archive_resource`.`resource_url` = "' . $db_slave->strencode( $link ) . '"');
77 - $db_result['blacklist'] = $db_slave->select( 'el_archive_blacklist', '*', '`el_archive_blacklist`.`bl_url` = "' . $db_slave->strencode( $link ) . '"');
78 - $db_result['queue'] = $db_slave->select( 'el_archive_queue', '*', '`el_archive_queue`.`url` = "' . $db_slave->strencode( $link ) . '"' );
79 -
80 - if ( $db_result['blacklist']->numRows() === 0 ) {
81 - if ( $db_result['queue']->numRows() === 0 ) {
82 - // this probably a first time job
83 - // but we should check the logs and resource table
84 - // to make sure
85 - $db_master->insert( 'el_archive_queue', array (
86 - 'page_id' => $article->getID(),
87 - 'url' => $link,
88 - 'delay_time' => '0',
89 - 'insertion_time' => time(),
90 - 'in_progress' => '0',
91 - ));
92 - } else {
93 - //this job is already in the queue, why?
94 - // * most likely reason is it has already been inserted by another page
95 - // * or we are checking it later because the site was down at last archival
96 - // in either case we don't really need to do anything right now, so skip...
97 - }
98 -
99 - }
100 -
101 - //file_put_contents ( './extensions/ArchiveLinks/stuff.txt', var_export( $db_result , TRUE ));
102 -
103 - //$db_master->insert('el_archive_queue', $array );
104 - }
105 -
106 - $db_master->commit();
107 -
108 - return true;
109 - }
110 -
111 - public static function rewriteLinks ( &$url, &$text, &$link, &$attributes ) {
112 - if ( array_key_exists('rel', $attributes) && $attributes['rel'] === 'nofollow' ) {
113 - global $wgArchiveLinksConfig;
114 - if ( $wgArchiveLinksConfig['use_multiple_archives'] ) {
115 - //need to add support for more than one archival service at once
116 - // (a page where you can select one from a list of choices)
117 - } else {
118 - switch ( $wgArchiveLinksConfig['archive_service'] ) {
119 - case 'local':
120 - //We need to have something to figure out where the filestore is...
121 - $link_to_archive = urlencode( substr_replace( $url, '', 0, 7 ) );
122 - break;
123 - case 'wikiwix':
124 - $link_to_archive = 'http://archive.wikiwix.com/cache/?url=' . $url;
125 - break;
126 - case 'webcitation':
127 - $link_to_archive = 'http://webcitation.org/query?url=' . $url;
128 - break;
129 - case 'internet_archive':
130 - default:
131 - $link_to_archive = 'http://wayback.archive.org/web/*/' . $url;
132 - break;
133 -
134 - }
135 - }
136 - //Note to self: need to fix this to use Html.php instead of direct html
137 - $link = "<a rel=\"nofollow\" class=\"{$attributes['class']}\" href=\"{$url}\">{$text}</a>&#160;<sup><small><a href=\""
138 - . "{$link_to_archive}\">" . wfMsg( 'archive-links-cache-title' ) . '</a></small></sup>&#160;';
139 - return false;
140 - } else {
141 - return true;
142 - }
143 - }
144 -
145 - /*function retrieveLinks ( ) {
146 -
147 - }*/
148 -
149 - /*function queueURL ( $url, &$db_master ) {
150 -
151 - }*/
152 -}
153 -
154 -//$wgHooks['ArticleSave'][] = 'test';
155 -
156 -function test ( ) {
157 - /*$db_master = wfGetDB( DB_MASTER );
158 -
159 - $db_master->insert('el_archive_blacklist', array(
160 - 'bl_type' => 0,
161 - 'bl_url' => 'http://example.com',
162 - 'bl_reason' => 'test'
163 - ));*/
164 -
165 - //$db_slave = wfGetDB( DB_SLAVE );
166 -
167 - /*db_result = $db_slave->select( 'el_archive_blacklist', '*',
168 - '`el_archive_blacklist`.`bl_url` = "' . $db_slave->strencode( 'http://example.com' ) . '"');
169 - */
170 - //$db_result['queue'] = $db_slave->select( 'el_archive_queue', '*', '`el_archive_queue`.`url` = "' . $db_slave->strencode( 'http://example.com' ) . '"' );
171 -
172 - //file_put_contents ( './extensions/ArchiveLinks/stuff.txt', var_export( $db_result['queue']->numRows() , TRUE ));
173 - //$add_quotes = 'http://example.com';
174 - //file_put_contents ( './extensions/ArchiveLinks/stuff.txt', var_export( $db_slave->addQuotes( $add_quotes ) , TRUE ));
175 -
176 -
177 -
178 - return false;
179 -}
180 -/*
181 -class InsertURLsIntoQueue extends Job {
182 - public function __construct( $title, $params ) {
183 - // Replace synchroniseThreadArticleData with the an identifier for your job.
184 - parent::__construct( 'insertURLsIntoQueue', $title, $params );
185 - }
186 -
187 -
188 - public function run() {
189 -
190 - }
191 -}*/
\ No newline at end of file
 28+);
\ No newline at end of file
Index: trunk/extensions/ArchiveLinks/ArchiveLinks.class.php
@@ -0,0 +1,87 @@
 2+<?php
 3+/**
 4+ * Main Extension Class for Archive Links
 5+ */
 6+if ( !defined( 'MEDIAWIKI' ) ) {
 7+ echo( "This file is an extension to the MediaWiki software and cannot be used standalone.\n" );
 8+ die( 1 );
 9+}
 10+
 11+class ArchiveLinks {
 12+ public static function queueExternalLinks ( &$article ) {
 13+ global $wgParser;
 14+ $external_links = $wgParser->getOutput();
 15+ $external_links = $external_links->mExternalLinks;
 16+
 17+ $db_master = wfGetDB( DB_MASTER );
 18+ $db_slave = wfGetDB( DB_SLAVE );
 19+ $db_result = array();
 20+
 21+ $db_master->begin();
 22+
 23+ foreach ( $external_links as $link => $unused_value ) {
 24+ //$db_result['resource'] = $db_slave->select( 'el_archive_resource', '*', '`el_archive_resource`.`resource_url` = "' . $db_slave->strencode( $link ) . '"');
 25+ $db_result['blacklist'] = $db_slave->select( 'el_archive_blacklist', '*', '`el_archive_blacklist`.`bl_url` = "' . $db_slave->strencode( $link ) . '"');
 26+ $db_result['queue'] = $db_slave->select( 'el_archive_queue', '*', '`el_archive_queue`.`url` = "' . $db_slave->strencode( $link ) . '"' );
 27+
 28+ if ( $db_result['blacklist']->numRows() === 0 ) {
 29+ if ( $db_result['queue']->numRows() === 0 ) {
 30+ // this probably a first time job
 31+ // but we should check the logs and resource table
 32+ // to make sure
 33+ $db_master->insert( 'el_archive_queue', array (
 34+ 'page_id' => $article->getID(),
 35+ 'url' => $link,
 36+ 'delay_time' => '0',
 37+ 'insertion_time' => time(),
 38+ 'in_progress' => '0',
 39+ ));
 40+ } else {
 41+ //this job is already in the queue, why?
 42+ // * most likely reason is it has already been inserted by another page
 43+ // * or we are checking it later because the site was down at last archival
 44+ // in either case we don't really need to do anything right now, so skip...
 45+ }
 46+
 47+ }
 48+ }
 49+
 50+ $db_master->commit();
 51+
 52+ return true;
 53+ }
 54+
 55+ public static function rewriteLinks ( &$url, &$text, &$link, &$attributes ) {
 56+ if ( array_key_exists('rel', $attributes) && $attributes['rel'] === 'nofollow' ) {
 57+ global $wgArchiveLinksConfig;
 58+ if ( $wgArchiveLinksConfig['use_multiple_archives'] ) {
 59+ //need to add support for more than one archival service at once
 60+ // (a page where you can select one from a list of choices)
 61+ } else {
 62+ switch ( $wgArchiveLinksConfig['archive_service'] ) {
 63+ case 'local':
 64+ //We need to have something to figure out where the filestore is...
 65+ $link_to_archive = urlencode( substr_replace( $url, '', 0, 7 ) );
 66+ break;
 67+ case 'wikiwix':
 68+ $link_to_archive = 'http://archive.wikiwix.com/cache/?url=' . $url;
 69+ break;
 70+ case 'webcitation':
 71+ $link_to_archive = 'http://webcitation.org/query?url=' . $url;
 72+ break;
 73+ case 'internet_archive':
 74+ default:
 75+ $link_to_archive = 'http://wayback.archive.org/web/*/' . $url;
 76+ break;
 77+
 78+ }
 79+ }
 80+ //Note to self: need to fix this to use Html.php instead of direct html
 81+ $link = "<a rel=\"nofollow\" class=\"{$attributes['class']}\" href=\"{$url}\">{$text}</a>&#160;<sup><small><a href=\""
 82+ . "{$link_to_archive}\">" . wfMsg( 'archive-links-cache-title' ) . '</a></small></sup>&#160;';
 83+ return false;
 84+ } else {
 85+ return true;
 86+ }
 87+ }
 88+}
\ No newline at end of file
Property changes on: trunk/extensions/ArchiveLinks/ArchiveLinks.class.php
___________________________________________________________________
Added: svn:eol-style
189 + native

Comments

#Comment by NeilK (talk | contribs)   18:28, 5 August 2011

in queueExternalLinks():

- Could be better written with a single if ( if (no blacklist && not already queued) ), since you don't do anything in the else clauses.
- commit() should only be called if we actually make the master do anything.
- Always returning true is a waste of time; just don't return anything. Or return true if a job was successfully added, and false otherwise. The caller may not care, though.

rewriteLinks():

- I thought we were going to have the time in here somehow for archive.org?
- Agree on Html.php, or at least use some class where we can be certain these values are escaped properly.

Status & tagging log