r90056 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r90055‎ | r90056 | r90057 >
Date:17:13, 14 June 2011
Author:kbrown
Status:ok (Comments)
Tags:
Comment:
hopefully get everything in the right directory this time
Modified paths:
  • /trunk/extensions/ArchiveLinks (added) (history)
  • /trunk/extensions/ArchiveLinks/ArchiveLinks.php (added) (history)
  • /trunk/extensions/ArchiveLinks/NOT+STABLE+DO+NOT+USE (added) (history)
  • /trunk/extensions/ArchiveLinks/README (added) (history)

Diff [purge]

Index: trunk/extensions/ArchiveLinks/ArchiveLinks.php
@@ -0,0 +1,142 @@
 2+<?php
 3+/**
 4+ * This is an extension to preemptively archive external links so that
 5+ * in the event they go down a backup will be available.
 6+ */
 7+
 8+if ( !defined( 'MEDIAWIKI' ) ) {
 9+ echo( "This file is an extension to the MediaWiki software and cannot be used standalone.\n" );
 10+ die( 1 );
 11+}
 12+
 13+error_reporting ( E_ALL | E_STRICT );
 14+
 15+//$wgJobClasses['synchroniseThreadArticleData'] = 'SynchroniseThreadArticleDataJob';
 16+
 17+// Hooks
 18+/*$wgHooks['EditPage::attemptSave'][] = array ( 'getExternalLinks' );
 19+
 20+function getExternalLinks ( $editpage ) {
 21+ //var_export ( $editpage , FALSE );
 22+ //echo "hi";
 23+ file_put_contents ( './stuff.txt', 'it works :D' . "\n" . var_export ( $editpage ) );
 24+ return true;
 25+}*/
 26+
 27+//$wgHooks['LinkerMakeExternalLink'][] = 'findOutWhatTheHellThisHookGivesUs';
 28+
 29+function findOutWhatTheHellThisHookGivesUs ( &$url, &$text, &$link, &$attributes ) {
 30+ for ( $i = 0, $go = true ; $go !== false; ++$i ) {
 31+ if ( file_exists ( './extensions/ArchiveLinks/stuff-' . $i . '.txt' ) ) {
 32+ continue;
 33+ } else {
 34+ file_put_contents ( "./extensions/ArchiveLinks/stuff-$i.txt",
 35+ var_export ( $url , TRUE ) . "\n\n" .
 36+ var_export ( $text , TRUE) . "\n\n" .
 37+ var_export ( $link , TRUE) . "\n\n" .
 38+ var_export ( $attributes , TRUE) . "\n\n"
 39+ );
 40+ $go = false;
 41+ }
 42+ }
 43+ //echo "hi";
 44+ return true;
 45+}
 46+
 47+//$wgHooks['LinkerMakeExternalLink'][] = 'getExternalLinks';
 48+//$wgHooks['EditPage::attemptSave'][] = 'getExternalLinks';
 49+$wgHooks['ArticleSaveComplete'][] = 'ArchiveLinks::getExternalLinks'; #We want to use this hook in production
 50+
 51+class ArchiveLinks {
 52+ public static function getExternalLinks ( &$article ) {
 53+ global $wgParser;
 54+ $external_links = $wgParser->getOutput();
 55+ $external_links = $external_links->mExternalLinks;
 56+ //echo "$stuff";
 57+ //file_put_contents ( './extensions/ArchiveLinks/stuff0.txt', var_export( $external_links , TRUE ));
 58+
 59+ $db_master = wfGetDB( DB_MASTER );
 60+ $db_slave = wfGetDB( DB_SLAVE );
 61+ $db_result = array();
 62+
 63+ //$db_master->begin();
 64+
 65+ foreach ( $external_links as $link => $unused_value ) {
 66+ //$db_result['resource'] = $db_slave->select( 'el_archive_resource', '*', '`el_archive_resource`.`resource_url` = "' . $db_slave->strencode( $link ) . '"');
 67+ $db_result['blacklist'] = $db_slave->select( 'el_archive_blacklist', '*', '`el_archive_blacklist`.`bl_url` = "' . $db_slave->strencode( $link ) . '"');
 68+
 69+ //we need to know if the URL is already in the queue to prevent a page from being archived twice, so we will query the master
 70+ $db_result['queue'] = $db_master->select( 'el_archive_queue', '*', '`el_archive_queue`.`url` = "' . $db_slave->strencode( $link ) . '"' );
 71+
 72+ if ( $db_result['blacklist']->numRows() === 0 ) {
 73+ if ( $db_result['queue']->numRows() === 0 ) {
 74+ // this probably a first time job
 75+ // but we should check the logs and resource table
 76+ // to make sure
 77+ $db_master->insert( 'el_archive_queue', array (
 78+ 'page_id' => $article->getID(),
 79+ 'url' => $link,
 80+ //'delay_time' => '',
 81+ 'insertion_time' => time(),
 82+ 'in_progress' => '0',
 83+ ));
 84+ } else {
 85+ //this job is already in the queue, why?
 86+ // * most likely reason is it has already been inserted by another page
 87+ // * or we are checking it later because the site was down at last archival
 88+ // in either case we don't really need to do anything right now, so skip...
 89+ }
 90+
 91+ }
 92+
 93+ //file_put_contents ( './extensions/ArchiveLinks/stuff.txt', var_export( $db_result , TRUE ));
 94+
 95+ //$db_master->insert('el_archive_queue', $array );
 96+ }
 97+
 98+ //$db_master->commit();
 99+
 100+ return true;
 101+ }
 102+
 103+ /*function queueURL ( $url, &$db_master ) {
 104+
 105+ }*/
 106+}
 107+
 108+//$wgHooks['ArticleSave'][] = 'test';
 109+
 110+function test ( ) {
 111+ /*$db_master = wfGetDB( DB_MASTER );
 112+
 113+ $db_master->insert('el_archive_blacklist', array(
 114+ 'bl_type' => 0,
 115+ 'bl_url' => 'http://example.com',
 116+ 'bl_reason' => 'test'
 117+ ));*/
 118+
 119+ $db_slave = wfGetDB( DB_SLAVE );
 120+
 121+ /*$db_result = $db_slave->select( 'el_archive_blacklist', '*',
 122+ '`el_archive_blacklist`.`bl_url` = "' . $db_slave->strencode( 'http://example.com' ) . '"');
 123+ */
 124+ $db_result['queue'] = $db_slave->select( 'el_archive_queue', '*', '`el_archive_queue`.`url` = "' . $db_slave->strencode( 'http://example.com' ) . '"' );
 125+
 126+ file_put_contents ( './extensions/ArchiveLinks/stuff.txt', var_export( $db_result['queue']->numRows() , TRUE ));
 127+ //$add_quotes = 'http://example.com';
 128+ //file_put_contents ( './extensions/ArchiveLinks/stuff.txt', var_export( $db_slave->addQuotes( $add_quotes ) , TRUE ));
 129+
 130+ return false;
 131+}
 132+/*
 133+class InsertURLsIntoQueue extends Job {
 134+ public function __construct( $title, $params ) {
 135+ // Replace synchroniseThreadArticleData with the an identifier for your job.
 136+ parent::__construct( 'insertURLsIntoQueue', $title, $params );
 137+ }
 138+
 139+
 140+ public function run() {
 141+
 142+ }
 143+}*/
\ No newline at end of file
Property changes on: trunk/extensions/ArchiveLinks/ArchiveLinks.php
___________________________________________________________________
Added: svn:eol-style
1144 + native
Index: trunk/extensions/ArchiveLinks/NOT STABLE DO NOT USE
@@ -0,0 +1 @@
 2+Please read the readme file. (tl;dr: This is under development and should not be used by anyone)
\ No newline at end of file
Index: trunk/extensions/ArchiveLinks/README
@@ -0,0 +1 @@
 2+This is a project currently under development to add preemptive archival of external links so that in the event that they go down a backup copy will exist. At the current time it is NOT stable and should not be used on any production wiki.
\ No newline at end of file
Property changes on: trunk/extensions/ArchiveLinks
___________________________________________________________________
Added: bugtraq:number
13 + true

Comments

#Comment by NeilK (talk | contribs)   18:20, 1 July 2011

largely superseded by later revisions, so marking this okay

Status & tagging log