Index: trunk/extensions/ArchiveLinks/ArchiveLinks.class.php |
— | — | @@ -42,8 +42,6 @@ |
43 | 43 | $old_id = $article->getTitle(); |
44 | 44 | $old_id = $old_id->getPreviousRevisionID( $page_id ); |
45 | 45 | |
46 | | - die('firing'); |
47 | | - |
48 | 46 | $db_result['links_on_page'] = $db_master->select( 'el_archive_link_history', '*', array( 'hist_page_id' => $page_id ), __METHOD__ ); |
49 | 47 | |
50 | 48 | $old_external_links = array(); |
— | — | @@ -68,96 +66,56 @@ |
69 | 67 | |
70 | 68 | if ( count( $new_external_links ) <= $wgArchiveLinksConfig['link_insert_max'] ) { |
71 | 69 | //insert the links into the queue now |
72 | | - foreach( $new_external_links as $link ) { |
73 | | - $this->feed_insert_links( $link ); |
74 | | - |
75 | | - /* |
| 70 | + foreach( $new_external_links as $link ) { |
| 71 | + $db_result['queue'] = $db_slave->select( 'el_archive_queue', '*', array( 'url' => $link ), __METHOD__, array( 'LIMIT' => '1', ) ); |
| 72 | + $db_result['blacklist'] = $db_slave->select( 'el_archive_blacklist', '*', array( 'bl_url' => $link ), __METHOD__, array( 'LIMIT' => '1', ) ); |
76 | 73 | |
77 | | - |
78 | | - /* |
79 | | - |
80 | | - } elseif ( $db_result['history-row']['hist_insertion_time'] >= $time - $wgArchiveLinksConfig['global_rearchive_time'] ) { |
81 | | - $db_result['history_page'] = $db_slave->select( 'el_archive_link_history', '*', array( 'hist_url' => $link, 'page_id' => $page_id ), __METHOD__, array( 'LIMIT' => '1', 'ORDER BY' => 'hist_id DESC' ) ); |
| 74 | + $db_result['queue-numrows'] = $db_result['queue']->numRows(); |
| 75 | + $db_result['blacklist-numrows'] = $db_result['blacklist']->numRows(); |
82 | 76 | |
83 | | - $db_result['history_page-numrows'] = $db_result['history_page']->numRows(); |
84 | | - $db_result['history_page-row'] = $db_result['history_page']->fetchRow(); |
| 77 | + if ( $db_result['blacklist-numrows'] === 0 && $db_result['queue-numrows'] === 0 ) { |
| 78 | + $db_master->insert( 'el_archive_queue', array( |
| 79 | + 'page_id' => $page_id, |
| 80 | + 'url' => $link, |
| 81 | + 'delay_time' => '0', |
| 82 | + 'insertion_time' => $time, |
| 83 | + 'in_progress' => '0', |
| 84 | + )); |
85 | 85 | |
86 | | - if ( $db_result['history_page-numrows'] === 0 && $db_result['history-row']['hist_insertion_time'] >= $time - $wgArchiveLinksConfig['previous_archive_lockout_time'] ) { |
87 | | - //this link is new to this particular page but has been archived on another page less than the rearchive delay |
88 | | - //grab a new version of it in case the content has changed |
89 | | - $db_master->insert( 'el_archive_queue', array( |
90 | | - 'page_id' => $page_id, |
91 | | - 'url' => $link, |
92 | | - 'delay_time' => '0', |
93 | | - 'insertion_time' => $time, |
94 | | - 'in_progress' => '0', |
95 | | - )); |
96 | | - |
97 | | - $db_master->insert( 'el_archive_link_history', array( |
98 | | - 'page_id' => $page_id, |
99 | | - 'url' => $link, |
100 | | - 'delay_time' => '0', |
101 | | - 'insertion_time' => $time, |
102 | | - 'in_progress' => '0', |
103 | | - )); |
104 | | - |
105 | | - } |
106 | | - |
107 | | - if ( $db_result['history_page-row']['insertion_time'] >= $time - $wgArchiveLinksConfig['page_rearchive_time']) { |
108 | | - |
109 | | - } |
110 | | - }*/ |
| 86 | + $db_master->insert( 'el_archive_link_history', array( |
| 87 | + 'hist_page_id' => $page_id, |
| 88 | + 'hist_url' => $link, |
| 89 | + 'hist_insertion_time' => $time, |
| 90 | + )); |
| 91 | + } |
111 | 92 | } |
112 | 93 | } else { |
113 | 94 | //insert everything as a job and do the work later to avoid lagging page save |
114 | 95 | } |
115 | 96 | |
116 | 97 | } else { |
117 | | - |
118 | 98 | foreach ( $external_links as $link => $unused_value ) { |
119 | | - $link = $db_slave->strencode( $link ); |
| 99 | + //$db_result['resource'] = $db_slave->select( 'el_archive_resource', '*', '`el_archive_resource`.`resource_url` = "' . $db_slave->strencode( $link ) . '"'); |
| 100 | + $db_result['blacklist'] = $db_slave->select( 'el_archive_blacklist', '*', array( 'bl_url' => $link ), __METHOD__ ); |
| 101 | + $db_result['queue'] = $db_slave->select( 'el_archive_queue', '*', array( 'url' => $link ), __METHOD__ ); |
120 | 102 | |
121 | | - if ( $wgArchiveLinksConfig['generate_feed'] === true ) { |
122 | | - |
123 | | - |
124 | | - |
125 | | - /*$diff_eng = new DifferenceEngine( null, $old_id, $page_id, null, false ); |
126 | | - |
127 | | - $diff = $diff_eng->getDiffBody(); |
128 | | - die( var_dump($diff) ); |
129 | | - */ |
130 | | - |
131 | | - //file_put_contents('stf.txt', var_export( $diff, TRUE ) ); |
132 | | - |
133 | | - /* |
134 | | - * Querying the db server with selects for every link on the page would potentially be a whole bunch of unnecessary load |
135 | | - * Let's take the diff first then do it on a job instead... |
136 | | - * |
137 | | -*/ |
138 | | - |
139 | | - } else { |
140 | | - //$db_result['resource'] = $db_slave->select( 'el_archive_resource', '*', '`el_archive_resource`.`resource_url` = "' . $db_slave->strencode( $link ) . '"'); |
141 | | - $db_result['blacklist'] = $db_slave->select( 'el_archive_blacklist', '*', array( 'bl_url' => $link ), __METHOD__ ); |
142 | | - $db_result['queue'] = $db_slave->select( 'el_archive_queue', '*', array( 'url' => $link ), __METHOD__ ); |
143 | | - |
144 | | - if ( $db_result['blacklist']->numRows() === 0 ) { |
145 | | - if ( $db_result['queue']->numRows() === 0 ) { |
146 | | - // this probably a first time job |
147 | | - // but we should check the logs and resource table |
148 | | - // to make sure |
149 | | - $db_master->insert( 'el_archive_queue', array ( |
150 | | - 'page_id' => $page_id, |
151 | | - 'url' => $link, |
152 | | - 'delay_time' => '0', |
153 | | - 'insertion_time' => $time, |
154 | | - 'in_progress' => '0', |
155 | | - )); |
156 | | - } else { |
157 | | - //this job is already in the queue, why? |
158 | | - // * most likely reason is it has already been inserted by another page |
159 | | - // * or we are checking it later because the site was down at last archival |
160 | | - // in either case we don't really need to do anything right now, so skip... |
161 | | - } |
| 103 | + if ( $db_result['blacklist']->numRows() === 0 ) { |
| 104 | + if ( $db_result['queue']->numRows() === 0 ) { |
| 105 | + // this probably a first time job |
| 106 | + // but we should check the logs and resource table |
| 107 | + // to make sure |
| 108 | + $db_master->insert( 'el_archive_queue', array ( |
| 109 | + 'page_id' => $page_id, |
| 110 | + 'url' => $link, |
| 111 | + 'delay_time' => '0', |
| 112 | + 'insertion_time' => $time, |
| 113 | + 'in_progress' => '0', |
| 114 | + )); |
| 115 | + } else { |
| 116 | + //this job is already in the queue, why? |
| 117 | + // * most likely reason is it has already been inserted by another page |
| 118 | + // * or we are checking it later because the site was down at last archival |
| 119 | + // in either case we don't really need to do anything right now, so skip... |
162 | 120 | } |
163 | 121 | } |
164 | 122 | } |
— | — | @@ -207,36 +165,6 @@ |
208 | 166 | } |
209 | 167 | } |
210 | 168 | |
211 | | - public function feed_insert_links ( $url, $escaped = false ) { |
212 | | - if ( !$escaped ) { |
213 | | - $url = $this->strencode( $url ); |
214 | | - } |
215 | | - |
216 | | - $db_result['queue'] = $db_slave->select( 'el_archive_queue', '*', array( 'url' => $link ), __METHOD__, array( 'LIMIT' => '1', ) ); |
217 | | - $db_result['blacklist'] = $db_slave->select( 'el_archive_blacklist', '*', array( 'bl_url' => $link ), __METHOD__, array( 'LIMIT' => '1', ) ); |
218 | | - |
219 | | - $db_result['queue-numrows'] = $db_result['queue']->numRows(); |
220 | | - $db_result['blacklist-numrows'] = $db_result['blacklist']->numRows(); |
221 | | - |
222 | | - if ( $db_result['blacklist-numrows'] === 0 && $db_result['queue-numrows'] === 0 ) { |
223 | | - $db_master->insert( 'el_archive_queue', array( |
224 | | - 'page_id' => $page_id, |
225 | | - 'url' => $link, |
226 | | - 'delay_time' => '0', |
227 | | - 'insertion_time' => $time, |
228 | | - 'in_progress' => '0', |
229 | | - )); |
230 | | - |
231 | | - $db_master->insert( 'el_archive_link_history', array( |
232 | | - 'page_id' => $page_id, |
233 | | - 'url' => $link, |
234 | | - 'delay_time' => '0', |
235 | | - 'insertion_time' => $time, |
236 | | - 'in_progress' => '0', |
237 | | - )); |
238 | | - } |
239 | | - } |
240 | | - |
241 | 169 | public static function schemaUpdates ( $updater = null ) { |
242 | 170 | $path = dirname( __FILE__ ); |
243 | 171 | $updater->addExtensionUpdate( array( |