r46552 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r46551‎ | r46552 | r46553 >
Date:21:10, 29 January 2009
Author:dale
Status:deferred
Tags:
Comment:
stream date fix maintenance script.
Modified paths:
  • /trunk/extensions/MetavidWiki/maintenance/metavid2mvWiki.inc.php (modified) (history)
  • /trunk/extensions/MetavidWiki/maintenance/metavid2mvWiki.php (modified) (history)
  • /trunk/extensions/MetavidWiki/maintenance/scrape_and_insert.inc.php (modified) (history)

Diff [purge]

Index: trunk/extensions/MetavidWiki/maintenance/scrape_and_insert.inc.php
@@ -608,26 +608,34 @@
609609 }
610610 }
611611 function proccessBill( $govTrackBillId, $bill_key, $openCongBillId = false, $mapLightBillId = false, $forceUpdate = false ) {
612 - // get the bill title & its sponser / cosponsers:
 612+ // get the bill title & its sponsor / co-sponsors:
613613 $rawGovTrackPage = $this->doRequest( $this->govTrack_bill_url . $govTrackBillId );
614614
615615 /*****************************
616 - * Proccess Bill GovTrack info
 616+ * Process Bill GovTrack info
617617 *****************************/
618618 print "gov_track id: " . $govTrackBillId . " from: " . $this->govTrack_bill_url . $govTrackBillId . "\n";
619619
620 - // get title:
621 - $patern = '/property="dc:title" datatype="xsd:string" style="margin-bottom: 1em">([^<]*)<\/div>(<p style="margin-top: 1.75em; margin-bottom: 1.75em">([^<]*))?/';
622 - preg_match( $patern, $rawGovTrackPage, $title_match );
 620+ // get title:
 621+ preg_match( '/<title>(.*)<\/title>/', $rawGovTrackPage, $title_match );
623622 if ( isset( $title_match[1] ) ) {
 623+ //strip govtrack.us
 624+ $title_match[1] = str_replace( '(GovTrack.us)', '', $title_match[1]);
624625 if ( trim( $title_match[1] ) == '' ) {
625626 print "empty title\n";
626627 return false;
627628 }
628629 $title_short = str_replace( array( '_', '...', ' [110th]', ' [109th]', ' [108th]', ' [107th]' ), array( ' ', '', '', '', '', '' ), $title_match[1] );
 630+
629631 $this->cur_bill_short_title = $title_short;
630632 // set the desc if present:
631 - $title_desc = ( isset( $title_match[3] ) ) ? $title_match[3]:'';
 633+ preg_match( '/<meta name="description" content="([^">]*)"/', $rawGovTrackPage, $desc_match );
 634+ if(isset($desc_match[1])){
 635+ $title_desc = $desc_match[1];
 636+ }else{
 637+ die('could not find title desc: ' . $title_desc);
 638+ }
 639+
632640 $this->bill_titles[$bill_key] = $title_short;
633641 } else {
634642 print $this->govTrack_bill_url . $govTrackBillId . "\n" . $patern . "\n" . $rawGovTrackPage;
@@ -635,8 +643,8 @@
636644 }
637645
638646 // print "raw govtrack:\n $rawGovTrackPage";
639 - // get the $thomas_match
640 - preg_match( '/thomas\.loc\.gov\/cgi-bin\/bdquery\/z\?(.*):/', $rawGovTrackPage, $thomas_match );
 647+ // get the $thomas_match
 648+ preg_match( '/thomas\.loc\.gov\/cgi-bin\/bdquery\/z\?([^\"]*)/', $rawGovTrackPage, $thomas_match );
641649 // get introduced: //strange .* does not seem to work :(
642650 preg_match( '/Introduced<\/nobr><\/td><td style="padding-left: 1em; font-size: 75%; color: #333333"><nobr>([^<]*)/m', $rawGovTrackPage, $date_intro_match );
643651 // print_r($date_intro_match);
@@ -685,19 +693,26 @@
686694 $this->procMapLightInterest( $interest );
687695 $bp .= 'Supporting Interest ' . $i . '=' . $interest['name'] . "|\n";
688696 $i++;
 697+ //process interest
 698+ $this->procMapLightInterest( $interest );
689699 }
690700 $i = 1;
691701 foreach ( $bill_interest['oppose'] as $interest ) {
692702 $bp .= 'Opposing Interest ' . $i . '=' . $interest['name'] . "|\n";
693703 $i++;
 704+ //process interest
 705+ $this->procMapLightInterest( $interest );
694706 }
695707 }
696708 $bp .= "}}\n";
 709+
697710 // print 'page : '.$title_short.' ' . $bp . "\n";
698711 // incorporated into the template:
699712 // $body.="\n\n".'Source: [[Data Source Name:=GovTrack]] [[Data Source URL:='.$this->govTrack_bill_url . $govTrackBillId.']]';
700713 // set up the base bill page:
701714 $wgBillTitle = Title::newFromText( $title_short );
 715+ //print $bp;
 716+ //die;
702717 do_update_wiki_page( $wgBillTitle, $bp );
703718
704719 // set up a redirect for the bill key, and a link for the category page:
@@ -721,29 +736,29 @@
722737 // print "map info: $this->mapLightBillInfo \n";
723738 print str_replace( '$1', $mapLightBillId, $this->mapLightBillInfo ) . "\n\n";
724739 $ret_ary = array( 'support' => array(), 'oppose' => array() );
725 - $bill_page = $this->doRequest( str_replace( '$1', $mapLightBillId, $this->mapLightBillInfo ) );
 740+ $bill_url = str_replace( '$1', $mapLightBillId, $this->mapLightBillInfo );
 741+ $bill_page = $this->doRequest( $bill_url);
726742 // $bill_page = $this->doRequest('http://maplight.org/map/us/bill/10831/default');
727743 // print $bill_page;
728744 // ([^<]*)<\/a>)*
729745 // a href="\/map\/us\/interest\/([^"]*) class="interest"
730 -
731 - $pat_interest = '/<li><a\shref="\/map\/us\/interest\/([^"]*)".*>([^<]*)<\/a>&nbsp;.*<\/li>/U';
732746 // class="organizations"\sid="for
733747 // preg_match_all('/class="organizations"\sid="for.*<ul class="industries list-clear">()*/',$bill_page, $matches);
734 - preg_match_all( $pat_interest, $bill_page, $matches, PREG_OFFSET_CAPTURE );
735 - // print_r($matches);
736 - $aginst_pos = strpos( $bill_page, 'class="organizations" id="against"' );
 748+ print "\n". $bill_url."\n";
 749+ preg_match_all( '/href\=\"\/map\/us\/interest\/([^"]*)[^>]*>([^<]*)/', $bill_page, $matches, PREG_OFFSET_CAPTURE );
 750+
 751+ $aginst_pos = strpos( $bill_page, 'id="against"' );
737752 // return empty arrays if we don't have info to give back:'
738753 if ( $aginst_pos === false )return $ret_ary;
739754 if ( !isset( $matches[1] ) )return $ret_ary;
740755
741756 foreach ( $matches[1] as $inx => $intrest ) {
742757 if ( $intrest[1] < $aginst_pos ) {
743 - $ret_ary['support'][] = array( 'key' => $intrest[0], 'name' => $matches[2][$inx][0] );
 758+ $ret_ary['support'][] = array( 'key' => $intrest[0], 'name' => htmlspecialchars_decode( $matches[2][$inx][0]) );
744759 } else {
745 - $ret_ary['oppose'][] = array( 'key' => $intrest[0], 'name' => $matches[2][$inx][0] );
 760+ $ret_ary['oppose'][] = array( 'key' => $intrest[0], 'name' => htmlspecialchars_decode( $matches[2][$inx][0] ) );
746761 }
747 - }
 762+ }
748763 return $ret_ary;
749764 }
750765 function get_bill_name_from_mapLight_id( $mapBillId, $doLookup = true ) {
@@ -760,7 +775,7 @@
761776 }
762777 if ( !isset( $this->mapLight_bill_cache[$mapBillId] ) ) {
763778 if ( $doLookup ) {
764 - print "missing bill by mapId: $mapBillId retrive it: \n";
 779+ print "missing bill by mapId: $mapBillId retrieve it: \n";
765780 $raw_bill_page = $this->doRequest( 'http://www.maplight.org/map/us/bill/' . $mapBillId . '/default' );
766781 preg_match( '/title">([^-]*)-/', $raw_bill_page, $matches );
767782 if ( isset( $matches[1] ) )$bill_key = trim( $matches[1] );
Index: trunk/extensions/MetavidWiki/maintenance/metavid2mvWiki.php
@@ -94,6 +94,7 @@
9595 'interest' will insert interests (uses people as base so run people first)
9696 'update_templates' will update templates & some semantic properties
9797 'file_check' checks inserted streams file urls/pointers
 98+ 'do_stream_date_check'
9899
99100 EOT;
100101 exit ();
@@ -133,6 +134,9 @@
134135 case 'file_check':
135136 do_stream_file_check();
136137 break;
 138+ case 'do_stream_date_check':
 139+ do_stream_date_check();
 140+ break;
137141 case 'update_templates' :
138142 $force = ( isset( $options['force'] ) ) ? true:false;
139143 include_once( 'metavid_gov_templates.php' );
@@ -147,5 +151,5 @@
148152 break;
149153 default :
150154 do_stream_insert( 'stream', $args[0] );
151 - break;
 155+ break;
152156 }
Index: trunk/extensions/MetavidWiki/maintenance/metavid2mvWiki.inc.php
@@ -34,6 +34,34 @@
3535 // if($i==3)die;
3636 // $i++;
3737 }
 38+function do_stream_date_check(){
 39+ $dbr = wfGetDB( DB_READ );
 40+ $result = $dbr->select( 'mv_streams',
 41+ '*',
 42+ '',
 43+ __METHOD__,
 44+ array('LIMIT'=> 9000));
 45+ if ( $dbr->numRows( $result ) == 0 )die("do_stream_file_check: no streams found");
 46+
 47+ while ( $stream = $dbr->fetchObject( $result ) ) {
 48+ $sdate = split('_', $stream->name);
 49+ $sd = split('-',$sdate[count($sdate)-1]);
 50+ if( count($sd) != 3 )
 51+ continue;
 52+ $sdate = mktime( 9, 0, 0, $sd[0], $sd[1], intval('20'.$sd[2]) );
 53+ if( date('d-y', $stream->date_start_time) != date('d-y',$sdate) ) {
 54+ //print "should update date: " . $stream->date_start_time . ' to '. $sdate . ' for ' . $stream->name . "\n";
 55+ $dbw = wfGetDB( DB_WRITE );
 56+ $sql = "UPDATE `mv_streams` SET `date_start_time`= '$sdate' " .
 57+ " WHERE `id`={$stream->id} LIMIT 1 ";
 58+ $dbw->query($sql);
 59+ print "$stream->name date updated\n";
 60+ }else{
 61+ print "$stream->name date is ok\n";
 62+ }
 63+
 64+ }
 65+}
3866 function do_stream_file_check( $old_stream=false ) {
3967 global $mvgIP, $mvVideoArchivePaths;
4068 $stream_set = Array();
@@ -835,9 +863,13 @@
836864 $i++;
837865 }
838866 }
839 - $raw_results = $mvScrape->doRequest( 'http://maplight.org/map/us/interest/' . $intrestKey . '/bills' );
 867+ $intrest_bills_url = 'http://maplight.org/map/us/interest/' . $intrestKey . '/bills';
 868+ $raw_results = $mvScrape->doRequest( $intrest_bills_url );
840869 // get all bills supported or opposed
841870 preg_match_all( '/\/map\/us\/bill\/([^"]*)".*\/map\/us\/legislator.*<td>([^<]*)</U', $raw_results, $matches );
 871+ print $intrest_bills_url . "\n";
 872+ print_r($matches);
 873+ die;
842874 $sinx = $oinx = 1;
843875 if ( isset( $matches[1][0] ) ) {
844876 $support_count = $oppse_count = 0;
@@ -857,7 +889,8 @@
858890 }
859891 }
860892 $page_body .= '}}';
861 - print "Interest Page: $intrestName\n";
 893+ print "Interest Page: $intrestName \n\n $page_body\n";
 894+ die;
862895 $wTitle = Title::makeTitle( NS_MAIN, $intrestName );
863896 do_update_wiki_page( $wTitle, $page_body );
864897 }

Status & tagging log