Index: trunk/extensions/MetavidWiki/maintenance/metavid2mvWiki.inc.php |
— | — | @@ -0,0 +1,604 @@ |
| 2 | +<?php |
| 3 | +/* |
| 4 | + * metavid2mvWiki.inc.php Created on Jan 19, 2008 |
| 5 | + * |
| 6 | + * All Metavid Wiki code is Released under the GPL2 |
| 7 | + * for more info visit http://metavid.ucsc.edu/code |
| 8 | + * |
| 9 | + * @author Michael Dale |
| 10 | + * @email dale@ucsc.edu |
| 11 | + * @url http://metavid.ucsc.edu |
| 12 | + */ |
/**
 * Writes the 'Ht_en' template page (the default template for the display
 * of transcript text) through do_update_wiki_page().
 *
 * The page is written when the template title does not exist yet, or in
 * any case when $force is true.
 *
 * @param bool $force write the template even if the page already exists
 */
function upTempalte_Ht_en($force = false) {
	$ht_en_title = Title :: makeTitle(NS_TEMPLATE, 'Ht_en');
	//nothing to do: the template is already there and no rewrite was requested
	if ($ht_en_title->exists() && !$force) {
		return;
	}
	$ht_en_text = '<noinclude>
 This is the default Template for the display of transcript text.
 </noinclude><includeonly>{{ #if: {{{PersonName|}}} | {{ #ifexist: Image:{{{PersonName}}}.jpg | [[Image:{{{PersonName}}}.jpg|44px|left]]|[[Image:Missing person.jpg|44px|left]]}} |}}{{ #if:{{{PersonName|}}}|[[{{{PersonName}}}]]: |}}{{{BodyText}}}
 </includeonly>';
	do_update_wiki_page($ht_en_title, $ht_en_text);
}
/**
 * Writes the 'Congress Person' template page.
 *
 * The template body is generated from the global $valid_attributes map
 * (dbKey => array(name, description)): the <noinclude> part documents every
 * field, the <includeonly> part emits one semantic annotation per field that
 * was supplied, an optional portrait and the person categories.
 *
 * The page is written through do_update_wiki_page() when the template does
 * not exist yet, or in any case when $force is true.
 *
 * @param bool $force write the template even if the page already exists
 */
function upTemplate_person($force = false) {
	global $valid_attributes;
	$wgTemplateTitle = Title :: makeTitle(NS_TEMPLATE, 'Congress Person');
	if ($wgTemplateTitle->exists() && !$force) {
		return;
	}
	//tolerate a missing attribute map instead of failing inside foreach
	$attributes = is_array($valid_attributes) ? $valid_attributes : array ();

	$template_body = '<noinclude>Congress Person template simplifies
 the structure of articles about Congress People.
 <pre>{{Congress Person|' . "\n";
	foreach ($attributes as $attr) {
		list ($name, $desc) = $attr;
		$template_body .= $name . '=' . $desc . "|\n";
	}
	$template_body .= '}}</pre>' .
	'The order of the fields is not relevant. The template name (Congress Person) should be given as the \'\'first\'\' thing on a page.
 </noinclude>' .
	'<includeonly>' . "\n";
	//include the image if present: #ifexist tests for the file page, the same
	//way the Ht_en template does (a plain #if on literal text is always true)
	$template_body .= '{{ #ifexist: Image:{{PAGENAME}}.jpg | [[Image:{{PAGENAME}}.jpg]] |}}' . "\n";
	foreach ($attributes as $attr) {
		$name = $attr[0];
		//raw semantic data (@@todo make pretty template table thing)
		//the empty default ({{{name|}}}) makes the #if false when the field
		//was not passed; without it the unexpanded parameter counts as text
		$template_body .= "{{ #if: {{{" . $name . "|}}}| [[$name:={{{" . $name . "}}}| ]] |}} \n";
	}
	$template_body .= '[[Category:Congress Person]] [[Category:Person]]
 </includeonly>';
	echo "updated 'Congress Person' template\n";
	do_update_wiki_page($wgTemplateTitle, $template_body);
}
/**
 * Imports every row of `metavid`.`people` into the wiki.
 *
 * Pass 1 writes one page per person: a {{Congress Person}} template call
 * filled from the $valid_attributes columns, followed by a short text block
 * linking to the media search for that person.
 * Pass 2 fetches the person's portrait from opensecrets.org (keyed by the
 * `osid` column) and records it as "<name_clean>.jpg" unless a file of that
 * name is already in the wiki.
 *
 * Dies when the people table is empty.
 */
function do_people_insert() {
	global $valid_attributes, $states_ary, $wgTmpDirectory;
	$dbr = wfGetDB(DB_SLAVE);

	//check person
	upTemplate_person();
	//do people query:
	$res = $dbr->query("SELECT * FROM `metavid`.`people`");
	if ($dbr->numRows($res) == 0)
		die('could not find people: ' . "\n");
	$person_ary = array ();
	while ($person = $dbr->fetchObject($res)) {
		$person_ary[] = $person;
	}

	foreach ($person_ary as $person) {
		$person_title = Title :: newFromUrl($person->name_clean);
		if (!is_object($person_title)) {
			//no usable title could be built from this name; there is no page to write
			echo "skipped person with invalid title: " . $person->name_clean . "\n";
			continue;
		}
		//semantic data via template:
		$page_body = '{{Congress Person|' . "\n";
		foreach ($valid_attributes as $dbKey => $attr) {
			list ($name, $desc) = $attr;
			if (trim($person-> $dbKey) != '') {
				//expand the state through $states_ary; keep the raw value when
				//the map has no entry rather than blanking it
				if ($dbKey == 'state' && isset($states_ary[$person->state]))
					$person->state = $states_ary[$person->state];
				$page_body .= "|{$name}={$person->$dbKey}| \n";
			}
		}
		//add in the full name attribute:
		$full_name = $person->title . ' ' . $person->first .
		' ' . $person->middle . ' ' . $person->last;
		$page_body .= "|Full Name=" . $full_name . "| \n";
		$page_body .= '}}';
		//basic info shown on the page; fall back to name_clean when all name parts are empty
		if (trim($full_name) == '')
			$full_name = $person->name_clean;

		$page_body .= "\n" .'Basic Person page For <b>' . $full_name . "</b><br>\n".
		"Text Spoken By [[Special:MediaSearch/person/{$person->name_clean}|$full_name]] ";
		do_update_wiki_page($person_title, $page_body);
	}

	foreach ($person_ary as $person) {
		//download/upload all the photos:
		$imgTitle = Title :: makeTitle(NS_IMAGE, $person->name_clean . '.jpg');

		# Check existence first so no temp file is created for images we skip
		$image = wfLocalFile($imgTitle);
		if ($image->exists()) {
			echo ($imgTitle->getDBkey() . " already in the wiki\n");
			continue;
		}

		$url = 'http://www.opensecrets.org/politicians/img/pix/' . $person->osid . '.jpg';
		$local_file = tempnam($wgTmpDirectory, 'WEBUPLOAD');
		if ($local_file === false) {
			echo ("could not create a temp file for " . $imgTitle->getDBkey() . "\n");
			continue;
		}

		//the remote copy is best effort: try up to 10 times
		$copied = false;
		for ($ct = 0; $ct < 10 && !$copied; $ct++) {
			if (@ copy($url, $local_file)) {
				print "copy success\n";
				$copied = true;
			} else {
				print ("failed to copy $url to local_file (tring again) \n");
			}
		}
		if (!$copied) {
			//do not publish the empty temp file as an image
			print 'complete failure' . "\n";
			unlink($local_file);
			continue;
		}

		# Stash the file
		echo ("Saving " . $imgTitle->getDBkey() . "...");
		$archive = $image->publish($local_file);
		//the temp file is no longer needed; is_file() covers the case where
		//publish() already moved it away
		if (is_file($local_file)) {
			unlink($local_file);
		}
		if (WikiError :: isError($archive)) {
			echo ("failed.\n");
			continue;
		}
		echo ("importing...");
		$comment = 'Image file for [[' . $person->name_clean . ']]';
		$license = '';

		if ($image->recordUpload($archive, $comment, $license)) {
			# We're done!
			echo ("done.\n");
		} else {
			echo ("failed.\n");
		}
	}
}
| 142 | +//$i=0; |
/**
 * Copies start time and duration from a metavid stream row onto the
 * matching MV stream object and stores it with updateStreamDB().
 *
 * The duration is adj_end_time - adj_start_time of the old row. A field is
 * only reassigned when it differs from the wanted value; updateStreamDB()
 * is called in every case.
 *
 * @param object $old_stream row of `metavid`.`streams` (name, adj_start_time, adj_end_time are read)
 */
function do_stream_attr_check($old_stream) {
	global $i; //debug counter; only the disabled "stop after 3 streams" check used it
	$mvStream = & mvGetMVStream(array (
		'name' => $old_stream->name
	));

	$wanted_start = $old_stream->adj_start_time;
	$wanted_duration = $old_stream->adj_end_time - $old_stream->adj_start_time;

	if ($mvStream->date_start_time != $wanted_start) {
		$mvStream->date_start_time = $wanted_start;
	}
	if ($mvStream->duration != $wanted_duration) {
		$mvStream->duration = $wanted_duration;
	}

	$mvStream->updateStreamDB();
	print "\nran stream db update: " . $mvStream->duration . ' ' . $mvStream->date_start_time . "\n";
}
/**
 * Rebuilds the `mv_stream_files` rows of a stream from the metavid
 * `trascoded` column (sic, the column name is spelled that way).
 *
 * 'low', 'high' and 'all' select which ogg file rows are written; for these
 * values the stream's existing rows are deleted and re-inserted through
 * do_insert_stream_file(). 'none' and any unrecognised value leave the table
 * untouched, so an unexpected value can no longer wipe the existing rows
 * without replacing them.
 *
 * @param object $old_stream row of `metavid`.`streams` (name, trascoded are read here)
 */
function do_stream_file_check(& $old_stream) {
	$mvStream = & mvGetMVStream(array (
		'name' => $old_stream->name
	));

	//file description messages to write per transcode state
	$quality_sets = array (
		'low' => array (
			'mv_ogg_low_quality'
		),
		'high' => array (
			'mv_ogg_high_quality'
		),
		'all' => array (
			'mv_ogg_high_quality',
			'mv_ogg_low_quality'
		)
	);
	$transcoded = (string) $old_stream->trascoded;
	if (!isset($quality_sets[$transcoded])) {
		if ($transcoded != 'none') {
			echo "unknown transcode state '$transcoded' for stream " . $old_stream->name . ", files left as is\n";
		}
		return;
	}

	//remove old file pointers:
	$dbw = wfGetDB(DB_MASTER);
	$sql = "DELETE FROM `mv_stream_files` WHERE `stream_id`=" . intval($mvStream->id);
	$dbw->query($sql);
	//update files:
	foreach ($quality_sets[$transcoded] as $qf) {
		do_insert_stream_file($mvStream, $old_stream, $qf);
	}
	//check for archive.org stuff too.. (disabled; re-enabling it needs
	//$file_list = $mvStream->getFileList(); before the loop)
	/*if($old_stream->archive_org!=''){
		$found=false;
		foreach($file_list as $file){
			if($file->path_type =='ext_archive_org'){
				$found=true;
			}
		}
		if(!$found)do_insert_stream_file($mvStream, $old_stream, 'mv_archive_org_link');
	}*/
}
/**
 * Inserts one `mv_stream_files` row for an ogg file of a stream.
 *
 * The file path is the archive base path of the stream's archive server
 * ($mvVideoArchivePaths) plus the stream name and '.ogg' (low quality) or
 * '.HQ.ogg' (high quality). The duration is read from "<path>.nfo": the text
 * after 'n:' on its first line, minus one leading zero and any fractional
 * part, is converted with ntp2seconds(). When the nfo file is missing, empty
 * or has no such text the duration is stored as 0.
 *
 * @param object $mvStream    MV stream object (id and name are read)
 * @param object $old_stream  row of `metavid`.`streams` (archive_server is read)
 * @param string $quality_msg 'mv_ogg_low_quality' or 'mv_ogg_high_quality'
 * @return string|null '' when nothing was inserted (unknown quality message
 *                     or unknown archive server), otherwise nothing
 */
function do_insert_stream_file($mvStream, $old_stream, $quality_msg) {
	global $mvVideoArchivePaths;
	if ($quality_msg == 'mv_ogg_low_quality') {
		$extension = '.ogg';
	} else if ($quality_msg == 'mv_ogg_high_quality') {
		$extension = '.HQ.ogg';
	} else {
		return '';
	}
	if (!isset($mvVideoArchivePaths[$old_stream->archive_server])) {
		//without a base path the stored path would point nowhere
		echo "unknown archive server '{$old_stream->archive_server}' for stream {$mvStream->name} \n";
		return '';
	}
	$path = $mvVideoArchivePaths[$old_stream->archive_server] . $mvStream->name . $extension;

	//get file duration from nfo file (if available):
	$dur = 0;
	$nfo_url = $path . '.nfo';
	$nfo_txt = file($nfo_url);
	if ($nfo_txt === false) {
		echo "missing nfo file: $nfo_url \n";
	} else if (!isset($nfo_txt[0])) {
		echo "empty nfo file: $nfo_url \n";
	} else {
		$parts = explode('n:', $nfo_txt[0]);
		$len = isset($parts[1]) ? trim($parts[1]) : '';
		if ($len === '') {
			echo "no duration in nfo file: $nfo_url \n";
		} else {
			//trim leading zero
			if ($len[0] == '0')
				$len = substr($len, 1);
			//trim sub frame times:
			$dot = strpos($len, '.');
			if ($dot !== false) {
				$len = substr($len, 0, $dot);
			}
			$dur = ntp2seconds($len);
		}
	}
	//only a number may be placed unquoted into the statement
	$dur = is_numeric($dur) ? $dur + 0 : 0;

	$dbw = wfGetDB(DB_MASTER);
	$sql = "INSERT INTO `mv_stream_files` (`stream_id`, `file_desc_msg`, `path`, `duration`)" .
	" VALUES (" . intval($mvStream->id) . ", " . $dbw->addQuotes($quality_msg) . ", " .
	$dbw->addQuotes($path) . ", " . $dur . " )";
	$dbw->query($sql);
}
| 242 | +//@@todo convert to MV_EditStream |
/**
 * Adds a stream to the wiki through MV_SpecialCRUDStream in 'add' mode.
 *
 * The stream name comes from $mvTitle, the type is fixed to 'metavid_file'
 * and the description is the text built by mv_semantic_stream_desc().
 *
 * @param MV_Title $mvTitle title of the stream to add
 * @param object   $stream  row of `metavid`.`streams` handed on to mv_semantic_stream_desc()
 */
function do_add_stream(& $mvTitle, & $stream) {
	$stream_adder = new MV_SpecialCRUDStream('add');
	$stream_adder->stream_name = $mvTitle->getStreamName();
	$stream_adder->stream_type = 'metavid_file';
	$stream_adder->stream_desc = mv_semantic_stream_desc($mvTitle, $stream);
	//everything is set: add the stream
	$stream_adder->add_stream();
}
/**
 * Imports streams from `metavid`.`streams` into the wiki.
 *
 * $mode selects the rows: 'all' takes every stream whose sync_status is
 * 'in_sync', 'files' every stream whose `trascoded` column is not 'none';
 * any other mode takes the streams whose name is LIKE $stream_name.
 *
 * For each stream the stream page is added or refreshed, start time and
 * duration are synced, images are copied (unless $options['noimage'] is
 * set), the file rows are rebuilt and the transcript text is imported
 * (unless $options['skiptext'] is set).
 *
 * Dies when the query returns no stream.
 *
 * @param string $mode        'all', 'files' or anything else for a lookup by name
 * @param string $stream_name stream name (LIKE pattern) used by the by-name mode
 */
function do_stream_insert($mode, $stream_name = '') {
	global $MVStreams, $options;
	$dbr = wfGetDB(DB_SLAVE);
	if ($mode == 'all') {
		$sql = "SELECT * FROM `metavid`.`streams` WHERE `sync_status`='in_sync'";
	} else if ($mode == 'files') {
		$sql = "SELECT * FROM `metavid`.`streams` WHERE `trascoded` != 'none'";
	} else {
		//the name comes from the command line: quote it so that a quote
		//character in it cannot break the statement (% and _ keep working)
		$sql = "SELECT * FROM `metavid`.`streams` WHERE `name` LIKE " . $dbr->addQuotes($stream_name);
	}
	$res = $dbr->query($sql);
	if ($dbr->numRows($res) == 0)
		die('could not find stream: ' . $stream_name . "\n");
	//load all stream rows:
	$streams = array ();
	while ($row = $dbr->fetchObject($res)) {
		$streams[] = $row;
	}
	print "working on " . count($streams) . ' streams'."\n";
	foreach ($streams as $stream) {
		//init the stream
		$MVStreams[$stream->name] = new MV_Stream($stream);
		//check if the stream has already been added to the wiki (if not add it)
		$mvTitle = new MV_Title('MvStream:' . $stream->name);
		if (!$mvTitle->doesStreamExist()) {
			do_add_stream($mvTitle, $stream);
			echo "stream " . $mvTitle->getStreamName() . " added \n";
		} else {
			do_update_wiki_page($stream->name, mv_semantic_stream_desc($mvTitle, $stream), MV_NS_STREAM);
		}
		//add duration and start_time attr
		do_stream_attr_check($stream);

		//do insert/copy all media images
		if (!isset($options['noimage'])) {
			do_proccess_images($stream);
		}

		//check for files (make sure they match with metavid db values)
		do_stream_file_check($stream);

		if (!isset($options['skiptext'])) {
			//process all stream text:
			do_proccess_text($stream);
		}
	}
}
/**
 * Imports the transcript text of one stream as 'Ht_en:' pages in the
 * MV_NS_MVD namespace.
 *
 * Every row of `metavid`.`stream_attr_time_text` between the stream's
 * adj_start_time and adj_end_time becomes one page titled
 * "<stream name>/<start>/<end>" (times relative to adj_start_time, formatted
 * by seconds2ntp()). A segment ends where the next row starts, is capped at
 * 40 seconds, and the last one ends at the stream end. When a person is
 * recorded for the stream close to the text time the page starts with a
 * [[Spoken By::...]] annotation. The text itself is lower-cased and put on
 * one line.
 *
 * @param object $stream row of `metavid`.`streams` (id, name, adj_start_time, adj_end_time are read)
 */
function do_proccess_text($stream){
	$dbr = wfGetDB(DB_SLAVE);
	/* for now use the stream search table (in the future should put in our orphaned person data)
	 * should be able to do quick checks against the index. */
	$sql = "SELECT (`time`+" . CC_OFFSET . ") as time, `value` " .
	"FROM `metavid`.`stream_attr_time_text` " .
	"WHERE `stream_fk`=" . $stream->id . " " .
	"AND `time` >= " . $stream->adj_start_time . " " .
	"AND `time` <= " . $stream->adj_end_time . " " .
	"ORDER BY `time` ASC ";

	$page_res = $dbr->query($sql);
	if ($dbr->numRows($page_res) == 0)
		echo 'No pages for stream' . $stream->name . "\n";
	$pages = array ();
	while ($page = $dbr->fetchObject($page_res)) {
		$pages[] = $page;
	}
	$page_count = count($pages);
	print "Checking " . $page_count . " text pages\n";
	$i=$j=0;
	foreach ($pages as $inx => $page) {
		//status updates (every 50 pages):
		if($i==50){
			print "on $j of ". $page_count . "\n";
			$i=0;
		}
		$i++;
		$j++;
		//times are kept as plain seconds here and only formatted for the title
		$start_time = $page->time - $stream->adj_start_time;
		if ($start_time < 0)
			$start_time = 0;
		if (($inx +1) == $page_count) {
			$end_time = $stream->adj_end_time - $stream->adj_start_time;
		} else {
			$end_time = $pages[$inx +1]->time - $stream->adj_start_time;
		}
		if (($end_time - $start_time) > 40)
			$end_time = $start_time +40;
		//skip segments that end before the stream starts
		if ($end_time < 0)
			continue;
		//now pull up the person for the given stream time:`metavid`.`people`.`name_clean`
		$sql = "SELECT * , abs( `metavid`.`people_attr_stream_time`.`time` -{$page->time} ) AS `distance` " .
		"FROM `metavid`.`people_attr_stream_time` " .
		"LEFT JOIN `metavid`.`people` ON `metavid`.`people_attr_stream_time`.`people_fk` = `metavid`.`people`.`id` " .
		"WHERE `metavid`.`people_attr_stream_time`.`stream_fk` ={$stream->id} " .
		//have a negative threshold of 4 seconds
		"AND (`metavid`.`people_attr_stream_time`.`time`-{$page->time})>-4 " .
		//have a total distance threshold of 90 seconds
		"AND abs( `metavid`.`people_attr_stream_time`.`time` -{$page->time} )< 90 " .
		"ORDER BY `distance` ASC " .
		"LIMIT 1 ";
		$person_res = $dbr->query($sql);

		$page_title = $stream->name . '/' . seconds2ntp($start_time) . '/' . seconds2ntp($end_time);
		$page_body = '';
		if ($dbr->numRows($person_res) != 0) {
			$person = $dbr->fetchObject($person_res);
			//the LEFT JOIN can return a time row without a matching person.
			//The name is not utf8-encoded here: do_update_wiki_page() encodes
			//the whole text, encoding it twice would garble non-ASCII names
			if ($person && trim($person->name_clean) != '') {
				$page_body .= "\n[[Spoken By::{$person->name_clean}]] ";
			}
		}
		$page_body .= trim(str_replace("\n", ' ', strtolower($page->value)));

		do_update_wiki_page('Ht_en:' . $page_title, $page_body, MV_NS_MVD);
	}
}
/**
 * Copies every image of a stream from metavid into the local image
 * directory and registers it in `mv_stream_images`.
 *
 * For each row of `metavid`.`image_archive` belonging to the stream:
 *  - the image time is made relative to the stream's adj_start_time,
 *  - a `mv_stream_images` row (stream_id, time) is inserted unless one exists,
 *  - "<image dir>/<relative time>.jpg" is fetched from
 *    http://metavid.ucsc.edu/image_media/<id>.jpg unless the file is already there.
 *
 * @param object $stream row of `metavid`.`streams` (id, name, adj_start_time are read)
 */
function do_proccess_images($stream) {
	global $mvLocalImgLoc, $MVStreams, $wgDBname;
	$dbr =& wfGetDB(DB_SLAVE);
	$dbw =& wfGetDB(DB_MASTER);

	//get all images for the current stream:
	$image_res = $dbr->query("SELECT * FROM `metavid`.`image_archive`
 WHERE `stream_fk`= {$stream->id}");
	$img_count = $dbr->numRows($image_res);
	print "Found " . $img_count . " images for stream " . $stream->name . "\n";

	//grab from metavid and copy to local directory structure:
	$since_report = 0;
	$done = 0;
	while ($row = $dbr->fetchObject($image_res)) {
		$relative_time = $row->time - $stream->adj_start_time;
		//status update every 10 images:
		if ($since_report == 10) {
			print "On image $done of $img_count time: " . seconds2ntp($relative_time) . "\n";
			$since_report = 0;
		}
		$done++;
		$since_report++;

		//where the image comes from and where it goes:
		$mv_stream_id = $MVStreams[$stream->name]->getStreamId();
		$local_img_dir = MV_StreamImage :: getLocalImageDir($mv_stream_id);
		$metavid_img_url = 'http://metavid.ucsc.edu/image_media/' . $row->id . '.jpg';
		$local_img_file = $local_img_dir . '/' . $relative_time . '.jpg';

		//check if the image already exist in the new table
		$img_check = $dbr->query("SELECT * FROM `$wgDBname`.`mv_stream_images` " .
		"WHERE `stream_id`={$mv_stream_id} " .
		"AND `time`=$relative_time");
		$row_known = ($dbr->numRows($img_check) != 0);

		//row and file both present: nothing to do for this image
		if ($row_known && is_file($local_img_file)) {
			continue;
		}
		//row missing: register the image (a known row with a missing file is only re-fetched)
		if (!$row_known) {
			$dbw->insert('mv_stream_images', array (
				'stream_id' => $MVStreams[$stream->name]->getStreamId(),
				'time' => $relative_time
			));
			$img_id = $dbw->insertId(); //not used further in this function
		}

		if (is_file($local_img_file)) {
			echo "skipped $local_img_file \n";
			continue;
		}
		//all good: a successful copy is not reported
		if (!copy($metavid_img_url, $local_img_file)) {
			echo "failed to copy $metavid_img_url to $local_img_file...\n";
		}
	}
}
| 436 | + |
/**
 * Writes $wikiText to a wiki page as the import bot, without overwriting
 * work done by other users.
 *
 * The text is passed through utf8_encode() before it is compared or saved.
 * Pages rejected by mvDoMvPage() are not edited; an existing page with such
 * a title is deleted and its MVD index entry removed. An existing page is
 * left alone when its last editor is not $botUserName, when it is a
 * redirect, or when its text already equals the new text; $forceUpdate
 * overrides all three checks.
 *
 * @param Title|string $wgTitle     title object, or title text to be combined with $ns
 * @param string       $wikiText    new page text
 * @param int|null     $ns          namespace, used only when $wgTitle is not an object
 * @param bool         $forceUpdate edit even when one of the skip checks applies
 */
function do_update_wiki_page($wgTitle, $wikiText, $ns = null, $forceUpdate=false) {
	global $botUserName;
	if (!is_object($wgTitle)) {
		//a null or blank title text cannot name a page
		if (trim((string) $wgTitle) === '') {
			print "empty title: no edit\n";
			return ;
		}
		$wgTitle = Title :: makeTitle($ns, $wgTitle);
	}
	//make sure the text is utf8 encoded:
	$wikiText = utf8_encode($wikiText);

	$wgArticle = new Article($wgTitle);
	if(!mvDoMvPage($wgTitle, $wgArticle, false)){
		print "bad title: ".$wgTitle->getDBkey()." no edit";
		if($wgTitle->exists()){
			print "remove article";
			$wgArticle->doDeleteArticle( 'bad title' );
		}
		//some how mvdIndex and mvd pages got out of sync do a separate check for the mvd:
		if(MV_Index::getMVDbyTitle($wgArticle->mTitle->getDBkey())!=null){
			print ', rm mvd';
			MV_Index::remove_by_wiki_title($wgArticle->mTitle->getDBkey());
		}
		print "\n";
		return ;
	}
	if ($wgTitle->exists()) {
		//if last edit!=mvBot skip (don't overwrite peoples improvements)
		$rev = Revision::newFromTitle($wgTitle);
		//newFromTitle() can come back without a revision; such a page has no
		//known last editor and is treated like a bot page
		$has_rev = is_object($rev);
		$last_editor = $has_rev ? $rev->getRawUserText() : '';
		if( $has_rev && $botUserName!= $last_editor){
			print ' skiped page edited by user:'.$last_editor."\n";
			if(!$forceUpdate)return ;
		}
		//proc article:
		$cur_text = $wgArticle->getContent();
		//if its a redirect skip (the redirect keyword is matched in any letter case)
		if(strncasecmp($cur_text, '#REDIRECT', strlen('#REDIRECT')) == 0){
			print ' skiped page moved by user:'.$last_editor."\n";
			if(!$forceUpdate)return ;
		}
		//check if text is identical:
		if (trim($cur_text) == trim($wikiText)) {
			if(!$forceUpdate)return ;
		}
	}
	//got here do the edit:
	$sum_txt = 'metavid bot insert';
	$wgArticle->doEdit($wikiText, $sum_txt);
	print "did edit on " . $wgTitle->getDBkey() . "\n";
}
/**
 * Builds the wiki text of a stream page from the metavid stream data.
 *
 * The text consists of an "Official Record" section with links to GovTrack
 * and THOMAS (only when the stream name contains 'house' or 'senate'), an
 * optional archive.org link, the C-SPAN Congressional Chronicle link, the
 * semantic annotations returned by mv_proccess_attr() for the varchar and
 * int attribute tables, the stream duration, the original date (when set)
 * and a category derived from sync_status.
 *
 * The globals $start_time / $end_time are filled by mv_proccess_attr() from
 * the adj_start_time / adj_end_time attributes. They are cleared first so a
 * stream that lacks those attributes does not inherit the values of the
 * previously processed stream; in that case the adj_start_time /
 * adj_end_time columns of the stream row are used.
 *
 * @param MV_Title $mvTitle title of the stream (getStreamName() is read)
 * @param object   $stream  row of `metavid`.`streams`
 * @return string wiki text for the stream page
 */
function mv_semantic_stream_desc(& $mvTitle, & $stream) {
	global $start_time, $end_time;
	$start_time = null;
	$end_time = null;

	$stream_id = $stream->id;
	$out = '';
	$pout = mv_proccess_attr('stream_attr_varchar', $stream_id);
	$pout .= mv_proccess_attr('stream_attr_int', $stream_id);
	//no time attributes found for this stream: fall back to the stream row
	if ($start_time === null && isset($stream->adj_start_time))
		$start_time = $stream->adj_start_time;
	if ($end_time === null && isset($stream->adj_end_time))
		$end_time = $stream->adj_end_time;

	//add links/generic text at the start
	$out .= '==Official Record==' . "\n";
	$date = date('Ymd', intval($start_time));
	$cspan_date = date('Y-m-d', intval($start_time));
	//chamber code used by the external sites: h = house, s = senate
	$ch_type = '';
	if (strpos($mvTitle->getStreamName(), 'house') !== false)
		$ch_type = 'h';
	if (strpos($mvTitle->getStreamName(), 'senate') !== false)
		$ch_type = 's';
	if ($ch_type != '') {
		$out .= '*[[GovTrack]] Congressional Record' .
		'[http://www.govtrack.us/congress/recordindex.xpd?date=' . $date .
		'&where=' . $ch_type .
		']' . "\n\n";
		$out .= '*[[THOMAS]] Congressional Record ' .
		'[http://thomas.loc.gov/cgi-bin/query/B?r110:@FIELD(FLD003+' . $ch_type . ')+@FIELD(DDATE+' . $date . ')' .
		']' . "\n\n";
		//NOTE(review): this link uses exactly the same URL as the
		//"Congressional Record" link above - confirm the intended query
		$out .= '*[[THOMAS]] Extension of Remarks ' .
		'[http://thomas.loc.gov/cgi-bin/query/B?r110:@FIELD(FLD003+' . $ch_type . ')+@FIELD(DDATE+' . $date . ')' .
		']' . "\n\n";
	}
	if ($stream->archive_org != '') {
		$out .= '==More Media Sources=='."\n";
		$out .= '*[[Archive.org]] hosted original copy ' .
		'[http://www.archive.org/details/mv_' . $stream->name . ']' . "\n";
	}
	//all streams have a congressional chronicle link:
	$out .= '*[[CSPAN]]\'s Congressional Chronicle ' .
	'[http://www.c-spanarchives.org/congress/?q=node/69850&date=' . $cspan_date . '&hors=' . $ch_type . ']';
	$out .= "\n\n";
	$out .= $pout;
	$out .= '[[stream_duration:=' . ($end_time - $start_time) . '| ]]' . "\n";
	if($stream->org_start_time){
		$out .= '[[original_date:='.$stream->org_start_time.'| ]]';
	}

	//add stream category (based on sync status)
	switch($stream->sync_status){
		case 'not_checked':
			$out.="\n\n".'[[Category:Stream Unchecked]]';
		break;
		case 'impossible':
			$out.="\n\n".'[[Category:Stream Out of Sync]]';
		break;
		case 'in_sync':
			$out.="\n\n".'[[Category:Stream Basic Sync]]';
			//other options [stream high quality sync ];
		break;
	}

	return $out;
}
/**
 * Turns the rows of one metavid stream attribute table into semantic
 * annotations.
 *
 * Every row whose type is known to getTypeTitle() yields one line
 * "[[<type><sep><value>| ]]", where <sep> is '::' for types that
 * getTypeTitle() marks as 'rel' and ':=' for all others. Rows of any other
 * type are skipped.
 *
 * Side effect: the globals $start_time and $end_time are set from the rows
 * of type adj_start_time / adj_end_time when such rows are present.
 *
 * @param string $table     attribute table inside the `metavid` database
 * @param int    $stream_id value matched against `stream_fk`
 * @return string the annotation lines, '' when there are none
 */
function mv_proccess_attr($table, $stream_id) {
	global $start_time, $end_time;
	$dbr = wfGetDB(DB_SLAVE);
	$sql = "SELECT * FROM `metavid`.`$table` WHERE `stream_fk`=" . intval($stream_id);
	$res = $dbr->query($sql);
	$out = '';
	while ($var = $dbr->fetchObject($res)) {
		$type_title = getTypeTitle($var->type);
		if ($var->type == 'adj_start_time')
			$start_time = $var->value;
		if ($var->type == 'adj_end_time')
			$end_time = $var->value;
		//getTypeTitle() returns '' for unknown types and array(kind, label) otherwise
		if (is_array($type_title)) {
			$reltype = ($type_title[0] == 'rel') ? '::' : ':=';
			$out .= '[[' . $var->type . $reltype . $var->value . '| ]]' . "\n";
		}
	}
	return $out;
}
/**
 * Looks up the kind and the display label of a metavid attribute type.
 *
 * @param string $type attribute type name, e.g. 'cspan_title'
 * @return array|string array(kind, label) with kind 'rel' or 'atr' for a
 *                      known type, '' for every other value
 */
function getTypeTitle($type) {
	$type_titles = array (
		'cspan_type' => array ('rel', 'Government Event'),
		'cspan_title' => array ('atr', 'C-SPAN Title'),
		'cspan_desc' => array ('atr', 'C-SPAN Description'),
		'adj_start_time' => array ('atr', 'Unix Start Time'),
		'adj_end_time' => array ('atr', 'Unix End Time')
	);
	//walk the table in order with a loose comparison, as a switch would
	foreach ($type_titles as $known_type => $title) {
		if ($type == $known_type) {
			return $title;
		}
	}
	return '';
}
| 605 | +?> |
Index: trunk/extensions/MetavidWiki/maintenance/metavid2mvWiki.php |
— | — | @@ -17,6 +17,7 @@ |
18 | 18 | $cur_path = $IP = dirname(__FILE__); |
19 | 19 | //include commandLine.inc from the mediaWiki maintance dir: |
20 | 20 | require_once ('../../../maintenance/commandLine.inc'); |
| 21 | +require_once ('metavid2mvWiki.inc.php'); |
21 | 22 | /* |
22 | 23 | * assume the wiki user has access to the metavid table and that the |
23 | 24 | * metavid table is titled `metavid` |
— | — | @@ -236,597 +237,6 @@ |
237 | 238 | do_stream_insert('stream', $args[0]); |
238 | 239 | break; |
239 | 240 | } |
240 | | -function upTempalte_Ht_en($force = false) { |
241 | | - $wgTemplateTitle = Title :: makeTitle(NS_TEMPLATE, 'Ht_en'); |
242 | | - if (!$wgTemplateTitle->exists() || $force) { |
243 | | - do_update_wiki_page($wgTemplateTitle, '<noinclude> |
244 | | - This is the default Template for the display of transcript text. |
245 | | - </noinclude><includeonly>{{ #if: {{{PersonName|}}} | {{ #ifexist: Image:{{{PersonName}}}.jpg | [[Image:{{{PersonName}}}.jpg|44px|left]]|[[Image:Missing person.jpg|44px|left]]}} |}}{{ #if:{{{PersonName|}}}|[[{{{PersonName}}}]]: |}}{{{BodyText}}} |
246 | | - </includeonly>'); |
247 | | - } |
248 | | -} |
249 | | -function upTemplate_person($force = false) { |
250 | | - global $valid_attributes; |
251 | | - $wgTemplateTitle = Title :: makeTitle(NS_TEMPLATE, 'Congress Person'); |
252 | | - if (!$wgTemplateTitle->exists() || $force) { |
253 | | - $wgTemplateArticle = new Article($wgTemplateTitle); |
254 | | - $template_body = '<noinclude>Congress Person template simplifies |
255 | | - the structure of articles about Congress People. |
256 | | - <pre>{{Congress Person|' . "\n"; |
257 | | - foreach ($valid_attributes as $dbKey => $attr) { |
258 | | - list ($name, $desc) = $attr; |
259 | | - $template_body .= $name . '=' . $desc . "|\n"; |
260 | | - } |
261 | | - $template_body .= '}}</pre>' . |
262 | | - 'The order of the fields is not relevant. The template name (Congress Person) should be given as the \'\'first\'\' thing on a page. |
263 | | - </noinclude>' . |
264 | | - '<includeonly>' . "\n"; |
265 | | - //include the image if present: |
266 | | - $template_body .= '{{ #if: { Image:{{PAGENAME}}.jpg}| [[Image:{{PAGENAME}}.jpg]] |}}' . "\n"; |
267 | | - foreach ($valid_attributes as $dbKey => $attr) { |
268 | | - list ($name, $desc) = $attr; |
269 | | - //raw semantic data (@@todo make pretty template table thing) |
270 | | - $template_body .= "{{ #if: {{{" . $name . "}}}| [[$name:={{{" . $name . "}}}| ]] |}} \n"; |
271 | | - } |
272 | | - $template_body .= '[[Category:Congress Person]] [[Category:Person]] |
273 | | - </includeonly>'; |
274 | | - echo "updated 'Congress Person' template\n"; |
275 | | - do_update_wiki_page($wgTemplateTitle, $template_body); |
276 | | - } |
277 | | -} |
278 | | -function do_people_insert() { |
279 | | - global $valid_attributes, $states_ary; |
280 | | - $dbr = wfGetDB(DB_SLAVE); |
281 | 241 | |
282 | | - //check person |
283 | | - upTemplate_person(); |
284 | | - //do people query: |
285 | | - $res = $dbr->query("SELECT * FROM `metavid`.`people`"); |
286 | | - if ($dbr->numRows($res) == 0) |
287 | | - die('could not find people: ' . "\n"); |
288 | | - $person_ary = array (); |
289 | | - while ($person = $dbr->fetchObject($res)) { |
290 | | - $person_ary[] = $person; |
291 | | - } |
292 | | - foreach ($person_ary as $person) { |
293 | | - $person_title = Title :: newFromUrl($person->name_clean); |
294 | | - //semantic data via template: |
295 | | - $page_body = '{{Congress Person|' . "\n"; |
296 | | - foreach ($valid_attributes as $dbKey => $attr) { |
297 | | - list ($name, $desc) = $attr; |
298 | | - if (trim($person-> $dbKey) != '') { |
299 | | - if ($dbKey == 'state') |
300 | | - $person->state = $states_ary[$person->state]; |
301 | | - $page_body .= "|{$name}={$person->$dbKey}| \n"; |
302 | | - } |
303 | | - } |
304 | | - //add in the full name attribute: |
305 | | - $page_body .= "|Full Name=" . $person->title . ' ' . $person->first . |
306 | | - ' ' . $person->middle . ' ' . $person->last . "| \n"; |
307 | | - $page_body .= '}}'; |
308 | | - //add in basic info to be overwitten by tranclude (from |
309 | | - $full_name = $person->title . ' ' . $person->first . |
310 | | - ' ' . $person->middle . ' ' . $person->last; |
311 | | - if (trim($full_name) == '') |
312 | | - $full_name = $person->name_clean; |
313 | | - |
314 | | - $page_body .= "\n" .'Basic Person page For <b>' . $full_name . "</b><br>\n". |
315 | | - "Text Spoken By [[Special:MediaSearch/person/{$person->name_clean}|$full_name]] "; |
316 | | - ; |
317 | | - do_update_wiki_page($person_title, $page_body); |
318 | | - } |
319 | | - foreach ($person_ary as $person) { |
320 | | - //download/upload all the photos: |
321 | | - $imgTitle = Title :: makeTitle(NS_IMAGE, $person->name_clean . '.jpg'); |
322 | | - //if(!$imgTitle->exists()){ |
323 | | - global $wgTmpDirectory; |
324 | | - $url = 'http://www.opensecrets.org/politicians/img/pix/' . $person->osid . '.jpg'; |
325 | | - //print $wgTmpDirectory . "\n"; |
326 | | - $local_file = tempnam($wgTmpDirectory, 'WEBUPLOAD'); |
327 | | - //copy file: |
328 | | - |
329 | | - # Check if already there existence |
330 | | - $image = wfLocalFile($imgTitle); |
331 | | - if ($image->exists()) { |
332 | | - echo ($imgTitle->getDBkey() . " already in the wiki\n"); |
333 | | - continue; |
334 | | - } |
335 | | - |
336 | | - for ($ct = 0; $ct < 10; $ct++) { |
337 | | - if (!@ copy($url, $local_file)) { |
338 | | - print ("failed to copy $url to local_file (tring again) \n"); |
339 | | - } else { |
340 | | - print "copy success\n"; |
341 | | - $ct = 10; |
342 | | - } |
343 | | - if ($ct == 9) |
344 | | - print 'complete failure' . "\n"; |
345 | | - } |
346 | | - |
347 | | - # Stash the file |
348 | | - echo ("Saving " . $imgTitle->getDBkey() . "..."); |
349 | | - $image = wfLocalFile($imgTitle); |
350 | | - |
351 | | - $archive = $image->publish($local_file); |
352 | | - if (WikiError :: isError($archive)) { |
353 | | - echo ("failed.\n"); |
354 | | - continue; |
355 | | - } |
356 | | - echo ("importing..."); |
357 | | - $comment = 'Image file for [[' . $person->name_clean . ']]'; |
358 | | - $license = ''; |
359 | | - |
360 | | - if ($image->recordUpload($archive, $comment, $license)) { |
361 | | - # We're done! |
362 | | - echo ("done.\n"); |
363 | | - } else { |
364 | | - echo ("failed.\n"); |
365 | | - } |
366 | | - //} |
367 | | - } |
368 | | -} |
/**
 * Copy the adjusted start time and the derived duration of an old stream row
 * onto the matching MV stream object and write the stream back to the db.
 *
 * @param object $old_stream old stream row; reads ->name, ->adj_start_time
 *                           and ->adj_end_time
 */
function do_stream_attr_check($old_stream) {
	$mvStream = & mvGetMVStream(array (
		'name' => $old_stream->name
	));
	//the duration is the span between the adjusted start and end times:
	$duration = $old_stream->adj_end_time - $old_stream->adj_start_time;

	if ($mvStream->date_start_time != $old_stream->adj_start_time) {
		$mvStream->date_start_time = $old_stream->adj_start_time;
	}
	if ($mvStream->duration != $duration) {
		$mvStream->duration = $duration;
	}
	//note: the db update is issued even when neither value changed
	$mvStream->updateStreamDB();
	print "\nran stream db update: " . $mvStream->duration . ' ' . $mvStream->date_start_time . "\n";
}
/**
 * Rebuild the `mv_stream_files` rows of a stream from the `trascoded` flag of
 * the old stream row.
 *
 * For 'low', 'high' or 'all' the existing file rows of the stream are deleted
 * and one row per quality is re-inserted via do_insert_stream_file(). For
 * 'none' nothing is done. Any other value is reported and the existing rows
 * are left untouched (previously they were deleted and nothing re-inserted).
 *
 * @param object $old_stream old stream row; reads ->name and ->trascoded
 */
function do_stream_file_check(& $old_stream) {
	$mvStream = & mvGetMVStream(array (
		'name' => $old_stream->name
	));
	//only read by the disabled archive.org check at the end of this function
	$file_list = $mvStream->getFileList();

	if ($old_stream->trascoded != 'none') {
		//map the transcode flag onto the set of file description messages:
		$set = null;
		if ($old_stream->trascoded == 'low')
			$set = array (
				'mv_ogg_low_quality'
			);
		if ($old_stream->trascoded == 'high')
			$set = array (
				'mv_ogg_high_quality'
			);
		if ($old_stream->trascoded == 'all')
			$set = array (
				'mv_ogg_high_quality',
				'mv_ogg_low_quality'
			);
		if ($set === null) {
			//bail out before the DELETE so we never drop rows we can't replace
			print "unknown trascoded value '" . $old_stream->trascoded . "' for stream " .
				$old_stream->name . " (file pointers left untouched)\n";
			return;
		}
		//remove old file pointers:
		$dbw = wfGetDB(DB_WRITE);
		$sql = "DELETE FROM `mv_stream_files` WHERE `stream_id`=" . $mvStream->id;
		$dbw->query($sql);
		//update files:
		foreach ($set as $qf) {
			do_insert_stream_file($mvStream, $old_stream, $qf);
		}
	}
	//check for archive.org stuff too..
	/*if($old_stream->archive_org!=''){
		$found=false;
		foreach($file_list as $file){
			if($file->path_type =='ext_archive_org'){
				$found=true;
			}
		}
		if(!$found)do_insert_stream_file($mvStream, $old_stream, 'mv_archive_org_link');
	}*/
}
/**
 * Insert one `mv_stream_files` row for the given stream and quality.
 *
 * The file path is built from the archive server path of the old stream row
 * and the stream name. The duration is read from the first line of the
 * "<path>.nfo" file when it can be fetched and parsed, otherwise 0 is stored.
 *
 * @param object $mvStream    MV stream object; reads ->id and ->name
 * @param object $old_stream  old stream row; reads ->archive_server
 * @param string $quality_msg 'mv_ogg_low_quality' or 'mv_ogg_high_quality'
 * @return string|null '' when $quality_msg is not a known quality, otherwise nothing
 */
function do_insert_stream_file($mvStream, $old_stream, $quality_msg) {
	global $mvVideoArchivePaths;
	if ($quality_msg == 'mv_ogg_low_quality') {
		$path = $mvVideoArchivePaths[$old_stream->archive_server] . $mvStream->name . '.ogg';
	} else if ($quality_msg == 'mv_ogg_high_quality') {
		$path = $mvVideoArchivePaths[$old_stream->archive_server] . $mvStream->name . '.HQ.ogg';
	} else {
		return '';
	}
	$dbw = wfGetDB(DB_WRITE);

	//get file duration from nfo file (if available):
	$dur = 0;
	$nfo_url = $path . '.nfo';
	$nfo_txt = file($nfo_url);
	if ($nfo_txt === false) {
		echo "missing nfo file: $nfo_url \n";
	} else if (!isset($nfo_txt[0])) {
		echo "empty nfo file: $nfo_url \n";
	} else {
		//the length is whatever follows the first 'n:' on the first line
		$parts = explode('n:', $nfo_txt[0]);
		$len = isset($parts[1]) ? trim($parts[1]) : '';
		if ($len === '') {
			echo "unparsable nfo file: $nfo_url \n";
		} else {
			//trim leading zero
			if ($len[0] == '0')
				$len = substr($len, 1);
			//trim sub frame times:
			$dot_pos = strpos($len, '.');
			if ($dot_pos !== false) {
				$len = substr($len, 0, $dot_pos);
			}
			$dur = ntp2seconds($len);
			//never let a failed conversion produce invalid sql
			if (!is_numeric($dur))
				$dur = 0;
		}
	}

	//string values are escaped by the db layer rather than interpolated raw
	$sql = "INSERT INTO `mv_stream_files` (`stream_id`, `file_desc_msg`, `path`, `duration`)" .
		" VALUES (" . $dbw->addQuotes($mvStream->id) . ", " . $dbw->addQuotes($quality_msg) . ", " .
		$dbw->addQuotes($path) . ", {$dur} )";
	$dbw->query($sql);
}
//@@todo convert to MV_EditStream
/**
 * Add a new 'metavid_file' stream to the wiki through the stream CRUD special
 * page object, using the semantic description built for the old stream row.
 *
 * @param object $mvTitle title object of the stream; supplies the stream name
 * @param object $stream  old stream row passed on to mv_semantic_stream_desc()
 */
function do_add_stream(& $mvTitle, & $stream) {
	$streamPage = new MV_SpecialCRUDStream('add');

	$streamPage->stream_name = $mvTitle->getStreamName();
	$streamPage->stream_type = 'metavid_file';
	$streamPage->stream_desc = mv_semantic_stream_desc($mvTitle, $stream);

	//add the stream:
	$streamPage->add_stream();
}
/**
 * Import streams from the `metavid`.`streams` table into the wiki.
 *
 * For every selected stream: add or refresh its stream page, sync its start
 * time/duration, copy its images (unless $options['noimage'] is set), rebuild
 * its file rows and import its text (unless $options['skiptext'] is set).
 * Dies when the selection matches no stream.
 *
 * @param string $mode        'all' (streams that are in sync), 'files'
 *                            (streams that have been transcoded) or anything
 *                            else to select by name
 * @param string $stream_name name pattern for the by-name mode (used with LIKE)
 */
function do_stream_insert($mode, $stream_name = '') {
	global $MVStreams, $options;
	$dbr = wfGetDB(DB_SLAVE);
	if ($mode == 'all') {
		$sql = "SELECT * FROM `metavid`.`streams` WHERE `sync_status`='in_sync'";
	} else if ($mode == 'files') {
		$sql = "SELECT * FROM `metavid`.`streams` WHERE `trascoded` != 'none'";
	} else {
		//the name comes from the command line: let the db layer quote it
		//(LIKE wildcards in the name keep working)
		$sql = "SELECT * FROM `metavid`.`streams` WHERE `name` LIKE " . $dbr->addQuotes($stream_name);
	}
	$res = $dbr->query($sql);
	if ($dbr->numRows($res) == 0)
		die('could not find stream: ' . $stream_name . "\n");
	//load all stream rows:
	$streams = array ();
	while ($row = $dbr->fetchObject($res)) {
		$streams[] = $row;
	}
	print "working on " . count($streams) . ' streams' . "\n";
	foreach ($streams as $stream) {
		//init the stream
		$MVStreams[$stream->name] = new MV_Stream($stream);
		//check if the stream has already been added to the wiki (if not add it)
		$mvTitle = new MV_Title('MvStream:' . $stream->name);
		if (!$mvTitle->doesStreamExist()) {
			do_add_stream($mvTitle, $stream);
			echo "stream " . $mvTitle->getStreamName() . " added \n";
		} else {
			do_update_wiki_page($stream->name, mv_semantic_stream_desc($mvTitle, $stream), MV_NS_STREAM);
		}
		//add duration and start_time attr
		do_stream_attr_check($stream);

		//do insert/copy all media images
		if (!isset($options['noimage'])) {
			do_proccess_images($stream);
		}

		//check for files (make sure they match with metavid db values)
		do_stream_file_check($stream);

		if (!isset($options['skiptext'])) {
			//process all stream text:
			do_proccess_text($stream);
		}
	}
}
/**
 * Turn the timed text rows of a stream into 'Ht_en:' transcript pages.
 *
 * Each text row becomes one page titled
 * "<stream name>/<start ntp>/<end ntp>". A segment ends where the next row
 * starts (or at the end of the stream for the last row) and is capped at 40
 * seconds. When a person record lies close enough to the text time, a
 * [[Spoken By::...]] annotation is prepended to the lower-cased text.
 *
 * @param object $stream old stream row; reads ->id, ->name, ->adj_start_time
 *                       and ->adj_end_time
 */
function do_proccess_text($stream) {
	$dbr = wfGetDB(DB_SLAVE);
	/* for now use the stream search table (in the future should put in our orphaned person data)
	 * should be able to do quick checks against the index. */
	$sql = "SELECT (`time`+" . CC_OFFSET . ") as time, `value` " .
		"FROM `metavid`.`stream_attr_time_text`
		WHERE `stream_fk`=" . $stream->id . "
		AND `time` >= " . $stream->adj_start_time . "
		AND `time` <= " . $stream->adj_end_time . "
		ORDER BY `time` ASC ";

	$page_res = $dbr->query($sql);
	if ($dbr->numRows($page_res) == 0)
		echo 'No pages for stream' . $stream->name . "\n";
	$pages = array ();
	while ($page = $dbr->fetchObject($page_res)) {
		$pages[] = $page;
	}
	$page_count = count($pages);
	print "Checking " . $page_count . " text pages\n";
	foreach ($pages as $inx => $page) {
		//status update every 50 pages:
		if ($inx > 0 && $inx % 50 == 0) {
			print "on $inx of " . $page_count . "\n";
		}
		//times are kept as plain seconds relative to the stream start; the
		//selected time includes CC_OFFSET so it can fall before the start
		$start_time = $page->time - $stream->adj_start_time;
		if ($start_time < 0)
			$start_time = 0;
		if (($inx + 1) == $page_count) {
			$end_time = $stream->adj_end_time - $stream->adj_start_time;
		} else {
			$end_time = $pages[$inx + 1]->time - $stream->adj_start_time;
		}
		//cap the segment length at 40 seconds
		if (($end_time - $start_time) > 40)
			$end_time = $start_time + 40;
		//skip if end_time < 0
		if ($end_time < 0)
			continue;
		//now pull up the person for the given stream time:`metavid`.`people`.`name_clean`
		$sql = "SELECT * , abs( `metavid`.`people_attr_stream_time`.`time` -{$page->time} ) AS `distance` " .
			"FROM `metavid`.`people_attr_stream_time` " .
			"LEFT JOIN `metavid`.`people` ON `metavid`.`people_attr_stream_time`.`people_fk` = `metavid`.`people`.`id` " .
			"WHERE `metavid`.`people_attr_stream_time`.`stream_fk` ={$stream->id} " .
			//have a negative threshold of 4 seconds
			"AND (`metavid`.`people_attr_stream_time`.`time`-{$page->time})>-4 " .
			//have a total distance threshold of 90 seconds
			"AND abs( `metavid`.`people_attr_stream_time`.`time` -{$page->time} )< 90 " .
			"ORDER BY `distance` ASC " .
			"LIMIT 1 ";
		$person_res = $dbr->query($sql);

		$page_title = $stream->name . '/' . seconds2ntp($start_time) . '/' . seconds2ntp($end_time);
		$page_body = '';
		if ($dbr->numRows($person_res) != 0) {
			$person = $dbr->fetchObject($person_res);
			$person_name = utf8_encode($person->name_clean);
			$page_body .= "\n[[Spoken By::{$person_name}]] ";
		}
		//the text is flattened to a single lower-case line
		$page_body .= trim(str_replace("\n", ' ', strtolower($page->value)));

		do_update_wiki_page('Ht_en:' . $page_title, $page_body, MV_NS_MVD);
	}
}
/**
 * For each archived image of the stream: register it in `mv_stream_images`
 * (unless a row for that stream/time already exists) and copy the jpg from
 * the metavid image server into the local image directory (unless the file
 * is already there).
 *
 * @param object $stream old stream row; reads ->id, ->name and ->adj_start_time
 */
function do_proccess_images($stream) {
	global $mvLocalImgLoc, $MVStreams, $wgDBname;
	$dbr = wfGetDB(DB_SLAVE);
	$dbw = wfGetDB(DB_MASTER);

	//get all images for the current stream:
	$sql = "SELECT * FROM `metavid`.`image_archive`
				WHERE `stream_fk`= {$stream->id}";
	$image_res = $dbr->query($sql);
	$img_count = $dbr->numRows($image_res);
	print "Found " . $img_count . " images for stream " . $stream->name . "\n";

	//grab from metavid and copy to local directory structure:
	$seen = 0;
	while ($row = $dbr->fetchObject($image_res)) {
		$relative_time = $row->time - $stream->adj_start_time;
		//status update after every 10 images:
		if ($seen > 0 && $seen % 10 == 0) {
			print "On image $seen of $img_count time: " . seconds2ntp($relative_time) . "\n";
		}
		$seen++;

		//work out where the image comes from and where it goes:
		$mv_stream_id = $MVStreams[$stream->name]->getStreamId();
		$local_img_dir = MV_StreamImage :: getLocalImageDir($mv_stream_id);
		$metavid_img_url = 'http://metavid.ucsc.edu/image_media/' . $row->id . '.jpg';
		$local_img_file = $local_img_dir . '/' . $relative_time . '.jpg';

		//check if the image already exist in the new table
		$sql = "SELECT * FROM `$wgDBname`.`mv_stream_images` " .
			"WHERE `stream_id`={$mv_stream_id} " .
			"AND `time`=$relative_time";
		$img_check = $dbr->query($sql);
		$already_indexed = ($dbr->numRows($img_check) != 0);

		//indexed and on disk: nothing to do for this image
		if ($already_indexed && is_file($local_img_file)) {
			continue;
		}
		//not indexed yet: add the row (an indexed image missing on disk is
		//only re-fetched below, not re-inserted)
		if (!$already_indexed) {
			$dbw->insert('mv_stream_images', array (
				'stream_id' => $MVStreams[$stream->name]->getStreamId(),
				'time' => $relative_time
			));
			$new_row_id = $dbw->insertId();
		}

		if (is_file($local_img_file)) {
			echo "skipped $local_img_file \n";
			continue;
		}
		//a successful copy is not reported
		if (!copy($metavid_img_url, $local_img_file)) {
			echo "failed to copy $metavid_img_url to $local_img_file...\n";
		}
	}
}
663 | | - |
/**
 * Create or overwrite a wiki page as the import bot.
 *
 * Titles rejected by mvDoMvPage() are never edited; an existing article and
 * any MVD index entry for such a title are removed instead. An existing page
 * is left alone when its last editor is not the bot, when it is a redirect,
 * or when its text already matches, unless $forceUpdate is set.
 *
 * @param object|string $wgTitle     Title object, or title text that is
 *                                   combined with $ns
 * @param string        $wikiText    new page text
 * @param int|null      $ns          namespace, only used when $wgTitle is
 *                                   not an object
 * @param bool          $forceUpdate edit even where a skip rule applies
 */
function do_update_wiki_page($wgTitle, $wikiText, $ns = null, $forceUpdate=false) {
	global $botUserName;
	if (!is_object($wgTitle)) {
		$wgTitle = Title :: makeTitle($ns, $wgTitle);
	}
	//make sure the text is utf8 encoded:
	//NOTE(review): text that is already utf8 gets encoded a second time here
	//(do_proccess_text pre-encodes the person name) - confirm the input encoding
	$wikiText = utf8_encode($wikiText);

	$wgArticle = new Article($wgTitle);
	if (!mvDoMvPage($wgTitle, $wgArticle, false)) {
		print "bad title: " . $wgTitle->getDBkey() . " no edit";
		if ($wgTitle->exists()) {
			print "remove article";
			$wgArticle->doDeleteArticle('bad title');
		}
		//some how mvdIndex and mvd pages got out of sync do a separate check for the mvd:
		if (MV_Index::getMVDbyTitle($wgArticle->mTitle->getDBkey()) != null) {
			print ', rm mvd';
			MV_Index::remove_by_wiki_title($wgArticle->mTitle->getDBkey());
		}
		print "\n";
		return ;
	}
	if ($wgTitle->exists()) {
		//if last edit!=mvBot skip (don't overwrite peoples improvements)
		$rev = Revision::newFromTitle($wgTitle);
		if (!$rev) {
			//no revision could be loaded, so the last editor is unknown:
			//treat the page like a user-edited one instead of crashing
			$last_editor = '';
			print ' skiped page with no loadable revision:' . $wgTitle->getDBkey() . "\n";
			if (!$forceUpdate) return ;
		} else {
			$last_editor = $rev->getRawUserText();
			if ($botUserName != $last_editor) {
				print ' skiped page edited by user:' . $last_editor . "\n";
				if (!$forceUpdate) return ;
			}
		}
		//proc article:
		$cur_text = $wgArticle->getContent();
		//if its a redirect skip
		if (substr($cur_text, 0, strlen('#REDIRECT')) == '#REDIRECT') {
			print ' skiped page moved by user:' . $last_editor . "\n";
			if (!$forceUpdate) return ;
		}
		//check if text is identical:
		if (trim($cur_text) == trim($wikiText)) {
			if (!$forceUpdate) return ;
		}
	}
	//got here do the edit:
	$sum_txt = 'metavid bot insert';
	$wgArticle->doEdit($wikiText, $sum_txt);
	print "did edit on " . $wgTitle->getDBkey() . "\n";
}
/**
 * Build the wiki text of a stream page from the old metavid stream data:
 * links to the official record, other media sources, the stored stream
 * attributes and a category derived from the sync status.
 *
 * The dates and the duration are taken from the globals $start_time and
 * $end_time, which mv_proccess_attr() assigns when it meets the matching
 * attribute rows.
 *
 * @param object $mvTitle title object of the stream; supplies the stream name
 * @param object $stream  old stream row; reads ->id, ->name, ->archive_org,
 *                        ->org_start_time and ->sync_status
 * @return string wiki text for the stream page
 */
function mv_semantic_stream_desc(& $mvTitle, & $stream) {
	global $start_time, $end_time;
	$stream_id = $stream->id;
	//both attribute passes run first: they may set $start_time / $end_time,
	//which the date formatting below depends on
	$attr_text = mv_proccess_attr('stream_attr_varchar', $stream_id);
	$attr_text .= mv_proccess_attr('stream_attr_int', $stream_id);

	$date = date('Ymd', $start_time);
	$cspan_date = date('Y-m-d', $start_time);

	//chamber letter from the stream name ('senate' wins when both appear)
	$stream_name = $mvTitle->getStreamName();
	if (strpos($stream_name, 'senate') !== false) {
		$ch_type = 's';
	} else if (strpos($stream_name, 'house') !== false) {
		$ch_type = 'h';
	} else {
		$ch_type = '';
	}

	//add links/generic text at the start
	$out = "==Official Record==\n";
	if ($ch_type != '') {
		//both THOMAS entries point at the same query url
		$thomas_link = "[http://thomas.loc.gov/cgi-bin/query/B?r110:@FIELD(FLD003+{$ch_type})+@FIELD(DDATE+{$date})]";
		$out .= "*[[GovTrack]] Congressional Record[http://www.govtrack.us/congress/recordindex.xpd?date={$date}&where={$ch_type}]\n\n";
		$out .= "*[[THOMAS]] Congressional Record {$thomas_link}\n\n";
		$out .= "*[[THOMAS]] Extension of Remarks {$thomas_link}\n\n";
	}
	if ($stream->archive_org != '') {
		$out .= "==More Media Sources==\n";
		$out .= "*[[Archive.org]] hosted original copy [http://www.archive.org/details/mv_{$stream->name}]\n";
	}
	//all streams get the congressional chronicle link:
	$out .= "*[[CSPAN]]'s Congressional Chronicle [http://www.c-spanarchives.org/congress/?q=node/69850&date={$cspan_date}&hors={$ch_type}]";
	$out .= "\n\n";
	$out .= $attr_text;
	$out .= '[[stream_duration:=' . ($end_time - $start_time) . "| ]]\n";
	if ($stream->org_start_time) {
		$out .= "[[original_date:={$stream->org_start_time}| ]]";
	}

	//add stream category (based on sync status)
	$category = '';
	switch ($stream->sync_status) {
		case 'not_checked':
			$category = 'Stream Unchecked';
			break;
		case 'impossible':
			$category = 'Stream Out of Sync';
			break;
		case 'in_sync':
			//other options [stream high quality sync ];
			$category = 'Stream Basic Sync';
			break;
	}
	if ($category != '') {
		$out .= "\n\n[[Category:{$category}]]";
	}

	return $out;
}
/**
 * Render the rows of one metavid stream attribute table as hidden semantic
 * annotations ("[[type:=value| ]]", one per line).
 *
 * Side effect: rows of type 'adj_start_time' / 'adj_end_time' overwrite the
 * globals $start_time / $end_time. Rows whose type is unknown to
 * getTypeTitle() produce no output.
 *
 * @param string $table     attribute table name inside the `metavid` database
 * @param int    $stream_id id matched against `stream_fk`
 * @return string the annotations, '' when no row has a known type
 */
function mv_proccess_attr($table, $stream_id) {
	global $start_time, $end_time;
	$dbr = wfGetDB(DB_SLAVE);
	$sql = "SELECT * FROM `metavid`.`$table` WHERE `stream_fk`=$stream_id";
	$res = $dbr->query($sql);
	$out = '';
	while ($var = $dbr->fetchObject($res)) {
		if ($var->type == 'adj_start_time')
			$start_time = $var->value;
		if ($var->type == 'adj_end_time')
			$end_time = $var->value;
		//NOTE(review): getTypeTitle() tags each type as 'rel' or 'atr' but
		//every row is written with ':=' - confirm whether 'rel' types were
		//meant to use '::'
		if (getTypeTitle($var->type) != '') {
			$out .= '[[' . $var->type . ':=' . $var->value . '| ]]' . "\n";
		}
	}
	return $out;
}
/**
 * Look up the kind and display title of a metavid stream attribute type.
 *
 * @param string $type attribute type name, e.g. 'cspan_title'
 * @return array|string array(kind, title) where kind is 'rel' or 'atr';
 *                      '' when the type is not known
 */
function getTypeTitle($type) {
	$known_types = array (
		'cspan_type' => array ('rel', 'Government Event'),
		'cspan_title' => array ('atr', 'C-SPAN Title'),
		'cspan_desc' => array ('atr', 'C-SPAN Description'),
		'adj_start_time' => array ('atr', 'Unix Start Time'),
		'adj_end_time' => array ('atr', 'Unix End Time')
	);
	//scan in declaration order with a loose comparison, like a switch would
	foreach ($known_types as $known => $kind_and_title) {
		if ($type == $known) {
			return $kind_and_title;
		}
	}
	return '';
}
832 | 242 | ?> |
833 | 243 | |
Index: trunk/extensions/MetavidWiki/maintenance/ogg_thumb_insert.sh |
— | — | @@ -1,15 +1,77 @@ |
2 | 2 | #!/bin/bash |
3 | 3 | |
4 | | -streamid=${1}; |
5 | | -filename=${2}; |
6 | | -interval=${3}; |
| 4 | +########################################################################### |
| 5 | +# |
| 6 | +# DESCRIPTION |
| 7 | +# |
| 8 | +# This script can extract jpg frames from your ogg files at a specified |
| 9 | +# interval. It can also insert this information into your mvWiki |
| 10 | +# database. |
| 11 | +# |
| 12 | +# USAGE |
| 13 | +# |
| 14 | +# ./ogg_thumb_insert.sh stream_id filename interval |
| 15 | +# |
| 16 | +# EXAMPLE |
| 17 | +# |
| 18 | +# ./ogg_thumb_insert.sh 17 /var/www/localhost/htdocs/media/stream.ogg 20 |
| 19 | +# |
| 20 | +# The previous example should extract frames every 20 seconds into the |
| 21 | +# file named stream.ogg. It will place them in the appropriate stream |
| 22 | +# directory which by default is '../stream_images/7/17/'. It should also |
| 23 | +# insert information about the frame into the 'mv_stream_images' table. |
| 24 | +# |
| 25 | +########################################################################### |
| 26 | +# |
| 27 | +# This script relies on a number of programs being in your path, and is |
| 28 | +# intended to be executed from the 'maintenance' directory. |
| 29 | +# |
| 30 | +# Requirements: |
| 31 | +# |
| 32 | +# ffmpeg |
| 33 | +# mysql |
| 34 | +# imagemagick |
| 35 | +# ogginfo |
| 36 | +# grep |
| 37 | +# sed |
| 38 | +# awk |
| 39 | +# gawk |
| 40 | +# echo |
| 41 | +# wc |
| 42 | +# bc |
| 43 | +# seq |
| 44 | +# mkdir |
| 45 | +# |
| 46 | +########################################################################### |
| 47 | +# |
| 48 | +# Use at your own risk. There is very little error checking. |
| 49 | +# |
| 50 | +########################################################################### |
| 51 | +# This quick hack brought to you by Seth McClain smcclain@opengov.org |
| 52 | +########################################################################### |
7 | 53 | |
| 54 | + |
| 55 | +## REMOVE THE FOLLOWING TWO LINES BEFORE EXECUTING ## |
| 56 | +echo "Please be sure to edit this file and change some variables before executing it"; |
| 57 | +exit |
| 58 | +## REMOVE THE PREVIOUS TWO LINES BEFORE EXECUTING ## |
| 59 | + |
| 60 | + |
| 61 | +## The following variables need to be set to allow the script access to your |
| 62 | +## MySQL database |
| 63 | + |
8 | 64 | table="mv_stream_images"; |
9 | 65 | db="mvwiki"; |
10 | 66 | user="user"; |
11 | 67 | pw="password"; |
12 | 68 | hostname="localhost"; |
13 | 69 | |
| 70 | +## Do not edit below this line |
| 71 | + |
| 72 | +streamid=${1}; |
| 73 | +filename=${2}; |
| 74 | +interval=${3}; |
| 75 | + |
14 | 76 | chars=`echo -n ${streamid} | wc -c`; |
15 | 77 | dots=`for i in \`seq 1 ${chars}\`; do echo -n .; done | sed -e s/^.//` |
16 | 78 | dir=`echo ${streamid} | sed -e s/^${dots}//` |
— | — | @@ -25,7 +87,7 @@ |
26 | 88 | |
# For every sample point (one per ${interval}, from 1 up to ${duration}):
# record the frame in the ${table} table, then extract it as a 320x240 jpg.
# The mysql call uses the configured ${hostname} (it was previously ignored),
# and file names / credentials are quoted so that spaces do not split them.
for i in `seq 1 ${interval} ${duration}`
do
	echo "insert into ${table}(stream_id, time) values(${streamid}, ${i});" | mysql -h "${hostname}" -u "${user}" --password="${pw}" "${db}"
	ffmpeg -ss ${i} -i "${filename}" -vcodec mjpeg -vframes 1 -an -f rawvideo -s 320x240 -y "${filedir}/${i}_320x240.jpg"
done
32 | 94 | |
Index: trunk/extensions/MetavidWiki/maintenance/scrape_and_insert.php |
— | — | @@ -38,6 +38,7 @@ |
39 | 39 | switch($args[0]){ |
40 | 40 | case 'cspan_chronicle': |
41 | 41 | $MV_CspanScraper = new MV_CspanScraper(); |
| 42 | + $MV_CspanScraper->doScrapeInsert(); |
42 | 43 | break; |
43 | 44 | } |
44 | 45 | } |
— | — | @@ -96,15 +97,52 @@ |
97 | 98 | $href=''; |
98 | 99 | $href_match=array(); |
99 | 100 | preg_match('/href="(.*)"/',$matches[5][$k], $href_match); |
100 | | - if(count($href_match)!=0)$href=$href_match[1]; |
| 101 | + if(count($href_match)!=0)$href=$href_match[1]; |
| 102 | + |
| 103 | + $porg = str_replace('<br>',' ',$matches[4][$k]); |
| 104 | + $porg = preg_replace('/[D|R]+\-\[.*\]/', '', $porg); |
| 105 | + $pparts = explode(',',$porg); |
| 106 | + $pname = trim($pparts[1]) . '_' . trim($pparts[0]); |
101 | 107 | $person_time_ary[]= array( |
102 | 108 | 'start_time'=>strip_tags($matches[1][$k]), |
103 | 109 | 'length'=>$matches[3][$k], |
104 | 110 | 'person_title'=>str_replace('<br>',' ',$matches[4][$k]), |
| 111 | + 'spoken_by'=>$pname, |
105 | 112 | 'href'=>$href |
106 | 113 | ); |
107 | | - } |
108 | | - print_r($person_time_ary); |
| 114 | + } |
| 115 | + //group people in page matches |
| 116 | + $g_person_time_ary=array(); |
| 117 | + $prev_person=null; |
| 118 | + foreach($person_time_ary as $ptag){ |
| 119 | + $g_person_time_ary[$ptag['spoken_by']][]=$ptag; |
| 120 | + } |
| 121 | + |
| 122 | + //retrive rows to find match: |
| 123 | + $dbr =& wfGetDB(DB_SLAVE); |
| 124 | + $mvd_res = MV_Index::getMVDInRange($stream->id, null, null, $mvd_type='ht_en',false,$smw_properties=array('Spoken_by'), ''); |
| 125 | + $g_row_matches=array(); |
| 126 | + //group peole in db matches: |
| 127 | + while ($row = $dbr->fetchObject($mvd_res)) { |
| 128 | + if(!isset($row->Spoken_by))continue; |
| 129 | + if(!isset($g_row_matches[strtolower($row->Spoken_by)])){ |
| 130 | + $g_row_matches[strtolower($row->Spoken_by)]=get_object_vars($row); |
| 131 | + $g_row_matches[strtolower($row->Spoken_by)]['end_time_sec']=$row->end_time; |
| 132 | + }else{ |
| 133 | + $g_row_matches[strtolower($row->Spoken_by)]['end_time_sec']+=$row->end_time; |
| 134 | + } |
| 135 | + $cspan_person = next($g_person_time_ary); |
| 136 | + } |
| 137 | + //add in sync offset data for $g_person_time_ary |
| 138 | + reset($g_person_time_ary); |
| 139 | + foreach($g_row_matches as $rp=>$rperson){ |
| 140 | + |
| 141 | + } |
| 142 | + //find match person1->person2->person3 |
| 143 | + |
| 144 | + |
| 145 | + //average switch time to get offset of stream |
| 146 | + //use offset to insert all $person_time_array data |
109 | 147 | } |
110 | 148 | } |
111 | 149 | } |
— | — | @@ -132,8 +170,7 @@ |
133 | 171 | $page = file_get_contents($url); |
134 | 172 | if($page===false){ |
135 | 173 | echo("error retriving $url retrying...\n"); |
136 | | - sleep(5); |
137 | | - //@@todo: this may eventually overflow the stack: |
| 174 | + sleep(5); |
138 | 175 | return $this->doRequest($url); |
139 | 176 | } |
140 | 177 | if($page!=''){ |
Index: trunk/extensions/MetavidWiki/includes/MV_Index.php |
— | — | @@ -102,17 +102,36 @@ |
103 | 103 | } |
104 | 104 | /* |
105 | 105 | * getMVDInRange returns the mvd titles that are in the given range |
| 106 | + * param list got kind of crazy long... @@todo refactor int a request object or something cleaner |
106 | 107 | */ |
107 | | - function getMVDInRange($stream_id, $start_time=null, $end_time=null, $mvd_type='all',$getText=false){ |
| 108 | + function getMVDInRange($stream_id, $start_time=null, $end_time=null, $mvd_type='all',$getText=false,$smw_properties=array(), $limit='LIMIT 0, 200'){ |
108 | 109 | global $mvIndexTableName, $mvDefaultClipLength; |
109 | 110 | $dbr =& wfGetDB(DB_SLAVE); |
110 | 111 | |
111 | | - $sql = "SELECT `mv_page_id` as `id`, `mvd_type`, `wiki_title`, `stream_id`, `start_time`, `end_time` " . |
112 | | - "FROM {$dbr->tableName($mvIndexTableName)} " . |
113 | | - "WHERE `stream_id`={$stream_id} "; |
| 112 | + $sql_sel = "SELECT `mv_page_id` as `id`, `mvd_type`, `wiki_title`, `stream_id`, `start_time`, `end_time` "; |
| 113 | + $sql_from=" FROM {$dbr->tableName($mvIndexTableName)} "; |
| 114 | + if(count($smw_properties)!=0){ |
| 115 | + foreach($smw_properties as $prop_name){ |
| 116 | + $sql_sel.=", `$prop_name`.`object_title` as `$prop_name`"; |
| 117 | + $sql_from.="LEFT JOIN `smw_relations` as `$prop_name` ON (`mv_mvd_index`.`mv_page_id`=`$prop_name`.`subject_id` " . |
| 118 | + "AND `$prop_name`.`relation_title`='$prop_name') "; |
| 119 | + } |
| 120 | + } |
| 121 | + $sql = $sql_sel . $sql_from; |
| 122 | + $sql.= "WHERE `stream_id`={$stream_id} "; |
114 | 123 | if($mvd_type!='all'){ |
115 | 124 | //check if mvd_type is array: |
116 | | - $sql.="AND `mvd_type`='{$mvd_type}' "; |
| 125 | + if(is_array($mvd_type)){ |
| 126 | + $sql.='AND ('; |
| 127 | + $or=''; |
| 128 | + foreach($mvd_type as $mtype){ |
| 129 | + $sql.=$or."`mvd_type'='{$mtype}' "; |
| 130 | + $or='OR '; |
| 131 | + } |
| 132 | + $sql.=')'; |
| 133 | + }else{ |
| 134 | + $sql.="AND `mvd_type`='{$mvd_type}' "; |
| 135 | + } |
117 | 136 | } |
118 | 137 | //get any data that covers this rage: |
119 | 138 | if($end_time)$sql.=" AND `start_time` <= " . $end_time; |
— | — | @@ -120,7 +139,7 @@ |
121 | 140 | //add in ordering |
122 | 141 | $sql.=' ORDER BY `start_time` ASC '; |
123 | 142 | //add in limit of 200 for now |
124 | | - $sql.=' LIMIT 0, 200'; |
| 143 | + $sql.=$limit; |
125 | 144 | //echo $sql; |
126 | 145 | $result =& $dbr->query( $sql, 'MV_Index:time_index_query'); |
127 | 146 | return $result; |