Index: trunk/extensions/MetavidWiki/maintenance/video_ocr_thumb_insert.php |
— | — | @@ -0,0 +1,121 @@ |
| 2 | +<?php |
| 3 | +/* |
| 4 | + * video_ocr_thumb_insert.php Created on January, 2009 |
| 5 | + * based on ogg_thumb_insert |
| 6 | + * All Metavid Wiki code is Released under the GPL2 |
| 7 | + * for more info visit http://metavid.org/wiki/Code |
| 8 | + * |
| 9 | + * @author Michael Dale, aphid |
| 10 | + * @email dale@ucsc.edu, aphid@ucsc.edu |
| 11 | + * @url http://metavid.org |
| 12 | + */ |
| 13 | + |
| 14 | +$cur_path = $IP = dirname( __FILE__ ); |
| 15 | +// include commandLine.inc from the MediaWiki maintenance dir: |
| 16 | +require_once ( '../../../maintenance/commandLine.inc' ); |
| 17 | +require_once ( 'metavid2mvWiki.inc.php' ); |
| 18 | + |
| 19 | +// include util functions: |
| 20 | +require_once( 'maintenance_util.inc.php' ); |
| 21 | + |
// All three positional arguments (stream_name, filename, interval) are read
// below, so show the usage text unless every one of them was supplied.
if ( count( $args ) < 3 || isset( $options['help'] ) ) {
	print '
USAGE
 php video_ocr_thumb_insert.php stream_name filename interval

EXAMPLE we get a frame every 5 seconds from input file stream.mpeg:
 php video_ocr_thumb_insert.php stream_name stream.mpeg 5

DURATION is scraped from ffmpeg

Notes:
 if possible you want to use the source footage rather than the ogg to generate the thumbnails (ie the mpeg2 or dv)
';
	exit();
}

// maybe we derive stream name from filename? one less thing to think about.
$stream_name = $args[0];
$filename = $args[1];

// The interval is the loop step below: a zero, negative or non-numeric value
// would keep $i from ever reaching $duration, so reject it up front.
if ( !is_numeric( $args[2] ) || $args[2] <= 0 ) {
	print "interval must be a positive number of seconds, got: {$args[2]} \n";
	exit( 1 );
}
$interval = $args[2] + 0; // numeric string -> int/float

$MV_Stream = MV_Stream::newStreamByName( $stream_name );
$stream_id = $MV_Stream->getStreamId();

// Frames are written under the stream's image directory; intermediate OCR
// files and the final .ocr transcript go to the working directory.
$filedir = '../stream_images/' . MV_StreamImage::getRelativeImagePath( $stream_id );
$workingdir = '/metavid/raw_mpeg';

// duration (in seconds) is scraped from ffmpeg's output
$duration = getDuration( $filename );

$ocrfile = "";

// The handle is currently unused: registering each frame in
// `mv_stream_images` (stream_id, time) is disabled in this script.
$dbw = $dbr = wfGetDB( DB_MASTER );

// Quote every path once; they are all interpolated into shell command lines.
$tifPath = "$workingdir/temp.ocr.tif";
$ocrBase = "$workingdir/ocrtemp";
$shInput = escapeshellarg( $filename );
$shTif = escapeshellarg( $tifPath );
$shOcrBase = escapeshellarg( $ocrBase );
$shOcrText = escapeshellarg( "$ocrBase.txt" );

for ( $i = 0; $i < $duration; $i += $interval ) {
	$jpgPath = "{$filedir}/{$i}.jpg";
	$shJpg = escapeshellarg( $jpgPath );

	// Grab a single frame at offset $i as a JPEG.
	shell_exec( "ffmpeg -ss $i -i $shInput -vcodec mjpeg -vframes 1 -an -f rawvideo -y $shJpg 2>&1" );

	if ( is_file( $jpgPath ) ) {
		// Crop the fixed 457x30 strip at +63+358, then enlarge and
		// threshold it to a monochrome TIFF for tesseract.
		shell_exec( "convert $shJpg -crop 457x30+63+358 $shTif && convert $shTif -resize 300% -level 10%,1,20% -monochrome +compress $shTif" );
		// tesseract writes its text to "<base>.txt"
		shell_exec( "tesseract $shTif $shOcrBase nobatch lettersonly 2>&1" );
		$ocr = shell_exec( "tail $shOcrText" ) . " at " . sec2hms( $i ) . " \n";
		echo $ocr;
		$ocrfile .= $ocr;
	} else {
		print "failed to create file: {$jpgPath} \n";
	}
}

// Persist the collected OCR lines as <workingdir>/<stream_name>.ocr
$ocrfileloc = "$workingdir/$stream_name.ocr";
if ( file_put_contents( $ocrfileloc, $ocrfile ) === false ) {
	die( "can't write ocr file" );
}
| 77 | + |
/**
 * Determine the running time of a media file by scraping ffmpeg's output.
 *
 * Runs `ffmpeg -i <file>` with stderr merged into stdout and searches the
 * text for a "Duration: HH:MM:SS" field. When the field is found,
 * "duration is HH:MM:SS" is echoed as progress output.
 *
 * @param string $filename Path of the media file to inspect.
 * @return int Duration in whole seconds (any fractional part after the
 *  seconds is ignored), or 0 when no duration could be read.
 */
function getDuration( $filename )
{
	// escapeshellarg() keeps paths with spaces or shell metacharacters intact.
	$output = (string)shell_exec( 'ffmpeg -i ' . escapeshellarg( $filename ) . ' 2>&1' );

	// No "Duration:" field: ffmpeg is missing, the file is unreadable, or it
	// reported no numeric duration. Return 0 instead of doing arithmetic on
	// undefined values.
	if ( !preg_match( '/Duration: (\d{2,}):(\d{2}):(\d{2})/', $output, $parts ) ) {
		echo "could not read duration of $filename from ffmpeg \n";
		return 0;
	}

	$durationhms = "{$parts[1]}:{$parts[2]}:{$parts[3]}";
	echo "duration is $durationhms \n";

	return ( (int)$parts[1] * 3600 ) + ( (int)$parts[2] * 60 ) + (int)$parts[3];
}
| 90 | + |
/**
 * Format a number of seconds as an "H:MM:SS" string.
 *
 * Minutes and seconds are always zero-padded to two digits; hours are
 * zero-padded to two digits only when $padHours is true.
 *
 * @param int|float|string $sec Number of seconds; any fractional part is dropped.
 * @param bool $padHours Whether to left-pad the hours to two digits.
 * @return string The formatted time, e.g. "1:01:05" or "01:01:05".
 */
function sec2hms( $sec, $padHours = false ) {
	// Truncate once up front so every operation below is integer-only;
	// applying % to a fractional float is deprecated since PHP 8.1.
	$total = intval( $sec );

	// whole hours: total seconds divided by 3600, remainder discarded
	$hours = intval( $total / 3600 );

	// minutes past the hour: whole minutes, modulo 60
	$minutes = intval( $total / 60 ) % 60;

	// seconds past the minute
	$seconds = $total % 60;

	$hms = $padHours
		? str_pad( (string)$hours, 2, "0", STR_PAD_LEFT )
		: (string)$hours;

	return $hms . ':'
		. str_pad( (string)$minutes, 2, "0", STR_PAD_LEFT ) . ':'
		. str_pad( (string)$seconds, 2, "0", STR_PAD_LEFT );
}