r46057 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r46056‎ | r46057 | r46058 >
Date:01:47, 23 January 2009
Author:markcdeckert
Status:deferred
Tags:
Comment:
first revision
Modified paths:
  • /trunk/extensions/MetavidWiki/maintenance/video_ocr_thumb_insert.php (added) (history)

Diff [purge]

Index: trunk/extensions/MetavidWiki/maintenance/video_ocr_thumb_insert.php
@@ -0,0 +1,121 @@
 2+<?php
 3+/*
 4+ * video_ocr_thumb_insert.php Created on January, 2009
 5+ * based on ogg_thumb_insert
 6+ * All Metavid Wiki code is Released under the GPL2
 7+ * for more info visit http://metavid.org/wiki/Code
 8+ *
 9+ * @author Michael Dale, aphid
 10+ * @email dale@ucsc.edu, aphid@ucsc.edu
 11+ * @url http://metavid.org
 12+ */
 13+
$cur_path = $IP = dirname( __FILE__ );
// include commandLine.inc from the MediaWiki maintenance dir:
require_once ( '../../../maintenance/commandLine.inc' );
require_once ( 'metavid2mvWiki.inc.php' );

// include util functions:
require_once( 'maintenance_util.inc.php' );

// All three positional arguments are read below, so require all of them
// rather than only checking that at least one was given.
if ( count( $args ) < 3 || isset ( $options['help'] ) ) {
	print '
USAGE
 php video_ocr_thumb_insert.php stream_name filename interval

EXAMPLE we get a frame every 5 seconds from input file stream.mpeg:
 php video_ocr_thumb_insert.php stream_name stream.mpeg 5

DURATION is scraped from ffmpeg

Notes:
 if possible you want to use the source footage rather than the ogg to generate the thumbnails (ie the mpeg2 or dv)
';
	exit();
}

// maybe we derive stream name from filename? one less thing to think about.
$stream_name = $args[0];
$filename = $args[1];

// A zero, negative or non-numeric interval would never advance the frame
// loop below, so reject it up front instead of spinning forever.
if ( !is_numeric( $args[2] ) || $args[2] <= 0 ) {
	die( "interval must be a positive number of seconds\n" );
}
$interval = $args[2] + 0;

// NOTE(review): assumes newStreamByName() returns a usable stream object for
// any name; confirm how a missing stream is reported before relying on the id.
$MV_Stream = MV_Stream::newStreamByName( $stream_name );
$stream_id = $MV_Stream->getStreamId();

// Extracted frames are written to $filedir; OCR scratch files and the final
// transcript are written to $workingdir.
$filedir = '../stream_images/' . MV_StreamImage::getRelativeImagePath( $stream_id );
$workingdir = '/metavid/raw_mpeg';

// gets duration from ffmpeg
$duration = getDuration( $filename );

// Accumulates one OCR line per successfully extracted frame.
$ocrfile = "";

$dbw = $dbr = wfGetDB( DB_MASTER );

// Quote every path once so spaces or shell metacharacters in the arguments
// cannot break, or inject into, the commands run inside the loop.
$sourceArg = escapeshellarg( $filename );
$tifArg = escapeshellarg( "$workingdir/temp.ocr.tif" );
$ocrBaseArg = escapeshellarg( "$workingdir/ocrtemp" );
$ocrTextArg = escapeshellarg( "$workingdir/ocrtemp.txt" );

for ( $i = 0; $i < $duration; $i += $interval ) {
	$frame = "{$filedir}/{$i}.jpg";
	$frameArg = escapeshellarg( $frame );

	// Grab a single JPEG frame at offset $i seconds.
	shell_exec( "ffmpeg -ss $i -i $sourceArg -vcodec mjpeg -vframes 1 -an -f rawvideo -y $frameArg 2>&1" );

	if ( is_file( $frame ) ) {
		//$dbw->query( "INSERT INTO `mv_stream_images` (`stream_id`, `time`) VALUES ($stream_id, $i)" );

		// Crop a fixed 457x30 region of the frame, then enlarge and
		// threshold it to monochrome before handing it to tesseract.
		shell_exec( "convert $frameArg -crop 457x30+63+358 $tifArg && convert $tifArg -resize 300% -level 10%,1,20% -monochrome +compress $tifArg" );
		shell_exec( "tesseract $tifArg $ocrBaseArg nobatch lettersonly 2>&1" );

		// Tag the recognised text with the frame's time offset.
		$ocr = shell_exec( "tail $ocrTextArg" ) . " at " . sec2hms( $i ) . " \n";
		echo $ocr;
		$ocrfile .= $ocr;
	} else {
		print "failed to create file: {$frame} \n";
	}
}

// Write the collected OCR lines to <workingdir>/<stream_name>.ocr
$ocrfileloc = "$workingdir/$stream_name.ocr";
$fh = fopen( $ocrfileloc, 'w' ) or die ( "can't write ocr file" );
fwrite( $fh, $ocrfile );
fclose( $fh );
 77+
/**
 * Determine the length of a media file by scraping ffmpeg's console output.
 *
 * Runs `ffmpeg -i <file>` with stderr merged into stdout, looks for the
 * "Duration: HH:MM:SS" text and converts it to whole seconds. Anything
 * printed after the seconds field is ignored.
 *
 * @param string $filename path of the media file to inspect
 * @return int duration in whole seconds, or 0 when no duration could be read
 */
function getDuration( $filename ) {
	// Quote the path so spaces or shell metacharacters cannot break the command.
	// shell_exec() can return null/false, so normalise to a string before matching.
	$output = (string) shell_exec( 'ffmpeg -i ' . escapeshellarg( $filename ) . ' 2>&1' );

	// Capture hours, minutes and seconds directly instead of re-splitting the
	// matched text; bail out cleanly when ffmpeg printed no usable duration.
	if ( !preg_match( '/Duration: (\d+):(\d{2}):(\d{2})/', $output, $parts ) ) {
		echo "could not read duration of $filename \n";
		return 0;
	}

	echo "duration is {$parts[1]}:{$parts[2]}:{$parts[3]} \n";
	return ( (int) $parts[1] * 3600 ) + ( (int) $parts[2] * 60 ) + (int) $parts[3];
}
 90+
/**
 * Format a number of seconds as "H:MM:SS".
 *
 * @param int|float|string $sec number of seconds; any fractional part is dropped
 * @param bool $padHours when true, the hours field is left-padded with "0" to two digits
 * @return string hours, zero-padded minutes and zero-padded seconds joined by ":"
 */
function sec2hms( $sec, $padHours = false ) {
	// Truncate to an integer once, explicitly, so the modulo operations below
	// never see a fractional float (an implicit lossy conversion that newer
	// PHP versions report as deprecated).
	$sec = intval( $sec );

	// Whole hours, then the minutes past the hour, then the leftover seconds.
	$hours = intval( $sec / 3600 );
	$minutes = intval( ( $sec % 3600 ) / 60 );
	$seconds = $sec % 60;

	// Hours are only padded on request; minutes and seconds always are.
	$hms = $padHours
		? str_pad( (string) $hours, 2, "0", STR_PAD_LEFT )
		: (string) $hours;

	return $hms
		. ':' . str_pad( (string) $minutes, 2, "0", STR_PAD_LEFT )
		. ':' . str_pad( (string) $seconds, 2, "0", STR_PAD_LEFT );
}

Status & tagging log