#!/usr/bin/perl -w ## viterbi ## R. Malouf, 7 Nov 2003 ## Demonstration of the Viterbi algorithm $Inf = 100**100**100; ## Sample Hidden Markov Model # states @S = ( 0..2 ); # initial state $S0 = 0; # transition probs $A[from][to] # an array of arrays @A = ( [ 0.0, 0.5, 0.5 ], [ 0.0, 0.5, 0.5 ], [ 0.0, 0.5, 0.5 ] ); # emission probs $B{symbol}[from][to] # a hash of arrays of arrays %B = ( "h" => [ [ 0.0, 0.5, 0.5 ], [ 0.0, 1.0, 1.0 ], [ 0.0, 0.0, 0.0 ] ], "t" => [ [ 0.0, 0.5, 0.5 ], [ 0.0, 0.0, 0.0 ], [ 0.0, 1.0, 1.0 ] ] ); ## Set up nice state names for HMM states 0,1, and 2 my @SN = ( 0 , H, T); ## Print out complete info about this HMM readably foreach my $s (@S) { my $tot_state_prob = 0; print "$SN[$s] : \n"; foreach my $oc ("h", "t") { print " $oc \n"; foreach my $s1 (@S) { my $state_tran_prob = $A[$s][$s1]; my $emit_tran_prob = $B{$oc}[$s][$s1]; my $tran_prob = $state_tran_prob * $emit_tran_prob; $tot_state_prob = $tot_state_prob + $tran_prob; print " $SN[$s]=>$SN[$s1]: $emit_tran_prob * $state_tran_prob = $tran_prob \n"; } } print " ---------------------- \n"; print "$SN[$s] (Total Probs) = $tot_state_prob \n\n"; } # switch to log probs foreach $word ( keys %B ) { foreach $i ( 0 .. $#{ $B{$word} } ) { foreach $j ( 0 .. $#{ $B{$word} } ) { if ($B{$word}[$i][$j] > 0 ) { $B{$word}[$i][$j]= log($B{$word}[$i][$j]) } else { $B{$word}[$i][$j]= - $Inf } } } } ## Look at the above log prob has table if you like # foreach $word ( keys %B ) { # foreach $i ( 0 .. $#{ $B{$word} } ) { # foreach $j ( 0 .. $#{ $B{$word} } ) { # print STDERR "$word $i $j $B{$word}[$i][$j] \n"; # } # } #} # Switch to log probs in transition probs too. foreach $i ( 0 .. $#A ) { foreach $j ( 0 .. $#A ) { if ($A[$i][$j] > 0 ) { $A[$i][$j]= log($A[$i][$j]) } else { $A[$i][$j]= - $Inf } } } ## main program ## prompt for an output sequence, print most likely state ## sequence, and repeat until EOF print "\nOutput: "; while (<>) { chop; print "States: ", decode(split //), "\n"; print "\nOutput: "; } ## decode(@O) ## Apply Viterbi algorithm to find the most likely state ## sequence given an output sequence @O sub decode { # Get output sequence my @O = @_; my $T = scalar(@O); # Initialize first column # Note we are populating the table with log probs # When Pr(X)=1, log(Pr(X))=0.0 # When Pr(X)=0, log(Pr(X))= Undefined = -$Inf my @trellis; foreach my $s (@S) { if ($s==$S0) { $trellis[0][$s] = 0.0; } else { $trellis[0][$s] = -$Inf; } } # Fill in rest of trellis foreach my $t (1..$T) { # Get next output symbol my $o = shift @O; unless (defined($B{$o})) { print "Illegal symbol: $o!\n"; return (); } # Fill in next column; using log probs so add to get $score foreach my $s (@S) { $trellis[$t][$s] = -$Inf; foreach my $s1 (@S) { my $score = $trellis[$t-1][$s1]+$A[$s1][$s]+$B{$o}[$s1][$s]; if ($score > $trellis[$t][$s]) { $trellis[$t][$s] = $score; $back[$t][$s] = $s1; } } } } # Find best final state my $best = 0; foreach my $s (@S) { if ($trellis[$T][$s] > $trellis[$T][$best]) { $best = $s; } } # Read out best path my @path = ( $best ); foreach my $t (reverse(1..$T)) { $vscore[$t] = exp($trellis[$t][$best]); $best = $back[$t][$best]; unshift @path, $best; } print "Time State Prob\n"; print "0: $path[0] 1 \n"; foreach my $t (1..$T) { my $newsname = $SN[$path[$t]]; print "$t: $newsname $vscore[$t] \n"; } print " \n\n"; return @path; }