#!/usr/bin/perl ######################################################################## # # measure -- get the raw numerical data (nuclear F1 and F2, offglide # F1 and F2) from the formant tracks. # # # Invocation: perl measure FILE 'SPEAKER-CODE' BW # # Where FILE is the .wav filename and BW is the widest permitted bandwidth # for a formant ("formants" with smaller BW will be ignored). SPEAKER CODE # is the speaker code that will be put on every line. # # Input: FILE.TextGrid, File.FormantTier # where the Text Grid tiers are # 1. Words (interval tier, word boundaries, labelled with words) # 2. Diphthongs (interval tier, diphthong boundaries, labelled with words) # 3. Overrides (point tier, points marked "F1max" or whatever, to make # the pgm put F1max where you want it) # 4. Landmarks (point tier; the pgm's guesses for F1max, F2max, F2min) # # Output format (FILE.dat) # # SPEAKER-CODE Word Nuc NucF1 NucF2 F2max F2maxF1 F2maxF2 # # E. Moreton * UNC-Chapel Hill Linguistics * Last update 2004 July 6 ######################################################################## ($File, $Spkr, $CRAZY_BW) = @ARGV; { ($Frame1Time, $FrameLength) = getFrameLength(); open (TEXTGRID, "<./$File.TextGrid") || die "measure: Can't open $File.TextGrid"; getDiphthongs (); getOverrides(); close (TEXTGRID); open (FORMANTS, "<./$File.FormantTier") || die "measure: Can't open $File.FormantTier"; open (DAT, ">./$File.dat") || die "measure: Can't open $File.dat"; open (CHECK, ">./$File.Check.TextGrid") || die "writeCheck: Couldn't create $File.Check.TextGrid"; copyTextGrid (); # Copy input text grid to .Check.TextGrid; $WordNum = 0; foreach $Diphthong (@Diphthongs) { ($Word, $StartFrame, $EndFrame) = split / /, $Diphthong; $DataRef = getData ($StartFrame, $EndFrame); writeData ($WordNum++, $Word, $DataRef); } close (FORMANTS); close (DAT); } # Get frame-time correspondences sub getFrameLength { my $FrameLength, $Frame1Time, $Frame2Time; open (FRAMES, "cat $File.FormantTier | egrep '^[ ]+time = [0-9]+\.[0-9]+'|") || die "measure: getFrameLength: Can't open $File.FormantTier"; $_ = ; ($foo, $Frame1Time) = split / = /, $_; $_ = ; ($foo, $Frame2Time) = split / = /, $_; $FrameLength = $Frame2Time - $Frame1Time; ($FrameLength > 0) || die "measure: getFrameLength: Formant Tier frames appear to have zero length. Is the FT file corrupt?"; ($Frame1Time + 0, $FrameLength); } sub TimeToFrame { my $Time = shift (@_); int ( ($Time - $Frame1Time) / $FrameLength); } sub FrameToTime { my $Frame = shift (@_); $Frame * $FrameLength + $Frame1Time; } sub getDiphthongs { # Diphthong tier (interval tier, beg. and end of each diphthong, labelled # with word) gets stored as array of "word first_frame last_frame". # Scan to beginning of Diphthong tier in file while ($_ = ) { if (eof (TEXTGRID)) { die "measure: getDiphthongs: Couldn't find Diphthong tier in $File.TextGrid"; } last if (/Diphthong/); } # Read it in my $i = 0; while ($_ = ) { # Quit when we get to the Overrides tier last if (/item \[3\]/); # Find next interval specification next unless (/intervals \[([0-9]*)\]:/); my %Buf = {}; # Get beginning, end, and label foreach my $var ('xmin', 'xmax', 'text') { $_ = ; s/\"//g; s/\n//g; tr/A-Z/a-z/; my ($foo, $varname, $value) = split /\s+=*\s*/; if ($var ne $varname) { die "measure:getDiphthongs:$File:Expected $var, found $varname in interval $i"; } $Buf {$var} = $value; } # Non-empties only if ($Buf {"text"}) { push @Diphthongs, $Buf {text} . ' ' . TimeToFrame ($Buf {xmin}) . ' ' . TimeToFrame ($Buf {xmax}); } } } sub getOverrides { # Override tier (point tier, each pt. labelled "F1max" or "F2max" # or "F2min") gets stored as an array where the index is the frame # number and the contents are a space-delimited concatenation of # the labels # Scan to beginning of Overrides tier in file while ($_ = ) { if (eof (TEXTGRID)) { die "measure: getOverrides: Couldn't find Overrides tier in $File.TextGrid"; } last if (/Overrides/); } # Read it in my $i = 0; while ($_ = ) { last if (/Landmarks/); # Find next point specification next unless (/points \[([0-9]*)\]:/); my %Buf = {}; # Get beginning, end, and label foreach my $var ('time', 'mark') { $_ = ; s/\"//g; s/\n//g; tr/A-Z/a-z/; my ($foo, $varname, @values) = split /\s+=*\s*/; if ($var ne $varname) { die "measure:getOverrides:$File:Expected $var, found $varname in interval $i"; } $Buf {$var} = join ' ', @values; } # Non-empties only if ($Buf {"mark"}) { $Overrides [TimeToFrame ($Buf {time})] .= $Buf {mark} . ' '; } } } sub getData { my ($StartFrame, $EndFrame) = @_; my %Data; @Tracks = getFormantTracks ($StartFrame, $EndFrame); foreach my $i (1, 2) { my $FormantPointName = "F$i" . 'max'; my $Frame; # Locate F1max and F2max (frame numbers); process time overrides ($Frame = getOverrideFrame ($i, 'max', $StartFrame, $EndFrame)) || ($Frame = getExtremeFrame ($i, 'max', @Tracks) + $StartFrame); $Data {$FormantPointName} = $Frame; # Get relevant formant value at point foreach my $j (1, 2) { $Data {$FormantPointName . "F$j"} = getFormant ($j, $Tracks [$Frame - $StartFrame]); # Process formant-value overrides, e.g., "F1=451" if ($Overrides [$Frame] =~ /f$j\=[0-9\.]+/) { my ($fore, $aft) = split /f$j\=/, $Overrides [$Frame]; my ($fval, $foo) = split /\s+/, $aft; $Data {$FormantPointName . "F$j"} = $fval; } } } \%Data; } sub getFormantTracks { # Get formant tier between specified frames, inclusive. # Returns an array originating at 0, so you'll need to add $StartFrame # to any subscript to recover the actual formant-tier frame number. my ($StartFrame, $EndFrame) = @_; my @Tracks, @Formants, $Frame; for ($Frame = $StartFrame; $Frame <= $EndFrame; $Frame++) { push @Tracks, readFormantFrame ($Frame); } @Tracks; } sub readFormantFrame { my $Frame = $_[0]; while ($_ = ) { last if (/^points \[$Frame\]:/); } $_ = ; $_ = ; /numberOfFormants/ || die "measure:getNextFrame:$File: Expected numberOfFormants but found $_ at time $time"; my ($foo, $Fmts) = split /\s+=\s* /; # Read all formants and associated bandwidths my @Fs = getFormantTier ('formant', $Fmts); my @Bs = getFormantTier ('bandwidth', $Fmts); # Eliminate formants with crazy bandwidths my @Formants = (); my $F; my $B; while (@Fs) { $F = shift (@Fs); $B = shift (@Bs); next if ($B > $CRAZY_BW); push @Formants, "$F $B"; } \@Formants; } sub getFormantTier { # Read one point (formants and bandwidths) from the Formant Tier my ($type, $Fmts) = @_; my @vals = (), $foo; $_ = ; /$type \[\]/ || die "measure:getFormantTier:$File: Expected $type but found $_ at time $time"; for (my $i = 0; $i < $Fmts; $i++) { $_ = ; /$type \[$i\] = / || die "measure:getFormantTier:$File: Expected $type but found $_ at time $time"; ($foo, $vals [$i]) = split /=/; } @vals; } sub getExtremeFrame { my ($Formant, $Extremum, @Tracks) = @_; if ($Formant !~ /[1-6]/ || $Extremum !~ /(min|max)/) { die "measure: getExtremeFrame: What kind of a formant extremum is F$Formant$Extremum ?!"; } my $extFrame = 0; my $extFormant = ${$Tracks [0]}[$Formant -1]; my $currFormant; for (my $Frame = 0; $Frame <= $#Tracks; $Frame++) { $currFormant = ${$Tracks [$Frame]}[$Formant -1]; if ($Extremum =~ /min/) { next if ($currFormant >= $extFormant); } else { next if ($currFormant <= $extFormant); } $extFrame = $Frame; $extFormant = $currFormant; } $extFrame; } sub getOverrideFrame () { my ($Formant, $Extremum, $StartFrame, $EndFrame) = @_; my $Frame = 0; for (my $i = $StartFrame; $i <= $EndFrame; $i++) { if ($Overrides [$i] =~ /f$Formant$Extremum/) { $Frame = $i; last; } } $Frame; } sub getFormant { # Extract a formant value from the specified frame of @Tracks my ($Fnum, $TrackFrame) = @_; my ($F, $B) = split ' ', $ {$TrackFrame} [$Fnum - 1]; $F; } # Write the results sub writeData { my ($WordNum, $Word, $DataRef) = @_; print DAT "$Spkr "; printf DAT "%-10s ", $Word; my $ORs = ''; # Indicate overrides in output foreach my $i (1, 2) { my $maxFrame = $ {$DataRef} {"F$i".'max'}; my $maxTime = FrameToTime ($maxFrame); my $F1 = $ {$DataRef} {"F$i".'max'.'F1'}; my $F2 = $ {$DataRef} {"F$i".'max'.'F2'}; printf DAT "%7.3f ", $maxTime; printf DAT "%4d %4d ", $F1, $F2; $ORs .= $Overrides [$maxFrame]; writeLandmarksTier ($WordNum, $i, $maxFrame, $maxTime, $F1, $F2); } print DAT "$ORs\n"; } # Make a TextGrid object for checking the results sub copyTextGrid { # Copy the original TextGrid, but without its Landmarks tier open (TEXTGRID, "<./$File.TextGrid") || die "writeCheck: Couldn't reopen $File.TextGrid"; while ($_ = ) { print CHECK $_; last if (/points: size/); # Start of Overrides tier } while ($_ = ) { last if (/points: size/); # Start of Landmarks tier print CHECK $_; } print CHECK " points: size = ", 2*(1+$#Diphthongs), "\n"; close (TEXTGRID); } sub writeLandmarksTier { # Print info on Formant $i max to .Check.TextGrid my ($WordNum, $i, $maxFrame, $maxTime, $F1, $F2) = @_; my $pointNum = 2 * $WordNum + $i - 1; print CHECK " points [$pointNum]:\n"; printf CHECK " time = %7.3f\n", $maxTime; printf CHECK " mark = " . '"F%1dmax %4d %4d"'."\n", $i, $F1, $F2; }