VoxForge
Hi,
having some accuracy problems, im still learning how to get everything to come together.
im using this sample audio file http://www.canirun.co.uk/bob.wav
the output im getting is: it's art at larchmont slash reader
expected output: get started with google dot com slash reader
my config (using lattice demo):
<?xml version="1.0" encoding="UTF-8"?>
<!--
Sphinx-4 Configuration file
-->
<!-- ******************************************************** -->
<!-- biship configuration file -->
<!-- ******************************************************** -->
<config>
<!-- ******************************************************** -->
<!-- frequently tuned properties -->
<!-- ******************************************************** -->
<property name="absoluteBeamWidth" value="500"/>
<property name="relativeBeamWidth" value="1E-80"/>
<property name="absoluteWordBeamWidth" value="20"/>
<property name="relativeWordBeamWidth" value="1E-60"/>
<property name="wordInsertionProbability" value="0.7"/>
<property name="languageWeight" value="10.5"/>
<property name="silenceInsertionProbability" value=".1"/>
<property name="frontend" value="epFrontEnd"/>
<property name="recognizer" value="recognizer"/>
<property name="showCreations" value="false"/>
<!-- ******************************************************** -->
<!-- word recognizer configuration -->
<!-- ******************************************************** -->
<component name="recognizer"
type="edu.cmu.sphinx.recognizer.Recognizer">
<property name="decoder" value="decoder"/>
<propertylist name="monitors">
<item>accuracyTracker </item>
<item>speedTracker </item>
<item>memoryTracker </item>
<item>recognizerMonitor </item>
</propertylist>
</component>
<!-- ******************************************************** -->
<!-- The Decoder configuration -->
<!-- ******************************************************** -->
<component name="decoder" type="edu.cmu.sphinx.decoder.Decoder">
<property name="searchManager" value="wordPruningSearchManager"/>
<property name="featureBlockSize" value="50"/>
</component>
<!-- ******************************************************** -->
<!-- The Search Manager -->
<!-- ******************************************************** -->
<component name="wordPruningSearchManager"
type="edu.cmu.sphinx.decoder.search.WordPruningBreadthFirstSearchManager">
<property name="logMath" value="logMath"/>
<property name="linguist" value="lexTreeLinguist"/>
<property name="pruner" value="trivialPruner"/>
<property name="scorer" value="threadedScorer"/>
<property name="activeListManager" value="activeListManager"/>
<property name="growSkipInterval" value="0"/>
<property name="checkStateOrder" value="false"/>
<property name="buildWordLattice" value="true"/>
<property name="acousticLookaheadFrames" value="1.7"/>
<property name="relativeBeamWidth" value="${relativeBeamWidth}"/>
</component>
<!-- ******************************************************** -->
<!-- The Active Lists -->
<!-- ******************************************************** -->
<component name="activeListManager"
type="edu.cmu.sphinx.decoder.search.SimpleActiveListManager">
<propertylist name="activeListFactories">
<item>standardActiveListFactory</item>
<item>wordActiveListFactory</item>
<item>wordActiveListFactory</item>
<item>standardActiveListFactory</item>
<item>standardActiveListFactory</item>
<item>standardActiveListFactory</item>
</propertylist>
</component>
<component name="standardActiveListFactory"
type="edu.cmu.sphinx.decoder.search.PartitionActiveListFactory">
<property name="logMath" value="logMath"/>
<property name="absoluteBeamWidth" value="${absoluteBeamWidth}"/>
<property name="relativeBeamWidth" value="${relativeBeamWidth}"/>
</component>
<component name="wordActiveListFactory"
type="edu.cmu.sphinx.decoder.search.PartitionActiveListFactory">
<property name="logMath" value="logMath"/>
<property name="absoluteBeamWidth" value="${absoluteWordBeamWidth}"/>
<property name="relativeBeamWidth" value="${relativeWordBeamWidth}"/>
</component>
<!-- ******************************************************** -->
<!-- The Pruner -->
<!-- ******************************************************** -->
<component name="trivialPruner"
type="edu.cmu.sphinx.decoder.pruner.SimplePruner"/>
<!-- ******************************************************** -->
<!-- TheScorer -->
<!-- ******************************************************** -->
<component name="threadedScorer"
type="edu.cmu.sphinx.decoder.scorer.ThreadedAcousticScorer">
<property name="frontend" value="${frontend}"/>
</component>
<!-- ******************************************************** -->
<!-- The linguist configuration -->
<!-- ******************************************************** -->
<component name="lexTreeLinguist"
type="edu.cmu.sphinx.linguist.lextree.LexTreeLinguist">
<property name="logMath" value="logMath"/>
<property name="acousticModel" value="wsj"/>
<property name="languageModel" value="trigramModel"/>
<property name="dictionary" value="dictionary"/>
<property name="addFillerWords" value="false"/>
<property name="fillerInsertionProbability" value="1E-10"/>
<property name="generateUnitStates" value="false"/>
<property name="wantUnigramSmear" value="true"/>
<property name="unigramSmearWeight" value="1"/>
<property name="wordInsertionProbability"
value="${wordInsertionProbability}"/>
<property name="silenceInsertionProbability"
value="${silenceInsertionProbability}"/>
<property name="languageWeight" value="${languageWeight}"/>
<property name="unitManager" value="unitManager"/>
</component>
<!-- ******************************************************** -->
<!-- The Dictionary configuration -->
<!-- ******************************************************** -->
<component name="dictionary"
type="edu.cmu.sphinx.linguist.dictionary.FastDictionary">
<property name="dictionaryPath"
value="/Users/Stewart/Desktop/voxforge-en-0.4/voxforge-en-0.4/etc/cmudict.0.7a"/>
<property name="fillerPath"
value="/Users/Stewart/Desktop/voxforge-en-0.4/voxforge-en-0.4/etc/voxforge_en_sphinx.filler"/>
<property name="addSilEndingPronunciation" value="false"/>
<property name="wordReplacement" value="<sil>"/>
<property name="unitManager" value="unitManager"/>
</component>
<!-- ******************************************************** -->
<!-- The Language Model configuration -->
<!-- ******************************************************** -->
<component name="trigramModel"
type="edu.cmu.sphinx.linguist.language.ngram.large.LargeTrigramModel">
<property name="unigramWeight" value=".5"/>
<property name="maxDepth" value="3"/>
<property name="logMath" value="logMath"/>
<property name="dictionary" value="dictionary"/>
<property name="location"
value="/Users/Stewart/Desktop/voxforge-en-0.4/voxforge-en-0.4/etc/lm_giga_64k_nvp_3gram.lm.DMP"/>
</component>
<!-- ******************************************************** -->
<!-- The acoustic model configuration -->
<!-- ******************************************************** -->
<component name="wsj"
type="edu.cmu.sphinx.linguist.acoustic.tiedstate.TiedStateAcousticModel">
<property name="loader" value="wsjLoader"/>
<property name="unitManager" value="unitManager"/>
</component>
<component name="wsjLoader" type="edu.cmu.sphinx.linguist.acoustic.tiedstate.Sphinx3Loader">
<property name="logMath" value="logMath"/>
<property name="unitManager" value="unitManager"/>
<property name="location" value="/Users/Stewart/Desktop/voxforge-en-0.4/voxforge-en-0.4/model_parameters/voxforge_en_sphinx.cd_cont_5000"/>
</component>
<!-- ******************************************************** -->
<!-- The unit manager configuration -->
<!-- ******************************************************** -->
<component name="unitManager"
type="edu.cmu.sphinx.linguist.acoustic.UnitManager"/>
<!-- ******************************************************** -->
<!-- The frontend configuration -->
<!-- ******************************************************** -->
<component name="epFrontEnd" type="edu.cmu.sphinx.frontend.FrontEnd">
<propertylist name="pipeline">
<item>audioFileDataSource </item>
<item>dataBlocker </item>
<item>speechClassifier </item>
<item>speechMarker </item>
<item>nonSpeechDataFilter </item>
<item>preemphasizer </item>
<item>windower </item>
<item>fft </item>
<item>melFilterBank </item>
<item>dct </item>
<item>liveCMN </item>
<item>featureExtraction </item>
<item>featureTransform</item>
</propertylist>
</component>
<component name="melFilterBank" type="edu.cmu.sphinx.frontend.frequencywarp.MelFrequencyFilterBank">
<property name="numberFilters" value="40"/>
<property name="minimumFrequency" value="130"/>
<property name="maximumFrequency" value="6800"/>
</component>
<component name="featureTransform" type="edu.cmu.sphinx.frontend.feature.FeatureTransform">
<property name="loader" value="wsjLoader"/>
</component>
<component name="audioFileDataSource" type="edu.cmu.sphinx.frontend.util.AudioFileDataSource"/>
<component name="microphone"
type="edu.cmu.sphinx.frontend.util.Microphone">
<property name="closeBetweenUtterances" value="false"/>
</component>
<component name="dataBlocker" type="edu.cmu.sphinx.frontend.DataBlocker"/>
<component name="speechClassifier"
type="edu.cmu.sphinx.frontend.endpoint.SpeechClassifier">
<property name="threshold" value="13"/>
</component>
<component name="nonSpeechDataFilter"
type="edu.cmu.sphinx.frontend.endpoint.NonSpeechDataFilter"/>
<component name="speechMarker"
type="edu.cmu.sphinx.frontend.endpoint.SpeechMarker">
<property name="speechTrailer" value="50"/>
</component>
<component name="preemphasizer"
type="edu.cmu.sphinx.frontend.filter.Preemphasizer"/>
<component name="windower"
type="edu.cmu.sphinx.frontend.window.RaisedCosineWindower"/>
<component name="fft"
type="edu.cmu.sphinx.frontend.transform.DiscreteFourierTransform"/>
<!--<component name="melFilterBank"
type="edu.cmu.sphinx.frontend.frequencywarp.MelFrequencyFilterBank"/>-->
<component name="dct"
type="edu.cmu.sphinx.frontend.transform.DiscreteCosineTransform"/>
<component name="liveCMN"
type="edu.cmu.sphinx.frontend.feature.LiveCMN"/>
<component name="featureExtraction"
type="edu.cmu.sphinx.frontend.feature.DeltasFeatureExtractor"/>
<!-- ******************************************************* -->
<!-- monitors -->
<!-- ******************************************************* -->
<component name="accuracyTracker"
type="edu.cmu.sphinx.instrumentation.BestPathAccuracyTracker">
<property name="recognizer" value="${recognizer}"/>
<property name="showRawResults" value="true"/>
<property name="showAlignedResults" value="true"/>
</component>
<component name="memoryTracker"
type="edu.cmu.sphinx.instrumentation.MemoryTracker">
<property name="recognizer" value="${recognizer}"/>
<property name="showDetails" value="false"/>
<property name="showSummary" value="false"/>
</component>
<component name="speedTracker"
type="edu.cmu.sphinx.instrumentation.SpeedTracker">
<property name="recognizer" value="${recognizer}"/>
<property name="frontend" value="${frontend}"/>
<property name="showDetails" value="false"/>
</component>
<component name="recognizerMonitor"
type="edu.cmu.sphinx.instrumentation.RecognizerMonitor">
<property name="recognizer" value="${recognizer}"/>
<propertylist name="allocatedMonitors">
<item>configMonitor </item>
</propertylist>
</component>
<component name="configMonitor"
type="edu.cmu.sphinx.instrumentation.ConfigMonitor">
<property name="showConfig" value="false"/>
</component>
<!-- ******************************************************* -->
<!-- Miscellaneous components -->
<!-- ******************************************************* -->
<component name="logMath" type="edu.cmu.sphinx.util.LogMath">
<property name="logBase" value="1.0001"/>
<property name="useAddTable" value="true"/>
</component>
</config>
--- (Edited on 11/10/2011 6:49 am [GMT-0600] by ) ---
opps im not using wsjLoader (just forgot to name it properly in config) im using the latest voxforge 0.4 sphinx model.
--- (Edited on 11/10/2011 7:00 am [GMT-0600] by ) ---
ok im starting to get it, i've changed the config to:
<!-- ******************************************************** -->
<!-- frequently tuned properties -->
<!-- ******************************************************** -->
<property name="absoluteBeamWidth" value="500"/>
<property name="relativeBeamWidth" value="1E-80"/>
<property name="absoluteWordBeamWidth" value="20"/>
<property name="relativeWordBeamWidth" value="1E-60"/>
<property name="wordInsertionProbability" value="0.2"/>
<property name="languageWeight" value="4"/>
<property name="silenceInsertionProbability" value=".1"/>
<property name="frontend" value="epFrontEnd"/>
<property name="recognizer" value="recognizer"/>
<property name="showCreations" value="false"/>
and i now get: get started at gogel ah tromp slash reader
do i simply add:
GOOGLE.COM G OW G AH L D AA T K AA M
to the dictionary (to get it to recignise google.com)? I tryed it but it doesn't seem to work...do i also have to include it in the arpa language model as well (google.com)?
thanks.
--- (Edited on 11/10/2011 6:59 pm [GMT-0600] by ) ---
> do i also have to include it in the arpa language model as well (google.com)? Yes I also recommend you to read a tutorial which explains all the problems like you are asking about
--- (Edited on 11/16/2011 03:47 [GMT+0300] by nsh) ---