Added voice control

Former-commit-id: 6f69079bf44f0d8f9ae40de6b0f1638d103464c2
parent 35c92407a3
commit 53da641909

863 changed files with 192681 additions and 0 deletions
34  lib/sphinx4-5prealpha-src/sphinx4-core/pom.xml  (new file)
@@ -0,0 +1,34 @@
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
                             http://maven.apache.org/maven-v4_0_0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <parent>
        <groupId>edu.cmu.sphinx</groupId>
        <artifactId>sphinx4-parent</artifactId>
        <version>1.0-SNAPSHOT</version>
    </parent>

    <artifactId>sphinx4-core</artifactId>
    <packaging>jar</packaging>

    <name>Sphinx4 core</name>

    <dependencies>

        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-math3</artifactId>
            <version>3.2</version>
        </dependency>

        <dependency>
            <groupId>edu.cmu.sphinx</groupId>
            <artifactId>sphinx4-data</artifactId>
            <version>1.0-SNAPSHOT</version>
            <scope>test</scope>
        </dependency>
    </dependencies>
</project>
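For context, a module POM like this is typically built through its parent; a minimal sketch, assuming a full sphinx4-5prealpha source checkout (the module name is taken from the artifactId above):

    cd lib/sphinx4-5prealpha-src
    mvn -pl sphinx4-core -am install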
@@ -0,0 +1,355 @@
/*
 * Copyright 2014 Alpha Cephei Inc.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */

package edu.cmu.sphinx.alignment;

import static java.lang.Math.abs;
import static java.lang.Math.max;
import static java.lang.Math.min;
import static java.util.Arrays.fill;
import static java.util.Collections.emptyList;

import java.util.*;

import edu.cmu.sphinx.util.Range;
import edu.cmu.sphinx.util.Utilities;

/**
 *
 * @author Alexander Solovets
 */
public class LongTextAligner {

    private final class Alignment {

        public final class Node {

            private final int databaseIndex;
            private final int queryIndex;

            private Node(int row, int column) {
                this.databaseIndex = column;
                this.queryIndex = row;
            }

            public int getDatabaseIndex() {
                return shifts.get(databaseIndex - 1);
            }

            public int getQueryIndex() {
                return indices.get(queryIndex - 1);
            }

            public String getQueryWord() {
                if (queryIndex > 0)
                    return query.get(getQueryIndex());
                return null;
            }

            public String getDatabaseWord() {
                if (databaseIndex > 0)
                    return reftup.get(getDatabaseIndex());
                return null;
            }

            public int getValue() {
                if (isBoundary())
                    return max(queryIndex, databaseIndex);
                return hasMatch() ? 0 : 1;
            }

            public boolean hasMatch() {
                return getQueryWord().equals(getDatabaseWord());
            }

            public boolean isBoundary() {
                return queryIndex == 0 || databaseIndex == 0;
            }

            public boolean isTarget() {
                return queryIndex == indices.size() &&
                       databaseIndex == shifts.size();
            }

            public List<Node> adjacent() {
                List<Node> result = new ArrayList<Node>(3);
                if (queryIndex < indices.size() &&
                        databaseIndex < shifts.size()) {
                    result.add(new Node(queryIndex + 1, databaseIndex + 1));
                }
                if (databaseIndex < shifts.size()) {
                    result.add(new Node(queryIndex, databaseIndex + 1));
                }
                if (queryIndex < indices.size()) {
                    result.add(new Node(queryIndex + 1, databaseIndex));
                }

                return result;
            }

            @Override
            public boolean equals(Object object) {
                if (!(object instanceof Node))
                    return false;

                Node other = (Node) object;
                return queryIndex == other.queryIndex &&
                       databaseIndex == other.databaseIndex;
            }

            @Override
            public int hashCode() {
                return 31 * (31 * queryIndex + databaseIndex);
            }

            @Override
            public String toString() {
                return String.format("[%d %d]", queryIndex, databaseIndex);
            }
        }

        private final List<Integer> shifts;
        private final List<String> query;
        private final List<Integer> indices;
        private final List<Node> alignment;

        public Alignment(List<String> query, Range range) {
            this.query = query;
            indices = new ArrayList<Integer>();
            Set<Integer> shiftSet = new TreeSet<Integer>();
            for (int i = 0; i < query.size(); i++) {
                if (tupleIndex.containsKey(query.get(i))) {
                    indices.add(i);
                    for (Integer shift : tupleIndex.get(query.get(i))) {
                        if (range.contains(shift))
                            shiftSet.add(shift);
                    }
                }
            }

            shifts = new ArrayList<Integer>(shiftSet);

            final Map<Node, Integer> cost = new HashMap<Node, Integer>();
            PriorityQueue<Node> openSet =
                    new PriorityQueue<Node>(1, new Comparator<Node>() {
                        @Override
                        public int compare(Node o1, Node o2) {
                            return cost.get(o1).compareTo(cost.get(o2));
                        }
                    });
            Collection<Node> closedSet = new HashSet<Node>();
            Map<Node, Node> parents = new HashMap<Node, Node>();

            Node startNode = new Node(0, 0);
            cost.put(startNode, 0);
            openSet.add(startNode);

            while (!openSet.isEmpty()) {
                Node q = openSet.poll();
                if (closedSet.contains(q))
                    continue;

                if (q.isTarget()) {
                    List<Node> backtrace = new ArrayList<Node>();
                    while (parents.containsKey(q)) {
                        if (!q.isBoundary() && q.hasMatch())
                            backtrace.add(q);
                        q = parents.get(q);
                    }
                    alignment = new ArrayList<Node>(backtrace);
                    Collections.reverse(alignment);
                    return;
                }

                closedSet.add(q);
                for (Node nb : q.adjacent()) {

                    if (closedSet.contains(nb))
                        continue;

                    // FIXME: move to appropriate location
                    int l = abs(indices.size() - shifts.size()
                                - q.queryIndex + q.databaseIndex)
                            - abs(indices.size() - shifts.size()
                                - nb.queryIndex + nb.databaseIndex);

                    Integer oldScore = cost.get(nb);
                    Integer qScore = cost.get(q);
                    if (oldScore == null)
                        oldScore = Integer.MAX_VALUE;
                    if (qScore == null)
                        qScore = Integer.MAX_VALUE;

                    int newScore = qScore + nb.getValue() - l;
                    if (newScore < oldScore) {
                        cost.put(nb, newScore);
                        openSet.add(nb);
                        parents.put(nb, q);
                    }
                }
            }

            alignment = emptyList();
        }

        public List<Node> getIndices() {
            return alignment;
        }
    }

    private final int tupleSize;
    private final List<String> reftup;
    private final HashMap<String, ArrayList<Integer>> tupleIndex;
    private List<String> refWords;

    /**
     * Constructs a new text aligner that serves requests for alignment of a
     * sequence of words with the provided database sequence. Sequences are
     * aligned by tuples comprising one or more subsequent words.
     *
     * @param words list of words forming the database
     * @param tupleSize size of a tuple, must be greater than or equal to 1
     */
    public LongTextAligner(List<String> words, int tupleSize) {
        assert words != null;
        assert tupleSize > 0;

        this.tupleSize = tupleSize;
        this.refWords = words;

        int offset = 0;
        reftup = getTuples(words);

        tupleIndex = new HashMap<String, ArrayList<Integer>>();
        for (String tuple : reftup) {
            ArrayList<Integer> indexes = tupleIndex.get(tuple);
            if (indexes == null) {
                indexes = new ArrayList<Integer>();
                tupleIndex.put(tuple, indexes);
            }
            indexes.add(offset++);
        }
    }

    /**
     * Aligns the query sequence with the previously built database.
     *
     * @param query list of words to look for
     *
     * @return indices of alignment
     */
    public int[] align(List<String> query) {
        return align(query, new Range(0, refWords.size()));
    }

    /**
     * Aligns the query sequence with the previously built database.
     *
     * @param words list of words to look for
     * @param range range of the database to search for the alignment
     *
     * @return indices of alignment
     */
    public int[] align(List<String> words, Range range) {

        if (range.upperEndpoint() - range.lowerEndpoint() < tupleSize
                || words.size() < tupleSize) {
            return alignTextSimple(
                    refWords.subList(range.lowerEndpoint(),
                            range.upperEndpoint()),
                    words, range.lowerEndpoint());
        }

        int[] result = new int[words.size()];
        fill(result, -1);
        int lastIndex = 0;
        for (Alignment.Node node : new Alignment(getTuples(words), range)
                .getIndices()) {
            // for (int j = 0; j < tupleSize; ++j)
            lastIndex = max(lastIndex, node.getQueryIndex());
            for (; lastIndex < node.getQueryIndex() + tupleSize; ++lastIndex)
                result[lastIndex] = node.getDatabaseIndex() + lastIndex -
                                    node.getQueryIndex();
        }
        return result;
    }

    /**
     * Makes a list of tuples of the given size out of a list of words.
     *
     * @param words words
     * @return list of tuples of size {@link #tupleSize}
     */
    private List<String> getTuples(List<String> words) {
        List<String> result = new ArrayList<String>();
        LinkedList<String> tuple = new LinkedList<String>();

        Iterator<String> it = words.iterator();
        for (int i = 0; i < tupleSize - 1; i++) {
            tuple.add(it.next());
        }
        while (it.hasNext()) {
            tuple.addLast(it.next());
            result.add(Utilities.join(tuple));
            tuple.removeFirst();
        }
        return result;
    }

    static int[] alignTextSimple(List<String> database, List<String> query,
            int offset) {
        int n = database.size() + 1;
        int m = query.size() + 1;
        int[][] f = new int[n][m];

        f[0][0] = 0;
        for (int i = 1; i < n; ++i) {
            f[i][0] = i;
        }

        for (int j = 1; j < m; ++j) {
            f[0][j] = j;
        }

        for (int i = 1; i < n; ++i) {
            for (int j = 1; j < m; ++j) {
                int match = f[i - 1][j - 1];
                String refWord = database.get(i - 1);
                String queryWord = query.get(j - 1);
                if (!refWord.equals(queryWord)) {
                    ++match;
                }
                int insert = f[i][j - 1] + 1;
                int delete = f[i - 1][j] + 1;
                f[i][j] = min(match, min(insert, delete));
            }
        }

        --n;
        --m;
        int[] alignment = new int[m];
        Arrays.fill(alignment, -1);
        while (m > 0) {
            if (n == 0) {
                --m;
            } else {
                String refWord = database.get(n - 1);
                String queryWord = query.get(m - 1);
                if (f[n - 1][m - 1] <= f[n - 1][m]
                        && f[n - 1][m - 1] <= f[n][m - 1]
                        && refWord.equals(queryWord)) {
                    alignment[--m] = --n + offset;
                } else {
                    if (f[n - 1][m] < f[n][m - 1]) {
                        --n;
                    } else {
                        --m;
                    }
                }
            }
        }

        return alignment;
    }
}
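A minimal usage sketch for the aligner above (the word lists are hypothetical, not part of this commit):

    import java.util.Arrays;
    import java.util.List;
    import edu.cmu.sphinx.alignment.LongTextAligner;

    // Database transcript that the aligner indexes by word tuples.
    List<String> transcript = Arrays.asList("the", "quick", "brown", "fox",
            "jumps", "over", "the", "lazy", "dog");
    // Tuple size 2: match by bigrams so repeated words stay unambiguous.
    LongTextAligner aligner = new LongTextAligner(transcript, 2);

    // result[i] is the transcript index aligned to query word i, or -1.
    int[] result = aligner.align(Arrays.asList("brown", "fox", "jumps"));
    System.out.println(Arrays.toString(result)); // expected: [2, 3, 4]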
@@ -0,0 +1,36 @@
/*
 * Copyright 2014 Alpha Cephei Inc.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 */

package edu.cmu.sphinx.alignment;

import java.util.Arrays;
import java.util.List;

public class SimpleTokenizer implements TextTokenizer {
    public List<String> expand(String text) {

        text = text.replace('’', '\'');
        text = text.replace('‘', ' ');
        text = text.replace('”', ' ');
        text = text.replace('“', ' ');
        text = text.replace('"', ' ');
        text = text.replace('»', ' ');
        text = text.replace('«', ' ');
        text = text.replace('–', '-');
        text = text.replace('—', ' ');
        text = text.replace('…', ' ');

        text = text.replace(" - ", " ");
        text = text.replaceAll("[/_*%]", " ");
        text = text.toLowerCase();

        String[] tokens = text.split("[.,?:!;()]");
        return Arrays.asList(tokens);
    }
}
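To illustrate what expand() produces, a hypothetical call (not part of the commit); note that String.split() drops trailing empty strings, and replaced quote characters leave spaces behind:

    TextTokenizer tokenizer = new SimpleTokenizer();
    List<String> lines =
            tokenizer.expand("Hello, \u201Cworld\u201D! How are you?");
    // lines: ["hello", "  world ", " how are you"]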
@@ -0,0 +1,25 @@
/*
 * Copyright 2014 Alpha Cephei Inc.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */

package edu.cmu.sphinx.alignment;

import java.util.List;

public interface TextTokenizer {

    /**
     * Cleans the text and returns the list of lines.
     *
     * @param text input text
     * @return a list of lines in the text
     */
    List<String> expand(String text);
}
@@ -0,0 +1,158 @@
/**
 * Portions Copyright 2001 Sun Microsystems, Inc.
 * Portions Copyright 1999-2001 Language Technologies Institute,
 * Carnegie Mellon University.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 */
package edu.cmu.sphinx.alignment;

/**
 * Contains a parsed token from a Tokenizer.
 */
public class Token {

    private String token = null;
    private String whitespace = null;
    private String prepunctuation = null;
    private String postpunctuation = null;
    private int position = 0; // position in the original input text
    private int lineNumber = 0;

    /**
     * Returns the whitespace characters of this Token.
     *
     * @return the whitespace characters of this Token; null if this Token does
     *         not use whitespace characters
     */
    public String getWhitespace() {
        return whitespace;
    }

    /**
     * Returns the prepunctuation characters of this Token.
     *
     * @return the prepunctuation characters of this Token; null if this Token
     *         does not use prepunctuation characters
     */
    public String getPrepunctuation() {
        return prepunctuation;
    }

    /**
     * Returns the postpunctuation characters of this Token.
     *
     * @return the postpunctuation characters of this Token; null if this Token
     *         does not use postpunctuation characters
     */
    public String getPostpunctuation() {
        return postpunctuation;
    }

    /**
     * Returns the position of this token in the original input text.
     *
     * @return the position of this token in the original input text
     */
    public int getPosition() {
        return position;
    }

    /**
     * Returns the line of this token in the original text.
     *
     * @return the line of this token in the original text
     */
    public int getLineNumber() {
        return lineNumber;
    }

    /**
     * Sets the whitespace characters of this Token.
     *
     * @param whitespace the whitespace characters for this token
     */
    public void setWhitespace(String whitespace) {
        this.whitespace = whitespace;
    }

    /**
     * Sets the prepunctuation characters of this Token.
     *
     * @param prepunctuation the prepunctuation characters
     */
    public void setPrepunctuation(String prepunctuation) {
        this.prepunctuation = prepunctuation;
    }

    /**
     * Sets the postpunctuation characters of this Token.
     *
     * @param postpunctuation the postpunctuation characters
     */
    public void setPostpunctuation(String postpunctuation) {
        this.postpunctuation = postpunctuation;
    }

    /**
     * Sets the position of the token in the original input text.
     *
     * @param position the position in the input text
     */
    public void setPosition(int position) {
        this.position = position;
    }

    /**
     * Sets the line of this token in the original text.
     *
     * @param lineNumber the line of this token in the original text
     */
    public void setLineNumber(int lineNumber) {
        this.lineNumber = lineNumber;
    }

    /**
     * Returns the string associated with this token.
     *
     * @return the token if it exists; otherwise null
     */
    public String getWord() {
        return token;
    }

    /**
     * Sets the string of this Token.
     *
     * @param word the word for this token
     */
    public void setWord(String word) {
        token = word;
    }

    /**
     * Converts this token to a string.
     *
     * @return the string representation of this object
     */
    public String toString() {
        StringBuffer fullToken = new StringBuffer();

        if (whitespace != null) {
            fullToken.append(whitespace);
        }
        if (prepunctuation != null) {
            fullToken.append(prepunctuation);
        }
        if (token != null) {
            fullToken.append(token);
        }
        if (postpunctuation != null) {
            fullToken.append(postpunctuation);
        }
        return fullToken.toString();
    }
}
(File diff suppressed because it is too large)
@@ -0,0 +1,405 @@
/**
 * Portions Copyright 2001 Sun Microsystems, Inc.
 * Portions Copyright 1999-2001 Language Technologies Institute,
 * Carnegie Mellon University.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 */
package edu.cmu.sphinx.alignment.tokenizer;

import java.io.IOException;
import java.io.Reader;
import java.util.Iterator;

import edu.cmu.sphinx.alignment.Token;

/**
 * Implements the tokenizer interface. Breaks an input sequence of characters
 * into a set of tokens.
 */
public class CharTokenizer implements Iterator<Token> {

    /** A constant indicating that the end of the stream has been read. */
    public static final int EOF = -1;

    /** A string containing the default whitespace characters. */
    public static final String DEFAULT_WHITESPACE_SYMBOLS = " \t\n\r";

    /** A string containing the default single characters. */
    public static final String DEFAULT_SINGLE_CHAR_SYMBOLS = "(){}[]";

    /** A string containing the default pre-punctuation characters. */
    public static final String DEFAULT_PREPUNCTUATION_SYMBOLS = "\"'`({[";

    /** A string containing the default post-punctuation characters. */
    public static final String DEFAULT_POSTPUNCTUATION_SYMBOLS =
            "\"'`.,:;!?(){}[]";

    /** The line number. */
    private int lineNumber;

    /** The input text (from the Utterance) to tokenize. */
    private String inputText;

    /** The file to read input text from, if using file mode. */
    private Reader reader;

    /** The current character, whether it's from the file or the input text. */
    private int currentChar;

    /**
     * The current char position for the input text (not the file); this is
     * called "file_pos" in flite.
     */
    private int currentPosition;

    /** The delimiting symbols of this tokenizer. */
    private String whitespaceSymbols = DEFAULT_WHITESPACE_SYMBOLS;
    private String singleCharSymbols = DEFAULT_SINGLE_CHAR_SYMBOLS;
    private String prepunctuationSymbols = DEFAULT_PREPUNCTUATION_SYMBOLS;
    private String postpunctuationSymbols = DEFAULT_POSTPUNCTUATION_SYMBOLS;

    /** The error description. */
    private String errorDescription;

    /** A place to store the current token. */
    private Token token;
    private Token lastToken;

    /**
     * Constructs a Tokenizer.
     */
    public CharTokenizer() {}

    /**
     * Creates a tokenizer that will return tokens from the given string.
     *
     * @param string the string to tokenize
     */
    public CharTokenizer(String string) {
        setInputText(string);
    }

    /**
     * Creates a tokenizer that will return tokens from the given file.
     *
     * @param file where to read the input from
     */
    public CharTokenizer(Reader file) {
        setInputReader(file);
    }

    /**
     * Sets the whitespace symbols of this Tokenizer to the given symbols.
     *
     * @param symbols the whitespace symbols
     */
    public void setWhitespaceSymbols(String symbols) {
        whitespaceSymbols = symbols;
    }

    /**
     * Sets the single character symbols of this Tokenizer to the given
     * symbols.
     *
     * @param symbols the single character symbols
     */
    public void setSingleCharSymbols(String symbols) {
        singleCharSymbols = symbols;
    }

    /**
     * Sets the prepunctuation symbols of this Tokenizer to the given symbols.
     *
     * @param symbols the prepunctuation symbols
     */
    public void setPrepunctuationSymbols(String symbols) {
        prepunctuationSymbols = symbols;
    }

    /**
     * Sets the postpunctuation symbols of this Tokenizer to the given symbols.
     *
     * @param symbols the postpunctuation symbols
     */
    public void setPostpunctuationSymbols(String symbols) {
        postpunctuationSymbols = symbols;
    }

    /**
     * Sets the text to tokenize.
     *
     * @param inputString the string to tokenize
     */
    public void setInputText(String inputString) {
        inputText = inputString;
        currentPosition = 0;

        if (inputText != null) {
            getNextChar();
        }
    }

    /**
     * Sets the input reader.
     *
     * @param reader the input source
     */
    public void setInputReader(Reader reader) {
        this.reader = reader;
        getNextChar();
    }

    /**
     * Returns the next token.
     *
     * @return the next token if it exists, <code>null</code> if no more tokens
     */
    public Token next() {
        lastToken = token;
        token = new Token();

        // Skip whitespace
        token.setWhitespace(getTokenOfCharClass(whitespaceSymbols));

        // quoted strings currently ignored

        // get prepunctuation
        token.setPrepunctuation(getTokenOfCharClass(prepunctuationSymbols));

        // get the symbol itself
        if (singleCharSymbols.indexOf(currentChar) != -1) {
            token.setWord(String.valueOf((char) currentChar));
            getNextChar();
        } else {
            token.setWord(getTokenNotOfCharClass(whitespaceSymbols));
        }

        token.setPosition(currentPosition);
        token.setLineNumber(lineNumber);

        // This'll have token *plus* postpunctuation
        // Get postpunctuation
        removeTokenPostpunctuation();

        return token;
    }

    /**
     * Returns <code>true</code> if there are more tokens, <code>false</code>
     * otherwise.
     *
     * @return <code>true</code> if there are more tokens, <code>false</code>
     *         otherwise
     */
    public boolean hasNext() {
        int nextChar = currentChar;
        return (nextChar != EOF);
    }

    public void remove() {
        throw new UnsupportedOperationException();
    }

    /**
     * Advances the currentPosition pointer by 1 (if not exceeding the length
     * of inputText) and returns the character pointed to by currentPosition.
     *
     * @return the next character, EOF if no more characters exist
     */
    private int getNextChar() {
        if (reader != null) {
            try {
                int readVal = reader.read();
                if (readVal == -1) {
                    currentChar = EOF;
                } else {
                    currentChar = (char) readVal;
                }
            } catch (IOException ioe) {
                currentChar = EOF;
                errorDescription = ioe.getMessage();
            }
        } else if (inputText != null) {
            if (currentPosition < inputText.length()) {
                currentChar = (int) inputText.charAt(currentPosition);
            } else {
                currentChar = EOF;
            }
        }
        if (currentChar != EOF) {
            currentPosition++;
        }
        if (currentChar == '\n') {
            lineNumber++;
        }
        return currentChar;
    }

    /**
     * Starting from the current position of the input text, returns the
     * subsequent characters of type charClass that are not of type
     * singleCharSymbols.
     *
     * @param charClass the type of characters to look for
     *
     * @return a string of characters starting from the current position of the
     *         input text, until it encounters a character not in the string
     *         charClass
     */
    private String getTokenOfCharClass(String charClass) {
        return getTokenByCharClass(charClass, true);
    }

    /**
     * Starting from the current position of the input text/file, returns the
     * subsequent characters, not of type singleCharSymbols, ending at
     * characters of type endingCharClass. E.g., if the current string is
     * "xxxxyyy", endingCharClass is "yz", and singleCharClass is "abc", then
     * this method will return "xxxx".
     *
     * @param endingCharClass the type of characters to look for
     *
     * @return a string of characters from the current position until it
     *         encounters characters in endingCharClass
     */
    private String getTokenNotOfCharClass(String endingCharClass) {
        return getTokenByCharClass(endingCharClass, false);
    }

    /**
     * Provides a `compressed' method from getTokenOfCharClass() and
     * getTokenNotOfCharClass(). If parameter containThisCharClass is
     * <code>true</code>, then a string from the current position to the last
     * character in charClass is returned. If containThisCharClass is
     * <code>false</code>, then a string before the first occurrence of a
     * character in charClass is returned.
     *
     * @param charClass the string of characters you want included or excluded
     *        in your return
     * @param containThisCharClass determines if you want characters in
     *        charClass in the returned string or not
     *
     * @return a string of characters from the current position until it
     *         encounters characters in charClass
     */
    private String getTokenByCharClass(String charClass,
            boolean containThisCharClass) {
        final StringBuilder buffer = new StringBuilder();

        // if we want the returned string to contain chars in charClass, then
        // containThisCharClass is TRUE and
        // (charClass.indexOf(currentChar) != -1) == containThisCharClass
        // returns true; if we want it to stop at characters of charClass,
        // then containThisCharClass is FALSE, and the condition returns
        // false.
        while ((charClass.indexOf(currentChar) != -1) == containThisCharClass
                && singleCharSymbols.indexOf(currentChar) == -1
                && currentChar != EOF) {
            buffer.append((char) currentChar);
            getNextChar();
        }
        return buffer.toString();
    }

    /**
     * Removes the postpunctuation characters from the current token and
     * copies those postpunctuation characters to the token's postpunctuation
     * field.
     */
    private void removeTokenPostpunctuation() {
        if (token == null) {
            return;
        }
        final String tokenWord = token.getWord();

        int tokenLength = tokenWord.length();
        int position = tokenLength - 1;

        while (position > 0
                && postpunctuationSymbols.indexOf((int) tokenWord
                        .charAt(position)) != -1) {
            position--;
        }

        if (tokenLength - 1 != position) {
            // Copy postpunctuation from token
            token.setPostpunctuation(tokenWord.substring(position + 1));

            // truncate token at postpunctuation
            token.setWord(tokenWord.substring(0, position + 1));
        } else {
            token.setPostpunctuation("");
        }
    }

    /**
     * Returns <code>true</code> if there were errors while reading tokens.
     *
     * @return <code>true</code> if there were errors; <code>false</code>
     *         otherwise
     */
    public boolean hasErrors() {
        return errorDescription != null;
    }

    /**
     * If hasErrors returns <code>true</code>, this will return a description
     * of the error encountered; otherwise it will return <code>null</code>.
     *
     * @return a description of the last error that occurred
     */
    public String getErrorDescription() {
        return errorDescription;
    }

    /**
     * Determines if the current token should start a new sentence.
     *
     * @return <code>true</code> if a new sentence should be started
     */
    public boolean isSentenceSeparator() {
        String tokenWhiteSpace = token.getWhitespace();
        String lastTokenPostpunctuation = null;
        if (lastToken != null) {
            lastTokenPostpunctuation = lastToken.getPostpunctuation();
        }

        if (lastToken == null || token == null) {
            return false;
        } else if (tokenWhiteSpace.indexOf('\n') != tokenWhiteSpace
                .lastIndexOf('\n')) {
            return true;
        } else if (lastTokenPostpunctuation.indexOf(':') != -1
                || lastTokenPostpunctuation.indexOf('?') != -1
                || lastTokenPostpunctuation.indexOf('!') != -1) {
            return true;
        } else if (lastTokenPostpunctuation.indexOf('.') != -1
                && tokenWhiteSpace.length() > 1
                && Character.isUpperCase(token.getWord().charAt(0))) {
            return true;
        } else {
            String lastWord = lastToken.getWord();
            int lastWordLength = lastWord.length();

            if (lastTokenPostpunctuation.indexOf('.') != -1
                    &&
                    /* next word starts with a capital */
                    Character.isUpperCase(token.getWord().charAt(0))
                    &&
                    /* last word isn't an abbreviation */
                    !(Character.isUpperCase(lastWord
                            .charAt(lastWordLength - 1))
                        || (lastWordLength < 4 && Character
                            .isUpperCase(lastWord.charAt(0))))) {
                return true;
            }
        }
        return false;
    }
}
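A short sketch of driving the tokenizer above (hypothetical input, not part of this commit):

    CharTokenizer t = new CharTokenizer("Hello, world! (Again)");
    while (t.hasNext()) {
        Token tok = t.next();
        System.out.println(tok.getWord()
                + " post='" + tok.getPostpunctuation() + "'");
    }
    // "Hello" comes back with postpunctuation ",", "world" with "!",
    // and '(' / ')' are returned as single-character tokens.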
@@ -0,0 +1,608 @@
/**
 * Portions Copyright 2001 Sun Microsystems, Inc.
 * Portions Copyright 1999-2001 Language Technologies Institute,
 * Carnegie Mellon University.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 */
package edu.cmu.sphinx.alignment.tokenizer;

import java.io.*;
import java.net.URL;
import java.util.StringTokenizer;
import java.util.logging.Logger;
import java.util.regex.Pattern;

/**
 * Implementation of a Classification and Regression Tree (CART) that is used
 * more like a binary decision tree, with each node containing a decision or a
 * final value. The decision nodes in the CART trees operate on an Item and
 * have the following format:
 *
 * <pre>
 * NODE feat operand value qfalse
 * </pre>
 *
 * <p>
 * Where <code>feat</code> is a string that represents a feature to pass to
 * the <code>findFeature</code> method of an item.
 *
 * <p>
 * The <code>value</code> represents the value to be compared against the
 * feature obtained from the item via the <code>feat</code> string. The
 * <code>operand</code> is the operation to do the comparison. The available
 * operands are as follows:
 *
 * <ul>
 * <li>< - the feature is less than value
 * <li>= - the feature is equal to the value
 * <li>> - the feature is greater than the value
 * <li>MATCHES - the feature matches the regular expression stored in value
 * <li>IN - [[[TODO: still guessing because none of the CART's in Flite seem to
 * use IN]]] the value is in the list defined by the feature.
 * </ul>
 *
 * <p>
 * [[[TODO: provide support for the IN operator.]]]
 *
 * <p>
 * For < and >, this CART coerces the value and feature to float's. For =,
 * this CART coerces the value and feature to string and checks for string
 * equality. For MATCHES, this CART uses the value as a regular expression and
 * compares the obtained feature to that.
 *
 * <p>
 * A CART is represented by an array in this implementation. The
 * <code>qfalse</code> value represents the index of the array to go to if the
 * comparison does not match. In this implementation, the qtrue index is always
 * implied, and represents the next element in the array. The root node of the
 * CART is the first element in the array.
 *
 * <p>
 * The interpretations always start at the root node of the CART and continue
 * until a final node is found. The final nodes have the following form:
 *
 * <pre>
 * LEAF value
 * </pre>
 *
 * <p>
 * Where <code>value</code> represents the value of the node. Reaching a final
 * node indicates the interpretation is over and the value of the node is the
 * interpretation result.
 */
public class DecisionTree {
    /** Logger instance. */
    private static final Logger logger =
            Logger.getLogger(DecisionTree.class.getSimpleName());

    /**
     * Entry in file represents the total number of nodes in the file. This
     * should be at the top of the file. The format should be "TOTAL n" where n
     * is an integer value.
     */
    final static String TOTAL = "TOTAL";

    /**
     * Entry in file represents a node. The format should be
     * "NODE feat op val f" where 'feat' represents a feature, op represents an
     * operand, val is the value, and f is the index of the node to go to if
     * there isn't a match.
     */
    final static String NODE = "NODE";

    /**
     * Entry in file represents a final node. The format should be "LEAF val"
     * where val represents the value.
     */
    final static String LEAF = "LEAF";

    /**
     * OPERAND_MATCHES
     */
    final static String OPERAND_MATCHES = "MATCHES";

    /**
     * The CART. Entries can be DecisionNode or LeafNode. An ArrayList could be
     * used here -- I chose not to because I thought it might be quicker to
     * avoid dealing with the dynamic resizing.
     */
    Node[] cart = null;

    /**
     * The number of nodes in the CART.
     */
    transient int curNode = 0;

    /**
     * Creates a new CART by reading from the given URL.
     *
     * @param url the location of the CART data
     *
     * @throws IOException if errors occur while reading the data
     */
    public DecisionTree(URL url) throws IOException {
        BufferedReader reader;
        String line;

        reader = new BufferedReader(new InputStreamReader(url.openStream()));
        line = reader.readLine();
        while (line != null) {
            if (!line.startsWith("***")) {
                parseAndAdd(line);
            }
            line = reader.readLine();
        }
        reader.close();
    }

    /**
     * Creates a new CART by reading from the given reader.
     *
     * @param reader the source of the CART data
     * @param nodes the number of nodes to read for this cart
     *
     * @throws IOException if errors occur while reading the data
     */
    public DecisionTree(BufferedReader reader, int nodes) throws IOException {
        this(nodes);
        String line;
        for (int i = 0; i < nodes; i++) {
            line = reader.readLine();
            if (!line.startsWith("***")) {
                parseAndAdd(line);
            }
        }
    }

    /**
     * Creates a new CART that will be populated with nodes later.
     *
     * @param numNodes the number of nodes
     */
    private DecisionTree(int numNodes) {
        cart = new Node[numNodes];
    }

    /**
     * Dumps the CART tree as a dot file.
     * <p>
     * The dot tool is part of the graphviz distribution at <a
     * href="http://www.graphviz.org/">http://www.graphviz.org/</a>. If
     * installed, call it as "dot -O -Tpdf *.dot" from the console to generate
     * pdfs.
     * </p>
     *
     * @param out the PrintWriter to write to
     */
    public void dumpDot(PrintWriter out) {
        out.write("digraph \"" + "CART Tree" + "\" {\n");
        out.write("rankdir = LR\n");

        for (Node n : cart) {
            out.println("\tnode" + Math.abs(n.hashCode()) + " [ label=\""
                    + n.toString() + "\", color=" + dumpDotNodeColor(n)
                    + ", shape=" + dumpDotNodeShape(n) + " ]\n");
            if (n instanceof DecisionNode) {
                DecisionNode dn = (DecisionNode) n;
                if (dn.qtrue < cart.length && cart[dn.qtrue] != null) {
                    out.write("\tnode" + Math.abs(n.hashCode()) + " -> node"
                            + Math.abs(cart[dn.qtrue].hashCode())
                            + " [ label=" + "TRUE" + " ]\n");
                }
                if (dn.qfalse < cart.length && cart[dn.qfalse] != null) {
                    out.write("\tnode" + Math.abs(n.hashCode()) + " -> node"
                            + Math.abs(cart[dn.qfalse].hashCode())
                            + " [ label=" + "FALSE" + " ]\n");
                }
            }
        }

        out.write("}\n");
        out.close();
    }

    protected String dumpDotNodeColor(Node n) {
        if (n instanceof LeafNode) {
            return "green";
        }
        return "red";
    }

    protected String dumpDotNodeShape(Node n) {
        return "box";
    }

    /**
     * Creates a node from the given input line and adds it to the CART. It
     * expects the TOTAL line to come before any of the nodes.
     *
     * @param line a line of input to parse
     */
    protected void parseAndAdd(String line) {
        StringTokenizer tokenizer = new StringTokenizer(line, " ");
        String type = tokenizer.nextToken();
        if (type.equals(LEAF) || type.equals(NODE)) {
            cart[curNode] = getNode(type, tokenizer, curNode);
            cart[curNode].setCreationLine(line);
            curNode++;
        } else if (type.equals(TOTAL)) {
            cart = new Node[Integer.parseInt(tokenizer.nextToken())];
            curNode = 0;
        } else {
            throw new Error("Invalid CART type: " + type);
        }
    }

    /**
     * Gets the node based upon the type and tokenizer.
     *
     * @param type <code>NODE</code> or <code>LEAF</code>
     * @param tokenizer the StringTokenizer containing the data to get
     * @param currentNode the index of the current node we're looking at
     *
     * @return the node
     */
    protected Node getNode(String type, StringTokenizer tokenizer,
            int currentNode) {
        if (type.equals(NODE)) {
            String feature = tokenizer.nextToken();
            String operand = tokenizer.nextToken();
            Object value = parseValue(tokenizer.nextToken());
            int qfalse = Integer.parseInt(tokenizer.nextToken());
            if (operand.equals(OPERAND_MATCHES)) {
                return new MatchingNode(feature, value.toString(),
                        currentNode + 1, qfalse);
            } else {
                return new ComparisonNode(feature, value, operand,
                        currentNode + 1, qfalse);
            }
        } else if (type.equals(LEAF)) {
            return new LeafNode(parseValue(tokenizer.nextToken()));
        }

        return null;
    }

    /**
     * Coerces a string into a value.
     *
     * @param string of the form "type(value)"; for example, "Float(2.3)"
     *
     * @return the value
     */
    protected Object parseValue(String string) {
        int openParen = string.indexOf("(");
        String type = string.substring(0, openParen);
        String value = string.substring(openParen + 1, string.length() - 1);
        if (type.equals("String")) {
            return value;
        } else if (type.equals("Float")) {
            return new Float(Float.parseFloat(value));
        } else if (type.equals("Integer")) {
            return new Integer(Integer.parseInt(value));
        } else if (type.equals("List")) {
            StringTokenizer tok = new StringTokenizer(value, ",");
            int size = tok.countTokens();

            int[] values = new int[size];
            for (int i = 0; i < size; i++) {
                float fval = Float.parseFloat(tok.nextToken());
                values[i] = Math.round(fval);
            }
            return values;
        } else {
            throw new Error("Unknown type: " + type);
        }
    }

    /**
     * Passes the given item through this CART and returns the interpretation.
     *
     * @param item the item to analyze
     *
     * @return the interpretation
     */
    public Object interpret(Item item) {
        int nodeIndex = 0;
        DecisionNode decision;

        while (!(cart[nodeIndex] instanceof LeafNode)) {
            decision = (DecisionNode) cart[nodeIndex];
            nodeIndex = decision.getNextNode(item);
        }
        logger.fine("LEAF " + cart[nodeIndex].getValue());
        return ((LeafNode) cart[nodeIndex]).getValue();
    }

    /**
     * A node for the CART.
     */
    static abstract class Node {
        /**
         * The value of this node.
         */
        protected Object value;

        /**
         * Creates a new Node with the given value.
         */
        public Node(Object value) {
            this.value = value;
        }

        /**
         * Gets the value.
         */
        public Object getValue() {
            return value;
        }

        /**
         * Returns a string representation of the type of the value.
         */
        public String getValueString() {
            if (value == null) {
                return "NULL()";
            } else if (value instanceof String) {
                return "String(" + value.toString() + ")";
            } else if (value instanceof Float) {
                return "Float(" + value.toString() + ")";
            } else if (value instanceof Integer) {
                return "Integer(" + value.toString() + ")";
            } else {
                return value.getClass().toString() + "(" + value.toString()
                        + ")";
            }
        }

        /**
         * Sets the line of text used to create this node.
         *
         * @param line the creation line
         */
        public void setCreationLine(String line) {}
    }

    /**
     * A decision node that determines the next Node to go to in the CART.
     */
    abstract static class DecisionNode extends Node {
        /**
         * The feature used to find a value from an Item.
         */
        private PathExtractor path;

        /**
         * Index of the Node to go to if the comparison doesn't match.
         */
        protected int qfalse;

        /**
         * Index of the Node to go to if the comparison matches.
         */
        protected int qtrue;

        /**
         * The feature used to find a value from an Item.
         */
        public String getFeature() {
            return path.toString();
        }

        /**
         * Finds the feature associated with this DecisionNode and the given
         * item.
         *
         * @param item the item to start from
         * @return the object representing the feature
         */
        public Object findFeature(Item item) {
            return path.findFeature(item);
        }

        /**
         * Returns the next node based upon the decision determined at this
         * node.
         *
         * @param item the current item
         * @return the index of the next node
         */
        public final int getNextNode(Item item) {
            return getNextNode(findFeature(item));
        }

        /**
         * Creates a new DecisionNode.
         *
         * @param feature the string used to get a value from an Item
         * @param value the value to compare to
         * @param qtrue the Node index to go to if the comparison matches
         * @param qfalse the Node index to go to upon no match
         */
        public DecisionNode(String feature, Object value, int qtrue, int qfalse) {
            super(value);
            this.path = new PathExtractor(feature, true);
            this.qtrue = qtrue;
            this.qfalse = qfalse;
        }

        /**
         * Gets the next Node to go to in the CART. The return value is an
         * index in the CART.
         */
        abstract public int getNextNode(Object val);
    }

    /**
     * A decision Node that compares two values.
     */
    static class ComparisonNode extends DecisionNode {
        /**
         * LESS_THAN
         */
        final static String LESS_THAN = "<";

        /**
         * EQUALS
         */
        final static String EQUALS = "=";

        /**
         * GREATER_THAN
         */
        final static String GREATER_THAN = ">";

        /**
         * The comparison type. One of LESS_THAN, GREATER_THAN, or EQUALS.
         */
        String comparisonType;

        /**
         * Creates a new ComparisonNode with the given values.
         *
         * @param feature the string used to get a value from an Item
         * @param value the value to compare to
         * @param comparisonType one of LESS_THAN, EQUALS, or GREATER_THAN
         * @param qtrue the Node index to go to if the comparison matches
         * @param qfalse the Node index to go to upon no match
         */
        public ComparisonNode(String feature, Object value,
                String comparisonType, int qtrue, int qfalse) {
            super(feature, value, qtrue, qfalse);
            if (!comparisonType.equals(LESS_THAN)
                    && !comparisonType.equals(EQUALS)
                    && !comparisonType.equals(GREATER_THAN)) {
                throw new Error("Invalid comparison type: " + comparisonType);
            } else {
                this.comparisonType = comparisonType;
            }
        }

        /**
         * Compares the given value and returns the appropriate Node index.
         * IMPLEMENTATION NOTE: for LESS_THAN and GREATER_THAN, the Node's
         * value and the value passed in are converted to floating point
         * values. For EQUALS, the Node's value and the value passed in are
         * compared as Strings. This is the way of Flite, so be it Flite.
         *
         * @param val the value to compare
         */
        public int getNextNode(Object val) {
            boolean yes = false;
            int ret;

            if (comparisonType.equals(LESS_THAN)
                    || comparisonType.equals(GREATER_THAN)) {
                float cart_fval;
                float fval;
                if (value instanceof Float) {
                    cart_fval = ((Float) value).floatValue();
                } else {
                    cart_fval = Float.parseFloat(value.toString());
                }
                if (val instanceof Float) {
                    fval = ((Float) val).floatValue();
                } else {
                    fval = Float.parseFloat(val.toString());
                }
                if (comparisonType.equals(LESS_THAN)) {
                    yes = (fval < cart_fval);
                } else {
                    yes = (fval > cart_fval);
                }
            } else { // comparisonType = "="
                String sval = val.toString();
                String cart_sval = value.toString();
                yes = sval.equals(cart_sval);
            }
            if (yes) {
                ret = qtrue;
            } else {
                ret = qfalse;
            }
            logger.fine(trace(val, yes, ret));
            return ret;
        }

        private String trace(Object value, boolean match, int next) {
            return "NODE " + getFeature() + " [" + value + "] "
                    + comparisonType + " [" + getValue() + "] "
                    + (match ? "Yes" : "No") + " next " + next;
        }

        /**
         * Gets a string representation of this Node.
         */
        public String toString() {
            return "NODE " + getFeature() + " " + comparisonType + " "
                    + getValueString() + " " + Integer.toString(qtrue) + " "
                    + Integer.toString(qfalse);
        }
    }

    /**
     * A Node that checks for a regular expression match.
     */
    static class MatchingNode extends DecisionNode {
        Pattern pattern;

        /**
         * Creates a new MatchingNode with the given values.
         *
         * @param feature the string used to get a value from an Item
         * @param regex the regular expression
         * @param qtrue the Node index to go to if the comparison matches
         * @param qfalse the Node index to go to upon no match
         */
        public MatchingNode(String feature, String regex, int qtrue, int qfalse) {
            super(feature, regex, qtrue, qfalse);
            this.pattern = Pattern.compile(regex);
        }

        /**
         * Compares the given value and returns the appropriate CART index.
         *
         * @param val the value to compare -- this must be a String
         */
        public int getNextNode(Object val) {
            return pattern.matcher((String) val).matches() ? qtrue : qfalse;
        }

        /**
         * Gets a string representation of this Node.
         */
        public String toString() {
            StringBuffer buf =
                    new StringBuffer(NODE + " " + getFeature() + " "
                            + OPERAND_MATCHES);
            buf.append(getValueString() + " ");
            buf.append(Integer.toString(qtrue) + " ");
            buf.append(Integer.toString(qfalse));
            return buf.toString();
        }
    }

    /**
     * The final Node of a CART. This is just a marker class.
     */
    static class LeafNode extends Node {
        /**
         * Creates a new LeafNode with the given value.
         *
         * @param value the value of this LeafNode
         */
        public LeafNode(Object value) {
            super(value);
        }

        /**
         * Gets a string representation of this Node.
         */
        public String toString() {
            return "LEAF " + getValueString();
        }
    }
}
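To make the CART file format described in the class javadoc concrete, here is a hypothetical three-node input (not from this commit) that parseAndAdd() accepts; node 0 falls through to the implied qtrue index 1 on a match, otherwise it jumps to index 2:

    TOTAL 3
    NODE name = String(john) 2
    LEAF String(male)
    LEAF String(unknown)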
@@ -0,0 +1,145 @@
/**
 * Portions Copyright 2001 Sun Microsystems, Inc.
 * Portions Copyright 1999-2001 Language Technologies Institute,
 * Carnegie Mellon University.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 */
package edu.cmu.sphinx.alignment.tokenizer;

import java.text.DecimalFormat;
import java.util.LinkedHashMap;
import java.util.Map;

/**
 * Implementation of the FeatureSet interface.
 */
public class FeatureSet {

    private final Map<String, Object> featureMap;
    static DecimalFormat formatter;

    /**
     * Creates a new empty feature set.
     */
    public FeatureSet() {
        featureMap = new LinkedHashMap<String, Object>();
    }

    /**
     * Determines if the given feature is present.
     *
     * @param name the name of the feature of interest
     *
     * @return true if the named feature is present
     */
    public boolean isPresent(String name) {
        return featureMap.containsKey(name);
    }

    /**
     * Removes the named feature from this set of features.
     *
     * @param name the name of the feature of interest
     */
    public void remove(String name) {
        featureMap.remove(name);
    }

    /**
     * Convenience method that returns the named feature as a String.
     *
     * @param name the name of the feature
     *
     * @return the value associated with the name or null if the value is not
     *         found
     *
     * @throws ClassCastException if the associated value is not a String
     */
    public String getString(String name) {
        return (String) getObject(name);
    }

    /**
     * Convenience method that returns the named feature as an int.
     *
     * @param name the name of the feature
     *
     * @return the value associated with the name
     *
     * @throws ClassCastException if the associated value is not an int
     */
    public int getInt(String name) {
        return ((Integer) getObject(name)).intValue();
    }

    /**
     * Convenience method that returns the named feature as a float.
     *
     * @param name the name of the feature
     *
     * @return the value associated with the name
     *
     * @throws ClassCastException if the associated value is not a float
     */
    public float getFloat(String name) {
        return ((Float) getObject(name)).floatValue();
    }

    /**
     * Returns the named feature as an object.
     *
     * @param name the name of the feature
     *
     * @return the value associated with the name or null if the value is not
     *         found
     */
    public Object getObject(String name) {
        return featureMap.get(name);
    }

    /**
     * Convenience method that sets the named feature as an int.
     *
     * @param name the name of the feature
     * @param value the value of the feature
     */
    public void setInt(String name, int value) {
        setObject(name, new Integer(value));
    }

    /**
     * Convenience method that sets the named feature as a float.
     *
     * @param name the name of the feature
     * @param value the value of the feature
     */
    public void setFloat(String name, float value) {
        setObject(name, new Float(value));
    }

    /**
     * Convenience method that sets the named feature as a String.
     *
     * @param name the name of the feature
     * @param value the value of the feature
     */
    public void setString(String name, String value) {
        setObject(name, value);
    }

    /**
     * Sets the named feature.
     *
     * @param name the name of the feature
     * @param value the value of the feature
     */
    public void setObject(String name, Object value) {
        featureMap.put(name, value);
    }
}
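A small usage sketch of the feature map above (hypothetical feature names and values):

    FeatureSet features = new FeatureSet();
    features.setString("pos", "noun");   // typed convenience setters
    features.setInt("syllables", 2);
    if (features.isPresent("syllables")) {
        int n = features.getInt("syllables");  // matching typed getter
    }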
@ -0,0 +1,447 @@
|
|||
/**
|
||||
* Portions Copyright 2001-2003 Sun Microsystems, Inc.
|
||||
* Portions Copyright 1999-2001 Language Technologies Institute,
|
||||
* Carnegie Mellon University.
|
||||
* All Rights Reserved. Use is subject to license terms.
|
||||
*
|
||||
* See the file "license.terms" for information on usage and
|
||||
* redistribution of this file, and for a DISCLAIMER OF ALL
|
||||
* WARRANTIES.
|
||||
*/
|
||||
package edu.cmu.sphinx.alignment.tokenizer;
|
||||
|
||||
import java.util.StringTokenizer;
|
||||
|
||||
/**
|
||||
* Represents a node in a Relation. Items can have shared contents but each
|
||||
* item has its own set of daughters. The shared contents of an item
|
||||
* (represented by ItemContents) includes the feature set for the item and the
|
||||
* set of all relations that this item is contained in. An item can be
|
||||
* contained in a number of relations and be a daughter of other items. This
|
||||
* class is used to keep track of all of these relationships. There may be many
|
||||
* Item instances that reference the same shared ItemContents.
|
||||
*/
|
||||
public class Item {
|
||||
private Relation ownerRelation;
|
||||
private ItemContents contents;
|
||||
private Item parent;
|
||||
private Item daughter;
|
||||
private Item next;
|
||||
private Item prev;
|
||||
|
||||
/**
|
||||
* Creates an item. The item is coupled to a particular Relation. If
|
||||
* sharedContents is null, a new ItemContents is created.
|
||||
*
|
||||
* @param relation the relation that owns this item
|
||||
* @param sharedContents the contents that is shared with others. If null,
|
||||
* a new sharedContents is created.
|
||||
*/
|
||||
public Item(Relation relation, ItemContents sharedContents) {
|
||||
ownerRelation = relation;
|
||||
if (sharedContents != null) {
|
||||
contents = sharedContents;
|
||||
} else {
|
||||
contents = new ItemContents();
|
||||
}
|
||||
parent = null;
|
||||
daughter = null;
|
||||
next = null;
|
||||
prev = null;
|
||||
|
||||
getSharedContents().addItemRelation(relation.getName(), this);
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds the item in the given relation that has the same shared contents.
|
||||
*
|
||||
* @param relationName the relation of interest
|
||||
*
|
||||
* @return the item as found in the given relation or null if not found
|
||||
*/
|
||||
public Item getItemAs(String relationName) {
|
||||
return getSharedContents().getItemRelation(relationName);
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the owning Relation.
|
||||
*
|
||||
* @return the relation that owns this item
|
||||
*/
|
||||
public Relation getOwnerRelation() {
|
||||
return ownerRelation;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the shared contents for this item.
|
||||
*
|
||||
* @return the shared item contents
|
||||
*/
|
||||
public ItemContents getSharedContents() {
|
||||
return contents;
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines if this item has daughters.
|
||||
*
|
||||
* @return true if this item has daughters
|
||||
*/
|
||||
public boolean hasDaughters() {
|
||||
return daughter != null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the first daughter of this item.
|
||||
*
|
||||
* @return the first daughter or null if none
|
||||
*/
|
||||
public Item getDaughter() {
|
||||
return daughter;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the Nth daughter of this item.
|
||||
*
|
||||
* @param which the index of the daughter to return
|
||||
*
|
||||
* @return the Nth daughter or null if none at the given index
|
||||
*/
|
||||
public Item getNthDaughter(int which) {
|
||||
Item d = daughter;
|
||||
int count = 0;
|
||||
while (count++ != which && d != null) {
|
||||
d = d.next;
|
||||
}
|
||||
return d;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the last daughter of this item.
|
||||
*
|
||||
* @return the last daughter or null if there are no daughters
|
||||
*/
|
||||
public Item getLastDaughter() {
|
||||
Item d = daughter;
|
||||
if (d == null) {
|
||||
return null;
|
||||
}
|
||||
while (d.next != null) {
|
||||
d = d.next;
|
||||
}
|
||||
return d;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds the given item as a daughter to this item.
|
||||
*
|
||||
* @param item the item whose contents will be shared by the new daughter,
|
||||
* or null to create fresh contents
|
||||
* @return the newly created daughter item
|
||||
*/
|
||||
public Item addDaughter(Item item) {
|
||||
Item newItem;
|
||||
ItemContents contents;
|
||||
|
||||
Item p = getLastDaughter();
|
||||
|
||||
if (p != null) {
|
||||
newItem = p.appendItem(item);
|
||||
} else {
|
||||
if (item == null) {
|
||||
contents = new ItemContents();
|
||||
} else {
|
||||
contents = item.getSharedContents();
|
||||
}
|
||||
newItem = new Item(getOwnerRelation(), contents);
|
||||
newItem.parent = this;
|
||||
daughter = newItem;
|
||||
}
|
||||
return newItem;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new Item, adds it as a daughter to this item and returns the
|
||||
* new item.
|
||||
*
|
||||
* @return the newly created item that was added as a daughter
|
||||
*/
|
||||
public Item createDaughter() {
|
||||
return addDaughter(null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the parent of this item.
|
||||
*
|
||||
* @return the parent of this item
|
||||
*/
|
||||
public Item getParent() {
|
||||
Item n;
|
||||
for (n = this; n.prev != null; n = n.prev) {
|
||||
}
|
||||
return n.parent;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the parent of this item.
|
||||
*
|
||||
* @param parent the parent of this item
|
||||
*/
|
||||
/*
|
||||
* private void setParent(Item parent) { this.parent = parent; }
|
||||
*/
|
||||
|
||||
/**
|
||||
* Returns the utterance associated with this item.
|
||||
*
|
||||
* @return the utterance that contains this item
|
||||
*/
|
||||
public Utterance getUtterance() {
|
||||
return getOwnerRelation().getUtterance();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the feature set of this item.
|
||||
*
|
||||
* @return the feature set of this item
|
||||
*/
|
||||
public FeatureSet getFeatures() {
|
||||
return getSharedContents().getFeatures();
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds the feature by following the given path. Path is a string of ":"
|
||||
* or "." separated strings with the following interpretations:
|
||||
* <ul>
|
||||
* <li>n - next item
|
||||
* <li>p - previous item
|
||||
* <li>parent - the parent
|
||||
* <li>daughter - the daughter
|
||||
* <li>daughter1 - same as daughter
|
||||
* <li>daughtern - the last daughter
|
||||
* <li>R:relname - the item as found in the given relation 'relname'
|
||||
* </ul>
|
||||
* The last element of the path will be interpreted as a voice/language
|
||||
* specific feature function (if present) or an item feature name. If the
|
||||
* feature function exists it will be called with the item specified by the
|
||||
* path, otherwise, a feature will be retrieved with the given name. If
|
||||
* neither exists, then the String "0" is returned.
|
||||
*
|
||||
* @param pathAndFeature the path to follow
|
||||
* @return the feature value, or the String "0" if not found
|
||||
*/
|
||||
public Object findFeature(String pathAndFeature) {
|
||||
int lastDot;
|
||||
String feature;
|
||||
String path;
|
||||
Item item;
|
||||
Object results = null;
|
||||
|
||||
lastDot = pathAndFeature.lastIndexOf(".");
|
||||
// string can be of the form "p.feature" or just "feature"
|
||||
|
||||
if (lastDot == -1) {
|
||||
feature = pathAndFeature;
|
||||
path = null;
|
||||
} else {
|
||||
feature = pathAndFeature.substring(lastDot + 1);
|
||||
path = pathAndFeature.substring(0, lastDot);
|
||||
}
|
||||
|
||||
item = findItem(path);
|
||||
if (item != null) {
|
||||
results = item.getFeatures().getObject(feature);
|
||||
}
|
||||
results = (results == null) ? "0" : results;
|
||||
|
||||
// System.out.println("FI " + pathAndFeature + " are " + results);
|
||||
|
||||
return results;
|
||||
}
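As a hedged illustration of the path syntax above (the item, feature, and relation names are hypothetical and need not exist in a given utterance):

    Object punc = item.findFeature("n.punc");       // "punc" feature of the next item
    Object name = item.findFeature("R:Token.name"); // "name" via the "Token" relation
    // A broken path or missing feature yields the String "0", never null.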
|
||||
|
||||
/**
|
||||
* Finds the item specified by the given path.
|
||||
*
|
||||
* Path is a string of ":" or "." separated strings with the following
|
||||
* interpretations:
|
||||
* <ul>
|
||||
* <li>n - next item
|
||||
* <li>p - previous item
|
||||
* <li>parent - the parent
|
||||
* <li>daughter - the daughter
|
||||
* <li>daughter1 - same as daughter
|
||||
* <li>daughtern - the last daughter
|
||||
* <li>R:relname - the item as found in the given relation 'relname'
|
||||
* </ul>
|
||||
* If the given path takes us outside of the bounds of the item graph, then
|
||||
* list access exceptions will be thrown.
|
||||
*
|
||||
* @param path the path to follow
|
||||
*
|
||||
* @return the item at the given path
|
||||
*/
|
||||
public Item findItem(String path) {
|
||||
Item pitem = this;
|
||||
StringTokenizer tok;
|
||||
|
||||
if (path == null) {
|
||||
return this;
|
||||
}
|
||||
|
||||
tok = new StringTokenizer(path, ":.");
|
||||
|
||||
while (pitem != null && tok.hasMoreTokens()) {
|
||||
String token = tok.nextToken();
|
||||
if (token.equals("n")) {
|
||||
pitem = pitem.getNext();
|
||||
} else if (token.equals("p")) {
|
||||
pitem = pitem.getPrevious();
|
||||
} else if (token.equals("nn")) {
|
||||
pitem = pitem.getNext();
|
||||
if (pitem != null) {
|
||||
pitem = pitem.getNext();
|
||||
}
|
||||
} else if (token.equals("pp")) {
|
||||
pitem = pitem.getPrevious();
|
||||
if (pitem != null) {
|
||||
pitem = pitem.getPrevious();
|
||||
}
|
||||
} else if (token.equals("parent")) {
|
||||
pitem = pitem.getParent();
|
||||
} else if (token.equals("daughter") || token.equals("daughter1")) {
|
||||
pitem = pitem.getDaughter();
|
||||
} else if (token.equals("daughtern")) {
|
||||
pitem = pitem.getLastDaughter();
|
||||
} else if (token.equals("R")) {
|
||||
String relationName = tok.nextToken();
|
||||
pitem =
|
||||
pitem.getSharedContents()
|
||||
.getItemRelation(relationName);
|
||||
} else {
|
||||
System.out.println("findItem: bad feature " + token + " in "
|
||||
+ path);
|
||||
}
|
||||
}
|
||||
return pitem;
|
||||
}
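For example (a sketch; it assumes the neighboring items exist):

    Item grandparent = item.findItem("parent.parent");
    Item nextOfParent = item.findItem("parent.n");
    // An unrecognized path token is reported to System.out and skipped.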
|
||||
|
||||
/**
|
||||
* Gets the next item in this list.
|
||||
*
|
||||
* @return the next item or null
|
||||
*/
|
||||
public Item getNext() {
|
||||
return next;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the previous item in this list.
|
||||
*
|
||||
* @return the previous item or null
|
||||
*/
|
||||
public Item getPrevious() {
|
||||
return prev;
|
||||
}
|
||||
|
||||
/**
|
||||
* Appends an item in this list after this item.
|
||||
*
|
||||
* @param originalItem the item whose contents will be shared by the new
|
||||
* item, or null to create fresh contents
|
||||
*
|
||||
* @return the newly appended item
|
||||
*/
|
||||
public Item appendItem(Item originalItem) {
|
||||
ItemContents contents;
|
||||
Item newItem;
|
||||
|
||||
if (originalItem == null) {
|
||||
contents = null;
|
||||
} else {
|
||||
contents = originalItem.getSharedContents();
|
||||
}
|
||||
|
||||
newItem = new Item(getOwnerRelation(), contents);
|
||||
newItem.next = this.next;
|
||||
if (this.next != null) {
|
||||
this.next.prev = newItem;
|
||||
}
|
||||
|
||||
attach(newItem);
|
||||
|
||||
if (this.ownerRelation.getTail() == this) {
|
||||
this.ownerRelation.setTail(newItem);
|
||||
}
|
||||
return newItem;
|
||||
}
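A small sketch of the list surgery performed by appendItem (items a, b, c are assumed to exist, with a linked directly before b):

    // before: a <-> b      after: a <-> n <-> b
    Item n = a.appendItem(c); // n shares c's contents (fresh contents if c is null)
    // Had a been the relation's tail, the tail would advance to n.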
|
||||
|
||||
/**
|
||||
* Attaches/appends an item to this one.
|
||||
*
|
||||
* @param item the item to append
|
||||
*/
|
||||
void attach(Item item) {
|
||||
this.next = item;
|
||||
item.prev = this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Prepends an item in this list before this item.
|
||||
*
|
||||
* @param originalItem the item whose contents will be shared by the new
|
||||
* item, or null to create fresh contents
|
||||
*
|
||||
* @return the newly prepended item
|
||||
*/
|
||||
public Item prependItem(Item originalItem) {
|
||||
ItemContents contents;
|
||||
Item newItem;
|
||||
|
||||
if (originalItem == null) {
|
||||
contents = null;
|
||||
} else {
|
||||
contents = originalItem.getSharedContents();
|
||||
}
|
||||
|
||||
newItem = new Item(getOwnerRelation(), contents);
|
||||
newItem.prev = this.prev;
|
||||
if (this.prev != null) {
|
||||
this.prev.next = newItem;
|
||||
}
|
||||
newItem.next = this;
|
||||
this.prev = newItem;
|
||||
if (this.parent != null) {
|
||||
this.parent.daughter = newItem;
|
||||
newItem.parent = this.parent;
|
||||
this.parent = null;
|
||||
}
|
||||
if (this.ownerRelation.getHead() == this) {
|
||||
this.ownerRelation.setHead(newItem);
|
||||
}
|
||||
return newItem;
|
||||
}
|
||||
|
||||
// Inherited from Object
|
||||
public String toString() {
|
||||
// if we have a feature called 'name' use that
|
||||
// otherwise fall back on the default.
|
||||
String name = getFeatures().getString("name");
|
||||
if (name == null) {
|
||||
name = "";
|
||||
}
|
||||
return name;
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines if the shared contents of the two items are the same.
|
||||
*
|
||||
* @param otherItem the item to compare
|
||||
*
|
||||
* @return true if the shared contents are the same
|
||||
*/
|
||||
public boolean equalsShared(Item otherItem) {
|
||||
if (otherItem == null) {
|
||||
return false;
|
||||
} else {
|
||||
return getSharedContents().equals(otherItem.getSharedContents());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@@ -0,0 +1,74 @@
|
|||
/**
|
||||
* Portions Copyright 2001 Sun Microsystems, Inc.
|
||||
* Portions Copyright 1999-2001 Language Technologies Institute,
|
||||
* Carnegie Mellon University.
|
||||
* All Rights Reserved. Use is subject to license terms.
|
||||
*
|
||||
* See the file "license.terms" for information on usage and
|
||||
* redistribution of this file, and for a DISCLAIMER OF ALL
|
||||
* WARRANTIES.
|
||||
*/
|
||||
package edu.cmu.sphinx.alignment.tokenizer;
|
||||
|
||||
|
||||
/**
|
||||
* Contains the information that is shared between multiple items.
|
||||
*/
|
||||
public class ItemContents {
|
||||
private FeatureSet features;
|
||||
private FeatureSet relations;
|
||||
|
||||
/**
|
||||
* Class Constructor.
|
||||
*/
|
||||
public ItemContents() {
|
||||
features = new FeatureSet();
|
||||
relations = new FeatureSet();
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds the given item to the set of relations. Whenever an item is added
|
||||
* to a relation, it should add the name and the Item reference to this set
|
||||
* of name/item mappings. This allows an item to find out the set of all
|
||||
* relations that it is contained in.
|
||||
*
|
||||
* @param relationName the name of the relation
|
||||
* @param item the item reference in the relation
|
||||
*/
|
||||
public void addItemRelation(String relationName, Item item) {
|
||||
// System.out.println("AddItemRelation: " + relationName
|
||||
// + " item: " + item);
|
||||
relations.setObject(relationName, item);
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes the relation/item mapping from this ItemContents.
|
||||
*
|
||||
* @param relationName the name of the relation/item to remove
|
||||
*/
|
||||
public void removeItemRelation(String relationName) {
|
||||
relations.remove(relationName);
|
||||
}
|
||||
|
||||
/**
|
||||
* Given the name of a relation, returns the item that shares the same
|
||||
* ItemContents.
|
||||
*
|
||||
* @param relationName the name of the relation of interest
|
||||
*
|
||||
* @return the item associated with this ItemContents in the named
|
||||
* relation, or null if it does not exist
|
||||
*/
|
||||
public Item getItemRelation(String relationName) {
|
||||
return (Item) relations.getObject(relationName);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the feature set for this item contents.
|
||||
*
|
||||
* @return the FeatureSet for this contents
|
||||
*/
|
||||
public FeatureSet getFeatures() {
|
||||
return features;
|
||||
}
|
||||
}
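To make the sharing concrete, a comment-level sketch (relation names are illustrative):

    // When one item is appended to a second relation with its contents,
    // both items point at a single ItemContents, so either side can find
    // the other:
    //   Item tokenView = wordItem.getItemAs("Token");
    // getItemAs returns null when the contents are not registered under
    // the given relation name.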
|
||||
|
|
@@ -0,0 +1,449 @@
|
|||
/**
|
||||
* Portions Copyright 2001-2003 Sun Microsystems, Inc.
|
||||
* Portions Copyright 1999-2001 Language Technologies Institute,
|
||||
* Carnegie Mellon University.
|
||||
* All Rights Reserved. Use is subject to license terms.
|
||||
*
|
||||
* See the file "license.terms" for information on usage and
|
||||
* redistribution of this file, and for a DISCLAIMER OF ALL
|
||||
* WARRANTIES.
|
||||
*/
|
||||
package edu.cmu.sphinx.alignment.tokenizer;
|
||||
|
||||
|
||||
/**
|
||||
* Expands Strings containing digit characters into a list of words
|
||||
* representing those digits.
|
||||
*
|
||||
* It translates the following code from flite:
|
||||
* <code>lang/usEnglish/us_expand.c</code>
|
||||
*/
|
||||
public class NumberExpander {
|
||||
|
||||
private static final String[] digit2num = {"zero", "one", "two", "three",
|
||||
"four", "five", "six", "seven", "eight", "nine"};
|
||||
|
||||
private static final String[] digit2teen = {"ten", /* shouldn't get called */
|
||||
"eleven", "twelve", "thirteen", "fourteen", "fifteen", "sixteen",
|
||||
"seventeen", "eighteen", "nineteen"};
|
||||
|
||||
private static final String[] digit2enty = {"zero", /* shouldn't get called */
|
||||
"ten", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty",
|
||||
"ninety"};
|
||||
|
||||
private static final String[] ord2num = {"zeroth", "first", "second",
|
||||
"third", "fourth", "fifth", "sixth", "seventh", "eighth", "ninth"};
|
||||
|
||||
private static final String[] ord2teen = {"tenth", /* shouldn't get called */
|
||||
"eleventh", "twelfth", "thirteenth", "fourteenth", "fifteenth",
|
||||
"sixteenth", "seventeenth", "eighteenth", "nineteenth"};
|
||||
|
||||
private static final String[] ord2enty = {"zeroth", /* shouldn't get called */
|
||||
"tenth", "twentieth", "thirtieth", "fortieth", "fiftieth", "sixtieth",
|
||||
"seventieth", "eightieth", "ninetieth"};
|
||||
|
||||
private static String[] digit2Numness = {
|
||||
"", "tens", "twenties", "thirties", "fourties", "fifties",
|
||||
"sixties", "seventies", "eighties", "nineties"
|
||||
};
|
||||
|
||||
/**
|
||||
* Unconstructable; only static utility methods are provided.
|
||||
*/
|
||||
private NumberExpander() {}
|
||||
|
||||
/**
|
||||
* Expands a digit string into a list of English words for that number. For
|
||||
* example, "1234" expands to "one thousand two hundred thirty four".
|
||||
*
|
||||
* @param numberString the digit string to expand.
|
||||
* @param wordRelation words are added to this Relation
|
||||
*/
|
||||
public static void expandNumber(String numberString,
|
||||
WordRelation wordRelation) {
|
||||
int numDigits = numberString.length();
|
||||
|
||||
if (numDigits == 0) {
|
||||
// wordRelation = null;
|
||||
} else if (numDigits == 1) {
|
||||
expandDigits(numberString, wordRelation);
|
||||
} else if (numDigits == 2) {
|
||||
expand2DigitNumber(numberString, wordRelation);
|
||||
} else if (numDigits == 3) {
|
||||
expand3DigitNumber(numberString, wordRelation);
|
||||
} else if (numDigits < 7) {
|
||||
expandBelow7DigitNumber(numberString, wordRelation);
|
||||
} else if (numDigits < 10) {
|
||||
expandBelow10DigitNumber(numberString, wordRelation);
|
||||
} else if (numDigits < 13) {
|
||||
expandBelow13DigitNumber(numberString, wordRelation);
|
||||
} else {
|
||||
expandDigits(numberString, wordRelation);
|
||||
}
|
||||
}
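A worked trace of the dispatch above (conceptual; a WordRelation is normally obtained via WordRelation.createWordRelation, defined later in this commit):

    // expandNumber("1234", words)   -> 4 digits, below-7-digit branch
    //   expandLargeNumber("1234", "thousand", 3)
    //     prefix "1"  -> "one", then "thousand"
    //     rest  "234" -> expand3DigitNumber -> "two" "hundred"
    //                    expand2DigitNumber("34") -> "thirty" "four"
    // words now ends with: one thousand two hundred thirty four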
|
||||
|
||||
/**
|
||||
* Expands a two-digit string into a list of English words.
|
||||
*
|
||||
* @param numberString the string which is the number to expand
|
||||
* @param wordRelation words are added to this Relation
|
||||
*/
|
||||
private static void expand2DigitNumber(String numberString,
|
||||
WordRelation wordRelation) {
|
||||
if (numberString.charAt(0) == '0') {
|
||||
// numberString is "0X"
|
||||
if (numberString.charAt(1) == '0') {
|
||||
// numberString is "00", do nothing
|
||||
} else {
|
||||
// numberString is "01", "02" ...
|
||||
String number = digit2num[numberString.charAt(1) - '0'];
|
||||
wordRelation.addWord(number);
|
||||
}
|
||||
} else if (numberString.charAt(1) == '0') {
|
||||
// numberString is "10", "20", ...
|
||||
String number = digit2enty[numberString.charAt(0) - '0'];
|
||||
wordRelation.addWord(number);
|
||||
} else if (numberString.charAt(0) == '1') {
|
||||
// numberString is "11", "12", ..., "19"
|
||||
String number = digit2teen[numberString.charAt(1) - '0'];
|
||||
wordRelation.addWord(number);
|
||||
} else {
|
||||
// numberString is "2X", "3X", ...
|
||||
String enty = digit2enty[numberString.charAt(0) - '0'];
|
||||
wordRelation.addWord(enty);
|
||||
expandDigits(numberString.substring(1, numberString.length()),
|
||||
wordRelation);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Expands a three-digit string into a list of English words.
|
||||
*
|
||||
* @param numberString the string which is the number to expand
|
||||
* @param wordRelation words are added to this Relation
|
||||
*/
|
||||
private static void expand3DigitNumber(String numberString,
|
||||
WordRelation wordRelation) {
|
||||
if (numberString.charAt(0) == '0') {
|
||||
expandNumberAt(numberString, 1, wordRelation);
|
||||
} else {
|
||||
String hundredDigit = digit2num[numberString.charAt(0) - '0'];
|
||||
wordRelation.addWord(hundredDigit);
|
||||
wordRelation.addWord("hundred");
|
||||
expandNumberAt(numberString, 1, wordRelation);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Expands a string that is a 4 to 6 digits number into a list of English
|
||||
* words. For example, "333000" into "three hundred and thirty-three
|
||||
* thousand".
|
||||
*
|
||||
* @param numberString the string which is the number to expand
|
||||
* @param wordRelation words are added to this Relation
|
||||
*/
|
||||
private static void expandBelow7DigitNumber(String numberString,
|
||||
WordRelation wordRelation) {
|
||||
expandLargeNumber(numberString, "thousand", 3, wordRelation);
|
||||
}
|
||||
|
||||
/**
|
||||
* Expands a string that is a 7 to 9 digits number into a list of English
|
||||
* words. For example, "19000000" into nineteen million.
|
||||
*
|
||||
* @param numberString the string which is the number to expand
|
||||
* @param wordRelation words are added to this Relation
|
||||
*/
|
||||
private static void expandBelow10DigitNumber(String numberString,
|
||||
WordRelation wordRelation) {
|
||||
expandLargeNumber(numberString, "million", 6, wordRelation);
|
||||
}
|
||||
|
||||
/**
|
||||
* Expands a string that is a 10 to 12 digits number into a list of English
|
||||
* words. For example, "27000000000" into twenty-seven billion.
|
||||
*
|
||||
* @param numberString the string which is the number to expand
|
||||
* @param wordRelation words are added to this Relation
|
||||
*/
|
||||
private static void expandBelow13DigitNumber(String numberString,
|
||||
WordRelation wordRelation) {
|
||||
expandLargeNumber(numberString, "billion", 9, wordRelation);
|
||||
}
|
||||
|
||||
/**
|
||||
* Expands a string that is a number longer than 3 digits into a list of
|
||||
* English words. For example, "1000" into one thousand.
|
||||
*
|
||||
* @param numberString the string which is the number to expand
|
||||
* @param order either "thousand", "million", or "billion"
|
||||
* @param numberZeroes the number of zeroes, depending on the order, so it is
|
||||
* either 3, 6, or 9
|
||||
* @param wordRelation words are added to this Relation
|
||||
*/
|
||||
private static void expandLargeNumber(String numberString, String order,
|
||||
int numberZeroes, WordRelation wordRelation) {
|
||||
int numberDigits = numberString.length();
|
||||
|
||||
// parse out the prefix, e.g., "113" in "113,000"
|
||||
int i = numberDigits - numberZeroes;
|
||||
String part = numberString.substring(0, i);
|
||||
|
||||
// get how many thousands/millions/billions
|
||||
Item oldTail = wordRelation.getTail();
|
||||
expandNumber(part, wordRelation);
|
||||
if (wordRelation.getTail() != oldTail) {
|
||||
wordRelation.addWord(order);
|
||||
}
|
||||
expandNumberAt(numberString, i, wordRelation);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the number string list of the given string starting at the given
|
||||
* index. E.g., expandNumberAt("1100", 1) gives "one hundred"
|
||||
*
|
||||
* @param numberString the string which is the number to expand
|
||||
* @param startIndex the starting position
|
||||
* @param wordRelation words are added to this Relation
|
||||
*/
|
||||
private static void expandNumberAt(String numberString, int startIndex,
|
||||
WordRelation wordRelation) {
|
||||
expandNumber(
|
||||
numberString.substring(startIndex, numberString.length()),
|
||||
wordRelation);
|
||||
}
|
||||
|
||||
/**
|
||||
* Expands the given token into a list of words pronouncing it as digits.
|
||||
*
|
||||
* @param numberString the string which is the number to expand
|
||||
* @param wordRelation words are added to this Relation
|
||||
*/
|
||||
public static void expandDigits(String numberString,
|
||||
WordRelation wordRelation) {
|
||||
int numberDigits = numberString.length();
|
||||
for (int i = 0; i < numberDigits; i++) {
|
||||
char digit = numberString.charAt(i);
|
||||
if (Character.isDigit(digit)) {
|
||||
wordRelation.addWord(digit2num[numberString.charAt(i) - '0']);
|
||||
} else {
|
||||
wordRelation.addWord("umpty");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Expands the digit string of an ordinal number.
|
||||
*
|
||||
* @param rawNumberString the string which is the number to expand
|
||||
* @param wordRelation words are added to this Relation
|
||||
*/
|
||||
public static void expandOrdinal(String rawNumberString,
|
||||
WordRelation wordRelation) {
|
||||
// remove all ','s from the raw number string
|
||||
expandNumber(rawNumberString.replace(",", ""), wordRelation);
|
||||
|
||||
// get the last in the list of number strings
|
||||
Item lastItem = wordRelation.getTail();
|
||||
|
||||
if (lastItem != null) {
|
||||
|
||||
FeatureSet featureSet = lastItem.getFeatures();
|
||||
String lastNumber = featureSet.getString("name");
|
||||
String ordinal = findMatchInArray(lastNumber, digit2num, ord2num);
|
||||
|
||||
if (ordinal == null) {
|
||||
ordinal = findMatchInArray(lastNumber, digit2teen, ord2teen);
|
||||
}
|
||||
if (ordinal == null) {
|
||||
ordinal = findMatchInArray(lastNumber, digit2enty, ord2enty);
|
||||
}
|
||||
|
||||
if (lastNumber.equals("hundred")) {
|
||||
ordinal = "hundredth";
|
||||
} else if (lastNumber.equals("thousand")) {
|
||||
ordinal = "thousandth";
|
||||
} else if (lastNumber.equals("billion")) {
|
||||
ordinal = "billionth";
|
||||
}
|
||||
|
||||
// if there was an ordinal, set the last element of the list
|
||||
// to that ordinal; otherwise, don't do anything
|
||||
if (ordinal != null) {
|
||||
wordRelation.setLastWord(ordinal);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Expands the given digit string as a decade or plural number ("numness"),
|
||||
* e.g. "1950" becomes "nineteen fifties". Assumes the caller has isolated
|
||||
* the digit portion (as in "1950s").
|
||||
*/
|
||||
public static void expandNumess(String rawString, WordRelation wordRelation) {
|
||||
if (rawString.length() == 4) {
|
||||
expand2DigitNumber(rawString.substring(0, 2), wordRelation);
|
||||
expandNumess(rawString.substring(2), wordRelation);
|
||||
} else {
|
||||
wordRelation.addWord(digit2Numness[rawString.charAt(0) - '0']);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds a match of the given string in the given array, and returns the
|
||||
* element at the same index in returnInArray.
|
||||
*
|
||||
* @param strToMatch the string to match
|
||||
* @param matchInArray the source array
|
||||
* @param returnInArray the return array
|
||||
*
|
||||
* @return an element in returnInArray, or <code>null</code> if a match is
|
||||
* not found
|
||||
*/
|
||||
private static String findMatchInArray(String strToMatch,
|
||||
String[] matchInArray, String[] returnInArray) {
|
||||
for (int i = 0; i < matchInArray.length; i++) {
|
||||
if (strToMatch.equals(matchInArray[i])) {
|
||||
if (i < returnInArray.length) {
|
||||
return returnInArray[i];
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Expands the given number string in pairs, as in years or IDs.
|
||||
*
|
||||
* @param numberString the string which is the number to expand
|
||||
* @param wordRelation words are added to this Relation
|
||||
*/
|
||||
public static void expandID(String numberString, WordRelation wordRelation) {
|
||||
|
||||
int numberDigits = numberString.length();
|
||||
|
||||
if ((numberDigits == 4) && (numberString.charAt(2) == '0')
|
||||
&& (numberString.charAt(3) == '0')) {
|
||||
if (numberString.charAt(1) == '0') { // e.g. 2000, 3000
|
||||
expandNumber(numberString, wordRelation);
|
||||
} else {
|
||||
expandNumber(numberString.substring(0, 2), wordRelation);
|
||||
wordRelation.addWord("hundred");
|
||||
}
|
||||
} else if ((numberDigits == 2) && (numberString.charAt(0) == '0')) {
|
||||
wordRelation.addWord("oh");
|
||||
expandDigits(numberString.substring(1, 2), wordRelation);
|
||||
} else if ((numberDigits == 4 && numberString.charAt(1) == '0')
|
||||
|| numberDigits < 3) {
|
||||
expandNumber(numberString, wordRelation);
|
||||
} else if (numberDigits % 2 == 1) {
|
||||
String firstDigit = digit2num[numberString.charAt(0) - '0'];
|
||||
wordRelation.addWord(firstDigit);
|
||||
expandID(numberString.substring(1, numberDigits), wordRelation);
|
||||
} else {
|
||||
expandNumber(numberString.substring(0, 2), wordRelation);
|
||||
expandID(numberString.substring(2, numberDigits), wordRelation);
|
||||
}
|
||||
}
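Two traces of the pairing rules above:

    // expandID("1984"): no trailing "00", even length
    //   -> expandNumber("19") = "nineteen", then expandID("84") = "eighty four"
    // expandID("2000"): trailing "00" with '0' in second place
    //   -> expandNumber("2000") = "two thousand"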
|
||||
|
||||
/**
|
||||
* Expands the given number string as a real number.
|
||||
*
|
||||
* @param numberString the string which is the real number to expand
|
||||
* @param wordRelation words are added to this Relation
|
||||
*/
|
||||
public static void expandReal(String numberString,
|
||||
WordRelation wordRelation) {
|
||||
|
||||
int stringLength = numberString.length();
|
||||
int position;
|
||||
|
||||
if (numberString.charAt(0) == '-') {
|
||||
// negative real numbers
|
||||
wordRelation.addWord("minus");
|
||||
expandReal(numberString.substring(1, stringLength), wordRelation);
|
||||
} else if (numberString.charAt(0) == '+') {
|
||||
// prefixed with a '+'
|
||||
wordRelation.addWord("plus");
|
||||
expandReal(numberString.substring(1, stringLength), wordRelation);
|
||||
} else if ((position = numberString.indexOf('e')) != -1
|
||||
|| (position = numberString.indexOf('E')) != -1) {
|
||||
// numbers with 'E' or 'e'
|
||||
expandReal(numberString.substring(0, position), wordRelation);
|
||||
wordRelation.addWord("e");
|
||||
expandReal(numberString.substring(position + 1), wordRelation);
|
||||
} else if ((position = numberString.indexOf('.')) != -1) {
|
||||
// numbers with '.'
|
||||
String beforeDot = numberString.substring(0, position);
|
||||
if (beforeDot.length() > 0) {
|
||||
expandReal(beforeDot, wordRelation);
|
||||
}
|
||||
wordRelation.addWord("point");
|
||||
String afterDot = numberString.substring(position + 1);
|
||||
if (afterDot.length() > 0) {
|
||||
expandDigits(afterDot, wordRelation);
|
||||
}
|
||||
} else {
|
||||
// everything else
|
||||
expandNumber(numberString, wordRelation);
|
||||
}
|
||||
}
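For example, the recursion above expands "-1.5e3" as follows (a conceptual trace):

    // "-1.5e3" -> "minus" + expandReal("1.5e3")
    // "1.5e3"  -> expandReal("1.5") + "e" + expandReal("3")
    // "1.5"    -> "one" "point" "five";  "3" -> "three"
    // result: minus one point five e three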
|
||||
|
||||
/**
|
||||
* Expands the given string of letters as a list of single char symbols.
|
||||
*
|
||||
* @param letters the string of letters to expand
|
||||
* @param wordRelation words are added to this Relation
|
||||
*/
|
||||
public static void expandLetters(String letters, WordRelation wordRelation) {
|
||||
letters = letters.toLowerCase();
|
||||
char c;
|
||||
|
||||
for (int i = 0; i < letters.length(); i++) {
|
||||
// if this is a number
|
||||
c = letters.charAt(i);
|
||||
if (Character.isDigit(c)) {
|
||||
wordRelation.addWord(digit2num[c - '0']);
|
||||
} else if (letters.equals("a")) {
|
||||
wordRelation.addWord("_a");
|
||||
} else {
|
||||
wordRelation.addWord(String.valueOf(c));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the integer value of the given string of Roman numerals.
|
||||
*
|
||||
* @param roman the string of Roman numbers
|
||||
*
|
||||
* @return the integer value
|
||||
*/
|
||||
public static int expandRoman(String roman) {
|
||||
int value = 0;
|
||||
|
||||
for (int p = 0; p < roman.length(); p++) {
|
||||
char c = roman.charAt(p);
|
||||
if (c == 'X') {
|
||||
value += 10;
|
||||
} else if (c == 'V') {
|
||||
value += 5;
|
||||
} else if (c == 'I') {
|
||||
if (p + 1 < roman.length()) {
|
||||
char p1 = roman.charAt(p + 1);
|
||||
if (p1 == 'V') {
|
||||
value += 4;
|
||||
p++;
|
||||
} else if (p1 == 'X') {
|
||||
value += 9;
|
||||
p++;
|
||||
} else {
|
||||
value += 1;
|
||||
}
|
||||
} else {
|
||||
value += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
return value;
|
||||
}
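Since expandRoman is a pure static method, it can be exercised directly; note that only I, V, and X are recognized, and any other character is silently ignored:

    int a = NumberExpander.expandRoman("XIV");  // 14: X, then I before V adds 4
    int b = NumberExpander.expandRoman("XXIX"); // 29: X, X, then I before X adds 9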
|
||||
|
||||
}
|
||||
|
|
@@ -0,0 +1,264 @@
|
|||
/**
|
||||
* Portions Copyright 2001 Sun Microsystems, Inc.
|
||||
* Portions Copyright 1999-2001 Language Technologies Institute,
|
||||
* Carnegie Mellon University.
|
||||
* All Rights Reserved. Use is subject to license terms.
|
||||
*
|
||||
* See the file "license.terms" for information on usage and
|
||||
* redistribution of this file, and for a DISCLAIMER OF ALL
|
||||
* WARRANTIES.
|
||||
*/
|
||||
package edu.cmu.sphinx.alignment.tokenizer;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.StringTokenizer;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
|
||||
/**
|
||||
* Manages a feature or item path and allows navigation to the
|
||||
* corresponding feature or item. This class is controlled by the following
|
||||
* system properties:
|
||||
*
|
||||
* <pre>
|
||||
* com.sun.speech.freetts.interpretCartPaths - default false
|
||||
* com.sun.speech.freetts.lazyCartCompile - default true
|
||||
* </pre>
|
||||
*
|
||||
* Unless com.sun.speech.freetts.interpretCartPaths is set to true,
|
||||
* instances of this class compile the paths (eagerly, or lazily on
|
||||
* first use when lazyCartCompile is true). Pre-compiling
|
||||
* paths reduces the processing time and objects needed to extract a feature or
|
||||
* an item based upon a path.
|
||||
*/
|
||||
public class PathExtractor {
|
||||
/** Logger instance. */
|
||||
private static final Logger LOGGER = Logger
|
||||
.getLogger(PathExtractor.class.getName());
|
||||
|
||||
/**
|
||||
* If this system property is set to true, paths will not be compiled.
|
||||
*/
|
||||
public final static String INTERPRET_PATHS_PROPERTY =
|
||||
"com.sun.speech.freetts.interpretCartPaths";
|
||||
|
||||
/**
|
||||
* If this system property is set to true, CART feature/item paths will
|
||||
* only be compiled as needed.
|
||||
*/
|
||||
public final static String LAZY_COMPILE_PROPERTY =
|
||||
"com.sun.speech.freetts.lazyCartCompile";
|
||||
|
||||
private final static boolean INTERPRET_PATHS = System.getProperty(
|
||||
INTERPRET_PATHS_PROPERTY, "false").equals("true");
|
||||
private final static boolean LAZY_COMPILE = System.getProperty(
|
||||
LAZY_COMPILE_PROPERTY, "true").equals("true");
|
||||
|
||||
private String pathAndFeature;
|
||||
private String path;
|
||||
private String feature;
|
||||
private Object[] compiledPath;
|
||||
|
||||
/**
|
||||
* Creates a path for the given feature.
|
||||
* @param pathAndFeature string to use
|
||||
* @param wantFeature do we need features
|
||||
*/
|
||||
public PathExtractor(String pathAndFeature, boolean wantFeature) {
|
||||
this.pathAndFeature = pathAndFeature;
|
||||
if (INTERPRET_PATHS) {
|
||||
path = pathAndFeature;
|
||||
return;
|
||||
}
|
||||
|
||||
if (wantFeature) {
|
||||
int lastDot = pathAndFeature.lastIndexOf(".");
|
||||
// string can be of the form "p.feature" or just "feature"
|
||||
|
||||
if (lastDot == -1) {
|
||||
feature = pathAndFeature;
|
||||
path = null;
|
||||
} else {
|
||||
feature = pathAndFeature.substring(lastDot + 1);
|
||||
path = pathAndFeature.substring(0, lastDot);
|
||||
}
|
||||
} else {
|
||||
this.path = pathAndFeature;
|
||||
}
|
||||
|
||||
if (!LAZY_COMPILE) {
|
||||
compiledPath = compile(path);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds the item associated with this Path.
|
||||
*
|
||||
* @param item the item to start at
|
||||
* @return the item associated with the path or null
|
||||
*/
|
||||
public Item findItem(Item item) {
|
||||
|
||||
if (INTERPRET_PATHS) {
|
||||
return item.findItem(path);
|
||||
}
|
||||
|
||||
if (compiledPath == null) {
|
||||
compiledPath = compile(path);
|
||||
}
|
||||
|
||||
Item pitem = item;
|
||||
|
||||
for (int i = 0; pitem != null && i < compiledPath.length;) {
|
||||
OpEnum op = (OpEnum) compiledPath[i++];
|
||||
if (op == OpEnum.NEXT) {
|
||||
pitem = pitem.getNext();
|
||||
} else if (op == OpEnum.PREV) {
|
||||
pitem = pitem.getPrevious();
|
||||
} else if (op == OpEnum.NEXT_NEXT) {
|
||||
pitem = pitem.getNext();
|
||||
if (pitem != null) {
|
||||
pitem = pitem.getNext();
|
||||
}
|
||||
} else if (op == OpEnum.PREV_PREV) {
|
||||
pitem = pitem.getPrevious();
|
||||
if (pitem != null) {
|
||||
pitem = pitem.getPrevious();
|
||||
}
|
||||
} else if (op == OpEnum.PARENT) {
|
||||
pitem = pitem.getParent();
|
||||
} else if (op == OpEnum.DAUGHTER) {
|
||||
pitem = pitem.getDaughter();
|
||||
} else if (op == OpEnum.LAST_DAUGHTER) {
|
||||
pitem = pitem.getLastDaughter();
|
||||
} else if (op == OpEnum.RELATION) {
|
||||
String relationName = (String) compiledPath[i++];
|
||||
pitem =
|
||||
pitem.getSharedContents()
|
||||
.getItemRelation(relationName);
|
||||
} else {
|
||||
System.out.println("findItem: bad feature " + op + " in "
|
||||
+ path);
|
||||
}
|
||||
}
|
||||
return pitem;
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds the feature associated with this Path.
|
||||
*
|
||||
* @param item the item to start at
|
||||
* @return the associated feature value, or the String "0" if not found.
|
||||
*/
|
||||
public Object findFeature(Item item) {
|
||||
|
||||
if (INTERPRET_PATHS) {
|
||||
return item.findFeature(path);
|
||||
}
|
||||
|
||||
Item pitem = findItem(item);
|
||||
Object results = null;
|
||||
if (pitem != null) {
|
||||
if (LOGGER.isLoggable(Level.FINER)) {
|
||||
LOGGER.finer("findFeature: Item [" + pitem + "], feature '"
|
||||
+ feature + "'");
|
||||
}
|
||||
results = pitem.getFeatures().getObject(feature);
|
||||
}
|
||||
|
||||
results = (results == null) ? "0" : results;
|
||||
if (LOGGER.isLoggable(Level.FINER)) {
|
||||
LOGGER.finer("findFeature: ...results = '" + results + "'");
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compiles the given path into the compiled form
|
||||
*
|
||||
* @param path the path to compile
|
||||
* @return the compiled form, an array of path traversal enums and
|
||||
* associated relation-name strings
|
||||
*/
|
||||
private Object[] compile(String path) {
|
||||
if (path == null) {
|
||||
return new Object[0];
|
||||
}
|
||||
|
||||
List<Object> list = new ArrayList<Object>();
|
||||
StringTokenizer tok = new StringTokenizer(path, ":.");
|
||||
|
||||
while (tok.hasMoreTokens()) {
|
||||
String token = tok.nextToken();
|
||||
OpEnum op = OpEnum.getInstance(token);
|
||||
if (op == null) {
|
||||
throw new Error("Bad path compiled " + path);
|
||||
}
|
||||
|
||||
list.add(op);
|
||||
|
||||
if (op == OpEnum.RELATION) {
|
||||
list.add(tok.nextToken());
|
||||
}
|
||||
}
|
||||
return list.toArray();
|
||||
}
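For instance, the path used later in this commit by Utterance.getItem compiles to an alternating stream of operators and relation names (a comment sketch):

    // compile("R:SylStructure.parent.parent.R:Word") ->
    //   [RELATION, "SylStructure", PARENT, PARENT, RELATION, "Word"]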
|
||||
|
||||
// inherited from Object
|
||||
|
||||
public String toString() {
|
||||
return pathAndFeature;
|
||||
}
|
||||
|
||||
// TODO: add these to the interface should we support binary
|
||||
// files
|
||||
/*
|
||||
* public void writeBinary(); public void readBinary();
|
||||
*/
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* An enumerated type associated with path operations.
|
||||
*/
|
||||
class OpEnum {
|
||||
static private Map<String, OpEnum> map = new HashMap<String, OpEnum>();
|
||||
|
||||
public final static OpEnum NEXT = new OpEnum("n");
|
||||
public final static OpEnum PREV = new OpEnum("p");
|
||||
public final static OpEnum NEXT_NEXT = new OpEnum("nn");
|
||||
public final static OpEnum PREV_PREV = new OpEnum("pp");
|
||||
public final static OpEnum PARENT = new OpEnum("parent");
|
||||
public final static OpEnum DAUGHTER = new OpEnum("daughter");
|
||||
public final static OpEnum LAST_DAUGHTER = new OpEnum("daughtern");
|
||||
public final static OpEnum RELATION = new OpEnum("R");
|
||||
|
||||
private String name;
|
||||
|
||||
/**
|
||||
* Creates a new OpEnum. There is a limited set of OpEnums.
|
||||
*
|
||||
* @param name the path name for this Enum
|
||||
*/
|
||||
private OpEnum(String name) {
|
||||
this.name = name;
|
||||
map.put(name, this);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the OpEnum that is associated with the given name.
|
||||
*
|
||||
* @param name the name of the OpEnum of interest
|
||||
*
|
||||
* @return the corresponding OpEnum, or null if there is none
|
||||
*/
|
||||
public static OpEnum getInstance(String name) {
|
||||
return map.get(name);
|
||||
}
|
||||
|
||||
// inherited from Object
|
||||
public String toString() {
|
||||
return name;
|
||||
}
|
||||
}
|
||||
|
|
@@ -0,0 +1,29 @@
|
|||
/**
|
||||
* Portions Copyright 2001 Sun Microsystems, Inc.
|
||||
* Portions Copyright 1999-2001 Language Technologies Institute,
|
||||
* Carnegie Mellon University.
|
||||
* All Rights Reserved. Use is subject to license terms.
|
||||
*
|
||||
* See the file "license.terms" for information on usage and
|
||||
* redistribution of this file, and for a DISCLAIMER OF ALL
|
||||
* WARRANTIES.
|
||||
*/
|
||||
package edu.cmu.sphinx.alignment.tokenizer;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URL;
|
||||
|
||||
/**
|
||||
* Implements a finite state machine that checks if a given string is a prefix.
|
||||
*/
|
||||
public class PrefixFSM extends PronounceableFSM {
|
||||
|
||||
/**
|
||||
* Constructs a PrefixFSM.
|
||||
* @param url the URL of the FSM definition
|
||||
* @throws IOException if load failed
|
||||
*/
|
||||
public PrefixFSM(URL url) throws IOException {
|
||||
super(url, true);
|
||||
}
|
||||
}
|
||||
|
|
@@ -0,0 +1,172 @@
|
|||
/**
|
||||
* Portions Copyright 2001 Sun Microsystems, Inc.
|
||||
* Portions Copyright 1999-2001 Language Technologies Institute,
|
||||
* Carnegie Mellon University.
|
||||
* All Rights Reserved. Use is subject to license terms.
|
||||
*
|
||||
* See the file "license.terms" for information on usage and
|
||||
* redistribution of this file, and for a DISCLAIMER OF ALL
|
||||
* WARRANTIES.
|
||||
*/
|
||||
package edu.cmu.sphinx.alignment.tokenizer;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.net.URL;
|
||||
import java.util.StringTokenizer;
|
||||
|
||||
/**
|
||||
* Implements a finite state machine that checks if a given string is
|
||||
* pronounceable. If it is pronounceable, the method <code>accept()</code> will
|
||||
* return true.
|
||||
*/
|
||||
public class PronounceableFSM {
|
||||
|
||||
private static final String VOCAB_SIZE = "VOCAB_SIZE";
|
||||
private static final String NUM_OF_TRANSITIONS = "NUM_OF_TRANSITIONS";
|
||||
private static final String TRANSITIONS = "TRANSITIONS";
|
||||
|
||||
/**
|
||||
* The vocabulary size.
|
||||
*/
|
||||
protected int vocabularySize;
|
||||
|
||||
/**
|
||||
* The transitions of this FSM
|
||||
*/
|
||||
protected int[] transitions;
|
||||
|
||||
/**
|
||||
* Whether we should scan the input string from the front.
|
||||
*/
|
||||
protected boolean scanFromFront;
|
||||
|
||||
/**
|
||||
* Constructs a PronounceableFSM with information in the given URL.
|
||||
*
|
||||
* @param url the URL that contains the FSM specification
|
||||
* @param scanFromFront indicates whether this FSM should scan the input
|
||||
* string from the front, or from the back
|
||||
* @throws IOException if something went wrong
|
||||
*/
|
||||
public PronounceableFSM(URL url, boolean scanFromFront) throws IOException {
|
||||
this.scanFromFront = scanFromFront;
|
||||
InputStream is = url.openStream();
|
||||
loadText(is);
|
||||
is.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a PronounceableFSM with the given attributes.
|
||||
*
|
||||
* @param vocabularySize the vocabulary size of the FSM
|
||||
* @param transitions the transitions of the FSM
|
||||
* @param scanFromFront indicates whether this FSM should scan the input
|
||||
* string from the front, or from the back
|
||||
*/
|
||||
public PronounceableFSM(int vocabularySize, int[] transitions,
|
||||
boolean scanFromFront) {
|
||||
this.vocabularySize = vocabularySize;
|
||||
this.transitions = transitions;
|
||||
this.scanFromFront = scanFromFront;
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads the ASCII specification of this FSM from the given InputStream.
|
||||
*
|
||||
* @param is the input stream to load from
|
||||
*
|
||||
* @throws IOException if an error occurs on input.
|
||||
*/
|
||||
private void loadText(InputStream is) throws IOException {
|
||||
BufferedReader reader = new BufferedReader(new InputStreamReader(is));
|
||||
String line = null;
|
||||
while ((line = reader.readLine()) != null) {
|
||||
if (!line.startsWith("***")) {
|
||||
if (line.startsWith(VOCAB_SIZE)) {
|
||||
vocabularySize = parseLastInt(line);
|
||||
} else if (line.startsWith(NUM_OF_TRANSITIONS)) {
|
||||
int transitionsSize = parseLastInt(line);
|
||||
transitions = new int[transitionsSize];
|
||||
} else if (line.startsWith(TRANSITIONS)) {
|
||||
StringTokenizer st = new StringTokenizer(line);
|
||||
String transition = st.nextToken(); // consume the "TRANSITIONS" label
|
||||
int i = 0;
|
||||
while (st.hasMoreTokens() && i < transitions.length) {
|
||||
transition = st.nextToken().trim();
|
||||
transitions[i++] = Integer.parseInt(transition);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
reader.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the integer value of the last integer in the given string.
|
||||
*
|
||||
* @param line the line to parse the integer from
|
||||
*
|
||||
* @return an integer
|
||||
*/
|
||||
private int parseLastInt(String line) {
|
||||
String lastInt = line.trim().substring(line.lastIndexOf(" "));
|
||||
return Integer.parseInt(lastInt.trim());
|
||||
}
|
||||
|
||||
/**
|
||||
* Causes this FSM to transition to the next state given the current state
|
||||
* and input symbol.
|
||||
*
|
||||
* @param state the current state
|
||||
* @param symbol the input symbol
|
||||
*
|
||||
* @return the next state, or -1 if there is no matching transition
|
||||
*/
|
||||
private int transition(int state, int symbol) {
|
||||
for (int i = state; i < transitions.length; i++) {
|
||||
if ((transitions[i] % vocabularySize) == symbol) {
|
||||
return (transitions[i] / vocabularySize);
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
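The table packs each transition into a single int, which the modulo and division above unpack; the current state doubles as the starting index for the linear scan. A sketch of the encoding, as inferred from the code:

    // entry     = nextState * vocabularySize + symbol
    // symbol    = entry % vocabularySize
    // nextState = entry / vocabularySize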
|
||||
|
||||
/**
|
||||
* Checks to see if this finite state machine accepts the given input
|
||||
* string.
|
||||
*
|
||||
* @param inputString the input string to be tested
|
||||
*
|
||||
* @return true if this FSM accepts, false if it rejects
|
||||
*/
|
||||
public boolean accept(String inputString) {
|
||||
int symbol;
|
||||
int state = transition(0, '#');
|
||||
int leftEnd = inputString.length() - 1;
|
||||
int start = (scanFromFront) ? 0 : leftEnd;
|
||||
|
||||
for (int i = start; 0 <= i && i <= leftEnd;) {
|
||||
char c = inputString.charAt(i);
|
||||
if (c == 'n' || c == 'm') {
|
||||
symbol = 'N';
|
||||
} else if ("aeiouy".indexOf(c) != -1) {
|
||||
symbol = 'V';
|
||||
} else {
|
||||
symbol = c;
|
||||
}
|
||||
state = transition(state, symbol);
|
||||
if (state == -1) {
|
||||
return false;
|
||||
} else if (symbol == 'V') {
|
||||
return true;
|
||||
}
|
||||
if (scanFromFront) {
|
||||
i++;
|
||||
} else {
|
||||
i--;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
@@ -0,0 +1,145 @@
|
|||
/**
|
||||
* Portions Copyright 2001 Sun Microsystems, Inc.
|
||||
* Portions Copyright 1999-2001 Language Technologies Institute,
|
||||
* Carnegie Mellon University.
|
||||
* All Rights Reserved. Use is subject to license terms.
|
||||
*
|
||||
* See the file "license.terms" for information on usage and
|
||||
* redistribution of this file, and for a DISCLAIMER OF ALL
|
||||
* WARRANTIES.
|
||||
*/
|
||||
package edu.cmu.sphinx.alignment.tokenizer;
|
||||
|
||||
import edu.cmu.sphinx.alignment.USEnglishTokenizer;
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Represents an ordered set of {@link Item}s and their associated children. A
|
||||
* relation has a name and a list of items, and is added to an
|
||||
* {@link Utterance} via an {@link USEnglishTokenizer}.
|
||||
*/
|
||||
public class Relation {
|
||||
private String name;
|
||||
private Utterance owner;
|
||||
private Item head;
|
||||
private Item tail;
|
||||
|
||||
/**
|
||||
* Name of the relation that contains tokens from the original input text.
|
||||
* This is the first thing to be added to the utterance.
|
||||
*/
|
||||
public static final String TOKEN = "Token";
|
||||
|
||||
/**
|
||||
* Name of the relation that contains the normalized version of the
|
||||
* original input text.
|
||||
*/
|
||||
public static final String WORD = "Word";
|
||||
|
||||
/**
|
||||
* Creates a relation.
|
||||
*
|
||||
* @param name the name of the Relation
|
||||
* @param owner the utterance that contains this relation
|
||||
*/
|
||||
Relation(String name, Utterance owner) {
|
||||
this.name = name;
|
||||
this.owner = owner;
|
||||
head = null;
|
||||
tail = null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the name of this Relation.
|
||||
*
|
||||
* @return the name of this Relation
|
||||
*/
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the head of the item list.
|
||||
*
|
||||
* @return the head item
|
||||
*/
|
||||
public Item getHead() {
|
||||
return head;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the head of the item list.
|
||||
*
|
||||
* @param item the new head item
|
||||
*/
|
||||
void setHead(Item item) {
|
||||
head = item;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the tail of the item list.
|
||||
*
|
||||
* @return the tail item
|
||||
*/
|
||||
public Item getTail() {
|
||||
return tail;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the tail of the item list.
|
||||
*
|
||||
* @param item the new tail item
|
||||
*/
|
||||
void setTail(Item item) {
|
||||
tail = item;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a new item to this relation. The item added does not share its
|
||||
* contents with any other item.
|
||||
*
|
||||
* @return the newly added item
|
||||
*/
|
||||
public Item appendItem() {
|
||||
return appendItem(null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a new item to this relation. The item added shares its contents
|
||||
* with the original item.
|
||||
*
|
||||
* @param originalItem the item whose contents will be shared by the new item
|
||||
*
|
||||
* @return the newly added item
|
||||
*/
|
||||
public Item appendItem(Item originalItem) {
|
||||
ItemContents contents;
|
||||
Item newItem;
|
||||
|
||||
if (originalItem == null) {
|
||||
contents = null;
|
||||
} else {
|
||||
contents = originalItem.getSharedContents();
|
||||
}
|
||||
newItem = new Item(this, contents);
|
||||
if (head == null) {
|
||||
head = newItem;
|
||||
}
|
||||
|
||||
if (tail != null) {
|
||||
tail.attach(newItem);
|
||||
}
|
||||
tail = newItem;
|
||||
return newItem;
|
||||
}
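A short usage sketch: relations are created through an Utterance (the utterance variable and the "Phrase" relation name below are hypothetical):

    Relation phrases = utterance.createRelation("Phrase");
    Item first = phrases.appendItem();  // head == tail == first
    Item second = phrases.appendItem(); // attached after first; tail advances
    first.getFeatures().setString("name", "hello");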
|
||||
|
||||
/**
|
||||
* Returns the utterance that contains this relation.
|
||||
*
|
||||
* @return the utterance that contains this relation
|
||||
*/
|
||||
public Utterance getUtterance() {
|
||||
return owner;
|
||||
}
|
||||
}
|
||||
|
|
@@ -0,0 +1,29 @@
|
|||
/**
|
||||
* Portions Copyright 2001 Sun Microsystems, Inc.
|
||||
* Portions Copyright 1999-2001 Language Technologies Institute,
|
||||
* Carnegie Mellon University.
|
||||
* All Rights Reserved. Use is subject to license terms.
|
||||
*
|
||||
* See the file "license.terms" for information on usage and
|
||||
* redistribution of this file, and for a DISCLAIMER OF ALL
|
||||
* WARRANTIES.
|
||||
*/
|
||||
package edu.cmu.sphinx.alignment.tokenizer;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URL;
|
||||
|
||||
/**
|
||||
* Implements a finite state machine that checks if a given string is a suffix.
|
||||
*/
|
||||
public class SuffixFSM extends PronounceableFSM {
|
||||
|
||||
/**
|
||||
* Constructs a SuffixFSM.
|
||||
* @param url the URL of the suffix FSM definition
|
||||
* @throws IOException if loading failed
|
||||
*/
|
||||
public SuffixFSM(URL url) throws IOException {
|
||||
super(url, false);
|
||||
}
|
||||
}
|
||||
|
|
@@ -0,0 +1,229 @@
|
|||
/**
|
||||
* Portions Copyright 2001 Sun Microsystems, Inc.
|
||||
* Portions Copyright 1999-2001 Language Technologies Institute,
|
||||
* Carnegie Mellon University.
|
||||
* All Rights Reserved. Use is subject to license terms.
|
||||
*
|
||||
* See the file "license.terms" for information on usage and
|
||||
* redistribution of this file, and for a DISCLAIMER OF ALL
|
||||
* WARRANTIES.
|
||||
*/
|
||||
package edu.cmu.sphinx.alignment.tokenizer;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
import edu.cmu.sphinx.alignment.Token;
|
||||
|
||||
/**
|
||||
* Holds all the data for an utterance to be spoken. It is incrementally
|
||||
* modified by various UtteranceProcessor implementations. An utterance
|
||||
* contains a set of Features (essentially a set of properties) and a set of
|
||||
* Relations. A Relation is an ordered set of Item graphs. The utterance
|
||||
* contains a set of features and implements FeatureSet so that applications
|
||||
* can set/get features directly from the utterance. If a feature query is not
|
||||
* found in the utterance feature set, the query is forwarded to the FeatureSet
|
||||
* of the voice associated with the utterance.
|
||||
*/
|
||||
public class Utterance {
|
||||
private FeatureSet features;
|
||||
private FeatureSet relations;
|
||||
|
||||
/**
|
||||
* Creates an utterance with the given set of tokenized text.
|
||||
*
|
||||
* @param tokenizer tokenizer to use for utterance.
|
||||
*/
|
||||
public Utterance(CharTokenizer tokenizer) {
|
||||
features = new FeatureSet();
|
||||
relations = new FeatureSet();
|
||||
setTokenList(tokenizer);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new relation with the given name and adds it to this
|
||||
* utterance.
|
||||
*
|
||||
* @param name the name of the new relation
|
||||
*
|
||||
* @return the newly created relation
|
||||
*/
|
||||
public Relation createRelation(String name) {
|
||||
Relation relation = new Relation(name, this);
|
||||
relations.setObject(name, relation);
|
||||
return relation;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves a relation from this utterance.
|
||||
*
|
||||
* @param name the name of the Relation
|
||||
*
|
||||
* @return the relation or null if the relation is not found
|
||||
*/
|
||||
public Relation getRelation(String name) {
|
||||
return (Relation) relations.getObject(name);
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines if this utterance contains a relation with the given name.
|
||||
*
|
||||
* @param name the name of the relation of interest.
|
||||
* @return if relation is present
|
||||
*/
|
||||
public boolean hasRelation(String name) {
|
||||
return relations.isPresent(name);
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes the named feature from this set of features.
|
||||
*
|
||||
* @param name the name of the feature of interest
|
||||
*/
|
||||
public void remove(String name) {
|
||||
features.remove(name);
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience method that sets the named feature as an int.
|
||||
*
|
||||
* @param name the name of the feature
|
||||
* @param value the value of the feature
|
||||
*/
|
||||
public void setInt(String name, int value) {
|
||||
features.setInt(name, value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience method that sets the named feature as a float.
|
||||
*
|
||||
* @param name the name of the feature
|
||||
* @param value the value of the feature
|
||||
*/
|
||||
public void setFloat(String name, float value) {
|
||||
features.setFloat(name, value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience method that sets the named feature as a String.
|
||||
*
|
||||
* @param name the name of the feature
|
||||
* @param value the value of the feature
|
||||
*/
|
||||
public void setString(String name, String value) {
|
||||
features.setString(name, value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the named feature.
|
||||
*
|
||||
* @param name the name of the feature
|
||||
* @param value the value of the feature
|
||||
*/
|
||||
public void setObject(String name, Object value) {
|
||||
features.setObject(name, value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the Item in the given Relation associated with the given time.
|
||||
*
|
||||
* @param relation the name of the relation
|
||||
* @param time the time
|
||||
* @return the item
|
||||
*/
|
||||
public Item getItem(String relation, float time) {
|
||||
Relation segmentRelation = null;
|
||||
String pathName = null;
|
||||
|
||||
if (relation.equals(Relation.WORD)) {
|
||||
pathName = "R:SylStructure.parent.parent.R:Word";
|
||||
} else if (relation.equals(Relation.TOKEN)) {
|
||||
pathName = "R:SylStructure.parent.parent.R:Token.parent";
|
||||
} else {
|
||||
throw new IllegalArgumentException(
|
||||
"Utterance.getItem(): relation cannot be " + relation);
|
||||
}
|
||||
|
||||
PathExtractor path = new PathExtractor(pathName, false);
|
||||
|
||||
// get the Item in the Segment Relation with the given time
|
||||
Item segmentItem = getItem(segmentRelation, time);
|
||||
|
||||
if (segmentItem != null) {
|
||||
return path.findItem(segmentItem);
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private static Item getItem(Relation segmentRelation, float time) {
|
||||
Item lastSegment = segmentRelation.getTail();
|
||||
// If the given time is closer to the front than the end, search from
|
||||
// the front; otherwise, start the search from the end.
|
||||
// (This might not be the best strategy, though.)
|
||||
float lastSegmentEndTime = getSegmentEnd(lastSegment);
|
||||
if (time < 0 || lastSegmentEndTime < time) {
|
||||
return null;
|
||||
} else if (lastSegmentEndTime - time > time) {
|
||||
return findFromFront(segmentRelation, time);
|
||||
} else {
|
||||
return findFromEnd(segmentRelation, time);
|
||||
}
|
||||
}
|
||||
|
||||
private static Item findFromEnd(Relation segmentRelation, float time) {
|
||||
Item item = segmentRelation.getTail();
|
||||
while (item != null && getSegmentEnd(item) > time) {
|
||||
item = item.getPrevious();
|
||||
}
|
||||
|
||||
if (item != segmentRelation.getTail()) {
|
||||
item = item.getNext();
|
||||
}
|
||||
|
||||
return item;
|
||||
}
|
||||
|
||||
private static Item findFromFront(Relation segmentRelation, float time) {
|
||||
Item item = segmentRelation.getHead();
|
||||
while (item != null && time > getSegmentEnd(item)) {
|
||||
item = item.getNext();
|
||||
}
|
||||
return item;
|
||||
}
|
||||
|
||||
private static float getSegmentEnd(Item segment) {
|
||||
FeatureSet segmentFeatureSet = segment.getFeatures();
|
||||
return segmentFeatureSet.getFloat("end");
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the token list for this utterance. Note that this could be
|
||||
* optimized by turning the token list directly into the token relation.
|
||||
*
|
||||
* @param tokenList the tokenList
|
||||
*
|
||||
*/
|
||||
private void setTokenList(Iterator<Token> tokenizer) {
|
||||
Relation relation = createRelation(Relation.TOKEN);
|
||||
while (tokenizer.hasNext()) {
|
||||
Token token = tokenizer.next();
|
||||
String tokenWord = token.getWord();
|
||||
|
||||
if (tokenWord != null && tokenWord.length() > 0) {
|
||||
Item item = relation.appendItem();
|
||||
|
||||
FeatureSet featureSet = item.getFeatures();
|
||||
featureSet.setString("name", tokenWord);
|
||||
featureSet.setString("whitespace", token.getWhitespace());
|
||||
featureSet.setString("prepunctuation",
|
||||
token.getPrepunctuation());
|
||||
featureSet.setString("punc", token.getPostpunctuation());
|
||||
featureSet.setString("file_pos",
|
||||
String.valueOf(token.getPosition()));
|
||||
featureSet.setString("line_number",
|
||||
String.valueOf(token.getLineNumber()));
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@@ -0,0 +1,85 @@
/**
 * Portions Copyright 2001 Sun Microsystems, Inc.
 * Portions Copyright 1999-2001 Language Technologies Institute,
 * Carnegie Mellon University.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 */
package edu.cmu.sphinx.alignment.tokenizer;

import edu.cmu.sphinx.alignment.USEnglishTokenizer;

/**
 * Helper class to add words and breaks into a Relation object.
 */
public class WordRelation {

    private Relation relation;
    private USEnglishTokenizer tokenToWords;

    private WordRelation(Relation parentRelation, USEnglishTokenizer tokenToWords) {
        this.relation = parentRelation;
        this.tokenToWords = tokenToWords;
    }

    /**
     * Creates a WordRelation object with the given utterance and tokenizer.
     *
     * @param utterance the Utterance from which to create a Relation
     * @param tokenToWords the USEnglishTokenizer object to use
     *
     * @return a WordRelation object
     */
    public static WordRelation createWordRelation(Utterance utterance,
            USEnglishTokenizer tokenToWords) {
        Relation relation = utterance.createRelation(Relation.WORD);
        return new WordRelation(relation, tokenToWords);
    }

    /**
     * Adds a break as a feature to the last item in the list.
     */
    public void addBreak() {
        Item wordItem = relation.getTail();
        if (wordItem != null) {
            FeatureSet featureSet = wordItem.getFeatures();
            featureSet.setString("break", "1");
        }
    }

    /**
     * Adds a word as an Item to this WordRelation object.
     *
     * @param word the word to add
     */
    public void addWord(String word) {
        Item tokenItem = tokenToWords.getTokenItem();
        Item wordItem = tokenItem.createDaughter();
        FeatureSet featureSet = wordItem.getFeatures();
        featureSet.setString("name", word);
        relation.appendItem(wordItem);
    }

    /**
     * Sets the last Item in this WordRelation to the given word.
     *
     * @param word the word to set
     */
    public void setLastWord(String word) {
        Item lastItem = relation.getTail();
        FeatureSet featureSet = lastItem.getFeatures();
        featureSet.setString("name", word);
    }

    /**
     * Returns the last item in this WordRelation.
     *
     * @return the last item
     */
    public Item getTail() {
        return relation.getTail();
    }
}
@@ -0,0 +1,81 @@
/*
 * Copyright 2013 Carnegie Mellon University.
 * Portions Copyright 2004 Sun Microsystems, Inc.
 * Portions Copyright 2004 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 */

package edu.cmu.sphinx.api;

import java.io.IOException;

import edu.cmu.sphinx.decoder.adaptation.ClusteredDensityFileData;
import edu.cmu.sphinx.decoder.adaptation.Stats;
import edu.cmu.sphinx.decoder.adaptation.Transform;
import edu.cmu.sphinx.linguist.acoustic.tiedstate.Sphinx3Loader;
import edu.cmu.sphinx.recognizer.Recognizer;
import edu.cmu.sphinx.result.Result;

/**
 * Base class for high-level speech recognizers.
 */
public class AbstractSpeechRecognizer {

    protected final Context context;
    protected final Recognizer recognizer;

    protected ClusteredDensityFileData clusters;

    protected final SpeechSourceProvider speechSourceProvider;

    /**
     * Constructs a recognizer object using the provided configuration.
     *
     * @param configuration initial configuration
     * @throws IOException if I/O went wrong
     */
    public AbstractSpeechRecognizer(Configuration configuration)
        throws IOException
    {
        this(new Context(configuration));
    }

    protected AbstractSpeechRecognizer(Context context) throws IOException {
        this.context = context;
        recognizer = context.getInstance(Recognizer.class);
        speechSourceProvider = new SpeechSourceProvider();
    }

    /**
     * Returns the result of the recognition.
     *
     * @return recognition result or {@code null} if there is no result, e.g., because the
     *         microphone or input stream has been closed
     */
    public SpeechResult getResult() {
        Result result = recognizer.recognize();
        return null == result ? null : new SpeechResult(result);
    }

    public Stats createStats(int numClasses) {
        clusters = new ClusteredDensityFileData(context.getLoader(), numClasses);
        return new Stats(context.getLoader(), clusters);
    }

    public void setTransform(Transform transform) {
        if (clusters != null) {
            context.getLoader().update(transform, clusters);
        }
    }

    public void loadTransform(String path, int numClass) throws Exception {
        clusters = new ClusteredDensityFileData(context.getLoader(), numClass);
        Transform transform = new Transform((Sphinx3Loader) context.getLoader(), numClass);
        transform.load(path);
        context.getLoader().update(transform, clusters);
    }
}
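
The adaptation hooks above (createStats, setTransform, loadTransform) let client code apply a speaker-specific MLLR transform to the loaded acoustic model; a minimal sketch, assuming a transform file named "mllr_matrix" produced by an earlier adaptation run (the file name and cluster count are placeholders):

    // recognizer is an instance of an AbstractSpeechRecognizer subclass;
    // "mllr_matrix" and the cluster count of 1 are assumptions.
    recognizer.loadTransform("mllr_matrix", 1);
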
@@ -0,0 +1,139 @@
/*
 * Copyright 2013 Carnegie Mellon University.
 * Portions Copyright 2004 Sun Microsystems, Inc.
 * Portions Copyright 2004 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 */

package edu.cmu.sphinx.api;

/**
 * Represents common configuration options.
 *
 * This configuration is used by high-level recognition classes.
 *
 * @see SpeechAligner
 * @see LiveSpeechRecognizer
 * @see StreamSpeechRecognizer
 */
public class Configuration {

    private String acousticModelPath;
    private String dictionaryPath;
    private String languageModelPath;
    private String grammarPath;
    private String grammarName;

    private int sampleRate = 16000;
    private boolean useGrammar = false;

    /**
     * @return path to the acoustic model
     */
    public String getAcousticModelPath() {
        return acousticModelPath;
    }

    /**
     * Sets path to the acoustic model.
     * @param acousticModelPath URL of the acoustic model
     */
    public void setAcousticModelPath(String acousticModelPath) {
        this.acousticModelPath = acousticModelPath;
    }

    /**
     * @return path to the dictionary
     */
    public String getDictionaryPath() {
        return dictionaryPath;
    }

    /**
     * Sets path to the dictionary.
     * @param dictionaryPath URL of the dictionary
     */
    public void setDictionaryPath(String dictionaryPath) {
        this.dictionaryPath = dictionaryPath;
    }

    /**
     * @return path to the language model
     */
    public String getLanguageModelPath() {
        return languageModelPath;
    }

    /**
     * Sets path to the language model resource.
     * @param languageModelPath URL of the language model
     */
    public void setLanguageModelPath(String languageModelPath) {
        this.languageModelPath = languageModelPath;
    }

    /**
     * @return grammar path
     */
    public String getGrammarPath() {
        return grammarPath;
    }

    /**
     * Sets path to grammar resources.
     * @param grammarPath URL of the grammar
     */
    public void setGrammarPath(String grammarPath) {
        this.grammarPath = grammarPath;
    }

    /**
     * @return grammar name
     */
    public String getGrammarName() {
        return grammarName;
    }

    /**
     * Sets grammar name if a fixed grammar is used.
     * @param grammarName name of the grammar
     */
    public void setGrammarName(String grammarName) {
        this.grammarName = grammarName;
    }

    /**
     * @return whether a fixed grammar should be used instead of a language model
     */
    public boolean getUseGrammar() {
        return useGrammar;
    }

    /**
     * Sets whether a fixed grammar should be used instead of a language model.
     * @param useGrammar whether to use a grammar or a language model
     */
    public void setUseGrammar(boolean useGrammar) {
        this.useGrammar = useGrammar;
    }

    /**
     * @return the configured sample rate
     */
    public int getSampleRate() {
        return sampleRate;
    }

    /**
     * Sets sample rate for the input stream.
     * @param sampleRate sample rate in Hertz
     */
    public void setSampleRate(int sampleRate) {
        this.sampleRate = sampleRate;
    }
}
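
A minimal setup sketch for this class; the resource paths below are assumptions standing in for whatever acoustic model, dictionary, and language model your sphinx4-data bundle ships:

    Configuration configuration = new Configuration();
    // Placeholder resource paths; substitute your own model locations.
    configuration.setAcousticModelPath("resource:/edu/cmu/sphinx/models/en-us/en-us");
    configuration.setDictionaryPath("resource:/edu/cmu/sphinx/models/en-us/cmudict-en-us.dict");
    configuration.setLanguageModelPath("resource:/edu/cmu/sphinx/models/en-us/en-us.lm.dmp");
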
@@ -0,0 +1,222 @@
/*
 * Copyright 2013 Carnegie Mellon University.
 * Portions Copyright 2004 Sun Microsystems, Inc.
 * Portions Copyright 2004 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 */

package edu.cmu.sphinx.api;

import static edu.cmu.sphinx.util.props.ConfigurationManagerUtils.resourceToURL;
import static edu.cmu.sphinx.util.props.ConfigurationManagerUtils.setProperty;

import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;

import edu.cmu.sphinx.frontend.frequencywarp.MelFrequencyFilterBank2;
import edu.cmu.sphinx.frontend.util.StreamDataSource;
import edu.cmu.sphinx.linguist.acoustic.tiedstate.Loader;
import edu.cmu.sphinx.util.TimeFrame;
import edu.cmu.sphinx.util.props.Configurable;
import edu.cmu.sphinx.util.props.ConfigurationManager;

/**
 * Helps to tweak the configuration without touching the XML file directly.
 */
public class Context {

    private final ConfigurationManager configurationManager;

    /**
     * Constructs a builder that uses the default XML configuration.
     * @param config configuration
     * @throws MalformedURLException if failed to load configuration file
     * @throws IOException if failed to load configuration file
     */
    public Context(Configuration config)
        throws IOException, MalformedURLException
    {
        this("resource:/edu/cmu/sphinx/api/default.config.xml", config);
    }

    /**
     * Constructs a builder using a user-supplied XML configuration.
     *
     * @param path path to XML-resource with configuration
     * @param config configuration
     * @throws MalformedURLException if failed to load configuration file
     * @throws IOException if failed to load configuration file
     */
    public Context(String path, Configuration config)
        throws IOException, MalformedURLException
    {
        configurationManager = new ConfigurationManager(resourceToURL(path));

        setAcousticModel(config.getAcousticModelPath());
        setDictionary(config.getDictionaryPath());

        if (null != config.getGrammarPath() && config.getUseGrammar())
            setGrammar(config.getGrammarPath(), config.getGrammarName());
        if (null != config.getLanguageModelPath() && !config.getUseGrammar())
            setLanguageModel(config.getLanguageModelPath());

        setSampleRate(config.getSampleRate());

        // Force ConfigurationManager to build the whole graph
        // in order to enable instance lookup by class.
        configurationManager.lookup("recognizer");
    }

    /**
     * Sets acoustic model location.
     *
     * It also reads feat.params, which should be located at the root of the
     * acoustic model, and sets the corresponding parameters of the
     * {@link MelFrequencyFilterBank2} instance.
     *
     * @param path path to directory with acoustic model files
     *
     * @throws IOException if failed to read feat.params
     */
    public void setAcousticModel(String path) throws IOException {
        setLocalProperty("acousticModelLoader->location", path);
        setLocalProperty("dictionary->fillerPath", path + "/noisedict");
    }

    /**
     * Sets dictionary.
     *
     * @param path path to directory with dictionary files
     */
    public void setDictionary(String path) {
        setLocalProperty("dictionary->dictionaryPath", path);
    }

    /**
     * Sets the sample rate.
     *
     * @param sampleRate sample rate of the input stream
     */
    public void setSampleRate(int sampleRate) {
        setLocalProperty("dataSource->sampleRate", Integer.toString(sampleRate));
    }

    /**
     * Sets path to the grammar files.
     *
     * Enables a static grammar and disables the probabilistic language model.
     * JSGF and GrXML formats are supported.
     *
     * @param path path to the grammar files
     * @param name name of the main grammar to use
     * @see Context#setLanguageModel(String)
     */
    public void setGrammar(String path, String name) {
        // TODO: use a single param of type File, cache directory part
        if (name.endsWith(".grxml")) {
            setLocalProperty("grXmlGrammar->grammarLocation", path + name);
            setLocalProperty("flatLinguist->grammar", "grXmlGrammar");
        } else {
            setLocalProperty("jsgfGrammar->grammarLocation", path);
            setLocalProperty("jsgfGrammar->grammarName", name);
            setLocalProperty("flatLinguist->grammar", "jsgfGrammar");
        }
        setLocalProperty("decoder->searchManager", "simpleSearchManager");
    }

    /**
     * Sets path to the language model.
     *
     * Enables the probabilistic language model and disables the static
     * grammar. Currently it supports the ".lm" and ".dmp" file formats.
     *
     * @param path path to the language model file
     * @see Context#setGrammar(String, String)
     *
     * @throws IllegalArgumentException if path ends with an unsupported extension
     */
    public void setLanguageModel(String path) {
        if (path.endsWith(".lm")) {
            setLocalProperty("simpleNGramModel->location", path);
            setLocalProperty(
                    "lexTreeLinguist->languageModel", "simpleNGramModel");
        } else if (path.endsWith(".dmp")) {
            setLocalProperty("largeTrigramModel->location", path);
            setLocalProperty(
                    "lexTreeLinguist->languageModel", "largeTrigramModel");
        } else {
            throw new IllegalArgumentException(
                    "Unknown format extension: " + path);
        }
        // search manager for LVCSR is set by default
    }

    public void setSpeechSource(InputStream stream, TimeFrame timeFrame) {
        getInstance(StreamDataSource.class).setInputStream(stream, timeFrame);
        setLocalProperty("trivialScorer->frontend", "liveFrontEnd");
    }

    /**
     * Sets byte stream as the speech source.
     *
     * @param stream stream to process
     */
    public void setSpeechSource(InputStream stream) {
        getInstance(StreamDataSource.class).setInputStream(stream);
        setLocalProperty("trivialScorer->frontend", "liveFrontEnd");
    }

    /**
     * Sets a property within a "component" tag in the configuration.
     *
     * Use this method to alter the "value" attribute of a "property" tag
     * inside a "component" tag of the XML configuration.
     *
     * @param name property name
     * @param value property value
     * @see Context#setGlobalProperty(String, Object)
     */
    public void setLocalProperty(String name, Object value) {
        setProperty(configurationManager, name, value.toString());
    }

    /**
     * Sets a property of a top-level "property" tag.
     *
     * Use this method to alter the "value" attribute of a "property" tag
     * whose parent is the root tag "config" of the XML configuration.
     *
     * @param name property name
     * @param value property value
     * @see Context#setLocalProperty(String, Object)
     */
    public void setGlobalProperty(String name, Object value) {
        configurationManager.setGlobalProperty(name, value.toString());
    }

    /**
     * Returns an instance from the XML configuration by its class.
     *
     * @param clazz class to look up
     * @param <C> generic
     * @return instance of the specified class or null
     */
    public <C extends Configurable> C getInstance(Class<C> clazz) {
        return configurationManager.lookup(clazz);
    }

    /**
     * Returns the Loader object used for loading the acoustic model.
     *
     * @return the loader object
     */
    public Loader getLoader() {
        return (Loader) configurationManager.lookup("acousticModelLoader");
    }
}
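
Property names passed to setLocalProperty follow the "component->property" form used throughout this file; for example, the search-manager switch performed in setGrammar() could equally be done from client code (component and value names taken from the default configuration referenced above):

    Context context = new Context(configuration);
    context.setLocalProperty("decoder->searchManager", "simpleSearchManager");
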
@@ -0,0 +1,62 @@
/*
 * Copyright 2013 Carnegie Mellon University.
 * Portions Copyright 2004 Sun Microsystems, Inc.
 * Portions Copyright 2004 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 */

package edu.cmu.sphinx.api;

import java.io.IOException;

import edu.cmu.sphinx.frontend.util.StreamDataSource;

/**
 * High-level class for live speech recognition.
 */
public class LiveSpeechRecognizer extends AbstractSpeechRecognizer {

    private final Microphone microphone;

    /**
     * Constructs a new live recognition object.
     *
     * @param configuration common configuration
     * @throws IOException if model IO went wrong
     */
    public LiveSpeechRecognizer(Configuration configuration) throws IOException
    {
        super(configuration);
        microphone = speechSourceProvider.getMicrophone();
        context.getInstance(StreamDataSource.class)
            .setInputStream(microphone.getStream());
    }

    /**
     * Starts recognition process.
     *
     * @param clear clear cached microphone data (currently ignored)
     * @see LiveSpeechRecognizer#stopRecognition()
     */
    public void startRecognition(boolean clear) {
        recognizer.allocate();
        microphone.startRecording();
    }

    /**
     * Stops recognition process.
     *
     * Recognition process is paused until the next call to startRecognition.
     *
     * @see LiveSpeechRecognizer#startRecognition(boolean)
     */
    public void stopRecognition() {
        microphone.stopRecording();
        recognizer.deallocate();
    }
}
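
A live decoding loop sketch, assuming a Configuration built as in the earlier example:

    LiveSpeechRecognizer recognizer = new LiveSpeechRecognizer(configuration);
    recognizer.startRecognition(true);
    SpeechResult result;
    // getResult() returns null once the microphone stream is closed.
    while ((result = recognizer.getResult()) != null) {
        System.out.println(result.getHypothesis());
    }
    recognizer.stopRecognition();
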
@@ -0,0 +1,54 @@
/*
 * Copyright 1999-2004 Carnegie Mellon University.
 * Portions Copyright 2004 Sun Microsystems, Inc.
 * Portions Copyright 2004 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */

package edu.cmu.sphinx.api;

import java.io.InputStream;

import javax.sound.sampled.*;

/**
 * InputStream adapter
 */
public class Microphone {

    private final TargetDataLine line;
    private final InputStream inputStream;

    public Microphone(
            float sampleRate,
            int sampleSize,
            boolean signed,
            boolean bigEndian) {
        AudioFormat format =
                new AudioFormat(sampleRate, sampleSize, 1, signed, bigEndian);
        try {
            line = AudioSystem.getTargetDataLine(format);
            line.open();
        } catch (LineUnavailableException e) {
            throw new IllegalStateException(e);
        }
        inputStream = new AudioInputStream(line);
    }

    public void startRecording() {
        line.start();
    }

    public void stopRecording() {
        line.stop();
    }

    public InputStream getStream() {
        return inputStream;
    }
}
@@ -0,0 +1,263 @@
/*
 * Copyright 2014 Alpha Cephei Inc.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 */

package edu.cmu.sphinx.api;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.TreeMap;
import java.util.logging.Logger;

import edu.cmu.sphinx.alignment.LongTextAligner;
import edu.cmu.sphinx.alignment.SimpleTokenizer;
import edu.cmu.sphinx.alignment.TextTokenizer;
import edu.cmu.sphinx.linguist.language.grammar.AlignerGrammar;
import edu.cmu.sphinx.linguist.language.ngram.DynamicTrigramModel;
import edu.cmu.sphinx.recognizer.Recognizer;
import edu.cmu.sphinx.result.Result;
import edu.cmu.sphinx.result.WordResult;
import edu.cmu.sphinx.util.Range;
import edu.cmu.sphinx.util.TimeFrame;

public class SpeechAligner {
    private final Logger logger = Logger.getLogger(getClass().getSimpleName());

    private static final int TUPLE_SIZE = 3;

    private final Context context;
    private final Recognizer recognizer;
    private final AlignerGrammar grammar;
    private final DynamicTrigramModel languageModel;

    private TextTokenizer tokenizer;

    public SpeechAligner(String amPath, String dictPath, String g2pPath)
            throws MalformedURLException, IOException {
        Configuration configuration = new Configuration();
        configuration.setAcousticModelPath(amPath);
        configuration.setDictionaryPath(dictPath);

        context = new Context(configuration);
        if (g2pPath != null) {
            context.setLocalProperty("dictionary->g2pModelPath", g2pPath);
            context.setLocalProperty("dictionary->g2pMaxPron", "2");
        }
        context.setLocalProperty("lexTreeLinguist->languageModel", "dynamicTrigramModel");
        recognizer = context.getInstance(Recognizer.class);
        grammar = context.getInstance(AlignerGrammar.class);
        languageModel = context.getInstance(DynamicTrigramModel.class);
        setTokenizer(new SimpleTokenizer());
    }

    public List<WordResult> align(URL audioUrl, String transcript) throws IOException {
        return align(audioUrl, getTokenizer().expand(transcript));
    }

    /**
     * Aligns audio to a sentence transcript.
     *
     * @param audioUrl audio file URL to process
     * @param sentenceTranscript cleaned transcript
     * @return list of aligned words with timings
     * @throws IOException if IO went wrong
     */
    public List<WordResult> align(URL audioUrl, List<String> sentenceTranscript) throws IOException {

        List<String> transcript = sentenceToWords(sentenceTranscript);

        LongTextAligner aligner = new LongTextAligner(transcript, TUPLE_SIZE);
        Map<Integer, WordResult> alignedWords = new TreeMap<Integer, WordResult>();
        Queue<Range> ranges = new LinkedList<Range>();
        Queue<List<String>> texts = new ArrayDeque<List<String>>();
        Queue<TimeFrame> timeFrames = new ArrayDeque<TimeFrame>();

        ranges.offer(new Range(0, transcript.size()));
        texts.offer(transcript);
        TimeFrame totalTimeFrame = TimeFrame.INFINITE;
        timeFrames.offer(totalTimeFrame);
        long lastFrame = TimeFrame.INFINITE.getEnd();

        languageModel.setText(sentenceTranscript);

        for (int i = 0; i < 4; ++i) {
            if (i == 1) {
                context.setLocalProperty("decoder->searchManager", "alignerSearchManager");
            }

            while (!texts.isEmpty()) {
                assert texts.size() == ranges.size();
                assert texts.size() == timeFrames.size();

                List<String> text = texts.poll();
                TimeFrame frame = timeFrames.poll();
                Range range = ranges.poll();

                logger.info("Aligning frame " + frame + " to text " + text + " range " + range);

                recognizer.allocate();

                if (i >= 1) {
                    grammar.setWords(text);
                }

                context.setSpeechSource(audioUrl.openStream(), frame);

                List<WordResult> hypothesis = new ArrayList<WordResult>();
                Result result;
                while (null != (result = recognizer.recognize())) {
                    logger.info("Utterance result " + result.getTimedBestResult(true));
                    hypothesis.addAll(result.getTimedBestResult(false));
                }

                if (i == 0) {
                    if (hypothesis.size() > 0) {
                        lastFrame = hypothesis.get(hypothesis.size() - 1).getTimeFrame().getEnd();
                    }
                }

                List<String> words = new ArrayList<String>();
                for (WordResult wr : hypothesis) {
                    words.add(wr.getWord().getSpelling());
                }
                int[] alignment = aligner.align(words, range);

                List<WordResult> results = hypothesis;

                logger.info("Decoding result is " + results);

                // dumpAlignment(transcript, alignment, results);
                dumpAlignmentStats(transcript, alignment, results);

                for (int j = 0; j < alignment.length; j++) {
                    if (alignment[j] != -1) {
                        alignedWords.put(alignment[j], hypothesis.get(j));
                    }
                }

                recognizer.deallocate();
            }

            scheduleNextAlignment(transcript, alignedWords, ranges, texts, timeFrames, lastFrame);
        }

        return new ArrayList<WordResult>(alignedWords.values());
    }

    public List<String> sentenceToWords(List<String> sentenceTranscript) {
        ArrayList<String> transcript = new ArrayList<String>();
        for (String sentence : sentenceTranscript) {
            String[] words = sentence.split("\\s+");
            for (String word : words) {
                if (word.length() > 0)
                    transcript.add(word);
            }
        }
        return transcript;
    }

    private void dumpAlignmentStats(List<String> transcript, int[] alignment, List<WordResult> results) {
        int insertions = 0;
        int deletions = 0;
        int size = transcript.size();

        int[] aid = alignment;
        int lastId = -1;
        for (int ij = 0; ij < aid.length; ++ij) {
            if (aid[ij] == -1) {
                insertions++;
            } else {
                if (aid[ij] - lastId > 1) {
                    deletions += aid[ij] - lastId;
                }
                lastId = aid[ij];
            }
        }

        if (lastId >= 0 && transcript.size() - lastId > 1) {
            deletions += transcript.size() - lastId;
        }
        logger.info(String.format("Size %d deletions %d insertions %d error rate %.2f", size, insertions, deletions,
                (insertions + deletions) / ((float) size) * 100f));
    }

    private void scheduleNextAlignment(List<String> transcript, Map<Integer, WordResult> alignedWords, Queue<Range> ranges,
            Queue<List<String>> texts, Queue<TimeFrame> timeFrames, long lastFrame) {
        int prevKey = 0;
        long prevStart = 0;
        for (Map.Entry<Integer, WordResult> e : alignedWords.entrySet()) {
            if (e.getKey() - prevKey > 1) {
                checkedOffer(transcript, texts, timeFrames, ranges, prevKey, e.getKey() + 1, prevStart, e.getValue()
                        .getTimeFrame().getEnd());
            }
            prevKey = e.getKey();
            prevStart = e.getValue().getTimeFrame().getStart();
        }
        if (transcript.size() - prevKey > 1) {
            checkedOffer(transcript, texts, timeFrames, ranges, prevKey, transcript.size(), prevStart, lastFrame);
        }
    }

    public void dumpAlignment(List<String> transcript, int[] alignment, List<WordResult> results) {
        logger.info("Alignment");
        int[] aid = alignment;
        int lastId = -1;
        for (int ij = 0; ij < aid.length; ++ij) {
            if (aid[ij] == -1) {
                logger.info(String.format("+ %s", results.get(ij)));
            } else {
                if (aid[ij] - lastId > 1) {
                    for (String result1 : transcript.subList(lastId + 1, aid[ij])) {
                        logger.info(String.format("- %-25s", result1));
                    }
                } else {
                    logger.info(String.format(" %-25s", transcript.get(aid[ij])));
                }
                lastId = aid[ij];
            }
        }

        if (lastId >= 0 && transcript.size() - lastId > 1) {
            for (String result1 : transcript.subList(lastId + 1, transcript.size())) {
                logger.info(String.format("- %-25s", result1));
            }
        }
    }

    private void checkedOffer(List<String> transcript, Queue<List<String>> texts, Queue<TimeFrame> timeFrames,
            Queue<Range> ranges, int start, int end, long timeStart, long timeEnd) {

        double wordDensity = ((double) (timeEnd - timeStart)) / (end - start);

        // Skip the range if the words are too dense: the average word
        // would be shorter than 10 milliseconds.
        if (wordDensity < 10.0 && (end - start) > 3) {
            logger.info("Skipping text range due to a high density " + transcript.subList(start, end).toString());
            return;
        }

        texts.offer(transcript.subList(start, end));
        timeFrames.offer(new TimeFrame(timeStart, timeEnd));
        ranges.offer(new Range(start, end - 1));
    }

    public TextTokenizer getTokenizer() {
        return tokenizer;
    }

    public void setTokenizer(TextTokenizer wordExpander) {
        this.tokenizer = wordExpander;
    }
}
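
A usage sketch for the aligner; the model paths and audio file below are placeholders:

    SpeechAligner aligner = new SpeechAligner(
            "models/en-us",              // acoustic model directory (placeholder)
            "models/cmudict-en-us.dict", // dictionary (placeholder)
            null);                       // no g2p model
    List<WordResult> words =
            aligner.align(new URL("file:speech.wav"), "transcript of the recording");
    for (WordResult word : words) {
        System.out.println(word);
    }
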
@@ -0,0 +1,91 @@
/*
 * Copyright 2013 Carnegie Mellon University.
 * Portions Copyright 2004 Sun Microsystems, Inc.
 * Portions Copyright 2004 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 */

package edu.cmu.sphinx.api;

import java.util.Collection;
import java.util.HashSet;
import java.util.List;

import edu.cmu.sphinx.recognizer.Recognizer;
import edu.cmu.sphinx.result.*;

/**
 * High-level wrapper for {@link Result} instance.
 */
public final class SpeechResult {

    private final Result result;
    private final Lattice lattice;

    /**
     * Constructs recognition result based on {@link Result} object.
     *
     * @param result recognition result returned by {@link Recognizer}
     */
    public SpeechResult(Result result) {
        this.result = result;
        if (result.toCreateLattice()) {
            lattice = new Lattice(result);
            new LatticeOptimizer(lattice).optimize();
            lattice.computeNodePosteriors(1.0f);
        } else
            lattice = null;
    }

    /**
     * Returns a {@link List} of words of the recognition result.
     * Within the list words are ordered by time frame.
     *
     * @return words that form the result
     */
    public List<WordResult> getWords() {
        return lattice != null ? lattice.getWordResultPath() : result.getTimedBestResult(false);
    }

    /**
     * @return string representation of the result
     */
    public String getHypothesis() {
        return result.getBestResultNoFiller();
    }

    /**
     * Returns the N best hypotheses.
     *
     * @param n number of hypotheses to return
     * @return {@link Collection} of the best hypotheses
     */
    public Collection<String> getNbest(int n) {
        if (lattice == null)
            return new HashSet<String>();
        return new Nbest(lattice).getNbest(n);
    }

    /**
     * Returns the lattice for the recognition result.
     *
     * @return lattice object
     */
    public Lattice getLattice() {
        return lattice;
    }

    /**
     * Returns the underlying {@link Result} object.
     *
     * @return the Result stored in this SpeechResult
     */
    public Result getResult() {
        return result;
    }
}
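
Given a SpeechResult from one of the recognizers, per-word timing is available through getWords(); a short sketch:

    SpeechResult result = recognizer.getResult();
    System.out.println("Best hypothesis: " + result.getHypothesis());
    for (WordResult wr : result.getWords()) {
        // Each WordResult carries the recognized word and its time frame.
        System.out.println(wr.getWord().getSpelling() + " " + wr.getTimeFrame());
    }
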
@@ -0,0 +1,20 @@
/*
 * Copyright 2013 Carnegie Mellon University.
 * Portions Copyright 2004 Sun Microsystems, Inc.
 * Portions Copyright 2004 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 */

package edu.cmu.sphinx.api;

public class SpeechSourceProvider {

    Microphone getMicrophone() {
        return new Microphone(16000, 16, true, false);
    }
}
@@ -0,0 +1,66 @@
/*
 * Copyright 2013 Carnegie Mellon University.
 * Portions Copyright 2004 Sun Microsystems, Inc.
 * Portions Copyright 2004 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 */

package edu.cmu.sphinx.api;

import java.io.IOException;
import java.io.InputStream;

import edu.cmu.sphinx.util.TimeFrame;

/**
 * Speech recognizer that works with audio resources.
 *
 * @see LiveSpeechRecognizer live speech recognizer
 */
public class StreamSpeechRecognizer extends AbstractSpeechRecognizer {

    /**
     * Constructs a new stream recognizer.
     *
     * @param configuration configuration
     * @throws IOException if an error occurred during model load
     */
    public StreamSpeechRecognizer(Configuration configuration)
        throws IOException
    {
        super(configuration);
    }

    /**
     * Starts recognition of the whole stream.
     *
     * @param stream input stream to process
     */
    public void startRecognition(InputStream stream) {
        startRecognition(stream, TimeFrame.INFINITE);
    }

    /**
     * Starts recognition process.
     *
     * Starts recognition process and optionally clears previous data.
     *
     * @param stream input stream to process
     * @param timeFrame time range of the stream to process
     * @see StreamSpeechRecognizer#stopRecognition()
     */
    public void startRecognition(InputStream stream, TimeFrame timeFrame) {
        recognizer.allocate();
        context.setSpeechSource(stream, timeFrame);
    }

    /**
     * Stops recognition process.
     *
     * Recognition process is paused until the next call to startRecognition.
     *
     * @see StreamSpeechRecognizer#startRecognition(InputStream, TimeFrame)
     */
    public void stopRecognition() {
        recognizer.deallocate();
    }
}
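
A batch decoding sketch over a file-backed stream; the file name is a placeholder and the audio is assumed to match the configured sample rate:

    StreamSpeechRecognizer recognizer = new StreamSpeechRecognizer(configuration);
    InputStream stream = new java.io.FileInputStream("speech.wav");
    recognizer.startRecognition(stream);
    SpeechResult result;
    while ((result = recognizer.getResult()) != null) {
        System.out.println(result.getHypothesis());
    }
    recognizer.stopRecognition();
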
@@ -0,0 +1,154 @@
/*
 * Copyright 1999-2004 Carnegie Mellon University.
 * Portions Copyright 2004 Sun Microsystems, Inc.
 * Portions Copyright 2004 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */
package edu.cmu.sphinx.decoder;

import edu.cmu.sphinx.decoder.search.SearchManager;
import edu.cmu.sphinx.result.Result;
import edu.cmu.sphinx.util.props.*;

import java.util.ArrayList;
import java.util.List;
import java.util.logging.Logger;

/** An abstract decoder which implements all functionality which is independent of the used decoding-paradigm (pull/push). */
public abstract class AbstractDecoder implements ResultProducer, Configurable {

    /** The property that defines the name of the search manager to use. */
    @S4Component(type = SearchManager.class)
    public final static String PROP_SEARCH_MANAGER = "searchManager";
    protected SearchManager searchManager;

    @S4ComponentList(type = ResultListener.class)
    public static final String PROP_RESULT_LISTENERS = "resultListeners";
    protected final List<ResultListener> resultListeners = new ArrayList<ResultListener>();

    /**
     * If set to true the used search-manager will be automatically allocated
     * in <code>newProperties()</code>.
     */
    @S4Boolean(defaultValue = false)
    public static final String AUTO_ALLOCATE = "autoAllocate";

    /**
     * If set to <code>false</code>, registered result listeners will be
     * notified only for final results. By default non-final results don't
     * trigger a notification, because in most applications the final
     * utterance result is sufficient.
     */
    @S4Boolean(defaultValue = false)
    public static final String FIRE_NON_FINAL_RESULTS = "fireNonFinalResults";
    private boolean fireNonFinalResults;

    private String name;
    protected Logger logger;

    public AbstractDecoder() {
    }

    /**
     * Abstract decoder to implement live and batch recognizers
     *
     * @param searchManager search manager to use
     * @param fireNonFinalResults whether to fire results during decoding
     * @param autoAllocate whether to automatically allocate all components
     * @param resultListeners listeners to get notifications
     */
    public AbstractDecoder(SearchManager searchManager, boolean fireNonFinalResults, boolean autoAllocate, List<ResultListener> resultListeners) {
        String name = getClass().getName();
        init(name, Logger.getLogger(name),
                searchManager, fireNonFinalResults, autoAllocate, resultListeners);
    }

    /**
     * Decode frames until recognition is complete
     *
     * @param referenceText the reference text (or null)
     * @return a result
     */
    public abstract Result decode(String referenceText);

    public void newProperties(PropertySheet ps) throws PropertyException {
        init(ps.getInstanceName(), ps.getLogger(),
                (SearchManager) ps.getComponent(PROP_SEARCH_MANAGER),
                ps.getBoolean(FIRE_NON_FINAL_RESULTS),
                ps.getBoolean(AUTO_ALLOCATE),
                ps.getComponentList(PROP_RESULT_LISTENERS, ResultListener.class));
    }

    private void init(String name, Logger logger, SearchManager searchManager, boolean fireNonFinalResults, boolean autoAllocate, List<ResultListener> listeners) {
        this.name = name;
        this.logger = logger;

        this.searchManager = searchManager;
        this.fireNonFinalResults = fireNonFinalResults;

        if (autoAllocate) {
            searchManager.allocate();
        }

        for (ResultListener listener : listeners) {
            addResultListener(listener);
        }
    }

    /** Allocate resources necessary for decoding */
    public void allocate() {
        searchManager.allocate();
    }

    /** Deallocate resources */
    public void deallocate() {
        searchManager.deallocate();
    }

    /**
     * Adds a result listener to this recognizer. A result listener is called whenever a new result is generated by the
     * recognizer. This method can be called in any state.
     *
     * @param resultListener the listener to add
     */
    public void addResultListener(ResultListener resultListener) {
        resultListeners.add(resultListener);
    }

    /**
     * Removes a previously added result listener. This method can be called in any state.
     *
     * @param resultListener the listener to remove
     */
    public void removeResultListener(ResultListener resultListener) {
        resultListeners.remove(resultListener);
    }

    /**
     * Fires new results as soon as they become available.
     *
     * @param result the new result
     */
    protected void fireResultListeners(Result result) {
        if (fireNonFinalResults || result.isFinal()) {
            for (ResultListener resultListener : resultListeners) {
                resultListener.newResult(result);
            }
        } else {
            logger.finer("skipping non-final result " + result);
        }
    }

    @Override
    public String toString() {
        return name;
    }
}
@@ -0,0 +1,74 @@
/*
 * Copyright 1999-2004 Carnegie Mellon University.
 * Portions Copyright 2004 Sun Microsystems, Inc.
 * Portions Copyright 2004 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */
package edu.cmu.sphinx.decoder;

import edu.cmu.sphinx.result.Result;
import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;
import edu.cmu.sphinx.util.props.S4Integer;
import edu.cmu.sphinx.decoder.search.SearchManager;

import java.util.List;

/** The primary decoder class */
public class Decoder extends AbstractDecoder {

    public Decoder() {
        // Keep this or else XML configuration fails.
    }

    /** The property for the number of features to recognize at once. */
    @S4Integer(defaultValue = Integer.MAX_VALUE)
    public final static String PROP_FEATURE_BLOCK_SIZE = "featureBlockSize";
    private int featureBlockSize;

    @Override
    public void newProperties(PropertySheet ps) throws PropertyException {
        super.newProperties(ps);
        featureBlockSize = ps.getInt(PROP_FEATURE_BLOCK_SIZE);
    }

    /**
     * Main decoder
     *
     * @param searchManager search manager to configure search space
     * @param fireNonFinalResults whether to notify about non-final results
     * @param autoAllocate whether to automatically allocate all components
     * @param resultListeners listeners to get signals
     * @param featureBlockSize number of features to recognize at once
     */
    public Decoder(SearchManager searchManager, boolean fireNonFinalResults, boolean autoAllocate, List<ResultListener> resultListeners, int featureBlockSize) {
        super(searchManager, fireNonFinalResults, autoAllocate, resultListeners);
        this.featureBlockSize = featureBlockSize;
    }

    /**
     * Decode frames until recognition is complete.
     *
     * @param referenceText the reference text (or null)
     * @return a result
     */
    @Override
    public Result decode(String referenceText) {
        searchManager.startRecognition();
        Result result;
        do {
            result = searchManager.recognize(featureBlockSize);
            if (result != null) {
                result.setReferenceText(referenceText);
                fireResultListeners(result);
            }
        } while (result != null && !result.isFinal());
        searchManager.stopRecognition();
        return result;
    }
}
@@ -0,0 +1,104 @@
/*
 *
 * Copyright 1999-2004 Carnegie Mellon University.
 * Portions Copyright 2004 Sun Microsystems, Inc.
 * Portions Copyright 2004 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */
package edu.cmu.sphinx.decoder;

import edu.cmu.sphinx.frontend.*;
import edu.cmu.sphinx.frontend.endpoint.SpeechEndSignal;
import edu.cmu.sphinx.frontend.endpoint.SpeechStartSignal;
import edu.cmu.sphinx.result.Result;
import edu.cmu.sphinx.decoder.search.SearchManager;

import java.util.List;

/**
 * A decoder which does not use the common pull-principle of S4 but processes only one single frame on each call of
 * <code>decode()</code>. When using this decoder, make sure that the <code>AcousticScorer</code> used by the
 * <code>SearchManager</code> can access some buffered <code>Data</code>s.
 */
public class FrameDecoder extends AbstractDecoder implements DataProcessor {

    private DataProcessor predecessor;

    private boolean isRecognizing;
    private Result result;

    public FrameDecoder(SearchManager searchManager, boolean fireNonFinalResults, boolean autoAllocate, List<ResultListener> listeners) {
        super(searchManager, fireNonFinalResults, autoAllocate, listeners);
    }

    public FrameDecoder() {
    }

    /**
     * Decode a single frame.
     *
     * @param referenceText the reference text (or null)
     * @return a result
     */
    @Override
    public Result decode(String referenceText) {
        return searchManager.recognize(1);
    }

    public Data getData() throws DataProcessingException {
        Data d = getPredecessor().getData();

        if (isRecognizing && (d instanceof FloatData || d instanceof DoubleData || d instanceof SpeechEndSignal)) {
            result = decode(null);

            if (result != null) {
                fireResultListeners(result);
                result = null;
            }
        }

        // we also trigger recognition on a DataEndSignal to allow threaded scorers to shut down correctly
        if (d instanceof DataEndSignal) {
            searchManager.stopRecognition();
        }

        if (d instanceof SpeechStartSignal) {
            searchManager.startRecognition();
            isRecognizing = true;
            result = null;
        }

        if (d instanceof SpeechEndSignal) {
            searchManager.stopRecognition();

            // fire results which were not yet final
            if (result != null)
                fireResultListeners(result);

            isRecognizing = false;
        }

        return d;
    }

    public DataProcessor getPredecessor() {
        return predecessor;
    }

    public void setPredecessor(DataProcessor predecessor) {
        this.predecessor = predecessor;
    }

    public void initialize() {
    }
}
@@ -0,0 +1,30 @@
/*
 * Copyright 1999-2002 Carnegie Mellon University.
 * Portions Copyright 2002 Sun Microsystems, Inc.
 * Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */

package edu.cmu.sphinx.decoder;

import edu.cmu.sphinx.util.props.Configurable;
import edu.cmu.sphinx.result.Result;

import java.util.EventListener;

/** The listener interface for being informed when new results are generated. */
public interface ResultListener extends EventListener, Configurable {

    /**
     * Method called when a new result is generated
     *
     * @param result the new result
     */
    public void newResult(Result result);
}
@@ -0,0 +1,33 @@
/*
 * Copyright 1999-2004 Carnegie Mellon University.
 * Portions Copyright 2004 Sun Microsystems, Inc.
 * Portions Copyright 2004 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */
package edu.cmu.sphinx.decoder;

import edu.cmu.sphinx.util.props.Configurable;

/**
 * Some API-elements shared by components which are able to produce <code>Result</code>s.
 *
 * @see edu.cmu.sphinx.result.Result
 */
public interface ResultProducer extends Configurable {

    /**
     * Registers a new listener for <code>Result</code>.
     * @param resultListener listener to add
     */
    void addResultListener(ResultListener resultListener);

    /**
     * Removes a listener from this <code>ResultProducer</code>-instance.
     * @param resultListener listener to remove
     */
    void removeResultListener(ResultListener resultListener);
}
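
Since ResultListener extends Configurable, an inline implementation also has to provide newProperties; a sketch of attaching one to a ResultProducer such as the Decoder above (the decoder variable is assumed):

    decoder.addResultListener(new ResultListener() {
        public void newResult(Result result) {
            if (result.isFinal()) {
                System.out.println(result.getBestResultNoFiller());
            }
        }

        public void newProperties(PropertySheet ps) throws PropertyException {
            // no properties to configure for this inline listener
        }
    });
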
@ -0,0 +1,174 @@
|
|||
package edu.cmu.sphinx.decoder.adaptation;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Random;
|
||||
|
||||
import org.apache.commons.math3.util.FastMath;
|
||||
|
||||
import edu.cmu.sphinx.linguist.acoustic.tiedstate.Loader;
|
||||
import edu.cmu.sphinx.linguist.acoustic.tiedstate.Pool;
|
||||
|
||||
/**
|
||||
* Used for clustering gaussians. The clustering is performed by Euclidean
|
||||
* distance criterion. The "k-means" clustering algorithm is used for clustering
|
||||
* the gaussians.
|
||||
*
|
||||
* @author Bogdan Petcu
|
||||
*/
|
||||
public class ClusteredDensityFileData {
|
||||
|
||||
private int numberOfClusters;
|
||||
private int[] corespondingClass;
|
||||
|
||||
public ClusteredDensityFileData(Loader loader, int numberOfClusters) {
|
||||
this.numberOfClusters = numberOfClusters;
|
||||
kMeansClustering(loader, 30);
|
||||
}
|
||||
|
||||
    public int getNumberOfClusters() {
        return this.numberOfClusters;
    }

    /**
     * Used for accessing the index that is specific to a gaussian.
     *
     * @param gaussian provided in i * numStates + gaussianIndex form.
     * @return class index
     */
    public int getClassIndex(int gaussian) {
        return corespondingClass[gaussian];
    }

    /**
     * Computes the euclidean distance between two n-dimensional points.
     *
     * @param a n-dimensional "a" point
     * @param b n-dimensional "b" point
     * @return the euclidean distance between a and b
     */
    private float euclidianDistance(float[] a, float[] b) {
        double s = 0, d;

        for (int i = 0; i < a.length; i++) {
            d = a[i] - b[i];
            s += d * d;
        }

        return (float) FastMath.sqrt(s);
    }

    /**
     * Checks whether two float arrays have the same components.
     *
     * @param a float array a
     * @param b float array b
     * @return true if the values in a are equal to the ones in b, else false
     */
    private boolean isEqual(float[] a, float[] b) {
        if (a.length != b.length) {
            return false;
        }

        for (int i = 0; i < a.length; i++) {
            if (a[i] != b[i]) {
                return false;
            }
        }

        return true;
    }

    /**
     * Performs the k-means clustering algorithm for clustering the gaussians.
     * Clustering is done using the euclidean distance criterion.
     *
     * @param loader loader providing the means to cluster
     * @param maxIterations maximum number of iterations to perform
     */
    private void kMeansClustering(Loader loader, int maxIterations) {
        Pool<float[]> initialData = loader.getMeansPool();
        ArrayList<float[]> oldCentroids = new ArrayList<float[]>(numberOfClusters);
        ArrayList<float[]> centroids = new ArrayList<float[]>(numberOfClusters);
        int numberOfElements = initialData.size(), nrOfIterations = maxIterations, index;
        int[] count = new int[numberOfClusters];
        double distance, min;
        float[] currentValue, centroid;
        float[][][] array = new float[numberOfClusters][numberOfElements][];
        boolean converged = false;
        Random randomGenerator = new Random();

        for (int i = 0; i < numberOfClusters; i++) {
            index = randomGenerator.nextInt(numberOfElements);
            centroids.add(initialData.get(index));
            oldCentroids.add(initialData.get(index));
            count[i] = 0;
        }

        index = 0;

        while (!converged && nrOfIterations > 0) {
            corespondingClass = new int[initialData.size()];
            array = new float[numberOfClusters][numberOfElements][];

            for (int i = 0; i < numberOfClusters; i++) {
                oldCentroids.set(i, centroids.get(i));
                count[i] = 0;
            }

            // assignment step: attach each mean to its nearest centroid
            for (int i = 0; i < initialData.size(); i++) {
                currentValue = initialData.get(i);
                min = this.euclidianDistance(oldCentroids.get(0), currentValue);
                index = 0;

                for (int k = 1; k < numberOfClusters; k++) {
                    distance = this.euclidianDistance(oldCentroids.get(k), currentValue);

                    if (distance < min) {
                        min = distance;
                        index = k;
                    }
                }

                array[index][count[index]] = currentValue;
                corespondingClass[i] = index;
                count[index]++;
            }

            // update step: recompute each centroid as the mean of its members
            for (int i = 0; i < numberOfClusters; i++) {
                centroid = new float[initialData.get(0).length];

                if (count[i] > 0) {

                    for (int j = 0; j < count[i]; j++) {
                        for (int k = 0; k < initialData.get(0).length; k++) {
                            centroid[k] += array[i][j][k];
                        }
                    }

                    for (int k = 0; k < initialData.get(0).length; k++) {
                        centroid[k] /= count[i];
                    }

                    centroids.set(i, centroid);
                }
            }

            converged = true;

            for (int i = 0; i < numberOfClusters; i++) {
                converged = converged && (this.isEqual(centroids.get(i), oldCentroids.get(i)));
            }

            nrOfIterations--;
        }
    }

}
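The method above is a standard Lloyd-style k-means: assign each gaussian mean to its nearest centroid, recompute each centroid as the mean of its members, and stop once no centroid moves or the iteration budget is spent. Below is a minimal, self-contained sketch of the same loop on plain double[] points; the class name and data are illustrative only and not part of this commit.

import java.util.Arrays;
import java.util.Random;

public class KMeansSketch {
    public static void main(String[] args) {
        double[][] points = {{0, 0}, {0, 1}, {1, 0}, {9, 9}, {9, 8}, {8, 9}};
        int k = 2, maxIterations = 10;
        Random rnd = new Random(42);

        // pick k random points as initial centroids
        double[][] centroids = new double[k][];
        for (int i = 0; i < k; i++)
            centroids[i] = points[rnd.nextInt(points.length)].clone();

        int[] assignment = new int[points.length];
        boolean changed = true;
        while (changed && maxIterations-- > 0) {
            changed = false;
            // assignment step: nearest centroid by squared euclidean distance
            for (int p = 0; p < points.length; p++) {
                int best = 0;
                double bestDist = Double.MAX_VALUE;
                for (int c = 0; c < k; c++) {
                    double dist = 0;
                    for (int d = 0; d < points[p].length; d++) {
                        double diff = points[p][d] - centroids[c][d];
                        dist += diff * diff;
                    }
                    if (dist < bestDist) { bestDist = dist; best = c; }
                }
                if (assignment[p] != best) { assignment[p] = best; changed = true; }
            }
            // update step: centroid = mean of its assigned points
            double[][] sums = new double[k][points[0].length];
            int[] counts = new int[k];
            for (int p = 0; p < points.length; p++) {
                counts[assignment[p]]++;
                for (int d = 0; d < points[p].length; d++)
                    sums[assignment[p]][d] += points[p][d];
            }
            for (int c = 0; c < k; c++)
                if (counts[c] > 0)
                    for (int d = 0; d < sums[c].length; d++)
                        centroids[c][d] = sums[c][d] / counts[c];
        }
        System.out.println(Arrays.toString(assignment)); // e.g. [0, 0, 0, 1, 1, 1]
    }
}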
@@ -0,0 +1,235 @@
package edu.cmu.sphinx.decoder.adaptation;

import edu.cmu.sphinx.api.SpeechResult;
import edu.cmu.sphinx.decoder.search.Token;
import edu.cmu.sphinx.frontend.FloatData;
import edu.cmu.sphinx.linguist.HMMSearchState;
import edu.cmu.sphinx.linguist.SearchState;
import edu.cmu.sphinx.linguist.acoustic.tiedstate.Loader;
import edu.cmu.sphinx.linguist.acoustic.tiedstate.Sphinx3Loader;
import edu.cmu.sphinx.util.LogMath;

/**
 * This class is used for estimating a MLLR transform for each cluster of data.
 * The clustering must be previously performed using
 * ClusteredDensityFileData.java
 *
 * @author Bogdan Petcu
 */
public class Stats {

    private ClusteredDensityFileData means;
    private double[][][][][] regLs;
    private double[][][][] regRs;
    private int nrOfClusters;
    private Sphinx3Loader loader;
    private float varFlor;
    private LogMath logMath = LogMath.getLogMath();

    public Stats(Loader loader, ClusteredDensityFileData means) {
        this.loader = (Sphinx3Loader) loader;
        this.nrOfClusters = means.getNumberOfClusters();
        this.means = means;
        this.varFlor = (float) 1e-5;
        this.invertVariances();
        this.init();
    }

    private void init() {
        int len = loader.getVectorLength()[0];
        this.regLs = new double[nrOfClusters][][][][];
        this.regRs = new double[nrOfClusters][][][];

        for (int i = 0; i < nrOfClusters; i++) {
            this.regLs[i] = new double[loader.getNumStreams()][][][];
            this.regRs[i] = new double[loader.getNumStreams()][][];

            for (int j = 0; j < loader.getNumStreams(); j++) {
                len = loader.getVectorLength()[j];
                this.regLs[i][j] = new double[len][len + 1][len + 1];
                this.regRs[i][j] = new double[len][len + 1];
            }
        }
    }

    public ClusteredDensityFileData getClusteredData() {
        return this.means;
    }

    public double[][][][][] getRegLs() {
        return regLs;
    }

    public double[][][][] getRegRs() {
        return regRs;
    }

    /**
     * Used for inverting variances.
     */
    private void invertVariances() {

        for (int i = 0; i < loader.getNumStates(); i++) {
            for (int k = 0; k < loader.getNumGaussiansPerState(); k++) {
                for (int l = 0; l < loader.getVectorLength()[0]; l++) {
                    if (loader.getVariancePool().get(
                            i * loader.getNumGaussiansPerState() + k)[l] <= 0.) {
                        this.loader.getVariancePool().get(
                                i * loader.getNumGaussiansPerState() + k)[l] = (float) 0.5;
                    } else if (loader.getVariancePool().get(
                            i * loader.getNumGaussiansPerState() + k)[l] < varFlor) {
                        this.loader.getVariancePool().get(
                                i * loader.getNumGaussiansPerState() + k)[l] = (float) (1. / varFlor);
                    } else {
                        this.loader.getVariancePool().get(
                                i * loader.getNumGaussiansPerState() + k)[l] = (float) (1. / loader
                                .getVariancePool().get(
                                        i * loader.getNumGaussiansPerState() + k)[l]);
                    }
                }
            }
        }
    }

    /**
     * Computes posterior values for each component.
     *
     * @param componentScores scores from which the posterior values are computed
     * @param numStreams number of feature streams
     * @return posterior values for all components
     */
    private float[] computePosterios(float[] componentScores, int numStreams) {
        float[] posteriors = componentScores;

        int step = componentScores.length / numStreams;
        int startIdx = 0;
        for (int i = 0; i < numStreams; i++) {
            float max = posteriors[startIdx];
            for (int j = startIdx + 1; j < startIdx + step; j++) {
                if (posteriors[j] > max) {
                    max = posteriors[j];
                }
            }

            for (int j = startIdx; j < startIdx + step; j++) {
                posteriors[j] = (float) logMath.logToLinear(posteriors[j] - max);
            }
            startIdx += step;
        }

        return posteriors;
    }

    /**
     * This method is used to directly collect and use counts. The counts are
     * collected and stored separately for each cluster.
     *
     * @param result Result object to collect counts from
     * @throws Exception if something went wrong
     */
    public void collect(SpeechResult result) throws Exception {
        Token token = result.getResult().getBestToken();
        float[] componentScore, featureVector, posteriors, tmean;
        int[] len;
        float dnom, wtMeanVar, wtDcountVar, wtDcountVarMean, mean;
        int mId, cluster;
        int numStreams, gauPerState;

        if (token == null)
            throw new Exception("Best token not found!");

        do {
            FloatData feature = (FloatData) token.getData();
            SearchState ss = token.getSearchState();

            if (!(ss instanceof HMMSearchState && ss.isEmitting())) {
                token = token.getPredecessor();
                continue;
            }

            componentScore = token.calculateComponentScore(feature);
            featureVector = FloatData.toFloatData(feature).getValues();
            mId = (int) ((HMMSearchState) token.getSearchState()).getHMMState()
                    .getMixtureId();
            if (loader instanceof Sphinx3Loader && ((Sphinx3Loader) loader).hasTiedMixtures())
                // use CI phone ID for tied mixture model
                mId = ((Sphinx3Loader) loader).getSenone2Ci()[mId];
            len = loader.getVectorLength();
            numStreams = loader.getNumStreams();
            gauPerState = loader.getNumGaussiansPerState();
            posteriors = this.computePosterios(componentScore, numStreams);
            int featVectorStartIdx = 0;

            for (int i = 0; i < numStreams; i++) {
                for (int j = 0; j < gauPerState; j++) {

                    cluster = means.getClassIndex(mId * numStreams
                            * gauPerState + i * gauPerState + j);
                    dnom = posteriors[i * gauPerState + j];
                    if (dnom > 0.) {
                        tmean = loader.getMeansPool().get(
                                mId * numStreams * gauPerState + i
                                        * gauPerState + j);

                        for (int k = 0; k < len[i]; k++) {
                            mean = posteriors[i * gauPerState + j]
                                    * featureVector[k + featVectorStartIdx];
                            wtMeanVar = mean
                                    * loader.getVariancePool().get(
                                            mId * numStreams * gauPerState + i
                                                    * gauPerState + j)[k];
                            wtDcountVar = dnom
                                    * loader.getVariancePool().get(
                                            mId * numStreams * gauPerState + i
                                                    * gauPerState + j)[k];

                            for (int p = 0; p < len[i]; p++) {
                                wtDcountVarMean = wtDcountVar * tmean[p];

                                for (int q = p; q < len[i]; q++) {
                                    regLs[cluster][i][k][p][q] += wtDcountVarMean
                                            * tmean[q];
                                }
                                regLs[cluster][i][k][p][len[i]] += wtDcountVarMean;
                                regRs[cluster][i][k][p] += wtMeanVar * tmean[p];
                            }
                            regLs[cluster][i][k][len[i]][len[i]] += wtDcountVar;
                            regRs[cluster][i][k][len[i]] += wtMeanVar;

                        }
                    }
                }
                featVectorStartIdx += len[i];
            }
            token = token.getPredecessor();
        } while (token != null);
    }

    /**
     * Fills the lower part of Legetter's set of G matrices.
     */
    public void fillRegLowerPart() {
        for (int i = 0; i < this.nrOfClusters; i++) {
            for (int j = 0; j < loader.getNumStreams(); j++) {
                for (int l = 0; l < loader.getVectorLength()[j]; l++) {
                    for (int p = 0; p <= loader.getVectorLength()[j]; p++) {
                        for (int q = p + 1; q <= loader.getVectorLength()[j]; q++) {
                            regLs[i][j][l][q][p] = regLs[i][j][l][p][q];
                        }
                    }
                }
            }
        }
    }

    public Transform createTransform() {
        Transform transform = new Transform(loader, nrOfClusters);
        transform.update(this);
        return transform;
    }

}
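Taken together with Transform below, the intended flow appears to be: cluster the means, accumulate the regression statistics from recognition results, then solve for the transform. The following is a hypothetical driver under those assumptions; the 'results', 'loader' and 'clusteredMeans' arguments and the output path are placeholders, not part of this commit.

package edu.cmu.sphinx.decoder.adaptation; // sketch placed alongside Stats/Transform

import edu.cmu.sphinx.api.SpeechResult;
import edu.cmu.sphinx.linguist.acoustic.tiedstate.Loader;

public class AdaptSketch {
    public static void adapt(Iterable<SpeechResult> results, Loader loader,
            ClusteredDensityFileData clusteredMeans) throws Exception {
        Stats stats = new Stats(loader, clusteredMeans);
        for (SpeechResult result : results) {
            stats.collect(result);                 // accumulate per-cluster regLs / regRs
        }
        Transform mllr = stats.createTransform();  // fills G lower parts, solves A and B
        mllr.store("mllr_matrix", 0);              // text format readable by Sphinx3/Sphinx4
    }
}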
@@ -0,0 +1,179 @@
package edu.cmu.sphinx.decoder.adaptation;

import java.io.File;
import java.io.PrintWriter;
import java.util.Scanner;

import org.apache.commons.math3.linear.Array2DRowRealMatrix;
import org.apache.commons.math3.linear.ArrayRealVector;
import org.apache.commons.math3.linear.DecompositionSolver;
import org.apache.commons.math3.linear.LUDecomposition;
import org.apache.commons.math3.linear.RealMatrix;
import org.apache.commons.math3.linear.RealVector;

import edu.cmu.sphinx.linguist.acoustic.tiedstate.Sphinx3Loader;

public class Transform {

    private float[][][][] As;
    private float[][][] Bs;
    private Sphinx3Loader loader;
    private int nrOfClusters;

    public Transform(Sphinx3Loader loader, int nrOfClusters) {
        this.loader = loader;
        this.nrOfClusters = nrOfClusters;
    }

    /**
     * Used for accessing the A matrix.
     *
     * @return A matrix (representing A from A*x + B = C)
     */
    public float[][][][] getAs() {
        return As;
    }

    /**
     * Used for accessing the B matrix.
     *
     * @return B matrix (representing B from A*x + B = C)
     */
    public float[][][] getBs() {
        return Bs;
    }

    /**
     * Writes the transformation to a file in a format that can further be used
     * in Sphinx3 and Sphinx4.
     *
     * @param filePath path to store the transform matrix
     * @param index index of the transform to store
     * @throws Exception if something went wrong
     */
    public void store(String filePath, int index) throws Exception {
        PrintWriter writer = new PrintWriter(filePath, "UTF-8");

        // nMllrClass
        writer.println("1");
        writer.println(loader.getNumStreams());

        for (int i = 0; i < loader.getNumStreams(); i++) {
            writer.println(loader.getVectorLength()[i]);

            for (int j = 0; j < loader.getVectorLength()[i]; j++) {
                for (int k = 0; k < loader.getVectorLength()[i]; ++k) {
                    writer.print(As[index][i][j][k]);
                    writer.print(" ");
                }
                writer.println();
            }

            for (int j = 0; j < loader.getVectorLength()[i]; j++) {
                writer.print(Bs[index][i][j]);
                writer.print(" ");
            }
            writer.println();

            // variance transform is left as the identity (all 1.0)
            for (int j = 0; j < loader.getVectorLength()[i]; j++) {
                writer.print("1.0 ");
            }
            writer.println();
        }
        writer.close();
    }

    /**
     * Used for computing the actual transformations (A and B matrices). These
     * are stored in As and Bs.
     */
    private void computeMllrTransforms(double[][][][][] regLs,
            double[][][][] regRs) {
        int len;
        DecompositionSolver solver;
        RealMatrix coef;
        RealVector vect, ABloc;

        for (int c = 0; c < nrOfClusters; c++) {
            this.As[c] = new float[loader.getNumStreams()][][];
            this.Bs[c] = new float[loader.getNumStreams()][];

            for (int i = 0; i < loader.getNumStreams(); i++) {
                len = loader.getVectorLength()[i];
                this.As[c][i] = new float[len][len];
                this.Bs[c][i] = new float[len];

                for (int j = 0; j < len; ++j) {
                    coef = new Array2DRowRealMatrix(regLs[c][i][j], false);
                    solver = new LUDecomposition(coef).getSolver();
                    vect = new ArrayRealVector(regRs[c][i][j], false);
                    ABloc = solver.solve(vect);

                    for (int k = 0; k < len; ++k) {
                        this.As[c][i][j][k] = (float) ABloc.getEntry(k);
                    }

                    this.Bs[c][i][j] = (float) ABloc.getEntry(len);
                }
            }
        }
    }

    /**
     * Reads the transformation from a file.
     *
     * @param filePath file path to load the transform from
     * @throws Exception if something went wrong
     */
    public void load(String filePath) throws Exception {

        Scanner input = new Scanner(new File(filePath));
        int numStreams, nMllrClass;

        nMllrClass = input.nextInt();

        assert nMllrClass == 1;

        numStreams = input.nextInt();
        // one vector length per stream (was new int[1], which overflows
        // for more than one stream)
        int[] vectorLength = new int[numStreams];

        this.As = new float[nMllrClass][][][];
        this.Bs = new float[nMllrClass][][];
        this.As[0] = new float[numStreams][][];
        this.Bs[0] = new float[numStreams][];

        for (int i = 0; i < numStreams; i++) {
            vectorLength[i] = input.nextInt();

            int length = vectorLength[i];

            // allocate per stream so earlier streams are not clobbered
            this.As[0][i] = new float[length][length];
            this.Bs[0][i] = new float[length];

            for (int j = 0; j < length; j++) {
                for (int k = 0; k < length; ++k) {
                    As[0][i][j][k] = input.nextFloat();
                }
            }

            for (int j = 0; j < length; j++) {
                Bs[0][i][j] = input.nextFloat();
            }
        }
        input.close();
    }

    /**
     * Stores in the current object a transform generated on the provided stats.
     *
     * @param stats provided stats that were previously collected from Result
     *            objects.
     */
    public void update(Stats stats) {
        stats.fillRegLowerPart();
        As = new float[nrOfClusters][][][];
        Bs = new float[nrOfClusters][][];
        this.computeMllrTransforms(stats.getRegLs(), stats.getRegRs());
    }
}
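Each row of the transform comes from one LU solve of G * x = r, where the first len entries of x form a row of A and the last entry is the B offset. Here is a standalone sketch of that single step with made-up 2-dimensional numbers, using the same commons-math3 calls as computeMllrTransforms:

import org.apache.commons.math3.linear.Array2DRowRealMatrix;
import org.apache.commons.math3.linear.ArrayRealVector;
import org.apache.commons.math3.linear.DecompositionSolver;
import org.apache.commons.math3.linear.LUDecomposition;
import org.apache.commons.math3.linear.RealMatrix;
import org.apache.commons.math3.linear.RealVector;

public class LuSolveSketch {
    public static void main(String[] args) {
        // stand-ins for regLs[c][i][j] (a (len+1)x(len+1) G matrix)
        // and regRs[c][i][j] (a len+1 right-hand side), len = 2
        double[][] g = {{4, 1, 0}, {1, 3, 1}, {0, 1, 2}};
        double[] r = {1, 2, 3};

        RealMatrix coef = new Array2DRowRealMatrix(g, false);
        DecompositionSolver solver = new LUDecomposition(coef).getSolver();
        RealVector x = solver.solve(new ArrayRealVector(r, false));

        // first len entries become a row of A, the last entry the B offset
        System.out.println("A row: [" + x.getEntry(0) + ", " + x.getEntry(1) + "]");
        System.out.println("B entry: " + x.getEntry(2));
    }
}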
@@ -0,0 +1,71 @@
/*
 * Copyright 1999-2002 Carnegie Mellon University.
 * Portions Copyright 2002 Sun Microsystems, Inc.
 * Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */

package edu.cmu.sphinx.decoder.pruner;

import edu.cmu.sphinx.decoder.search.ActiveList;
import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;

/** A null pruner. Does no actual pruning */
public class NullPruner implements Pruner {

    /* (non-Javadoc)
     * @see edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util.props.PropertySheet)
     */
    public void newProperties(PropertySheet ps) throws PropertyException {
    }

    /** Creates a simple pruner */
    public NullPruner() {
    }

    /** Starts the pruner */
    public void startRecognition() {
    }

    /**
     * Prunes the given set of states.
     *
     * @param activeList the active list of tokens
     * @return the pruned (and possibly new) activeList
     */
    public ActiveList prune(ActiveList activeList) {
        return activeList;
    }

    /** Performs post-recognition cleanup. */
    public void stopRecognition() {
    }

    /* (non-Javadoc)
     * @see edu.cmu.sphinx.decoder.pruner.Pruner#allocate()
     */
    public void allocate() {
    }

    /* (non-Javadoc)
     * @see edu.cmu.sphinx.decoder.pruner.Pruner#deallocate()
     */
    public void deallocate() {
    }

}
@@ -0,0 +1,49 @@
/*
 * Copyright 1999-2002 Carnegie Mellon University.
 * Portions Copyright 2002 Sun Microsystems, Inc.
 * Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */

package edu.cmu.sphinx.decoder.pruner;

import edu.cmu.sphinx.decoder.search.ActiveList;
import edu.cmu.sphinx.util.props.Configurable;

/** Provides a mechanism for pruning a set of StateTokens */
public interface Pruner extends Configurable {

    /** Starts the pruner */
    public void startRecognition();

    /**
     * Prunes the given set of states.
     *
     * @param stateTokenList a list containing StateToken objects to be scored
     * @return the pruned list (may be the same list as stateTokenList)
     */
    public ActiveList prune(ActiveList stateTokenList);

    /** Performs post-recognition cleanup. */
    public void stopRecognition();

    /** Allocates resources necessary for this pruner */
    public void allocate();

    /** Deallocates resources necessary for this pruner */
    public void deallocate();

}
@@ -0,0 +1,80 @@
/*
 * Copyright 1999-2002 Carnegie Mellon University.
 * Portions Copyright 2002 Sun Microsystems, Inc.
 * Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */

package edu.cmu.sphinx.decoder.pruner;

import edu.cmu.sphinx.decoder.search.ActiveList;
import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;

/** Performs the default pruning behavior, which is to invoke the purge on the active list */
public class SimplePruner implements Pruner {

    private String name;

    /* (non-Javadoc)
     * @see edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util.props.PropertySheet)
     */
    public void newProperties(PropertySheet ps) throws PropertyException {
    }

    public SimplePruner() {
    }

    /* (non-Javadoc)
     * @see edu.cmu.sphinx.util.props.Configurable#getName()
     */
    public String getName() {
        return name;
    }

    /** Starts the pruner */
    public void startRecognition() {
    }

    /**
     * Prunes the given set of states.
     *
     * @param activeList an active list of tokens
     * @return the purged active list
     */
    public ActiveList prune(ActiveList activeList) {
        return activeList.purge();
    }

    /** Performs post-recognition cleanup. */
    public void stopRecognition() {
    }

    /* (non-Javadoc)
     * @see edu.cmu.sphinx.decoder.pruner.Pruner#allocate()
     */
    public void allocate() {
    }

    /* (non-Javadoc)
     * @see edu.cmu.sphinx.decoder.pruner.Pruner#deallocate()
     */
    public void deallocate() {
    }

}
@@ -0,0 +1,57 @@
/*
 * Copyright 1999-2002 Carnegie Mellon University.
 * Portions Copyright 2002 Sun Microsystems, Inc.
 * Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */

package edu.cmu.sphinx.decoder.scorer;

import edu.cmu.sphinx.frontend.Data;
import edu.cmu.sphinx.util.props.Configurable;

import java.util.List;

/** Provides a mechanism for scoring a set of HMM states */
public interface AcousticScorer extends Configurable {

    /** Allocates resources for this scorer */
    public void allocate();

    /** Deallocates resources for this scorer */
    public void deallocate();

    /** Starts the scorer */
    public void startRecognition();

    /** Stops the scorer */
    public void stopRecognition();

    /**
     * Scores the given set of states over previously stored acoustic data,
     * if any, or over newly acquired data.
     *
     * @param scorableList a list containing Scoreable objects to be scored
     * @return the best scoring scoreable, or null if there are no more frames to score
     */
    public Data calculateScores(List<? extends Scoreable> scorableList);

    /**
     * Scores the given set of states over acoustic data freshly pulled from
     * the frontend, and stores that data in the queue for later re-scoring.
     *
     * @param scorableList a list containing Scoreable objects to be scored
     * @return the best scoring scoreable, or null if there are no more frames to score
     */
    public Data calculateScoresAndStoreData(List<? extends Scoreable> scorableList);

}
@@ -0,0 +1,67 @@
package edu.cmu.sphinx.decoder.scorer;

import edu.cmu.sphinx.decoder.search.SimpleBreadthFirstSearchManager;
import edu.cmu.sphinx.decoder.search.Token;
import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;
import edu.cmu.sphinx.util.props.S4Component;

import java.util.List;
import java.util.logging.Logger;

/**
 * Normalizes a set of Tokens against the best scoring Token of a background model.
 *
 * @author Holger Brandl
 */
public class BackgroundModelNormalizer implements ScoreNormalizer {

    /**
     * The active list provider used to determine the best token for normalization. If this reference is not
     * defined, no normalization will be applied.
     */
    @S4Component(type = SimpleBreadthFirstSearchManager.class, mandatory = false)
    public static final String ACTIVE_LIST_PROVIDER = "activeListProvider";
    private SimpleBreadthFirstSearchManager activeListProvider;

    private Logger logger;

    public BackgroundModelNormalizer() {
    }

    public void newProperties(PropertySheet ps) throws PropertyException {
        this.activeListProvider = (SimpleBreadthFirstSearchManager) ps.getComponent(ACTIVE_LIST_PROVIDER);
        this.logger = ps.getLogger();

        // warn only when normalization is actually disabled
        if (activeListProvider == null) {
            logger.warning("no active list set.");
        }
    }

    /**
     * @param activeListProvider the active list provider used to determine the best token for normalization. If
     *            this reference is not defined, no normalization will be applied.
     */
    public BackgroundModelNormalizer(SimpleBreadthFirstSearchManager activeListProvider) {
        this.activeListProvider = activeListProvider;
        this.logger = Logger.getLogger(getClass().getName());

        if (activeListProvider == null) {
            logger.warning("no active list set.");
        }
    }

    public Scoreable normalize(List<? extends Scoreable> scoreableList, Scoreable bestToken) {
        if (activeListProvider == null) {
            return bestToken;
        }

        Token normToken = activeListProvider.getActiveList().getBestToken();

        float normScore = normToken.getScore();

        for (Scoreable scoreable : scoreableList) {
            if (scoreable instanceof Token) {
                scoreable.normalizeScore(normScore);
            }
        }

        return bestToken;
    }
}
@@ -0,0 +1,30 @@
package edu.cmu.sphinx.decoder.scorer;

import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;

import java.util.List;

/**
 * Performs a simple normalization of all token scores by the score of the best token.
 *
 * @author Holger Brandl
 */
public class MaxScoreNormalizer implements ScoreNormalizer {

    public void newProperties(PropertySheet ps) throws PropertyException {
    }

    public MaxScoreNormalizer() {
    }

    public Scoreable normalize(List<? extends Scoreable> scoreableList, Scoreable bestToken) {
        for (Scoreable scoreable : scoreableList) {
            scoreable.normalizeScore(bestToken.getScore());
        }

        return bestToken;
    }
}
@@ -0,0 +1,27 @@
package edu.cmu.sphinx.decoder.scorer;

import edu.cmu.sphinx.util.props.Configurable;

import java.util.List;

/**
 * Describes all API elements that are necessary to normalize token scores after they have been computed by an
 * AcousticScorer.
 *
 * @author Holger Brandl
 * @see edu.cmu.sphinx.decoder.scorer.AcousticScorer
 * @see edu.cmu.sphinx.decoder.search.Token
 */
public interface ScoreNormalizer extends Configurable {

    /**
     * Normalizes the scores of a set of Tokens.
     *
     * @param scoreableList the set of scores to be normalized
     * @param bestToken the best scoring Token of the list above. Although not strictly necessary, it is included
     *            for convenience and to reduce computational overhead.
     * @return the best token after all <code>Token</code>s have been normalized. In most cases normalization won't
     *         change the order, but it is returned to keep the API open to approaches that might.
     */
    Scoreable normalize(List<? extends Scoreable> scoreableList, Scoreable bestToken);
}
@@ -0,0 +1,35 @@
/*
 * Copyright 1999-2010 Carnegie Mellon University.
 * Portions Copyright 2002 Sun Microsystems, Inc.
 * Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */

package edu.cmu.sphinx.decoder.scorer;

import edu.cmu.sphinx.frontend.Data;

/** Thing that can provide the score */
public interface ScoreProvider {

    /**
     * Provides the score
     *
     * @param data data to score
     * @return the score
     */
    public float getScore(Data data);

    /**
     * Provides component score
     *
     * @param feature data to score
     * @return the score
     */
    public float[] getComponentScore(Data feature);
}
@@ -0,0 +1,68 @@
/*
 * Copyright 1999-2002 Carnegie Mellon University.
 * Portions Copyright 2002 Sun Microsystems, Inc.
 * Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */

package edu.cmu.sphinx.decoder.scorer;

import edu.cmu.sphinx.frontend.Data;

import java.util.Comparator;

/** Represents an entity that can be scored against a data */
public interface Scoreable extends Data {

    /**
     * A {@code Scoreable} comparator that is used to order scoreables according to their score,
     * in descending order.
     *
     * <p>Note: since a higher score results in a lower natural order,
     * statements such as {@code Collections.min(list, Scoreable.COMPARATOR)}
     * actually return the Scoreable with the <b>highest</b> score,
     * in contrast to the natural meaning of the word "min".
     */
    Comparator<Scoreable> COMPARATOR = new Comparator<Scoreable>() {
        public int compare(Scoreable t1, Scoreable t2) {
            if (t1.getScore() > t2.getScore()) {
                return -1;
            } else if (t1.getScore() == t2.getScore()) {
                return 0;
            } else {
                return 1;
            }
        }
    };

    /**
     * Calculates a score against the given data. The score can be retrieved
     * with getScore().
     *
     * @param data the data to be scored
     * @return the score for the data
     */
    public float calculateScore(Data data);

    /**
     * Retrieves a previously calculated (and possibly normalized) score.
     *
     * @return the score
     */
    public float getScore();

    /**
     * Normalizes a previously calculated score.
     *
     * @param maxScore maximum score to use for normalization
     * @return the normalized score
     */
    public float normalizeScore(float maxScore);

}
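The inverted ordering is easy to trip over, so here is a tiny demonstration with a stand-in type (ScoredItem is hypothetical, not part of Sphinx4): Collections.min with a comparator of this shape returns the highest-scoring element.

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;

public class ComparatorDemo {
    static class ScoredItem {
        final float score;
        ScoredItem(float score) { this.score = score; }
    }

    static final Comparator<ScoredItem> BY_SCORE_DESC = new Comparator<ScoredItem>() {
        public int compare(ScoredItem t1, ScoredItem t2) {
            // mirror of Scoreable.COMPARATOR: bigger score means "smaller" rank
            if (t1.score > t2.score) return -1;
            if (t1.score == t2.score) return 0;
            return 1;
        }
    };

    public static void main(String[] args) {
        List<ScoredItem> items = new ArrayList<ScoredItem>();
        items.add(new ScoredItem(-10f));
        items.add(new ScoredItem(-3f));
        items.add(new ScoredItem(-7f));
        // prints -3.0: the best (highest) score, despite using "min"
        System.out.println(Collections.min(items, BY_SCORE_DESC).score);
    }
}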
@@ -0,0 +1,194 @@
package edu.cmu.sphinx.decoder.scorer;

import edu.cmu.sphinx.decoder.search.Token;
import edu.cmu.sphinx.frontend.*;
import edu.cmu.sphinx.frontend.endpoint.SpeechEndSignal;
import edu.cmu.sphinx.frontend.util.DataUtil;
import edu.cmu.sphinx.util.props.ConfigurableAdapter;
import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;
import edu.cmu.sphinx.util.props.S4Component;

import java.util.LinkedList;
import java.util.List;

/**
 * Implements some basic scorer functionality, including a simple default
 * acoustic scoring implementation that scores within the current thread;
 * this behavior can be changed by overriding the {@link #doScoring} method.
 *
 * <p>
 * Note that all scores are maintained in LogMath log base.
 *
 * @author Holger Brandl
 */
public class SimpleAcousticScorer extends ConfigurableAdapter implements AcousticScorer {

    /** Property that defines the frontend to retrieve features from for scoring */
    @S4Component(type = BaseDataProcessor.class)
    public final static String FEATURE_FRONTEND = "frontend";
    protected BaseDataProcessor frontEnd;

    /**
     * An optional post-processor for computed scores that will normalize
     * scores. If not set, no normalization will be applied and the token
     * scores will be returned unchanged.
     */
    @S4Component(type = ScoreNormalizer.class, mandatory = false)
    public final static String SCORE_NORMALIZER = "scoreNormalizer";
    protected ScoreNormalizer scoreNormalizer;

    private LinkedList<Data> storedData;
    private boolean seenEnd = false;

    @Override
    public void newProperties(PropertySheet ps) throws PropertyException {
        super.newProperties(ps);
        this.frontEnd = (BaseDataProcessor) ps.getComponent(FEATURE_FRONTEND);
        this.scoreNormalizer = (ScoreNormalizer) ps.getComponent(SCORE_NORMALIZER);
        storedData = new LinkedList<Data>();
    }

    /**
     * @param frontEnd the frontend to retrieve features from for scoring
     * @param scoreNormalizer optional post-processor for computed scores that
     *            will normalize scores. If not set, no normalization will be
     *            applied and the token scores will be returned unchanged.
     */
    public SimpleAcousticScorer(BaseDataProcessor frontEnd, ScoreNormalizer scoreNormalizer) {
        initLogger();
        this.frontEnd = frontEnd;
        this.scoreNormalizer = scoreNormalizer;
        storedData = new LinkedList<Data>();
    }

    public SimpleAcousticScorer() {
    }

    /**
     * Scores the given set of states.
     *
     * @param scoreableList a list containing scoreable objects to be scored
     * @return the best scoring scoreable, or <code>null</code> if there are no
     *         more features to score
     */
    public Data calculateScores(List<? extends Scoreable> scoreableList) {
        Data data;
        if (storedData.isEmpty()) {
            while ((data = getNextData()) instanceof Signal) {
                if (data instanceof SpeechEndSignal) {
                    seenEnd = true;
                    break;
                }
                if (data instanceof DataEndSignal) {
                    if (seenEnd)
                        return null;
                    else
                        break;
                }
            }
            if (data == null)
                return null;
        } else {
            data = storedData.poll();
        }

        return calculateScoresForData(scoreableList, data);
    }

    public Data calculateScoresAndStoreData(List<? extends Scoreable> scoreableList) {
        Data data;
        while ((data = getNextData()) instanceof Signal) {
            if (data instanceof SpeechEndSignal) {
                seenEnd = true;
                break;
            }
            if (data instanceof DataEndSignal) {
                if (seenEnd)
                    return null;
                else
                    break;
            }
        }
        if (data == null)
            return null;

        storedData.add(data);

        return calculateScoresForData(scoreableList, data);
    }

    protected Data calculateScoresForData(List<? extends Scoreable> scoreableList, Data data) {
        if (data instanceof SpeechEndSignal || data instanceof DataEndSignal) {
            return data;
        }

        if (scoreableList.isEmpty())
            return null;

        // convert the data to FloatData if not yet done
        if (data instanceof DoubleData)
            data = DataUtil.DoubleData2FloatData((DoubleData) data);

        Scoreable bestToken = doScoring(scoreableList, data);

        // apply optional score normalization
        if (scoreNormalizer != null && bestToken instanceof Token)
            bestToken = scoreNormalizer.normalize(scoreableList, bestToken);

        return bestToken;
    }

    protected Data getNextData() {
        return frontEnd.getData();
    }

    public void startRecognition() {
        storedData.clear();
    }

    public void stopRecognition() {
        // nothing needs to be done here
    }

    /**
     * Scores a list of <code>Scoreable</code>s given a <code>Data</code>-object.
     *
     * @param scoreableList the list of Scoreables to be scored
     * @param data the <code>Data</code>-object to be used for scoring
     * @param <T> type of the scoreables
     * @return the best scoring <code>Scoreable</code> or <code>null</code> if
     *         the list of scoreables was empty
     */
    protected <T extends Scoreable> T doScoring(List<T> scoreableList, Data data) {

        T best = null;
        float bestScore = -Float.MAX_VALUE;

        for (T item : scoreableList) {
            item.calculateScore(data);
            if (item.getScore() > bestScore) {
                bestScore = item.getScore();
                best = item;
            }
        }
        return best;
    }

    // Even if we don't do any meaningful allocation here, we implement the
    // methods because most extending scorers do need them.

    public void allocate() {
    }

    public void deallocate() {
    }

}
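As a sketch of the pull-based contract a search manager relies on (the scorer and active list are assumed to be configured elsewhere; this driver is illustrative, not part of the commit): each calculateScores call consumes one feature frame, end signals come back unchanged, and null means the input is exhausted.

package edu.cmu.sphinx.decoder.scorer; // sketch placed alongside the scorers

import edu.cmu.sphinx.decoder.search.ActiveList;
import edu.cmu.sphinx.frontend.Data;

public class ScoringLoopSketch {
    public static void run(SimpleAcousticScorer scorer, ActiveList activeList) {
        scorer.allocate();
        scorer.startRecognition();

        Data result;
        // one frame per call; null signals that no more frames are available
        while ((result = scorer.calculateScores(activeList.getTokens())) != null) {
            if (result instanceof Scoreable) {
                float best = ((Scoreable) result).getScore();
                System.out.println("frame best score: " + best);
                // ...grow/prune the active list against 'best' here...
            }
            // SpeechEndSignal / DataEndSignal are returned as-is and mark segment ends
        }

        scorer.stopRecognition();
        scorer.deallocate();
    }
}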
@@ -0,0 +1,200 @@
/*
 * Copyright 1999-2002 Carnegie Mellon University.
 * Portions Copyright 2002 Sun Microsystems, Inc.
 * Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */
package edu.cmu.sphinx.decoder.scorer;

import edu.cmu.sphinx.frontend.Data;
import edu.cmu.sphinx.frontend.BaseDataProcessor;
import edu.cmu.sphinx.frontend.DataProcessingException;
import edu.cmu.sphinx.util.CustomThreadFactory;
import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;
import edu.cmu.sphinx.util.props.S4Boolean;
import edu.cmu.sphinx.util.props.S4Integer;

import java.util.*;
import java.util.concurrent.*;

/**
 * An acoustic scorer that breaks the scoring up into a configurable number of separate threads.
 * <p>
 * All scores are maintained in LogMath log base.
 */
public class ThreadedAcousticScorer extends SimpleAcousticScorer {

    /**
     * The property that controls the thread priority of scoring threads.
     * Must be a value between {@link Thread#MIN_PRIORITY} and {@link Thread#MAX_PRIORITY}, inclusive.
     * The default is {@link Thread#NORM_PRIORITY}.
     */
    @S4Integer(defaultValue = Thread.NORM_PRIORITY)
    public final static String PROP_THREAD_PRIORITY = "threadPriority";

    /**
     * The property that controls the number of threads that are used to score HMM states. If the isCpuRelative
     * property is false, then it is the exact number of threads that are used to score HMM states. If the
     * isCpuRelative property is true, then this value is combined with the number of available processors on the
     * system. If you want to have one thread per CPU available to score states, set the NUM_THREADS property to 0
     * and isCpuRelative to true. If you want exactly one thread to process scores, set NUM_THREADS to 1 and
     * isCpuRelative to false.
     * <p>
     * If the value is 1 and isCpuRelative is false, no additional thread will be instantiated, and all computation
     * will be done in the calling thread itself. The default value is 0.
     */
    @S4Integer(defaultValue = 0)
    public final static String PROP_NUM_THREADS = "numThreads";

    /**
     * The property that controls whether the number of available CPUs on the system is used when determining
     * the number of threads to use for scoring. If true, the NUM_THREADS property is combined with the available
     * number of CPUs to determine the number of threads. Note that the number of threads is constrained to be
     * never lower than zero. Also, if the number of threads is 0, the states are scored on the calling thread and
     * no separate threads are started. The default value is true.
     */
    @S4Boolean(defaultValue = true)
    public final static String PROP_IS_CPU_RELATIVE = "isCpuRelative";

    /**
     * The property that controls the minimum number of scoreables sent to a thread. This is used to prevent
     * over-threading of the scoring that could happen if the number of threads is high compared to the size of
     * the active list. The default is 10.
     */
    @S4Integer(defaultValue = 10)
    public final static String PROP_MIN_SCOREABLES_PER_THREAD = "minScoreablesPerThread";

    private final static String className = ThreadedAcousticScorer.class.getSimpleName();

    private int numThreads; // number of threads in use
    private int threadPriority;
    private int minScoreablesPerThread; // min scoreables sent to a thread
    private ExecutorService executorService;

    /**
     * @param frontEnd the frontend to retrieve features from for scoring
     * @param scoreNormalizer optional post-processor for computed scores that will normalize scores. If not set,
     *            no normalization will be applied and the token scores will be returned unchanged.
     * @param minScoreablesPerThread the minimum number of scoreables sent to a thread. This is used to prevent
     *            over-threading of the scoring that could happen if the number of threads is high compared to the
     *            size of the active list.
     * @param cpuRelative controls whether the number of available CPUs on the system is used when determining the
     *            number of threads to use for scoring. If true, the numThreads value is combined with the
     *            available number of CPUs to determine the number of threads. Note that the number of threads is
     *            constrained to be never lower than zero. Also, if the number of threads is 0, the states are
     *            scored on the calling thread and no separate threads are started.
     * @param numThreads the number of threads that are used to score HMM states. If the cpuRelative parameter is
     *            false, then it is the exact number of threads that are used to score HMM states. If cpuRelative
     *            is true, then this value is combined with the number of available processors on the system. If
     *            you want one thread per available CPU, set numThreads to 0 and cpuRelative to true. If you want
     *            exactly one thread to process scores, set numThreads to 1 and cpuRelative to false.
     *            <p>
     *            If the value is 1 and cpuRelative is false, no additional thread will be instantiated, and all
     *            computation will be done in the calling thread itself.
     * @param threadPriority the thread priority of scoring threads. Must be a value between
     *            {@link Thread#MIN_PRIORITY} and {@link Thread#MAX_PRIORITY}, inclusive.
     *            The default is {@link Thread#NORM_PRIORITY}.
     */
    public ThreadedAcousticScorer(BaseDataProcessor frontEnd, ScoreNormalizer scoreNormalizer,
            int minScoreablesPerThread, boolean cpuRelative, int numThreads, int threadPriority) {
        super(frontEnd, scoreNormalizer);
        init(minScoreablesPerThread, cpuRelative, numThreads, threadPriority);
    }

    public ThreadedAcousticScorer() {
    }

    @Override
    public void newProperties(PropertySheet ps) throws PropertyException {
        super.newProperties(ps);
        init(ps.getInt(PROP_MIN_SCOREABLES_PER_THREAD), ps.getBoolean(PROP_IS_CPU_RELATIVE),
                ps.getInt(PROP_NUM_THREADS), ps.getInt(PROP_THREAD_PRIORITY));
    }

    private void init(int minScoreablesPerThread, boolean cpuRelative, int numThreads, int threadPriority) {
        this.minScoreablesPerThread = minScoreablesPerThread;
        if (cpuRelative) {
            numThreads += Runtime.getRuntime().availableProcessors();
        }
        this.numThreads = numThreads;
        this.threadPriority = threadPriority;
    }

    @Override
    public void allocate() {
        super.allocate();
        if (executorService == null) {
            if (numThreads > 1) {
                logger.fine("# of scoring threads: " + numThreads);
                executorService = Executors.newFixedThreadPool(numThreads,
                        new CustomThreadFactory(className, true, threadPriority));
            } else {
                logger.fine("no scoring threads");
            }
        }
    }

    @Override
    public void deallocate() {
        super.deallocate();
        if (executorService != null) {
            executorService.shutdown();
            executorService = null;
        }
    }

    @Override
    protected <T extends Scoreable> T doScoring(List<T> scoreableList, final Data data) {
        if (numThreads > 1) {
            int totalSize = scoreableList.size();
            int jobSize = Math.max((totalSize + numThreads - 1) / numThreads, minScoreablesPerThread);

            if (jobSize < totalSize) {
                List<Callable<T>> tasks = new ArrayList<Callable<T>>();
                for (int from = 0, to = jobSize; from < totalSize; from = to, to += jobSize) {
                    final List<T> scoringJob = scoreableList.subList(from, Math.min(to, totalSize));
                    tasks.add(new Callable<T>() {
                        public T call() throws Exception {
                            return ThreadedAcousticScorer.super.doScoring(scoringJob, data);
                        }
                    });
                }

                List<T> finalists = new ArrayList<T>(tasks.size());

                try {
                    for (Future<T> result : executorService.invokeAll(tasks))
                        finalists.add(result.get());
                } catch (Exception e) {
                    throw new DataProcessingException("No scoring jobs ended", e);
                }

                return Collections.min(finalists, Scoreable.COMPARATOR);
            }
        }
        // if no additional threads are necessary, do the scoring in the calling thread
        return super.doScoring(scoreableList, data);
    }

}
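The splitting arithmetic in doScoring is worth seeing in isolation: jobSize is ceil(totalSize / numThreads) floored at minScoreablesPerThread, and the list is walked in [from, to) windows. A standalone sketch with made-up numbers:

import java.util.ArrayList;
import java.util.List;

public class JobSplitSketch {
    public static void main(String[] args) {
        List<Integer> scoreables = new ArrayList<Integer>();
        for (int i = 0; i < 23; i++) scoreables.add(i);

        int numThreads = 4, minScoreablesPerThread = 5;
        int totalSize = scoreables.size();
        // ceil(23 / 4) = 6, which beats the floor of 5
        int jobSize = Math.max((totalSize + numThreads - 1) / numThreads,
                minScoreablesPerThread);

        if (jobSize < totalSize) {
            // windows: [0,6), [6,12), [12,18), [18,23)
            for (int from = 0, to = jobSize; from < totalSize; from = to, to += jobSize) {
                List<Integer> job = scoreables.subList(from, Math.min(to, totalSize));
                System.out.println("job: " + job);
            }
        } else {
            System.out.println("single job, score on the calling thread");
        }
    }
}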
@@ -0,0 +1,117 @@
/*
 * Copyright 1999-2002 Carnegie Mellon University.
 * Portions Copyright 2002 Sun Microsystems, Inc.
 * Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */

package edu.cmu.sphinx.decoder.search;

import java.util.List;
import edu.cmu.sphinx.util.props.*;

/**
 * An active list is maintained as a sorted list
 * <p>
 * Note that all scores are represented in LogMath logbase
 */
public interface ActiveList extends Iterable<Token> {

    /**
     * Property that sets the desired (or target) size for this active list. This is sometimes referred to as the
     * beam size.
     */
    @S4Integer(defaultValue = 2000)
    public final static String PROP_ABSOLUTE_BEAM_WIDTH = "absoluteBeamWidth";

    /**
     * Property that sets the minimum score relative to the maximum score in the list for pruning. Tokens with a
     * score less than relativeBeamWidth * maximumScore will be pruned from the list.
     */
    @S4Double(defaultValue = 0.0)
    public final static String PROP_RELATIVE_BEAM_WIDTH = "relativeBeamWidth";

    /**
     * Property that indicates whether or not the active list will implement 'strict pruning'. When strict pruning
     * is enabled, the active list will not remove tokens from the active list until they have been completely
     * scored. If strict pruning is not enabled, tokens can be removed from the active list based upon their entry
     * scores. The default setting is true (enabled).
     */
    @S4Boolean(defaultValue = true)
    public final static String PROP_STRICT_PRUNING = "strictPruning";

    /**
     * Adds the given token to the list, keeping track of the lowest scoring token.
     *
     * @param token the token to add
     */
    public void add(Token token);

    /**
     * Purges the active list of excess members, returning a (potentially new) active list.
     *
     * @return a purged active list
     */
    public ActiveList purge();

    /**
     * Returns the size of this list.
     *
     * @return the size
     */
    public int size();

    /**
     * Gets the list of all tokens.
     *
     * @return the set of tokens
     */
    public List<Token> getTokens();

    /**
     * Gets the beam threshold based upon the best scoring token.
     *
     * @return the beam threshold
     */
    public float getBeamThreshold();

    /**
     * Gets the best score in the list.
     *
     * @return the best score
     */
    public float getBestScore();

    /**
     * Sets the best scoring token for this active list.
     *
     * @param token the best scoring token
     */
    public void setBestToken(Token token);

    /**
     * Gets the best scoring token for this active list.
     *
     * @return the best scoring token
     */
    public Token getBestToken();

    /**
     * Creates a new empty version of this active list with the same general properties.
     *
     * @return a new active list.
     */
    public ActiveList newInstance();
}
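Assuming the usual log-domain reading of these properties (scores are log-probabilities, so a linear beam factor becomes an additive offset), the relative pruning rule sketches out as below. LogMath uses its own configurable base; natural log is shown purely for illustration, and the numbers are made up.

public class BeamSketch {
    public static void main(String[] args) {
        double relativeBeamWidth = 1e-80;       // linear value, as in PROP_RELATIVE_BEAM_WIDTH
        double logRelativeBeamWidth = Math.log(relativeBeamWidth); // negative offset, about -184.2
        double bestScore = -1234.5;             // best token score in the log domain
        double beamThreshold = bestScore + logRelativeBeamWidth;   // about -1418.7

        double tokenScore = -1400.0;
        // a token survives relative pruning iff its score clears the threshold
        System.out.println(tokenScore > beamThreshold);  // true here
    }
}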
@@ -0,0 +1,79 @@
/*
 *
 * Copyright 1999-2004 Carnegie Mellon University.
 * Portions Copyright 2004 Sun Microsystems, Inc.
 * Portions Copyright 2004 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */
package edu.cmu.sphinx.decoder.search;

import edu.cmu.sphinx.util.LogMath;
import edu.cmu.sphinx.util.props.*;

/** Creates new active lists. */
public abstract class ActiveListFactory implements Configurable {

    /**
     * Property that sets the desired (or target) size for this active list. This is sometimes referred to as the
     * beam size.
     */
    @S4Integer(defaultValue = -1)
    public final static String PROP_ABSOLUTE_BEAM_WIDTH = "absoluteBeamWidth";

    /**
     * Property that sets the minimum score relative to the maximum score in the list for pruning. Tokens with a
     * score less than relativeBeamWidth * maximumScore will be pruned from the list.
     */
    @S4Double(defaultValue = 1E-80)
    public final static String PROP_RELATIVE_BEAM_WIDTH = "relativeBeamWidth";

    /**
     * Property that indicates whether or not the active list will implement 'strict pruning'. When strict pruning
     * is enabled, the active list will not remove tokens from the active list until they have been completely
     * scored. If strict pruning is not enabled, tokens can be removed from the active list based upon their entry
     * scores. The default setting is true (enabled).
     */
    @S4Boolean(defaultValue = true)
    public final static String PROP_STRICT_PRUNING = "strictPruning";

    protected LogMath logMath;
    protected int absoluteBeamWidth;
    protected float logRelativeBeamWidth;

    /**
     * @param absoluteBeamWidth beam for absolute pruning
     * @param relativeBeamWidth beam for relative pruning
     */
    public ActiveListFactory(int absoluteBeamWidth, double relativeBeamWidth) {
        logMath = LogMath.getLogMath();
        this.absoluteBeamWidth = absoluteBeamWidth;
        this.logRelativeBeamWidth = logMath.linearToLog(relativeBeamWidth);
    }

    public ActiveListFactory() {
    }

    public void newProperties(PropertySheet ps) throws PropertyException {
        logMath = LogMath.getLogMath();
        absoluteBeamWidth = ps.getInt(PROP_ABSOLUTE_BEAM_WIDTH);
        double relativeBeamWidth = ps.getDouble(PROP_RELATIVE_BEAM_WIDTH);

        logRelativeBeamWidth = logMath.linearToLog(relativeBeamWidth);
    }

    /**
     * Creates a new active list of a particular type.
     *
     * @return the active list
     */
    public abstract ActiveList newInstance();
}
@@ -0,0 +1,77 @@
/*
 * Copyright 1999-2002 Carnegie Mellon University.
 * Portions Copyright 2002 Sun Microsystems, Inc.
 * Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */

package edu.cmu.sphinx.decoder.search;

import edu.cmu.sphinx.util.props.Configurable;
import edu.cmu.sphinx.util.props.S4Double;
import edu.cmu.sphinx.util.props.S4Integer;

import java.util.Iterator;

/** An active list is maintained as a sorted list */
public interface ActiveListManager extends Configurable {

    /** The property that specifies the absolute word beam width */
    @S4Integer(defaultValue = 2000)
    public final static String PROP_ABSOLUTE_WORD_BEAM_WIDTH =
            "absoluteWordBeamWidth";

    /** The property that specifies the relative word beam width */
    @S4Double(defaultValue = 0.0)
    public final static String PROP_RELATIVE_WORD_BEAM_WIDTH =
            "relativeWordBeamWidth";

    /**
     * Adds the given token to the list
     *
     * @param token the token to add
     */
    public void add(Token token);

    /**
     * Returns an Iterator of all the non-emitting ActiveLists. The iteration order is the same as the search state
     * order.
     *
     * @return an Iterator of non-emitting ActiveLists
     */
    public Iterator<ActiveList> getNonEmittingListIterator();

    /**
     * Returns the emitting ActiveList from the manager
     *
     * @return the emitting ActiveList
     */
    public ActiveList getEmittingList();

    /**
     * Clears emitting list in manager
     */
    public void clearEmittingList();

    /** Dumps out debug info for the active list manager */
    public void dump();

    /**
     * Sets the total number of state types to be managed
     *
     * @param numStateOrder the total number of state types
     */
    public void setNumStateOrder(int numStateOrder);

}
@@ -0,0 +1,87 @@
/*
 * Copyright 1999-2002 Carnegie Mellon University.
 * Portions Copyright 2002 Sun Microsystems, Inc.
 * Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */

package edu.cmu.sphinx.decoder.search;

import edu.cmu.sphinx.decoder.scorer.Scoreable;

import java.util.*;

/**
 * Manager for pruned hypotheses
 *
 * @author Joe Woelfel
 */
public class AlternateHypothesisManager {

    private final Map<Token, List<Token>> viterbiLoserMap = new HashMap<Token, List<Token>>();
    private final int maxEdges;

    /**
     * Creates an alternate hypotheses manager
     *
     * @param maxEdges the maximum edges allowed
     */
    public AlternateHypothesisManager(int maxEdges) {
        this.maxEdges = maxEdges;
    }

    /**
     * Adds an alternate predecessor for a token that would otherwise have been lost to Viterbi pruning.
     *
     * @param token a token that has an alternate lower scoring predecessor that still might be of interest
     * @param predecessor a predecessor that scores lower than token.getPredecessor()
     */
    public void addAlternatePredecessor(Token token, Token predecessor) {
        assert predecessor != token.getPredecessor();
        List<Token> list = viterbiLoserMap.get(token);
        if (list == null) {
            list = new ArrayList<Token>();
            viterbiLoserMap.put(token, list);
        }
        list.add(predecessor);
    }

    /**
     * Returns a list of alternate predecessors for a token.
     *
     * @param token a token that may have alternate lower scoring predecessors that still might be of interest
     * @return a list of predecessors that score lower than token.getPredecessor()
     */
    public List<Token> getAlternatePredecessors(Token token) {
        return viterbiLoserMap.get(token);
    }

    /** Purges all but the max number of alternate preceding token hypotheses. */
    public void purge() {

        int max = maxEdges - 1;

        for (Map.Entry<Token, List<Token>> entry : viterbiLoserMap.entrySet()) {
            List<Token> list = entry.getValue();
            Collections.sort(list, Scoreable.COMPARATOR);
            List<Token> newList = list.subList(0, list.size() > max ? max : list.size());
            viterbiLoserMap.put(entry.getKey(), newList);
        }
    }

    public boolean hasAlternatePredecessors(Token token) {
        return viterbiLoserMap.containsKey(token);
    }
}
|
||||
|
||||
|
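The purge() logic above amounts to "sort descending, keep the first maxEdges - 1 losers per token". A self-contained sketch of that truncation on plain float scores, assuming (as purge() does) that Scoreable.COMPARATOR orders the best-scoring entry first; the class and values here are hypothetical demo code, not part of this commit:

import java.util.*;

public class PurgeDemo {
    public static void main(String[] args) {
        // Stand-in scores for one token's alternate predecessors.
        List<Float> losers = new ArrayList<Float>(Arrays.asList(-12.5f, -3.0f, -7.25f, -1.5f));
        int maxEdges = 3;                 // as passed to AlternateHypothesisManager
        int max = maxEdges - 1;           // purge() keeps at most maxEdges - 1 entries

        Collections.sort(losers, Collections.reverseOrder()); // best score first
        List<Float> kept = losers.subList(0, Math.min(max, losers.size()));
        System.out.println(kept);         // [-1.5, -3.0] -> the two best losers survive
    }
}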
@@ -0,0 +1,270 @@
/*
 *
 * Copyright 1999-2004 Carnegie Mellon University.
 * Portions Copyright 2004 Sun Microsystems, Inc.
 * Portions Copyright 2004 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */
package edu.cmu.sphinx.decoder.search;

import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;

import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;

/** A factory for PartitionActiveLists */
public class PartitionActiveListFactory extends ActiveListFactory {

    /**
     * @param absoluteBeamWidth beam for absolute pruning
     * @param relativeBeamWidth beam for relative pruning
     */
    public PartitionActiveListFactory(int absoluteBeamWidth, double relativeBeamWidth) {
        super(absoluteBeamWidth, relativeBeamWidth);
    }

    public PartitionActiveListFactory() {
    }

    /*
     * (non-Javadoc)
     *
     * @see edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util.props.PropertySheet)
     */
    @Override
    public void newProperties(PropertySheet ps) throws PropertyException {
        super.newProperties(ps);
    }

    /*
     * (non-Javadoc)
     *
     * @see edu.cmu.sphinx.decoder.search.ActiveListFactory#newInstance()
     */
    @Override
    public ActiveList newInstance() {
        return new PartitionActiveList(absoluteBeamWidth, logRelativeBeamWidth);
    }

    /**
     * An active list that does absolute beam width pruning by partitioning the
     * token list based on the absolute beam width, instead of sorting the token
     * list and then chopping the list up with the absolute beam width. The
     * expected run time of this partitioning algorithm is O(n), instead of the O(n log n)
     * needed for merge sort.
     * <p>
     * This class is not thread safe and should only be used by a single thread.
     * <p>
     * Note that all scores are maintained in the LogMath log base.
     */
    class PartitionActiveList implements ActiveList {

        private int size;
        private final int absoluteBeamWidth;
        private final float logRelativeBeamWidth;
        private Token bestToken;
        // when the list is changed these things should be
        // changed/updated as well
        private Token[] tokenList;
        private final Partitioner partitioner = new Partitioner();

        /**
         * Creates an empty active list
         *
         * @param absoluteBeamWidth beam for absolute pruning
         * @param logRelativeBeamWidth beam for relative pruning
         */
        public PartitionActiveList(int absoluteBeamWidth,
                                   float logRelativeBeamWidth) {
            this.absoluteBeamWidth = absoluteBeamWidth;
            this.logRelativeBeamWidth = logRelativeBeamWidth;
            int listSize = 2000;
            if (absoluteBeamWidth > 0) {
                listSize = absoluteBeamWidth / 3;
            }
            this.tokenList = new Token[listSize];
        }

        /**
         * Adds the given token to the list
         *
         * @param token the token to add
         */
        public void add(Token token) {
            if (size < tokenList.length) {
                tokenList[size] = token;
                size++;
            } else {
                // token array too small, double the capacity
                doubleCapacity();
                add(token);
            }
            if (bestToken == null || token.getScore() > bestToken.getScore()) {
                bestToken = token;
            }
        }

        /** Doubles the capacity of the Token array. */
        private void doubleCapacity() {
            tokenList = Arrays.copyOf(tokenList, tokenList.length * 2);
        }

        /**
         * Purges excess members. Removes all nodes that fall below the relativeBeamWidth
         *
         * @return a (possibly new) active list
         */
        public ActiveList purge() {
            // if the absolute beam is zero, this means there
            // should be no constraint on the abs beam size at all
            // so we will only be relative beam pruning, which means
            // that we don't have to sort the list
            if (absoluteBeamWidth > 0) {
                // if we have an absolute beam, then we will
                // need to partition the tokens to apply the beam
                if (size > absoluteBeamWidth) {
                    size = partitioner.partition(tokenList, size,
                            absoluteBeamWidth) + 1;
                }
            }
            return this;
        }

        /**
         * Gets the beam threshold based upon the best scoring token
         *
         * @return the beam threshold
         */
        public float getBeamThreshold() {
            return getBestScore() + logRelativeBeamWidth;
        }

        /**
         * Gets the best score in the list
         *
         * @return the best score
         */
        public float getBestScore() {
            float bestScore = -Float.MAX_VALUE;
            if (bestToken != null) {
                bestScore = bestToken.getScore();
            }
            // A sanity check
            // for (Token t : this) {
            //     if (t.getScore() > bestScore) {
            //         System.out.println("GBS: found better score "
            //                 + t + " vs. " + bestScore);
            //     }
            // }
            return bestScore;
        }

        /**
         * Sets the best scoring token for this active list
         *
         * @param token the best scoring token
         */
        public void setBestToken(Token token) {
            bestToken = token;
        }

        /**
         * Gets the best scoring token for this active list
         *
         * @return the best scoring token
         */
        public Token getBestToken() {
            return bestToken;
        }

        /**
         * Retrieves the iterator for this token list.
         *
         * @return the iterator for this token list
         */
        public Iterator<Token> iterator() {
            return new TokenArrayIterator(tokenList, size);
        }

        /**
         * Gets the list of all tokens
         *
         * @return the list of tokens
         */
        public List<Token> getTokens() {
            return Arrays.asList(tokenList).subList(0, size);
        }

        /**
         * Returns the number of tokens on this active list
         *
         * @return the size of the active list
         */
        public final int size() {
            return size;
        }

        /* (non-Javadoc)
         * @see edu.cmu.sphinx.decoder.search.ActiveList#createNew()
         */
        public ActiveList newInstance() {
            return PartitionActiveListFactory.this.newInstance();
        }
    }
}

class TokenArrayIterator implements Iterator<Token> {

    private final Token[] tokenArray;
    private final int size;
    private int pos;

    TokenArrayIterator(Token[] tokenArray, int size) {
        this.tokenArray = tokenArray;
        this.pos = 0;
        this.size = size;
    }

    /** Returns true if the iteration has more tokens. */
    public boolean hasNext() {
        return pos < size;
    }

    /** Returns the next token in the iteration. */
    public Token next() throws NoSuchElementException {
        // check against size, not the backing array's capacity, so we
        // never hand out the unused (null) slots past the last token
        if (pos >= size) {
            throw new NoSuchElementException();
        }
        return tokenArray[pos++];
    }

    /** Unimplemented, throws an Error if called. */
    public void remove() {
        throw new Error("TokenArrayIterator.remove() unimplemented");
    }
}
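getBeamThreshold() above is a log-domain computation: adding logRelativeBeamWidth to the best score is the same as multiplying the best linear-domain probability by the relative beam. A small self-contained check of that identity (plain natural logs here; sphinx4's LogMath may use a different log base, so this is illustrative only and not part of the commit):

public class BeamThresholdDemo {
    public static void main(String[] args) {
        double bestLinear = 0.04;          // best token probability (linear domain)
        double relativeBeam = 1e-3;        // keep tokens within 1/1000 of the best

        // Linear domain: prune tokens whose probability < bestLinear * relativeBeam.
        double thresholdLinear = bestLinear * relativeBeam;

        // Log domain: the same threshold is a simple addition.
        double logBest = Math.log(bestLinear);
        double logRelativeBeamWidth = Math.log(relativeBeam);
        double thresholdLog = logBest + logRelativeBeamWidth;

        // Both paths agree (up to floating point rounding).
        System.out.println(Math.exp(thresholdLog)); // ~4.0e-5
        System.out.println(thresholdLinear);        // 4.0e-5
    }
}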
@@ -0,0 +1,180 @@
/*
 * Copyright 1999-2002 Carnegie Mellon University.
 * Portions Copyright 2002 Sun Microsystems, Inc.
 * Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */

package edu.cmu.sphinx.decoder.search;

import java.util.Arrays;
import edu.cmu.sphinx.decoder.scorer.Scoreable;

/**
 * Partitions a list of tokens according to the token score, used
 * in {@link PartitionActiveListFactory}. This method is supposed
 * to provide O(n) performance, so it is preferable to a full
 * O(n log n) sort.
 */
public class Partitioner {

    /** Max recursion depth **/
    final private int MAX_DEPTH = 50;

    /**
     * Partitions a sub-array of tokens around the end token.
     * Puts all elements scoring higher than or equal to the pivot at the start of the array,
     * shifting the new pivot position
     *
     * @param tokens the token array to partition
     * @param start the starting index of the subarray
     * @param end the pivot and the ending index of the subarray, inclusive
     * @return the index (after partitioning) of the element around which the array is partitioned
     */
    private int endPointPartition(Token[] tokens, int start, int end) {
        Token pivot = tokens[end];
        float pivotScore = pivot.getScore();

        int i = start;
        int j = end - 1;

        while (true) {

            while (i < end && tokens[i].getScore() >= pivotScore)
                i++;
            while (j > i && tokens[j].getScore() < pivotScore)
                j--;

            if (j <= i)
                break;

            Token current = tokens[j];
            setToken(tokens, j, tokens[i]);
            setToken(tokens, i, current);
        }

        setToken(tokens, end, tokens[i]);
        setToken(tokens, i, pivot);
        return i;
    }

    /**
     * Partitions a sub-array of tokens around the x-th token by selecting the midpoint of the token array as the
     * pivot. Partially solves issues with slow performance on already sorted arrays.
     *
     * @param tokens the token array to partition
     * @param start the starting index of the subarray
     * @param end the ending index of the subarray, inclusive
     * @return the index of the element around which the array is partitioned
     */
    private int midPointPartition(Token[] tokens, int start, int end) {
        int middle = (start + end) >>> 1;
        Token temp = tokens[end];
        setToken(tokens, end, tokens[middle]);
        setToken(tokens, middle, temp);
        return endPointPartition(tokens, start, end);
    }

    /**
     * Partitions the given array of tokens in place, so that the highest scoring n tokens will be at the beginning of
     * the array, not in any particular order.
     *
     * @param tokens the array of tokens to partition
     * @param size the number of tokens to partition
     * @param n the number of tokens in the final partition
     * @return the index of the last element in the partition
     */
    public int partition(Token[] tokens, int size, int n) {
        if (tokens.length > n) {
            return midPointSelect(tokens, 0, size - 1, n, 0);
        } else {
            return findBest(tokens, size);
        }
    }

    /**
     * Finds the lowest scoring token and puts it in the last slot, so the returned
     * boundary index holds the worst token
     *
     * @param tokens array of tokens
     * @param size the number of tokens to partition
     * @return the index of the last valid token
     */
    private int findBest(Token[] tokens, int size) {
        int r = -1;
        float lowestScore = Float.MAX_VALUE;
        // only the first 'size' slots hold valid tokens
        for (int i = 0; i < size; i++) {
            float currentScore = tokens[i].getScore();
            if (currentScore <= lowestScore) {
                lowestScore = currentScore;
                r = i; // "r" is the returned index
            }
        }

        // exchange tokens[r] <=> last token,
        // where tokens[r] has the lowest score
        int last = size - 1;
        if (last >= 0) {
            Token lastToken = tokens[last];
            setToken(tokens, last, tokens[r]);
            setToken(tokens, r, lastToken);
        }

        // return the last index
        return last;
    }

    private void setToken(Token[] list, int index, Token token) {
        list[index] = token;
    }

    /**
     * Selects the token with the ith largest token score.
     *
     * @param tokens the token array to partition
     * @param start the starting index of the subarray
     * @param end the ending index of the subarray, inclusive
     * @param targetSize target size of the partition
     * @param depth recursion depth to avoid stack overflow and fall back to simple partition.
     * @return the index of the token with the ith largest score
     */
    private int midPointSelect(Token[] tokens, int start, int end, int targetSize, int depth) {
        if (depth > MAX_DEPTH) {
            return simplePointSelect(tokens, start, end, targetSize);
        }
        if (start == end) {
            return start;
        }
        int partitionToken = midPointPartition(tokens, start, end);
        int newSize = partitionToken - start + 1;
        if (targetSize == newSize) {
            return partitionToken;
        } else if (targetSize < newSize) {
            return midPointSelect(tokens, start, partitionToken - 1, targetSize, depth + 1);
        } else {
            return midPointSelect(tokens, partitionToken + 1, end, targetSize - newSize, depth + 1);
        }
    }

    /**
     * Fallback method to get the partition
     *
     * @param tokens the token array to partition
     * @param start the starting index of the subarray
     * @param end the ending index of the subarray, inclusive
     * @param targetSize target size of the partition
     * @return the index of the token with the ith largest score
     */
    private int simplePointSelect(Token[] tokens, int start, int end, int targetSize) {
        Arrays.sort(tokens, start, end + 1, Scoreable.COMPARATOR);
        return start + targetSize - 1;
    }

}
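Partitioner is a quickselect: partition, then recurse only into the side containing the cut point, which gives expected O(n) work instead of a full sort. A self-contained illustration of the same idea on a plain float array; this is hypothetical demo code (higher scores are better, as in the classes above), not part of the commit:

import java.util.Arrays;

public class QuickselectDemo {
    // Partition s[start..end] around s[end]; higher scores move left,
    // mirroring Partitioner.endPointPartition.
    static int partition(float[] s, int start, int end) {
        float pivot = s[end];
        int i = start;
        for (int j = start; j < end; j++) {
            if (s[j] >= pivot) {
                float t = s[i]; s[i] = s[j]; s[j] = t;
                i++;
            }
        }
        float t = s[i]; s[i] = s[end]; s[end] = t;
        return i; // pivot's final resting place
    }

    // Expected O(n): afterwards the n highest scores occupy s[0..n-1], unordered.
    static void selectTopN(float[] s, int n) {
        int lo = 0, hi = s.length - 1;
        while (lo < hi) {
            int p = partition(s, lo, hi);
            if (p == n - 1) return;            // cut point found
            if (p < n - 1) lo = p + 1;         // need more on the left side
            else hi = p - 1;                   // left side already too big
        }
    }

    public static void main(String[] args) {
        float[] scores = {-3.5f, -0.2f, -9.1f, -1.7f, -4.4f, -0.9f};
        selectTopN(scores, 3);
        System.out.println(Arrays.toString(Arrays.copyOf(scores, 3))); // the three best scores
    }
}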
@@ -0,0 +1,64 @@
/*
 * Copyright 1999-2002 Carnegie Mellon University.
 * Portions Copyright 2002 Sun Microsystems, Inc.
 * Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */

package edu.cmu.sphinx.decoder.search;

import edu.cmu.sphinx.result.Result;
import edu.cmu.sphinx.util.props.Configurable;

/**
 * Defines the interface for the SearchManager. The SearchManager's primary role is to execute the search for a given
 * number of frames. The SearchManager will return interim results as the recognition proceeds and, when recognition
 * completes, a final result will be returned.
 */
public interface SearchManager extends Configurable {

    /**
     * Allocates the resources necessary for this search. This should be called once before any recognitions are
     * performed.
     */
    public void allocate();

    /**
     * Deallocates the resources necessary for this search. This should be called once after all recognitions are
     * completed and the search manager is no longer needed.
     */
    public void deallocate();

    /**
     * Prepares the SearchManager for recognition. This method must be called before <code>recognize</code> is
     * called. Typically, <code>start</code> and <code>stop</code> are called bracketing an utterance.
     */
    public void startRecognition();

    /** Performs post-recognition cleanup. This method should be called after recognize returns a final result. */
    public void stopRecognition();

    /**
     * Performs recognition. Processes no more than the given number of frames before returning. This method returns
     * a partial result after nFrames have been processed, or a final result if recognition completes while processing
     * frames. If a final result is returned, the actual number of frames processed can be retrieved from the result.
     * This method may block while waiting for frames to arrive.
     *
     * @param nFrames the maximum number of frames to process. A final result may be returned before all nFrames are
     *        processed.
     * @return the recognition result; the result may be a partial or a final result, or null if no frames have
     *         arrived
     */
    public Result recognize(int nFrames);
}
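The interface implies a fixed call order: allocate once, bracket each utterance's recognize() loop with startRecognition()/stopRecognition(), and deallocate at shutdown. A minimal driver sketch under stated assumptions: the manager would really come fully wired from the sphinx4 configuration system (createConfiguredManager() is a hypothetical placeholder), Result.isFinal() is referenced by the search manager javadoc below, and getBestResultNoFiller() is assumed from the sphinx4 Result API:

import edu.cmu.sphinx.decoder.search.SearchManager;
import edu.cmu.sphinx.result.Result;

public class SearchManagerLifecycle {

    // Hypothetical: in practice the manager comes from the sphinx4
    // configuration system (ConfigurationManager), fully wired up.
    static SearchManager createConfiguredManager() {
        throw new UnsupportedOperationException("wire up via configuration");
    }

    public static void main(String[] args) {
        SearchManager manager = createConfiguredManager();
        manager.allocate();                      // once, before any recognition
        try {
            manager.startRecognition();          // bracket one utterance
            Result result = null;
            do {
                result = manager.recognize(100); // process up to 100 frames per call
            } while (result != null && !result.isFinal());
            manager.stopRecognition();
            if (result != null) {
                System.out.println(result.getBestResultNoFiller());
            }
        } finally {
            manager.deallocate();                // once, at shutdown
        }
    }
}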
@@ -0,0 +1,222 @@
/*
 *
 * Copyright 1999-2004 Carnegie Mellon University.
 * Portions Copyright 2004 Sun Microsystems, Inc.
 * Portions Copyright 2004 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */
package edu.cmu.sphinx.decoder.search;

import edu.cmu.sphinx.decoder.scorer.Scoreable;
import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;

/** A factory for simple active lists */
public class SimpleActiveListFactory extends ActiveListFactory {

    /**
     * Creates a factory for simple active lists
     *
     * @param absoluteBeamWidth absolute pruning beam
     * @param relativeBeamWidth relative pruning beam
     */
    public SimpleActiveListFactory(int absoluteBeamWidth,
                                   double relativeBeamWidth) {
        super(absoluteBeamWidth, relativeBeamWidth);
    }

    public SimpleActiveListFactory() {
    }

    /*
     * (non-Javadoc)
     *
     * @see edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util.props.PropertySheet)
     */
    @Override
    public void newProperties(PropertySheet ps) throws PropertyException {
        super.newProperties(ps);
    }

    /*
     * (non-Javadoc)
     *
     * @see edu.cmu.sphinx.decoder.search.ActiveListFactory#newInstance()
     */
    @Override
    public ActiveList newInstance() {
        return new SimpleActiveList(absoluteBeamWidth, logRelativeBeamWidth);
    }

    /**
     * An active list that tries to be simple and correct. This type of active list will be slow, but should exhibit
     * correct behavior. Faster versions of the ActiveList exist (HeapActiveList, TreeActiveList).
     * <p>
     * This class is not thread safe and should only be used by a single thread.
     * <p>
     * Note that all scores are maintained in the LogMath log domain
     */
    class SimpleActiveList implements ActiveList {

        private int absoluteBeamWidth = 2000;
        private final float logRelativeBeamWidth;
        private Token bestToken;
        private List<Token> tokenList = new LinkedList<Token>();

        /**
         * Creates an empty active list
         *
         * @param absoluteBeamWidth the absolute beam width
         * @param logRelativeBeamWidth the relative beam width (in the log domain)
         */
        public SimpleActiveList(int absoluteBeamWidth,
                                float logRelativeBeamWidth) {
            this.absoluteBeamWidth = absoluteBeamWidth;
            this.logRelativeBeamWidth = logRelativeBeamWidth;
        }

        /**
         * Adds the given token to the list
         *
         * @param token the token to add
         */
        public void add(Token token) {
            tokenList.add(token);
            if (bestToken == null || token.getScore() > bestToken.getScore()) {
                bestToken = token;
            }
        }

        /**
         * Replaces an old token with a new token
         *
         * @param oldToken the token to replace (or null, in which case replace works like add)
         * @param newToken the new token to be placed in the list
         */
        public void replace(Token oldToken, Token newToken) {
            add(newToken);
            if (oldToken != null) {
                if (!tokenList.remove(oldToken)) {
                    // Some optional debugging code here to dump out the paths
                    // when this "should never happen" error happens
                    // System.out.println("SimpleActiveList: remove "
                    //         + oldToken + " missing, but replaced by "
                    //         + newToken);
                    // oldToken.dumpTokenPath(true);
                    // newToken.dumpTokenPath(true);
                }
            }
        }

        /**
         * Purges excess members. Removes all nodes that fall below the relativeBeamWidth
         *
         * @return a (possibly new) active list
         */
        public ActiveList purge() {
            if (absoluteBeamWidth > 0 && tokenList.size() > absoluteBeamWidth) {
                Collections.sort(tokenList, Scoreable.COMPARATOR);
                tokenList = tokenList.subList(0, absoluteBeamWidth);
            }
            return this;
        }

        /**
         * Retrieves the iterator for this token list.
         *
         * @return the iterator for this token list
         */
        public Iterator<Token> iterator() {
            return tokenList.iterator();
        }

        /**
         * Gets the set of all tokens
         *
         * @return the set of tokens
         */
        public List<Token> getTokens() {
            return tokenList;
        }

        /**
         * Returns the number of tokens on this active list
         *
         * @return the size of the active list
         */
        public final int size() {
            return tokenList.size();
        }

        /**
         * Gets the beam threshold based upon the best scoring token
         *
         * @return the beam threshold
         */
        public float getBeamThreshold() {
            return getBestScore() + logRelativeBeamWidth;
        }

        /**
         * Gets the best score in the list
         *
         * @return the best score
         */
        public float getBestScore() {
            float bestScore = -Float.MAX_VALUE;
            if (bestToken != null) {
                bestScore = bestToken.getScore();
            }
            return bestScore;
        }

        /**
         * Sets the best scoring token for this active list
         *
         * @param token the best scoring token
         */
        public void setBestToken(Token token) {
            bestToken = token;
        }

        /**
         * Gets the best scoring token for this active list
         *
         * @return the best scoring token
         */
        public Token getBestToken() {
            return bestToken;
        }

        /* (non-Javadoc)
         * @see edu.cmu.sphinx.decoder.search.ActiveList#createNew()
         */
        public ActiveList newInstance() {
            return SimpleActiveListFactory.this.newInstance();
        }
    }
}
@@ -0,0 +1,244 @@
/*
 * Copyright 1999-2002 Carnegie Mellon University.
 * Portions Copyright 2002 Sun Microsystems, Inc.
 * Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */

package edu.cmu.sphinx.decoder.search;

import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;
import edu.cmu.sphinx.util.props.S4Boolean;
import edu.cmu.sphinx.util.props.S4ComponentList;

import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.logging.Logger;

/**
 * A list of ActiveLists. Different token types are placed in different lists.
 * <p>
 * This class is not thread safe and should only be used by a single thread.
 */
public class SimpleActiveListManager implements ActiveListManager {

    /**
     * This property is used in the Iterator returned by the getNonEmittingListIterator() method. When the
     * Iterator.next() method is called, this property determines whether the lists prior to that returned by next()
     * are empty (they should be empty). If they are not empty, an Error will be thrown.
     */
    @S4Boolean(defaultValue = false)
    public static final String PROP_CHECK_PRIOR_LISTS_EMPTY = "checkPriorListsEmpty";

    /** The property that defines the name of the active list factory to be used by this search manager. */
    @S4ComponentList(type = ActiveListFactory.class)
    public final static String PROP_ACTIVE_LIST_FACTORIES = "activeListFactories";

    // --------------------------------------
    // Configuration data
    // --------------------------------------
    private Logger logger;
    private boolean checkPriorLists;
    private List<ActiveListFactory> activeListFactories;
    private ActiveList[] currentActiveLists;

    /**
     * Creates a simple list manager
     *
     * @param activeListFactories factories
     * @param checkPriorLists check prior lists during operation
     */
    public SimpleActiveListManager(List<ActiveListFactory> activeListFactories, boolean checkPriorLists) {
        this.logger = Logger.getLogger(getClass().getName());

        this.activeListFactories = activeListFactories;
        this.checkPriorLists = checkPriorLists;
    }

    public SimpleActiveListManager() {
    }

    /*
     * (non-Javadoc)
     *
     * @see edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util.props.PropertySheet)
     */
    public void newProperties(PropertySheet ps) throws PropertyException {
        logger = ps.getLogger();

        activeListFactories = ps.getComponentList(PROP_ACTIVE_LIST_FACTORIES, ActiveListFactory.class);
        checkPriorLists = ps.getBoolean(PROP_CHECK_PRIOR_LISTS_EMPTY);
    }

    /*
     * (non-Javadoc)
     *
     * @see edu.cmu.sphinx.decoder.search.ActiveListManager#setNumStateOrder(java.lang.Class[])
     */
    public void setNumStateOrder(int numStateOrder) {
        // check to make sure that we have the correct
        // number of active list factories for the given search states
        currentActiveLists = new ActiveList[numStateOrder];

        if (activeListFactories.isEmpty()) {
            logger.severe("No active list factories configured");
            throw new Error("No active list factories configured");
        }
        if (activeListFactories.size() != currentActiveLists.length) {
            logger.warning("Need " + currentActiveLists.length +
                    " active list factories, found " +
                    activeListFactories.size());
        }
        createActiveLists();
    }

    /**
     * Creates the emitting and non-emitting active lists. When creating the non-emitting active lists, we will look
     * at their respective beam widths (eg, word beam, unit beam, state beam).
     */
    private void createActiveLists() {
        int nlists = activeListFactories.size();
        for (int i = 0; i < currentActiveLists.length; i++) {
            int which = i;
            if (which >= nlists) {
                which = nlists - 1;
            }
            ActiveListFactory alf = activeListFactories.get(which);
            currentActiveLists[i] = alf.newInstance();
        }
    }

    /**
     * Adds the given token to the list
     *
     * @param token the token to add
     */
    public void add(Token token) {
        ActiveList activeList = findListFor(token);
        if (activeList == null) {
            throw new Error("Cannot find ActiveList for "
                    + token.getSearchState().getClass());
        }
        activeList.add(token);
    }

    /**
     * Given a token, finds the active list associated with the token type
     *
     * @param token the token of interest
     * @return the active list
     */
    private ActiveList findListFor(Token token) {
        return currentActiveLists[token.getSearchState().getOrder()];
    }

    /**
     * Returns the emitting ActiveList from the manager
     *
     * @return the emitting ActiveList
     */
    public ActiveList getEmittingList() {
        ActiveList list = currentActiveLists[currentActiveLists.length - 1];
        return list;
    }

    /**
     * Clears the emitting list in the manager
     */
    public void clearEmittingList() {
        ActiveList list = currentActiveLists[currentActiveLists.length - 1];
        currentActiveLists[currentActiveLists.length - 1] = list.newInstance();
    }

    /**
     * Returns an Iterator of all the non-emitting ActiveLists. The iteration order is the same as the search state
     * order.
     *
     * @return an Iterator of non-emitting ActiveLists
     */
    public Iterator<ActiveList> getNonEmittingListIterator() {
        return new NonEmittingListIterator();
    }

    private class NonEmittingListIterator implements Iterator<ActiveList> {

        private int listPtr;

        public NonEmittingListIterator() {
            listPtr = -1;
        }

        public boolean hasNext() {
            return listPtr + 1 < currentActiveLists.length - 1;
        }

        public ActiveList next() throws NoSuchElementException {
            listPtr++;

            if (listPtr >= currentActiveLists.length) {
                throw new NoSuchElementException();
            }
            if (checkPriorLists) {
                checkPriorLists();
            }
            return currentActiveLists[listPtr];
        }

        /** Checks that all lists prior to listPtr are empty. */
        private void checkPriorLists() {
            for (int i = 0; i < listPtr; i++) {
                ActiveList activeList = currentActiveLists[i];
                if (activeList.size() > 0) {
                    throw new Error("While processing state order "
                            + listPtr + ", state order " + i + " is not empty");
                }
            }
        }

        public void remove() {
            currentActiveLists[listPtr] =
                    currentActiveLists[listPtr].newInstance();
        }
    }

    /** Outputs debugging info for this list manager */
    public void dump() {
        System.out.println("--------------------");
        for (ActiveList al : currentActiveLists) {
            dumpList(al);
        }
    }

    /**
     * Dumps out debugging info for the given active list
     *
     * @param al the active list to dump
     */
    private void dumpList(ActiveList al) {
        System.out.println("Size: " + al.size() + " Best token: " + al.getBestToken());
    }

}
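createActiveLists() quietly reuses the last configured factory whenever fewer factories than state orders are supplied (hence the warning rather than an error above). A standalone check of that index-clamping rule, with plain strings standing in for factories; hypothetical demo code, not part of the commit:

public class FactoryClampDemo {
    public static void main(String[] args) {
        String[] factories = {"wordBeam", "unitBeam"}; // 2 factories configured
        int numStateOrder = 5;                          // but 5 state orders

        // Same clamping as SimpleActiveListManager.createActiveLists():
        // state orders beyond the factory list fall back to the last factory.
        for (int i = 0; i < numStateOrder; i++) {
            int which = Math.min(i, factories.length - 1);
            System.out.println("state order " + i + " -> " + factories[which]);
        }
        // prints wordBeam, unitBeam, unitBeam, unitBeam, unitBeam
    }
}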
@@ -0,0 +1,680 @@
/*
 * Copyright 1999-2002 Carnegie Mellon University.
 * Portions Copyright 2002 Sun Microsystems, Inc.
 * Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */
package edu.cmu.sphinx.decoder.search;

import edu.cmu.sphinx.decoder.pruner.Pruner;
import edu.cmu.sphinx.decoder.scorer.AcousticScorer;
import edu.cmu.sphinx.frontend.Data;
import edu.cmu.sphinx.linguist.Linguist;
import edu.cmu.sphinx.linguist.SearchState;
import edu.cmu.sphinx.linguist.SearchStateArc;
import edu.cmu.sphinx.linguist.WordSearchState;
import edu.cmu.sphinx.result.Result;
import edu.cmu.sphinx.util.LogMath;
import edu.cmu.sphinx.util.StatisticsVariable;
import edu.cmu.sphinx.util.Timer;
import edu.cmu.sphinx.util.TimerPool;
import edu.cmu.sphinx.util.props.*;

import java.util.*;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.io.IOException;

/**
 * Provides the breadth first search. To perform recognition an application should call initialize before recognition
 * begins, and repeatedly call <code>recognize</code> until Result.isFinal() returns true. Once a final result has
 * been obtained, <code>terminate</code> should be called.
 * <p>
 * All scores and probabilities are maintained in the log math log domain.
 * <p>
 * For information about breadth first search please refer to "Spoken Language Processing", X. Huang, PTR
 */

// TODO - need to add in timing code.
public class SimpleBreadthFirstSearchManager extends TokenSearchManager {

    /** The property that defines the name of the linguist to be used by this search manager. */
    @S4Component(type = Linguist.class)
    public final static String PROP_LINGUIST = "linguist";

    /** The property that defines the name of the pruner to be used by this search manager. */
    @S4Component(type = Pruner.class)
    public final static String PROP_PRUNER = "pruner";

    /** The property that defines the name of the scorer to be used by this search manager. */
    @S4Component(type = AcousticScorer.class)
    public final static String PROP_SCORER = "scorer";

    /** The property that defines the name of the active list factory to be used by this search manager. */
    @S4Component(type = ActiveListFactory.class)
    public final static String PROP_ACTIVE_LIST_FACTORY = "activeListFactory";

    /**
     * The property that when set to <code>true</code> will cause the recognizer to count up all the tokens in the
     * active list after every frame.
     */
    @S4Boolean(defaultValue = false)
    public final static String PROP_SHOW_TOKEN_COUNT = "showTokenCount";

    /**
     * The property that sets the minimum score relative to the maximum score in the word list for pruning. Words
     * with a score less than relativeBeamWidth * maximumScore will be pruned from the list
     */
    @S4Double(defaultValue = 0.0)
    public final static String PROP_RELATIVE_WORD_BEAM_WIDTH = "relativeWordBeamWidth";

    /**
     * The property that controls whether or not relative beam pruning will be performed on the entry into a
     * state.
     */
    @S4Boolean(defaultValue = false)
    public final static String PROP_WANT_ENTRY_PRUNING = "wantEntryPruning";

    /**
     * The property that controls the number of frames processed for every time the decode growth step is skipped.
     * Setting this property to zero disables grow skipping. Setting this number to a small integer will increase the
     * speed of the decoder but will also decrease its accuracy. The higher the number, the less often the grow code
     * is skipped.
     */
    @S4Integer(defaultValue = 0)
    public final static String PROP_GROW_SKIP_INTERVAL = "growSkipInterval";

    protected Linguist linguist; // Provides grammar/language info
    private Pruner pruner; // used to prune the active list
    private AcousticScorer scorer; // used to score the active list
    protected int currentFrameNumber; // the current frame number
    protected long currentCollectTime; // the collect time of the current frame
    protected ActiveList activeList; // the list of active tokens
    protected List<Token> resultList; // the current set of results
    protected LogMath logMath;

    private Logger logger;
    private String name;

    // ------------------------------------
    // monitoring data
    // ------------------------------------

    private Timer scoreTimer; // TODO move these timers out
    private Timer pruneTimer;
    protected Timer growTimer;
    private StatisticsVariable totalTokensScored;
    private StatisticsVariable tokensPerSecond;
    private StatisticsVariable curTokensScored;
    private StatisticsVariable tokensCreated;
    private StatisticsVariable viterbiPruned;
    private StatisticsVariable beamPruned;

    // ------------------------------------
    // Working data
    // ------------------------------------

    protected boolean showTokenCount;
    private boolean wantEntryPruning;
    protected Map<SearchState, Token> bestTokenMap;
    private float logRelativeWordBeamWidth;
    private int totalHmms;
    private double startTime;
    private float threshold;
    private float wordThreshold;
    private int growSkipInterval;
    protected ActiveListFactory activeListFactory;
    protected boolean streamEnd;

    public SimpleBreadthFirstSearchManager() {
    }

    /**
     * Creates a manager for simple search
     *
     * @param linguist linguist to configure search space
     * @param pruner pruner to prune extra paths
     * @param scorer scorer to estimate token probability
     * @param activeListFactory factory for list of tokens
     * @param showTokenCount show count of the tokens during decoding
     * @param relativeWordBeamWidth relative pruning beam for lookahead
     * @param growSkipInterval interval to skip growth step
     * @param wantEntryPruning entry pruning
     */
    public SimpleBreadthFirstSearchManager(Linguist linguist, Pruner pruner,
                                           AcousticScorer scorer, ActiveListFactory activeListFactory,
                                           boolean showTokenCount, double relativeWordBeamWidth,
                                           int growSkipInterval, boolean wantEntryPruning) {
        this.name = getClass().getName();
        this.logger = Logger.getLogger(name);
        this.logMath = LogMath.getLogMath();
        this.linguist = linguist;
        this.pruner = pruner;
        this.scorer = scorer;
        this.activeListFactory = activeListFactory;
        this.showTokenCount = showTokenCount;
        this.growSkipInterval = growSkipInterval;
        this.wantEntryPruning = wantEntryPruning;
        this.logRelativeWordBeamWidth = logMath.linearToLog(relativeWordBeamWidth);
        this.keepAllTokens = true;
    }

    @Override
    public void newProperties(PropertySheet ps) throws PropertyException {
        super.newProperties(ps);

        logMath = LogMath.getLogMath();
        logger = ps.getLogger();
        name = ps.getInstanceName();

        linguist = (Linguist) ps.getComponent(PROP_LINGUIST);
        pruner = (Pruner) ps.getComponent(PROP_PRUNER);
        scorer = (AcousticScorer) ps.getComponent(PROP_SCORER);
        activeListFactory = (ActiveListFactory) ps.getComponent(PROP_ACTIVE_LIST_FACTORY);
        showTokenCount = ps.getBoolean(PROP_SHOW_TOKEN_COUNT);

        double relativeWordBeamWidth = ps.getDouble(PROP_RELATIVE_WORD_BEAM_WIDTH);
        growSkipInterval = ps.getInt(PROP_GROW_SKIP_INTERVAL);
        wantEntryPruning = ps.getBoolean(PROP_WANT_ENTRY_PRUNING);
        logRelativeWordBeamWidth = logMath.linearToLog(relativeWordBeamWidth);

        this.keepAllTokens = true;
    }

    /** Called at the start of recognition. Gets the search manager ready to recognize */
    public void startRecognition() {
        logger.finer("starting recognition");

        linguist.startRecognition();
        pruner.startRecognition();
        scorer.startRecognition();
        localStart();
        if (startTime == 0.0) {
            startTime = System.currentTimeMillis();
        }
    }

    /**
     * Performs the recognition for the given number of frames.
     *
     * @param nFrames the number of frames to recognize
     * @return the current result or null if there is no Result (due to the lack of frames to recognize)
     */
    public Result recognize(int nFrames) {
        boolean done = false;
        Result result = null;
        streamEnd = false;

        for (int i = 0; i < nFrames && !done; i++) {
            done = recognize();
        }

        // generate a new temporary result if the current token is based on a final search state
        // remark: the first check for not null is necessary in case the search space does not
        // contain scoreable tokens.
        if (activeList.getBestToken() != null) {
            // to make the current result as correct as possible we undo the last search graph expansion here
            ActiveList fixedList = undoLastGrowStep();

            // Now create the result using the fixed active-list.
            if (!streamEnd)
                result = new Result(fixedList, resultList, currentFrameNumber, done,
                        linguist.getSearchGraph().getWordTokenFirst(), false);
        }

        if (showTokenCount) {
            showTokenCount();
        }

        return result;
    }

    /**
     * Because growBranches() is called although no data is left after the last speech frame, the ordering of the
     * active-list might depend on the transition probabilities and (penalty-scores) only. Therefore we need to undo
     * the last grow-step up to final states or the last emitting state in order to fix the list.
     *
     * @return newly created list
     */
    protected ActiveList undoLastGrowStep() {
        ActiveList fixedList = activeList.newInstance();

        for (Token token : activeList) {
            Token curToken = token.getPredecessor();

            // remove the final states that are not the real final ones, because they just hide prior final tokens:
            while (curToken.getPredecessor() != null && (
                    (curToken.isFinal() && curToken.getPredecessor() != null && !curToken.getPredecessor().isFinal())
                    || (curToken.isEmitting() && curToken.getData() == null) // tokens that have not been scored yet
                    || (!curToken.isFinal() && !curToken.isEmitting()))) {
                curToken = curToken.getPredecessor();
            }

            fixedList.add(curToken);
        }

        return fixedList;
    }

    /** Terminates a recognition */
    public void stopRecognition() {
        localStop();
        scorer.stopRecognition();
        pruner.stopRecognition();
        linguist.stopRecognition();

        logger.finer("recognition stopped");
    }

    /**
     * Performs recognition for one frame. Returns true if recognition has been completed.
     *
     * @return <code>true</code> if recognition is completed.
     */
    protected boolean recognize() {
        boolean more = scoreTokens(); // score emitting tokens
        if (more) {
            pruneBranches(); // eliminate poor branches
            currentFrameNumber++;
            if (growSkipInterval == 0
                    || (currentFrameNumber % growSkipInterval) != 0) {
                growBranches(); // extend remaining branches
            }
        }
        return !more;
    }

    /** Gets the initial grammar node from the linguist and creates a GrammarNodeToken */
    protected void localStart() {
        currentFrameNumber = 0;
        curTokensScored.value = 0;
        ActiveList newActiveList = activeListFactory.newInstance();
        SearchState state = linguist.getSearchGraph().getInitialState();
        newActiveList.add(new Token(state, -1));
        activeList = newActiveList;

        growBranches();
    }

    /** Local cleanup for this search manager */
    protected void localStop() {
    }

    /**
     * Goes through the active list of tokens and expands each token, finding the set of successor tokens until all
     * the successor tokens are emitting tokens.
     */
    protected void growBranches() {
        int mapSize = activeList.size() * 10;
        if (mapSize == 0) {
            mapSize = 1;
        }
        growTimer.start();
        bestTokenMap = new HashMap<SearchState, Token>(mapSize);
        ActiveList oldActiveList = activeList;
        resultList = new LinkedList<Token>();
        activeList = activeListFactory.newInstance();
        threshold = oldActiveList.getBeamThreshold();
        wordThreshold = oldActiveList.getBestScore() + logRelativeWordBeamWidth;

        for (Token token : oldActiveList) {
            collectSuccessorTokens(token);
        }
        growTimer.stop();
        if (logger.isLoggable(Level.FINE)) {
            int hmms = activeList.size();
            totalHmms += hmms;
            logger.fine("Frame: " + currentFrameNumber + " Hmms: "
                    + hmms + " total " + totalHmms);
        }
    }

    /**
     * Calculates the acoustic scores for the active list. The active list should contain only emitting tokens.
     *
     * @return <code>true</code> if there are more frames to score, otherwise false
     */
    protected boolean scoreTokens() {
        boolean hasMoreFrames = false;

        scoreTimer.start();
        Data data = scorer.calculateScores(activeList.getTokens());
        scoreTimer.stop();

        Token bestToken = null;
        if (data instanceof Token) {
            bestToken = (Token) data;
        } else if (data == null) {
            streamEnd = true;
        }

        if (bestToken != null) {
            hasMoreFrames = true;
            currentCollectTime = bestToken.getCollectTime();
            activeList.setBestToken(bestToken);
        }

        // update statistics
        curTokensScored.value += activeList.size();
        totalTokensScored.value += activeList.size();
        tokensPerSecond.value = totalTokensScored.value / getTotalTime();

        // if (logger.isLoggable(Level.FINE)) {
        //     logger.fine(currentFrameNumber + " " + activeList.size()
        //             + " " + curTokensScored.value + " "
        //             + (int) tokensPerSecond.value);
        // }

        return hasMoreFrames;
    }

    /**
     * Returns the total time since we started
     *
     * @return the total time (in seconds)
     */
    private double getTotalTime() {
        return (System.currentTimeMillis() - startTime) / 1000.0;
    }

    /** Removes unpromising branches from the active list */
    protected void pruneBranches() {
        int startSize = activeList.size();
        pruneTimer.start();
        activeList = pruner.prune(activeList);
        beamPruned.value += startSize - activeList.size();
        pruneTimer.stop();
    }

    /**
     * Gets the best token for this state
     *
     * @param state the state of interest
     * @return the best token
     */
    protected Token getBestToken(SearchState state) {
        Token best = bestTokenMap.get(state);
        if (logger.isLoggable(Level.FINER) && best != null) {
            logger.finer("BT " + best + " for state " + state);
        }
        return best;
    }

    /**
     * Sets the best token for a given state
     *
     * @param token the best token
     * @param state the state
     * @return the previous best token for the given state, or null if no previous best token
     */
    protected Token setBestToken(Token token, SearchState state) {
        return bestTokenMap.put(state, token);
    }

    public ActiveList getActiveList() {
        return activeList;
    }

    /**
     * Collects the next set of emitting tokens from a token and accumulates them in the active or result lists
     *
     * @param token the token to collect successors from
     */
    protected void collectSuccessorTokens(Token token) {
        SearchState state = token.getSearchState();
        // If this is a final state, add it to the final list
        if (token.isFinal()) {
            resultList.add(token);
        }
        if (token.getScore() < threshold) {
            return;
        }
        if (state instanceof WordSearchState
                && token.getScore() < wordThreshold) {
            return;
        }
        SearchStateArc[] arcs = state.getSuccessors();
        // For each successor
        // calculate the entry score for the token based upon the
        // predecessor token score and the transition probabilities
        // if the score is better than the best score encountered for
        // the SearchState and frame then create a new token, add
        // it to the lattice and the SearchState.
        // If the token is an emitting token add it to the list,
        // otherwise recursively collect the new tokens successors.
        for (SearchStateArc arc : arcs) {
            SearchState nextState = arc.getState();
            // We're actually multiplying the variables, but since
            // these come in log(), multiply gets converted to add
            float logEntryScore = token.getScore() + arc.getProbability();
            if (wantEntryPruning) { // false by default
                if (logEntryScore < threshold) {
                    continue;
                }
                if (nextState instanceof WordSearchState
                        && logEntryScore < wordThreshold) {
                    continue;
                }
            }
            Token predecessor = getResultListPredecessor(token);

            // if not emitting, check to see if we've already visited
            // this state during this frame. Expand the token only if we
            // haven't visited it already. This prevents the search
            // from getting stuck in a loop of states with no
            // intervening emitting nodes. This can happen with nasty
            // jsgf grammars such as ((foo*)*)*
            if (!nextState.isEmitting()) {
                Token newToken = new Token(predecessor, nextState, logEntryScore,
                        arc.getInsertionProbability(),
                        arc.getLanguageProbability(),
                        currentCollectTime);
                tokensCreated.value++;
                if (!isVisited(newToken)) {
                    collectSuccessorTokens(newToken);
                }
                continue;
            }

            Token bestToken = getBestToken(nextState);
            if (bestToken == null) {
                Token newToken = new Token(predecessor, nextState, logEntryScore,
                        arc.getInsertionProbability(),
                        arc.getLanguageProbability(),
                        currentFrameNumber);
                tokensCreated.value++;
                setBestToken(newToken, nextState);
                activeList.add(newToken);
            } else {
                if (bestToken.getScore() <= logEntryScore) {
                    bestToken.update(predecessor, nextState, logEntryScore,
                            arc.getInsertionProbability(),
                            arc.getLanguageProbability(),
                            currentCollectTime);
                    viterbiPruned.value++;
                } else {
                    viterbiPruned.value++;
                }
            }
        }
    }

    /**
     * Determines whether or not we've visited the state associated with this token since the previous frame.
     *
     * @param t the token to check
     * @return true if we've visited the search state since the last frame
     */
    private boolean isVisited(Token t) {
        SearchState curState = t.getSearchState();

        t = t.getPredecessor();

        while (t != null && !t.isEmitting()) {
            if (curState.equals(t.getSearchState())) {
                return true;
            }
            t = t.getPredecessor();
        }
        return false;
    }

    /** Counts all the tokens in the active list (and displays them). This is an expensive operation. */
    protected void showTokenCount() {
        if (logger.isLoggable(Level.INFO)) {
            Set<Token> tokenSet = new HashSet<Token>();
            for (Token token : activeList) {
                while (token != null) {
                    tokenSet.add(token);
                    token = token.getPredecessor();
                }
            }
            logger.info("Token Lattice size: " + tokenSet.size());
            tokenSet = new HashSet<Token>();
            for (Token token : resultList) {
                while (token != null) {
                    tokenSet.add(token);
                    token = token.getPredecessor();
                }
            }
            logger.info("Result Lattice size: " + tokenSet.size());
        }
    }

    /**
     * Returns the best token map.
     *
     * @return the best token map
     */
    protected Map<SearchState, Token> getBestTokenMap() {
        return bestTokenMap;
    }

    /**
     * Sets the best token Map.
     *
     * @param bestTokenMap the new best token Map
     */
    protected void setBestTokenMap(Map<SearchState, Token> bestTokenMap) {
        this.bestTokenMap = bestTokenMap;
    }

    /**
     * Returns the result list.
     *
     * @return the result list
     */
    public List<Token> getResultList() {
        return resultList;
    }

    /**
     * Returns the current frame number.
     *
     * @return the current frame number
     */
    public int getCurrentFrameNumber() {
        return currentFrameNumber;
    }

    /**
     * Returns the Timer for growing.
     *
     * @return the Timer for growing
     */
    public Timer getGrowTimer() {
        return growTimer;
    }

    /**
     * Returns the tokensCreated StatisticsVariable.
     *
     * @return the tokensCreated StatisticsVariable
     */
    public StatisticsVariable getTokensCreated() {
        return tokensCreated;
    }

    /*
     * (non-Javadoc)
     *
     * @see edu.cmu.sphinx.decoder.search.SearchManager#allocate()
     */
    public void allocate() {
        totalTokensScored = StatisticsVariable
                .getStatisticsVariable("totalTokensScored");
        tokensPerSecond = StatisticsVariable
                .getStatisticsVariable("tokensScoredPerSecond");
        curTokensScored = StatisticsVariable
                .getStatisticsVariable("curTokensScored");
        tokensCreated = StatisticsVariable
                .getStatisticsVariable("tokensCreated");
        viterbiPruned = StatisticsVariable
                .getStatisticsVariable("viterbiPruned");
        beamPruned = StatisticsVariable.getStatisticsVariable("beamPruned");

        try {
            linguist.allocate();
            pruner.allocate();
            scorer.allocate();
        } catch (IOException e) {
            throw new RuntimeException("Allocation of search manager resources failed", e);
        }

        scoreTimer = TimerPool.getTimer(this, "Score");
        pruneTimer = TimerPool.getTimer(this, "Prune");
        growTimer = TimerPool.getTimer(this, "Grow");
    }

    /*
     * (non-Javadoc)
     *
     * @see edu.cmu.sphinx.decoder.search.SearchManager#deallocate()
     */
    public void deallocate() {
        try {
            scorer.deallocate();
            pruner.deallocate();
            linguist.deallocate();
        } catch (IOException e) {
            throw new RuntimeException("Deallocation of search manager resources failed", e);
        }
    }

    @Override
    public String toString() {
        return name;
    }
}
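The growSkipInterval guard in recognize() reads backwards at first glance: growth runs on every frame except those where currentFrameNumber is a non-zero multiple of the interval. A standalone trace of which frames grow; hypothetical demo code, not part of the commit:

public class GrowSkipDemo {
    public static void main(String[] args) {
        int growSkipInterval = 3; // 0 would disable skipping entirely

        // Same test as SimpleBreadthFirstSearchManager.recognize():
        // grow unless the frame number is a non-zero multiple of the interval.
        for (int currentFrameNumber = 1; currentFrameNumber <= 9; currentFrameNumber++) {
            boolean grows = growSkipInterval == 0
                    || (currentFrameNumber % growSkipInterval) != 0;
            System.out.println("frame " + currentFrameNumber + ": "
                    + (grows ? "grow" : "skip"));
        }
        // frames 3, 6 and 9 skip the growth step; all others grow
    }
}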
|
@ -0,0 +1,207 @@
/*
 * Copyright 1999-2004 Carnegie Mellon University.
 * Portions Copyright 2004 Sun Microsystems, Inc.
 * Portions Copyright 2004 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */
package edu.cmu.sphinx.decoder.search;

import edu.cmu.sphinx.decoder.scorer.Scoreable;
import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;

/**
 * @author plamere
 */
public class SortingActiveListFactory extends ActiveListFactory {
    /**
     * @param absoluteBeamWidth absolute pruning beam
     * @param relativeBeamWidth relative pruning beam
     */
    public SortingActiveListFactory(int absoluteBeamWidth,
            double relativeBeamWidth)
    {
        super(absoluteBeamWidth, relativeBeamWidth);
    }

    public SortingActiveListFactory() {

    }

    /*
     * (non-Javadoc)
     *
     * @see edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util.props.PropertySheet)
     */
    @Override
    public void newProperties(PropertySheet ps) throws PropertyException {
        super.newProperties(ps);
    }


    /*
     * (non-Javadoc)
     *
     * @see edu.cmu.sphinx.decoder.search.ActiveListFactory#newInstance()
     */
    @Override
    public ActiveList newInstance() {
        return new SortingActiveList(absoluteBeamWidth, logRelativeBeamWidth);
    }
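
    // Usage sketch (illustrative, not part of the original file; the beam
    // values are hypothetical). Outside the configuration system the factory
    // can be constructed directly; logRelativeBeamWidth is derived from the
    // relative beam by the ActiveListFactory base class:
    //
    //   ActiveListFactory factory = new SortingActiveListFactory(2000, 1E-60);
    //   ActiveList activeList = factory.newInstance();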


    /**
     * An active list that tries to be simple and correct. This type of active list will be slow, but should exhibit
     * correct behavior. Faster versions of the ActiveList exist (HeapActiveList, TreeActiveList).
     * <p>
     * This class is not thread safe and should only be used by a single thread.
     * <p>
     * Note that all scores are maintained in the LogMath log base.
     */

    class SortingActiveList implements ActiveList {

        private final static int DEFAULT_SIZE = 1000;
        private final int absoluteBeamWidth;
        private final float logRelativeBeamWidth;
        private Token bestToken;
        // when the list is changed these things should be
        // changed/updated as well
        private List<Token> tokenList;


        /**
         * Creates an empty active list
         *
         * @param absoluteBeamWidth beam for absolute pruning
         * @param logRelativeBeamWidth beam for relative pruning
         */
        public SortingActiveList(int absoluteBeamWidth, float logRelativeBeamWidth) {
            this.absoluteBeamWidth = absoluteBeamWidth;
            this.logRelativeBeamWidth = logRelativeBeamWidth;

            int initListSize = absoluteBeamWidth > 0 ? absoluteBeamWidth : DEFAULT_SIZE;
            this.tokenList = new ArrayList<Token>(initListSize);
        }


        /**
         * Adds the given token to the list
         *
         * @param token the token to add
         */
        public void add(Token token) {
            tokenList.add(token);
            if (bestToken == null || token.getScore() > bestToken.getScore()) {
                bestToken = token;
            }
        }

        /**
         * Purges excess members, reducing the size of the token list to the absoluteBeamWidth.
         *
         * @return a (possibly new) active list
         */
        public ActiveList purge() {
            // if the absolute beam is zero, this means there
            // should be no constraint on the abs beam size at all
            // so we will only be relative beam pruning, which means
            // that we don't have to sort the list
            if (absoluteBeamWidth > 0 && tokenList.size() > absoluteBeamWidth) {
                Collections.sort(tokenList, Scoreable.COMPARATOR);
                tokenList = tokenList.subList(0, absoluteBeamWidth);
            }
            return this;
        }
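
        // Usage sketch (illustrative; the caller names are hypothetical): a
        // search manager typically adds tokens, purges down to the absolute
        // beam, and then applies the relative beam threshold itself:
        //
        //   activeList.add(token);            // tracks the best token as it goes
        //   activeList = activeList.purge();  // keep the absoluteBeamWidth best
        //   float threshold = activeList.getBeamThreshold();
        //   for (Token t : activeList) {
        //       boolean keep = t.getScore() > threshold;  // relative beam check
        //   }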


        /**
         * Gets the beam threshold based upon the best scoring token
         *
         * @return the beam threshold
         */
        public float getBeamThreshold() {
            return getBestScore() + logRelativeBeamWidth;
        }


        /**
         * Gets the best score in the list
         *
         * @return the best score
         */
        public float getBestScore() {
            float bestScore = -Float.MAX_VALUE;
            if (bestToken != null) {
                bestScore = bestToken.getScore();
            }
            return bestScore;
        }


        /**
         * Sets the best scoring token for this active list
         *
         * @param token the best scoring token
         */
        public void setBestToken(Token token) {
            bestToken = token;
        }


        /**
         * Gets the best scoring token for this active list
         *
         * @return the best scoring token
         */
        public Token getBestToken() {
            return bestToken;
        }


        /**
         * Retrieves the iterator for this token list.
         *
         * @return the iterator for this token list
         */
        public Iterator<Token> iterator() {
            return tokenList.iterator();
        }


        /**
         * Gets the list of all tokens
         *
         * @return the list of tokens
         */
        public List<Token> getTokens() {
            return tokenList;
        }

        /**
         * Returns the number of tokens on this active list
         *
         * @return the size of the active list
         */
        public final int size() {
            return tokenList.size();
        }


        /* (non-Javadoc)
         * @see edu.cmu.sphinx.decoder.search.ActiveList#newInstance()
         */
        public ActiveList newInstance() {
            return SortingActiveListFactory.this.newInstance();
        }
    }
}

@ -0,0 +1,477 @@
/*
 * Copyright 1999-2002 Carnegie Mellon University.
 * Portions Copyright 2002 Sun Microsystems, Inc.
 * Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */

package edu.cmu.sphinx.decoder.search;

import edu.cmu.sphinx.decoder.scorer.Scoreable;
import edu.cmu.sphinx.decoder.scorer.ScoreProvider;
import edu.cmu.sphinx.frontend.Data;
import edu.cmu.sphinx.frontend.FloatData;
import edu.cmu.sphinx.linguist.HMMSearchState;
import edu.cmu.sphinx.linguist.SearchState;
import edu.cmu.sphinx.linguist.UnitSearchState;
import edu.cmu.sphinx.linguist.WordSearchState;
import edu.cmu.sphinx.linguist.acoustic.Unit;
import edu.cmu.sphinx.linguist.dictionary.Pronunciation;
import edu.cmu.sphinx.linguist.dictionary.Word;

import java.text.DecimalFormat;
import java.util.*;

/**
 * Represents a single state in the recognition trellis. Subclasses of a token are used to represent the various
 * emitting states.
 * <p>
 * All scores are maintained in the LogMath log base.
 */
public class Token implements Scoreable {

    private static int curCount;
    private static int lastCount;
    private static final DecimalFormat scoreFmt = new DecimalFormat("0.0000000E00");
    private static final DecimalFormat numFmt = new DecimalFormat("0000");

    private Token predecessor;

    private float logLanguageScore;
    private float logTotalScore;
    private float logInsertionScore;
    private float logAcousticScore;

    private SearchState searchState;

    private long collectTime;
    private Data data;

    /**
     * Internal constructor for a token. Used by classes Token, CombineToken, ParallelToken
     *
     * @param predecessor the predecessor for this token
     * @param state the SearchState associated with this token
     * @param logTotalScore the total entry score for this token (in LogMath log base)
     * @param logInsertionScore the insertion score associated with this token (in LogMath log base)
     * @param logLanguageScore the language score associated with this token (in LogMath log base)
     * @param collectTime the frame collection time
     */
    public Token(Token predecessor,
                 SearchState state,
                 float logTotalScore,
                 float logInsertionScore,
                 float logLanguageScore,
                 long collectTime) {
        this.predecessor = predecessor;
        this.searchState = state;
        this.logTotalScore = logTotalScore;
        this.logInsertionScore = logInsertionScore;
        this.logLanguageScore = logLanguageScore;
        this.collectTime = collectTime;
        curCount++;
    }


    /**
     * Creates the initial token with the given word history depth
     *
     * @param state the SearchState associated with this token
     * @param collectTime collection time of this token
     */
    public Token(SearchState state, long collectTime) {
        this(null, state, 0.0f, 0.0f, 0.0f, collectTime);
    }


    /**
     * Creates a Token with the given acoustic and language scores and predecessor.
     *
     * @param predecessor previous token
     * @param logTotalScore total score
     * @param logAcousticScore the log acoustic score
     * @param logInsertionScore the log insertion score
     * @param logLanguageScore the log language score
     */
    public Token(Token predecessor,
                 float logTotalScore,
                 float logAcousticScore,
                 float logInsertionScore,
                 float logLanguageScore) {
        this(predecessor, null, logTotalScore, logInsertionScore, logLanguageScore, 0);
        this.logAcousticScore = logAcousticScore;
    }


    /**
     * Returns the predecessor for this token, or null if this token has no predecessors
     *
     * @return the predecessor
     */
    public Token getPredecessor() {
        return predecessor;
    }


    /**
     * Collect time is different from the frame number because some frames might be skipped by the silence detector
     *
     * @return collection time in milliseconds
     */
    public long getCollectTime() {
        return collectTime;
    }


    /** Sets the feature for this Token.
     * @param data features
     */
    public void setData(Data data) {
        this.data = data;
        if (data instanceof FloatData) {
            collectTime = ((FloatData) data).getCollectTime();
        }
    }


    /**
     * Returns the feature for this Token.
     *
     * @return the feature for this Token
     */
    public Data getData() {
        return data;
    }


    /**
     * Returns the score for the token. The score is a combination of language and acoustic scores
     *
     * @return the score of this frame (in logMath log base)
     */
    public float getScore() {
        return logTotalScore;
    }


    /**
     * Calculates a score against the given feature. The score can be retrieved
     * with getScore. The token will keep a reference to the scored feature vector.
     *
     * @param feature the feature to be scored
     * @return the score for the feature
     */
    public float calculateScore(Data feature) {

        logAcousticScore = ((ScoreProvider) searchState).getScore(feature);

        logTotalScore += logAcousticScore;

        setData(feature);

        return logTotalScore;
    }

    public float[] calculateComponentScore(Data feature) {
        return ((ScoreProvider) searchState).getComponentScore(feature);
    }


    /**
     * Normalizes a previously calculated score
     *
     * @param maxLogScore the score to normalize this score with
     * @return the normalized score
     */
    public float normalizeScore(float maxLogScore) {
        logTotalScore -= maxLogScore;
        logAcousticScore -= maxLogScore;
        return logTotalScore;
    }
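
    // Score lifecycle sketch (illustrative, not part of the original file):
    // each emitting token is scored against the current feature and later
    // normalized by the frame's best score to avoid log-domain underflow:
    //
    //   float s = token.calculateScore(feature);  // adds the acoustic score
    //   float best = activeList.getBestScore();
    //   token.normalizeScore(best);               // logTotalScore -= best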

    /**
     * Sets the score for this token
     *
     * @param logScore the new score for the token (in logMath log base)
     */
    public void setScore(float logScore) {
        this.logTotalScore = logScore;
    }


    /**
     * Returns the language score associated with this token
     *
     * @return the language score (in logMath log base)
     */
    public float getLanguageScore() {
        return logLanguageScore;
    }

    /**
     * Returns the insertion score associated with this token.
     * Insertion score is the score of the transition between
     * states. It might be the transition score from the acoustic model,
     * the phone insertion score, or the word insertion probability from
     * the linguist.
     *
     * @return the insertion score (in logMath log base)
     */
    public float getInsertionScore() {
        return logInsertionScore;
    }


    /**
     * Returns the acoustic score for this token (in logMath log base).
     * The acoustic score is the GMM score for the token's frame.
     *
     * @return score
     */
    public float getAcousticScore() {
        return logAcousticScore;
    }


    /**
     * Returns the SearchState associated with this token
     *
     * @return the searchState
     */
    public SearchState getSearchState() {
        return searchState;
    }


    /**
     * Determines if this token is associated with an emitting state. An emitting state is a state that can be scored
     * acoustically.
     *
     * @return <code>true</code> if this token is associated with an emitting state
     */
    public boolean isEmitting() {
        return searchState.isEmitting();
    }


    /**
     * Determines if this token is associated with a final SentenceHMM state.
     *
     * @return <code>true</code> if this token is associated with a final state
     */
    public boolean isFinal() {
        return searchState.isFinal();
    }


    /**
     * Determines if this token marks the end of a word
     *
     * @return <code>true</code> if this token marks the end of a word
     */
    public boolean isWord() {
        return searchState instanceof WordSearchState;
    }


    /**
     * Retrieves the string representation of this object
     *
     * @return the string representation of this object
     */
    @Override
    public String toString() {
        return
            numFmt.format(getCollectTime()) + ' ' +
            scoreFmt.format(getScore()) + ' ' +
            scoreFmt.format(getAcousticScore()) + ' ' +
            scoreFmt.format(getLanguageScore()) + ' ' +
            getSearchState();
    }


    /** Dumps a branch of tokens */
    public void dumpTokenPath() {
        dumpTokenPath(true);
    }


    /**
     * Dumps a branch of tokens
     *
     * @param includeHMMStates if true include all sentence hmm states
     */
    public void dumpTokenPath(boolean includeHMMStates) {
        Token token = this;
        List<Token> list = new ArrayList<Token>();

        while (token != null) {
            list.add(token);
            token = token.getPredecessor();
        }
        for (int i = list.size() - 1; i >= 0; i--) {
            token = list.get(i);
            if (includeHMMStates ||
                    (!(token.getSearchState() instanceof HMMSearchState))) {
                System.out.println("  " + token);
            }
        }
        System.out.println();
    }


    /**
     * Returns the string of words leading up to this token.
     *
     * @param wantFiller if true, filler words are added
     * @param wantPronunciations if true append [ phoneme phoneme ... ] after each word
     * @return the word path
     */
    public String getWordPath(boolean wantFiller, boolean wantPronunciations) {
        StringBuilder sb = new StringBuilder();
        Token token = this;

        while (token != null) {
            if (token.isWord()) {
                WordSearchState wordState =
                        (WordSearchState) token.getSearchState();
                Pronunciation pron = wordState.getPronunciation();
                Word word = wordState.getPronunciation().getWord();

                // System.out.println(token.getFrameNumber() + " " + word + " " + token.logLanguageScore + " " + token.logAcousticScore);

                if (wantFiller || !word.isFiller()) {
                    if (wantPronunciations) {
                        sb.insert(0, ']');
                        Unit[] u = pron.getUnits();
                        for (int i = u.length - 1; i >= 0; i--) {
                            if (i < u.length - 1) sb.insert(0, ',');
                            sb.insert(0, u[i].getName());
                        }
                        sb.insert(0, '[');
                    }
                    sb.insert(0, word.getSpelling());
                    sb.insert(0, ' ');
                }
            }
            token = token.getPredecessor();
        }
        return sb.toString().trim();
    }


    /**
     * Returns the string of words for this token, with no embedded filler words
     *
     * @return the string of words
     */
    public String getWordPathNoFiller() {
        return getWordPath(false, false);
    }


    /**
     * Returns the string of words for this token, with embedded silences
     *
     * @return the string of words
     */
    public String getWordPath() {
        return getWordPath(true, false);
    }


    /**
     * Returns the string of words and units for this token, with embedded silences.
     *
     * @return the string of words and units
     */
    public String getWordUnitPath() {
        StringBuilder sb = new StringBuilder();
        Token token = this;

        while (token != null) {
            SearchState searchState = token.getSearchState();
            if (searchState instanceof WordSearchState) {
                WordSearchState wordState = (WordSearchState) searchState;
                Word word = wordState.getPronunciation().getWord();
                sb.insert(0, ' ' + word.getSpelling());
            } else if (searchState instanceof UnitSearchState) {
                UnitSearchState unitState = (UnitSearchState) searchState;
                Unit unit = unitState.getUnit();
                sb.insert(0, ' ' + unit.getName());
            }
            token = token.getPredecessor();
        }
        return sb.toString().trim();
    }
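
    // Illustrative output (a sketch; the transcript and phone set are assumed):
    // for a best path covering "one two" with a leading silence,
    // getWordPath(true, true) yields something like
    //   "<sil>[SIL] one[W,AH,N] two[T,UW]"
    // getWordPathNoFiller() yields "one two", and getWordUnitPath() interleaves
    // word spellings with the unit names seen along the path.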


    /**
     * Returns the word of this Token if the search state is a WordSearchState. If the search state is not a
     * WordSearchState, returns null.
     *
     * @return the word of this Token, or null if this is not a word token
     */
    public Word getWord() {
        if (isWord()) {
            WordSearchState wordState = (WordSearchState) searchState;
            return wordState.getPronunciation().getWord();
        } else {
            return null;
        }
    }


    /** Shows the token count */
    public static void showCount() {
        System.out.println("Cur count: " + curCount + " new " +
                (curCount - lastCount));
        lastCount = curCount;
    }


    /**
     * Determines if this branch is valid
     *
     * @return true if the token and its predecessors are valid
     */
    public boolean validate() {
        return true;
    }


    /**
     * Return the DecimalFormat object for formatting the print out of scores.
     *
     * @return the DecimalFormat object for formatting score print outs
     */
    protected static DecimalFormat getScoreFormat() {
        return scoreFmt;
    }


    /**
     * Return the DecimalFormat object for formatting the print out of numbers
     *
     * @return the DecimalFormat object for formatting number print outs
     */
    protected static DecimalFormat getNumberFormat() {
        return numFmt;
    }

    /** Reinitializes this token in place with new values, avoiding a fresh allocation. */
    public void update(Token predecessor, SearchState nextState,
                       float logEntryScore, float insertionProbability,
                       float languageProbability, long collectTime) {
        this.predecessor = predecessor;
        this.searchState = nextState;
        this.logTotalScore = logEntryScore;
        this.logInsertionScore = insertionProbability;
        this.logLanguageScore = languageProbability;
        this.collectTime = collectTime;
    }
}

@ -0,0 +1,172 @@
/*
 * Copyright 1999-2002 Carnegie Mellon University.
 * Portions Copyright 2002 Sun Microsystems, Inc.
 * Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */

package edu.cmu.sphinx.decoder.search;

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

import edu.cmu.sphinx.decoder.scorer.Scoreable;
import edu.cmu.sphinx.linguist.SearchState;

/**
 * The token heap search manager that maintains a heap of the best tokens for each
 * search state instead of a single best token.
 */
public class TokenHeapSearchManager extends WordPruningBreadthFirstSearchManager {

    protected final int maxTokenHeapSize = 3;

    Map<Object, TokenHeap> bestTokenMap;

    @Override
    protected void createBestTokenMap() {
        int mapSize = activeList.size() << 2;
        if (mapSize == 0) {
            mapSize = 1;
        }
        bestTokenMap = new HashMap<Object, TokenHeap>(mapSize, 0.3F);
    }

    @Override
    protected void setBestToken(Token token, SearchState state) {
        TokenHeap th = bestTokenMap.get(state);
        if (th == null) {
            th = new TokenHeap(maxTokenHeapSize);
            bestTokenMap.put(state, th);
        }
        th.add(token);
    }

    @Override
    protected Token getBestToken(SearchState state) {
        // new way... if the heap for this state isn't full return
        // null, otherwise return the worst scoring token
        TokenHeap th = bestTokenMap.get(state);
        Token t;

        if (th == null) {
            return null;
        } else if ((t = th.get(state)) != null) {
            return t;
        } else if (!th.isFull()) {
            return null;
        } else {
            return th.getSmallest();
        }
    }
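
    // Rationale sketch (editorial, not in the original source): while a
    // state's heap is below maxTokenHeapSize, returning null makes the caller
    // take its "no best token yet" path and add another hypothesis, so up to
    // three tokens per state stay alive; once the heap is full, the smallest
    // scoring entry becomes the score to beat, acting as a per-state beam.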

    /**
     * A quick and dirty token heap that allows us to perform token stack
     * experiments. It is not very efficient. We will likely replace this with
     * something better once we figure out how we want to prune things.
     */
    class TokenHeap {

        final Token[] tokens;
        int curSize;

        /**
         * Creates a token heap with the maximum size
         *
         * @param maxSize
         *            the maximum size of the heap
         */
        TokenHeap(int maxSize) {
            tokens = new Token[maxSize];
        }

        /**
         * Adds a token to the heap
         *
         * @param token
         *            the token to add
         */
        void add(Token token) {
            // first, if an identical state exists, replace
            // it.

            if (!tryReplace(token)) {
                if (curSize < tokens.length) {
                    tokens[curSize++] = token;
                } else if (token.getScore() > tokens[curSize - 1].getScore()) {
                    tokens[curSize - 1] = token;
                }
            }
            fixupInsert();
        }

        /**
         * Returns the smallest scoring token on the heap
         *
         * @return the smallest scoring token
         */
        Token getSmallest() {
            if (curSize == 0) {
                return null;
            } else {
                return tokens[curSize - 1];
            }
        }

        /**
         * Determines if the heap is full
         *
         * @return <code>true</code> if the heap is full
         */
        boolean isFull() {
            return curSize == tokens.length;
        }

        /**
         * Checks to see if there is already a token t on the heap that has the
         * same search state. If so, this token replaces that one
         *
         * @param t
         *            the token to try to add to the heap
         * @return <code>true</code> if the token was added
         */
        private boolean tryReplace(Token t) {
            for (int i = 0; i < curSize; i++) {
                if (t.getSearchState().equals(tokens[i].getSearchState())) {
                    assert t.getScore() > tokens[i].getScore();
                    tokens[i] = t;
                    return true;
                }
            }
            return false;
        }

        /** Orders the heap after an insert */
        private void fixupInsert() {
            // sort the whole occupied range [0, curSize); Arrays.sort's
            // toIndex is exclusive, so curSize is needed here to include the
            // newly inserted token at index curSize - 1 in the ordering
            Arrays.sort(tokens, 0, curSize, Scoreable.COMPARATOR);
        }

        /**
         * Returns a token on the heap that matches the given search state
         *
         * @param s
         *            the search state
         * @return the token that matches, or null
         */
        Token get(SearchState s) {
            for (int i = 0; i < curSize; i++) {
                if (tokens[i].getSearchState().equals(s)) {
                    return tokens[i];
                }
            }
            return null;
        }
    }
}

@ -0,0 +1,86 @@
package edu.cmu.sphinx.decoder.search;

import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;
import edu.cmu.sphinx.util.props.S4Boolean;

abstract public class TokenSearchManager implements SearchManager {

    /** The property that specifies whether to build a word lattice. */
    @S4Boolean(defaultValue = true)
    public final static String PROP_BUILD_WORD_LATTICE = "buildWordLattice";

    /**
     * The property that controls whether or not we keep all tokens. If this is
     * set to false, only word tokens are retained, otherwise all tokens are
     * retained.
     */
    @S4Boolean(defaultValue = false)
    public final static String PROP_KEEP_ALL_TOKENS = "keepAllTokens";

    protected boolean buildWordLattice;
    protected boolean keepAllTokens;

    /*
     * (non-Javadoc)
     *
     * @see
     * edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util
     * .props.PropertySheet)
     */
    public void newProperties(PropertySheet ps) throws PropertyException {
        buildWordLattice = ps.getBoolean(PROP_BUILD_WORD_LATTICE);
        keepAllTokens = ps.getBoolean(PROP_KEEP_ALL_TOKENS);
    }

    /**
     * Finds the token to use as a predecessor in resultList given a candidate
     * predecessor. There are three cases here:
     *
     * <ul>
     * <li>We want to store everything in resultList. In that case
     * {@link #keepAllTokens} is set to true and we just store everything that
     * was built before.
     * <li>We are only interested in the sequence of words. In this case we just
     * keep word tokens and ignore everything else. In this case timing and
     * scoring information is lost since we keep scores in emitting tokens.
     * <li>We want to keep words but also keep scores to build a lattice
     * from the result list later, and {@link #buildWordLattice} is set to true.
     * In this case we insert an intermediate token to store the score, and
     * this token will be used during lattice path collapse to get the score on
     * an edge. See {@link edu.cmu.sphinx.result.Lattice} for details of resultList
     * compression.
     * </ul>
     *
     * @param token
     *            the token of interest
     * @return the token to use as the predecessor in the result list
     */
    protected Token getResultListPredecessor(Token token) {

        if (keepAllTokens) {
            return token;
        }

        if (!buildWordLattice) {
            if (token.isWord())
                return token;
            else
                return token.getPredecessor();
        }

        float logAcousticScore = 0.0f;
        float logLanguageScore = 0.0f;
        float logInsertionScore = 0.0f;

        while (token != null && !token.isWord()) {
            logAcousticScore += token.getAcousticScore();
            logLanguageScore += token.getLanguageScore();
            logInsertionScore += token.getInsertionScore();
            token = token.getPredecessor();
        }

        // note: the Token constructor takes (predecessor, total, acoustic,
        // insertion, language), so the acoustic sum precedes the insertion sum
        return new Token(token, token.getScore(), logAcousticScore, logInsertionScore, logLanguageScore);
    }
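
    // Illustrative trace (a sketch; the token chain is hypothetical): given
    // the chain  word <- hmm1 <- hmm2 <- current  with buildWordLattice = true
    // and keepAllTokens = false, the loop folds the hmm1/hmm2 scores into a
    // single intermediate Token whose predecessor is the word token, so the
    // lattice can later recover per-edge acoustic/language/insertion scores.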

}

@ -0,0 +1,259 @@
/*
 *
 * Copyright 1999-2004 Carnegie Mellon University.
 * Portions Copyright 2004 Sun Microsystems, Inc.
 * Portions Copyright 2004 Mitsubishi Electronic Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */
package edu.cmu.sphinx.decoder.search;

import edu.cmu.sphinx.decoder.scorer.Scoreable;
import edu.cmu.sphinx.linguist.WordSearchState;
import edu.cmu.sphinx.linguist.dictionary.Word;
import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;
import edu.cmu.sphinx.util.props.S4Integer;

import java.util.*;

/**
 * A factory for WordActiveList. The word active list is an active list designed to hold word tokens only. In addition
 * to the usual active list properties such as absolute and relative beams, the word active list allows restricting the
 * number of copies of any particular word in the word beam. The word active list can also restrict the number of
 * fillers in the beam.
 */
public class WordActiveListFactory extends ActiveListFactory {

    /** Property that sets the max paths for a single word. (zero disables this feature) */
    @S4Integer(defaultValue = 0)
    public final static String PROP_MAX_PATHS_PER_WORD = "maxPathsPerWord";

    /** Property that sets the max filler words allowed in the beam. (zero disables this feature) */
    @S4Integer(defaultValue = 1)
    public final static String PROP_MAX_FILLER_WORDS = "maxFillerWords";

    private int maxPathsPerWord;
    private int maxFiller;

    /**
     * Creates a factory for the word active list
     * @param absoluteBeamWidth beam for absolute pruning
     * @param relativeBeamWidth beam for relative pruning
     * @param maxPathsPerWord maximum number of paths to keep per word
     * @param maxFiller maximum number of fillers
     */
    public WordActiveListFactory(int absoluteBeamWidth,
            double relativeBeamWidth, int maxPathsPerWord, int maxFiller)
    {
        super(absoluteBeamWidth, relativeBeamWidth);
        this.maxPathsPerWord = maxPathsPerWord;
        this.maxFiller = maxFiller;
    }

    public WordActiveListFactory() {
    }


    /*
     * (non-Javadoc)
     *
     * @see edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util.props.PropertySheet)
     */
    @Override
    public void newProperties(PropertySheet ps) throws PropertyException {
        super.newProperties(ps);

        maxPathsPerWord = ps.getInt(PROP_MAX_PATHS_PER_WORD);
        maxFiller = ps.getInt(PROP_MAX_FILLER_WORDS);
    }


    /*
     * (non-Javadoc)
     *
     * @see edu.cmu.sphinx.decoder.search.ActiveListFactory#newInstance()
     */
    @Override
    public ActiveList newInstance() {
        return new WordActiveList();
    }


    /**
     * An active list that manages words. Guarantees only one version of a word.
     * <p>
     * Note that all scores are maintained in the LogMath log domain.
     */
    class WordActiveList implements ActiveList {

        private Token bestToken;
        private List<Token> tokenList = new LinkedList<Token>();


        /**
         * Adds the given token to the list
         *
         * @param token the token to add
         */
        public void add(Token token) {
            tokenList.add(token);
            if (bestToken == null || token.getScore() > bestToken.getScore()) {
                bestToken = token;
            }
        }


        /**
         * Replaces an old token with a new token
         *
         * @param oldToken the token to replace (or null, in which case replace works like add).
         * @param newToken the new token to be placed in the list.
         */
        public void replace(Token oldToken, Token newToken) {
            add(newToken);
            if (oldToken != null) {
                tokenList.remove(oldToken);
            }
        }


        /**
         * Purges excess members: removes duplicate word paths beyond maxPathsPerWord,
         * fillers beyond maxFiller, and tokens beyond the absoluteBeamWidth.
         *
         * @return a (possibly new) active list
         */
        public ActiveList purge() {
            int fillerCount = 0;
            Map<Word, Integer> countMap = new HashMap<Word, Integer>();
            Collections.sort(tokenList, Scoreable.COMPARATOR);
            // remove word duplicates
            for (Iterator<Token> i = tokenList.iterator(); i.hasNext();) {
                Token token = i.next();
                WordSearchState wordState = (WordSearchState) token.getSearchState();

                Word word = wordState.getPronunciation().getWord();

                // only allow maxFiller words
                if (maxFiller > 0) {
                    if (word.isFiller()) {
                        if (fillerCount < maxFiller) {
                            fillerCount++;
                        } else {
                            i.remove();
                            continue;
                        }
                    }
                }

                if (maxPathsPerWord > 0) {
                    Integer count = countMap.get(word);
                    int c = count == null ? 0 : count;

                    // Since the tokens are sorted by score we only
                    // keep the n best tokens for a particular word
                    if (c < maxPathsPerWord - 1) {
                        countMap.put(word, c + 1);
                    } else {
                        i.remove();
                    }
                }
            }

            if (tokenList.size() > absoluteBeamWidth) {
                tokenList = tokenList.subList(0, absoluteBeamWidth);
            }

            return this;
        }
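
        // Worked example (illustrative numbers, not in the original source):
        // with maxFiller = 1 and maxPathsPerWord = 3, a score-sorted beam
        //   [two, two, two, <sil>, <sil>, one]
        // purges to [two, two, <sil>, one]: the third "two" exceeds the
        // per-word limit (the count check keeps maxPathsPerWord - 1 copies)
        // and the second filler exceeds maxFiller. The absolute beam is
        // applied after these per-word checks.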


        /**
         * Retrieves the iterator for this token list.
         *
         * @return the iterator for this token list
         */
        public Iterator<Token> iterator() {
            return tokenList.iterator();
        }


        /**
         * Gets the list of all tokens
         *
         * @return the list of tokens
         */
        public List<Token> getTokens() {
            return tokenList;
        }


        /**
         * Returns the number of tokens on this active list
         *
         * @return the size of the active list
         */
        public final int size() {
            return tokenList.size();
        }


        /**
         * Gets the beam threshold based upon the best scoring token
         *
         * @return the beam threshold
         */
        public float getBeamThreshold() {
            return getBestScore() + logRelativeBeamWidth;
        }


        /**
         * Gets the best score in the list
         *
         * @return the best score
         */
        public float getBestScore() {
            float bestScore = -Float.MAX_VALUE;
            if (bestToken != null) {
                bestScore = bestToken.getScore();
            }
            return bestScore;
        }


        /**
         * Sets the best scoring token for this active list
         *
         * @param token the best scoring token
         */
        public void setBestToken(Token token) {
            bestToken = token;
        }


        /**
         * Gets the best scoring token for this active list
         *
         * @return the best scoring token
         */
        public Token getBestToken() {
            return bestToken;
        }


        /* (non-Javadoc)
         * @see edu.cmu.sphinx.decoder.search.ActiveList#newInstance()
         */
        public ActiveList newInstance() {
            return WordActiveListFactory.this.newInstance();
        }
    }
}

@ -0,0 +1,497 @@
/*
 * Copyright 2014 Carnegie Mellon University.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */

package edu.cmu.sphinx.decoder.search;

// a test search manager.

import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Map;

import edu.cmu.sphinx.decoder.pruner.Pruner;
import edu.cmu.sphinx.decoder.scorer.AcousticScorer;
import edu.cmu.sphinx.frontend.Data;
import edu.cmu.sphinx.linguist.Linguist;
import edu.cmu.sphinx.linguist.SearchState;
import edu.cmu.sphinx.linguist.SearchStateArc;
import edu.cmu.sphinx.linguist.WordSearchState;
import edu.cmu.sphinx.linguist.acoustic.tiedstate.Loader;
import edu.cmu.sphinx.linguist.acoustic.tiedstate.Sphinx3Loader;
import edu.cmu.sphinx.linguist.allphone.PhoneHmmSearchState;
import edu.cmu.sphinx.linguist.lextree.LexTreeLinguist.LexTreeHMMState;
import edu.cmu.sphinx.linguist.lextree.LexTreeLinguist.LexTreeNonEmittingHMMState;
import edu.cmu.sphinx.linguist.lextree.LexTreeLinguist.LexTreeWordState;
import edu.cmu.sphinx.linguist.lextree.LexTreeLinguist.LexTreeEndUnitState;
import edu.cmu.sphinx.result.Result;
import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;
import edu.cmu.sphinx.util.props.S4Component;
import edu.cmu.sphinx.util.props.S4Double;
import edu.cmu.sphinx.util.props.S4Integer;

/**
 * Provides the breadth first search with a fast match heuristic included to
 * reduce the number of tokens created.
 * <p>
 * All scores and probabilities are maintained in the log math log domain.
 */

public class WordPruningBreadthFirstLookaheadSearchManager extends WordPruningBreadthFirstSearchManager {

    /** The property that provides direct access to the acoustic model loader, used to control gau score caching. */
    @S4Component(type = Loader.class)
    public final static String PROP_LOADER = "loader";

    /**
     * The property that defines the name of the linguist to be used for fast
     * match.
     */
    @S4Component(type = Linguist.class)
    public final static String PROP_FASTMATCH_LINGUIST = "fastmatchLinguist";

    /** The property that defines the type of active list factory for fast match */
    @S4Component(type = ActiveListFactory.class)
    public final static String PROP_FM_ACTIVE_LIST_FACTORY = "fastmatchActiveListFactory";

    /** The property that defines the weight of the lookahead penalty. */
    @S4Double(defaultValue = 1.0)
    public final static String PROP_LOOKAHEAD_PENALTY_WEIGHT = "lookaheadPenaltyWeight";

    /**
     * The property that controls the size of the lookahead window. Acceptable
     * values are in the range [1..10].
     */
    @S4Integer(defaultValue = 5)
    public final static String PROP_LOOKAHEAD_WINDOW = "lookaheadWindow";

    // -----------------------------------
    // Configured Subcomponents
    // -----------------------------------
    private Linguist fastmatchLinguist; // Provides phones info for fastmatch
    private Loader loader;
    private ActiveListFactory fastmatchActiveListFactory;

    // -----------------------------------
    // Lookahead data
    // -----------------------------------
    private int lookaheadWindow;
    private float lookaheadWeight;
    private HashMap<Integer, Float> penalties;
    private LinkedList<FrameCiScores> ciScores;

    // -----------------------------------
    // Working data
    // -----------------------------------
    private int currentFastMatchFrameNumber; // the current frame number for
                                             // lookahead matching
    protected ActiveList fastmatchActiveList; // the list of active tokens for
                                              // fast match
    protected Map<SearchState, Token> fastMatchBestTokenMap;
    private boolean fastmatchStreamEnd;

    /**
     * Creates a pruning manager with lookahead
     * @param linguist a linguist for the search space
     * @param fastmatchLinguist a linguist for the fast match search space
     * @param pruner pruner to drop tokens
     * @param loader model loader
     * @param scorer scorer to estimate token probability
     * @param activeListManager active list manager to store tokens
     * @param fastmatchActiveListFactory fast match active list factory to store phone loop tokens
     * @param showTokenCount show count during decoding
     * @param relativeWordBeamWidth relative beam for lookahead pruning
     * @param growSkipInterval skip interval for grow
     * @param checkStateOrder check order of states during growth
     * @param buildWordLattice build a lattice during decoding
     * @param maxLatticeEdges max edges to keep in lattice
     * @param acousticLookaheadFrames frames to do lookahead
     * @param keepAllTokens keep tokens including emitting tokens
     * @param lookaheadWindow window for lookahead
     * @param lookaheadWeight weight for lookahead pruning
     */
    public WordPruningBreadthFirstLookaheadSearchManager(Linguist linguist, Linguist fastmatchLinguist, Loader loader,
            Pruner pruner, AcousticScorer scorer, ActiveListManager activeListManager,
            ActiveListFactory fastmatchActiveListFactory, boolean showTokenCount, double relativeWordBeamWidth,
            int growSkipInterval, boolean checkStateOrder, boolean buildWordLattice, int lookaheadWindow, float lookaheadWeight,
            int maxLatticeEdges, float acousticLookaheadFrames, boolean keepAllTokens) {

        super(linguist, pruner, scorer, activeListManager, showTokenCount, relativeWordBeamWidth, growSkipInterval,
                checkStateOrder, buildWordLattice, maxLatticeEdges, acousticLookaheadFrames, keepAllTokens);

        this.loader = loader;
        this.fastmatchLinguist = fastmatchLinguist;
        this.fastmatchActiveListFactory = fastmatchActiveListFactory;
        this.lookaheadWindow = lookaheadWindow;
        this.lookaheadWeight = lookaheadWeight;
        if (lookaheadWindow < 1 || lookaheadWindow > 10)
            throw new IllegalArgumentException("Unsupported lookahead window size: " + lookaheadWindow
                    + ". Value in range [1..10] is expected");
        this.ciScores = new LinkedList<FrameCiScores>();
        this.penalties = new HashMap<Integer, Float>();
        if (loader instanceof Sphinx3Loader && ((Sphinx3Loader) loader).hasTiedMixtures())
            ((Sphinx3Loader) loader).setGauScoresQueueLength(lookaheadWindow + 2);
    }

    public WordPruningBreadthFirstLookaheadSearchManager() {

    }

    /*
     * (non-Javadoc)
     *
     * @see
     * edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util
     * .props.PropertySheet)
     */
    @Override
    public void newProperties(PropertySheet ps) throws PropertyException {
        super.newProperties(ps);

        fastmatchLinguist = (Linguist) ps.getComponent(PROP_FASTMATCH_LINGUIST);
        fastmatchActiveListFactory = (ActiveListFactory) ps.getComponent(PROP_FM_ACTIVE_LIST_FACTORY);
        loader = (Loader) ps.getComponent(PROP_LOADER);
        lookaheadWindow = ps.getInt(PROP_LOOKAHEAD_WINDOW);
        lookaheadWeight = ps.getFloat(PROP_LOOKAHEAD_PENALTY_WEIGHT);
        if (lookaheadWindow < 1 || lookaheadWindow > 10)
            throw new PropertyException(WordPruningBreadthFirstLookaheadSearchManager.class.getName(), PROP_LOOKAHEAD_WINDOW,
                    "Unsupported lookahead window size: " + lookaheadWindow + ". Value in range [1..10] is expected");
        ciScores = new LinkedList<FrameCiScores>();
        penalties = new HashMap<Integer, Float>();
        if (loader instanceof Sphinx3Loader && ((Sphinx3Loader) loader).hasTiedMixtures())
            ((Sphinx3Loader) loader).setGauScoresQueueLength(lookaheadWindow + 2);
    }

    /**
     * Performs the recognition for the given number of frames.
     *
     * @param nFrames
     *            the number of frames to recognize
     * @return the current result
     */
    @Override
    public Result recognize(int nFrames) {
        boolean done = false;
        Result result = null;
        streamEnd = false;

        for (int i = 0; i < nFrames && !done; i++) {
            if (!fastmatchStreamEnd)
                fastMatchRecognize();
            penalties.clear();
            ciScores.poll();
            done = recognize();
        }

        if (!streamEnd) {
            result = new Result(loserManager, activeList, resultList, currentCollectTime, done, linguist.getSearchGraph()
                    .getWordTokenFirst(), true);
        }

        // tokenTypeTracker.show();
        if (showTokenCount) {
            showTokenCount();
        }
        return result;
    }
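
    // Timeline sketch (editorial illustration): localStart() primes the fast
    // match with lookaheadWindow - 1 frames, so inside the loop above the
    // phone loop stays roughly that many frames ahead of the main search:
    //   fastMatchRecognize();  // phone loop at frame t + lookaheadWindow - 1
    //   penalties.clear();     // penalties are recomputed lazily per frame
    //   ciScores.poll();       // drop the oldest frame of CI scores
    //   recognize();           // main word search at frame t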

    private void fastMatchRecognize() {
        boolean more = scoreFastMatchTokens();

        if (more) {
            pruneFastMatchBranches();
            currentFastMatchFrameNumber++;
            createFastMatchBestTokenMap();
            growFastmatchBranches();
        }
    }

    /**
     * Creates a new fast match best token map with an appropriate initial size.
     */
    protected void createFastMatchBestTokenMap() {
        int mapSize = fastmatchActiveList.size() * 10;
        if (mapSize == 0) {
            mapSize = 1;
        }
        fastMatchBestTokenMap = new HashMap<SearchState, Token>(mapSize);
    }

    /**
     * Gets the initial grammar node from the linguist and creates a
     * GrammarNodeToken
     */
    @Override
    protected void localStart() {
        currentFastMatchFrameNumber = 0;
        if (loader instanceof Sphinx3Loader && ((Sphinx3Loader) loader).hasTiedMixtures())
            ((Sphinx3Loader) loader).clearGauScores();
        // prepare fast match active list
        fastmatchActiveList = fastmatchActiveListFactory.newInstance();
        SearchState fmInitState = fastmatchLinguist.getSearchGraph().getInitialState();
        fastmatchActiveList.add(new Token(fmInitState, currentFastMatchFrameNumber));
        createFastMatchBestTokenMap();
        growFastmatchBranches();
        fastmatchStreamEnd = false;
        for (int i = 0; (i < lookaheadWindow - 1) && !fastmatchStreamEnd; i++)
            fastMatchRecognize();

        super.localStart();
    }

    /**
     * Goes through the fast match active list of tokens and expands each token,
     * finding the set of successor tokens until all the successor tokens are
     * emitting tokens.
     */
    protected void growFastmatchBranches() {
        growTimer.start();
        ActiveList oldActiveList = fastmatchActiveList;
        fastmatchActiveList = fastmatchActiveListFactory.newInstance();
        float fastmatchThreshold = oldActiveList.getBeamThreshold();
        // TODO more precise range of baseIds, remove magic number
        float[] frameCiScores = new float[100];

        Arrays.fill(frameCiScores, -Float.MAX_VALUE);
        float frameMaxCiScore = -Float.MAX_VALUE;
        for (Token token : oldActiveList) {
            float tokenScore = token.getScore();
            if (tokenScore < fastmatchThreshold)
                continue;
            // fill the max CI scores array that will be used in the general
            // search token score composition
            if (token.getSearchState() instanceof PhoneHmmSearchState) {
                int baseId = ((PhoneHmmSearchState) token.getSearchState()).getBaseId();
                if (frameCiScores[baseId] < tokenScore)
                    frameCiScores[baseId] = tokenScore;
                if (frameMaxCiScore < tokenScore)
                    frameMaxCiScore = tokenScore;
            }
            collectFastMatchSuccessorTokens(token);
        }
        ciScores.add(new FrameCiScores(frameCiScores, frameMaxCiScore));
        growTimer.stop();
    }

    protected boolean scoreFastMatchTokens() {
        boolean moreTokens;
        scoreTimer.start();
        Data data = scorer.calculateScoresAndStoreData(fastmatchActiveList.getTokens());
        scoreTimer.stop();

        Token bestToken = null;
        if (data instanceof Token) {
            bestToken = (Token) data;
        } else {
            fastmatchStreamEnd = true;
        }

        moreTokens = (bestToken != null);
        fastmatchActiveList.setBestToken(bestToken);

        // monitorWords(activeList);
        monitorStates(fastmatchActiveList);

        // System.out.println("BEST " + bestToken);

        curTokensScored.value += fastmatchActiveList.size();
        totalTokensScored.value += fastmatchActiveList.size();

        return moreTokens;
    }

    /** Removes unpromising branches from the fast match active list */
    protected void pruneFastMatchBranches() {
        pruneTimer.start();
        fastmatchActiveList = pruner.prune(fastmatchActiveList);
        pruneTimer.stop();
    }

    protected Token getFastMatchBestToken(SearchState state) {
        return fastMatchBestTokenMap.get(state);
    }

    protected void setFastMatchBestToken(Token token, SearchState state) {
        fastMatchBestTokenMap.put(state, token);
    }

    protected void collectFastMatchSuccessorTokens(Token token) {
        SearchState state = token.getSearchState();
        SearchStateArc[] arcs = state.getSuccessors();
        // For each successor
        // calculate the entry score for the token based upon the
        // predecessor token score and the transition probabilities
        // if the score is better than the best score encountered for
        // the SearchState and frame then create a new token, add
        // it to the lattice and the SearchState.
        // If the token is an emitting token add it to the list,
        // otherwise recursively collect the new tokens successors.
        for (SearchStateArc arc : arcs) {
            SearchState nextState = arc.getState();
            // We're actually multiplying the variables, but since
            // these come in log(), multiply gets converted to add
            float logEntryScore = token.getScore() + arc.getProbability();
            Token predecessor = getResultListPredecessor(token);

            // if not emitting, check to see if we've already visited
            // this state during this frame. Expand the token only if we
            // haven't visited it already. This prevents the search
            // from getting stuck in a loop of states with no
            // intervening emitting nodes. This can happen with nasty
            // jsgf grammars such as ((foo*)*)*
            if (!nextState.isEmitting()) {
                Token newToken = new Token(predecessor, nextState, logEntryScore, arc.getInsertionProbability(),
                        arc.getLanguageProbability(), currentFastMatchFrameNumber);
                tokensCreated.value++;
                if (!isVisited(newToken)) {
                    collectFastMatchSuccessorTokens(newToken);
                }
                continue;
            }

            Token bestToken = getFastMatchBestToken(nextState);
            if (bestToken == null) {
                Token newToken = new Token(predecessor, nextState, logEntryScore, arc.getInsertionProbability(),
                        arc.getLanguageProbability(), currentFastMatchFrameNumber);
                tokensCreated.value++;
                setFastMatchBestToken(newToken, nextState);
                fastmatchActiveList.add(newToken);
            } else {
                if (bestToken.getScore() <= logEntryScore) {
                    bestToken.update(predecessor, nextState, logEntryScore, arc.getInsertionProbability(),
                            arc.getLanguageProbability(), currentFastMatchFrameNumber);
                }
            }
        }
    }

    /**
     * Collects the next set of emitting tokens from a token and accumulates
     * them in the active or result lists
     *
     * @param token the token to collect successors from
     */
    @Override
    protected void collectSuccessorTokens(Token token) {

        // tokenTracker.add(token);
        // tokenTypeTracker.add(token);

        // If this is a final state, add it to the final list
        if (token.isFinal()) {
            resultList.add(getResultListPredecessor(token));
            return;
        }

        // if this is a non-emitting token and we've already
        // visited the same state during this frame, then we
        // are in a grammar loop, so we don't continue to expand.
        // This check only works properly if we have kept all of the
        // tokens (instead of skipping the non-word tokens).
        // Note that certain linguists will never generate grammar loops
        // (lextree linguist for example). For these cases, it is perfectly
        // fine to disable this check by setting keepAllTokens to false
        if (!token.isEmitting() && (keepAllTokens && isVisited(token))) {
            return;
        }

        SearchState state = token.getSearchState();
        SearchStateArc[] arcs = state.getSuccessors();
        Token predecessor = getResultListPredecessor(token);

        // For each successor
        // calculate the entry score for the token based upon the
        // predecessor token score and the transition probabilities
        // if the score is better than the best score encountered for
        // the SearchState and frame then create a new token, add
        // it to the lattice and the SearchState.
        // If the token is an emitting token add it to the list,
        // otherwise recursively collect the new tokens successors.

        float tokenScore = token.getScore();
        float beamThreshold = activeList.getBeamThreshold();
        boolean stateProducesPhoneHmms = state instanceof LexTreeNonEmittingHMMState || state instanceof LexTreeWordState
                || state instanceof LexTreeEndUnitState;
        for (SearchStateArc arc : arcs) {
            SearchState nextState = arc.getState();

            // prune states using lookahead heuristics
            if (stateProducesPhoneHmms) {
                if (nextState instanceof LexTreeHMMState) {
                    Float penalty;
                    int baseId = ((LexTreeHMMState) nextState).getHMMState().getHMM().getBaseUnit().getBaseID();
                    if ((penalty = penalties.get(baseId)) == null)
                        penalty = updateLookaheadPenalty(baseId);
                    if ((tokenScore + lookaheadWeight * penalty) < beamThreshold)
                        continue;
                }
            }

            if (checkStateOrder) {
                checkStateOrder(state, nextState);
            }

            // We're actually multiplying the variables, but since
            // these come in log(), multiply gets converted to add
            float logEntryScore = tokenScore + arc.getProbability();

            Token bestToken = getBestToken(nextState);

            if (bestToken == null) {
                Token newBestToken = new Token(predecessor, nextState, logEntryScore, arc.getInsertionProbability(),
                        arc.getLanguageProbability(), currentCollectTime);
                tokensCreated.value++;
                setBestToken(newBestToken, nextState);
                activeListAdd(newBestToken);
            } else if (bestToken.getScore() < logEntryScore) {
                // System.out.println("Updating " + bestToken + " with " +
                // newBestToken);
                Token oldPredecessor = bestToken.getPredecessor();
                bestToken.update(predecessor, nextState, logEntryScore, arc.getInsertionProbability(),
                        arc.getLanguageProbability(), currentCollectTime);
                if (buildWordLattice && nextState instanceof WordSearchState) {
                    loserManager.addAlternatePredecessor(bestToken, oldPredecessor);
                }
            } else if (buildWordLattice && nextState instanceof WordSearchState) {
                if (predecessor != null) {
                    loserManager.addAlternatePredecessor(bestToken, predecessor);
                }
            }
        }
    }

    private Float updateLookaheadPenalty(int baseId) {
        if (ciScores.isEmpty())
            return 0.0f;
        float penalty = -Float.MAX_VALUE;
        for (FrameCiScores frameCiScores : ciScores) {
            float diff = frameCiScores.scores[baseId] - frameCiScores.maxScore;
            if (diff > penalty)
                penalty = diff;
        }
        penalties.put(baseId, penalty);
        return penalty;
    }
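
    // Worked example (illustrative numbers): suppose the window holds two
    // frames of CI scores for phone baseId p, at -10 and -7, with per-frame
    // maxima -4 and -5. The diffs are -6 and -2, so the cached penalty is -2:
    // the best this phone ever did within the window, relative to the best
    // phone of the same frame. A penalty near zero barely prunes, while a
    // strongly negative one pushes successor scores below the beam threshold.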

    private class FrameCiScores {
        public final float[] scores;
        public final float maxScore;

        public FrameCiScores(float[] scores, float maxScore) {
            this.scores = scores;
            this.maxScore = maxScore;
        }
    }

}

@ -0,0 +1,796 @@
|
|||
/*
|
||||
* Copyright 1999-2002 Carnegie Mellon University.
|
||||
* Portions Copyright 2002 Sun Microsystems, Inc.
|
||||
* Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
|
||||
* All Rights Reserved. Use is subject to license terms.
|
||||
*
|
||||
* See the file "license.terms" for information on usage and
|
||||
* redistribution of this file, and for a DISCLAIMER OF ALL
|
||||
* WARRANTIES.
|
||||
*
|
||||
*/
|
||||
|
||||
package edu.cmu.sphinx.decoder.search;
|
||||
|
||||
// a test search manager.
|
||||
|
||||
import edu.cmu.sphinx.decoder.pruner.Pruner;
|
||||
import edu.cmu.sphinx.decoder.scorer.AcousticScorer;
|
||||
import edu.cmu.sphinx.frontend.Data;
|
||||
import edu.cmu.sphinx.linguist.*;
|
||||
import edu.cmu.sphinx.result.Result;
|
||||
import edu.cmu.sphinx.util.LogMath;
|
||||
import edu.cmu.sphinx.util.StatisticsVariable;
|
||||
import edu.cmu.sphinx.util.Timer;
|
||||
import edu.cmu.sphinx.util.TimerPool;
|
||||
import edu.cmu.sphinx.util.props.*;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
|
||||
/**
|
||||
* Provides the breadth first search. To perform recognition an application
|
||||
* should call initialize before recognition begins, and repeatedly call
|
||||
* <code> recognize </code> until Result.isFinal() returns true. Once a final
|
||||
* result has been obtained, <code> stopRecognition </code> should be called.
|
||||
* <p>
|
||||
* All scores and probabilities are maintained in the log math log domain.
|
||||
*/
|
||||
|
||||
public class WordPruningBreadthFirstSearchManager extends TokenSearchManager {
|
||||
|
||||
/**
|
||||
* The property that defines the name of the linguist to be used by this
|
||||
* search manager.
|
||||
*/
|
||||
@S4Component(type = Linguist.class)
|
||||
public final static String PROP_LINGUIST = "linguist";
|
||||
|
||||
/**
|
||||
* The property that defines the name of the pruner to be used by this
* search manager.
|
||||
*/
|
||||
@S4Component(type = Pruner.class)
|
||||
public final static String PROP_PRUNER = "pruner";
|
||||
|
||||
/**
|
||||
* The property that defines the name of the scorer to be used by this
|
||||
* search manager.
|
||||
*/
|
||||
@S4Component(type = AcousticScorer.class)
|
||||
public final static String PROP_SCORER = "scorer";
|
||||
|
||||
/**
|
||||
* The property that, when set to <code>true</code>, causes the
* recognizer to count up all the tokens in the active list after every
* frame.
|
||||
*/
|
||||
@S4Boolean(defaultValue = false)
|
||||
public final static String PROP_SHOW_TOKEN_COUNT = "showTokenCount";
|
||||
|
||||
/**
|
||||
* The property that controls the number of frames processed for every time
|
||||
* the decode growth step is skipped. Setting this property to zero disables
|
||||
* grow skipping. Setting this number to a small integer will increase the
|
||||
* speed of the decoder but will also decrease its accuracy. The higher the
|
||||
* number, the less often the grow code is skipped. Values of 6-8 are known
* to be good enough for large vocabulary tasks; a value of 6 means that one
* of every 6 frames will be skipped.
|
||||
*/
|
||||
@S4Integer(defaultValue = 0)
|
||||
public final static String PROP_GROW_SKIP_INTERVAL = "growSkipInterval";
|
||||
|
||||
/** The property that defines the type of active list to use */
|
||||
@S4Component(type = ActiveListManager.class)
|
||||
public final static String PROP_ACTIVE_LIST_MANAGER = "activeListManager";
|
||||
|
||||
/** The property for checking if the order of states is valid. */
|
||||
@S4Boolean(defaultValue = false)
|
||||
public final static String PROP_CHECK_STATE_ORDER = "checkStateOrder";
|
||||
|
||||
/** The property that specifies the maximum lattice edges */
|
||||
@S4Integer(defaultValue = 100)
|
||||
public final static String PROP_MAX_LATTICE_EDGES = "maxLatticeEdges";
|
||||
|
||||
/**
|
||||
* The property that controls the amount of simple acoustic lookahead
|
||||
* performed. Setting the property to zero (the default) disables simple
|
||||
* acoustic lookahead. The lookahead need not be an integer.
|
||||
*/
|
||||
@S4Double(defaultValue = 0)
|
||||
public final static String PROP_ACOUSTIC_LOOKAHEAD_FRAMES = "acousticLookaheadFrames";
|
||||
|
||||
/** The property that specifies the relative beam width */
|
||||
@S4Double(defaultValue = 0.0)
|
||||
// TODO: this should be a more meaningful default e.g. the common 1E-80
|
||||
public final static String PROP_RELATIVE_BEAM_WIDTH = "relativeBeamWidth";
|
||||
|
||||
// -----------------------------------
|
||||
// Configured Subcomponents
|
||||
// -----------------------------------
|
||||
protected Linguist linguist; // Provides grammar/language info
|
||||
protected Pruner pruner; // used to prune the active list
|
||||
protected AcousticScorer scorer; // used to score the active list
|
||||
private ActiveListManager activeListManager;
|
||||
protected LogMath logMath;
|
||||
|
||||
// -----------------------------------
|
||||
// Configuration data
|
||||
// -----------------------------------
|
||||
protected Logger logger;
|
||||
protected boolean showTokenCount;
|
||||
protected boolean checkStateOrder;
|
||||
private int growSkipInterval;
|
||||
protected float relativeBeamWidth;
|
||||
protected float acousticLookaheadFrames;
|
||||
private int maxLatticeEdges = 100;
|
||||
|
||||
// -----------------------------------
|
||||
// Instrumentation
|
||||
// -----------------------------------
|
||||
protected Timer scoreTimer;
|
||||
protected Timer pruneTimer;
|
||||
protected Timer growTimer;
|
||||
protected StatisticsVariable totalTokensScored;
|
||||
protected StatisticsVariable curTokensScored;
|
||||
protected StatisticsVariable tokensCreated;
|
||||
private long tokenSum;
|
||||
private int tokenCount;
|
||||
|
||||
// -----------------------------------
|
||||
// Working data
|
||||
// -----------------------------------
|
||||
protected int currentFrameNumber; // the current frame number
|
||||
protected long currentCollectTime; // the collect time of the current frame
|
||||
protected ActiveList activeList; // the list of active tokens
|
||||
protected List<Token> resultList; // the current set of results
|
||||
protected Map<SearchState, Token> bestTokenMap;
|
||||
protected AlternateHypothesisManager loserManager;
|
||||
private int numStateOrder;
|
||||
// private TokenTracker tokenTracker;
|
||||
// private TokenTypeTracker tokenTypeTracker;
|
||||
protected boolean streamEnd;
|
||||
|
||||
/**
|
||||
* Creates a search manager with separate lists for tokens
|
||||
* @param linguist a linguist for search space
|
||||
* @param pruner pruner to drop tokens
|
||||
* @param scorer scorer to estimate token probability
|
||||
* @param activeListManager active list manager to store tokens
|
||||
* @param showTokenCount show count during decoding
|
||||
* @param relativeWordBeamWidth relative beam for lookahead pruning
|
||||
* @param growSkipInterval skip interval for growth
|
||||
* @param checkStateOrder check order of states during growth
|
||||
* @param buildWordLattice build a lattice during decoding
|
||||
* @param maxLatticeEdges max edges to keep in lattice
|
||||
* @param acousticLookaheadFrames frames to do lookahead
|
||||
* @param keepAllTokens keep tokens including emitting tokens
|
||||
*/
|
||||
public WordPruningBreadthFirstSearchManager(Linguist linguist, Pruner pruner, AcousticScorer scorer,
|
||||
ActiveListManager activeListManager, boolean showTokenCount, double relativeWordBeamWidth, int growSkipInterval,
|
||||
boolean checkStateOrder, boolean buildWordLattice, int maxLatticeEdges, float acousticLookaheadFrames,
|
||||
boolean keepAllTokens) {
|
||||
|
||||
this.logger = Logger.getLogger(getClass().getName());
|
||||
this.logMath = LogMath.getLogMath();
|
||||
this.linguist = linguist;
|
||||
this.pruner = pruner;
|
||||
this.scorer = scorer;
|
||||
this.activeListManager = activeListManager;
|
||||
this.showTokenCount = showTokenCount;
|
||||
this.growSkipInterval = growSkipInterval;
|
||||
this.checkStateOrder = checkStateOrder;
|
||||
this.buildWordLattice = buildWordLattice;
|
||||
this.maxLatticeEdges = maxLatticeEdges;
|
||||
this.acousticLookaheadFrames = acousticLookaheadFrames;
|
||||
this.keepAllTokens = keepAllTokens;
|
||||
|
||||
this.relativeBeamWidth = logMath.linearToLog(relativeWordBeamWidth);
|
||||
}
|
||||
|
||||
public WordPruningBreadthFirstSearchManager() {
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* (non-Javadoc)
|
||||
*
|
||||
* @see
|
||||
* edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util
|
||||
* .props.PropertySheet)
|
||||
*/
|
||||
@Override
|
||||
public void newProperties(PropertySheet ps) throws PropertyException {
|
||||
super.newProperties(ps);
|
||||
|
||||
logMath = LogMath.getLogMath();
|
||||
logger = ps.getLogger();
|
||||
|
||||
linguist = (Linguist) ps.getComponent(PROP_LINGUIST);
|
||||
pruner = (Pruner) ps.getComponent(PROP_PRUNER);
|
||||
scorer = (AcousticScorer) ps.getComponent(PROP_SCORER);
|
||||
activeListManager = (ActiveListManager) ps.getComponent(PROP_ACTIVE_LIST_MANAGER);
|
||||
showTokenCount = ps.getBoolean(PROP_SHOW_TOKEN_COUNT);
|
||||
growSkipInterval = ps.getInt(PROP_GROW_SKIP_INTERVAL);
|
||||
|
||||
checkStateOrder = ps.getBoolean(PROP_CHECK_STATE_ORDER);
|
||||
maxLatticeEdges = ps.getInt(PROP_MAX_LATTICE_EDGES);
|
||||
acousticLookaheadFrames = ps.getFloat(PROP_ACOUSTIC_LOOKAHEAD_FRAMES);
|
||||
|
||||
relativeBeamWidth = logMath.linearToLog(ps.getDouble(PROP_RELATIVE_BEAM_WIDTH));
|
||||
}
|
||||
|
||||
/*
|
||||
* (non-Javadoc)
|
||||
*
|
||||
* @see edu.cmu.sphinx.decoder.search.SearchManager#allocate()
|
||||
*/
|
||||
public void allocate() {
|
||||
// tokenTracker = new TokenTracker();
|
||||
// tokenTypeTracker = new TokenTypeTracker();
|
||||
|
||||
scoreTimer = TimerPool.getTimer(this, "Score");
|
||||
pruneTimer = TimerPool.getTimer(this, "Prune");
|
||||
growTimer = TimerPool.getTimer(this, "Grow");
|
||||
|
||||
totalTokensScored = StatisticsVariable.getStatisticsVariable("totalTokensScored");
|
||||
curTokensScored = StatisticsVariable.getStatisticsVariable("curTokensScored");
|
||||
tokensCreated = StatisticsVariable.getStatisticsVariable("tokensCreated");
|
||||
|
||||
try {
|
||||
linguist.allocate();
|
||||
pruner.allocate();
|
||||
scorer.allocate();
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException("Allocation of search manager resources failed", e);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* (non-Javadoc)
|
||||
*
|
||||
* @see edu.cmu.sphinx.decoder.search.SearchManager#deallocate()
|
||||
*/
|
||||
public void deallocate() {
|
||||
try {
|
||||
scorer.deallocate();
|
||||
pruner.deallocate();
|
||||
linguist.deallocate();
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException("Deallocation of search manager resources failed", e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Called at the start of recognition. Gets the search manager ready to
|
||||
* recognize
|
||||
*/
|
||||
public void startRecognition() {
|
||||
linguist.startRecognition();
|
||||
pruner.startRecognition();
|
||||
scorer.startRecognition();
|
||||
localStart();
|
||||
}
|
||||
|
||||
/**
|
||||
* Performs the recognition for the given number of frames.
|
||||
*
|
||||
* @param nFrames
|
||||
* the number of frames to recognize
|
||||
* @return the current result
|
||||
*/
|
||||
public Result recognize(int nFrames) {
|
||||
boolean done = false;
|
||||
Result result = null;
|
||||
streamEnd = false;
|
||||
|
||||
for (int i = 0; i < nFrames && !done; i++) {
|
||||
done = recognize();
|
||||
}
|
||||
|
||||
if (!streamEnd) {
|
||||
result = new Result(loserManager, activeList, resultList, currentCollectTime, done, linguist.getSearchGraph()
|
||||
.getWordTokenFirst(), true);
|
||||
}
|
||||
|
||||
// tokenTypeTracker.show();
|
||||
if (showTokenCount) {
|
||||
showTokenCount();
|
||||
}
|
||||
return result;
|
||||
}
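// Usage sketch (illustrative, not part of the original source): the protocol
// described in the class comment, assuming a fully configured manager; the
// variable names are hypothetical.
//
//   searchManager.startRecognition();
//   Result result;
//   do {
//       result = searchManager.recognize(1);
//   } while (result != null && !result.isFinal());
//   searchManager.stopRecognition();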
|
||||
|
||||
protected boolean recognize() {
|
||||
|
||||
activeList = activeListManager.getEmittingList();
|
||||
boolean more = scoreTokens();
|
||||
|
||||
if (more) {
|
||||
pruneBranches();
|
||||
currentFrameNumber++;
|
||||
if (growSkipInterval == 0 || (currentFrameNumber % growSkipInterval) != 0) {
|
||||
clearCollectors();
|
||||
growEmittingBranches();
|
||||
growNonEmittingBranches();
|
||||
}
|
||||
}
|
||||
return !more;
|
||||
}
|
||||
|
||||
/**
|
||||
* Clears lists and maps before next expansion stage
|
||||
*/
|
||||
private void clearCollectors() {
|
||||
resultList = new LinkedList<Token>();
|
||||
createBestTokenMap();
|
||||
activeListManager.clearEmittingList();
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new best token map, sized based on the current active list
|
||||
*/
|
||||
protected void createBestTokenMap() {
|
||||
int mapSize = activeList.size() * 10;
|
||||
if (mapSize == 0) {
|
||||
mapSize = 1;
|
||||
}
|
||||
bestTokenMap = new HashMap<SearchState, Token>(mapSize, 0.3F);
|
||||
}
|
||||
|
||||
/** Terminates a recognition */
|
||||
public void stopRecognition() {
|
||||
localStop();
|
||||
scorer.stopRecognition();
|
||||
pruner.stopRecognition();
|
||||
linguist.stopRecognition();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the initial state from the linguist and creates the
* initial token
|
||||
*/
|
||||
protected void localStart() {
|
||||
SearchGraph searchGraph = linguist.getSearchGraph();
|
||||
currentFrameNumber = 0;
|
||||
curTokensScored.value = 0;
|
||||
numStateOrder = searchGraph.getNumStateOrder();
|
||||
activeListManager.setNumStateOrder(numStateOrder);
|
||||
if (buildWordLattice) {
|
||||
loserManager = new AlternateHypothesisManager(maxLatticeEdges);
|
||||
}
|
||||
|
||||
SearchState state = searchGraph.getInitialState();
|
||||
|
||||
activeList = activeListManager.getEmittingList();
|
||||
activeList.add(new Token(state, -1));
|
||||
|
||||
clearCollectors();
|
||||
|
||||
growBranches();
|
||||
growNonEmittingBranches();
|
||||
// tokenTracker.setEnabled(false);
|
||||
// tokenTracker.startUtterance();
|
||||
}
|
||||
|
||||
/** Local cleanup for this search manager */
|
||||
protected void localStop() {
|
||||
// tokenTracker.stopUtterance();
|
||||
}
|
||||
|
||||
/**
|
||||
* Goes through the active list of tokens and expands each token, finding
|
||||
* the set of successor tokens until all the successor tokens are emitting
|
||||
* tokens.
|
||||
*/
|
||||
protected void growBranches() {
|
||||
growTimer.start();
|
||||
float relativeBeamThreshold = activeList.getBeamThreshold();
|
||||
if (logger.isLoggable(Level.FINE)) {
|
||||
logger.fine("Frame: " + currentFrameNumber + " thresh : " + relativeBeamThreshold + " bs "
|
||||
+ activeList.getBestScore() + " tok " + activeList.getBestToken());
|
||||
}
|
||||
for (Token token : activeList) {
|
||||
if (token.getScore() >= relativeBeamThreshold && allowExpansion(token)) {
|
||||
collectSuccessorTokens(token);
|
||||
}
|
||||
}
|
||||
growTimer.stop();
|
||||
}
|
||||
|
||||
/**
|
||||
* Grows the emitting branches. This version applies a simple acoustic
|
||||
* lookahead based upon the rate of change in the current acoustic score.
|
||||
*/
|
||||
protected void growEmittingBranches() {
|
||||
if (acousticLookaheadFrames <= 0.0f) {
|
||||
growBranches();
|
||||
return;
|
||||
}
|
||||
growTimer.start();
|
||||
float bestScore = -Float.MAX_VALUE;
|
||||
for (Token t : activeList) {
|
||||
float score = t.getScore() + t.getAcousticScore() * acousticLookaheadFrames;
|
||||
if (score > bestScore) {
|
||||
bestScore = score;
|
||||
}
|
||||
}
|
||||
float relativeBeamThreshold = bestScore + relativeBeamWidth;
|
||||
for (Token t : activeList) {
|
||||
if (t.getScore() + t.getAcousticScore() * acousticLookaheadFrames > relativeBeamThreshold)
|
||||
collectSuccessorTokens(t);
|
||||
}
|
||||
growTimer.stop();
|
||||
}
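// Worked example (hypothetical numbers, log domain, not part of the original
// source): with acousticLookaheadFrames = 1.7, a token with score -2000 and
// per-frame acoustic score -60 is ranked by -2000 + 1.7 * -60 = -2102. It is
// expanded only when -2102 exceeds bestScore + relativeBeamWidth, i.e. when
// its projected score stays within the relative beam of the best projected
// token in the list.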
|
||||
|
||||
/**
|
||||
* Grow the non-emitting branches, until the tokens reach an emitting state.
|
||||
*/
|
||||
private void growNonEmittingBranches() {
|
||||
for (Iterator<ActiveList> i = activeListManager.getNonEmittingListIterator(); i.hasNext();) {
|
||||
activeList = i.next();
|
||||
if (activeList != null) {
|
||||
i.remove();
|
||||
pruneBranches();
|
||||
growBranches();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate the acoustic scores for the active list. The active list should
|
||||
* contain only emitting tokens.
|
||||
*
|
||||
* @return <code>true</code> if there are more frames to score, otherwise
* <code>false</code>
|
||||
*/
|
||||
protected boolean scoreTokens() {
|
||||
boolean moreTokens;
|
||||
scoreTimer.start();
|
||||
Data data = scorer.calculateScores(activeList.getTokens());
|
||||
scoreTimer.stop();
|
||||
|
||||
Token bestToken = null;
|
||||
if (data instanceof Token) {
|
||||
bestToken = (Token) data;
|
||||
} else if (data == null) {
|
||||
streamEnd = true;
|
||||
}
|
||||
|
||||
if (bestToken != null) {
|
||||
currentCollectTime = bestToken.getCollectTime();
|
||||
}
|
||||
|
||||
moreTokens = (bestToken != null);
|
||||
activeList.setBestToken(bestToken);
|
||||
|
||||
// monitorWords(activeList);
|
||||
monitorStates(activeList);
|
||||
|
||||
// System.out.println("BEST " + bestToken);
|
||||
|
||||
curTokensScored.value += activeList.size();
|
||||
totalTokensScored.value += activeList.size();
|
||||
|
||||
return moreTokens;
|
||||
}
|
||||
|
||||
/**
|
||||
* Keeps track of and reports all of the active word histories for the given
|
||||
* active list
|
||||
*
|
||||
* @param activeList
|
||||
* the active list to track
|
||||
*/
|
||||
@SuppressWarnings("unused")
|
||||
private void monitorWords(ActiveList activeList) {
|
||||
|
||||
// WordTracker tracker1 = new WordTracker(currentFrameNumber);
|
||||
//
|
||||
// for (Token t : activeList) {
|
||||
// tracker1.add(t);
|
||||
// }
|
||||
// tracker1.dump();
|
||||
//
|
||||
// TokenTracker tracker2 = new TokenTracker();
|
||||
//
|
||||
// for (Token t : activeList) {
|
||||
// tracker2.add(t);
|
||||
// }
|
||||
// tracker2.dumpSummary();
|
||||
// tracker2.dumpDetails();
|
||||
//
|
||||
// TokenTypeTracker tracker3 = new TokenTypeTracker();
|
||||
//
|
||||
// for (Token t : activeList) {
|
||||
// tracker3.add(t);
|
||||
// }
|
||||
// tracker3.dump();
|
||||
|
||||
// StateHistoryTracker tracker4 = new
|
||||
// StateHistoryTracker(currentFrameNumber);
|
||||
|
||||
// for (Token t : activeList) {
|
||||
// tracker4.add(t);
|
||||
// }
|
||||
// tracker4.dump();
|
||||
}
|
||||
|
||||
/**
|
||||
* Keeps track of and reports statistics about the number of active states
|
||||
*
|
||||
* @param activeList
|
||||
* the active list of states
|
||||
*/
|
||||
protected void monitorStates(ActiveList activeList) {
|
||||
|
||||
tokenSum += activeList.size();
|
||||
tokenCount++;
|
||||
|
||||
if ((tokenCount % 1000) == 0) {
|
||||
logger.info("Average Tokens/State: " + (tokenSum / tokenCount));
|
||||
}
|
||||
}
|
||||
|
||||
/** Removes unpromising branches from the active list */
|
||||
protected void pruneBranches() {
|
||||
pruneTimer.start();
|
||||
activeList = pruner.prune(activeList);
|
||||
pruneTimer.stop();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the best token for this state
|
||||
*
|
||||
* @param state
|
||||
* the state of interest
|
||||
* @return the best token
|
||||
*/
|
||||
protected Token getBestToken(SearchState state) {
|
||||
return bestTokenMap.get(state);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the best token for a given state
|
||||
*
|
||||
* @param token
|
||||
* the best token
|
||||
* @param state
|
||||
* the state
|
||||
*/
|
||||
protected void setBestToken(Token token, SearchState state) {
|
||||
bestTokenMap.put(state, token);
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks that the given two states are in legitimate order.
|
||||
*
|
||||
* @param fromState parent state
|
||||
* @param toState child state
|
||||
*/
|
||||
protected void checkStateOrder(SearchState fromState, SearchState toState) {
|
||||
if (fromState.getOrder() == numStateOrder - 1) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (fromState.getOrder() > toState.getOrder()) {
|
||||
throw new Error("IllegalState order: from " + fromState.getClass().getName() + ' ' + fromState.toPrettyString()
|
||||
+ " order: " + fromState.getOrder() + " to " + toState.getClass().getName() + ' ' + toState.toPrettyString()
|
||||
+ " order: " + toState.getOrder());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Collects the next set of emitting tokens from a token and accumulates
|
||||
* them in the active or result lists
|
||||
*
|
||||
* @param token
|
||||
* the token to collect successors from
|
||||
*/
|
||||
protected void collectSuccessorTokens(Token token) {
|
||||
|
||||
// tokenTracker.add(token);
|
||||
// tokenTypeTracker.add(token);
|
||||
|
||||
// If this is a final state, add it to the final list
|
||||
|
||||
if (token.isFinal()) {
|
||||
resultList.add(getResultListPredecessor(token));
|
||||
return;
|
||||
}
|
||||
|
||||
// if this is a non-emitting token and we've already
|
||||
// visited the same state during this frame, then we
|
||||
// are in a grammar loop, so we don't continue to expand.
|
||||
// This check only works properly if we have kept all of the
|
||||
// tokens (instead of skipping the non-word tokens).
|
||||
// Note that certain linguists will never generate grammar loops
|
||||
// (lextree linguist for example). For these cases, it is perfectly
|
||||
// fine to disable this check by setting keepAllTokens to false
|
||||
|
||||
if (!token.isEmitting() && (keepAllTokens && isVisited(token))) {
|
||||
return;
|
||||
}
|
||||
|
||||
SearchState state = token.getSearchState();
|
||||
SearchStateArc[] arcs = state.getSuccessors();
|
||||
Token predecessor = getResultListPredecessor(token);
|
||||
|
||||
// For each successor
|
||||
// calculate the entry score for the token based upon the
|
||||
// predecessor token score and the transition probabilities
|
||||
// if the score is better than the best score encountered for
|
||||
// the SearchState and frame then create a new token, add
|
||||
// it to the lattice and the SearchState.
|
||||
// If the token is an emitting token add it to the list,
|
||||
// otherwise recursively collect the new token's successors.
|
||||
|
||||
for (SearchStateArc arc : arcs) {
|
||||
SearchState nextState = arc.getState();
|
||||
|
||||
if (checkStateOrder) {
|
||||
checkStateOrder(state, nextState);
|
||||
}
|
||||
|
||||
// We're actually multiplying the variables, but since
|
||||
// these come in log(), multiply gets converted to add
|
||||
float logEntryScore = token.getScore() + arc.getProbability();
|
||||
|
||||
Token bestToken = getBestToken(nextState);
|
||||
|
||||
if (bestToken == null) {
|
||||
Token newBestToken = new Token(predecessor, nextState, logEntryScore, arc.getInsertionProbability(),
|
||||
arc.getLanguageProbability(), currentCollectTime);
|
||||
tokensCreated.value++;
|
||||
setBestToken(newBestToken, nextState);
|
||||
activeListAdd(newBestToken);
|
||||
} else if (bestToken.getScore() < logEntryScore) {
|
||||
// System.out.println("Updating " + bestToken + " with " +
|
||||
// newBestToken);
|
||||
Token oldPredecessor = bestToken.getPredecessor();
|
||||
bestToken.update(predecessor, nextState, logEntryScore, arc.getInsertionProbability(),
|
||||
arc.getLanguageProbability(), currentCollectTime);
|
||||
if (buildWordLattice && nextState instanceof WordSearchState) {
|
||||
loserManager.addAlternatePredecessor(bestToken, oldPredecessor);
|
||||
}
|
||||
} else if (buildWordLattice && nextState instanceof WordSearchState) {
|
||||
if (predecessor != null) {
|
||||
loserManager.addAlternatePredecessor(bestToken, predecessor);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
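// Note (added for clarity, not part of the original source): because all
// scores here live in the log domain, the probability product
// P(predecessor) * P(arc) is computed as the sum
//   logEntryScore = token.getScore() + arc.getProbability();
// e.g. log(1E-10) + log(1E-5) = log(1E-15). This is why the comment in the
// loop above speaks of "multiplying" while the code adds.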
|
||||
|
||||
/**
|
||||
* Determines whether or not we've visited the state associated with this
|
||||
* token since the previous frame.
|
||||
*
|
||||
* @param t token to check
|
||||
* @return true if we've visited the search state since the last frame
|
||||
*/
|
||||
protected boolean isVisited(Token t) {
|
||||
SearchState curState = t.getSearchState();
|
||||
|
||||
t = t.getPredecessor();
|
||||
|
||||
while (t != null && !t.isEmitting()) {
|
||||
if (curState.equals(t.getSearchState())) {
|
||||
System.out.println("CS " + curState + " match " + t.getSearchState());
|
||||
return true;
|
||||
}
|
||||
t = t.getPredecessor();
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
protected void activeListAdd(Token token) {
|
||||
activeListManager.add(token);
|
||||
}
|
||||
|
||||
/**
|
||||
* Determine if the given token should be expanded
|
||||
*
|
||||
* @param t
|
||||
* the token to test
|
||||
* @return <code>true</code> if the token should be expanded
|
||||
*/
|
||||
protected boolean allowExpansion(Token t) {
|
||||
return true; // currently disabled
|
||||
}
|
||||
|
||||
/**
|
||||
* Counts all the tokens in the active list (and displays them). This is an
|
||||
* expensive operation.
|
||||
*/
|
||||
protected void showTokenCount() {
|
||||
Set<Token> tokenSet = new HashSet<Token>();
|
||||
|
||||
for (Token token : activeList) {
|
||||
while (token != null) {
|
||||
tokenSet.add(token);
|
||||
token = token.getPredecessor();
|
||||
}
|
||||
}
|
||||
|
||||
System.out.println("Token Lattice size: " + tokenSet.size());
|
||||
|
||||
tokenSet = new HashSet<Token>();
|
||||
|
||||
for (Token token : resultList) {
|
||||
while (token != null) {
|
||||
tokenSet.add(token);
|
||||
token = token.getPredecessor();
|
||||
}
|
||||
}
|
||||
|
||||
System.out.println("Result Lattice size: " + tokenSet.size());
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the ActiveList.
|
||||
*
|
||||
* @return the ActiveList
|
||||
*/
|
||||
public ActiveList getActiveList() {
|
||||
return activeList;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the ActiveList.
|
||||
*
|
||||
* @param activeList
|
||||
* the new ActiveList
|
||||
*/
|
||||
public void setActiveList(ActiveList activeList) {
|
||||
this.activeList = activeList;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the result list.
|
||||
*
|
||||
* @return the result list
|
||||
*/
|
||||
public List<Token> getResultList() {
|
||||
return resultList;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the result list.
|
||||
*
|
||||
* @param resultList
|
||||
* the new result list
|
||||
*/
|
||||
public void setResultList(List<Token> resultList) {
|
||||
this.resultList = resultList;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the current frame number.
|
||||
*
|
||||
* @return the current frame number
|
||||
*/
|
||||
public int getCurrentFrameNumber() {
|
||||
return currentFrameNumber;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the Timer for growing.
|
||||
*
|
||||
* @return the Timer for growing
|
||||
*/
|
||||
public Timer getGrowTimer() {
|
||||
return growTimer;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the tokensCreated StatisticsVariable.
|
||||
*
|
||||
* @return the tokensCreated StatisticsVariable.
|
||||
*/
|
||||
public StatisticsVariable getTokensCreated() {
|
||||
return tokensCreated;
|
||||
}
|
||||
|
||||
}
@ -0,0 +1,140 @@
package edu.cmu.sphinx.decoder.search.stats;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import edu.cmu.sphinx.decoder.search.Token;
|
||||
import edu.cmu.sphinx.linguist.WordSequence;
|
||||
|
||||
/** A class that keeps track of word histories */
|
||||
|
||||
public class StateHistoryTracker {
|
||||
|
||||
final Map<WordSequence, WordStats> statMap;
|
||||
final int frameNumber;
|
||||
int stateCount;
|
||||
int maxWordHistories;
|
||||
|
||||
/**
|
||||
* Creates a state history tracker for the given frame number
|
||||
*
|
||||
* @param frameNumber the frame number
|
||||
*/
|
||||
public StateHistoryTracker(int frameNumber) {
|
||||
statMap = new HashMap<WordSequence, WordStats>();
|
||||
this.frameNumber = frameNumber;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Adds a word history for the given token to the tracker
|
||||
*
|
||||
* @param t the token to add
|
||||
*/
|
||||
public void add(Token t) {
|
||||
stateCount++;
|
||||
WordSequence ws = getWordSequence(t);
|
||||
WordStats stats = statMap.get(ws);
|
||||
if (stats == null) {
|
||||
stats = new WordStats(ws);
|
||||
statMap.put(ws, stats);
|
||||
}
|
||||
stats.update(t);
|
||||
}
|
||||
|
||||
|
||||
/** Dumps the word histories in the tracker */
|
||||
public void dump() {
|
||||
dumpSummary();
|
||||
List<WordStats> stats = new ArrayList<WordStats>(statMap.values());
|
||||
Collections.sort(stats, WordStats.COMPARATOR);
|
||||
for (WordStats stat : stats) {
|
||||
System.out.println(" " + stat);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** Dumps summary information in the tracker */
|
||||
void dumpSummary() {
|
||||
System.out.println("Frame: " + frameNumber + " states: " + stateCount
|
||||
+ " histories " + statMap.size());
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Given a token, gets the history sequence
|
||||
*
|
||||
* @param token the token of interest
|
||||
* @return the word sequence for the token
|
||||
*/
|
||||
private WordSequence getWordSequence(Token token) {
|
||||
return token.getSearchState().getWordHistory();
|
||||
}
|
||||
|
||||
/** Keeps track of statistics for a particular word sequence */
|
||||
|
||||
static class WordStats {
|
||||
|
||||
public final static Comparator<WordStats> COMPARATOR = new Comparator<WordStats>() {
|
||||
public int compare(WordStats ws1, WordStats ws2) {
|
||||
if (ws1.maxScore > ws2.maxScore) {
|
||||
return -1;
|
||||
} else if (ws1.maxScore == ws2.maxScore) {
|
||||
return 0;
|
||||
} else {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
private int size;
|
||||
private float maxScore;
|
||||
private float minScore;
|
||||
private final WordSequence ws;
|
||||
|
||||
/**
|
||||
* Creates a word statistics for the given sequence
|
||||
*
|
||||
* @param ws the word sequence
|
||||
*/
|
||||
WordStats(WordSequence ws) {
|
||||
size = 0;
|
||||
maxScore = -Float.MAX_VALUE;
|
||||
minScore = Float.MAX_VALUE;
|
||||
this.ws = ws;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Updates the statistics based upon the scores for the given token
|
||||
*
|
||||
* @param t the token
|
||||
*/
|
||||
void update(Token t) {
|
||||
size++;
|
||||
if (t.getScore() > maxScore) {
|
||||
maxScore = t.getScore();
|
||||
}
|
||||
if (t.getScore() < minScore) {
|
||||
minScore = t.getScore();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns a string representation of the statistics
|
||||
*
|
||||
* @return a string representation
|
||||
*/
|
||||
@Override
|
||||
public String toString() {
|
||||
return "states:" + size + " max:" + maxScore + " min:" + minScore + ' '
|
||||
+ ws;
|
||||
}
|
||||
}
|
||||
}
@ -0,0 +1,198 @@
package edu.cmu.sphinx.decoder.search.stats;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import edu.cmu.sphinx.decoder.search.Token;
|
||||
import edu.cmu.sphinx.linguist.HMMSearchState;
|
||||
|
||||
/** This debugging class is used to track the number of active tokens per state */
|
||||
|
||||
public class TokenTracker {
|
||||
|
||||
private Map<Object, TokenStats> stateMap;
|
||||
private boolean enabled;
|
||||
private int frame;
|
||||
|
||||
private int utteranceStateCount;
|
||||
private int utteranceMaxStates;
|
||||
private int utteranceSumStates;
|
||||
|
||||
|
||||
/**
|
||||
* Enables or disables the token tracker
|
||||
*
|
||||
* @param enabled if <code>true</code> the tracker is enabled
|
||||
*/
|
||||
void setEnabled(boolean enabled) {
|
||||
this.enabled = enabled;
|
||||
}
|
||||
|
||||
|
||||
/** Starts the per-utterance tracking */
|
||||
void startUtterance() {
|
||||
if (enabled) {
|
||||
frame = 0;
|
||||
utteranceStateCount = 0;
|
||||
utteranceMaxStates = -Integer.MAX_VALUE;
|
||||
utteranceSumStates = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** Stops the per-utterance tracking */
|
||||
void stopUtterance() {
|
||||
if (enabled) {
|
||||
dumpSummary();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** Starts the per-frame tracking */
|
||||
void startFrame() {
|
||||
if (enabled) {
|
||||
stateMap = new HashMap<Object, TokenStats>();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Adds a new token to the tracker
|
||||
*
|
||||
* @param t the token to add.
|
||||
*/
|
||||
public void add(Token t) {
|
||||
if (enabled) {
|
||||
TokenStats stats = getStats(t);
|
||||
stats.update(t);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** Stops the per-frame tracking */
|
||||
void stopFrame() {
|
||||
if (enabled) {
|
||||
frame++;
|
||||
dumpDetails();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** Dumps summary info about the tokens */
|
||||
public void dumpSummary() {
|
||||
if (enabled) {
|
||||
float avgStates = 0f;
|
||||
if (utteranceStateCount > 0) {
|
||||
avgStates = ((float) utteranceSumStates) / utteranceStateCount;
|
||||
}
|
||||
System.out.print("# Utterance stats ");
|
||||
System.out.print(" States: " + utteranceStateCount / frame);
|
||||
|
||||
if (utteranceStateCount > 0) {
|
||||
System.out.print(" Paths: " + utteranceSumStates / frame);
|
||||
System.out.print(" Max: " + utteranceMaxStates);
|
||||
System.out.print(" Avg: " + avgStates);
|
||||
}
|
||||
|
||||
System.out.println();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** Dumps detailed info about the tokens */
|
||||
public void dumpDetails() {
|
||||
if (enabled) {
|
||||
int maxStates = -Integer.MAX_VALUE;
|
||||
int hmmCount = 0;
|
||||
int sumStates = 0;
|
||||
|
||||
for (TokenStats stats : stateMap.values()) {
|
||||
if (stats.isHMM) {
|
||||
hmmCount++;
|
||||
}
|
||||
sumStates += stats.count;
|
||||
utteranceSumStates += stats.count;
|
||||
if (stats.count > maxStates) {
|
||||
maxStates = stats.count;
|
||||
}
|
||||
|
||||
if (stats.count > utteranceMaxStates) {
|
||||
utteranceMaxStates = stats.count;
|
||||
}
|
||||
}
|
||||
|
||||
utteranceStateCount += stateMap.size();
|
||||
|
||||
float avgStates = 0f;
|
||||
if (!stateMap.isEmpty()) {
|
||||
avgStates = ((float) sumStates) / stateMap.size();
|
||||
}
|
||||
System.out.print("# Frame " + frame);
|
||||
System.out.print(" States: " + stateMap.size());
|
||||
|
||||
if (!stateMap.isEmpty()) {
|
||||
System.out.print(" Paths: " + sumStates);
|
||||
System.out.print(" Max: " + maxStates);
|
||||
System.out.print(" Avg: " + avgStates);
|
||||
System.out.print(" HMM: " + hmmCount);
|
||||
}
|
||||
|
||||
System.out.println();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Gets the statistics for a particular token
|
||||
*
|
||||
* @param t the token of interest
|
||||
* @return the token statistics associated with the given token
|
||||
*/
|
||||
private TokenStats getStats(Token t) {
|
||||
TokenStats stats = stateMap.get(t.getSearchState()
|
||||
.getLexState());
|
||||
if (stats == null) {
|
||||
stats = new TokenStats();
|
||||
stateMap.put(t.getSearchState().getLexState(), stats);
|
||||
}
|
||||
return stats;
|
||||
}
|
||||
|
||||
/**
|
||||
* A class for keeping track of statistics about tokens. Tracks the count,
|
||||
* minimum and maximum score for a particular state.
|
||||
*/
|
||||
class TokenStats {
|
||||
|
||||
int count;
|
||||
float maxScore;
|
||||
float minScore;
|
||||
boolean isHMM;
|
||||
|
||||
|
||||
TokenStats() {
|
||||
count = 0;
|
||||
maxScore = -Float.MAX_VALUE;
|
||||
minScore = Float.MAX_VALUE;
|
||||
}
|
||||
|
||||
|
||||
/**
* Updates this state with the given token
*
* @param t the token
*/
|
||||
public void update(Token t) {
|
||||
count++;
|
||||
if (t.getScore() > maxScore) {
|
||||
maxScore = t.getScore();
|
||||
}
|
||||
|
||||
if (t.getScore() < minScore) {
|
||||
minScore = t.getScore();
|
||||
}
|
||||
|
||||
isHMM = t.getSearchState() instanceof HMMSearchState;
|
||||
}
|
||||
}
|
||||
|
||||
}
@ -0,0 +1,80 @@
package edu.cmu.sphinx.decoder.search.stats;
|
||||
|
||||
import edu.cmu.sphinx.decoder.search.Token;
|
||||
import edu.cmu.sphinx.linguist.HMMSearchState;
|
||||
import edu.cmu.sphinx.linguist.SearchState;
|
||||
import edu.cmu.sphinx.linguist.UnitSearchState;
|
||||
import edu.cmu.sphinx.linguist.WordSearchState;
|
||||
import edu.cmu.sphinx.linguist.acoustic.HMM;
|
||||
|
||||
/**
|
||||
* A tool for tracking the types of tokens created and placed in the beam
|
||||
* <p>
|
||||
* TODO: Develop a mechanism for adding trackers such as these in a more general fashion.
|
||||
*/
|
||||
public class TokenTypeTracker {
|
||||
// keep track of the various types of states
|
||||
|
||||
private int numWords;
|
||||
private int numUnits;
|
||||
private int numOthers;
|
||||
private int numHMMBegin;
|
||||
private int numHMMEnd;
|
||||
private int numHMMSingle;
|
||||
private int numHMMInternal;
|
||||
private int numTokens;
|
||||
|
||||
|
||||
/**
|
||||
* Adds a token to this tracker. Records statistics about the type of token.
|
||||
*
|
||||
* @param t the token to track
|
||||
*/
|
||||
public void add(Token t) {
|
||||
numTokens++;
|
||||
SearchState s = t.getSearchState();
|
||||
|
||||
if (s instanceof WordSearchState) {
|
||||
numWords++;
|
||||
} else if (s instanceof UnitSearchState) {
|
||||
numUnits++;
|
||||
} else if (s instanceof HMMSearchState) {
|
||||
HMM hmm = ((HMMSearchState) s).getHMMState().getHMM();
|
||||
switch (hmm.getPosition()) {
|
||||
case BEGIN: numHMMBegin++; break;
|
||||
case END: numHMMEnd++; break;
|
||||
case SINGLE: numHMMSingle++; break;
|
||||
case INTERNAL: numHMMInternal++; break;
|
||||
default: break;
|
||||
}
|
||||
} else {
|
||||
numOthers++;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** Shows the accumulated statistics */
|
||||
public void dump() {
|
||||
System.out.println("TotalTokens: " + numTokens);
|
||||
System.out.println(" Words: " + numWords + pc(numWords));
|
||||
System.out.println(" Units: " + numUnits + pc(numUnits));
|
||||
System.out.println(" HMM-b: " + numHMMBegin + pc(numHMMBegin));
|
||||
System.out.println(" HMM-e: " + numHMMEnd + pc(numHMMEnd));
|
||||
System.out.println(" HMM-s: " + numHMMSingle + pc(numHMMSingle));
|
||||
System.out.println(" HMM-i: " + numHMMInternal +
|
||||
pc(numHMMInternal));
|
||||
System.out.println(" Others: " + numOthers + pc(numOthers));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Utility method for generating integer percents
|
||||
*
|
||||
* @param num the value to be converted into percent
|
||||
* @return a string representation as a percent
|
||||
*/
|
||||
private String pc(int num) {
|
||||
int percent = ((100 * num) / numTokens);
|
||||
return " (" + percent + "%)";
|
||||
}
|
||||
}
@ -0,0 +1,155 @@
package edu.cmu.sphinx.decoder.search.stats;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import edu.cmu.sphinx.decoder.search.Token;
|
||||
import edu.cmu.sphinx.linguist.WordSearchState;
|
||||
import edu.cmu.sphinx.linguist.WordSequence;
|
||||
import edu.cmu.sphinx.linguist.dictionary.Word;
|
||||
|
||||
/** A class that keeps track of word histories */
|
||||
|
||||
public class WordTracker {
|
||||
|
||||
final Map<WordSequence, WordStats> statMap;
|
||||
final int frameNumber;
|
||||
int stateCount;
|
||||
int maxWordHistories;
|
||||
|
||||
|
||||
/**
|
||||
* Creates a word tracker for the given frame number
|
||||
*
|
||||
* @param frameNumber the frame number
|
||||
*/
|
||||
public WordTracker(int frameNumber) {
|
||||
statMap = new HashMap<WordSequence, WordStats>();
|
||||
this.frameNumber = frameNumber;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Adds a word history for the given token to the word tracker
|
||||
*
|
||||
* @param t the token to add
|
||||
*/
|
||||
public void add(Token t) {
|
||||
stateCount++;
|
||||
WordSequence ws = getWordSequence(t);
|
||||
WordStats stats = statMap.get(ws);
|
||||
if (stats == null) {
|
||||
stats = new WordStats(ws);
|
||||
statMap.put(ws, stats);
|
||||
}
|
||||
stats.update(t);
|
||||
}
|
||||
|
||||
|
||||
/** Dumps the word histories in the tracker */
|
||||
public void dump() {
|
||||
dumpSummary();
|
||||
List<WordStats> stats = new ArrayList<WordStats>(statMap.values());
|
||||
Collections.sort(stats, WordStats.COMPARATOR);
|
||||
for (WordStats stat : stats) {
|
||||
System.out.println(" " + stat);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** Dumps summary information in the tracker */
|
||||
void dumpSummary() {
|
||||
System.out.println("Frame: " + frameNumber + " states: " + stateCount
|
||||
+ " histories " + statMap.size());
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Given a token, gets the word sequence represented by the token
|
||||
*
|
||||
* @param token the token of interest
|
||||
* @return the word sequence for the token
|
||||
*/
|
||||
private WordSequence getWordSequence(Token token) {
|
||||
List<Word> wordList = new LinkedList<Word>();
|
||||
|
||||
while (token != null) {
|
||||
if (token.isWord()) {
|
||||
WordSearchState wordState = (WordSearchState) token
|
||||
.getSearchState();
|
||||
Word word = wordState.getPronunciation().getWord();
|
||||
wordList.add(0, word);
|
||||
}
|
||||
token = token.getPredecessor();
|
||||
}
|
||||
return new WordSequence(wordList);
|
||||
}
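// Illustrative trace (not part of the original source): for a predecessor
// chain <s> ... "one" ... "two" -> token, the backward walk above visits
// "two" first and prepends each word with wordList.add(0, word), so the
// returned WordSequence reads in utterance order: [<s>, one, two].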
|
||||
|
||||
/** Keeps track of statistics for a particular word sequence */
|
||||
|
||||
static class WordStats {
|
||||
|
||||
public final static Comparator<WordStats> COMPARATOR = new Comparator<WordStats>() {
|
||||
public int compare(WordStats ws1, WordStats ws2) {
|
||||
if (ws1.maxScore > ws2.maxScore) {
|
||||
return -1;
|
||||
} else if (ws1.maxScore == ws2.maxScore) {
|
||||
return 0;
|
||||
} else {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
private int size;
|
||||
private float maxScore;
|
||||
private float minScore;
|
||||
private final WordSequence ws;
|
||||
|
||||
/**
|
||||
* Creates a word statistics for the given sequence
|
||||
*
|
||||
* @param ws the word sequence
|
||||
*/
|
||||
WordStats(WordSequence ws) {
|
||||
size = 0;
|
||||
maxScore = -Float.MAX_VALUE;
|
||||
minScore = Float.MAX_VALUE;
|
||||
this.ws = ws;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Updates the statistics based upon the scores for the given token
|
||||
*
|
||||
* @param t the token
|
||||
*/
|
||||
void update(Token t) {
|
||||
size++;
|
||||
if (t.getScore() > maxScore) {
|
||||
maxScore = t.getScore();
|
||||
}
|
||||
if (t.getScore() < minScore) {
|
||||
minScore = t.getScore();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns a string representation of the statistics
|
||||
*
|
||||
* @return a string representation
|
||||
*/
|
||||
@Override
|
||||
public String toString() {
|
||||
return "states:" + size + " max:" + maxScore + " min:" + minScore + ' '
|
||||
+ ws;
|
||||
}
|
||||
}
|
||||
}
@ -0,0 +1,274 @@
/*
|
||||
* Copyright 2013 Carnegie Mellon University. All Rights Reserved. Use is
|
||||
* subject to license terms. See the file "license.terms" for information on
|
||||
* usage and redistribution of this file, and for a DISCLAIMER OF ALL
|
||||
* WARRANTIES.
|
||||
*/
|
||||
package edu.cmu.sphinx.frontend;
|
||||
|
||||
import static java.lang.Double.parseDouble;
|
||||
import static java.lang.Integer.parseInt;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Properties;
|
||||
|
||||
import edu.cmu.sphinx.frontend.denoise.Denoise;
|
||||
import edu.cmu.sphinx.frontend.frequencywarp.MelFrequencyFilterBank;
|
||||
import edu.cmu.sphinx.frontend.frequencywarp.MelFrequencyFilterBank2;
|
||||
import edu.cmu.sphinx.frontend.transform.*;
|
||||
import edu.cmu.sphinx.linguist.acoustic.tiedstate.KaldiLoader;
|
||||
import edu.cmu.sphinx.linguist.acoustic.tiedstate.Loader;
|
||||
import edu.cmu.sphinx.util.props.*;
|
||||
|
||||
|
||||
/**
|
||||
* Cepstrum is an auto-configurable DataProcessor which is used to compute a
|
||||
* specific cepstrum (for a target acoustic model) given the spectrum. The
|
||||
* Cepstrum is computed using a pipeline of front end components which are
|
||||
* selected, customized or ignored depending on the feat.params file which
|
||||
* characterizes the target acoustic model for which this cepstrum is computed.
|
||||
* A typical legacy MFCC Cepstrum will use a MelFrequencyFilterBank, followed
|
||||
* by a DiscreteCosineTransform. A typical denoised MFCC Cepstrum will use a
|
||||
* MelFrequencyFilterBank, followed by a Denoise component, followed by a
|
||||
* DiscreteCosineTransform2, followed by a Lifter component. The
|
||||
* MelFrequencyFilterBank parameters (numberFilters, minimumFrequency and
|
||||
* maximumFrequency) are auto-configured based on the values found in
|
||||
* feat.params.
|
||||
*
|
||||
* @author Horia Cucu
|
||||
*/
|
||||
public class AutoCepstrum extends BaseDataProcessor {
|
||||
|
||||
/**
|
||||
* The property specifying the acoustic model for which this cepstrum will
|
||||
* be configured. For this acoustic model (AM) it is mandatory to specify a
|
||||
* location in the configuration file. The Cepstrum will be configured
|
||||
* based on the feat.params file that will be found in the specified AM
|
||||
* location.
|
||||
*/
|
||||
@S4Component(type = Loader.class)
|
||||
public final static String PROP_LOADER = "loader";
|
||||
protected Loader loader;
|
||||
|
||||
/**
|
||||
* The filter bank which will be used for creating the cepstrum. The filter
|
||||
* bank is always inserted in the pipeline and its minimum frequency,
|
||||
* maximum frequency and number of filters are configured based on the
|
||||
* "lowerf", "upperf" and "nfilt" values in the feat.params file of the
|
||||
* target acoustic model.
|
||||
*/
|
||||
protected BaseDataProcessor filterBank;
|
||||
|
||||
/**
|
||||
* The denoise component which could be used for creating the cepstrum. The
|
||||
* denoise component is inserted in the pipeline only if
|
||||
* "-remove_noise yes" is specified in the feat.params file of the target
|
||||
* acoustic model.
|
||||
*/
|
||||
protected Denoise denoise;
|
||||
|
||||
/**
|
||||
* The property specifying the DCT which will be used for creating the
|
||||
* cepstrum. If "-transform legacy" is specified in the feat.params file of
|
||||
* the target acoustic model or if the "-transform" parameter does not
|
||||
* appear in this file at all, the legacy DCT component is inserted in the
|
||||
* pipeline. If "-transform dct" is specified in the feat.params file of
|
||||
* the target acoustic model, then the current DCT component is inserted in
|
||||
* the pipeline.
|
||||
*/
|
||||
protected DiscreteCosineTransform dct;
|
||||
|
||||
/**
|
||||
* The lifter component which could be used for creating the cepstrum. The
|
||||
* lifter component is inserted in the pipeline only if
|
||||
* "-lifter <lifterValue>" is specified in the feat.params file of the
|
||||
* target acoustic model.
|
||||
*/
|
||||
protected Lifter lifter;
|
||||
|
||||
/**
|
||||
* The list of <code>DataProcessor</code>s which were auto-configured for
|
||||
* this Cepstrum component.
|
||||
*/
|
||||
protected List<DataProcessor> selectedDataProcessors;
|
||||
|
||||
public AutoCepstrum(Loader loader) throws IOException {
|
||||
initLogger();
|
||||
this.loader = loader;
|
||||
loader.load();
|
||||
initDataProcessors();
|
||||
}
|
||||
|
||||
public AutoCepstrum() {
|
||||
}
|
||||
|
||||
/*
|
||||
* (non-Javadoc)
|
||||
* @see
|
||||
* edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util
|
||||
* .props.PropertySheet)
|
||||
*/
|
||||
@Override
|
||||
public void newProperties(PropertySheet ps) throws PropertyException {
|
||||
super.newProperties(ps);
|
||||
loader = (Loader) ps.getComponent(PROP_LOADER);
|
||||
try {
|
||||
loader.load();
|
||||
} catch (IOException e) {
|
||||
throw new PropertyException(e);
|
||||
}
|
||||
initDataProcessors();
|
||||
}
|
||||
|
||||
private void initDataProcessors() {
|
||||
try {
|
||||
Properties featParams = loader.getProperties();
|
||||
selectedDataProcessors = new ArrayList<DataProcessor>();
|
||||
|
||||
double lowFreq = parseDouble(featParams.getProperty("-lowerf"));
|
||||
double hiFreq = parseDouble(featParams.getProperty("-upperf"));
|
||||
int numFilter = parseInt(featParams.getProperty("-nfilt"));
|
||||
|
||||
// TODO: should not be there, but for now we must preserve
|
||||
// backward compatibility with the legacy code.
|
||||
if (loader instanceof KaldiLoader)
|
||||
filterBank = new MelFrequencyFilterBank2(lowFreq,
|
||||
hiFreq,
|
||||
numFilter);
|
||||
else
|
||||
filterBank = new MelFrequencyFilterBank(lowFreq,
|
||||
hiFreq,
|
||||
numFilter);
|
||||
|
||||
selectedDataProcessors.add(filterBank);
|
||||
|
||||
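// Note (added for clarity): the Denoise defaults below are read via
// reflection from the @S4Double/@S4Integer annotations on the Denoise
// fields, so this manual construction stays in sync with the values the
// configuration manager would otherwise inject.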
if ((featParams.get("-remove_noise") == null)
|
||||
|| (featParams.get("-remove_noise").equals("yes"))) {
|
||||
denoise = new Denoise(Denoise.class.getField("LAMBDA_POWER")
|
||||
.getAnnotation(S4Double.class)
|
||||
.defaultValue(),
|
||||
Denoise.class.getField("LAMBDA_A")
|
||||
.getAnnotation(S4Double.class)
|
||||
.defaultValue(),
|
||||
Denoise.class.getField("LAMBDA_B")
|
||||
.getAnnotation(S4Double.class)
|
||||
.defaultValue(),
|
||||
Denoise.class.getField("LAMBDA_T")
|
||||
.getAnnotation(S4Double.class)
|
||||
.defaultValue(),
|
||||
Denoise.class.getField("MU_T")
|
||||
.getAnnotation(S4Double.class)
|
||||
.defaultValue(),
|
||||
Denoise.class.getField("MAX_GAIN")
|
||||
.getAnnotation(S4Double.class)
|
||||
.defaultValue(),
|
||||
Denoise.class.getField("SMOOTH_WINDOW")
|
||||
.getAnnotation(S4Integer.class)
|
||||
.defaultValue());
|
||||
// denoise.newProperties();
|
||||
denoise.setPredecessor(selectedDataProcessors
|
||||
.get(selectedDataProcessors.size() - 1));
|
||||
selectedDataProcessors.add(denoise);
|
||||
}
|
||||
|
||||
if ((featParams.get("-transform") != null)
|
||||
&& (featParams.get("-transform").equals("dct"))) {
|
||||
dct = new DiscreteCosineTransform2(
|
||||
numFilter,
|
||||
DiscreteCosineTransform.class
|
||||
.getField("PROP_CEPSTRUM_LENGTH")
|
||||
.getAnnotation(S4Integer.class)
|
||||
.defaultValue());
|
||||
} else if ((featParams.get("-transform") != null)
|
||||
&& (featParams.get("-transform").equals("kaldi")))
|
||||
{
|
||||
dct = new KaldiDiscreteCosineTransform(
|
||||
numFilter,
|
||||
DiscreteCosineTransform.class
|
||||
.getField("PROP_CEPSTRUM_LENGTH")
|
||||
.getAnnotation(S4Integer.class)
|
||||
.defaultValue());
|
||||
} else {
|
||||
dct = new DiscreteCosineTransform(numFilter,
|
||||
DiscreteCosineTransform.class
|
||||
.getField("PROP_CEPSTRUM_LENGTH")
|
||||
.getAnnotation(S4Integer.class)
|
||||
.defaultValue());
|
||||
}
|
||||
dct.setPredecessor(selectedDataProcessors
|
||||
.get(selectedDataProcessors.size() - 1));
|
||||
selectedDataProcessors.add(dct);
|
||||
|
||||
if (featParams.get("-lifter") != null) {
|
||||
lifter = new Lifter(Integer.parseInt((String) featParams
|
||||
.get("-lifter")));
|
||||
lifter.setPredecessor(selectedDataProcessors
|
||||
.get(selectedDataProcessors.size() - 1));
|
||||
selectedDataProcessors.add(lifter);
|
||||
}
|
||||
logger.info("Cepstrum component auto-configured as follows: "
|
||||
+ toString());
|
||||
} catch (NoSuchFieldException exc) {
|
||||
throw new RuntimeException(exc);
|
||||
}
|
||||
}
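// Example (hypothetical feat.params values, for illustration only): a file
// such as
//
//   -lowerf 130
//   -upperf 6800
//   -nfilt 40
//   -remove_noise yes
//   -transform dct
//   -lifter 22
//
// would auto-configure the pipeline as
//   MelFrequencyFilterBank -> Denoise -> DiscreteCosineTransform2 -> Lifter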
|
||||
|
||||
/*
|
||||
* (non-Javadoc)
|
||||
* @see
|
||||
* edu.cmu.sphinx.frontend.DataProcessor#initialize(edu.cmu.sphinx.frontend
|
||||
* .CommonConfig)
|
||||
*/
|
||||
@Override
|
||||
public void initialize() {
|
||||
super.initialize();
|
||||
|
||||
for (DataProcessor dataProcessor : selectedDataProcessors)
|
||||
dataProcessor.initialize();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the processed Data output, basically calls
|
||||
* <code>getData()</code> on the last processor.
|
||||
*
|
||||
* @return a Data object that has been processed by the cepstrum
|
||||
* @throws DataProcessingException if a data processor error occurs
|
||||
*/
|
||||
@Override
|
||||
public Data getData() throws DataProcessingException {
|
||||
DataProcessor dp;
|
||||
dp = selectedDataProcessors.get(selectedDataProcessors.size() - 1);
|
||||
return dp.getData();
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the predecessor for this DataProcessor. The predecessor is actually
|
||||
* the spectrum builder.
|
||||
*
|
||||
* @param predecessor the predecessor of this DataProcessor
|
||||
*/
|
||||
@Override
|
||||
public void setPredecessor(DataProcessor predecessor) {
|
||||
filterBank.setPredecessor(predecessor);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a description of this Cepstrum component in the format:
|
||||
* <cepstrum name> {<DataProcessor1>, <DataProcessor2> ...
|
||||
* <DataProcessorN>}
|
||||
*
|
||||
* @return a description of this Cepstrum
|
||||
*/
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder description = new StringBuilder(super.toString())
|
||||
.append(" {");
|
||||
for (DataProcessor dp : selectedDataProcessors)
|
||||
description.append(dp).append(", ");
|
||||
description.setLength(description.length() - 2);
|
||||
return description.append('}').toString();
|
||||
}
|
||||
|
||||
}
@ -0,0 +1,61 @@
/*
|
||||
* Copyright 2004 Carnegie Mellon University.
|
||||
* Portions Copyright 2004 Sun Microsystems, Inc.
|
||||
* Portions Copyright 2004 Mitsubishi Electric Research Laboratories.
|
||||
* All Rights Reserved. Use is subject to license terms.
|
||||
*
|
||||
* See the file "license.terms" for information on usage and
|
||||
* redistribution of this file, and for a DISCLAIMER OF ALL
|
||||
* WARRANTIES.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
package edu.cmu.sphinx.frontend;
|
||||
|
||||
import edu.cmu.sphinx.util.props.ConfigurableAdapter;
|
||||
|
||||
/**
|
||||
* An abstract DataProcessor implementing elements common to all concrete DataProcessors, such as name, predecessor, and
|
||||
* timer.
|
||||
*/
|
||||
public abstract class BaseDataProcessor extends ConfigurableAdapter implements DataProcessor {
|
||||
|
||||
private DataProcessor predecessor;
|
||||
|
||||
public BaseDataProcessor() {
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the processed Data output.
|
||||
*
|
||||
* @return an Data object that has been processed by this DataProcessor
|
||||
* @throws DataProcessingException if a data processor error occurs
|
||||
*/
|
||||
public abstract Data getData() throws DataProcessingException;
|
||||
|
||||
|
||||
/** Initializes this DataProcessor. This is typically called after the DataProcessor has been configured. */
|
||||
public void initialize() {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the predecessor DataProcessor.
|
||||
*
|
||||
* @return the predecessor
|
||||
*/
|
||||
public DataProcessor getPredecessor() {
|
||||
return predecessor;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Sets the predecessor DataProcessor. This method allows dynamic reconfiguration of the front end.
|
||||
*
|
||||
* @param predecessor the new predecessor of this DataProcessor
|
||||
*/
|
||||
public void setPredecessor(DataProcessor predecessor) {
|
||||
this.predecessor = predecessor;
|
||||
}
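// Chaining sketch (illustrative, not part of the original source): front
// ends are assembled by linking processors through their predecessors;
// "audioSource" below is a hypothetical upstream DataProcessor.
//
//   DataBlocker blocker = new DataBlocker(10);
//   blocker.setPredecessor(audioSource);
//   Data d = blocker.getData(); // pulls data through the whole chain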
|
||||
}
@ -0,0 +1,28 @@
/*
|
||||
* Copyright 1999-2002 Carnegie Mellon University.
|
||||
* Portions Copyright 2002 Sun Microsystems, Inc.
|
||||
* Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
|
||||
* All Rights Reserved. Use is subject to license terms.
|
||||
*
|
||||
* See the file "license.terms" for information on usage and
|
||||
* redistribution of this file, and for a DISCLAIMER OF ALL
|
||||
* WARRANTIES.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
package edu.cmu.sphinx.frontend;
|
||||
|
||||
/**
|
||||
* Implements the interface for all Data objects that pass between
|
||||
* DataProcessors.
|
||||
*
|
||||
* A subclass of Data can contain the actual data, or be a signal
|
||||
* (e.g., data start, data end, speech start, speech end).
|
||||
*
|
||||
* @see Data
|
||||
* @see FrontEnd
|
||||
*/
|
||||
public interface Data {
|
||||
|
||||
}
@ -0,0 +1,113 @@
package edu.cmu.sphinx.frontend;
|
||||
|
||||
import edu.cmu.sphinx.util.props.PropertyException;
|
||||
import edu.cmu.sphinx.util.props.PropertySheet;
|
||||
import edu.cmu.sphinx.util.props.S4Double;
|
||||
|
||||
import java.util.LinkedList;
|
||||
|
||||
/**
|
||||
* A <code>DataProcessor</code> which wraps incoming <code>DoubleData</code> objects into equally sized blocks of a defined
|
||||
* length.
|
||||
*/
|
||||
public class DataBlocker extends BaseDataProcessor {
|
||||
|
||||
/** The property for the block size of generated data-blocks in milliseconds. */
|
||||
@S4Double(defaultValue = 10)
|
||||
public static final String PROP_BLOCK_SIZE_MS = "blockSizeMs";
|
||||
|
||||
private double blockSizeMs;
|
||||
private int blockSizeSamples = Integer.MAX_VALUE;
|
||||
|
||||
private int curFirstSamplePos;
|
||||
private int sampleRate = -1;
|
||||
|
||||
private final LinkedList<DoubleData> inBuffer = new LinkedList<DoubleData>();
|
||||
|
||||
private int curInBufferSize;
|
||||
|
||||
|
||||
public DataBlocker() {
|
||||
}
|
||||
|
||||
/**
|
||||
* @param blockSizeMs block size in milliseconds
|
||||
*/
|
||||
public DataBlocker(double blockSizeMs) {
|
||||
initLogger();
|
||||
this.blockSizeMs = blockSizeMs;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void newProperties(PropertySheet propertySheet) throws PropertyException {
|
||||
super.newProperties(propertySheet);
|
||||
blockSizeMs = propertySheet.getDouble(PROP_BLOCK_SIZE_MS);
|
||||
}
|
||||
|
||||
|
||||
public double getBlockSizeMs() {
|
||||
return blockSizeMs;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Data getData() throws DataProcessingException {
|
||||
while (curInBufferSize < blockSizeSamples || curInBufferSize == 0) {
|
||||
Data data = getPredecessor().getData();
|
||||
|
||||
if (data instanceof DataStartSignal) {
|
||||
sampleRate = ((DataStartSignal) data).getSampleRate();
|
||||
blockSizeSamples = (int) Math.round(sampleRate * blockSizeMs / 1000);
|
||||
|
||||
curInBufferSize = 0;
|
||||
curFirstSamplePos = 0;
|
||||
|
||||
inBuffer.clear();
|
||||
}
|
||||
|
||||
if (!(data instanceof DoubleData)) {
|
||||
return data;
|
||||
}
|
||||
|
||||
DoubleData dd = (DoubleData) data;
|
||||
|
||||
inBuffer.add(dd);
|
||||
curInBufferSize += dd.getValues().length;
|
||||
}
|
||||
|
||||
// now we are ready to merge all data blocks into one
|
||||
double[] newSampleBlock = new double[blockSizeSamples];
|
||||
|
||||
int copiedSamples = 0;
|
||||
|
||||
long firstSample = inBuffer.get(0).getFirstSampleNumber() + curFirstSamplePos;
|
||||
|
||||
while (!inBuffer.isEmpty()) {
|
||||
DoubleData dd = inBuffer.remove(0);
|
||||
double[] values = dd.getValues();
|
||||
int copyLength = Math.min(blockSizeSamples - copiedSamples, values.length - curFirstSamplePos);
|
||||
|
||||
System.arraycopy(values, curFirstSamplePos, newSampleBlock, copiedSamples, copyLength);
|
||||
|
||||
// does the current data-object contains more samples than necessary? -> keep the rest for the next block
|
||||
if (copyLength < (values.length - curFirstSamplePos)) {
|
||||
assert inBuffer.isEmpty();
|
||||
|
||||
curFirstSamplePos += copyLength;
|
||||
inBuffer.add(0, dd);
|
||||
break;
|
||||
} else {
|
||||
copiedSamples += copyLength;
|
||||
curFirstSamplePos = 0;
|
||||
}
|
||||
}
|
||||
|
||||
curInBufferSize = inBuffer.isEmpty() ? 0 : inBuffer.get(0).getValues().length - curFirstSamplePos;
|
||||
|
||||
// for (int i = 0; i < newSampleBlock.length; i++) {
|
||||
// newSampleBlock[i] *= 10;
|
||||
// }
|
||||
return new DoubleData(newSampleBlock, sampleRate, firstSample);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
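Illustrative usage sketch (not part of the committed sources): a DataBlocker wired behind a synthetic source. At 16 kHz a 10 ms block is 160 samples, so the 240- and 80-sample input frames below come out as two 160-sample blocks; the anonymous source class is invented for the example.

import edu.cmu.sphinx.frontend.*;

public class DataBlockerSketch {
    public static void main(String[] args) {
        // Synthetic predecessor: start signal, two unevenly sized frames, end signal.
        BaseDataProcessor source = new BaseDataProcessor() {
            private final Data[] frames = {
                    new DataStartSignal(16000),
                    new DoubleData(new double[240], 16000, 0),
                    new DoubleData(new double[80], 16000, 240),
                    new DataEndSignal(20)
            };
            private int next;

            @Override
            public Data getData() {
                return next < frames.length ? frames[next++] : null;
            }
        };

        DataBlocker blocker = new DataBlocker(10); // 10 ms blocks
        blocker.setPredecessor(source);

        Data d;
        while (!((d = blocker.getData()) instanceof DataEndSignal)) {
            if (d instanceof DoubleData)
                System.out.println(((DoubleData) d).getValues().length + "-sample block");
        }
    }
}
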
@@ -0,0 +1,70 @@
/*
 * Copyright 1999-2002 Carnegie Mellon University.
 * Portions Copyright 2002 Sun Microsystems, Inc.
 * Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */

package edu.cmu.sphinx.frontend;

/**
 * A signal that indicates the end of data.
 *
 * @see Data
 * @see DataProcessor
 * @see Signal
 */
public class DataEndSignal extends Signal {

    private final long duration;

    /**
     * Constructs a DataEndSignal.
     *
     * @param duration the duration of the entire data stream in milliseconds
     */
    public DataEndSignal(long duration) {
        this(duration, System.currentTimeMillis());
    }

    /**
     * Constructs a DataEndSignal with the given creation time.
     *
     * @param duration the duration of the entire data stream in milliseconds
     * @param time the creation time of the DataEndSignal
     */
    public DataEndSignal(long duration, long time) {
        super(time);
        this.duration = duration;
    }

    /**
     * Returns the duration of the entire data stream in milliseconds.
     *
     * @return the duration of the entire data stream in milliseconds
     */
    public long getDuration() {
        return duration;
    }

    /**
     * Returns a string describing this DataEndSignal.
     *
     * @return a string describing this DataEndSignal
     */
    @Override
    public String toString() {
        return ("DataEndSignal: creation time: " + getTime() + ", duration: " +
                getDuration() + "ms");
    }
}

@@ -0,0 +1,51 @@
/*
 * Copyright 1999-2002 Carnegie Mellon University.
 * Portions Copyright 2002 Sun Microsystems, Inc.
 * Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */
package edu.cmu.sphinx.frontend;

/** Thrown to indicate that a DataProcessor has problems processing incoming Data objects. */
@SuppressWarnings("serial")
public class DataProcessingException extends RuntimeException {

    /** Constructs a DataProcessingException with no detail message. */
    public DataProcessingException() {
        super();
    }

    /**
     * Constructs a DataProcessingException with the specified detail message.
     *
     * @param message the detail message
     */
    public DataProcessingException(String message) {
        super(message);
    }

    /**
     * Constructs a DataProcessingException with the specified detail message and cause.
     *
     * @param message the detail message
     * @param cause the cause
     */
    public DataProcessingException(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * Constructs a DataProcessingException with the specified cause.
     *
     * @param cause the cause
     */
    public DataProcessingException(Throwable cause) {
        super(cause);
    }
}

@@ -0,0 +1,68 @@
/*
 * Copyright 2004 Carnegie Mellon University.
 * Portions Copyright 2004 Sun Microsystems, Inc.
 * Portions Copyright 2004 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */
package edu.cmu.sphinx.frontend;

import edu.cmu.sphinx.util.props.Configurable;

/**
 * A processor that performs a signal processing function.
 *
 * Since a DataProcessor usually belongs to a particular front end pipeline,
 * you can name the pipeline it belongs to in the {@link #initialize()
 * initialize} method. (Note, however, that it is not always the case that a
 * DataProcessor belongs to a particular pipeline. For example, the {@link
 * edu.cmu.sphinx.frontend.util.Microphone Microphone} class is a DataProcessor,
 * but it usually does not belong to any particular pipeline.)
 * <p>
 * Each DataProcessor usually has a predecessor as well. This is the previous
 * DataProcessor in the pipeline. Again, not all DataProcessors have
 * predecessors.
 * <p>
 * Calling {@link #getData() getData} will return the processed Data object.
 *
 * @see FrontEnd
 */
public interface DataProcessor extends Configurable {

    /**
     * Initializes this DataProcessor.
     *
     * This is typically called after the DataProcessor has been configured.
     */
    public void initialize();

    /**
     * Returns the processed Data output.
     *
     * @return a Data object that has been processed by this DataProcessor
     * @throws DataProcessingException if a data processor error occurs
     */
    public abstract Data getData() throws DataProcessingException;

    /**
     * Returns the predecessor DataProcessor.
     *
     * @return the predecessor
     */
    public DataProcessor getPredecessor();

    /**
     * Sets the predecessor DataProcessor. This method allows dynamic reconfiguration of the front end.
     *
     * @param predecessor the new predecessor of this DataProcessor
     */
    public void setPredecessor(DataProcessor predecessor);
}

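Illustrative sketch (not part of the committed sources): walking a pipeline backwards through getPredecessor(), the same traversal FrontEnd.toString() performs further below. The class and method names are invented.

import edu.cmu.sphinx.frontend.DataProcessor;

public class ChainWalker {
    /** Prints every stage of a pipeline, from the source to the given last processor. */
    public static void printChain(DataProcessor last) {
        // Collect in reverse so the source ends up first.
        java.util.Deque<String> names = new java.util.ArrayDeque<String>();
        for (DataProcessor dp = last; dp != null; dp = dp.getPredecessor())
            names.addFirst(dp.getClass().getSimpleName());
        System.out.println(names);
    }
}
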
@@ -0,0 +1,62 @@
/*
 * Copyright 1999-2002 Carnegie Mellon University.
 * Portions Copyright 2002 Sun Microsystems, Inc.
 * Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */

package edu.cmu.sphinx.frontend;

/**
 * A signal that indicates the start of data.
 *
 * @see Data
 * @see DataProcessor
 * @see Signal
 */
public class DataStartSignal extends Signal {

    private final int sampleRate;

    /**
     * Constructs a DataStartSignal at the given time.
     *
     * @param sampleRate the sampling rate of the started data stream
     * @param time the time this DataStartSignal is created
     */
    public DataStartSignal(int sampleRate, long time) {
        super(time);
        this.sampleRate = sampleRate;
    }

    /**
     * Constructs a DataStartSignal with the current time.
     *
     * @param sampleRate the sampling rate of the started data stream
     */
    public DataStartSignal(int sampleRate) {
        this(sampleRate, System.currentTimeMillis());
    }

    /**
     * Returns a string describing this DataStartSignal.
     *
     * @return a string describing this DataStartSignal
     */
    @Override
    public String toString() {
        return "DataStartSignal: creation time: " + getTime();
    }

    /** @return the sampling rate of the started data stream. */
    public int getSampleRate() {
        return sampleRate;
    }
}

@@ -0,0 +1,119 @@
/*
 * Copyright 1999-2002 Carnegie Mellon University.
 * Portions Copyright 2002 Sun Microsystems, Inc.
 * Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */

package edu.cmu.sphinx.frontend;

import edu.cmu.sphinx.util.machlearn.OVector;

/** A Data object that holds data of primitive type double. */
@SuppressWarnings("serial")
public class DoubleData extends OVector implements Data {

    private int sampleRate;
    private long firstSampleNumber;
    private long collectTime;

    /**
     * Constructs a new <code>Data</code> object with values only. All other internal fields, like the
     * sampling rate, are initialized to -1.
     *
     * @param values source values
     */
    public DoubleData(double[] values) {
        super(values);
    }

    /**
     * Constructs a Data object with the given values, sample rate, and first sample number. The collect
     * time is derived from the first sample number and the sample rate.
     *
     * @param values the data values
     * @param sampleRate the sample rate of the data
     * @param firstSampleNumber the position of the first sample in the original data
     */
    public DoubleData(double[] values, int sampleRate,
                      long firstSampleNumber) {
        super(values);

        this.sampleRate = sampleRate;
        this.collectTime = firstSampleNumber * 1000 / sampleRate;
        this.firstSampleNumber = firstSampleNumber;
    }

    /**
     * Constructs a Data object with the given values, sample rate, collect time, and first sample number.
     *
     * @param values the data values
     * @param sampleRate the sample rate of the data
     * @param collectTime the time at which this data is collected
     * @param firstSampleNumber the position of the first sample in the original data
     */
    public DoubleData(double[] values, int sampleRate,
                      long collectTime, long firstSampleNumber) {
        super(values);

        this.sampleRate = sampleRate;
        this.collectTime = collectTime;
        this.firstSampleNumber = firstSampleNumber;
    }

    /**
     * @return a string that describes the data.
     */
    @Override
    public String toString() {
        return ("DoubleData: " + sampleRate + "Hz, first sample #: " +
                firstSampleNumber + ", collect time: " + collectTime);
    }

    /**
     * @return the sample rate of the data.
     */
    public int getSampleRate() {
        return sampleRate;
    }

    /**
     * @return the position of the first sample in the original data. The very first sample number
     *         is zero.
     */
    public long getFirstSampleNumber() {
        return firstSampleNumber;
    }

    /**
     * Returns the time in milliseconds at which the audio data is collected.
     *
     * @return the difference, in milliseconds, between the time the audio data is collected and
     *         midnight, January 1, 1970
     */
    public long getCollectTime() {
        return collectTime;
    }

    @Override
    public DoubleData clone() throws CloneNotSupportedException {
        try {
            DoubleData data = (DoubleData) super.clone();
            data.sampleRate = sampleRate;
            data.collectTime = collectTime;
            data.firstSampleNumber = firstSampleNumber;
            return data;
        } catch (CloneNotSupportedException e) {
            throw new InternalError(e.toString());
        }
    }
}

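Illustrative sketch (not part of the committed sources): the three-argument constructor derives the collect time as firstSampleNumber * 1000 / sampleRate, so at 16 kHz a frame starting at sample 8000 reports 500 ms.

import edu.cmu.sphinx.frontend.DoubleData;

public class CollectTimeSketch {
    public static void main(String[] args) {
        // 8000 * 1000 / 16000 = 500 ms into the stream.
        DoubleData frame = new DoubleData(new double[160], 16000, 8000);
        System.out.println(frame.getCollectTime()); // prints 500
    }
}
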
@@ -0,0 +1,119 @@
/*
 * Copyright 1999-2002 Carnegie Mellon University.
 * Portions Copyright 2002 Sun Microsystems, Inc.
 * Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 */

package edu.cmu.sphinx.frontend;

import edu.cmu.sphinx.util.MatrixUtils;

/**
 * A Data object that holds data of primitive type float.
 *
 * @see Data
 */
public class FloatData implements Data, Cloneable {

    private final float[] values;
    private final int sampleRate;
    private final long firstSampleNumber;
    private final long collectTime;

    /**
     * Constructs a Data object with the given values, sample rate, and first sample number; the collect
     * time is derived from the first sample number and the sample rate.
     *
     * @param values the data values
     * @param sampleRate the sample rate of the data
     * @param firstSampleNumber the position of the first sample in the original data
     */
    public FloatData(float[] values, int sampleRate, long firstSampleNumber) {
        this(values, sampleRate, firstSampleNumber * 1000 / sampleRate, firstSampleNumber);
    }

    /**
     * Constructs a Data object with the given values, sample rate, collect time, and first sample number.
     *
     * @param values the data values
     * @param sampleRate the sample rate of the data
     * @param collectTime the time at which this data is collected
     * @param firstSampleNumber the position of the first sample in the original data
     */
    public FloatData(float[] values, int sampleRate,
                     long collectTime, long firstSampleNumber) {
        this.values = values;
        this.sampleRate = sampleRate;
        this.collectTime = collectTime;
        this.firstSampleNumber = firstSampleNumber;
    }

    /**
     * @return the values of this data.
     */
    public float[] getValues() {
        return values;
    }

    /**
     * @return the sample rate of this data.
     */
    public int getSampleRate() {
        return sampleRate;
    }

    /**
     * @return the position of the first sample in the original data. The very first sample number is zero.
     */
    public long getFirstSampleNumber() {
        return firstSampleNumber;
    }

    /**
     * Returns the time in milliseconds at which the audio data is collected.
     *
     * @return the difference, in milliseconds, between the time the audio data is collected and midnight,
     *         January 1, 1970
     */
    public long getCollectTime() {
        return collectTime;
    }

    @Override
    public FloatData clone() throws CloneNotSupportedException {
        try {
            FloatData data = (FloatData) super.clone();
            return data;
        } catch (CloneNotSupportedException e) {
            throw new InternalError(e.toString());
        }
    }

    /**
     * Converts a given Data object into a <code>FloatData</code> if possible.
     *
     * @param data data to convert
     * @return converted data
     */
    public static FloatData toFloatData(Data data) {
        FloatData convertData;
        if (data instanceof FloatData)
            convertData = (FloatData) data;
        else if (data instanceof DoubleData) {
            DoubleData dd = (DoubleData) data;
            convertData = new FloatData(MatrixUtils.double2float(dd.getValues()), dd.getSampleRate(),
                    dd.getFirstSampleNumber());
        } else
            throw new IllegalArgumentException("data type '" + data.getClass() + "' is not supported");

        return convertData;
    }
}

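Illustrative sketch (not part of the committed sources): toFloatData passes FloatData through unchanged and converts DoubleData element-wise; any other Data type is rejected with an IllegalArgumentException.

import edu.cmu.sphinx.frontend.Data;
import edu.cmu.sphinx.frontend.DoubleData;
import edu.cmu.sphinx.frontend.FloatData;

public class ToFloatDataSketch {
    public static void main(String[] args) {
        Data d = new DoubleData(new double[] {0.25, -0.5}, 8000, 0);
        FloatData f = FloatData.toFloatData(d); // element-wise double -> float conversion
        System.out.println(f.getValues().length + " values at " + f.getSampleRate() + " Hz");
    }
}
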
@@ -0,0 +1,303 @@
/*
 * Copyright 1999-2002 Carnegie Mellon University.
 * Portions Copyright 2002 Sun Microsystems, Inc.
 * Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */

package edu.cmu.sphinx.frontend;

import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;

import edu.cmu.sphinx.util.Timer;
import edu.cmu.sphinx.util.TimerPool;
import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;
import edu.cmu.sphinx.util.props.S4ComponentList;

/**
 * FrontEnd is a wrapper class for the chain of front end processors. It provides methods for manipulating and
 * navigating the processors.
 * <p>
 * The front end is modeled as a series of data processors, each of which performs a specific signal processing
 * function. For example, one processor performs a Fast Fourier Transform (FFT) on input data, another processor
 * performs high-pass filtering. Figure 1 below shows what the front end looks like:
 * <img alt="Frontend" src="doc-files/frontend.jpg"> <br> <b>Figure 1: The Sphinx4 front end.</b>
 * <p>
 * Each such data processor implements the {@link edu.cmu.sphinx.frontend.DataProcessor} interface. Objects that
 * implement the {@link edu.cmu.sphinx.frontend.Data} interface enter and exit the front end, and pass between the
 * processors in the front end. The input data to the front end is typically audio data, but this front end allows any
 * input type. Similarly, the output data is typically features, but this front end allows any output type. You can
 * configure the front end to accept any input type and return any output type. We will describe the configuration of
 * the front end in more detail below.
 * <p>
 * <b>The Pull Model of the Front End</b>
 * <p>
 * The front end uses a pull model. To obtain output from the front end, one would call the method:
 * <p>
 * <code> FrontEnd frontend = ... // see how to obtain the front end below <br>Data output = frontend.getData();
 * </code>
 * <p>
 * Calling {@link #getData() getData} on the front end would in turn call the getData() method on the last
 * DataProcessor, which in turn calls the getData() method on the second last DataProcessor, and so on, until the
 * getData() method on the first DataProcessor is called, which reads Data objects from the input. The input to the
 * front end is actually another DataProcessor, and is usually (though not necessarily) part of the front end and is not
 * shown in the figure above. If you want to maintain some control of the input DataProcessor, you can create it
 * separately, and use the {@link #setDataSource(edu.cmu.sphinx.frontend.DataProcessor) setDataSource} method to set it
 * as the input DataProcessor. In that case, the input DataProcessor will be prepended to the existing chain of
 * DataProcessors. One common input DataProcessor is the {@link edu.cmu.sphinx.frontend.util.Microphone}, which
 * implements the DataProcessor interface.
 * <p>
 * <code> DataProcessor microphone = new Microphone(); <br>microphone.initialize(...);
 * <br>frontend.setDataSource(microphone); </code>
 * <p>
 * Another common input DataProcessor is the {@link edu.cmu.sphinx.frontend.util.StreamDataSource}. It turns a Java
 * {@link java.io.InputStream} into Data objects. It is usually used in batch mode decoding.
 * <p>
 * <b>Configuring the front end</b>
 * <p>
 * The front end must be configured through the Sphinx properties file. For details about configuring the front end,
 * refer to the document <a href="doc-files/FrontEndConfiguration.html">Configuring the Front End</a>.
 * <p>
 * Current state-of-the-art front ends generate features that contain Mel-frequency cepstral coefficients (MFCC). To
 * specify such a front end (called a 'pipeline') in Sphinx-4, insert the following lines in the Sphinx-4 configuration
 * file:
 * <pre>
 * <component name="mfcFrontEnd" type="edu.cmu.sphinx.frontend.FrontEnd">
 *     <propertylist name="pipeline">
 *         <item>preemphasizer</item>
 *         <item>windower</item>
 *         <item>dft</item>
 *         <item>melFilterBank</item>
 *         <item>dct</item>
 *         <item>batchCMN</item>
 *         <item>featureExtractor</item>
 *     </propertylist>
 * </component>
 *
 * <component name="preemphasizer" type="{@link edu.cmu.sphinx.frontend.filter.Preemphasizer
 * edu.cmu.sphinx.frontend.filter.Preemphasizer}"/>
 * <component name="windower" type="{@link edu.cmu.sphinx.frontend.window.RaisedCosineWindower
 * edu.cmu.sphinx.frontend.window.RaisedCosineWindower}"/>
 * <component name="dft" type="{@link edu.cmu.sphinx.frontend.transform.DiscreteFourierTransform
 * edu.cmu.sphinx.frontend.transform.DiscreteFourierTransform}"/>
 * <component name="melFilterBank" type="{@link edu.cmu.sphinx.frontend.frequencywarp.MelFrequencyFilterBank2
 * edu.cmu.sphinx.frontend.frequencywarp.MelFrequencyFilterBank}"/>
 * <component name="dct" type="{@link edu.cmu.sphinx.frontend.transform.DiscreteCosineTransform
 * edu.cmu.sphinx.frontend.transform.DiscreteCosineTransform}"/>
 * <component name="batchCMN" type="{@link edu.cmu.sphinx.frontend.feature.BatchCMN
 * edu.cmu.sphinx.frontend.feature.BatchCMN}"/>
 * <component name="featureExtractor" type="{@link edu.cmu.sphinx.frontend.feature.DeltasFeatureExtractor
 * edu.cmu.sphinx.frontend.feature.DeltasFeatureExtractor}"/>
 * </pre>
 * Note: In this example, 'mfcFrontEnd' becomes the name of the front end.
 * <p>
 * Sphinx-4 also allows you to: <ul> <li>specify multiple front end pipelines</li> <li>specify multiple instances of the
 * same DataProcessor in the same pipeline</li> </ul>
 * <p>
 * For details on how to do this, refer to the document <a href="doc-files/FrontEndConfiguration.html">Configuring the
 * Front End</a>.
 * <p>
 * <b>Obtaining a Front End</b>
 * <p>
 * In order to obtain a front end, it must be specified in the configuration file. The Sphinx-4 front end is connected
 * to the rest of the system via the scorer. We will continue with the above example to show how the scorer will obtain
 * the front end. In the configuration file, the scorer should be specified as follows:
 * <pre>
 * <component name="scorer" type="edu.cmu.sphinx.decoder.scorer.SimpleAcousticScorer">
 *     <property name="frontend" value="mfcFrontEnd"/>
 * </component>
 * </pre>
 * In the SimpleAcousticScorer, the front end is obtained in the {@link edu.cmu.sphinx.util.props.Configurable#newProperties
 * newProperties} method as follows:
 * <pre>
 * public void newProperties(PropertySheet ps) throws PropertyException {
 *     FrontEnd frontend = (FrontEnd) ps.getComponent("frontend", FrontEnd.class);
 * }
 * </pre>
 */
public class FrontEnd extends BaseDataProcessor {

    /** The name of the property list of all the components of the front end pipeline. */
    @S4ComponentList(type = DataProcessor.class)
    public final static String PROP_PIPELINE = "pipeline";

    // ----------------------------
    // Configuration data
    // ----------------------------
    private List<DataProcessor> frontEndList;
    private Timer timer;

    private DataProcessor first;
    private DataProcessor last;
    private final List<SignalListener> signalListeners = new ArrayList<SignalListener>();

    public FrontEnd(List<DataProcessor> frontEndList) {
        initLogger();
        this.frontEndList = frontEndList;
        init();
    }

    public FrontEnd() {
    }

    /* (non-Javadoc)
     * @see edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util.props.PropertySheet)
     */
    @Override
    public void newProperties(PropertySheet ps) throws PropertyException {
        super.newProperties(ps);
        frontEndList = ps.getComponentList(PROP_PIPELINE, DataProcessor.class);
        init();
    }

    private void init() {
        this.timer = TimerPool.getTimer(this, "Frontend");

        last = null;
        for (DataProcessor dp : frontEndList) {
            assert dp != null;

            if (last != null)
                dp.setPredecessor(last);

            if (first == null) {
                first = dp;
            }
            last = dp;
        }
        initialize();
    }

    /* (non-Javadoc)
     * @see edu.cmu.sphinx.frontend.DataProcessor#initialize(edu.cmu.sphinx.frontend.CommonConfig)
     */
    @Override
    public void initialize() {
        super.initialize();
        for (DataProcessor dp : frontEndList) {
            dp.initialize();
        }
    }

    /**
     * Sets the source of data for this front end. It basically sets the predecessor of the first DataProcessor of this
     * front end.
     *
     * @param dataSource the source of data
     */
    public void setDataSource(DataProcessor dataSource) {
        first.setPredecessor(dataSource);
    }

    /**
     * Returns the collection of <code>DataProcessor</code>s of this <code>FrontEnd</code>.
     *
     * @return list of processors
     */
    public List<DataProcessor> getElements() {
        return frontEndList;
    }

    /**
     * Returns the processed Data output, basically calls <code>getData()</code> on the last processor.
     *
     * @return a Data object that has been processed by this front end
     * @throws DataProcessingException if a data processor error occurs
     */
    @Override
    public Data getData() throws DataProcessingException {
        timer.start();
        Data data = last.getData();

        // fire the signal listeners if it's a signal
        if (data instanceof Signal) {
            fireSignalListeners((Signal) data);
        }
        timer.stop();
        return data;
    }

    /**
     * Sets the source of data for this front end. It basically calls <code>setDataSource(dataSource)</code>.
     *
     * @param dataSource the source of data
     */
    @Override
    public void setPredecessor(DataProcessor dataSource) {
        setDataSource(dataSource);
    }

    /**
     * Adds a listener to be called when a signal is detected.
     *
     * @param listener the listener to be added
     */
    public void addSignalListener(SignalListener listener) {
        signalListeners.add(listener);
    }

    /**
     * Removes a listener for signals.
     *
     * @param listener the listener to be removed
     */
    public void removeSignalListener(SignalListener listener) {
        signalListeners.remove(listener);
    }

    /**
     * Fires all listeners for signals.
     *
     * @param signal the signal that occurred
     */
    protected void fireSignalListeners(Signal signal) {
        for (SignalListener listener : new ArrayList<SignalListener>(signalListeners))
            listener.signalOccurred(signal);
    }

    /**
     * Returns the last data processor within the <code>DataProcessor</code> chain of this <code>FrontEnd</code>.
     *
     * @return last processor
     */
    public DataProcessor getLastDataProcessor() {
        return last;
    }

    /**
     * Returns a description of this FrontEnd in the format: <front end name> {<DataProcessor1>, <DataProcessor2> ...
     * <DataProcessorN>}
     *
     * @return a description of this FrontEnd
     */
    @Override
    public String toString() {
        if (last == null)
            return super.toString() + " {}";
        LinkedList<DataProcessor> list = new LinkedList<DataProcessor>();
        for (DataProcessor current = last; current != null; current = current.getPredecessor())
            list.addFirst(current); // add processors in their correct order
        StringBuilder description = new StringBuilder(super.toString()).append(" {");
        for (DataProcessor dp : list)
            description.append(dp).append(", ");
        description.setLength(description.length() - 2);
        return description.append('}').toString();
    }
}

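Illustrative sketch (not part of the committed sources): the pull model described above, with the pipeline assembled programmatically instead of through the configuration file. The two-stage pipeline and the one-frame source are invented for the example.

import java.util.Arrays;

import edu.cmu.sphinx.frontend.*;

public class FrontEndSketch {

    /** Trivial source: one start signal, one 160-sample frame, one end signal. */
    static class OneFrameSource extends BaseDataProcessor {
        private final Data[] out = {
                new DataStartSignal(16000),
                new DoubleData(new double[160], 16000, 0),
                new DataEndSignal(10)
        };
        private int next;

        @Override
        public Data getData() {
            return next < out.length ? out[next++] : null;
        }
    }

    public static void main(String[] args) {
        // The constructor chains the processors in list order; the source is prepended.
        FrontEnd frontEnd = new FrontEnd(Arrays.<DataProcessor>asList(
                new DataBlocker(10), new GainControlProcessor(1.0)));
        frontEnd.setDataSource(new OneFrameSource());

        // One pull on the front end pulls through the entire chain.
        Data data;
        while ((data = frontEnd.getData()) != null)
            System.out.println(data);
    }
}
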
@@ -0,0 +1,77 @@
package edu.cmu.sphinx.frontend;

import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;
import edu.cmu.sphinx.util.props.S4Double;

/**
 * Modifies the gain of an audio signal. If the gain factor is 1, the signal passes through this
 * <code>DataProcessor</code> unchanged.
 *
 * @author Holger Brandl
 */
public class GainControlProcessor extends BaseDataProcessor {

    @S4Double(defaultValue = 1.0)
    public static final String GAIN_FACTOR = "gainFactor";

    private double gainFactor;

    public GainControlProcessor(double gainFactor) {
        initLogger();
        this.gainFactor = gainFactor;
    }

    public GainControlProcessor() {
    }

    /*
     * (non-Javadoc)
     *
     * @see edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util.props.PropertySheet)
     */
    @Override
    public void newProperties(PropertySheet ps) throws PropertyException {
        super.newProperties(ps);
        gainFactor = ps.getDouble(GAIN_FACTOR);
    }

    @Override
    public Data getData() throws DataProcessingException {
        Data data = getPredecessor().getData();

        if (data instanceof FloatData) {
            float[] values = ((FloatData) data).getValues();
            if (gainFactor != 1.0) {
                // apply the gain factor in place
                for (int i = 0; i < values.length; i++) {
                    values[i] *= gainFactor;
                }
            }
        } else if (data instanceof DoubleData) {
            double[] values = ((DoubleData) data).getValues();
            if (gainFactor != 1.0) {
                // apply the gain factor in place
                for (int i = 0; i < values.length; i++) {
                    values[i] *= gainFactor;
                }
            }
        }

        return data;
    }

    public double getGainFactor() {
        return gainFactor;
    }

    public void setGainFactor(double gainFactor) {
        this.gainFactor = gainFactor;
    }
}

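Illustrative sketch (not part of the committed sources): the gain is applied in place on the frame's value array, which the snippet below makes visible. The anonymous one-frame predecessor is invented for the example.

import edu.cmu.sphinx.frontend.*;

public class GainSketch {
    public static void main(String[] args) {
        final DoubleData frame = new DoubleData(new double[] {0.1, -0.2}, 8000, 0);

        GainControlProcessor gain = new GainControlProcessor(2.0);
        gain.setPredecessor(new BaseDataProcessor() {
            private boolean delivered;

            @Override
            public Data getData() {
                if (delivered)
                    return null;
                delivered = true;
                return frame;
            }
        });

        gain.getData();
        System.out.println(frame.getValues()[0]); // prints 0.2, scaled in place
    }
}
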
@@ -0,0 +1,68 @@
/*
 * Copyright 1999-2002 Carnegie Mellon University.
 * Portions Copyright 2002 Sun Microsystems, Inc.
 * Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */

package edu.cmu.sphinx.frontend;

import java.util.HashMap;
import java.util.Map;

/**
 * Indicates events like the beginning or end of data, data dropped, quality changed, etc. It implements the Data
 * interface and passes between DataProcessors to inform them about events concerning the Data flowing between them.
 *
 * @see Data
 * @see DataProcessor
 */
public class Signal implements Data {

    /** The time this Signal was issued. */
    private final long time;

    /**
     * A (lazily initialized) collection of named properties of this signal. This collection might contain information
     * about the file being processed, the shift size or frame length of the windowing process, etc.
     */
    private Map<String, Object> props;

    /**
     * Constructs a Signal with the given creation time.
     *
     * @param time the time this Signal is created
     */
    protected Signal(long time) {
        this.time = time;
    }

    /**
     * Returns the time this Signal was created.
     *
     * @return the time this Signal was created
     */
    public long getTime() {
        return time;
    }

    /**
     * @return the properties associated with this signal.
     */
    public synchronized Map<String, Object> getProps() {
        if (props == null)
            props = new HashMap<String, Object>();

        return props;
    }
}

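Illustrative sketch (not part of the committed sources): the usual consumer loop that separates signals from payload frames; "processor" stands for any configured DataProcessor.

import edu.cmu.sphinx.frontend.*;

public class SignalHandlingSketch {
    public static void consume(DataProcessor processor) {
        Data d;
        while ((d = processor.getData()) != null) {
            if (d instanceof DataStartSignal) {
                System.out.println("stream starts at "
                        + ((DataStartSignal) d).getSampleRate() + " Hz");
            } else if (d instanceof DataEndSignal) {
                System.out.println("stream ends after "
                        + ((DataEndSignal) d).getDuration() + " ms");
                break;
            } else {
                // payload frame (e.g. DoubleData or FloatData)
            }
        }
    }
}
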
@@ -0,0 +1,26 @@
/*
 * Copyright 1999-2002 Carnegie Mellon University.
 * Portions Copyright 2002 Sun Microsystems, Inc.
 * Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */

package edu.cmu.sphinx.frontend;

/** The listener interface for being informed when a {@link Signal Signal} is generated. */
public interface SignalListener {

    /**
     * Method called when a signal is detected.
     *
     * @param signal the signal
     */
    public void signalOccurred(Signal signal);
}

@@ -0,0 +1,172 @@
package edu.cmu.sphinx.frontend.databranch;

import edu.cmu.sphinx.frontend.BaseDataProcessor;
import edu.cmu.sphinx.frontend.Data;
import edu.cmu.sphinx.frontend.DataProcessingException;
import edu.cmu.sphinx.util.props.*;

import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;

/**
 * A FIFO buffer for <code>Data</code> elements.
 * <p>
 * <code>Data</code> objects are inserted into the buffer using the <code>processDataFrame</code> method.
 */
public class DataBufferProcessor extends BaseDataProcessor implements DataListener {

    /** The FIFO data buffer. */
    private final List<Data> featureBuffer = new LinkedList<Data>();

    /**
     * If this property is set to <code>true</code>, the buffer will wait for new data before returning from a
     * <code>getData</code> call. Enable this flag if the buffer should serve as the starting point of a new
     * feature pull-chain.
     */
    @S4Boolean(defaultValue = false)
    public static final String PROP_WAIT_IF_EMPTY = "waitIfEmpty";
    private boolean waitIfEmpty;

    /**
     * The time in milliseconds to wait between two attempts to read a data element from the buffer when
     * in <code>waitIfEmpty</code> mode.
     */
    @S4Integer(defaultValue = 10)
    public static final String PROP_WAIT_TIME_MS = "waitTimeMs";
    private long waitTime;

    /** The maximal size of the buffer in frames. The oldest frames will be removed if the buffer grows out of bounds. */
    @S4Integer(defaultValue = 50000)
    public static final String PROP_BUFFER_SIZE = "maxBufferSize";
    private int maxBufferSize;

    @S4ComponentList(type = Configurable.class, beTolerant = true)
    public static final String DATA_LISTENERS = "dataListeners";
    private List<DataListener> dataListeners = new ArrayList<DataListener>();

    /**
     * @param maxBufferSize the maximal size of the buffer in frames; the oldest frames will be removed if the buffer
     *                      grows out of bounds
     * @param waitIfEmpty if set to <code>true</code>, the buffer will wait for new data before returning from a
     *                    <code>getData</code> call; enable this flag if the buffer should serve as the starting point
     *                    of a new feature pull-chain
     * @param waitTime the time in milliseconds to wait between two attempts to read a data element from the buffer
     *                 when in <code>waitIfEmpty</code> mode
     * @param listeners listeners to get notified
     */
    public DataBufferProcessor(int maxBufferSize, boolean waitIfEmpty, int waitTime, List<? extends Configurable> listeners) {
        initLogger();

        this.maxBufferSize = maxBufferSize;
        this.waitIfEmpty = waitIfEmpty;

        if (waitIfEmpty) // if false we don't need the value
            this.waitTime = waitTime;

        for (Configurable configurable : listeners) {
            assert configurable instanceof DataListener;
            addDataListener((DataListener) configurable);
        }
    }

    public DataBufferProcessor() {
    }

    @Override
    public void newProperties(PropertySheet ps) throws PropertyException {
        super.newProperties(ps);

        maxBufferSize = ps.getInt(PROP_BUFFER_SIZE);
        waitIfEmpty = ps.getBoolean(PROP_WAIT_IF_EMPTY);

        if (waitIfEmpty) // if false we don't need the value
            waitTime = ps.getInt(PROP_WAIT_TIME_MS);

        dataListeners = ps.getComponentList(DATA_LISTENERS, DataListener.class);
    }

    public void processDataFrame(Data data) {
        featureBuffer.add(data);

        // inform data listeners if necessary
        for (DataListener dataListener : dataListeners) {
            dataListener.processDataFrame(data);
        }

        // reduce the buffer size if necessary
        while (featureBuffer.size() > maxBufferSize) {
            featureBuffer.remove(0);
        }
    }

    /**
     * Returns the processed Data output.
     *
     * @return a Data object that has been processed by this DataProcessor
     * @throws edu.cmu.sphinx.frontend.DataProcessingException if a data processor error occurs
     */
    @Override
    public Data getData() throws DataProcessingException {
        Data data = null;

        while (waitIfEmpty && featureBuffer.isEmpty()) {
            try {
                Thread.sleep(waitTime);
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }

        if (!featureBuffer.isEmpty()) {
            data = featureBuffer.remove(0);
        } else {
            assert !waitIfEmpty;
        }

        return data;
    }

    public int getBufferSize() {
        return featureBuffer.size();
    }

    public void clearBuffer() {
        featureBuffer.clear();
    }

    public List<Data> getBuffer() {
        return Collections.unmodifiableList(featureBuffer);
    }

    /**
     * Adds a new listener.
     *
     * @param l listener to add
     */
    public void addDataListener(DataListener l) {
        if (l == null)
            return;

        dataListeners.add(l);
    }

    /**
     * Removes a listener.
     *
     * @param l listener to remove
     */
    public void removeDataListener(DataListener l) {
        if (l == null)
            return;

        dataListeners.remove(l);
    }
}

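Illustrative sketch (not part of the committed sources): frames are pushed in through processDataFrame and pulled out FIFO-style through getData. The parameter values are invented; the full constructor is used so that maxBufferSize is set explicitly.

import java.util.Collections;

import edu.cmu.sphinx.frontend.Data;
import edu.cmu.sphinx.frontend.DoubleData;
import edu.cmu.sphinx.frontend.databranch.DataBufferProcessor;
import edu.cmu.sphinx.util.props.Configurable;

public class BufferSketch {
    public static void main(String[] args) {
        // Up to 100 buffered frames, non-blocking when empty, no extra listeners.
        DataBufferProcessor buffer = new DataBufferProcessor(
                100, false, 10, Collections.<Configurable>emptyList());

        buffer.processDataFrame(new DoubleData(new double[160], 16000, 0)); // push
        Data d = buffer.getData();                                          // pop (FIFO)

        System.out.println(d + ", " + buffer.getBufferSize() + " frames left");
    }
}
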
@@ -0,0 +1,16 @@
package edu.cmu.sphinx.frontend.databranch;

import edu.cmu.sphinx.frontend.Data;

/**
 * Defines some API-elements for Data-observer classes.
 */
public interface DataListener {

    /**
     * This method is invoked when a new {@link Data} object becomes available.
     *
     * @param data feature frame
     */
    public void processDataFrame(Data data);

}

@@ -0,0 +1,18 @@
package edu.cmu.sphinx.frontend.databranch;

import edu.cmu.sphinx.util.props.Configurable;

/** Some API-elements which are shared by components which can generate {@link edu.cmu.sphinx.frontend.Data}s. */
public interface DataProducer extends Configurable {

    /**
     * Registers a new listener for <code>Data</code>s.
     *
     * @param l listener to add
     */
    void addDataListener(DataListener l);

    /**
     * Unregisters a listener for <code>Data</code>s.
     *
     * @param l listener to remove
     */
    void removeDataListener(DataListener l);
}

@@ -0,0 +1,71 @@
package edu.cmu.sphinx.frontend.databranch;

import edu.cmu.sphinx.frontend.BaseDataProcessor;
import edu.cmu.sphinx.frontend.Data;
import edu.cmu.sphinx.frontend.DataProcessingException;
import edu.cmu.sphinx.util.props.Configurable;
import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;
import edu.cmu.sphinx.util.props.S4ComponentList;

import java.util.ArrayList;
import java.util.List;

/**
 * Creates push-branches out of a FrontEnd. This might be used for push-decoding or to create new pull-streams.
 *
 * @see edu.cmu.sphinx.decoder.FrameDecoder
 * @see edu.cmu.sphinx.frontend.databranch.DataBufferProcessor
 */
public class FrontEndSplitter extends BaseDataProcessor implements DataProducer {

    @S4ComponentList(type = Configurable.class, beTolerant = true)
    public static final String PROP_DATA_LISTENERS = "dataListeners";
    private List<DataListener> listeners = new ArrayList<DataListener>();

    public FrontEndSplitter() {
    }

    @Override
    public void newProperties(PropertySheet ps) throws PropertyException {
        super.newProperties(ps);

        listeners = ps.getComponentList(PROP_DATA_LISTENERS, DataListener.class);
    }

    /**
     * Reads and returns the next Data frame, or returns <code>null</code> if no data is available.
     *
     * @return the next Data or <code>null</code> if none is available
     * @throws edu.cmu.sphinx.frontend.DataProcessingException if there is a data processing error
     */
    @Override
    public Data getData() throws DataProcessingException {
        Data input = getPredecessor().getData();

        for (DataListener l : listeners)
            l.processDataFrame(input);

        return input;
    }

    public void addDataListener(DataListener l) {
        if (l == null) {
            return;
        }
        listeners.add(l);
    }

    public void removeDataListener(DataListener l) {
        if (l == null) {
            return;
        }
        listeners.remove(l);
    }
}

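Illustrative sketch (not part of the committed sources): inserting a splitter behind an existing pull chain and attaching a buffer as a push-branch. The helper and its names are invented; something must still pull on the returned splitter (or on processors behind it) for the branch to receive frames.

import java.util.Collections;

import edu.cmu.sphinx.frontend.DataProcessor;
import edu.cmu.sphinx.frontend.databranch.DataBufferProcessor;
import edu.cmu.sphinx.frontend.databranch.FrontEndSplitter;
import edu.cmu.sphinx.util.props.Configurable;

public class SplitterSketch {

    /** Wires: chain -> splitter, with a buffered push-branch hanging off the splitter. */
    public static FrontEndSplitter splitOff(DataProcessor chain, DataBufferProcessor branchBuffer) {
        FrontEndSplitter splitter = new FrontEndSplitter();
        splitter.setPredecessor(chain);
        splitter.addDataListener(branchBuffer); // branch sees every frame pulled through
        return splitter; // keep pulling on the splitter to feed both paths
    }

    /** Convenience factory for a non-blocking branch buffer with an explicit capacity. */
    public static DataBufferProcessor newBranchBuffer() {
        return new DataBufferProcessor(50000, false, 10, Collections.<Configurable>emptyList());
    }
}
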
@@ -0,0 +1,220 @@
/*
 * Copyright 2013 Carnegie Mellon University.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */
package edu.cmu.sphinx.frontend.denoise;

import java.util.Arrays;

import edu.cmu.sphinx.frontend.BaseDataProcessor;
import edu.cmu.sphinx.frontend.Data;
import edu.cmu.sphinx.frontend.DataProcessingException;
import edu.cmu.sphinx.frontend.DataStartSignal;
import edu.cmu.sphinx.frontend.DoubleData;
import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;
import edu.cmu.sphinx.util.props.S4Double;
import edu.cmu.sphinx.util.props.S4Integer;

/**
 * The noise filter, the same as implemented in sphinxbase/sphinxtrain/pocketsphinx.
 *
 * The noise removal algorithm is inspired by the following papers: "Computationally
 * Efficient Speech Enhancement by Spectral Minima Tracking" by G. Doblinger, and
 * "Power-Normalized Cepstral Coefficients (PNCC) for Robust Speech Recognition"
 * by C. Kim.
 *
 * For recent research and the state of the art, see papers about IMCRA and "A
 * Minimum-Mean-Square-Error Noise Reduction Algorithm on Mel-Frequency Cepstra
 * for Robust Speech Recognition" by Dong Yu and others.
 */
public class Denoise extends BaseDataProcessor {

    double[] power;
    double[] noise;
    double[] floor;
    double[] peak;

    @S4Double(defaultValue = 0.7)
    public final static String LAMBDA_POWER = "lambdaPower";
    double lambdaPower;

    @S4Double(defaultValue = 0.995)
    public final static String LAMBDA_A = "lambdaA";
    double lambdaA;

    @S4Double(defaultValue = 0.5)
    public final static String LAMBDA_B = "lambdaB";
    double lambdaB;

    @S4Double(defaultValue = 0.85)
    public final static String LAMBDA_T = "lambdaT";
    double lambdaT;

    @S4Double(defaultValue = 0.2)
    public final static String MU_T = "muT";
    double muT;

    @S4Double(defaultValue = 20.0)
    public final static String MAX_GAIN = "maxGain";
    double maxGain;

    @S4Integer(defaultValue = 4)
    public final static String SMOOTH_WINDOW = "smoothWindow";
    int smoothWindow;

    final static double EPS = 1e-10;

    public Denoise(double lambdaPower, double lambdaA, double lambdaB,
                   double lambdaT, double muT,
                   double maxGain, int smoothWindow) {
        this.lambdaPower = lambdaPower;
        this.lambdaA = lambdaA;
        this.lambdaB = lambdaB;
        this.lambdaT = lambdaT;
        this.muT = muT;
        this.maxGain = maxGain;
        this.smoothWindow = smoothWindow;
    }

    public Denoise() {
    }

    /*
     * (non-Javadoc)
     *
     * @see
     * edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util
     * .props.PropertySheet)
     */
    @Override
    public void newProperties(PropertySheet ps) throws PropertyException {
        super.newProperties(ps);
        lambdaPower = ps.getDouble(LAMBDA_POWER);
        lambdaA = ps.getDouble(LAMBDA_A);
        lambdaB = ps.getDouble(LAMBDA_B);
        lambdaT = ps.getDouble(LAMBDA_T);
        muT = ps.getDouble(MU_T);
        maxGain = ps.getDouble(MAX_GAIN);
        smoothWindow = ps.getInt(SMOOTH_WINDOW);
    }

    @Override
    public Data getData() throws DataProcessingException {
        Data inputData = getPredecessor().getData();
        int i;

        if (inputData instanceof DataStartSignal) {
            power = null;
            noise = null;
            floor = null;
            peak = null;
            return inputData;
        }
        if (!(inputData instanceof DoubleData)) {
            return inputData;
        }

        DoubleData inputDoubleData = (DoubleData) inputData;
        double[] input = inputDoubleData.getValues();
        int length = input.length;

        if (power == null)
            initStatistics(input, length);

        updatePower(input);

        estimateEnvelope(power, noise);

        double[] signal = new double[length];
        for (i = 0; i < length; i++) {
            signal[i] = Math.max(power[i] - noise[i], 0.0);
        }

        estimateEnvelope(signal, floor);

        tempMasking(signal);

        powerBoosting(signal);

        double[] gain = new double[length];
        for (i = 0; i < length; i++) {
            gain[i] = signal[i] / (power[i] + EPS);
            gain[i] = Math.min(Math.max(gain[i], 1.0 / maxGain), maxGain);
        }
        double[] smoothGain = smooth(gain);

        for (i = 0; i < length; i++) {
            input[i] *= smoothGain[i];
        }

        return inputData;
    }

    private double[] smooth(double[] gain) {
        double[] result = new double[gain.length];
        for (int i = 0; i < gain.length; i++) {
            int start = Math.max(i - smoothWindow, 0);
            int end = Math.min(i + smoothWindow + 1, gain.length);
            double sum = 0.0;
            for (int j = start; j < end; j++) {
                sum += gain[j];
            }
            result[i] = sum / (end - start);
        }
        return result;
    }

    private void powerBoosting(double[] signal) {
        for (int i = 0; i < signal.length; i++) {
            if (signal[i] < floor[i])
                signal[i] = floor[i];
        }
    }

    private void tempMasking(double[] signal) {
        for (int i = 0; i < signal.length; i++) {
            double in = signal[i];

            peak[i] *= lambdaT;
            if (signal[i] < lambdaT * peak[i])
                signal[i] = peak[i] * muT;

            if (in > peak[i])
                peak[i] = in;
        }
    }

    private void updatePower(double[] input) {
        for (int i = 0; i < input.length; i++) {
            power[i] = lambdaPower * power[i] + (1 - lambdaPower) * input[i];
        }
    }

    private void estimateEnvelope(double[] signal, double[] envelope) {
        for (int i = 0; i < signal.length; i++) {
            if (signal[i] > envelope[i])
                envelope[i] = lambdaA * envelope[i] + (1 - lambdaA) * signal[i];
            else
                envelope[i] = lambdaB * envelope[i] + (1 - lambdaB) * signal[i];
        }
    }

    private void initStatistics(double[] input, int length) {
        /* no previous data, initialize the statistics */
        power = Arrays.copyOf(input, length);
        noise = Arrays.copyOf(input, length);
        floor = new double[length];
        peak = new double[length];
        for (int i = 0; i < length; i++) {
            floor[i] = input[i] / maxGain;
        }
    }
}

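Illustrative sketch (not part of the committed sources): the asymmetric envelope recurrence from estimateEnvelope above, reduced to a single channel. With the default lambdaA = 0.995 and lambdaB = 0.5, the estimate rises with weight 0.005 but falls with weight 0.5, so it follows dips much faster than peaks, which is what makes it a spectral-minima tracker.

public class EnvelopeSketch {
    public static void main(String[] args) {
        double lambdaA = 0.995, lambdaB = 0.5;
        double envelope = 1.0;

        for (double s : new double[] {1.0, 10.0, 10.0, 1.0, 1.0}) {
            // Same update rule as Denoise.estimateEnvelope, for one channel.
            envelope = (s > envelope)
                    ? lambdaA * envelope + (1 - lambdaA) * s
                    : lambdaB * envelope + (1 - lambdaB) * s;
            System.out.printf("signal=%5.2f  envelope=%6.3f%n", s, envelope);
        }
    }
}
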
@@ -0,0 +1,27 @@
/*
 * Copyright 2010 Carnegie Mellon University.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */
package edu.cmu.sphinx.frontend.endpoint;

import edu.cmu.sphinx.frontend.BaseDataProcessor;

/**
 * An abstract analyzer that signals the presence of speech in the last processed frame.
 * This information is used, for example, in noise filtering components to estimate the
 * noise spectrum.
 */
public abstract class AbstractVoiceActivityDetector extends BaseDataProcessor {

    /**
     * Returns the state of speech detected.
     *
     * @return true if the last processed data object was classified as speech
     */
    public abstract boolean isSpeech();
}

@ -0,0 +1,117 @@
|
|||
/*
|
||||
* Copyright 1999-2004 Carnegie Mellon University.
|
||||
* Portions Copyright 2002-2004 Sun Microsystems, Inc.
|
||||
* Portions Copyright 2002-2004 Mitsubishi Electric Research Laboratories.
|
||||
* All Rights Reserved. Use is subject to license terms.
|
||||
*
|
||||
* See the file "license.terms" for information on usage and
|
||||
* redistribution of this file, and for a DISCLAIMER OF ALL
|
||||
* WARRANTIES.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
package edu.cmu.sphinx.frontend.endpoint;
|
||||
|
||||
import edu.cmu.sphinx.frontend.Data;
|
||||
import edu.cmu.sphinx.frontend.DoubleData;
|
||||
|
||||
|
||||
/** A container for DoubleData class that indicates whether the contained DoubleData is speech or not. */
|
||||
public class SpeechClassifiedData implements Data {
|
||||
|
||||
private boolean isSpeech;
|
||||
private final DoubleData data;
|
||||
|
||||
|
||||
/**
|
||||
* Constructs a SpeechClassifiedData object.
|
||||
*
|
||||
* @param doubleData the DoubleData
|
||||
* @param isSpeech indicates whether the DoubleData is speech
|
||||
*/
|
||||
public SpeechClassifiedData(DoubleData doubleData, boolean isSpeech) {
|
||||
this.data = doubleData;
|
||||
this.isSpeech = isSpeech;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Sets whether this SpeechClassifiedData is speech or not.
|
||||
*
|
||||
* @param isSpeech true if this is speech, false otherwise
|
||||
*/
|
||||
public void setSpeech(boolean isSpeech) {
|
||||
this.isSpeech = isSpeech;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns whether this is classified as speech.
|
||||
*
|
||||
* @return true if this is classified as speech, false otherwise
|
||||
*/
|
||||
public boolean isSpeech() {
|
||||
return isSpeech;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the data values.
|
||||
*
|
||||
* @return the data values
|
||||
*/
|
||||
public double[] getValues() {
|
||||
return data.getValues();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the sample rate of the data.
|
||||
*
|
||||
* @return the sample rate of the data
|
||||
*/
|
||||
public int getSampleRate() {
|
||||
return data.getSampleRate();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the time in milliseconds at which the audio data is collected.
|
||||
*
|
||||
* @return the difference, in milliseconds, between the time the audio data is collected and midnight, January 1,
|
||||
* 1970
|
||||
*/
|
||||
public long getCollectTime() {
|
||||
return data.getCollectTime();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the position of the first sample in the original data. The very first sample number is zero.
|
||||
*
|
||||
* @return the position of the first sample in the original data
|
||||
*/
|
||||
public long getFirstSampleNumber() {
|
||||
return data.getFirstSampleNumber();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the DoubleData contained by this SpeechClassifiedData.
|
||||
*
|
||||
* @return the DoubleData contained by this SpeechClassifiedData
|
||||
*/
|
||||
public DoubleData getDoubleData() {
|
||||
return data;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @return a string that describes the data.
|
||||
*/
|
||||
@Override
|
||||
public String toString() {
|
||||
return "SpeechClassifiedData containing " + data.toString() + " classified as " + (isSpeech ? "speech" : "non-speech");
|
||||
}
|
||||
}
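
A brief usage sketch of the container, assuming DoubleData's (values, sampleRate, firstSampleNumber) constructor; the literal values are invented for the example.

DoubleData frame = new DoubleData(new double[] {0.1, -0.2, 0.3}, 16000, 0);
SpeechClassifiedData labeled = new SpeechClassifiedData(frame, true);
System.out.println(labeled);             // ... classified as speech
labeled.setSpeech(false);
System.out.println(labeled.isSpeech());  // false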

@@ -0,0 +1,267 @@
/*
 * Copyright 1999-2002 Carnegie Mellon University.
 * Portions Copyright 2002 Sun Microsystems, Inc.
 * Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */

package edu.cmu.sphinx.frontend.endpoint;

import edu.cmu.sphinx.frontend.*;
import edu.cmu.sphinx.util.LogMath;
import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;
import edu.cmu.sphinx.util.props.S4Double;
import edu.cmu.sphinx.util.props.S4Integer;

import java.util.logging.Level;

/**
 * Implements a level tracking endpointer invented by Bent Schmidt Nielsen.
 * <p>This endpointer is composed of two main steps.
 * <ol>
 * <li>classification of audio into speech and non-speech
 * <li>inserting SPEECH_START and SPEECH_END signals around speech and removing non-speech regions
 * </ol>
 * <p>
 * The first step, classification of audio into speech and non-speech, uses Bent Schmidt Nielsen's algorithm. Each
 * time audio comes in, the average signal level and the background noise level are updated, using the signal level of
 * the current audio. If the average signal level is greater than the background noise level by a certain (configurable)
 * threshold value, then the current audio is marked as speech. Otherwise, it is marked as non-speech.
 * <p>
 * The second step of this endpointer is documented in the class {@link SpeechMarker SpeechMarker}.
 *
 * @see SpeechMarker
 */
public class SpeechClassifier extends AbstractVoiceActivityDetector {

    /** The property specifying the endpointing frame length in milliseconds. */
    @S4Integer(defaultValue = 10)
    public static final String PROP_FRAME_LENGTH_MS = "frameLengthInMs";

    /** The property specifying the minimum signal level used to update the background signal level. */
    @S4Double(defaultValue = 0)
    public static final String PROP_MIN_SIGNAL = "minSignal";

    /**
     * The property specifying the threshold. If the current signal level is greater than the background level by
     * this threshold, then the current signal is marked as speech. Therefore, a lower threshold will make the
     * endpointer more sensitive, that is, mark more audio as speech. A higher threshold will make the endpointer less
     * sensitive, that is, mark less audio as speech.
     */
    @S4Double(defaultValue = 10)
    public static final String PROP_THRESHOLD = "threshold";

    /** The property specifying the adjustment. */
    @S4Double(defaultValue = 0.003)
    public static final String PROP_ADJUSTMENT = "adjustment";

    protected final double averageNumber = 1;
    protected double adjustment;
    /** Average signal level. */
    protected double level;
    /** Background signal level. */
    protected double background;
    /** Minimum valid signal level. */
    protected double minSignal;
    protected double threshold;
    protected float frameLengthSec;
    protected boolean isSpeech;

    /* Statistics */
    protected long speechFrames;
    protected long backgroundFrames;
    protected double totalBackgroundLevel;
    protected double totalSpeechLevel;

    public SpeechClassifier(int frameLengthMs, double adjustment, double threshold, double minSignal) {
        initLogger();
        this.frameLengthSec = frameLengthMs / 1000.f;

        this.adjustment = adjustment;
        this.threshold = threshold;
        this.minSignal = minSignal;

        initialize();
    }

    public SpeechClassifier() {
    }

    @Override
    public void newProperties(PropertySheet ps) throws PropertyException {
        super.newProperties(ps);
        int frameLengthMs = ps.getInt(PROP_FRAME_LENGTH_MS);
        frameLengthSec = frameLengthMs / 1000.f;

        adjustment = ps.getDouble(PROP_ADJUSTMENT);
        threshold = ps.getDouble(PROP_THRESHOLD);
        minSignal = ps.getDouble(PROP_MIN_SIGNAL);

        logger = ps.getLogger();

        initialize();
    }

    /** Initializes this LevelTracker endpointer and DataProcessor predecessor. */
    @Override
    public void initialize() {
        super.initialize();
        reset();
    }

    /** Resets this LevelTracker to a starting state. */
    protected void reset() {
        level = 0;
        background = 300;
        resetStats();
    }

    /**
     * Returns the level of the given samples in decibels: 20 times the base-10 logarithm of their root mean square.
     *
     * @param samples the samples
     * @return the level in dB (20 * log10 of the root mean square)
     */
    public static double logRootMeanSquare(double[] samples) {
        assert samples.length > 0;
        double sumOfSquares = 0.0;
        for (double sample : samples) {
            sumOfSquares += sample * sample;
        }
        double rootMeanSquare = Math.sqrt(sumOfSquares / samples.length);
        rootMeanSquare = Math.max(rootMeanSquare, 1);
        return (LogMath.log10((float) rootMeanSquare) * 20);
    }

    /**
     * Classifies the given audio frame as speech or not, and updates the endpointing parameters.
     *
     * @param audio the audio frame
     * @return Data with classification flag
     */
    protected SpeechClassifiedData classify(DoubleData audio) {
        double current = logRootMeanSquare(audio.getValues());
        isSpeech = false;
        if (current >= minSignal) {
            level = ((level * averageNumber) + current) / (averageNumber + 1);
            if (current < background) {
                background = current;
            } else {
                background += (current - background) * adjustment;
            }
            if (level < background) {
                level = background;
            }
            isSpeech = (level - background > threshold);
        }

        SpeechClassifiedData labeledAudio = new SpeechClassifiedData(audio, isSpeech);

        if (logger.isLoggable(Level.FINEST)) {
            String speech = "";
            if (labeledAudio.isSpeech())
                speech = "*";

            logger.finest("Bkg: " + background + ", level: " + level +
                    ", current: " + current + ' ' + speech);
        }

        collectStats(isSpeech);

        return labeledAudio;
    }

    /** Resets the statistics. */
    private void resetStats() {
        backgroundFrames = 1;
        speechFrames = 1;
        totalSpeechLevel = 0;
        totalBackgroundLevel = 0;
    }

    /**
     * Collects statistics to provide information about the signal-to-noise ratio in the channel.
     *
     * @param isSpeech whether the current frame is classified as speech
     */
    private void collectStats(boolean isSpeech) {
        if (isSpeech) {
            totalSpeechLevel = totalSpeechLevel + level;
            speechFrames = speechFrames + 1;
        } else {
            totalBackgroundLevel = totalBackgroundLevel + background;
            backgroundFrames = backgroundFrames + 1;
        }
    }

    /**
     * Returns the next Data object.
     *
     * @return the next Data object, or null if none available
     * @throws DataProcessingException if a data processing error occurs
     */
    @Override
    public Data getData() throws DataProcessingException {
        Data audio = getPredecessor().getData();

        if (audio instanceof DataStartSignal)
            reset();

        if (audio instanceof DoubleData) {
            DoubleData data = (DoubleData) audio;
            audio = classify(data);
        }
        return audio;
    }

    /**
     * Returns whether the current frame contains speech. It can be used, for example,
     * by a noise filter to adjust its noise spectrum estimation.
     *
     * @return true if the current frame is speech
     */
    @Override
    public boolean isSpeech() {
        return isSpeech;
    }

    /**
     * Retrieves the accumulated signal-to-noise ratio in dB.
     *
     * @return the signal-to-noise ratio
     */
    public double getSNR() {
        double snr = (totalBackgroundLevel / backgroundFrames - totalSpeechLevel / speechFrames);
        logger.fine("Background " + totalBackgroundLevel / backgroundFrames);
        logger.fine("Speech " + totalSpeechLevel / speechFrames);
        logger.fine("SNR is " + snr);
        return snr;
    }

    /**
     * Returns an estimate of whether the input data was noisy enough to break recognition.
     * Since {@link #getSNR()} returns the background level minus the speech level, the audio
     * is counted as noisy when that value exceeds -20 dB, that is, when speech is less than
     * 20 dB above the background.
     *
     * @return the estimate of the data being noisy
     */
    public boolean getNoisy() {
        return (getSNR() > -20);
    }
}
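
To make the update rule concrete, here is a self-contained trace of the classification arithmetic above; the frame levels, starting state, and class name are invented for illustration.

public class ClassifierTrace {
    public static void main(String[] args) {
        double level = 0, background = 300;              // starting state, as in reset()
        double adjustment = 0.003, threshold = 10;
        double[] frameLevels = {55, 56, 80, 82, 81, 57}; // hypothetical dB levels

        for (double current : frameLevels) {
            level = (level + current) / 2;               // running average (averageNumber = 1)
            if (current < background)
                background = current;                    // background drops immediately...
            else
                background += (current - background) * adjustment; // ...but rises slowly
            if (level < background)
                level = background;
            boolean isSpeech = level - background > threshold;
            System.out.printf("current=%.1f level=%.1f bkg=%.1f speech=%b%n",
                    current, level, background, isSpeech);
        }
    }
}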

@@ -0,0 +1,46 @@
/*
 * Copyright 1999-2002 Carnegie Mellon University.
 * Portions Copyright 2002 Sun Microsystems, Inc.
 * Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */

package edu.cmu.sphinx.frontend.endpoint;

import edu.cmu.sphinx.frontend.Signal;

/** A signal that indicates the end of speech. */
public class SpeechEndSignal extends Signal {

    /** Constructs a SpeechEndSignal. */
    public SpeechEndSignal() {
        this(System.currentTimeMillis());
    }

    /**
     * Constructs a SpeechEndSignal with the given creation time.
     *
     * @param time the creation time of the SpeechEndSignal
     */
    public SpeechEndSignal(long time) {
        super(time);
    }

    /**
     * Returns the string "SpeechEndSignal".
     *
     * @return the string "SpeechEndSignal"
     */
    @Override
    public String toString() {
        return "SpeechEndSignal";
    }
}

@@ -0,0 +1,206 @@
/*
 * Copyright 1999-2002 Carnegie Mellon University.
 * Portions Copyright 2002 Sun Microsystems, Inc.
 * Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */

package edu.cmu.sphinx.frontend.endpoint;

import edu.cmu.sphinx.frontend.*;
import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;
import edu.cmu.sphinx.util.props.S4Integer;

import java.util.LinkedList;

/**
 * Takes a stream of SpeechClassifiedData objects, marked as speech or
 * non-speech, and marks out the regions that are considered speech. This is
 * done by inserting SPEECH_START and SPEECH_END signals into the stream.
 * <p>
 * The algorithm for inserting the two signals is as follows.
 * <p>
 * The algorithm is always in one of two states: 'in-speech' and
 * 'out-of-speech'. If 'out-of-speech', it will read in audio until it hits
 * audio that is speech. If it has read more than 'startSpeech' amount of
 * <i>continuous</i> speech, it considers that speech has started, and inserts
 * a SPEECH_START at 'speechLeader' time before speech first started. The state
 * of the algorithm changes to 'in-speech'.
 * <p>
 * Now consider the case when the algorithm is in the 'in-speech' state. If it
 * reads audio that is speech, the audio is scheduled for output. If the audio
 * is non-speech, it reads ahead until it has 'endSilence' amount of
 * <i>continuous</i> non-speech. At that point it considers that speech has
 * ended. A SPEECH_END signal is inserted at 'speechTrailer' time after the
 * first non-speech audio. The algorithm returns to the 'out-of-speech' state.
 * If any speech audio is encountered in between, the accounting starts all
 * over again.
 * <p>
 * While speech audio is being processed, the delay is lowered to a minimal
 * amount. This helps to segment both slow speech with noticeable pauses and
 * fast speech where pauses are minimal.
 */
public class SpeechMarker extends BaseDataProcessor {

    /**
     * The property for the minimum amount of time in speech (in milliseconds)
     * to be considered as utterance start.
     */
    @S4Integer(defaultValue = 200)
    public static final String PROP_START_SPEECH = "startSpeech";
    private int startSpeechTime;

    /**
     * The property for the amount of time in silence (in milliseconds) to be
     * considered as utterance end.
     */
    @S4Integer(defaultValue = 200)
    public static final String PROP_END_SILENCE = "endSilence";
    private int endSilenceTime;

    /**
     * The property for the amount of time (in milliseconds) before speech start
     * to be included as speech data.
     */
    @S4Integer(defaultValue = 50)
    public static final String PROP_SPEECH_LEADER = "speechLeader";
    private int speechLeader;

    private LinkedList<Data> inputQueue;  // audio objects are added to the end
    private LinkedList<Data> outputQueue; // audio objects are added to the end
    private boolean inSpeech;
    private int speechCount;
    private int silenceCount;
    private int startSpeechFrames;
    private int endSilenceFrames;
    private int speechLeaderFrames;

    public SpeechMarker(int startSpeechTime, int endSilenceTime, int speechLeader) {
        initLogger();
        this.startSpeechTime = startSpeechTime;
        this.speechLeader = speechLeader;
        this.endSilenceTime = endSilenceTime;
    }

    public SpeechMarker() {
    }

    @Override
    public void newProperties(PropertySheet ps) throws PropertyException {
        super.newProperties(ps);

        startSpeechTime = ps.getInt(PROP_START_SPEECH);
        endSilenceTime = ps.getInt(PROP_END_SILENCE);
        speechLeader = ps.getInt(PROP_SPEECH_LEADER);
    }

    /** Initializes this SpeechMarker. */
    @Override
    public void initialize() {
        super.initialize();
        reset();
    }

    /** Resets this SpeechMarker to a starting state. */
    private void reset() {
        inSpeech = false;
        speechCount = 0;
        silenceCount = 0;
        startSpeechFrames = startSpeechTime / 10;  // the frame length is assumed to be 10 ms
        endSilenceFrames = endSilenceTime / 10;
        speechLeaderFrames = speechLeader / 10;
        this.inputQueue = new LinkedList<Data>();
        this.outputQueue = new LinkedList<Data>();
    }

    /**
     * Returns the next Data object.
     *
     * @return the next Data object, or null if none available
     * @throws DataProcessingException if a data processing error occurs
     */
    @Override
    public Data getData() throws DataProcessingException {

        while (outputQueue.isEmpty()) {
            Data data = getPredecessor().getData();

            if (data == null)
                break;

            if (data instanceof DataStartSignal) {
                reset();
                outputQueue.add(data);
                break;
            }

            if (data instanceof DataEndSignal) {
                if (inSpeech) {
                    outputQueue.add(new SpeechEndSignal());
                }
                outputQueue.add(data);
                break;
            }

            if (data instanceof SpeechClassifiedData) {
                SpeechClassifiedData cdata = (SpeechClassifiedData) data;

                if (cdata.isSpeech()) {
                    speechCount++;
                    silenceCount = 0;
                } else {
                    speechCount = 0;
                    silenceCount++;
                }

                if (inSpeech) {
                    outputQueue.add(data);
                } else {
                    inputQueue.add(data);
                    if (inputQueue.size() > startSpeechFrames + speechLeaderFrames) {
                        inputQueue.remove(0);
                    }
                }

                if (!inSpeech && speechCount == startSpeechFrames) {
                    inSpeech = true;
                    outputQueue.add(new SpeechStartSignal(cdata.getCollectTime() - speechLeader - startSpeechFrames));
                    outputQueue.addAll(inputQueue.subList(
                            Math.max(0, inputQueue.size() - startSpeechFrames - speechLeaderFrames), inputQueue.size()));
                    inputQueue.clear();
                }
                if (inSpeech && silenceCount == endSilenceFrames) {
                    inSpeech = false;
                    outputQueue.add(new SpeechEndSignal(cdata.getCollectTime()));
                }
            }
        }

        // If we have something left, return that
        if (!outputQueue.isEmpty()) {
            Data audio = outputQueue.remove(0);
            if (audio instanceof SpeechClassifiedData) {
                SpeechClassifiedData data = (SpeechClassifiedData) audio;
                audio = data.getDoubleData();
            }
            return audio;
        } else {
            return null;
        }
    }

    public boolean inSpeech() {
        return inSpeech;
    }
}
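
For orientation, a rough wiring sketch of these two endpointing stages; someAudioFrameSource is a hypothetical upstream DataProcessor producing DoubleData frames, and the constructor arguments simply repeat the defaults documented above.

SpeechClassifier classifier = new SpeechClassifier(10, 0.003, 10, 0);
SpeechMarker marker = new SpeechMarker(200, 200, 50);

classifier.setPredecessor(someAudioFrameSource); // hypothetical audio source
marker.setPredecessor(classifier);

Data d;
while ((d = marker.getData()) != null) {
    if (d instanceof SpeechStartSignal) System.out.println("speech started");
    if (d instanceof SpeechEndSignal)   System.out.println("speech ended");
}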

@@ -0,0 +1,46 @@
/*
 * Copyright 1999-2002 Carnegie Mellon University.
 * Portions Copyright 2002 Sun Microsystems, Inc.
 * Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */

package edu.cmu.sphinx.frontend.endpoint;

import edu.cmu.sphinx.frontend.Signal;

/** A signal that indicates the start of speech. */
public class SpeechStartSignal extends Signal {

    /** Constructs a SpeechStartSignal. */
    public SpeechStartSignal() {
        this(System.currentTimeMillis());
    }

    /**
     * Constructs a SpeechStartSignal at the given time.
     *
     * @param time the time this SpeechStartSignal is created
     */
    public SpeechStartSignal(long time) {
        super(time);
    }

    /**
     * Returns the string "SpeechStartSignal".
     *
     * @return the string "SpeechStartSignal"
     */
    @Override
    public String toString() {
        return "SpeechStartSignal";
    }
}

@@ -0,0 +1,243 @@
/*
 * Copyright 1999-2002 Carnegie Mellon University.
 * Portions Copyright 2002 Sun Microsystems, Inc.
 * Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */
package edu.cmu.sphinx.frontend.feature;

import edu.cmu.sphinx.frontend.*;
import edu.cmu.sphinx.frontend.endpoint.*;
import edu.cmu.sphinx.util.props.*;

import java.util.*;

/**
 * Abstract base class for windowed feature extractors like DeltasFeatureExtractor, ConcatFeatureExtractor
 * or S3FeatureExtractor. Its main purpose is to collect windowSize cepstra frames in a buffer
 * and let the extractor compute the feature frame from them.
 */
public abstract class AbstractFeatureExtractor extends BaseDataProcessor {

    /** The property for the window size of the feature extractor. */
    @S4Integer(defaultValue = 3)
    public static final String PROP_FEATURE_WINDOW = "windowSize";

    private int bufferPosition;
    private Signal pendingSignal;
    private LinkedList<Data> outputQueue;

    protected int cepstraBufferEdge;
    protected int window;
    protected int currentPosition;
    protected int cepstraBufferSize;
    protected DoubleData[] cepstraBuffer;

    public AbstractFeatureExtractor(int window) {
        initLogger();
        this.window = window;
    }

    public AbstractFeatureExtractor() {
    }

    /*
     * (non-Javadoc)
     *
     * @see edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util.props.PropertySheet)
     */
    @Override
    public void newProperties(PropertySheet ps) throws PropertyException {
        super.newProperties(ps);
        window = ps.getInt(PROP_FEATURE_WINDOW);
    }

    /*
     * (non-Javadoc)
     *
     * @see edu.cmu.sphinx.frontend.DataProcessor#initialize(edu.cmu.sphinx.frontend.CommonConfig)
     */
    @Override
    public void initialize() {
        super.initialize();
        cepstraBufferSize = 256;
        cepstraBuffer = new DoubleData[cepstraBufferSize];
        cepstraBufferEdge = cepstraBufferSize - (window * 2 + 2);
        outputQueue = new LinkedList<Data>();
        reset();
    }

    /** Resets the feature extractor to be ready to read the next segment of data. */
    private void reset() {
        bufferPosition = 0;
        currentPosition = 0;
    }

    /**
     * Returns the next Data object produced by this feature extractor.
     *
     * @return the next available Data object, returns null if no Data is available
     * @throws DataProcessingException if there is a data processing error
     */
    @Override
    public Data getData() throws DataProcessingException {
        if (outputQueue.isEmpty()) {
            Data input = getNextData();
            if (input != null) {
                if (input instanceof DoubleData) {
                    addCepstrum((DoubleData) input);
                    computeFeatures(1);
                } else if (input instanceof DataStartSignal) {
                    pendingSignal = null;
                    outputQueue.add(input);
                    Data start = getNextData();
                    int n = processFirstCepstrum(start);
                    computeFeatures(n);
                    if (pendingSignal != null) {
                        outputQueue.add(pendingSignal);
                    }
                } else if (input instanceof SpeechEndSignal) {
                    // when the end signal is right at the boundary
                    int n = replicateLastCepstrum();
                    computeFeatures(n);
                    outputQueue.add(input);
                } else if (input instanceof DataEndSignal) {
                    outputQueue.add(input);
                }
            }
        }
        return outputQueue.isEmpty() ? null : outputQueue.removeFirst();
    }

    private Data getNextData() throws DataProcessingException {
        Data d = getPredecessor().getData();
        while (d != null && !(d instanceof DoubleData || d instanceof DataEndSignal
                || d instanceof DataStartSignal || d instanceof SpeechEndSignal)) {
            outputQueue.add(d);
            d = getPredecessor().getData();
        }

        return d;
    }

    /**
     * Replicates the given cepstrum Data object into the first window+1 number of frames in the cepstraBuffer. This is
     * the first cepstrum in the segment.
     *
     * @param cepstrum the Data to replicate
     * @return the number of Features that can be computed
     * @throws edu.cmu.sphinx.frontend.DataProcessingException if a data processing error occurs
     */
    private int processFirstCepstrum(Data cepstrum)
            throws DataProcessingException {
        if (cepstrum instanceof DataEndSignal) {
            outputQueue.add(cepstrum);
            return 0;
        } else if (cepstrum instanceof DataStartSignal) {
            throw new Error("Too many UTTERANCE_START");
        } else {
            // At the start of an utterance, we replicate the first frame
            // into window+1 frames, and then read the next "window" number
            // of frames. This will allow us to compute the delta-
            // double-delta of the first frame.
            Arrays.fill(cepstraBuffer, 0, window + 1, cepstrum);
            bufferPosition = window + 1;
            bufferPosition %= cepstraBufferSize;
            currentPosition = window;
            currentPosition %= cepstraBufferSize;
            int numberFeatures = 1;
            pendingSignal = null;
            for (int i = 0; i < window; i++) {
                Data next = getNextData();
                if (next != null) {
                    if (next instanceof DoubleData) {
                        // just a cepstrum
                        addCepstrum((DoubleData) next);
                    } else if (next instanceof DataEndSignal || next instanceof SpeechEndSignal) {
                        // end of segment cepstrum
                        pendingSignal = (Signal) next;
                        replicateLastCepstrum();
                        numberFeatures += i;
                        break;
                    } else if (next instanceof DataStartSignal) {
                        throw new Error("Too many UTTERANCE_START");
                    }
                }
            }
            return numberFeatures;
        }
    }

    /**
     * Adds the given DoubleData object to the cepstraBuffer.
     *
     * @param cepstrum the DoubleData object to add
     */
    private void addCepstrum(DoubleData cepstrum) {
        cepstraBuffer[bufferPosition++] = cepstrum;
        bufferPosition %= cepstraBufferSize;
    }

    /**
     * Replicates the last frame into the last window number of frames in the cepstraBuffer.
     *
     * @return the number of replicated cepstra
     */
    private int replicateLastCepstrum() {
        DoubleData last;
        if (bufferPosition > 0) {
            last = cepstraBuffer[bufferPosition - 1];
        } else if (bufferPosition == 0) {
            last = cepstraBuffer[cepstraBuffer.length - 1];
        } else {
            throw new Error("BufferPosition < 0");
        }
        for (int i = 0; i < window; i++) {
            addCepstrum(last);
        }
        return window;
    }

    /**
     * Converts the cepstrum data in the cepstraBuffer into feature frames.
     *
     * @param totalFeatures the number of features that will be produced
     */
    private void computeFeatures(int totalFeatures) {
        for (int i = 0; i < totalFeatures; i++) {
            computeFeature();
        }
    }

    /** Computes the next feature and adds it to the output queue. */
    private void computeFeature() {
        Data feature = computeNextFeature();
        outputQueue.add(feature);
    }

    /**
     * Computes the next feature. Advances the pointers as well.
     *
     * @return the feature Data computed
     */
    protected abstract Data computeNextFeature();
}
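
As a toy illustration of the subclass contract (implement computeNextFeature() and advance currentPosition), the following hypothetical identity extractor re-emits the current cepstrum unchanged; it is a sketch for this document, not part of the library.

package edu.cmu.sphinx.frontend.feature;

import edu.cmu.sphinx.frontend.*;

/** Hypothetical sketch: a window-0 extractor that passes cepstra through as FloatData. */
public class IdentityFeatureExtractor extends AbstractFeatureExtractor {

    public IdentityFeatureExtractor() {
        super(0); // window of 0: no context frames needed
    }

    @Override
    protected Data computeNextFeature() {
        DoubleData current = cepstraBuffer[currentPosition];
        currentPosition = (currentPosition + 1) % cepstraBufferSize; // advance the pointer

        double[] values = current.getValues();
        float[] feature = new float[values.length];
        for (int i = 0; i < values.length; i++) {
            feature[i] = (float) values[i];
        }
        return new FloatData(feature, current.getSampleRate(), current.getFirstSampleNumber());
    }
}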

@@ -0,0 +1,115 @@
/*
 * Copyright 1999-2002 Carnegie Mellon University.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */

package edu.cmu.sphinx.frontend.feature;

import edu.cmu.sphinx.frontend.*;
import edu.cmu.sphinx.frontend.endpoint.*;
import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;

import java.util.*;

/**
 * Applies automatic gain control (AGC).
 */
public class BatchAGC extends BaseDataProcessor {

    private List<Data> cepstraList;
    private double agc;

    public BatchAGC() {
        initLogger();
    }

    /* (non-Javadoc)
     * @see edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util.props.PropertySheet)
     */
    @Override
    public void newProperties(PropertySheet ps) throws PropertyException {
        super.newProperties(ps);
    }

    /** Initializes this BatchAGC. */
    @Override
    public void initialize() {
        super.initialize();
        cepstraList = new LinkedList<Data>();
    }

    /**
     * Returns the next Data object, which is a normalized cepstrum. Signal objects are returned unmodified.
     *
     * @return the next available Data object, returns null if no Data object is available
     * @throws DataProcessingException if there is an error processing data
     */
    @Override
    public Data getData() throws DataProcessingException {

        Data output = null;

        if (!cepstraList.isEmpty()) {
            output = cepstraList.remove(0);
        } else {
            agc = 0.0;
            cepstraList.clear();
            // read the cepstra of the entire utterance, calculate and subtract the gain
            if (readUtterance() > 0) {
                normalizeList();
                output = cepstraList.remove(0);
            }
        }

        return output;
    }

    /**
     * Reads the cepstra of the entire utterance into the cepstraList.
     *
     * @return the number of cepstra (with Data) read
     * @throws DataProcessingException if an error occurred reading the Data
     */
    private int readUtterance() throws DataProcessingException {

        Data input = null;
        int numFrames = 0;

        while (true) {
            input = getPredecessor().getData();
            if (input == null) {
                break;
            } else if (input instanceof DataEndSignal || input instanceof SpeechEndSignal) {
                cepstraList.add(input);
                break;
            } else if (input instanceof DoubleData) {
                cepstraList.add(input);
                double c0 = ((DoubleData) input).getValues()[0];
                if (agc < c0)
                    agc = c0;
            } else { // DataStartSignal or other Signal
                cepstraList.add(input);
            }
            numFrames++;
        }

        return numFrames;
    }

    /** Normalizes the list of Data. */
    private void normalizeList() {
        for (Data data : cepstraList) {
            if (data instanceof DoubleData) {
                ((DoubleData) data).getValues()[0] -= agc;
            }
        }
    }
}
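
The effect of the gain control in isolation: the maximum c0 over the utterance becomes the gain, which is then subtracted from every frame's c0. A standalone sketch with invented values:

double[][] c0PerFrame = {{5.0}, {8.0}, {6.5}};   // hypothetical c0 per frame
double agc = 0.0;
for (double[] frame : c0PerFrame) agc = Math.max(agc, frame[0]);
for (double[] frame : c0PerFrame) frame[0] -= agc;
// c0 is now {-3.0, 0.0, -1.5}: the loudest frame is pinned at 0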

@@ -0,0 +1,183 @@
/*
 * Copyright 1999-2002 Carnegie Mellon University.
 * Portions Copyright 2002 Sun Microsystems, Inc.
 * Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */

package edu.cmu.sphinx.frontend.feature;

import edu.cmu.sphinx.frontend.*;
import edu.cmu.sphinx.frontend.endpoint.*;
import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;

import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.util.*;

/**
 * Applies cepstral mean normalization (CMN), sometimes called channel mean normalization, to incoming cepstral data.
 *
 * Its goal is to reduce the distortion caused by the transmission channel. The output is mean normalized cepstral
 * data.
 * <p>
 * The CMN processing subtracts the mean from all the {@link Data} objects between a {@link
 * edu.cmu.sphinx.frontend.DataStartSignal} and a {@link DataEndSignal} or between a {@link
 * edu.cmu.sphinx.frontend.endpoint.SpeechStartSignal} and a {@link SpeechEndSignal}. BatchCMN will read in all the {@link Data}
 * objects, calculate the mean, and subtract this mean from all the {@link Data} objects. For a given utterance, it will
 * only produce an output after reading all the incoming data for the utterance. As a result, this process can introduce
 * a significant processing delay, which is acceptable for batch processing, but not for live mode. In the latter case,
 * one should use the {@link LiveCMN}.
 * <p>
 * CMN is a technique used to reduce distortions that are introduced by the transfer function of the transmission
 * channel (e.g., the microphone). Using a transmission channel to transmit the input speech translates to multiplying
 * the spectrum of the input speech with the transfer function of the channel (the distortion). Since the cepstrum is
 * the Fourier Transform of the log spectrum, the logarithm turns the multiplication into a summation. Averaging over
 * time, the mean is an estimate of the channel, which remains roughly constant. The channel is thus removed from the
 * cepstrum by subtracting the mean cepstral vector. Intuitively, the mean cepstral vector approximately describes the
 * spectral characteristics of the transmission channel (e.g., microphone).
 *
 * @see LiveCMN
 */
public class BatchCMN extends BaseDataProcessor {

    private double[] sums; // array of current sums
    private List<Data> cepstraList;
    private int numberDataCepstra;
    private DecimalFormat formatter = new DecimalFormat("0.00;-0.00", new DecimalFormatSymbols(Locale.US));

    public BatchCMN() {
        initLogger();
    }

    /* (non-Javadoc)
     * @see edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util.props.PropertySheet)
     */
    @Override
    public void newProperties(PropertySheet ps) throws PropertyException {
        super.newProperties(ps);
    }

    /** Initializes this BatchCMN. */
    @Override
    public void initialize() {
        super.initialize();
        sums = null;
        cepstraList = new LinkedList<Data>();
    }

    /** Initializes the sums array and clears the cepstra list. */
    private void reset() {
        sums = null; // clears the sums array
        cepstraList.clear();
        numberDataCepstra = 0;
    }

    /**
     * Returns the next Data object, which is a normalized cepstrum. Signal objects are returned unmodified.
     *
     * @return the next available Data object, returns null if no Data object is available
     * @throws DataProcessingException if there is an error processing data
     */
    @Override
    public Data getData() throws DataProcessingException {

        Data output = null;

        if (!cepstraList.isEmpty()) {
            output = cepstraList.remove(0);
        } else {
            reset();
            // read the cepstra of the entire utterance, calculate
            // and apply the cepstral mean
            if (readUtterance() > 0) {
                normalizeList();
                output = cepstraList.remove(0);
            }
        }

        return output;
    }

    /**
     * Reads the cepstra of the entire utterance into the cepstraList.
     *
     * @return the number of cepstra (with Data) read
     * @throws DataProcessingException if an error occurred reading the Data
     */
    private int readUtterance() throws DataProcessingException {

        Data input = null;

        do {
            input = getPredecessor().getData();
            if (input != null) {
                if (input instanceof DoubleData) {
                    double[] cepstrumData = ((DoubleData) input).getValues();
                    if (sums == null) {
                        sums = new double[cepstrumData.length];
                    } else {
                        if (sums.length != cepstrumData.length) {
                            throw new Error("Inconsistent cepstrum lengths: sums: "
                                    + sums.length + ", cepstrum: " + cepstrumData.length);
                        }
                    }
                    if (cepstrumData[0] >= 0) {
                        // add the cepstrum data to the sums
                        for (int j = 0; j < cepstrumData.length; j++) {
                            sums[j] += cepstrumData[j];
                        }
                        numberDataCepstra++;
                    }

                    cepstraList.add(input);

                } else if (input instanceof DataEndSignal || input instanceof SpeechEndSignal) {
                    cepstraList.add(input);
                    break;
                } else { // DataStartSignal or other Signal
                    cepstraList.add(input);
                }
            }
        } while (input != null);

        return numberDataCepstra;
    }

    /** Normalizes the list of Data. */
    private void normalizeList() {
        StringBuilder cmn = new StringBuilder();
        // calculate the mean first
        for (int i = 0; i < sums.length; i++) {
            sums[i] /= numberDataCepstra;
            cmn.append(formatter.format(sums[i]));
            cmn.append(' ');
        }
        logger.info(cmn.toString());

        for (Data data : cepstraList) {
            if (data instanceof DoubleData) {
                double[] cepstrum = ((DoubleData) data).getValues();
                for (int j = 0; j < cepstrum.length; j++) {
                    cepstrum[j] -= sums[j]; // sums[] now holds the means
                }
            }
        }
    }
}
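
The normalization itself is just per-coefficient mean subtraction over the whole utterance; a compact standalone sketch with invented numbers:

double[][] cepstra = {{12.0, 1.0}, {14.0, 3.0}, {10.0, 2.0}}; // hypothetical frames
int n = cepstra.length, dim = cepstra[0].length;
double[] mean = new double[dim];
for (double[] frame : cepstra)
    for (int j = 0; j < dim; j++) mean[j] += frame[j] / n;
for (double[] frame : cepstra)
    for (int j = 0; j < dim; j++) frame[j] -= mean[j];
// the means are {12.0, 2.0}; each coefficient now averages zero over the utterance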

@@ -0,0 +1,156 @@
/*
 * Copyright 2010 PC-NG Inc.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */

package edu.cmu.sphinx.frontend.feature;

import edu.cmu.sphinx.frontend.*;
import edu.cmu.sphinx.frontend.endpoint.*;
import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;

import java.util.*;

/**
 * Applies cepstral variance normalization (CVN), so that each coefficient
 * will have unit variance. You need to put this element after the mean
 * normalizer in the front-end pipeline.
 * <p>
 * CVN is cited to improve the stability of decoding with additive noise,
 * so it might be useful in some situations.
 *
 * @see LiveCMN
 */
public class BatchVarNorm extends BaseDataProcessor {

    private double[] variances; // array of current sums of squares
    private List<Data> cepstraList;
    private int numberDataCepstra;

    public BatchVarNorm() {
        initLogger();
    }

    /* (non-Javadoc)
     * @see edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util.props.PropertySheet)
     */
    @Override
    public void newProperties(PropertySheet ps) throws PropertyException {
        super.newProperties(ps);
    }

    /** Initializes this BatchVarNorm. */
    @Override
    public void initialize() {
        super.initialize();
        variances = null;
        cepstraList = new LinkedList<Data>();
    }

    /** Initializes the sums array and clears the cepstra list. */
    private void reset() {
        variances = null; // clears the sums array
        cepstraList.clear();
        numberDataCepstra = 0;
    }

    /**
     * Returns the next Data object, which is a normalized cepstrum. Signal objects are returned unmodified.
     *
     * @return the next available Data object, returns null if no Data object is available
     * @throws DataProcessingException if there is an error processing data
     */
    @Override
    public Data getData() throws DataProcessingException {

        Data output = null;

        if (!cepstraList.isEmpty()) {
            output = cepstraList.remove(0);
        } else {
            reset();
            // read the cepstra of the entire utterance, calculate
            // and apply variance normalization
            if (readUtterance() > 0) {
                normalizeList();
                output = cepstraList.remove(0);
            }
        }

        return output;
    }

    /**
     * Reads the cepstra of the entire utterance into the cepstraList.
     *
     * @return the number of cepstra (with Data) read
     * @throws DataProcessingException if an error occurred reading the Data
     */
    private int readUtterance() throws DataProcessingException {

        Data input = null;

        do {
            input = getPredecessor().getData();
            if (input != null) {
                if (input instanceof DoubleData) {
                    numberDataCepstra++;
                    double[] cepstrumData = ((DoubleData) input).getValues();
                    if (variances == null) {
                        variances = new double[cepstrumData.length];
                    } else {
                        if (variances.length != cepstrumData.length) {
                            throw new Error("Inconsistent cepstrum lengths: variances: "
                                    + variances.length + ", cepstrum: " + cepstrumData.length);
                        }
                    }
                    // add the squared cepstrum data to the sums
                    for (int j = 0; j < cepstrumData.length; j++) {
                        variances[j] += cepstrumData[j] * cepstrumData[j];
                    }
                    cepstraList.add(input);

                } else if (input instanceof DataEndSignal || input instanceof SpeechEndSignal) {
                    cepstraList.add(input);
                    break;
                } else { // DataStartSignal or other Signal
                    cepstraList.add(input);
                }
            }
        } while (input != null);

        return numberDataCepstra;
    }

    /** Normalizes the list of Data. */
    private void normalizeList() {

        // convert the sums of squares into inverse standard deviations
        // (a zero mean is assumed after the preceding CMN stage)
        for (int i = 0; i < variances.length; i++) {
            variances[i] = Math.sqrt(numberDataCepstra / variances[i]);
        }

        for (Data data : cepstraList) {
            if (data instanceof DoubleData) {
                double[] cepstrum = ((DoubleData) data).getValues();
                for (int j = 0; j < cepstrum.length; j++) {
                    cepstrum[j] *= variances[j];
                }
            }
        }
    }
}
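
In effect each coefficient is scaled by the inverse of its root mean square (its standard deviation, given a zero mean after CMN); a standalone sketch with invented numbers:

double[][] cepstra = {{2.0, -1.0}, {-2.0, 1.0}}; // hypothetical mean-normalized frames
int n = cepstra.length, dim = cepstra[0].length;
double[] scale = new double[dim];
for (double[] frame : cepstra)
    for (int j = 0; j < dim; j++) scale[j] += frame[j] * frame[j];
for (int j = 0; j < dim; j++) scale[j] = Math.sqrt(n / scale[j]);
for (double[] frame : cepstra)
    for (int j = 0; j < dim; j++) frame[j] *= scale[j];
// each coefficient now has unit variance across the utterance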

@@ -0,0 +1,56 @@
/*
 * Copyright 2002-2009 Carnegie Mellon University.
 * Copyright 2009 PC-NG Inc.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */
package edu.cmu.sphinx.frontend.feature;

import edu.cmu.sphinx.frontend.*;

/**
 * This component concatenates the cepstra from a sequence of frames according to the window size.
 * It is not supposed to give high accuracy alone, but combined with an LDA transform it can give the same
 * or even better results than conventional delta and delta-delta coefficients. The idea is that
 * delta-delta computation is also a matrix multiplication, so by using a matrix automatically
 * generated with LDA/MLLT we can obtain better results.
 * The model for this feature extractor should be trained with SphinxTrain with the 1s_c feature type and
 * with the cepwin option enabled. Don't forget to set the window size accordingly.
 */
public class ConcatFeatureExtractor extends AbstractFeatureExtractor {

    public ConcatFeatureExtractor(int window) {
        super(window);
    }

    public ConcatFeatureExtractor() {
    }

    /**
     * Computes the next feature. Advances the pointers as well.
     *
     * @return the feature Data computed
     */
    @Override
    protected Data computeNextFeature() {
        DoubleData currentCepstrum = cepstraBuffer[currentPosition];
        float[] feature = new float[(window * 2 + 1) * currentCepstrum.getValues().length];
        int j = 0;
        for (int k = -window; k <= window; k++) {
            int position = (currentPosition + k + cepstraBufferSize) % cepstraBufferSize;
            double[] buffer = cepstraBuffer[position].getValues();
            for (double val : buffer) {
                feature[j++] = (float) val;
            }
        }
        currentPosition = (currentPosition + 1) % cepstraBufferSize;

        return (new FloatData(feature,
                currentCepstrum.getSampleRate(),
                currentCepstrum.getFirstSampleNumber()));
    }
}
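
For concreteness, with window = 4 (a typical cepwin setting, assumed here) and 13-dimensional cepstra, each output FloatData is (2*4+1)*13 = 117-dimensional; a hypothetical wiring sketch:

ConcatFeatureExtractor concat = new ConcatFeatureExtractor(4);
concat.setPredecessor(cmnStage);    // hypothetical upstream processor
int featureDim = (4 * 2 + 1) * 13;  // 117; must match the trained model's feature size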

@@ -0,0 +1,93 @@
/*
 * Copyright 1999-2002 Carnegie Mellon University.
 * Portions Copyright 2002 Sun Microsystems, Inc.
 * Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */
package edu.cmu.sphinx.frontend.feature;

import edu.cmu.sphinx.frontend.*;

/**
 * Computes the delta and double delta of input cepstrum (or plp or ...). The delta is the first order derivative and
 * the double delta (a.k.a. delta delta) is the second order derivative of the original cepstrum. They help model the
 * speech signal dynamics. The output data is a {@link FloatData} object with a float array of size three times the
 * original cepstrum, formed by the concatenation of cepstra, delta cepstra, and double delta cepstra. The output is the
 * feature vector used by the decoder. Figure 1 shows the arrangement of the output feature data array:
 * <p>
 * <img alt="Layout of features" src="doc-files/feature.jpg"> <br> <b>Figure 1: Layout of the returned features. </b>
 * <p>
 * Suppose that the original cepstrum has a length of N; the first N elements of the feature are just the original
 * cepstrum, the second N elements are the delta of the cepstrum, and the last N elements are the double delta of the
 * cepstrum.
 * <p>
 * Figure 2 below shows pictorially the computation of the delta and double delta of a cepstrum vector, using the last 3
 * cepstra and the next 3 cepstra. <img alt="Delta computation" src="doc-files/deltas.jpg"> <br> <b>Figure 2: Delta and double delta vector
 * computation. </b>
 * <p>
 * Referring to Figure 2, the delta is computed by subtracting the cepstrum that is two frames behind the current
 * cepstrum from the cepstrum that is two frames ahead of the current cepstrum. The computation of the double delta is
 * similar. It is computed by subtracting the delta cepstrum one time frame behind from the delta cepstrum one time
 * frame ahead. Replacing delta cepstra with cepstra, this works out to a formula involving the cepstra that are one and
 * three frames behind and ahead of the current cepstrum.
 */
public class DeltasFeatureExtractor extends AbstractFeatureExtractor {

    public DeltasFeatureExtractor(int window) {
        super(window);
    }

    public DeltasFeatureExtractor() {
    }

    /**
     * Computes the next feature. Advances the pointers as well.
     *
     * @return the feature Data computed
     */
    @Override
    protected Data computeNextFeature() {

        int jp1 = (currentPosition - 1 + cepstraBufferSize) % cepstraBufferSize;
        int jp2 = (currentPosition - 2 + cepstraBufferSize) % cepstraBufferSize;
        int jp3 = (currentPosition - 3 + cepstraBufferSize) % cepstraBufferSize;
        int jf1 = (currentPosition + 1) % cepstraBufferSize;
        int jf2 = (currentPosition + 2) % cepstraBufferSize;
        int jf3 = (currentPosition + 3) % cepstraBufferSize;

        DoubleData currentCepstrum = cepstraBuffer[currentPosition];
        double[] mfc3f = cepstraBuffer[jf3].getValues();
        double[] mfc2f = cepstraBuffer[jf2].getValues();
        double[] mfc1f = cepstraBuffer[jf1].getValues();
        double[] current = currentCepstrum.getValues();
        double[] mfc1p = cepstraBuffer[jp1].getValues();
        double[] mfc2p = cepstraBuffer[jp2].getValues();
        double[] mfc3p = cepstraBuffer[jp3].getValues();
        float[] feature = new float[current.length * 3];

        currentPosition = (currentPosition + 1) % cepstraBufferSize;

        // CEP: copy all the cepstrum data
        int j = 0;
        for (double val : current) {
            feature[j++] = (float) val;
        }
        // DCEP: mfc[2] - mfc[-2]
        for (int k = 0; k < mfc2f.length; k++) {
            feature[j++] = (float) (mfc2f[k] - mfc2p[k]);
        }
        // D2CEP: (mfc[3] - mfc[-1]) - (mfc[1] - mfc[-3])
        for (int k = 0; k < mfc3f.length; k++) {
            feature[j++] = (float) ((mfc3f[k] - mfc1p[k]) - (mfc1f[k] - mfc3p[k]));
        }
        return (new FloatData(feature,
                currentCepstrum.getSampleRate(),
                currentCepstrum.getFirstSampleNumber()));
    }
}
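
Written out for a single coefficient track c[t], the formulas implemented above are delta(t) = c[t+2] - c[t-2] and ddelta(t) = (c[t+3] - c[t-1]) - (c[t+1] - c[t-3]); a minimal standalone check with invented values:

double[] c = {0, 1, 2, 3, 4, 5, 6}; // hypothetical coefficient values for t = 0..6
int t = 3;                          // current frame, three frames of context each side
double delta  = c[t + 2] - c[t - 2];                           // DCEP: 5 - 1 = 4
double ddelta = (c[t + 3] - c[t - 1]) - (c[t + 1] - c[t - 3]); // D2CEP: (6-2)-(4-0) = 0
// a linear ramp gives a constant delta and zero double delta, as expected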

Some files were not shown because too many files have changed in this diff.