Added voice control

Former-commit-id: 6f69079bf44f0d8f9ae40de6b0f1638d103464c2
Ziver Koc 2015-05-13 21:14:10 +00:00
parent 35c92407a3
commit 53da641909
863 changed files with 192681 additions and 0 deletions


@@ -0,0 +1,34 @@
<project
xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>edu.cmu.sphinx</groupId>
<artifactId>sphinx4-parent</artifactId>
<version>1.0-SNAPSHOT</version>
</parent>
<artifactId>sphinx4-core</artifactId>
<packaging>jar</packaging>
<name>Sphinx4 core</name>
<dependencies>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-math3</artifactId>
<version>3.2</version>
</dependency>
<dependency>
<groupId>edu.cmu.sphinx</groupId>
<artifactId>sphinx4-data</artifactId>
<version>1.0-SNAPSHOT</version>
<scope>test</scope>
</dependency>
</dependencies>
</project>


@@ -0,0 +1,355 @@
/*
* Copyright 2014 Alpha Cephei Inc.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.alignment;
import static java.lang.Math.abs;
import static java.lang.Math.max;
import static java.lang.Math.min;
import static java.util.Arrays.fill;
import static java.util.Collections.emptyList;
import java.util.*;
import edu.cmu.sphinx.util.Range;
import edu.cmu.sphinx.util.Utilities;
/**
*
* @author Alexander Solovets
*/
public class LongTextAligner {
private final class Alignment {
public final class Node {
private final int databaseIndex;
private final int queryIndex;
private Node(int row, int column) {
this.databaseIndex = column;
this.queryIndex = row;
}
public int getDatabaseIndex() {
return shifts.get(databaseIndex - 1);
}
public int getQueryIndex() {
return indices.get(queryIndex - 1);
}
public String getQueryWord() {
if (queryIndex > 0)
return query.get(getQueryIndex());
return null;
}
public String getDatabaseWord() {
if (databaseIndex > 0)
return reftup.get(getDatabaseIndex());
return null;
}
public int getValue() {
if (isBoundary())
return max(queryIndex, databaseIndex);
return hasMatch() ? 0 : 1;
}
public boolean hasMatch() {
return getQueryWord().equals(getDatabaseWord());
}
public boolean isBoundary() {
return queryIndex == 0 || databaseIndex == 0;
}
public boolean isTarget() {
return queryIndex == indices.size() &&
databaseIndex == shifts.size();
}
public List<Node> adjacent() {
List<Node> result = new ArrayList<Node>(3);
if (queryIndex < indices.size() &&
databaseIndex < shifts.size()) {
result.add(new Node(queryIndex + 1, databaseIndex + 1));
}
if (databaseIndex < shifts.size()) {
result.add(new Node(queryIndex, databaseIndex + 1));
}
if (queryIndex < indices.size()) {
result.add(new Node(queryIndex + 1, databaseIndex));
}
return result;
}
@Override
public boolean equals(Object object) {
if (!(object instanceof Node))
return false;
Node other = (Node) object;
return queryIndex == other.queryIndex &&
databaseIndex == other.databaseIndex;
}
@Override
public int hashCode() {
return 31 * (31 * queryIndex + databaseIndex);
}
@Override
public String toString() {
return String.format("[%d %d]", queryIndex, databaseIndex);
}
}
private final List<Integer> shifts;
private final List<String> query;
private final List<Integer> indices;
private final List<Node> alignment;
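/**
* Builds the alignment lattice between the anchored query positions
* ("indices") and the candidate database positions ("shifts") in the
* given range, then runs a best-first search (in the spirit of A*, with
* a distance-to-the-diagonal term folded into the cost) from node (0, 0)
* to the target node. The matching, non-boundary nodes on the cheapest
* path form the alignment.
*/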
public Alignment(List<String> query, Range range) {
this.query = query;
indices = new ArrayList<Integer>();
Set<Integer> shiftSet = new TreeSet<Integer>();
for (int i = 0; i < query.size(); i++) {
if (tupleIndex.containsKey(query.get(i))) {
indices.add(i);
for (Integer shift : tupleIndex.get(query.get(i))) {
if (range.contains(shift))
shiftSet.add(shift);
}
}
}
shifts = new ArrayList<Integer>(shiftSet);
final Map<Node, Integer> cost = new HashMap<Node, Integer>();
PriorityQueue<Node> openSet = new PriorityQueue<Node>(1, new Comparator<Node>() {
@Override
public int compare(Node o1, Node o2) {
return cost.get(o1).compareTo(cost.get(o2));
}
});
Collection<Node> closedSet = new HashSet<Node>();
Map<Node, Node> parents = new HashMap<Node, Node>();
Node startNode = new Node(0, 0);
cost.put(startNode, 0);
openSet.add(startNode);
while (!openSet.isEmpty()) {
Node q = openSet.poll();
if (closedSet.contains(q))
continue;
if (q.isTarget()) {
List<Node> backtrace = new ArrayList<Node>();
while (parents.containsKey(q)) {
if (!q.isBoundary() && q.hasMatch())
backtrace.add(q);
q = parents.get(q);
}
alignment = new ArrayList<Node>(backtrace);
Collections.reverse(alignment);
return;
}
closedSet.add(q);
for (Node nb : q.adjacent()) {
if (closedSet.contains(nb))
continue;
// FIXME: move to appropriate location
int l = abs(indices.size() - shifts.size() - q.queryIndex +
q.databaseIndex) -
abs(indices.size() - shifts.size() -
nb.queryIndex +
nb.databaseIndex);
Integer oldScore = cost.get(nb);
Integer qScore = cost.get(q);
if (oldScore == null)
oldScore = Integer.MAX_VALUE;
if (qScore == null)
qScore = Integer.MAX_VALUE;
int newScore = qScore + nb.getValue() - l;
if (newScore < oldScore) {
cost.put(nb, newScore);
openSet.add(nb);
parents.put(nb, q);
}
}
}
alignment = emptyList();
}
public List<Node> getIndices() {
return alignment;
}
}
private final int tupleSize;
private final List<String> reftup;
private final HashMap<String, ArrayList<Integer>> tupleIndex;
private List<String> refWords;
/**
* Constructs a new text aligner that serves requests for alignment of a
* sequence of words with the provided database sequence. Sequences are
* aligned by tuples comprising one or more consecutive words.
*
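* <p>
* A minimal usage sketch (the word lists here are hypothetical):
* <pre>
* List&lt;String&gt; transcript = Arrays.asList("the quick brown fox".split(" "));
* LongTextAligner aligner = new LongTextAligner(transcript, 1);
* int[] ids = aligner.align(Arrays.asList("quick", "fox"));
* // ids[i] holds the transcript index of the i-th query word, or -1
* </pre>
*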
* @param words list of words forming the database
* @param tupleSize size of a tuple, must be greater than or equal to 1
*/
public LongTextAligner(List<String> words, int tupleSize) {
assert words != null;
assert tupleSize > 0;
this.tupleSize = tupleSize;
this.refWords = words;
int offset = 0;
reftup = getTuples(words);
tupleIndex = new HashMap<String, ArrayList<Integer>>();
for (String tuple : reftup) {
ArrayList<Integer> indexes = tupleIndex.get(tuple);
if (indexes == null) {
indexes = new ArrayList<Integer>();
tupleIndex.put(tuple, indexes);
}
indexes.add(offset++);
}
}
/**
* Aligns query sequence with the previously built database.
* @param query list of words to look for
*
* @return indices of alignment
*/
public int[] align(List<String> query) {
return align(query, new Range(0, refWords.size()));
}
/**
* Aligns query sequence with the previously built database.
* @param words list of words to look for
* @param range range of database to look for alignment
*
* @return indices of alignment
*/
public int[] align(List<String> words, Range range) {
if (range.upperEndpoint() - range.lowerEndpoint() < tupleSize || words.size() < tupleSize) {
return alignTextSimple(refWords.subList(range.lowerEndpoint(), range.upperEndpoint()), words, range.lowerEndpoint());
}
int[] result = new int[words.size()];
fill(result, -1);
int lastIndex = 0;
for (Alignment.Node node : new Alignment(getTuples(words), range)
.getIndices()) {
// for (int j = 0; j < tupleSize; ++j)
lastIndex = max(lastIndex, node.getQueryIndex());
for (; lastIndex < node.getQueryIndex() + tupleSize; ++lastIndex)
result[lastIndex] = node.getDatabaseIndex() + lastIndex -
node.getQueryIndex();
}
return result;
}
/**
* Makes a list of tuples of the given size out of a list of words.
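* For example, with tuple size 2, the words [a, b, c] would yield the
* tuples ["a b", "b c"] (assuming {@link Utilities#join} separates words
* with spaces).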
*
* @param words the source words
* @return list of tuples of size {@link #tupleSize}
*/
private List<String> getTuples(List<String> words) {
List<String> result = new ArrayList<String>();
LinkedList<String> tuple = new LinkedList<String>();
Iterator<String> it = words.iterator();
for (int i = 0; i < tupleSize - 1; i++) {
tuple.add(it.next());
}
while (it.hasNext()) {
tuple.addLast(it.next());
result.add(Utilities.join(tuple));
tuple.removeFirst();
}
return result;
}
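/**
* Aligns two short word sequences with a Levenshtein-distance dynamic
* program followed by a backtrace: query words matched on the cheapest
* edit path are mapped to their database positions (shifted by offset),
* unmatched query words are mapped to -1.
*
* @param database the reference words to align against
* @param query the words to align
* @param offset the offset added to each resulting database index
* @return for each query word, its aligned database index or -1
*/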
static int[] alignTextSimple(List<String> database, List<String> query,
int offset) {
int n = database.size() + 1;
int m = query.size() + 1;
int[][] f = new int[n][m];
f[0][0] = 0;
for (int i = 1; i < n; ++i) {
f[i][0] = i;
}
for (int j = 1; j < m; ++j) {
f[0][j] = j;
}
for (int i = 1; i < n; ++i) {
for (int j = 1; j < m; ++j) {
int match = f[i - 1][j - 1];
String refWord = database.get(i - 1);
String queryWord = query.get(j - 1);
if (!refWord.equals(queryWord)) {
++match;
}
int insert = f[i][j - 1] + 1;
int delete = f[i - 1][j] + 1;
f[i][j] = min(match, min(insert, delete));
}
}
--n;
--m;
int[] alignment = new int[m];
Arrays.fill(alignment, -1);
while (m > 0) {
if (n == 0) {
--m;
} else {
String refWord = database.get(n - 1);
String queryWord = query.get(m - 1);
if (f[n - 1][m - 1] <= f[n - 1][m]
&& f[n - 1][m - 1] <= f[n][m - 1]
&& refWord.equals(queryWord)) {
alignment[--m] = --n + offset;
} else {
if (f[n - 1][m] < f[n][m - 1]) {
--n;
} else {
--m;
}
}
}
}
return alignment;
}
}


@@ -0,0 +1,36 @@
/*
* Copyright 2014 Alpha Cephei Inc.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*/
package edu.cmu.sphinx.alignment;
import java.util.Arrays;
import java.util.List;
public class SimpleTokenizer implements TextTokenizer {
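/**
* Normalizes quotes and dashes, strips markup characters, lowercases the
* text and splits it on sentence punctuation. For example, "Hello, world!"
* yields the chunks "hello" and " world" (surrounding whitespace inside a
* chunk is preserved).
*/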
public List<String> expand(String text) {
text = text.replace('’', '\'');
text = text.replace('‘', ' ');
text = text.replace('”', ' ');
text = text.replace('“', ' ');
text = text.replace('"', ' ');
text = text.replace('»', ' ');
text = text.replace('«', ' ');
text = text.replace('–', '-');
text = text.replace('—', ' ');
text = text.replace('…', ' ');
text = text.replace(" - ", " ");
text = text.replaceAll("[/_*%]", " ");
text = text.toLowerCase();
String[] tokens = text.split("[.,?:!;()]");
return Arrays.asList(tokens);
}
}


@@ -0,0 +1,25 @@
/*
* Copyright 2014 Alpha Cephei Inc.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.alignment;
import java.util.List;
public interface TextTokenizer {
/**
* Cleans up the text and splits it into a list of sentence-like chunks.
*
* @param text the input text
* @return a list of chunks of the text
*/
List<String> expand(String text);
}


@@ -0,0 +1,158 @@
/**
* Portions Copyright 2001 Sun Microsystems, Inc.
* Portions Copyright 1999-2001 Language Technologies Institute,
* Carnegie Mellon University.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*/
package edu.cmu.sphinx.alignment;
/**
* Contains a parsed token from a Tokenizer.
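* <p>
* A token bundles a word together with its surrounding context. For
* example, for the input fragment " 'Hello!'" a tokenizer would typically
* produce a token with whitespace " ", prepunctuation "'", word "Hello"
* and postpunctuation "!'" (a sketch of how the fields relate, not a
* guarantee of any tokenizer's exact output).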
*/
public class Token {
private String token = null;
private String whitespace = null;
private String prepunctuation = null;
private String postpunctuation = null;
private int position = 0; // position in the original input text
private int lineNumber = 0;
/**
* Returns the whitespace characters of this Token.
*
* @return the whitespace characters of this Token; null if this Token does
* not use whitespace characters
*/
public String getWhitespace() {
return whitespace;
}
/**
* Returns the prepunctuation characters of this Token.
*
* @return the prepunctuation characters of this Token; null if this Token
* does not use prepunctuation characters
*/
public String getPrepunctuation() {
return prepunctuation;
}
/**
* Returns the postpunctuation characters of this Token.
*
* @return the postpunctuation characters of this Token; null if this Token
* does not use postpunctuation characters
*/
public String getPostpunctuation() {
return postpunctuation;
}
/**
* Returns the position of this token in the original input text.
*
* @return the position of this token in the original input text
*/
public int getPosition() {
return position;
}
/**
* Returns the line of this token in the original text.
*
* @return the line of this token in the original text
*/
public int getLineNumber() {
return lineNumber;
}
/**
* Sets the whitespace characters of this Token.
*
* @param whitespace the whitespace character for this token
*/
public void setWhitespace(String whitespace) {
this.whitespace = whitespace;
}
/**
* Sets the prepunctuation characters of this Token.
*
* @param prepunctuation the prepunctuation characters
*/
public void setPrepunctuation(String prepunctuation) {
this.prepunctuation = prepunctuation;
}
/**
* Sets the postpunctuation characters of this Token.
*
* @param postpunctuation the postpunctuation characters
*/
public void setPostpunctuation(String postpunctuation) {
this.postpunctuation = postpunctuation;
}
/**
* Sets the position of the token in the original input text.
*
* @param position the position of the input text
*/
public void setPosition(int position) {
this.position = position;
}
/**
* Set the line of this token in the original text.
*
* @param lineNumber the line of this token in the original text
*/
public void setLineNumber(int lineNumber) {
this.lineNumber = lineNumber;
}
/**
* Returns the string associated with this token.
*
* @return the token if it exists; otherwise null
*/
public String getWord() {
return token;
}
/**
* Sets the string of this Token.
*
* @param word the word for this token
*/
public void setWord(String word) {
token = word;
}
/**
* Converts this token to a string.
*
* @return the string representation of this object
*/
public String toString() {
StringBuffer fullToken = new StringBuffer();
if (whitespace != null) {
fullToken.append(whitespace);
}
if (prepunctuation != null) {
fullToken.append(prepunctuation);
}
if (token != null) {
fullToken.append(token);
}
if (postpunctuation != null) {
fullToken.append(postpunctuation);
}
return fullToken.toString();
}
}


@@ -0,0 +1,405 @@
/**
* Portions Copyright 2001 Sun Microsystems, Inc.
* Portions Copyright 1999-2001 Language Technologies Institute,
* Carnegie Mellon University.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*/
package edu.cmu.sphinx.alignment.tokenizer;
import java.io.IOException;
import java.io.Reader;
import java.util.Iterator;
import edu.cmu.sphinx.alignment.Token;
/**
* Implements the tokenizer interface. Breaks an input sequence of characters
* into a set of tokens.
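* <p>
* A minimal usage sketch (the input string is hypothetical):
* <pre>
* CharTokenizer tokenizer = new CharTokenizer("Hello, world!");
* while (tokenizer.hasNext()) {
*     Token token = tokenizer.next();
*     System.out.println(token.getWord());
* }
* </pre>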
*/
public class CharTokenizer implements Iterator<Token> {
/** A constant indicating that the end of the stream has been read. */
public static final int EOF = -1;
/** A string containing the default whitespace characters. */
public static final String DEFAULT_WHITESPACE_SYMBOLS = " \t\n\r";
/** A string containing the default single characters. */
public static final String DEFAULT_SINGLE_CHAR_SYMBOLS = "(){}[]";
/** A string containing the default pre-punctuation characters. */
public static final String DEFAULT_PREPUNCTUATION_SYMBOLS = "\"'`({[";
/** A string containing the default post-punctuation characters. */
public static final String DEFAULT_POSTPUNCTUATION_SYMBOLS =
"\"'`.,:;!?(){}[]";
/** The line number. */
private int lineNumber;
/** The input text (from the Utterance) to tokenize. */
private String inputText;
/** The file to read input text from, if using file mode. */
private Reader reader;
/** The current character, whether it's from the file or the input text. */
private int currentChar;
/**
* The current char position for the input text (not the file). This is
* called "file_pos" in flite.
*/
private int currentPosition;
/** The delimiting symbols of this tokenizer. */
private String whitespaceSymbols = DEFAULT_WHITESPACE_SYMBOLS;
private String singleCharSymbols = DEFAULT_SINGLE_CHAR_SYMBOLS;
private String prepunctuationSymbols = DEFAULT_PREPUNCTUATION_SYMBOLS;
private String postpunctuationSymbols = DEFAULT_POSTPUNCTUATION_SYMBOLS;
/** The error description. */
private String errorDescription;
/** A place to store the current token. */
private Token token;
private Token lastToken;
/**
* Constructs a Tokenizer.
*/
public CharTokenizer() {}
/**
* Creates a tokenizer that will return tokens from the given string.
*
* @param string the string to tokenize
*/
public CharTokenizer(String string) {
setInputText(string);
}
/**
* Creates a tokenizer that will return tokens from the given file.
*
* @param file where to read the input from
*/
public CharTokenizer(Reader file) {
setInputReader(file);
}
/**
* Sets the whitespace symbols of this Tokenizer to the given symbols.
*
* @param symbols the whitespace symbols
*/
public void setWhitespaceSymbols(String symbols) {
whitespaceSymbols = symbols;
}
/**
* Sets the single character symbols of this Tokenizer to the given
* symbols.
*
* @param symbols the single character symbols
*/
public void setSingleCharSymbols(String symbols) {
singleCharSymbols = symbols;
}
/**
* Sets the prepunctuation symbols of this Tokenizer to the given symbols.
*
* @param symbols the prepunctuation symbols
*/
public void setPrepunctuationSymbols(String symbols) {
prepunctuationSymbols = symbols;
}
/**
* Sets the postpunctuation symbols of this Tokenizer to the given symbols.
*
* @param symbols the postpunctuation symbols
*/
public void setPostpunctuationSymbols(String symbols) {
postpunctuationSymbols = symbols;
}
/**
* Sets the text to tokenize.
*
* @param inputString the string to tokenize
*/
public void setInputText(String inputString) {
inputText = inputString;
currentPosition = 0;
if (inputText != null) {
getNextChar();
}
}
/**
* Sets the input reader
*
* @param reader the input source
*/
public void setInputReader(Reader reader) {
this.reader = reader;
getNextChar();
}
/**
* Returns the next token.
*
* @return the next token; use hasNext() to check whether more input remains
*/
public Token next() {
lastToken = token;
token = new Token();
// Skip whitespace
token.setWhitespace(getTokenOfCharClass(whitespaceSymbols));
// quoted strings currently ignored
// get prepunctuation
token.setPrepunctuation(getTokenOfCharClass(prepunctuationSymbols));
// get the symbol itself
if (singleCharSymbols.indexOf(currentChar) != -1) {
token.setWord(String.valueOf((char) currentChar));
getNextChar();
} else {
token.setWord(getTokenNotOfCharClass(whitespaceSymbols));
}
token.setPosition(currentPosition);
token.setLineNumber(lineNumber);
// This'll have token *plus* postpunctuation
// Get postpunctuation
removeTokenPostpunctuation();
return token;
}
/**
* Returns <code>true</code> if there are more tokens, <code>false</code>
* otherwise.
*
* @return <code>true</code> if there are more tokens <code>false</code>
* otherwise
*/
public boolean hasNext() {
int nextChar = currentChar;
return (nextChar != EOF);
}
public void remove() {
throw new UnsupportedOperationException();
}
/**
* Advances the currentPosition pointer by 1 (if it does not exceed the
* length of inputText) and returns the character at the new position.
*
* @return the next character, or EOF if no more characters exist
*/
private int getNextChar() {
if (reader != null) {
try {
int readVal = reader.read();
if (readVal == -1) {
currentChar = EOF;
} else {
currentChar = (char) readVal;
}
} catch (IOException ioe) {
currentChar = EOF;
errorDescription = ioe.getMessage();
}
} else if (inputText != null) {
if (currentPosition < inputText.length()) {
currentChar = (int) inputText.charAt(currentPosition);
} else {
currentChar = EOF;
}
}
if (currentChar != EOF) {
currentPosition++;
}
if (currentChar == '\n') {
lineNumber++;
}
return currentChar;
}
/**
* Starting from the current position of the input text, returns the
* subsequent characters of type charClass, and not of type
* singleCharSymbols.
*
* @param charClass the type of characters to look for
*
* @return a string of characters starting from the current position of the
* input text, until it encounters a character not in the string
* charClass
*
*/
private String getTokenOfCharClass(String charClass) {
return getTokenByCharClass(charClass, true);
}
/**
* Starting from the current position of the input text/file, returns the
* subsequent characters that are not of type singleCharSymbols, stopping at
* characters of type endingCharClass. For example, if the current string is
* "xxxxyyy", endingCharClass is "yz", and singleCharSymbols is "abc", then
* this method will return "xxxx".
*
* @param endingCharClass the type of characters to look for
*
* @return a string of characters from the current position until it
* encounters characters in endingCharClass
*
*/
private String getTokenNotOfCharClass(String endingCharClass) {
return getTokenByCharClass(endingCharClass, false);
}
/**
* Provides a combined implementation of getTokenOfCharClass() and
* getTokenNotOfCharClass(). If the parameter containThisCharClass is
* <code>true</code>, then a string from the current position to the last
* consecutive character in charClass is returned. If containThisCharClass is
* <code>false</code>, then the string before the first occurrence of a
* character in charClass is returned.
*
* @param charClass the string of characters you want included or excluded
* in your return
* @param containThisCharClass determines if you want characters in
* charClass in the returned string or not
*
* @return a string of characters from the current position, delimited
* according to charClass and containThisCharClass
*/
private String getTokenByCharClass(String charClass,
boolean containThisCharClass) {
final StringBuilder buffer = new StringBuilder();
// if we want the returned string to contain chars in charClass, then
// containThisCharClass is TRUE and
// (charClass.indexOf(currentChar) != -1) == containThisCharClass
// returns true; if we want it to stop at characters of charClass,
// then containThisCharClass is FALSE, and the condition returns
// false.
while ((charClass.indexOf(currentChar) != -1) == containThisCharClass
&& singleCharSymbols.indexOf(currentChar) == -1
&& currentChar != EOF) {
buffer.append((char) currentChar);
getNextChar();
}
return buffer.toString();
}
/**
* Removes the postpunctuation characters from the current token and
* stores them in the token's postpunctuation field.
*/
private void removeTokenPostpunctuation() {
if (token == null) {
return;
}
final String tokenWord = token.getWord();
int tokenLength = tokenWord.length();
int position = tokenLength - 1;
while (position > 0
&& postpunctuationSymbols.indexOf((int) tokenWord
.charAt(position)) != -1) {
position--;
}
if (tokenLength - 1 != position) {
// Copy postpunctuation from token
token.setPostpunctuation(tokenWord.substring(position + 1));
// truncate token at postpunctuation
token.setWord(tokenWord.substring(0, position + 1));
} else {
token.setPostpunctuation("");
}
}
/**
* Returns <code>true</code> if there were errors while reading tokens
*
* @return <code>true</code> if there were errors; <code>false</code>
* otherwise
*/
public boolean hasErrors() {
return errorDescription != null;
}
/**
* If hasErrors returns <code>true</code>, this will return a description
* of the error encountered, otherwise it will return <code>null</code>
*
* @return a description of the last error that occurred.
*/
public String getErrorDescription() {
return errorDescription;
}
/**
* Determines if the current token should start a new sentence.
*
* @return <code>true</code> if a new sentence should be started
*/
public boolean isSentenceSeparator() {
// Check for missing tokens before dereferencing them.
if (lastToken == null || token == null) {
return false;
}
String tokenWhiteSpace = token.getWhitespace();
String lastTokenPostpunctuation = lastToken.getPostpunctuation();
if (tokenWhiteSpace.indexOf('\n') != tokenWhiteSpace
.lastIndexOf('\n')) {
return true;
} else if (lastTokenPostpunctuation.indexOf(':') != -1
|| lastTokenPostpunctuation.indexOf('?') != -1
|| lastTokenPostpunctuation.indexOf('!') != -1) {
return true;
} else if (lastTokenPostpunctuation.indexOf('.') != -1
&& tokenWhiteSpace.length() > 1
&& Character.isUpperCase(token.getWord().charAt(0))) {
return true;
} else {
String lastWord = lastToken.getWord();
int lastWordLength = lastWord.length();
if (lastTokenPostpunctuation.indexOf('.') != -1
&&
/* next word starts with a capital */
Character.isUpperCase(token.getWord().charAt(0))
&&
/* last word isn't an abbreviation */
!(Character.isUpperCase(lastWord
.charAt(lastWordLength - 1)) || (lastWordLength < 4 && Character
.isUpperCase(lastWord.charAt(0))))) {
return true;
}
}
return false;
}
}


@@ -0,0 +1,608 @@
/**
* Portions Copyright 2001 Sun Microsystems, Inc.
* Portions Copyright 1999-2001 Language Technologies Institute,
* Carnegie Mellon University.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*/
package edu.cmu.sphinx.alignment.tokenizer;
import java.io.*;
import java.net.URL;
import java.util.StringTokenizer;
import java.util.logging.Logger;
import java.util.regex.Pattern;
/**
* Implementation of a Classification and Regression Tree (CART) that is used
* more like a binary decision tree, with each node containing a decision or a
* final value. The decision nodes in the CART trees operate on an Item and
* have the following format:
*
* <pre>
* NODE feat operand value qfalse
* </pre>
*
* <p>
* Where <code>feat</code> is a string that represents a feature to pass to
* the <code>findFeature</code> method of an item.
*
* <p>
* The <code>value</code> represents the value to be compared against the
* feature obtained from the item via the <code>feat</code> string. The
* <code>operand</code> is the operation to do the comparison. The available
* operands are as follows:
*
* <ul>
* <li>&lt; - the feature is less than the value
* <li>= - the feature is equal to the value
* <li>&gt; - the feature is greater than the value
* <li>MATCHES - the feature matches the regular expression stored in value
* <li>IN - [[[TODO: still guessing because none of the CART's in Flite seem to
* use IN]]] the value is in the list defined by the feature.
* </ul>
*
* <p>
* [[[TODO: provide support for the IN operator.]]]
*
* <p>
* For &lt; and &gt;, this CART coerces the value and feature to floats. For =,
* this CART coerces the value and feature to strings and checks for string
* equality. For MATCHES, this CART uses the value as a regular expression and
* compares the obtained feature to that.
*
* <p>
* A CART is represented by an array in this implementation. The
* <code>qfalse</code> value represents the index of the array to go to if the
* comparison does not match. In this implementation, the qtrue index is always
* implied, and represents the next element in the array. The root node of the
* CART is the first element in the array.
*
* <p>
* The interpretations always start at the root node of the CART and continue
* until a final node is found. The final nodes have the following form:
*
* <pre>
* LEAF value
* </pre>
*
* <p>
* Where <code>value</code> represents the value of the node. Reaching a final
* node indicates the interpretation is over and the value of the node is the
* interpretation result.
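* <p>
* For example, a tiny hypothetical tree:
* <pre>
* TOTAL 3
* NODE name = String(a) 2
* LEAF String(yes)
* LEAF String(no)
* </pre>
* The root compares the item's "name" feature with "a"; on a match the
* interpretation continues at the next entry (the "yes" leaf), otherwise it
* jumps to index 2 (the "no" leaf).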
*/
public class DecisionTree {
/** Logger instance. */
private static final Logger logger = Logger.getLogger(DecisionTree.class.getSimpleName());
/**
* Entry in file represents the total number of nodes in the file. This
* should be at the top of the file. The format should be "TOTAL n" where n
* is an integer value.
*/
final static String TOTAL = "TOTAL";
/**
* Entry in file represents a node. The format should be
* "NODE feat op val f" where 'feat' represents a feature, op represents an
* operand, val is the value, and f is the index of the node to go to is
* there isn't a match.
*/
final static String NODE = "NODE";
/**
* Entry in file represents a final node. The format should be "LEAF val"
* where val represents the value.
*/
final static String LEAF = "LEAF";
/**
* OPERAND_MATCHES
*/
final static String OPERAND_MATCHES = "MATCHES";
/**
* The CART. Entries can be DecisionNode or LeafNode. An ArrayList could be
* used here -- I chose not to because I thought it might be quicker to
* avoid dealing with the dynamic resizing.
*/
Node[] cart = null;
/**
* The number of nodes in the CART.
*/
transient int curNode = 0;
/**
* Creates a new CART by reading from the given URL.
*
* @param url the location of the CART data
*
* @throws IOException if errors occur while reading the data
*/
public DecisionTree(URL url) throws IOException {
BufferedReader reader;
String line;
reader = new BufferedReader(new InputStreamReader(url.openStream()));
line = reader.readLine();
while (line != null) {
if (!line.startsWith("***")) {
parseAndAdd(line);
}
line = reader.readLine();
}
reader.close();
}
/**
* Creates a new CART by reading from the given reader.
*
* @param reader the source of the CART data
* @param nodes the number of nodes to read for this cart
*
* @throws IOException if errors occur while reading the data
*/
public DecisionTree(BufferedReader reader, int nodes) throws IOException {
this(nodes);
String line;
for (int i = 0; i < nodes; i++) {
line = reader.readLine();
if (!line.startsWith("***")) {
parseAndAdd(line);
}
}
}
/**
* Creates a new CART that will be populated with nodes later.
*
* @param numNodes the number of nodes
*/
private DecisionTree(int numNodes) {
cart = new Node[numNodes];
}
/**
* Dump the CART tree as a dot file.
* <p>
* The dot tool is part of the graphviz distribution at <a
* href="http://www.graphviz.org/">http://www.graphviz.org/</a>. If
* installed, call it as "dot -O -Tpdf *.dot" from the console to generate
* pdfs.
* </p>
*
* @param out The PrintWriter to write to.
*/
public void dumpDot(PrintWriter out) {
out.write("digraph \"" + "CART Tree" + "\" {\n");
out.write("rankdir = LR\n");
for (Node n : cart) {
out.println("\tnode" + Math.abs(n.hashCode()) + " [ label=\""
+ n.toString() + "\", color=" + dumpDotNodeColor(n)
+ ", shape=" + dumpDotNodeShape(n) + " ]\n");
if (n instanceof DecisionNode) {
DecisionNode dn = (DecisionNode) n;
if (dn.qtrue < cart.length && cart[dn.qtrue] != null) {
out.write("\tnode" + Math.abs(n.hashCode()) + " -> node"
+ Math.abs(cart[dn.qtrue].hashCode())
+ " [ label=" + "TRUE" + " ]\n");
}
if (dn.qfalse < cart.length && cart[dn.qfalse] != null) {
out.write("\tnode" + Math.abs(n.hashCode()) + " -> node"
+ Math.abs(cart[dn.qfalse].hashCode())
+ " [ label=" + "FALSE" + " ]\n");
}
}
}
out.write("}\n");
out.close();
}
protected String dumpDotNodeColor(Node n) {
if (n instanceof LeafNode) {
return "green";
}
return "red";
}
protected String dumpDotNodeShape(Node n) {
return "box";
}
/**
* Creates a node from the given input line and adds it to the CART. It
* expects the TOTAL line to come before any of the nodes.
*
* @param line a line of input to parse
*/
protected void parseAndAdd(String line) {
StringTokenizer tokenizer = new StringTokenizer(line, " ");
String type = tokenizer.nextToken();
if (type.equals(LEAF) || type.equals(NODE)) {
cart[curNode] = getNode(type, tokenizer, curNode);
cart[curNode].setCreationLine(line);
curNode++;
} else if (type.equals(TOTAL)) {
cart = new Node[Integer.parseInt(tokenizer.nextToken())];
curNode = 0;
} else {
throw new Error("Invalid CART type: " + type);
}
}
/**
* Gets the node based upon the type and tokenizer.
*
* @param type <code>NODE</code> or <code>LEAF</code>
* @param tokenizer the StringTokenizer containing the data to get
* @param currentNode the index of the current node we're looking at
*
* @return the node
*/
protected Node getNode(String type, StringTokenizer tokenizer,
int currentNode) {
if (type.equals(NODE)) {
String feature = tokenizer.nextToken();
String operand = tokenizer.nextToken();
Object value = parseValue(tokenizer.nextToken());
int qfalse = Integer.parseInt(tokenizer.nextToken());
if (operand.equals(OPERAND_MATCHES)) {
return new MatchingNode(feature, value.toString(),
currentNode + 1, qfalse);
} else {
return new ComparisonNode(feature, value, operand,
currentNode + 1, qfalse);
}
} else if (type.equals(LEAF)) {
return new LeafNode(parseValue(tokenizer.nextToken()));
}
return null;
}
/**
* Coerces a string into a value.
*
* @param string of the form "type(value)"; for example, "Float(2.3)"
*
* @return the value
*/
protected Object parseValue(String string) {
int openParen = string.indexOf("(");
String type = string.substring(0, openParen);
String value = string.substring(openParen + 1, string.length() - 1);
if (type.equals("String")) {
return value;
} else if (type.equals("Float")) {
return new Float(Float.parseFloat(value));
} else if (type.equals("Integer")) {
return new Integer(Integer.parseInt(value));
} else if (type.equals("List")) {
StringTokenizer tok = new StringTokenizer(value, ",");
int size = tok.countTokens();
int[] values = new int[size];
for (int i = 0; i < size; i++) {
float fval = Float.parseFloat(tok.nextToken());
values[i] = Math.round(fval);
}
return values;
} else {
throw new Error("Unknown type: " + type);
}
}
/**
* Passes the given item through this CART and returns the interpretation.
*
* @param item the item to analyze
*
* @return the interpretation
*/
public Object interpret(Item item) {
int nodeIndex = 0;
DecisionNode decision;
while (!(cart[nodeIndex] instanceof LeafNode)) {
decision = (DecisionNode) cart[nodeIndex];
nodeIndex = decision.getNextNode(item);
}
logger.fine("LEAF " + cart[nodeIndex].getValue());
return ((LeafNode) cart[nodeIndex]).getValue();
}
/**
* A node for the CART.
*/
static abstract class Node {
/**
* The value of this node.
*/
protected Object value;
/**
* Create a new Node with the given value.
*/
public Node(Object value) {
this.value = value;
}
/**
* Get the value.
*/
public Object getValue() {
return value;
}
/**
* Return a string representation of the type of the value.
*/
public String getValueString() {
if (value == null) {
return "NULL()";
} else if (value instanceof String) {
return "String(" + value.toString() + ")";
} else if (value instanceof Float) {
return "Float(" + value.toString() + ")";
} else if (value instanceof Integer) {
return "Integer(" + value.toString() + ")";
} else {
return value.getClass().toString() + "(" + value.toString()
+ ")";
}
}
/**
* Sets the line of text used to create this node.
*
* @param line the creation line
*/
public void setCreationLine(String line) {}
}
/**
* A decision node that determines the next Node to go to in the CART.
*/
abstract static class DecisionNode extends Node {
/**
* The feature used to find a value from an Item.
*/
private PathExtractor path;
/**
* Index of Node to go to if the comparison doesn't match.
*/
protected int qfalse;
/**
* Index of Node to go to if the comparison matches.
*/
protected int qtrue;
/**
* The feature used to find a value from an Item.
*/
public String getFeature() {
return path.toString();
}
/**
* Find the feature associated with this DecisionNode and the given
* item
*
* @param item the item to start from
* @return the object representing the feature
*/
public Object findFeature(Item item) {
return path.findFeature(item);
}
/**
* Returns the next node based upon the decision determined at this
* node.
*
* @param item the current item.
* @return the index of the next node
*/
public final int getNextNode(Item item) {
return getNextNode(findFeature(item));
}
/**
* Create a new DecisionNode.
*
* @param feature the string used to get a value from an Item
* @param value the value to compare to
* @param qtrue the Node index to go to if the comparison matches
* @param qfalse the Node machine index to go to upon no match
*/
public DecisionNode(String feature, Object value, int qtrue, int qfalse) {
super(value);
this.path = new PathExtractor(feature, true);
this.qtrue = qtrue;
this.qfalse = qfalse;
}
/**
* Get the next Node to go to in the CART. The return value is an index
* in the CART.
*/
abstract public int getNextNode(Object val);
}
/**
* A decision Node that compares two values.
*/
static class ComparisonNode extends DecisionNode {
/**
* LESS_THAN
*/
final static String LESS_THAN = "<";
/**
* EQUALS
*/
final static String EQUALS = "=";
/**
* GREATER_THAN
*/
final static String GREATER_THAN = ">";
/**
* The comparison type. One of LESS_THAN, GREATER_THAN, or EQUALS.
*/
String comparisonType;
/**
* Create a new ComparisonNode with the given values.
*
* @param feature the string used to get a value from an Item
* @param value the value to compare to
* @param comparisonType one of LESS_THAN, EQUAL_TO, or GREATER_THAN
* @param qtrue the Node index to go to if the comparison matches
* @param qfalse the Node index to go to upon no match
*/
public ComparisonNode(String feature, Object value,
String comparisonType, int qtrue, int qfalse) {
super(feature, value, qtrue, qfalse);
if (!comparisonType.equals(LESS_THAN)
&& !comparisonType.equals(EQUALS)
&& !comparisonType.equals(GREATER_THAN)) {
throw new Error("Invalid comparison type: " + comparisonType);
} else {
this.comparisonType = comparisonType;
}
}
/**
* Compares the given value and returns the appropriate Node index.
* IMPLEMENTATION NOTE: for LESS_THAN and GREATER_THAN, the Node's value
* and the value passed in are converted to floating point values. For
* EQUALS, the Node's value and the value passed in are compared as
* Strings. This mirrors the behavior of Flite.
*
* @param val the value to compare
*/
public int getNextNode(Object val) {
boolean yes = false;
int ret;
if (comparisonType.equals(LESS_THAN)
|| comparisonType.equals(GREATER_THAN)) {
float cart_fval;
float fval;
if (value instanceof Float) {
cart_fval = ((Float) value).floatValue();
} else {
cart_fval = Float.parseFloat(value.toString());
}
if (val instanceof Float) {
fval = ((Float) val).floatValue();
} else {
fval = Float.parseFloat(val.toString());
}
if (comparisonType.equals(LESS_THAN)) {
yes = (fval < cart_fval);
} else {
yes = (fval > cart_fval);
}
} else { // comparisonType = "="
String sval = val.toString();
String cart_sval = value.toString();
yes = sval.equals(cart_sval);
}
if (yes) {
ret = qtrue;
} else {
ret = qfalse;
}
logger.fine(trace(val, yes, ret));
return ret;
}
private String trace(Object value, boolean match, int next) {
return "NODE " + getFeature() + " [" + value + "] "
+ comparisonType + " [" + getValue() + "] "
+ (match ? "Yes" : "No") + " next " + next;
}
/**
* Get a string representation of this Node.
*/
public String toString() {
return "NODE " + getFeature() + " " + comparisonType + " "
+ getValueString() + " " + Integer.toString(qtrue) + " "
+ Integer.toString(qfalse);
}
}
/**
* A Node that checks for a regular expression match.
*/
static class MatchingNode extends DecisionNode {
Pattern pattern;
/**
* Create a new MatchingNode with the given values.
*
* @param feature the string used to get a value from an Item
* @param regex the regular expression
* @param qtrue the Node index to go to if the comparison matches
* @param qfalse the Node index to go to upon no match
*/
public MatchingNode(String feature, String regex, int qtrue, int qfalse) {
super(feature, regex, qtrue, qfalse);
this.pattern = Pattern.compile(regex);
}
/**
* Compare the given value and return the appropriate CART index.
*
* @param val the value to compare -- this must be a String
*/
public int getNextNode(Object val) {
return pattern.matcher((String) val).matches() ? qtrue : qfalse;
}
/**
* Get a string representation of this Node.
*/
public String toString() {
StringBuffer buf =
new StringBuffer(NODE + " " + getFeature() + " "
+ OPERAND_MATCHES);
buf.append(getValueString() + " ");
buf.append(Integer.toString(qtrue) + " ");
buf.append(Integer.toString(qfalse));
return buf.toString();
}
}
/**
* The final Node of a CART. This is just a marker class.
*/
static class LeafNode extends Node {
/**
* Create a new LeafNode with the given value.
*
* @param value the value of this LeafNode
*/
public LeafNode(Object value) {
super(value);
}
/**
* Get a string representation of this Node.
*/
public String toString() {
return "LEAF " + getValueString();
}
}
}


@@ -0,0 +1,145 @@
/**
* Portions Copyright 2001 Sun Microsystems, Inc.
* Portions Copyright 1999-2001 Language Technologies Institute,
* Carnegie Mellon University.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*/
package edu.cmu.sphinx.alignment.tokenizer;
import java.text.DecimalFormat;
import java.util.LinkedHashMap;
import java.util.Map;
/**
* Implementation of the FeatureSet interface.
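* <p>
* A small usage sketch (the feature names are hypothetical):
* <pre>
* FeatureSet features = new FeatureSet();
* features.setString("name", "hello");
* features.setInt("count", 3);
* int count = features.isPresent("count") ? features.getInt("count") : 0;
* </pre>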
*/
public class FeatureSet {
private final Map<String, Object> featureMap;
static DecimalFormat formatter;
/**
* Creates a new empty feature set
*/
public FeatureSet() {
featureMap = new LinkedHashMap<String, Object>();
}
/**
* Determines if the given feature is present.
*
* @param name the name of the feature of interest
*
* @return true if the named feature is present
*/
public boolean isPresent(String name) {
return featureMap.containsKey(name);
}
/**
* Removes the named feature from this set of features.
*
* @param name the name of the feature of interest
*/
public void remove(String name) {
featureMap.remove(name);
}
/**
* Convenience method that returns the named feature as a string.
*
* @param name the name of the feature
*
* @return the value associated with the name or null if the value is not
* found
*
* @throws ClassCastException if the associated value is not a String
*/
public String getString(String name) {
return (String) getObject(name);
}
/**
* Convenience method that returns the named feature as an int.
*
* @param name the name of the feature
*
* @return the value associated with the name (a NullPointerException is
* thrown if the value is not found)
*
* @throws ClassCastException if the associated value is not an int.
*/
public int getInt(String name) {
return ((Integer) getObject(name)).intValue();
}
/**
* Convenience method that returns the named feature as a float.
*
* @param name the name of the feature
*
* @return the value associated with the name (a NullPointerException is
* thrown if the value is not found)
*
* @throws ClassCastException if the associated value is not a float
*/
public float getFloat(String name) {
return ((Float) getObject(name)).floatValue();
}
/**
* Returns the named feature as an object.
*
* @param name the name of the feature
*
* @return the value associated with the name or null if the value is not
* found
*/
public Object getObject(String name) {
return featureMap.get(name);
}
/**
* Convenience method that sets the named feature as an int.
*
* @param name the name of the feature
* @param value the value of the feature
*/
public void setInt(String name, int value) {
setObject(name, new Integer(value));
}
/**
* Convenience method that sets the named feature as a float.
*
* @param name the name of the feature
* @param value the value of the feature
*/
public void setFloat(String name, float value) {
setObject(name, new Float(value));
}
/**
* Convenience method that sets the named feature as a String.
*
* @param name the name of the feature
* @param value the value of the feature
*/
public void setString(String name, String value) {
setObject(name, value);
}
/**
* Sets the named feature.
*
* @param name the name of the feature
* @param value the value of the feature
*/
public void setObject(String name, Object value) {
featureMap.put(name, value);
}
}


@@ -0,0 +1,447 @@
/**
* Portions Copyright 2001-2003 Sun Microsystems, Inc.
* Portions Copyright 1999-2001 Language Technologies Institute,
* Carnegie Mellon University.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*/
package edu.cmu.sphinx.alignment.tokenizer;
import java.util.StringTokenizer;
/**
* Represents a node in a Relation. Items can have shared contents but each
* item has its own set of Daughters. The shared contents of an item
* (represented by ItemContents) includes the feature set for the item and the
* set of all relations that this item is contained in. An item can be
* contained in a number of relations and can appear as a daughter of other
* items. This class is used to keep track of all of these relationships.
* There may be many instances of Item that reference the same shared
* ItemContents.
*/
public class Item {
private Relation ownerRelation;
private ItemContents contents;
private Item parent;
private Item daughter;
private Item next;
private Item prev;
/**
* Creates an item. The item is coupled to a particular Relation. If shared
* contents is null a new sharedContents is created.
*
* @param relation the relation that owns this item
* @param sharedContents the contents that is shared with others. If null,
* a new sharedContents is created.
*/
public Item(Relation relation, ItemContents sharedContents) {
ownerRelation = relation;
if (sharedContents != null) {
contents = sharedContents;
} else {
contents = new ItemContents();
}
parent = null;
daughter = null;
next = null;
prev = null;
getSharedContents().addItemRelation(relation.getName(), this);
}
/**
* Finds the item in the given relation that has the same shared contents.
*
* @param relationName the relation of interest
*
* @return the item as found in the given relation or null if not found
*/
public Item getItemAs(String relationName) {
return getSharedContents().getItemRelation(relationName);
}
/**
* Retrieves the owning Relation.
*
* @return the relation that owns this item
*/
public Relation getOwnerRelation() {
return ownerRelation;
}
/**
* Retrieves the shared contents for this item.
*
* @return the shared item contents
*/
public ItemContents getSharedContents() {
return contents;
}
/**
* Determines if this item has daughters.
*
* @return true if this item has daughters
*/
public boolean hasDaughters() {
return daughter != null;
}
/**
* Retrieves the first daughter of this item.
*
* @return the first daughter or null if none
*/
public Item getDaughter() {
return daughter;
}
/**
* Retrieves the Nth daughter of this item.
*
* @param which the index of the daughter to return
*
* @return the Nth daughter or null if none at the given index
*/
public Item getNthDaughter(int which) {
Item d = daughter;
int count = 0;
while (count++ != which && d != null) {
d = d.next;
}
return d;
}
/**
* Retrieves the last daughter of this item.
*
* @return the last daughter or null if none
*/
public Item getLastDaughter() {
Item d = daughter;
if (d == null) {
return null;
}
while (d.next != null) {
d = d.next;
}
return d;
}
/**
* Adds the given item as a daughter to this item.
*
* @param item the item whose contents the new daughter will share
* @return the created daughter item
*/
public Item addDaughter(Item item) {
Item newItem;
ItemContents contents;
Item p = getLastDaughter();
if (p != null) {
newItem = p.appendItem(item);
} else {
if (item == null) {
contents = new ItemContents();
} else {
contents = item.getSharedContents();
}
newItem = new Item(getOwnerRelation(), contents);
newItem.parent = this;
daughter = newItem;
}
return newItem;
}
/**
* Creates a new Item, adds it as a daughter to this item and returns the
* new item.
*
* @return the newly created item that was added as a daughter
*/
public Item createDaughter() {
return addDaughter(null);
}
/**
* Returns the parent of this item.
*
* @return the parent of this item
*/
public Item getParent() {
Item n;
for (n = this; n.prev != null; n = n.prev) {
}
return n.parent;
}
/**
* Sets the parent of this item.
*
* @param parent the parent of this item
*/
/*
* private void setParent(Item parent) { this.parent = parent; }
*/
/**
* Returns the utterance associated with this item.
*
* @return the utterance that contains this item
*/
public Utterance getUtterance() {
return getOwnerRelation().getUtterance();
}
/**
* Returns the feature set of this item.
*
* @return the feature set of this item
*/
public FeatureSet getFeatures() {
return getSharedContents().getFeatures();
}
/**
* Finds the feature by following the given path. Path is a string of ":"
* or "." separated strings with the following interpretations:
* <ul>
* <li>n - next item
* <li>p - previous item
* <li>parent - the parent
* <li>daughter - the daughter
* <li>daughter1 - same as daughter
* <li>daughtern - the last daughter
* <li>R:relname - the item as found in the given relation 'relname'
* </ul>
* The last element of the path will be interpreted as a voice/language
* specific feature function (if present) or an item feature name. If the
* feature function exists it will be called with the item specified by the
* path; otherwise, a feature will be retrieved with the given name. If
* neither exists, then a String "0" is returned.
*
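* <p>
* For example (the path and feature name are hypothetical):
* <pre>
* Object name = item.findFeature("n.parent.name");
* </pre>
* follows the next item, then its parent, and reads that item's "name"
* feature.
*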
* @param pathAndFeature the path to follow
* @return the feature at the given path, or the String "0" if not found
*/
public Object findFeature(String pathAndFeature) {
int lastDot;
String feature;
String path;
Item item;
Object results = null;
lastDot = pathAndFeature.lastIndexOf(".");
// string can be of the form "p.feature" or just "feature"
if (lastDot == -1) {
feature = pathAndFeature;
path = null;
} else {
feature = pathAndFeature.substring(lastDot + 1);
path = pathAndFeature.substring(0, lastDot);
}
item = findItem(path);
if (item != null) {
results = item.getFeatures().getObject(feature);
}
results = (results == null) ? "0" : results;
// System.out.println("FI " + pathAndFeature + " are " + results);
return results;
}
/**
* Finds the item specified by the given path.
*
* Path is a string of ":" or "." separated strings with the following
* interpretations:
* <ul>
* <li>n - next item
* <li>p - previous item
* <li>parent - the parent
* <li>daughter - the daughter
* <li>daughter1 - same as daughter
* <li>daughtern - the last daughter
* <li>R:relname - the item as found in the given relation 'relname'
* </ul>
* If the given path takes us outside of the bounds of the item graph, then
* list access exceptions will be thrown.
*
* @param path the path to follow
*
* @return the item at the given path
*/
public Item findItem(String path) {
Item pitem = this;
StringTokenizer tok;
if (path == null) {
return this;
}
tok = new StringTokenizer(path, ":.");
while (pitem != null && tok.hasMoreTokens()) {
String token = tok.nextToken();
if (token.equals("n")) {
pitem = pitem.getNext();
} else if (token.equals("p")) {
pitem = pitem.getPrevious();
} else if (token.equals("nn")) {
pitem = pitem.getNext();
if (pitem != null) {
pitem = pitem.getNext();
}
} else if (token.equals("pp")) {
pitem = pitem.getPrevious();
if (pitem != null) {
pitem = pitem.getPrevious();
}
} else if (token.equals("parent")) {
pitem = pitem.getParent();
} else if (token.equals("daughter") || token.equals("daughter1")) {
pitem = pitem.getDaughter();
} else if (token.equals("daughtern")) {
pitem = pitem.getLastDaughter();
} else if (token.equals("R")) {
String relationName = tok.nextToken();
pitem =
pitem.getSharedContents()
.getItemRelation(relationName);
} else {
System.out.println("findItem: bad feature " + token + " in "
+ path);
}
}
return pitem;
}
/**
* Gets the next item in this list.
*
* @return the next item or null
*/
public Item getNext() {
return next;
}
/**
* Gets the previous item in this list.
*
* @return the previous item or null
*/
public Item getPrevious() {
return prev;
}
/**
* Appends an item in this list after this item.
*
* @param originalItem the item whose contents will be shared by the new
* item (may be null)
*
* @return the newly appended item
*/
public Item appendItem(Item originalItem) {
ItemContents contents;
Item newItem;
if (originalItem == null) {
contents = null;
} else {
contents = originalItem.getSharedContents();
}
newItem = new Item(getOwnerRelation(), contents);
newItem.next = this.next;
if (this.next != null) {
this.next.prev = newItem;
}
attach(newItem);
if (this.ownerRelation.getTail() == this) {
this.ownerRelation.setTail(newItem);
}
return newItem;
}
/**
* Attaches/appends an item to this one.
*
* @param item the item to append
*/
void attach(Item item) {
this.next = item;
item.prev = this;
}
/**
* Prepends an item in this list before this item.
*
* @param originalItem the item whose contents will be shared by the new
* item (may be null)
*
* @return the newly prepended item
*/
public Item prependItem(Item originalItem) {
ItemContents contents;
Item newItem;
if (originalItem == null) {
contents = null;
} else {
contents = originalItem.getSharedContents();
}
newItem = new Item(getOwnerRelation(), contents);
newItem.prev = this.prev;
if (this.prev != null) {
this.prev.next = newItem;
}
newItem.next = this;
this.prev = newItem;
if (this.parent != null) {
this.parent.daughter = newItem;
newItem.parent = this.parent;
this.parent = null;
}
if (this.ownerRelation.getHead() == this) {
this.ownerRelation.setHead(newItem);
}
return newItem;
}
// Inherited from object
public String toString() {
// if we have a feature called 'name' use that
// otherwise fall back on the default.
String name = getFeatures().getString("name");
if (name == null) {
name = "";
}
return name;
}
/**
* Determines if the shared contents of the two items are the same.
*
* @param otherItem the item to compare
*
* @return true if the shared contents are the same
*/
public boolean equalsShared(Item otherItem) {
if (otherItem == null) {
return false;
} else {
return getSharedContents().equals(otherItem.getSharedContents());
}
}
}


@@ -0,0 +1,74 @@
/**
* Portions Copyright 2001 Sun Microsystems, Inc.
* Portions Copyright 1999-2001 Language Technologies Institute,
* Carnegie Mellon University.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*/
package edu.cmu.sphinx.alignment.tokenizer;
/**
* Contains the information that is shared between multiple items.
*/
public class ItemContents {
private FeatureSet features;
private FeatureSet relations;
/**
* Class Constructor.
*/
public ItemContents() {
features = new FeatureSet();
relations = new FeatureSet();
}
/**
* Adds the given item to the set of relations. Whenever an item is added
* to a relation, it should add the name and the Item reference to this set
* of name/item mappings. This allows an item to find out the set of all
* relations that it is contained in.
*
* @param relationName the name of the relation
* @param item the item reference in the relation
*/
public void addItemRelation(String relationName, Item item) {
// System.out.println("AddItemRelation: " + relationName
// + " item: " + item);
relations.setObject(relationName, item);
}
/**
* Removes the relation/item mapping from this ItemContents.
*
* @param relationName the name of the relation/item to remove
*/
public void removeItemRelation(String relationName) {
relations.remove(relationName);
}
/**
* Given the name of a relation, returns the item that shares the same
* ItemContents.
*
* @param relationName the name of the relation of interest
*
* @return the item associated with this ItemContents in the named
* relation, or null if it does not exist
*/
public Item getItemRelation(String relationName) {
return (Item) relations.getObject(relationName);
}
/**
* Returns the feature set for this item contents.
*
* @return the FeatureSet for this contents
*/
public FeatureSet getFeatures() {
return features;
}
}


@@ -0,0 +1,449 @@
/**
* Portions Copyright 2001-2003 Sun Microsystems, Inc.
* Portions Copyright 1999-2001 Language Technologies Institute,
* Carnegie Mellon University.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*/
package edu.cmu.sphinx.alignment.tokenizer;
/**
* Expands Strings containing digit characters into a list of words
* representing those digits.
*
* It translates the following code from flite:
* <code>lang/usEnglish/us_expand.c</code>
*/
public class NumberExpander {
private static final String[] digit2num = {"zero", "one", "two", "three",
"four", "five", "six", "seven", "eight", "nine"};
private static final String[] digit2teen = {"ten", /* shouldn't get called */
"eleven", "twelve", "thirteen", "fourteen", "fifteen", "sixteen",
"seventeen", "eighteen", "nineteen"};
private static final String[] digit2enty = {"zero", /* shouldn't get called */
"ten", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty",
"ninety"};
private static final String[] ord2num = {"zeroth", "first", "second",
"third", "fourth", "fifth", "sixth", "seventh", "eighth", "ninth"};
private static final String[] ord2teen = {"tenth", /* shouldn't get called */
"eleventh", "twelfth", "thirteenth", "fourteenth", "fifteenth",
"sixteenth", "seventeenth", "eighteenth", "nineteenth"};
private static final String[] ord2enty = {"zeroth", /* shouldn't get called */
"tenth", "twentieth", "thirtieth", "fortieth", "fiftieth", "sixtieth",
"seventieth", "eightieth", "ninetieth"};
private static String[] digit2Numness = {
"", "tens", "twenties", "thirties", "fourties", "fifties",
"sixties", "seventies", "eighties", "nineties"
};
/**
* Unconstructable
*/
private NumberExpander() {}
/**
* Expands a digit string into a list of English words representing that
* number. For example, "1234" expands to "one thousand two hundred thirty four".
*
* @param numberString the digit string to expand.
* @param wordRelation words are added to this Relation
*/
public static void expandNumber(String numberString,
WordRelation wordRelation) {
int numDigits = numberString.length();
if (numDigits == 0) {
// nothing to expand
} else if (numDigits == 1) {
expandDigits(numberString, wordRelation);
} else if (numDigits == 2) {
expand2DigitNumber(numberString, wordRelation);
} else if (numDigits == 3) {
expand3DigitNumber(numberString, wordRelation);
} else if (numDigits < 7) {
expandBelow7DigitNumber(numberString, wordRelation);
} else if (numDigits < 10) {
expandBelow10DigitNumber(numberString, wordRelation);
} else if (numDigits < 13) {
expandBelow13DigitNumber(numberString, wordRelation);
} else {
expandDigits(numberString, wordRelation);
}
}
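// Worked examples of the dispatch above:
// "7" -> "seven"; "42" -> "forty two";
// "1234" -> "one thousand two hundred thirty four";
// "19000000" -> "nineteen million".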
/**
* Expands a two-digit string into a list of English words.
*
* @param numberString the string which is the number to expand
* @param wordRelation words are added to this Relation
*/
private static void expand2DigitNumber(String numberString,
WordRelation wordRelation) {
if (numberString.charAt(0) == '0') {
// numberString is "0X"
if (numberString.charAt(1) == '0') {
// numberString is "00", do nothing
} else {
// numberString is "01", "02" ...
String number = digit2num[numberString.charAt(1) - '0'];
wordRelation.addWord(number);
}
} else if (numberString.charAt(1) == '0') {
// numberString is "10", "20", ...
String number = digit2enty[numberString.charAt(0) - '0'];
wordRelation.addWord(number);
} else if (numberString.charAt(0) == '1') {
// numberString is "11", "12", ..., "19"
String number = digit2teen[numberString.charAt(1) - '0'];
wordRelation.addWord(number);
} else {
// numberString is "2X", "3X", ...
String enty = digit2enty[numberString.charAt(0) - '0'];
wordRelation.addWord(enty);
expandDigits(numberString.substring(1, numberString.length()),
wordRelation);
}
}
/**
* Expands a three-digit string into a list of English words.
*
* @param numberString the string which is the number to expand
* @param wordRelation words are added to this Relation
*/
private static void expand3DigitNumber(String numberString,
WordRelation wordRelation) {
if (numberString.charAt(0) == '0') {
expandNumberAt(numberString, 1, wordRelation);
} else {
String hundredDigit = digit2num[numberString.charAt(0) - '0'];
wordRelation.addWord(hundredDigit);
wordRelation.addWord("hundred");
expandNumberAt(numberString, 1, wordRelation);
}
}
/**
* Expands a string that is a 4 to 6 digit number into a list of English
* words. For example, "333000" expands to "three hundred thirty three
* thousand".
*
* @param numberString the string which is the number to expand
* @param wordRelation words are added to this Relation
*/
private static void expandBelow7DigitNumber(String numberString,
WordRelation wordRelation) {
expandLargeNumber(numberString, "thousand", 3, wordRelation);
}
/**
* Expands a string that is a 7 to 9 digit number into a list of English
* words. For example, "19000000" expands to "nineteen million".
*
* @param numberString the string which is the number to expand
* @param wordRelation words are added to this Relation
*/
private static void expandBelow10DigitNumber(String numberString,
WordRelation wordRelation) {
expandLargeNumber(numberString, "million", 6, wordRelation);
}
/**
* Expands a string that is a 10 to 12 digit number into a list of English
* words. For example, "27000000000" expands to "twenty seven billion".
*
* @param numberString the string which is the number to expand
* @param wordRelation words are added to this Relation
*/
private static void expandBelow13DigitNumber(String numberString,
WordRelation wordRelation) {
expandLargeNumber(numberString, "billion", 9, wordRelation);
}
/**
* Expands a string that is a number longer than 3 digits into a list of
* English words. For example, "1000" into one thousand.
*
* @param numberString the string which is the number to expand
* @param order either "thousand", "million", or "billion"
* @param numberZeroes the number of zeroes, depending on the order, so it's
* either 3, 6, or 9
* @param wordRelation words are added to this Relation
*/
private static void expandLargeNumber(String numberString, String order,
int numberZeroes, WordRelation wordRelation) {
int numberDigits = numberString.length();
// parse out the prefix, e.g., "113" in "113,000"
int i = numberDigits - numberZeroes;
String part = numberString.substring(0, i);
// get how many thousands/millions/billions
Item oldTail = wordRelation.getTail();
expandNumber(part, wordRelation);
if (wordRelation.getTail() != oldTail) {
wordRelation.addWord(order);
}
expandNumberAt(numberString, i, wordRelation);
}
/**
* Expands the substring of the given number string that starts at the
* given index. E.g., expandNumberAt("1100", 1) gives "one hundred"
*
* @param numberString the string which is the number to expand
* @param startIndex the starting position
* @param wordRelation words are added to this Relation
*/
private static void expandNumberAt(String numberString, int startIndex,
WordRelation wordRelation) {
expandNumber(
numberString.substring(startIndex, numberString.length()),
wordRelation);
}
/**
* Expands the given token into a list of words pronouncing it digit by digit.
*
* @param numberString the string which is the number to expand
* @param wordRelation words are added to this Relation
*/
public static void expandDigits(String numberString,
WordRelation wordRelation) {
int numberDigits = numberString.length();
for (int i = 0; i < numberDigits; i++) {
char digit = numberString.charAt(i);
if (Character.isDigit(digit)) {
wordRelation.addWord(digit2num[numberString.charAt(i) - '0']);
} else {
wordRelation.addWord("umpty");
}
}
}
/**
* Expands the digit string of an ordinal number.
*
* @param rawNumberString the string which is the number to expand
* @param wordRelation words are added to this Relation
*/
public static void expandOrdinal(String rawNumberString,
WordRelation wordRelation) {
// remove all ','s from the raw number string
expandNumber(rawNumberString.replace(",", ""), wordRelation);
// get the last in the list of number strings
Item lastItem = wordRelation.getTail();
if (lastItem != null) {
FeatureSet featureSet = lastItem.getFeatures();
String lastNumber = featureSet.getString("name");
String ordinal = findMatchInArray(lastNumber, digit2num, ord2num);
if (ordinal == null) {
ordinal = findMatchInArray(lastNumber, digit2teen, ord2teen);
}
if (ordinal == null) {
ordinal = findMatchInArray(lastNumber, digit2enty, ord2enty);
}
if (lastNumber.equals("hundred")) {
ordinal = "hundredth";
} else if (lastNumber.equals("thousand")) {
ordinal = "thousandth";
} else if (lastNumber.equals("billion")) {
ordinal = "billionth";
}
// if there was an ordinal, set the last element of the list
// to that ordinal; otherwise, don't do anything
if (ordinal != null) {
wordRelation.setLastWord(ordinal);
}
}
}
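// Worked example: expandOrdinal("1,234", wr) first expands "1234" to
// "one thousand two hundred thirty four" and then rewrites the last
// word, yielding "one thousand two hundred thirty fourth".
/**
* Expands a two- or four-digit string as decades, e.g. "1950" expands
* to "nineteen fifties".
*
* @param rawString the two- or four-digit string to expand
* @param wordRelation words are added to this Relation
*/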
public static void expandNumess(String rawString, WordRelation wordRelation) {
if (rawString.length() == 4) {
expand2DigitNumber(rawString.substring(0, 2), wordRelation);
expandNumess(rawString.substring(2), wordRelation);
} else {
wordRelation.addWord(digit2Numness[rawString.charAt(0) - '0']);
}
}
/**
* Finds a match of the given string in the given array, and returns the
* element at the same index in the returnInArray
*
* @param strToMatch the string to match
* @param matchInArray the source array
* @param returnInArray the return array
*
* @return an element in returnInArray, or <code>null</code> if a match is
* not found
*/
private static String findMatchInArray(String strToMatch,
String[] matchInArray, String[] returnInArray) {
for (int i = 0; i < matchInArray.length; i++) {
if (strToMatch.equals(matchInArray[i])) {
if (i < returnInArray.length) {
return returnInArray[i];
} else {
return null;
}
}
}
return null;
}
/**
* Expands the given number string in pairs, as in years or IDs.
*
* @param numberString the string which is the number to expand
* @param wordRelation words are added to this Relation
*/
public static void expandID(String numberString, WordRelation wordRelation) {
int numberDigits = numberString.length();
if ((numberDigits == 4) && (numberString.charAt(2) == '0')
&& (numberString.charAt(3) == '0')) {
if (numberString.charAt(1) == '0') { // e.g. 2000, 3000
expandNumber(numberString, wordRelation);
} else {
expandNumber(numberString.substring(0, 2), wordRelation);
wordRelation.addWord("hundred");
}
} else if ((numberDigits == 2) && (numberString.charAt(0) == '0')) {
wordRelation.addWord("oh");
expandDigits(numberString.substring(1, 2), wordRelation);
} else if ((numberDigits == 4 && numberString.charAt(1) == '0')
|| numberDigits < 3) {
expandNumber(numberString, wordRelation);
} else if (numberDigits % 2 == 1) {
String firstDigit = digit2num[numberString.charAt(0) - '0'];
wordRelation.addWord(firstDigit);
expandID(numberString.substring(1, numberDigits), wordRelation);
} else {
expandNumber(numberString.substring(0, 2), wordRelation);
expandID(numberString.substring(2, numberDigits), wordRelation);
}
}
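// Worked examples: "1984" -> "nineteen eighty four" (expanded in pairs),
// "2000" -> "two thousand", "05" -> "oh five".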
/**
* Expands the given number string as a real number.
*
* @param numberString the string which is the real number to expand
* @param wordRelation words are added to this Relation
*/
public static void expandReal(String numberString,
WordRelation wordRelation) {
int stringLength = numberString.length();
int position;
if (numberString.charAt(0) == '-') {
// negative real numbers
wordRelation.addWord("minus");
expandReal(numberString.substring(1, stringLength), wordRelation);
} else if (numberString.charAt(0) == '+') {
// prefixed with a '+'
wordRelation.addWord("plus");
expandReal(numberString.substring(1, stringLength), wordRelation);
} else if ((position = numberString.indexOf('e')) != -1
|| (position = numberString.indexOf('E')) != -1) {
// numbers with 'E' or 'e'
expandReal(numberString.substring(0, position), wordRelation);
wordRelation.addWord("e");
expandReal(numberString.substring(position + 1), wordRelation);
} else if ((position = numberString.indexOf('.')) != -1) {
// numbers with '.'
String beforeDot = numberString.substring(0, position);
if (beforeDot.length() > 0) {
expandReal(beforeDot, wordRelation);
}
wordRelation.addWord("point");
String afterDot = numberString.substring(position + 1);
if (afterDot.length() > 0) {
expandDigits(afterDot, wordRelation);
}
} else {
// everything else
expandNumber(numberString, wordRelation);
}
}
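// Worked example: expandReal("-1.5e3", wr) yields
// "minus one point five e three".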
/**
* Expands the given string of letters as a list of single char symbols.
*
* @param letters the string of letters to expand
* @param wordRelation words are added to this Relation
*/
public static void expandLetters(String letters, WordRelation wordRelation) {
letters = letters.toLowerCase();
char c;
for (int i = 0; i < letters.length(); i++) {
c = letters.charAt(i);
if (Character.isDigit(c)) {
// digits embedded in the token are spoken as numbers
wordRelation.addWord(digit2num[c - '0']);
} else if (letters.equals("a")) {
// the whole token is the single letter "a", which gets the
// special pronunciation "_a"
wordRelation.addWord("_a");
} else {
wordRelation.addWord(String.valueOf(c));
}
}
}
/**
* Returns the integer value of the given string of Roman numerals. Only
* the numerals I, V, and X are handled.
*
* @param roman the string of Roman numerals
*
* @return the integer value
*/
public static int expandRoman(String roman) {
int value = 0;
for (int p = 0; p < roman.length(); p++) {
char c = roman.charAt(p);
if (c == 'X') {
value += 10;
} else if (c == 'V') {
value += 5;
} else if (c == 'I') {
if (p + 1 < roman.length()) {
char p1 = roman.charAt(p + 1);
if (p1 == 'V') {
value += 4;
p++;
} else if (p1 == 'X') {
value += 9;
p++;
} else {
value += 1;
}
} else {
value += 1;
}
}
}
return value;
}
}


@ -0,0 +1,264 @@
/**
* Portions Copyright 2001 Sun Microsystems, Inc.
* Portions Copyright 1999-2001 Language Technologies Institute,
* Carnegie Mellon University.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*/
package edu.cmu.sphinx.alignment.tokenizer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
* Manages a feature or item path and allows navigation to the
* corresponding feature or item. This class is controlled by the
* following system properties:
*
* <pre>
* com.sun.speech.freetts.interpretCartPaths - default false
* com.sun.speech.freetts.lazyCartCompile - default true
* </pre>
*
* Instances of this class will optionally pre-compile the paths.
* Pre-compiling paths reduces the processing time and the number of
* objects needed to extract a feature or an item based upon a path.
*/
public class PathExtractor {
/** Logger instance. */
private static final Logger LOGGER = Logger
.getLogger(PathExtractor.class.getName());
/**
* If this system property is set to true, paths will not be compiled.
*/
public final static String INTERPRET_PATHS_PROPERTY =
"com.sun.speech.freetts.interpretCartPaths";
/**
* If this system property is set to true, CART feature/item paths will
* only be compiled as needed.
*/
public final static String LAZY_COMPILE_PROPERTY =
"com.sun.speech.freetts.lazyCartCompile";
private final static boolean INTERPRET_PATHS = System.getProperty(
INTERPRET_PATHS_PROPERTY, "false").equals("true");
private final static boolean LAZY_COMPILE = System.getProperty(
LAZY_COMPILE_PROPERTY, "true").equals("true");
private String pathAndFeature;
private String path;
private String feature;
private Object[] compiledPath;
/**
* Creates a path for the given feature.
* @param pathAndFeature the path, optionally ending in a feature name
* @param wantFeature whether the trailing component names a feature
*/
public PathExtractor(String pathAndFeature, boolean wantFeature) {
this.pathAndFeature = pathAndFeature;
if (INTERPRET_PATHS) {
path = pathAndFeature;
return;
}
if (wantFeature) {
int lastDot = pathAndFeature.lastIndexOf(".");
// string can be of the form "p.feature" or just "feature"
if (lastDot == -1) {
feature = pathAndFeature;
path = null;
} else {
feature = pathAndFeature.substring(lastDot + 1);
path = pathAndFeature.substring(0, lastDot);
}
} else {
this.path = pathAndFeature;
}
if (!LAZY_COMPILE) {
compiledPath = compile(path);
}
}
/**
* Finds the item associated with this Path.
*
* @param item the item to start at
* @return the item associated with the path or null
*/
public Item findItem(Item item) {
if (INTERPRET_PATHS) {
return item.findItem(path);
}
if (compiledPath == null) {
compiledPath = compile(path);
}
Item pitem = item;
for (int i = 0; pitem != null && i < compiledPath.length;) {
OpEnum op = (OpEnum) compiledPath[i++];
if (op == OpEnum.NEXT) {
pitem = pitem.getNext();
} else if (op == OpEnum.PREV) {
pitem = pitem.getPrevious();
} else if (op == OpEnum.NEXT_NEXT) {
pitem = pitem.getNext();
if (pitem != null) {
pitem = pitem.getNext();
}
} else if (op == OpEnum.PREV_PREV) {
pitem = pitem.getPrevious();
if (pitem != null) {
pitem = pitem.getPrevious();
}
} else if (op == OpEnum.PARENT) {
pitem = pitem.getParent();
} else if (op == OpEnum.DAUGHTER) {
pitem = pitem.getDaughter();
} else if (op == OpEnum.LAST_DAUGHTER) {
pitem = pitem.getLastDaughter();
} else if (op == OpEnum.RELATION) {
String relationName = (String) compiledPath[i++];
pitem =
pitem.getSharedContents()
.getItemRelation(relationName);
} else {
System.out.println("findItem: bad feature " + op + " in "
+ path);
}
}
return pitem;
}
/**
* Finds the feature associated with this Path.
*
* @param item the item to start at
* @return the feature associated or "0" if the feature was not found.
*/
public Object findFeature(Item item) {
if (INTERPRET_PATHS) {
return item.findFeature(path);
}
Item pitem = findItem(item);
Object results = null;
if (pitem != null) {
if (LOGGER.isLoggable(Level.FINER)) {
LOGGER.finer("findFeature: Item [" + pitem + "], feature '"
+ feature + "'");
}
results = pitem.getFeatures().getObject(feature);
}
results = (results == null) ? "0" : results;
if (LOGGER.isLoggable(Level.FINER)) {
LOGGER.finer("findFeature: ...results = '" + results + "'");
}
return results;
}
/**
* Compiles the given path into the compiled form
*
* @param path the path to compile
* @return the compiled form: an array of path traversal enums and
* associated strings
*/
private Object[] compile(String path) {
if (path == null) {
return new Object[0];
}
List<Object> list = new ArrayList<Object>();
StringTokenizer tok = new StringTokenizer(path, ":.");
while (tok.hasMoreTokens()) {
String token = tok.nextToken();
OpEnum op = OpEnum.getInstance(token);
if (op == null) {
throw new Error("Bad path compiled " + path);
}
list.add(op);
if (op == OpEnum.RELATION) {
list.add(tok.nextToken());
}
}
return list.toArray();
}
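// Example: compile("R:SylStructure.parent.parent.R:Word") yields
// [RELATION, "SylStructure", PARENT, PARENT, RELATION, "Word"].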
// inherited from Object
public String toString() {
return pathAndFeature;
}
// TODO: add these to the interface should we support binary
// files
/*
* public void writeBinary(); public void readBinary();
*/
}
/**
* An enumerated type associated with path operations.
*/
class OpEnum {
static private Map<String, OpEnum> map = new HashMap<String, OpEnum>();
public final static OpEnum NEXT = new OpEnum("n");
public final static OpEnum PREV = new OpEnum("p");
public final static OpEnum NEXT_NEXT = new OpEnum("nn");
public final static OpEnum PREV_PREV = new OpEnum("pp");
public final static OpEnum PARENT = new OpEnum("parent");
public final static OpEnum DAUGHTER = new OpEnum("daughter");
public final static OpEnum LAST_DAUGHTER = new OpEnum("daughtern");
public final static OpEnum RELATION = new OpEnum("R");
private String name;
/**
* Creates a new OpEnum. There is a limited, fixed set of OpEnums.
*
* @param name the path name for this Enum
*/
private OpEnum(String name) {
this.name = name;
map.put(name, this);
}
/**
* Gets the OpEnum that is associated with the given name.
*
* @param name the name of the OpEnum of interest
* @return the matching OpEnum, or null if none exists
*/
public static OpEnum getInstance(String name) {
return map.get(name);
}
// inherited from Object
public String toString() {
return name;
}
}


@ -0,0 +1,29 @@
/**
* Portions Copyright 2001 Sun Microsystems, Inc.
* Portions Copyright 1999-2001 Language Technologies Institute,
* Carnegie Mellon University.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*/
package edu.cmu.sphinx.alignment.tokenizer;
import java.io.IOException;
import java.net.URL;
/**
* Implements a finite state machine that checks whether a given string is a
* pronounceable prefix; the input is scanned from the front.
*/
public class PrefixFSM extends PronounceableFSM {
/**
* Constructs a PrefixFSM.
* @param url URL of the FSM definition to load
* @throws IOException if load failed
*/
public PrefixFSM(URL url) throws IOException {
super(url, true);
}
}


@ -0,0 +1,172 @@
/**
* Portions Copyright 2001 Sun Microsystems, Inc.
* Portions Copyright 1999-2001 Language Technologies Institute,
* Carnegie Mellon University.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*/
package edu.cmu.sphinx.alignment.tokenizer;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.StringTokenizer;
/**
* Implements a finite state machine that checks if a given string is
* pronounceable. If it is pronounceable, the method <code>accept()</code> will
* return true.
*/
public class PronounceableFSM {
private static final String VOCAB_SIZE = "VOCAB_SIZE";
private static final String NUM_OF_TRANSITIONS = "NUM_OF_TRANSITIONS";
private static final String TRANSITIONS = "TRANSITIONS";
/**
* The vocabulary size.
*/
protected int vocabularySize;
/**
* The transitions of this FSM
*/
protected int[] transitions;
/**
* Whether we should scan the input string from the front.
*/
protected boolean scanFromFront;
/**
* Constructs a PronounceableFSM with information in the given URL.
*
* @param url the URL that contains the FSM specification
* @param scanFromFront indicates whether this FSM should scan the input
* string from the front, or from the back
* @throws IOException if something went wrong
*/
public PronounceableFSM(URL url, boolean scanFromFront) throws IOException {
this.scanFromFront = scanFromFront;
InputStream is = url.openStream();
loadText(is);
is.close();
}
/**
* Constructs a PronounceableFSM with the given attributes.
*
* @param vocabularySize the vocabulary size of the FSM
* @param transitions the transitions of the FSM
* @param scanFromFront indicates whether this FSM should scan the input
* string from the front, or from the back
*/
public PronounceableFSM(int vocabularySize, int[] transitions,
boolean scanFromFront) {
this.vocabularySize = vocabularySize;
this.transitions = transitions;
this.scanFromFront = scanFromFront;
}
/**
* Loads the ASCII specification of this FSM from the given InputStream.
*
* @param is the input stream to load from
*
* @throws IOException if an error occurs on input.
*/
private void loadText(InputStream is) throws IOException {
BufferedReader reader = new BufferedReader(new InputStreamReader(is));
String line = null;
while ((line = reader.readLine()) != null) {
if (!line.startsWith("***")) {
if (line.startsWith(VOCAB_SIZE)) {
vocabularySize = parseLastInt(line);
} else if (line.startsWith(NUM_OF_TRANSITIONS)) {
int transitionsSize = parseLastInt(line);
transitions = new int[transitionsSize];
} else if (line.startsWith(TRANSITIONS)) {
StringTokenizer st = new StringTokenizer(line);
String transition = st.nextToken();
int i = 0;
while (st.hasMoreTokens() && i < transitions.length) {
transition = st.nextToken().trim();
transitions[i++] = Integer.parseInt(transition);
}
}
}
}
reader.close();
}
/**
* Returns the integer value of the last integer in the given string.
*
* @param line the line to parse the integer from
*
* @return an integer
*/
private int parseLastInt(String line) {
String lastInt = line.trim().substring(line.lastIndexOf(" "));
return Integer.parseInt(lastInt.trim());
}
/**
* Causes this FSM to transition to the next state given the current state
* and input symbol.
*
* @param state the current state
* @param symbol the input symbol
*/
private int transition(int state, int symbol) {
for (int i = state; i < transitions.length; i++) {
if ((transitions[i] % vocabularySize) == symbol) {
return (transitions[i] / vocabularySize);
}
}
return -1;
}
/**
* Checks to see if this finite state machine accepts the given input
* string.
*
* @param inputString the input string to be tested
*
* @return true if this FSM accepts, false if it rejects
*/
public boolean accept(String inputString) {
int symbol;
int state = transition(0, '#');
int leftEnd = inputString.length() - 1;
int start = (scanFromFront) ? 0 : leftEnd;
for (int i = start; 0 <= i && i <= leftEnd;) {
char c = inputString.charAt(i);
if (c == 'n' || c == 'm') {
symbol = 'N';
} else if ("aeiouy".indexOf(c) != -1) {
symbol = 'V';
} else {
symbol = c;
}
state = transition(state, symbol);
if (state == -1) {
return false;
} else if (symbol == 'V') {
return true;
}
if (scanFromFront) {
i++;
} else {
i--;
}
}
return false;
}
}
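A minimal usage sketch, assuming an FSM definition file in the text format read by loadText above; the file name is hypothetical:

URL url = new URL("file:us_prefix.fsm"); // hypothetical FSM data file
PronounceableFSM fsm = new PronounceableFSM(url, true); // scan from the front
boolean ok = fsm.accept("string"); // true as soon as a vowel symbol is reached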


@ -0,0 +1,145 @@
/**
* Portions Copyright 2001 Sun Microsystems, Inc.
* Portions Copyright 1999-2001 Language Technologies Institute,
* Carnegie Mellon University.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*/
package edu.cmu.sphinx.alignment.tokenizer;
import edu.cmu.sphinx.alignment.USEnglishTokenizer;
/**
* Represents an ordered set of {@link Item}s and their associated children. A
* relation has a name and a list of items, and is added to an
* {@link Utterance} via a {@link USEnglishTokenizer}.
*/
public class Relation {
private String name;
private Utterance owner;
private Item head;
private Item tail;
/**
* Name of the relation that contains tokens from the original input text.
* This is the first thing to be added to the utterance.
*/
public static final String TOKEN = "Token";
/**
* Name of the relation that contains the normalized version of the
* original input text.
*/
public static final String WORD = "Word";
/**
* Creates a relation.
*
* @param name the name of the Relation
* @param owner the utterance that contains this relation
*/
Relation(String name, Utterance owner) {
this.name = name;
this.owner = owner;
head = null;
tail = null;
}
/**
* Retrieves the name of this Relation.
*
* @return the name of this Relation
*/
public String getName() {
return name;
}
/**
* Gets the head of the item list.
*
* @return the head item
*/
public Item getHead() {
return head;
}
/**
* Sets the head of the item list.
*
* @param item the new head item
*/
void setHead(Item item) {
head = item;
}
/**
* Gets the tail of the item list.
*
* @return the tail item
*/
public Item getTail() {
return tail;
}
/**
* Sets the tail of the item list.
*
* @param item the new tail item
*/
void setTail(Item item) {
tail = item;
}
/**
* Adds a new item to this relation. The item added does not share its
* contents with any other item.
*
* @return the newly added item
*/
public Item appendItem() {
return appendItem(null);
}
/**
* Adds a new item to this relation. The item added shares its contents
* with the original item.
*
* @param originalItem the item whose contents will be shared by the new item
*
* @return the newly added item
*/
public Item appendItem(Item originalItem) {
ItemContents contents;
Item newItem;
if (originalItem == null) {
contents = null;
} else {
contents = originalItem.getSharedContents();
}
newItem = new Item(this, contents);
if (head == null) {
head = newItem;
}
if (tail != null) {
tail.attach(newItem);
}
tail = newItem;
return newItem;
}
/**
* Returns the utterance that contains this relation.
*
* @return the utterance that contains this relation
*/
public Utterance getUtterance() {
return owner;
}
}
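A sketch of how items are appended, assuming an Utterance instance is in scope (the Relation constructor itself is package-private):

Relation words = utterance.createRelation(Relation.WORD);
Item first = words.appendItem(); // new item with fresh contents
Item shared = words.appendItem(first); // shares contents with 'first'
// words.getHead() is now 'first' and words.getTail() is 'shared'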


@ -0,0 +1,29 @@
/**
* Portions Copyright 2001 Sun Microsystems, Inc.
* Portions Copyright 1999-2001 Language Technologies Institute,
* Carnegie Mellon University.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*/
package edu.cmu.sphinx.alignment.tokenizer;
import java.io.IOException;
import java.net.URL;
/**
* Implements a finite state machine that checks whether a given string is a
* pronounceable suffix; the input is scanned from the back.
*/
public class SuffixFSM extends PronounceableFSM {
/**
* Constructs a SuffixFSM.
* @param url URL of the FSM definition to load
* @throws IOException if loading failed
*/
public SuffixFSM(URL url) throws IOException {
super(url, false);
}
}


@ -0,0 +1,229 @@
/**
* Portions Copyright 2001 Sun Microsystems, Inc.
* Portions Copyright 1999-2001 Language Technologies Institute,
* Carnegie Mellon University.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*/
package edu.cmu.sphinx.alignment.tokenizer;
import java.util.Iterator;
import edu.cmu.sphinx.alignment.Token;
/**
* Holds all the data for an utterance to be spoken. It is incrementally
* modified by various utterance processors. An utterance contains a set
* of Features (essentially a set of properties) and a set of Relations.
* A Relation is an ordered set of Item graphs. Convenience methods let
* applications set and get features directly from the utterance.
*/
public class Utterance {
private FeatureSet features;
private FeatureSet relations;
/**
* Creates an utterance with the given set of tokenized text.
*
* @param tokenizer tokenizer to use for utterance.
*/
public Utterance(CharTokenizer tokenizer) {
features = new FeatureSet();
relations = new FeatureSet();
setTokenList(tokenizer);
}
/**
* Creates a new relation with the given name and adds it to this
* utterance.
*
* @param name the name of the new relation
*
* @return the newly created relation
*/
public Relation createRelation(String name) {
Relation relation = new Relation(name, this);
relations.setObject(name, relation);
return relation;
}
/**
* Retrieves a relation from this utterance.
*
* @param name the name of the Relation
*
* @return the relation or null if the relation is not found
*/
public Relation getRelation(String name) {
return (Relation) relations.getObject(name);
}
/**
* Determines if this utterance contains a relation with the given name.
*
* @param name the name of the relation of interest.
* @return if relation is present
*/
public boolean hasRelation(String name) {
return relations.isPresent(name);
}
/**
* Removes the named feature from this set of features.
*
* @param name the name of the feature of interest
*/
public void remove(String name) {
features.remove(name);
}
/**
* Convenience method that sets the named feature as an int.
*
* @param name the name of the feature
* @param value the value of the feature
*/
public void setInt(String name, int value) {
features.setInt(name, value);
}
/**
* Convenience method that sets the named feature as a float.
*
* @param name the name of the feature
* @param value the value of the feature
*/
public void setFloat(String name, float value) {
features.setFloat(name, value);
}
/**
* Convenience method that sets the named feature as a String.
*
* @param name the name of the feature
* @param value the value of the feature
*/
public void setString(String name, String value) {
features.setString(name, value);
}
/**
* Sets the named feature.
*
* @param name the name of the feature
* @param value the value of the feature
*/
public void setObject(String name, Object value) {
features.setObject(name, value);
}
/**
* Returns the Item in the given Relation associated with the given time.
*
* @param relation the name of the relation
* @param time the time
* @return the item
*/
public Item getItem(String relation, float time) {
// the "Segment" relation carries the timing information; this lookup
// is an assumed fix, since leaving it null made this method always fail
Relation segmentRelation = getRelation("Segment");
String pathName = null;
if (relation.equals(Relation.WORD)) {
pathName = "R:SylStructure.parent.parent.R:Word";
} else if (relation.equals(Relation.TOKEN)) {
pathName = "R:SylStructure.parent.parent.R:Token.parent";
} else {
throw new IllegalArgumentException(
"Utterance.getItem(): relation cannot be " + relation);
}
PathExtractor path = new PathExtractor(pathName, false);
// get the Item in the Segment Relation with the given time
Item segmentItem =
(segmentRelation == null) ? null : getItem(segmentRelation, time);
if (segmentItem != null) {
return path.findItem(segmentItem);
} else {
return null;
}
}
private static Item getItem(Relation segmentRelation, float time) {
Item lastSegment = segmentRelation.getTail();
// If given time is closer to the front than the end, search from
// the front; otherwise, start the search from the end. This might
// not be the best strategy, though.
float lastSegmentEndTime = getSegmentEnd(lastSegment);
if (time < 0 || lastSegmentEndTime < time) {
return null;
} else if (lastSegmentEndTime - time > time) {
return findFromFront(segmentRelation, time);
} else {
return findFromEnd(segmentRelation, time);
}
}
private static Item findFromEnd(Relation segmentRelation, float time) {
Item item = segmentRelation.getTail();
while (item != null && getSegmentEnd(item) > time) {
item = item.getPrevious();
}
if (item == null) {
// every segment ends after the given time; fall back to the head
item = segmentRelation.getHead();
} else if (item != segmentRelation.getTail()) {
item = item.getNext();
}
return item;
}
private static Item findFromFront(Relation segmentRelation, float time) {
Item item = segmentRelation.getHead();
while (item != null && time > getSegmentEnd(item)) {
item = item.getNext();
}
return item;
}
private static float getSegmentEnd(Item segment) {
FeatureSet segmentFeatureSet = segment.getFeatures();
return segmentFeatureSet.getFloat("end");
}
/**
* Sets the token list for this utterance. Note that this could be
* optimized by turning the token list directly into the token relation.
*
* @param tokenizer an iterator over the tokens of the utterance
*
*/
private void setTokenList(Iterator<Token> tokenizer) {
Relation relation = createRelation(Relation.TOKEN);
while (tokenizer.hasNext()) {
Token token = tokenizer.next();
String tokenWord = token.getWord();
if (tokenWord != null && tokenWord.length() > 0) {
Item item = relation.appendItem();
FeatureSet featureSet = item.getFeatures();
featureSet.setString("name", tokenWord);
featureSet.setString("whitespace", token.getWhitespace());
featureSet.setString("prepunctuation",
token.getPrepunctuation());
featureSet.setString("punc", token.getPostpunctuation());
featureSet.setString("file_pos",
String.valueOf(token.getPosition()));
featureSet.setString("line_number",
String.valueOf(token.getLineNumber()));
}
}
}
}


@ -0,0 +1,85 @@
/**
* Portions Copyright 2001 Sun Microsystems, Inc.
* Portions Copyright 1999-2001 Language Technologies Institute,
* Carnegie Mellon University.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*/
package edu.cmu.sphinx.alignment.tokenizer;
import edu.cmu.sphinx.alignment.USEnglishTokenizer;
/**
* Helper class to add words and breaks into a Relation object.
*/
public class WordRelation {
private Relation relation;
private USEnglishTokenizer tokenToWords;
private WordRelation(Relation parentRelation, USEnglishTokenizer tokenToWords) {
this.relation = parentRelation;
this.tokenToWords = tokenToWords;
}
/**
* Creates a WordRelation object with the given utterance and TokenToWords.
*
* @param utterance the Utterance from which to create a Relation
* @param tokenToWords the TokenToWords object to use
*
* @return a WordRelation object
*/
public static WordRelation createWordRelation(Utterance utterance,
USEnglishTokenizer tokenToWords) {
Relation relation = utterance.createRelation(Relation.WORD);
return new WordRelation(relation, tokenToWords);
}
/**
* Adds a break as a feature to the last item in the list.
*/
public void addBreak() {
Item wordItem = relation.getTail();
if (wordItem != null) {
FeatureSet featureSet = wordItem.getFeatures();
featureSet.setString("break", "1");
}
}
/**
* Adds a word as an Item to this WordRelation object.
*
* @param word the word to add
*/
public void addWord(String word) {
Item tokenItem = tokenToWords.getTokenItem();
Item wordItem = tokenItem.createDaughter();
FeatureSet featureSet = wordItem.getFeatures();
featureSet.setString("name", word);
relation.appendItem(wordItem);
}
/**
* Sets the last Item in this WordRelation to the given word.
*
* @param word the word to set
*/
public void setLastWord(String word) {
Item lastItem = relation.getTail();
if (lastItem != null) {
lastItem.getFeatures().setString("name", word);
}
}
/**
* Returns the last item in this WordRelation.
*
* @return the last item
*/
public Item getTail() {
return relation.getTail();
}
}
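A hedged sketch tying WordRelation to the number expanders above; the utterance and tokenToWords objects are assumed to exist:

WordRelation wr = WordRelation.createWordRelation(utterance, tokenToWords);
NumberExpander.expandNumber("1234", wr); // appends "one thousand two hundred thirty four"
wr.addBreak(); // marks a break on the last word added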


@ -0,0 +1,81 @@
/*
* Copyright 2013 Carnegie Mellon University.
* Portions Copyright 2004 Sun Microsystems, Inc.
* Portions Copyright 2004 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*/
package edu.cmu.sphinx.api;
import java.io.IOException;
import edu.cmu.sphinx.decoder.adaptation.ClusteredDensityFileData;
import edu.cmu.sphinx.decoder.adaptation.Stats;
import edu.cmu.sphinx.decoder.adaptation.Transform;
import edu.cmu.sphinx.linguist.acoustic.tiedstate.Sphinx3Loader;
import edu.cmu.sphinx.recognizer.Recognizer;
import edu.cmu.sphinx.result.Result;
/**
* Base class for high-level speech recognizers.
*/
public class AbstractSpeechRecognizer {
protected final Context context;
protected final Recognizer recognizer;
protected ClusteredDensityFileData clusters;
protected final SpeechSourceProvider speechSourceProvider;
/**
* Constructs recognizer object using provided configuration.
* @param configuration initial configuration
* @throws IOException if IO went wrong
*/
public AbstractSpeechRecognizer(Configuration configuration)
throws IOException
{
this(new Context(configuration));
}
protected AbstractSpeechRecognizer(Context context) throws IOException {
this.context = context;
recognizer = context.getInstance(Recognizer.class);
speechSourceProvider = new SpeechSourceProvider();
}
/**
* Returns result of the recognition.
*
* @return recognition result or {@code null} if there is no result, e.g., because the
* microphone or input stream has been closed
*/
public SpeechResult getResult() {
Result result = recognizer.recognize();
return null == result ? null : new SpeechResult(result);
}
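/**
* Creates a statistics collector for speaker adaptation, clustering the
* model densities into the given number of classes.
*
* @param numClasses number of density clusters to use
* @return stats object bound to this recognizer's model loader
*/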
public Stats createStats(int numClasses) {
clusters = new ClusteredDensityFileData(context.getLoader(), numClasses);
return new Stats(context.getLoader(), clusters);
}
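/**
* Applies the given speaker transform to the loaded model. Has no effect
* unless createStats or loadTransform has been called first.
*
* @param transform transform to apply
*/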
public void setTransform(Transform transform) {
if (clusters != null) {
context.getLoader().update(transform, clusters);
}
}
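/**
* Loads a speaker transform from the given file and applies it to the
* loaded model.
*
* @param path path to the transform file
* @param numClass number of clusters the transform was estimated with
* @throws Exception if the transform could not be loaded
*/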
public void loadTransform(String path, int numClass) throws Exception {
clusters = new ClusteredDensityFileData(context.getLoader(), numClass);
Transform transform = new Transform((Sphinx3Loader)context.getLoader(), numClass);
transform.load(path);
context.getLoader().update(transform, clusters);
}
}
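A hedged sketch of the adaptation flow these methods support; the collect and createTransform calls are assumptions about the Stats API:

Stats stats = recognizer.createStats(1); // one density cluster
stats.collect(recognizer.getResult()); // collect() signature assumed
Transform transform = stats.createTransform(); // assumed API
recognizer.setTransform(transform);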


@ -0,0 +1,139 @@
/*
* Copyright 2013 Carnegie Mellon University.
* Portions Copyright 2004 Sun Microsystems, Inc.
* Portions Copyright 2004 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*/
package edu.cmu.sphinx.api;
/**
* Represents common configuration options.
*
* This configuration is used by high-level recognition classes.
*
* @see SpeechAligner
* @see LiveSpeechRecognizer
* @see StreamSpeechRecognizer
*/
public class Configuration {
private String acousticModelPath;
private String dictionaryPath;
private String languageModelPath;
private String grammarPath;
private String grammarName;
private int sampleRate = 16000;
private boolean useGrammar = false;
/**
* @return path to acoustic model
*/
public String getAcousticModelPath() {
return acousticModelPath;
}
/**
* Sets path to acoustic model.
* @param acousticModelPath URL of the acoustic model
*/
public void setAcousticModelPath(String acousticModelPath) {
this.acousticModelPath = acousticModelPath;
}
/**
* @return path to dictionary.
*/
public String getDictionaryPath() {
return dictionaryPath;
}
/**
* Sets path to dictionary.
* @param dictionaryPath URL of the dictionary
*/
public void setDictionaryPath(String dictionaryPath) {
this.dictionaryPath = dictionaryPath;
}
/**
* @return path to the language model
*/
public String getLanguageModelPath() {
return languageModelPath;
}
/**
* Sets path to the language model resource.
* @param languageModelPath URL of the language model
*/
public void setLanguageModelPath(String languageModelPath) {
this.languageModelPath = languageModelPath;
}
/**
* @return grammar path
*/
public String getGrammarPath() {
return grammarPath;
}
/**
* Sets path to grammar resources.
* @param grammarPath URL of the grammar
*/
public void setGrammarPath(String grammarPath) {
this.grammarPath = grammarPath;
}
/**
* @return grammar name
*/
public String getGrammarName() {
return grammarName;
}
/**
* Sets grammar name if fixed grammar is used.
* @param grammarName name of the grammar
*/
public void setGrammarName(String grammarName) {
this.grammarName = grammarName;
}
/**
* @return whether fixed grammar should be used instead of language model.
*/
public boolean getUseGrammar() {
return useGrammar;
}
/**
* Sets whether fixed grammar should be used instead of language model.
* @param useGrammar to use grammar or language model
*/
public void setUseGrammar(boolean useGrammar) {
this.useGrammar = useGrammar;
}
/**
* @return the configured sample rate.
*/
public int getSampleRate() {
return sampleRate;
}
/**
* Sets sample rate for the input stream.
* @param sampleRate sample rate in Hertz
*/
public void setSampleRate(int sampleRate) {
this.sampleRate = sampleRate;
}
}
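A typical setup sketch; the resource paths below are the stock en-us models shipped with sphinx4-data and should be treated as assumptions:

Configuration configuration = new Configuration();
configuration.setAcousticModelPath("resource:/edu/cmu/sphinx/models/en-us/en-us");
configuration.setDictionaryPath("resource:/edu/cmu/sphinx/models/en-us/cmudict-en-us.dict");
configuration.setLanguageModelPath("resource:/edu/cmu/sphinx/models/en-us/en-us.lm.dmp");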


@ -0,0 +1,222 @@
/*
* Copyright 2013 Carnegie Mellon University.
* Portions Copyright 2004 Sun Microsystems, Inc.
* Portions Copyright 2004 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*/
package edu.cmu.sphinx.api;
import static edu.cmu.sphinx.util.props.ConfigurationManagerUtils.resourceToURL;
import static edu.cmu.sphinx.util.props.ConfigurationManagerUtils.setProperty;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import edu.cmu.sphinx.frontend.frequencywarp.MelFrequencyFilterBank2;
import edu.cmu.sphinx.frontend.util.StreamDataSource;
import edu.cmu.sphinx.linguist.acoustic.tiedstate.Loader;
import edu.cmu.sphinx.util.TimeFrame;
import edu.cmu.sphinx.util.props.Configurable;
import edu.cmu.sphinx.util.props.ConfigurationManager;
/**
* Helps to tweak the configuration without touching the XML file directly.
*/
public class Context {
private final ConfigurationManager configurationManager;
/**
* Constructs a context that uses the default XML configuration.
* @param config configuration
* @throws MalformedURLException if failed to load configuration file
*/
public Context(Configuration config)
throws IOException, MalformedURLException
{
this("resource:/edu/cmu/sphinx/api/default.config.xml", config);
}
/**
* Constructs a context using a user-supplied XML configuration.
*
* @param path path to XML-resource with configuration
* @param config configuration
* @throws MalformedURLException if failed to load configuration file
* @throws IOException if failed to load configuration file
*/
public Context(String path, Configuration config)
throws IOException, MalformedURLException
{
configurationManager = new ConfigurationManager(resourceToURL(path));
setAcousticModel(config.getAcousticModelPath());
setDictionary(config.getDictionaryPath());
if (null != config.getGrammarPath() && config.getUseGrammar())
setGrammar(config.getGrammarPath(), config.getGrammarName());
if (null != config.getLanguageModelPath() && !config.getUseGrammar())
setLanguageModel(config.getLanguageModelPath());
setSampleRate(config.getSampleRate());
// Force ConfigurationManager to build the whole graph
// in order to enable instance lookup by class.
configurationManager.lookup("recognizer");
}
/**
* Sets acoustic model location.
*
* It also points the dictionary at the filler dictionary (noisedict)
* located at the root of the acoustic model.
*
* @param path path to directory with acoustic model files
*
* @throws IOException if setting the model location fails
*/
public void setAcousticModel(String path) throws IOException {
setLocalProperty("acousticModelLoader->location", path);
setLocalProperty("dictionary->fillerPath", path + "/noisedict");
}
/**
* Sets dictionary.
*
* @param path path to directory with dictionary files
*/
public void setDictionary(String path) {
setLocalProperty("dictionary->dictionaryPath", path);
}
/**
* Sets sampleRate.
*
* @param sampleRate sample rate of the input stream.
*/
public void setSampleRate(int sampleRate) {
setLocalProperty("dataSource->sampleRate", Integer.toString(sampleRate));
}
/**
* Sets path to the grammar files.
*
* Enables static grammar and disables probabilistic language model.
* JSGF and GrXML formats are supported.
*
* @param path path to the grammar files
* @param name name of the main grammar to use
* @see Context#setLanguageModel(String)
*/
public void setGrammar(String path, String name) {
// TODO: use a single param of type File, cache directory part
if (name.endsWith(".grxml")) {
setLocalProperty("grXmlGrammar->grammarLocation", path + name);
setLocalProperty("flatLinguist->grammar", "grXmlGrammar");
} else {
setLocalProperty("jsgfGrammar->grammarLocation", path);
setLocalProperty("jsgfGrammar->grammarName", name);
setLocalProperty("flatLinguist->grammar", "jsgfGrammar");
}
setLocalProperty("decoder->searchManager", "simpleSearchManager");
}
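// Example: setGrammar("resource:/grammars/", "digits") loads the JSGF
// grammar "digits" from that location, while a name ending in ".grxml"
// selects the GrXML branch instead.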
/**
* Sets path to the language model.
*
* Enables probabilistic language model and disables static grammar.
* Currently it supports ".lm" and ".dmp" file formats.
*
* @param path path to the language model file
* @see Context#setGrammar(String, String)
*
* @throws IllegalArgumentException if path ends with unsupported extension
*/
public void setLanguageModel(String path) {
if (path.endsWith(".lm")) {
setLocalProperty("simpleNGramModel->location", path);
setLocalProperty(
"lexTreeLinguist->languageModel", "simpleNGramModel");
} else if (path.endsWith(".dmp")) {
setLocalProperty("largeTrigramModel->location", path);
setLocalProperty(
"lexTreeLinguist->languageModel", "largeTrigramModel");
} else {
throw new IllegalArgumentException(
"Unknown format extension: " + path);
}
// search manager for LVCSR is set by default
}
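/**
* Sets byte stream as the speech source, restricted to the given time frame.
*
* @param stream stream to process
* @param timeFrame part of the stream to decode
*/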
public void setSpeechSource(InputStream stream, TimeFrame timeFrame) {
getInstance(StreamDataSource.class).setInputStream(stream, timeFrame);
setLocalProperty("trivialScorer->frontend", "liveFrontEnd");
}
/**
* Sets byte stream as the speech source.
*
* @param stream stream to process
*/
public void setSpeechSource(InputStream stream) {
getInstance(StreamDataSource.class).setInputStream(stream);
setLocalProperty("trivialScorer->frontend", "liveFrontEnd");
}
/**
* Sets property within a "component" tag in configuration.
*
* Use this method to alter "value" property of a "property" tag inside a
* "component" tag of the XML configuration.
*
* @param name property name
* @param value property value
* @see Context#setGlobalProperty(String, Object)
*/
public void setLocalProperty(String name, Object value) {
setProperty(configurationManager, name, value.toString());
}
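// Example: setLocalProperty("decoder->searchManager", "alignerSearchManager")
// rewrites the "searchManager" property of the "decoder" component.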
/**
* Sets property of a top-level "property" tag.
*
* Use this method to alter "value" property of a "property" tag whose
* parent is the root tag "config" of the XML configuration.
*
* @param name property name
* @param value property value
* @see Context#setLocalProperty(String, Object)
*/
public void setGlobalProperty(String name, Object value) {
configurationManager.setGlobalProperty(name, value.toString());
}
/**
* Returns instance of the XML configuration by its class.
*
* @param clazz class to look up
* @param <C> generic
* @return instance of the specified class or null
*/
public <C extends Configurable> C getInstance(Class<C> clazz) {
return configurationManager.lookup(clazz);
}
/**
* Returns the Loader object used for loading the acoustic model.
*
* @return the loader object
*/
public Loader getLoader(){
return (Loader) configurationManager.lookup("acousticModelLoader");
}
}


@ -0,0 +1,62 @@
/*
* Copyright 2013 Carnegie Mellon University.
* Portions Copyright 2004 Sun Microsystems, Inc.
* Portions Copyright 2004 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*/
package edu.cmu.sphinx.api;
import java.io.IOException;
import edu.cmu.sphinx.frontend.util.StreamDataSource;
/**
* High-level class for live speech recognition.
*/
public class LiveSpeechRecognizer extends AbstractSpeechRecognizer {
private final Microphone microphone;
/**
* Constructs new live recognition object.
*
* @param configuration common configuration
* @throws IOException if model IO went wrong
*/
public LiveSpeechRecognizer(Configuration configuration) throws IOException
{
super(configuration);
microphone = speechSourceProvider.getMicrophone();
context.getInstance(StreamDataSource.class)
.setInputStream(microphone.getStream());
}
/**
* Starts recognition process.
*
* @param clear whether to clear cached microphone data (currently ignored)
* @see LiveSpeechRecognizer#stopRecognition()
*/
public void startRecognition(boolean clear) {
recognizer.allocate();
microphone.startRecording();
}
/**
* Stops recognition process.
*
* Recognition process is paused until the next call to startRecognition.
*
* @see LiveSpeechRecognizer#startRecognition(boolean)
*/
public void stopRecognition() {
microphone.stopRecording();
recognizer.deallocate();
}
}
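A minimal live-recognition sketch, assuming a Configuration set up as shown earlier:

LiveSpeechRecognizer recognizer = new LiveSpeechRecognizer(configuration);
recognizer.startRecognition(true);
SpeechResult result = recognizer.getResult(); // blocks until an utterance is decoded
System.out.println(result.getHypothesis());
recognizer.stopRecognition();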

View file

@ -0,0 +1,54 @@
/*
* Copyright 1999-2004 Carnegie Mellon University.
* Portions Copyright 2004 Sun Microsystems, Inc.
* Portions Copyright 2004 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.api;
import java.io.InputStream;
import javax.sound.sampled.*;
/**
* InputStream adapter for the default system microphone.
*/
public class Microphone {
private final TargetDataLine line;
private final InputStream inputStream;
public Microphone(
float sampleRate,
int sampleSize,
boolean signed,
boolean bigEndian) {
AudioFormat format =
new AudioFormat(sampleRate, sampleSize, 1, signed, bigEndian);
try {
line = AudioSystem.getTargetDataLine(format);
line.open();
} catch (LineUnavailableException e) {
throw new IllegalStateException(e);
}
inputStream = new AudioInputStream(line);
}
public void startRecording() {
line.start();
}
public void stopRecording() {
line.stop();
}
public InputStream getStream() {
return inputStream;
}
}


@ -0,0 +1,263 @@
/*
* Copyright 2014 Alpha Cephei Inc.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*/
package edu.cmu.sphinx.api;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.TreeMap;
import java.util.logging.Logger;
import edu.cmu.sphinx.alignment.LongTextAligner;
import edu.cmu.sphinx.alignment.SimpleTokenizer;
import edu.cmu.sphinx.alignment.TextTokenizer;
import edu.cmu.sphinx.linguist.language.grammar.AlignerGrammar;
import edu.cmu.sphinx.linguist.language.ngram.DynamicTrigramModel;
import edu.cmu.sphinx.recognizer.Recognizer;
import edu.cmu.sphinx.result.Result;
import edu.cmu.sphinx.result.WordResult;
import edu.cmu.sphinx.util.Range;
import edu.cmu.sphinx.util.TimeFrame;
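/**
* Aligns long audio recordings to their transcriptions.
*/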
public class SpeechAligner {
private final Logger logger = Logger.getLogger(getClass().getSimpleName());
private static final int TUPLE_SIZE = 3;
private final Context context;
private final Recognizer recognizer;
private final AlignerGrammar grammar;
private final DynamicTrigramModel languageModel;
private TextTokenizer tokenizer;
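/**
* Constructs an aligner with the given models.
*
* @param amPath acoustic model path
* @param dictPath dictionary path
* @param g2pPath path to the g2p model, or null to skip g2p
* @throws IOException if the models failed to load
*/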
public SpeechAligner(String amPath, String dictPath, String g2pPath) throws MalformedURLException, IOException {
Configuration configuration = new Configuration();
configuration.setAcousticModelPath(amPath);
configuration.setDictionaryPath(dictPath);
context = new Context(configuration);
if (g2pPath != null) {
context.setLocalProperty("dictionary->g2pModelPath", g2pPath);
context.setLocalProperty("dictionary->g2pMaxPron", "2");
}
context.setLocalProperty("lexTreeLinguist->languageModel", "dynamicTrigramModel");
recognizer = context.getInstance(Recognizer.class);
grammar = context.getInstance(AlignerGrammar.class);
languageModel = context.getInstance(DynamicTrigramModel.class);
setTokenizer(new SimpleTokenizer());
}
public List<WordResult> align(URL audioUrl, String transcript) throws IOException {
return align(audioUrl, getTokenizer().expand(transcript));
}
/**
* Align audio to sentence transcript
*
* @param audioUrl audio file URL to process
* @param sentenceTranscript cleaned transcript
* @return List of aligned words with timings
* @throws IOException if IO went wrong
*/
public List<WordResult> align(URL audioUrl, List<String> sentenceTranscript) throws IOException {
List<String> transcript = sentenceToWords(sentenceTranscript);
LongTextAligner aligner = new LongTextAligner(transcript, TUPLE_SIZE);
Map<Integer, WordResult> alignedWords = new TreeMap<Integer, WordResult>();
Queue<Range> ranges = new LinkedList<Range>();
Queue<List<String>> texts = new ArrayDeque<List<String>>();
Queue<TimeFrame> timeFrames = new ArrayDeque<TimeFrame>();
ranges.offer(new Range(0, transcript.size()));
texts.offer(transcript);
TimeFrame totalTimeFrame = TimeFrame.INFINITE;
timeFrames.offer(totalTimeFrame);
long lastFrame = TimeFrame.INFINITE.getEnd();
languageModel.setText(sentenceTranscript);
for (int i = 0; i < 4; ++i) {
if (i == 1) {
context.setLocalProperty("decoder->searchManager", "alignerSearchManager");
}
while (!texts.isEmpty()) {
assert texts.size() == ranges.size();
assert texts.size() == timeFrames.size();
List<String> text = texts.poll();
TimeFrame frame = timeFrames.poll();
Range range = ranges.poll();
logger.info("Aligning frame " + frame + " to text " + text + " range " + range);
recognizer.allocate();
if (i >= 1) {
grammar.setWords(text);
}
context.setSpeechSource(audioUrl.openStream(), frame);
List<WordResult> hypothesis = new ArrayList<WordResult>();
Result result;
while (null != (result = recognizer.recognize())) {
logger.info("Utterance result " + result.getTimedBestResult(true));
hypothesis.addAll(result.getTimedBestResult(false));
}
if (i == 0 && !hypothesis.isEmpty()) {
lastFrame = hypothesis.get(hypothesis.size() - 1).getTimeFrame().getEnd();
}
List<String> words = new ArrayList<String>();
for (WordResult wr : hypothesis) {
words.add(wr.getWord().getSpelling());
}
int[] alignment = aligner.align(words, range);
List<WordResult> results = hypothesis;
logger.info("Decoding result is " + results);
// dumpAlignment(transcript, alignment, results);
dumpAlignmentStats(transcript, alignment, results);
for (int j = 0; j < alignment.length; j++) {
if (alignment[j] != -1) {
alignedWords.put(alignment[j], hypothesis.get(j));
}
}
recognizer.deallocate();
}
scheduleNextAlignment(transcript, alignedWords, ranges, texts, timeFrames, lastFrame);
}
return new ArrayList<WordResult>(alignedWords.values());
}
public List<String> sentenceToWords(List<String> sentenceTranscript) {
ArrayList<String> transcript = new ArrayList<String>();
for (String sentence : sentenceTranscript) {
String[] words = sentence.split("\\s+");
for (String word : words) {
if (word.length() > 0)
transcript.add(word);
}
}
return transcript;
}
private void dumpAlignmentStats(List<String> transcript, int[] alignment, List<WordResult> results) {
int insertions = 0;
int deletions = 0;
int size = transcript.size();
int[] aid = alignment;
int lastId = -1;
for (int ij = 0; ij < aid.length; ++ij) {
if (aid[ij] == -1) {
insertions++;
} else {
if (aid[ij] - lastId > 1) {
deletions += aid[ij] - lastId;
}
lastId = aid[ij];
}
}
if (lastId >= 0 && transcript.size() - lastId > 1) {
deletions += transcript.size() - lastId;
}
logger.info(String.format("Size %d deletions %d insertions %d error rate %.2f", size, insertions, deletions,
(insertions + deletions) / ((float) size) * 100f));
}
private void scheduleNextAlignment(List<String> transcript, Map<Integer, WordResult> alignedWords, Queue<Range> ranges,
Queue<List<String>> texts, Queue<TimeFrame> timeFrames, long lastFrame) {
int prevKey = 0;
long prevStart = 0;
for (Map.Entry<Integer, WordResult> e : alignedWords.entrySet()) {
if (e.getKey() - prevKey > 1) {
checkedOffer(transcript, texts, timeFrames, ranges, prevKey, e.getKey() + 1, prevStart, e.getValue()
.getTimeFrame().getEnd());
}
prevKey = e.getKey();
prevStart = e.getValue().getTimeFrame().getStart();
}
if (transcript.size() - prevKey > 1) {
checkedOffer(transcript, texts, timeFrames, ranges, prevKey, transcript.size(), prevStart, lastFrame);
}
}
public void dumpAlignment(List<String> transcript, int[] alignment, List<WordResult> results) {
logger.info("Alignment");
int[] aid = alignment;
int lastId = -1;
for (int ij = 0; ij < aid.length; ++ij) {
if (aid[ij] == -1) {
logger.info(String.format("+ %s", results.get(ij)));
} else {
if (aid[ij] - lastId > 1) {
for (String result1 : transcript.subList(lastId + 1, aid[ij])) {
logger.info(String.format("- %-25s", result1));
}
} else {
logger.info(String.format(" %-25s", transcript.get(aid[ij])));
}
lastId = aid[ij];
}
}
if (lastId >= 0 && transcript.size() - lastId > 1) {
for (String result1 : transcript.subList(lastId + 1, transcript.size())) {
logger.info(String.format("- %-25s", result1));
}
}
}
private void checkedOffer(List<String> transcript, Queue<List<String>> texts, Queue<TimeFrame> timeFrames,
Queue<Range> ranges, int start, int end, long timeStart, long timeEnd) {
double wordDensity = ((double) (timeEnd - timeStart)) / (end - start);
// Skip the range if its words are too dense, i.e. the average
// word duration is below 10 milliseconds
if (wordDensity < 10.0 && (end - start) > 3) {
logger.info("Skipping text range due to a high density " + transcript.subList(start, end).toString());
return;
}
texts.offer(transcript.subList(start, end));
timeFrames.offer(new TimeFrame(timeStart, timeEnd));
ranges.offer(new Range(start, end - 1));
}
public TextTokenizer getTokenizer() {
return tokenizer;
}
public void setTokenizer(TextTokenizer wordExpander) {
this.tokenizer = wordExpander;
}
}
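For orientation, a minimal driver for the aligner above. This is a sketch only: just the tail of the align entry point appears in this diff, so the class name, constructor arguments and the align(URL, String) signature are assumptions (imports and exception handling omitted).

    // Hypothetical usage; names and signatures assumed, not confirmed here.
    URL audio = new File("speech.wav").toURI().toURL();
    SpeechAligner aligner = new SpeechAligner(
            "path/to/acoustic-model", "path/to/dictionary.dict", null);
    for (WordResult wr : aligner.align(audio, "one two three four five")) {
        System.out.println(wr.getWord() + " " + wr.getTimeFrame());
    }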

View file

@ -0,0 +1,91 @@
/*
* Copyright 2013 Carnegie Mellon University.
* Portions Copyright 2004 Sun Microsystems, Inc.
* Portions Copyright 2004 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*/
package edu.cmu.sphinx.api;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import edu.cmu.sphinx.recognizer.Recognizer;
import edu.cmu.sphinx.result.*;
/**
* High-level wrapper for {@link Result} instance.
*/
public final class SpeechResult {
private final Result result;
private final Lattice lattice;
/**
* Constructs recognition result based on {@link Result} object.
*
* @param result recognition result returned by {@link Recognizer}
*/
public SpeechResult(Result result) {
this.result = result;
if (result.toCreateLattice()) {
lattice = new Lattice(result);
new LatticeOptimizer(lattice).optimize();
lattice.computeNodePosteriors(1.0f);
} else
lattice = null;
}
/**
* Returns {@link List} of words of the recognition result.
* Within the list words are ordered by time frame.
*
* @return words that form the result
*/
public List<WordResult> getWords() {
return lattice != null ? lattice.getWordResultPath() : result.getTimedBestResult(false);
}
/**
* @return string representation of the result.
*/
public String getHypothesis() {
return result.getBestResultNoFiller();
}
/**
* Returns the N best hypotheses.
*
* @param n number of hypotheses to return
* @return {@link Collection} of the best hypotheses
*/
public Collection<String> getNbest(int n) {
if (lattice == null)
return new HashSet<String>();
return new Nbest(lattice).getNbest(n);
}
/**
* Returns lattice for the recognition result.
*
* @return lattice object
*/
public Lattice getLattice() {
return lattice;
}
/**
* Returns the Result object underlying this SpeechResult.
*
* @return the Result object stored in this.result
*/
public Result getResult() {
return result;
}
}
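As a quick illustration of the accessors above, a sketch of inspecting a result (assuming result is a SpeechResult obtained from one of the recognizers):

    System.out.println("Best: " + result.getHypothesis());
    for (WordResult wr : result.getWords()) {
        System.out.println(wr);                // words ordered by time frame
    }
    for (String s : result.getNbest(5)) {      // empty when no lattice was built
        System.out.println("nbest: " + s);
    }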

View file

@ -0,0 +1,20 @@
/*
* Copyright 2013 Carnegie Mellon University.
* Portions Copyright 2004 Sun Microsystems, Inc.
* Portions Copyright 2004 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*/
package edu.cmu.sphinx.api;
public class SpeechSourceProvider {
Microphone getMicrophone() {
return new Microphone(16000, 16, true, false);
}
}

View file

@ -0,0 +1,66 @@
/*
* Copyright 2013 Carnegie Mellon University.
* Portions Copyright 2004 Sun Microsystems, Inc.
* Portions Copyright 2004 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*/
package edu.cmu.sphinx.api;
import java.io.IOException;
import java.io.InputStream;
import edu.cmu.sphinx.util.TimeFrame;
/**
* Speech recognizer that works with audio resources.
*
* @see LiveSpeechRecognizer live speech recognizer
*/
public class StreamSpeechRecognizer extends AbstractSpeechRecognizer {
/**
* Constructs new stream recognizer.
*
* @param configuration configuration
* @throws IOException if an error occurred during model load
*/
public StreamSpeechRecognizer(Configuration configuration)
throws IOException
{
super(configuration);
}
public void startRecognition(InputStream stream) {
startRecognition(stream, TimeFrame.INFINITE);
}
/**
* Starts recognition process.
*
* Starts recognition process and optionally clears previous data.
*
* @param stream input stream to process
* @param timeFrame time range of the stream to process
* @see StreamSpeechRecognizer#stopRecognition()
*/
public void startRecognition(InputStream stream, TimeFrame timeFrame) {
recognizer.allocate();
context.setSpeechSource(stream, timeFrame);
}
/**
* Stops recognition process.
*
* Recognition process is paused until the next call to startRecognition.
*
* @see StreamSpeechRecognizer#startRecognition(InputStream, TimeFrame)
*/
public void stopRecognition() {
recognizer.deallocate();
}
}
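A hedged end-to-end sketch for the stream recognizer. The Configuration setters and the inherited getResult() accessor live in other files of this commit (not shown here), and the model resource paths are only the customary defaults, so treat all of them as assumptions (imports and exception handling omitted).

    Configuration configuration = new Configuration();
    configuration.setAcousticModelPath("resource:/edu/cmu/sphinx/models/en-us/en-us");
    configuration.setDictionaryPath("resource:/edu/cmu/sphinx/models/en-us/cmudict-en-us.dict");
    configuration.setLanguageModelPath("resource:/edu/cmu/sphinx/models/en-us/en-us.lm.bin");
    StreamSpeechRecognizer recognizer = new StreamSpeechRecognizer(configuration);
    recognizer.startRecognition(new FileInputStream("speech.wav"));
    SpeechResult result;
    while ((result = recognizer.getResult()) != null) {
        System.out.println(result.getHypothesis());
    }
    recognizer.stopRecognition();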

View file

@ -0,0 +1,154 @@
/*
* Copyright 1999-2004 Carnegie Mellon University.
* Portions Copyright 2004 Sun Microsystems, Inc.
* Portions Copyright 2004 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.decoder;
import edu.cmu.sphinx.decoder.search.SearchManager;
import edu.cmu.sphinx.result.Result;
import edu.cmu.sphinx.util.props.*;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Logger;
/** An abstract decoder which implements all functionality which is independent of the used decoding-paradigm (pull/push). */
public abstract class AbstractDecoder implements ResultProducer, Configurable {
/**
* The property that defines the name of the search manager to use
* */
@S4Component(type = SearchManager.class)
public final static String PROP_SEARCH_MANAGER = "searchManager";
protected SearchManager searchManager;
@S4ComponentList(type = ResultListener.class)
public static final String PROP_RESULT_LISTENERS = "resultListeners";
protected final List<ResultListener> resultListeners = new ArrayList<ResultListener>();
/**
* If set to true the used search-manager will be automatically allocated
* in <code>newProperties()</code>.
* */
@S4Boolean(defaultValue = false)
public static final String AUTO_ALLOCATE = "autoAllocate";
/**
* If set to <code>false</code>, all registered result listeners will be
* notified only for final results. By default, non-final results don't
* trigger notification, because in most applications the final utterance
* result is sufficient.
*/
@S4Boolean(defaultValue = false)
public static final String FIRE_NON_FINAL_RESULTS = "fireNonFinalResults";
private boolean fireNonFinalResults;
private String name;
protected Logger logger;
public AbstractDecoder() {
}
/**
* Abstract decoder to implement live and batch recognizers
* @param searchManager search manager to use
* @param fireNonFinalResults whether to fire non-final results during decoding
* @param autoAllocate whether to automatically allocate all components
* @param resultListeners listeners to get notification
*/
public AbstractDecoder(SearchManager searchManager, boolean fireNonFinalResults, boolean autoAllocate, List<ResultListener> resultListeners) {
String name = getClass().getName();
init( name, Logger.getLogger(name),
searchManager, fireNonFinalResults, autoAllocate, resultListeners);
}
/**
* Decode frames until recognition is complete
*
* @param referenceText the reference text (or null)
* @return a result
*/
public abstract Result decode(String referenceText);
public void newProperties(PropertySheet ps) throws PropertyException {
init( ps.getInstanceName(), ps.getLogger(), (SearchManager) ps.getComponent(PROP_SEARCH_MANAGER), ps.getBoolean(FIRE_NON_FINAL_RESULTS), ps.getBoolean(AUTO_ALLOCATE), ps.getComponentList(PROP_RESULT_LISTENERS, ResultListener.class));
}
private void init(String name, Logger logger, SearchManager searchManager, boolean fireNonFinalResults, boolean autoAllocate, List<ResultListener> listeners) {
this.name = name;
this.logger = logger;
this.searchManager = searchManager;
this.fireNonFinalResults = fireNonFinalResults;
if (autoAllocate) {
searchManager.allocate();
}
for (ResultListener listener : listeners) {
addResultListener(listener);
}
}
/** Allocate resources necessary for decoding */
public void allocate() {
searchManager.allocate();
}
/** Deallocate resources */
public void deallocate() {
searchManager.deallocate();
}
/**
* Adds a result listener to this recognizer. A result listener is called whenever a new result is generated by the
* recognizer. This method can be called in any state.
*
* @param resultListener the listener to add
*/
public void addResultListener(ResultListener resultListener) {
resultListeners.add(resultListener);
}
/**
* Removes a previously added result listener. This method can be called in any state.
*
* @param resultListener the listener to remove
*/
public void removeResultListener(ResultListener resultListener) {
resultListeners.remove(resultListener);
}
/**
* Fires new results as soon as they become available.
*
* @param result the new result
*/
protected void fireResultListeners(Result result) {
if (fireNonFinalResults || result.isFinal()) {
for (ResultListener resultListener : resultListeners) {
resultListener.newResult(result);
}
} else {
logger.finer("skipping non-final result " + result);
}
}
@Override
public String toString() {
return name;
}
}

View file

@ -0,0 +1,74 @@
/*
* Copyright 1999-2004 Carnegie Mellon University.
* Portions Copyright 2004 Sun Microsystems, Inc.
* Portions Copyright 2004 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.decoder;
import edu.cmu.sphinx.result.Result;
import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;
import edu.cmu.sphinx.util.props.S4Integer;
import edu.cmu.sphinx.decoder.search.SearchManager;
import java.util.List;
/** The primary decoder class */
public class Decoder extends AbstractDecoder {
public Decoder() {
// Keep this or else XML configuration fails.
}
/** The property for the number of features to recognize at once. */
@S4Integer(defaultValue = Integer.MAX_VALUE)
public final static String PROP_FEATURE_BLOCK_SIZE = "featureBlockSize";
private int featureBlockSize;
@Override
public void newProperties(PropertySheet ps) throws PropertyException {
super.newProperties(ps);
featureBlockSize = ps.getInt(PROP_FEATURE_BLOCK_SIZE);
}
/**
* Main decoder
*
* @param searchManager search manager to configure search space
* @param fireNonFinalResults should we notify about non-final results
* @param autoAllocate automatic allocation of all components
* @param resultListeners listeners to get signals
* @param featureBlockSize frequency of notification about results
*/
public Decoder( SearchManager searchManager, boolean fireNonFinalResults, boolean autoAllocate, List<ResultListener> resultListeners, int featureBlockSize) {
super( searchManager, fireNonFinalResults, autoAllocate, resultListeners);
this.featureBlockSize = featureBlockSize;
}
/**
* Decode frames until recognition is complete.
*
* @param referenceText the reference text (or null)
* @return a result
*/
@Override
public Result decode(String referenceText) {
searchManager.startRecognition();
Result result;
do {
result = searchManager.recognize(featureBlockSize);
if (result != null) {
result.setReferenceText(referenceText);
fireResultListeners(result);
}
} while (result != null && !result.isFinal());
searchManager.stopRecognition();
return result;
}
}
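A short sketch of driving the decode loop. As the comment in the default constructor hints, a Decoder is normally obtained from an XML configuration; the configuration URL and the component name "decoder" below are assumptions (exception handling omitted).

    ConfigurationManager cm = new ConfigurationManager(new URL("file:config.xml"));
    Decoder decoder = (Decoder) cm.lookup("decoder");  // component name assumed
    decoder.allocate();
    Result result = decoder.decode(null);              // run until a final result
    if (result != null) {
        System.out.println(result.getBestResultNoFiller());
    }
    decoder.deallocate();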

View file

@ -0,0 +1,104 @@
/*
*
* Copyright 1999-2004 Carnegie Mellon University.
* Portions Copyright 2004 Sun Microsystems, Inc.
* Portions Copyright 2004 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.decoder;
import edu.cmu.sphinx.frontend.*;
import edu.cmu.sphinx.frontend.endpoint.SpeechEndSignal;
import edu.cmu.sphinx.frontend.endpoint.SpeechStartSignal;
import edu.cmu.sphinx.result.Result;
import edu.cmu.sphinx.decoder.search.SearchManager;
import java.util.List;
/**
* A decoder which does not use the common pull-principle of S4 but processes only a single frame on each call of
* <code>decode()</code>. When using this decoder, make sure that the <code>AcousticScorer</code> used by the
* <code>SearchManager</code> can access some buffered <code>Data</code>s.
*/
public class FrameDecoder extends AbstractDecoder implements DataProcessor {
private DataProcessor predecessor;
private boolean isRecognizing;
private Result result;
public FrameDecoder( SearchManager searchManager, boolean fireNonFinalResults, boolean autoAllocate, List<ResultListener> listeners) {
super(searchManager, fireNonFinalResults, autoAllocate, listeners);
}
public FrameDecoder() {
}
/**
* Decode a single frame.
*
* @param referenceText the reference text (or null)
* @return a result
*/
@Override
public Result decode(String referenceText) {
return searchManager.recognize(1);
}
public Data getData() throws DataProcessingException {
Data d = getPredecessor().getData();
if (isRecognizing && (d instanceof FloatData || d instanceof DoubleData || d instanceof SpeechEndSignal)) {
result = decode(null);
if (result != null) {
fireResultListeners(result);
result = null;
}
}
// we also trigger recognition on a DataEndSignal to allow threaded scorers to shut down correctly
if (d instanceof DataEndSignal) {
searchManager.stopRecognition();
}
if (d instanceof SpeechStartSignal) {
searchManager.startRecognition();
isRecognizing = true;
result = null;
}
if (d instanceof SpeechEndSignal) {
searchManager.stopRecognition();
//fire results which were not yet final
if (result != null)
fireResultListeners(result);
isRecognizing = false;
}
return d;
}
public DataProcessor getPredecessor() {
return predecessor;
}
public void setPredecessor(DataProcessor predecessor) {
this.predecessor = predecessor;
}
public void initialize() {
}
}

View file

@ -0,0 +1,30 @@
/*
* Copyright 1999-2002 Carnegie Mellon University.
* Portions Copyright 2002 Sun Microsystems, Inc.
* Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.decoder;
import edu.cmu.sphinx.util.props.Configurable;
import edu.cmu.sphinx.result.Result;
import java.util.EventListener;
/** The listener interface for being informed when new results are generated. */
public interface ResultListener extends EventListener, Configurable {
/**
* Method called when a new result is generated
*
* @param result the new result
*/
public void newResult(Result result);
}
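A minimal listener sketch; since the interface extends Configurable, even an inline implementation must provide newProperties (the decoder variable stands for any ResultProducer):

    ResultListener printer = new ResultListener() {
        public void newResult(Result result) {
            if (result.isFinal())
                System.out.println(result.getBestResultNoFiller());
        }
        public void newProperties(PropertySheet ps) throws PropertyException {
            // nothing to configure
        }
    };
    decoder.addResultListener(printer);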

View file

@ -0,0 +1,33 @@
/*
* Copyright 1999-2004 Carnegie Mellon University.
* Portions Copyright 2004 Sun Microsystems, Inc.
* Portions Copyright 2004 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.decoder;
import edu.cmu.sphinx.util.props.Configurable;
/**
* Some API-elements shared by components which are able to produce <code>Result</code>s.
*
* @see edu.cmu.sphinx.result.Result
*/
public interface ResultProducer extends Configurable {
/** Registers a new listener for <code>Result</code>.
* @param resultListener listener to add
*/
void addResultListener(ResultListener resultListener);
/** Removes a listener from this <code>ResultProducer</code>-instance.
* @param resultListener listener to remove
*/
void removeResultListener(ResultListener resultListener);
}

View file

@ -0,0 +1,174 @@
package edu.cmu.sphinx.decoder.adaptation;
import java.util.ArrayList;
import java.util.Random;
import org.apache.commons.math3.util.FastMath;
import edu.cmu.sphinx.linguist.acoustic.tiedstate.Loader;
import edu.cmu.sphinx.linguist.acoustic.tiedstate.Pool;
/**
* Used for clustering Gaussians. Clustering is performed with the k-means
* algorithm, using the Euclidean distance as the distance criterion.
*
* @author Bogdan Petcu
*/
public class ClusteredDensityFileData {
private int numberOfClusters;
private int[] corespondingClass;
public ClusteredDensityFileData(Loader loader, int numberOfClusters) {
this.numberOfClusters = numberOfClusters;
kMeansClustering(loader, 30);
}
public int getNumberOfClusters() {
return this.numberOfClusters;
}
/**
* Used for accessing the cluster index that is specific to a Gaussian.
*
* @param gaussian
* provided in flattened i * numGaussiansPerState + gaussianIndex form.
* @return class index
*/
public int getClassIndex(int gaussian) {
return corespondingClass[gaussian];
}
/**
* Computes the Euclidean distance between two n-dimensional points.
*
* @param a
* - n-dimensional "a" point
* @param b
* - n-dimensional "b" point
* @return the Euclidean distance between a and b.
*/
private float euclidianDistance(float[] a, float[] b) {
double s = 0, d;
for (int i = 0; i < a.length; i++) {
d = a[i] - b[i];
s += d * d;
}
return (float) FastMath.sqrt(s);
}
/**
* Checks whether the two float arrays have the same components
*
* @param a
* - float array a
* @param b
* - float array b
* @return true if values from a are equal to the ones in b, else false.
*/
private boolean isEqual(float[] a, float[] b) {
if (a.length != b.length) {
return false;
}
for (int i = 0; i < a.length; i++) {
if (a[i] != b[i]) {
return false;
}
}
return true;
}
/**
* Performs the k-means clustering algorithm on the Gaussians.
* Clustering is done using the Euclidean distance criterion.
*
* @param loader loader providing the pool of Gaussian means
* @param maxIterations maximum number of iterations to run
*/
private void kMeansClustering(Loader loader, int maxIterations) {
Pool<float[]> initialData = loader.getMeansPool();
ArrayList<float[]> oldCentroids = new ArrayList<float[]>(
numberOfClusters);
ArrayList<float[]> centroids = new ArrayList<float[]>(numberOfClusters);
int numberOfElements = initialData.size(), nrOfIterations = maxIterations, index;
int[] count = new int[numberOfClusters];
double distance, min;
float[] currentValue, centroid;
float[][][] array = new float[numberOfClusters][numberOfElements][];
boolean converged = false;
Random randomGenerator = new Random();
for (int i = 0; i < numberOfClusters; i++) {
index = randomGenerator.nextInt(numberOfElements);
centroids.add(initialData.get(index));
oldCentroids.add(initialData.get(index));
count[i] = 0;
}
index = 0;
while (!converged && nrOfIterations > 0) {
corespondingClass = new int[initialData.size()];
array = new float[numberOfClusters][numberOfElements][];
for (int i = 0; i < numberOfClusters; i++) {
oldCentroids.set(i, centroids.get(i));
count[i] = 0;
}
for (int i = 0; i < initialData.size(); i++) {
currentValue = initialData.get(i);
min = this.euclidianDistance(oldCentroids.get(0), currentValue);
index = 0;
for (int k = 1; k < numberOfClusters; k++) {
distance = this.euclidianDistance(oldCentroids.get(k),
currentValue);
if (distance < min) {
min = distance;
index = k;
}
}
array[index][count[index]] = currentValue;
corespondingClass[i] = index;
count[index]++;
}
for (int i = 0; i < numberOfClusters; i++) {
centroid = new float[initialData.get(0).length];
if (count[i] > 0) {
for (int j = 0; j < count[i]; j++) {
for (int k = 0; k < initialData.get(0).length; k++) {
centroid[k] += array[i][j][k];
}
}
for (int k = 0; k < initialData.get(0).length; k++) {
centroid[k] /= count[i];
}
centroids.set(i, centroid);
}
}
converged = true;
for (int i = 0; i < numberOfClusters; i++) {
converged = converged
&& (this.isEqual(centroids.get(i), oldCentroids.get(i)));
}
nrOfIterations--;
}
}
}
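To make the flat indexing concrete, a sketch of how a caller looks up the cluster of a single Gaussian, following the convention used by Stats.collect below (the loader, mId, i and j variables are assumed to be in scope):

    ClusteredDensityFileData clusters = new ClusteredDensityFileData(loader, 8);
    Sphinx3Loader s3 = (Sphinx3Loader) loader;
    int gauPerState = s3.getNumGaussiansPerState();
    int numStreams = s3.getNumStreams();
    // cluster of Gaussian j in stream i of mixture mId
    int cluster = clusters.getClassIndex(
            mId * numStreams * gauPerState + i * gauPerState + j);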

View file

@ -0,0 +1,235 @@
package edu.cmu.sphinx.decoder.adaptation;
import edu.cmu.sphinx.api.SpeechResult;
import edu.cmu.sphinx.decoder.search.Token;
import edu.cmu.sphinx.frontend.FloatData;
import edu.cmu.sphinx.linguist.HMMSearchState;
import edu.cmu.sphinx.linguist.SearchState;
import edu.cmu.sphinx.linguist.acoustic.tiedstate.Loader;
import edu.cmu.sphinx.linguist.acoustic.tiedstate.Sphinx3Loader;
import edu.cmu.sphinx.util.LogMath;
/**
* This class is used for estimating an MLLR transform for each cluster of data.
* The clustering must have been performed beforehand using
* ClusteredDensityFileData.
*
* @author Bogdan Petcu
*/
public class Stats {
private ClusteredDensityFileData means;
private double[][][][][] regLs;
private double[][][][] regRs;
private int nrOfClusters;
private Sphinx3Loader loader;
private float varFlor;
private LogMath logMath = LogMath.getLogMath();
public Stats(Loader loader, ClusteredDensityFileData means) {
this.loader = (Sphinx3Loader) loader;
this.nrOfClusters = means.getNumberOfClusters();
this.means = means;
this.varFlor = (float) 1e-5;
this.invertVariances();
this.init();
}
private void init() {
int len = loader.getVectorLength()[0];
this.regLs = new double[nrOfClusters][][][][];
this.regRs = new double[nrOfClusters][][][];
for (int i = 0; i < nrOfClusters; i++) {
this.regLs[i] = new double[loader.getNumStreams()][][][];
this.regRs[i] = new double[loader.getNumStreams()][][];
for (int j = 0; j < loader.getNumStreams(); j++) {
len = loader.getVectorLength()[j];
this.regLs[i][j] = new double[len][len + 1][len + 1];
this.regRs[i][j] = new double[len][len + 1];
}
}
}
public ClusteredDensityFileData getClusteredData() {
return this.means;
}
public double[][][][][] getRegLs() {
return regLs;
}
public double[][][][] getRegRs() {
return regRs;
}
/**
* Used for inverting variances.
*/
private void invertVariances() {
for (int i = 0; i < loader.getNumStates(); i++) {
for (int k = 0; k < loader.getNumGaussiansPerState(); k++) {
for (int l = 0; l < loader.getVectorLength()[0]; l++) {
if (loader.getVariancePool().get(
i * loader.getNumGaussiansPerState() + k)[l] <= 0.) {
this.loader.getVariancePool().get(
i * loader.getNumGaussiansPerState() + k)[l] = (float) 0.5;
} else if (loader.getVariancePool().get(
i * loader.getNumGaussiansPerState() + k)[l] < varFlor) {
this.loader.getVariancePool().get(
i * loader.getNumGaussiansPerState() + k)[l] = (float) (1. / varFlor);
} else {
this.loader.getVariancePool().get(
i * loader.getNumGaussiansPerState() + k)[l] = (float) (1. / loader
.getVariancePool().get(
i * loader.getNumGaussiansPerState()
+ k)[l]);
}
}
}
}
}
/**
* Computes posterior values for each component.
*
* @param componentScores
* from which the posterior values are computed.
* @param numStreams
* Number of feature streams
* @return posterior values for all components.
*/
private float[] computePosterios(float[] componentScores, int numStreams) {
float[] posteriors = componentScores;
int step = componentScores.length / numStreams;
int startIdx = 0;
for (int i = 0; i < numStreams; i++) {
float max = posteriors[startIdx];
for (int j = startIdx + 1; j < startIdx + step; j++) {
if (posteriors[j] > max) {
max = posteriors[j];
}
}
for (int j = startIdx; j < startIdx + step; j++) {
posteriors[j] = (float) logMath.logToLinear(posteriors[j] - max);
}
startIdx += step;
}
return posteriors;
}
/**
* This method directly collects and uses counts. The counts are
* collected and stored separately for each cluster.
*
* @param result
* Result object to collect counts from.
* @throws Exception if something went wrong
*/
public void collect(SpeechResult result) throws Exception {
Token token = result.getResult().getBestToken();
float[] componentScore, featureVector, posteriors, tmean;
int[] len;
float dnom, wtMeanVar, wtDcountVar, wtDcountVarMean, mean;
int mId, cluster;
int numStreams, gauPerState;
if (token == null)
throw new Exception("Best token not found!");
do {
FloatData feature = (FloatData) token.getData();
SearchState ss = token.getSearchState();
if (!(ss instanceof HMMSearchState && ss.isEmitting())) {
token = token.getPredecessor();
continue;
}
componentScore = token.calculateComponentScore(feature);
featureVector = FloatData.toFloatData(feature).getValues();
mId = (int) ((HMMSearchState) token.getSearchState()).getHMMState()
.getMixtureId();
if (loader instanceof Sphinx3Loader && ((Sphinx3Loader) loader).hasTiedMixtures())
// use CI phone ID for tied mixture model
mId = ((Sphinx3Loader) loader).getSenone2Ci()[mId];
len = loader.getVectorLength();
numStreams = loader.getNumStreams();
gauPerState = loader.getNumGaussiansPerState();
posteriors = this.computePosterios(componentScore, numStreams);
int featVectorStartIdx = 0;
for (int i = 0; i < numStreams; i++) {
for (int j = 0; j < gauPerState; j++) {
cluster = means.getClassIndex(mId * numStreams
* gauPerState + i * gauPerState + j);
dnom = posteriors[i * gauPerState + j];
if (dnom > 0.) {
tmean = loader.getMeansPool().get(
mId * numStreams * gauPerState + i
* gauPerState + j);
for (int k = 0; k < len[i]; k++) {
mean = posteriors[i * gauPerState + j]
* featureVector[k + featVectorStartIdx];
wtMeanVar = mean
* loader.getVariancePool().get(
mId * numStreams * gauPerState + i
* gauPerState + j)[k];
wtDcountVar = dnom
* loader.getVariancePool().get(
mId * numStreams * gauPerState + i
* gauPerState + j)[k];
for (int p = 0; p < len[i]; p++) {
wtDcountVarMean = wtDcountVar * tmean[p];
for (int q = p; q < len[i]; q++) {
regLs[cluster][i][k][p][q] += wtDcountVarMean
* tmean[q];
}
regLs[cluster][i][k][p][len[i]] += wtDcountVarMean;
regRs[cluster][i][k][p] += wtMeanVar * tmean[p];
}
regLs[cluster][i][k][len[i]][len[i]] += wtDcountVar;
regRs[cluster][i][k][len[i]] += wtMeanVar;
}
}
}
featVectorStartIdx += len[i];
}
token = token.getPredecessor();
} while (token != null);
}
/**
* Fills the lower triangular part of Leggetter's set of G matrices.
*/
public void fillRegLowerPart() {
for (int i = 0; i < this.nrOfClusters; i++) {
for (int j = 0; j < loader.getNumStreams(); j++) {
for (int l = 0; l < loader.getVectorLength()[j]; l++) {
for (int p = 0; p <= loader.getVectorLength()[j]; p++) {
for (int q = p + 1; q <= loader.getVectorLength()[j]; q++) {
regLs[i][j][l][q][p] = regLs[i][j][l][p][q];
}
}
}
}
}
}
public Transform createTransform() {
Transform transform = new Transform(loader, nrOfClusters);
transform.update(this);
return transform;
}
}
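Putting the adaptation pieces together, a hedged sketch of the collect/estimate/store flow (the loader and the recognized utterances are assumed to exist, and the surrounding method is assumed to declare throws Exception because collect does):

    ClusteredDensityFileData clusters = new ClusteredDensityFileData(loader, 1);
    Stats stats = new Stats(loader, clusters);
    for (SpeechResult result : recognizedUtterances) {  // assumed inputs
        stats.collect(result);
    }
    Transform transform = stats.createTransform();
    transform.store("mllr_matrix", 0);                  // index 0: the sole cluster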

View file

@ -0,0 +1,179 @@
package edu.cmu.sphinx.decoder.adaptation;
import java.io.File;
import java.io.PrintWriter;
import java.util.Scanner;
import org.apache.commons.math3.linear.Array2DRowRealMatrix;
import org.apache.commons.math3.linear.ArrayRealVector;
import org.apache.commons.math3.linear.DecompositionSolver;
import org.apache.commons.math3.linear.LUDecomposition;
import org.apache.commons.math3.linear.RealMatrix;
import org.apache.commons.math3.linear.RealVector;
import edu.cmu.sphinx.linguist.acoustic.tiedstate.Sphinx3Loader;
public class Transform {
private float[][][][] As;
private float[][][] Bs;
private Sphinx3Loader loader;
private int nrOfClusters;
public Transform(Sphinx3Loader loader, int nrOfClusters) {
this.loader = loader;
this.nrOfClusters = nrOfClusters;
}
/**
* Used for access to A matrix.
*
* @return A matrix (representing A from A*x + B = C)
*/
public float[][][][] getAs() {
return As;
}
/**
* Used for access to B matrix.
*
* @return B matrix (representing B from A*x + B = C)
*/
public float[][][] getBs() {
return Bs;
}
/**
* Writes the transformation to file in a format that could further be used
* in Sphinx3 and Sphinx4.
*
* @param filePath path to store transform matrix
* @param index index of transform to store
* @throws Exception if something went wrong
*/
public void store(String filePath, int index) throws Exception {
PrintWriter writer = new PrintWriter(filePath, "UTF-8");
// nMllrClass
writer.println("1");
writer.println(loader.getNumStreams());
for (int i = 0; i < loader.getNumStreams(); i++) {
writer.println(loader.getVectorLength()[i]);
for (int j = 0; j < loader.getVectorLength()[i]; j++) {
for (int k = 0; k < loader.getVectorLength()[i]; ++k) {
writer.print(As[index][i][j][k]);
writer.print(" ");
}
writer.println();
}
for (int j = 0; j < loader.getVectorLength()[i]; j++) {
writer.print(Bs[index][i][j]);
writer.print(" ");
}
writer.println();
for (int j = 0; j < loader.getVectorLength()[i]; j++) {
writer.print("1.0 ");
}
writer.println();
}
writer.close();
}
/**
* Used for computing the actual transformations (A and B matrices). These
* are stored in As and Bs.
*/
private void computeMllrTransforms(double[][][][][] regLs,
double[][][][] regRs) {
int len;
DecompositionSolver solver;
RealMatrix coef;
RealVector vect, ABloc;
for (int c = 0; c < nrOfClusters; c++) {
this.As[c] = new float[loader.getNumStreams()][][];
this.Bs[c] = new float[loader.getNumStreams()][];
for (int i = 0; i < loader.getNumStreams(); i++) {
len = loader.getVectorLength()[i];
this.As[c][i] = new float[len][len];
this.Bs[c][i] = new float[len];
for (int j = 0; j < len; ++j) {
coef = new Array2DRowRealMatrix(regLs[c][i][j], false);
solver = new LUDecomposition(coef).getSolver();
vect = new ArrayRealVector(regRs[c][i][j], false);
ABloc = solver.solve(vect);
for (int k = 0; k < len; ++k) {
this.As[c][i][j][k] = (float) ABloc.getEntry(k);
}
this.Bs[c][i][j] = (float) ABloc.getEntry(len);
}
}
}
}
/**
* Read the transformation from a file
*
* @param filePath file path to load transform
* @throws Exception if something went wrong
*/
public void load(String filePath) throws Exception {
Scanner input = new Scanner(new File(filePath));
int numStreams, nMllrClass;
nMllrClass = input.nextInt();
assert nMllrClass == 1;
numStreams = input.nextInt();
// Size the per-stream arrays up front. The original code sized
// vectorLength for a single stream and re-allocated As[0]/Bs[0] on
// every pass of the loop, which broke files with numStreams > 1.
int[] vectorLength = new int[numStreams];
this.As = new float[nMllrClass][numStreams][][];
this.Bs = new float[nMllrClass][numStreams][];
for (int i = 0; i < numStreams; i++) {
vectorLength[i] = input.nextInt();
int length = vectorLength[i];
this.As[0][i] = new float[length][length];
this.Bs[0][i] = new float[length];
for (int j = 0; j < length; j++) {
for (int k = 0; k < length; ++k) {
As[0][i][j][k] = input.nextFloat();
}
}
for (int j = 0; j < length; j++) {
Bs[0][i][j] = input.nextFloat();
}
}
input.close();
}
/**
* Stores in current object a transform generated on the provided stats.
*
* @param stats
* provided stats that were previously collected from Result
* objects.
*/
public void update(Stats stats) {
stats.fillRegLowerPart();
As = new float[nrOfClusters][][][];
Bs = new float[nrOfClusters][][];
this.computeMllrTransforms(stats.getRegLs(), stats.getRegRs());
}
}

View file

@ -0,0 +1,71 @@
/*
* Copyright 1999-2002 Carnegie Mellon University.
* Portions Copyright 2002 Sun Microsystems, Inc.
* Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.decoder.pruner;
import edu.cmu.sphinx.decoder.search.ActiveList;
import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;
/** A Null pruner. Does no actual pruning */
public class NullPruner implements Pruner {
/* (non-Javadoc)
* @see edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util.props.PropertySheet)
*/
public void newProperties(PropertySheet ps) throws PropertyException {
}
/** Creates a simple pruner */
public NullPruner() {
}
/** starts the pruner */
public void startRecognition() {
}
/**
* prunes the given set of states
*
* @param activeList the active list of tokens
* @return the pruned (and possibly new) activeList
*/
public ActiveList prune(ActiveList activeList) {
return activeList;
}
/** Performs post-recognition cleanup. */
public void stopRecognition() {
}
/* (non-Javadoc)
* @see edu.cmu.sphinx.decoder.pruner.Pruner#allocate()
*/
public void allocate() {
}
/* (non-Javadoc)
* @see edu.cmu.sphinx.decoder.pruner.Pruner#deallocate()
*/
public void deallocate() {
}
}

View file

@ -0,0 +1,49 @@
/*
* Copyright 1999-2002 Carnegie Mellon University.
* Portions Copyright 2002 Sun Microsystems, Inc.
* Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.decoder.pruner;
import edu.cmu.sphinx.decoder.search.ActiveList;
import edu.cmu.sphinx.util.props.Configurable;
/** Provides a mechanism for pruning a set of StateTokens */
public interface Pruner extends Configurable {
/** Starts the pruner */
public void startRecognition();
/**
* prunes the given set of states
*
* @param stateTokenList a list containing StateToken objects to be scored
* @return the pruned list (may be the same list as stateTokenList)
*/
public ActiveList prune(ActiveList stateTokenList);
/** Performs post-recognition cleanup. */
public void stopRecognition();
/** Allocates resources necessary for this pruner */
public void allocate();
/** Deallocates resources necessary for this pruner */
public void deallocate();
}

View file

@ -0,0 +1,80 @@
/*
* Copyright 1999-2002 Carnegie Mellon University.
* Portions Copyright 2002 Sun Microsystems, Inc.
* Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.decoder.pruner;
import edu.cmu.sphinx.decoder.search.ActiveList;
import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;
/** Performs the default pruning behavior which is to invoke the purge on the active list */
public class SimplePruner implements Pruner {
private String name;
/* (non-Javadoc)
* @see edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util.props.PropertySheet)
*/
public void newProperties(PropertySheet ps) throws PropertyException {
}
public SimplePruner() {
}
/* (non-Javadoc)
* @see edu.cmu.sphinx.util.props.Configurable#getName()
*/
public String getName() {
return name;
}
/** Starts the pruner */
public void startRecognition() {
}
/**
* prunes the given set of states
*
* @param activeList a activeList of tokens
*/
public ActiveList prune(ActiveList activeList) {
return activeList.purge();
}
/** Performs post-recognition cleanup. */
public void stopRecognition() {
}
/* (non-Javadoc)
* @see edu.cmu.sphinx.decoder.pruner.Pruner#allocate()
*/
public void allocate() {
}
/* (non-Javadoc)
* @see edu.cmu.sphinx.decoder.pruner.Pruner#deallocate()
*/
public void deallocate() {
}
}

View file

@ -0,0 +1,57 @@
/*
* Copyright 1999-2002 Carnegie Mellon University.
* Portions Copyright 2002 Sun Microsystems, Inc.
* Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.decoder.scorer;
import edu.cmu.sphinx.frontend.Data;
import edu.cmu.sphinx.util.props.Configurable;
import java.util.List;
/** Provides a mechanism for scoring a set of HMM states */
public interface AcousticScorer extends Configurable {
/** Allocates resources for this scorer */
public void allocate();
/** Deallocates resources for this scorer */
public void deallocate();
/** starts the scorer */
public void startRecognition();
/** stops the scorer */
public void stopRecognition();
/**
* Scores the given set of states over previously stored acoustic data if any or a new one
*
* @param scorableList a list containing Scoreable objects to be scored
* @return the best scoring scoreable, or null if there are no more frames to score
*/
public Data calculateScores(List<? extends Scoreable> scorableList);
/**
* Scores the given set of states over acoustic data from the frontend
* and stores that data in a queue for later reuse
*
* @param scorableList a list containing Scoreable objects to be scored
* @return the best scoring scoreable, or null if there are no more frames to score
*/
public Data calculateScoresAndStoreData(List<? extends Scoreable> scorableList);
}

View file

@ -0,0 +1,67 @@
package edu.cmu.sphinx.decoder.scorer;
import edu.cmu.sphinx.decoder.search.SimpleBreadthFirstSearchManager;
import edu.cmu.sphinx.decoder.search.Token;
import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;
import edu.cmu.sphinx.util.props.S4Component;
import java.util.List;
import java.util.logging.Logger;
/**
* Normalizes a set of Tokens against the best scoring Token of a background model.
*
* @author Holger Brandl
*/
public class BackgroundModelNormalizer implements ScoreNormalizer {
/**
* The active list provider used to determine the best token for normalization. If this reference is not defined, no
* normalization will be applied.
*/
@S4Component(type = SimpleBreadthFirstSearchManager.class, mandatory = false)
public static final String ACTIVE_LIST_PROVIDER = "activeListProvider";
private SimpleBreadthFirstSearchManager activeListProvider;
private Logger logger;
public BackgroundModelNormalizer() {
}
public void newProperties(PropertySheet ps) throws PropertyException {
this.activeListProvider = (SimpleBreadthFirstSearchManager) ps.getComponent(ACTIVE_LIST_PROVIDER);
this.logger = ps.getLogger();
// only warn when normalization is effectively disabled
if (activeListProvider == null) {
logger.warning("no active list set.");
}
}
/**
* @param activeListProvider The active list provider used to determine the best token for normalization. If this reference is not defined, no
* normalization will be applied.
*/
public BackgroundModelNormalizer(SimpleBreadthFirstSearchManager activeListProvider) {
this.activeListProvider = activeListProvider;
this.logger = Logger.getLogger(getClass().getName());
// only warn when normalization is effectively disabled
if (activeListProvider == null) {
logger.warning("no active list set.");
}
}
public Scoreable normalize(List<? extends Scoreable> scoreableList, Scoreable bestToken) {
if (activeListProvider == null) {
return bestToken;
}
Token normToken = activeListProvider.getActiveList().getBestToken();
float normScore = normToken.getScore();
for (Scoreable scoreable : scoreableList) {
if (scoreable instanceof Token) {
scoreable.normalizeScore(normScore);
}
}
return bestToken;
}
}

View file

@ -0,0 +1,30 @@
package edu.cmu.sphinx.decoder.scorer;
import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;
import java.util.List;
/**
* Performs a simple normalization of all token scores by the score of the best token.
*
* @author Holger Brandl
*/
public class MaxScoreNormalizer implements ScoreNormalizer {
public void newProperties(PropertySheet ps) throws PropertyException {
}
public MaxScoreNormalizer() {
}
public Scoreable normalize(List<? extends Scoreable> scoreableList, Scoreable bestToken) {
for (Scoreable scoreable : scoreableList) {
scoreable.normalizeScore(bestToken.getScore());
}
return bestToken;
}
}

View file

@ -0,0 +1,27 @@
package edu.cmu.sphinx.decoder.scorer;
import edu.cmu.sphinx.util.props.Configurable;
import java.util.List;
/**
* Describes all API-elements that are necessary to normalize token-scores after these have been computed by an
* AcousticScorer.
*
* @author Holger Brandl
* @see edu.cmu.sphinx.decoder.scorer.AcousticScorer
* @see edu.cmu.sphinx.decoder.search.Token
*/
public interface ScoreNormalizer extends Configurable {
/**
* Normalizes the scores of a set of Tokens.
*
* @param scoreableList The set of scores to be normalized
* @param bestToken The best scoring Token of the above mentioned list. Although not strictly necessary it's
* included because of convenience reasons and to reduce computational overhead.
* @return The best token after all <code>Token</code>s have been normalized. In most cases normalization won't
* change the order but to keep the API open for any kind of approach it seemed reasonable to include this.
*/
Scoreable normalize(List<? extends Scoreable> scoreableList, Scoreable bestToken);
}

View file

@ -0,0 +1,35 @@
/*
* Copyright 1999-2010 Carnegie Mellon University.
* Portions Copyright 2002 Sun Microsystems, Inc.
* Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.decoder.scorer;
import edu.cmu.sphinx.frontend.Data;
/** An entity that can provide a score for given data */
public interface ScoreProvider {
/**
* Provides the score
*
* @param data data to score
* @return the score
*/
public float getScore(Data data);
/**
* Provides component score
*
* @param feature data to score
* @return the score
*/
public float[] getComponentScore(Data feature);
}

View file

@ -0,0 +1,68 @@
/*
* Copyright 1999-2002 Carnegie Mellon University.
* Portions Copyright 2002 Sun Microsystems, Inc.
* Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.decoder.scorer;
import edu.cmu.sphinx.frontend.Data;
import java.util.Comparator;
/** Represents an entity that can be scored against a data */
public interface Scoreable extends Data {
/**
* A {@code Scoreable} comparator that is used to order scoreables according to their score,
* in descending order.
*
* <p>Note: since a higher score results in a lower natural order,
* statements such as {@code Collections.min(list, Scoreable.COMPARATOR)}
* actually return the Scoreable with the <b>highest</b> score,
* in contrast to the natural meaning of the word "min".
*/
Comparator<Scoreable> COMPARATOR = new Comparator<Scoreable>() {
public int compare(Scoreable t1, Scoreable t2) {
if (t1.getScore() > t2.getScore()) {
return -1;
} else if (t1.getScore() == t2.getScore()) {
return 0;
} else {
return 1;
}
}
};
/**
* Calculates a score against the given data. The score can be retrieved with {@link #getScore}
*
* @param data the data to be scored
* @return the score for the data
*/
public float calculateScore(Data data);
/**
* Retrieves a previously calculated (and possibly normalized) score
*
* @return the score
*/
public float getScore();
/**
* Normalizes a previously calculated score
*
* @param maxScore maximum score to use for norm
* @return the normalized score
*/
public float normalizeScore(float maxScore);
}
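To make the inverted ordering of COMPARATOR concrete (tokens being any list of Scoreables):

    Scoreable best  = Collections.min(tokens, Scoreable.COMPARATOR);  // highest score
    Scoreable worst = Collections.max(tokens, Scoreable.COMPARATOR);  // lowest score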

View file

@ -0,0 +1,194 @@
package edu.cmu.sphinx.decoder.scorer;
import edu.cmu.sphinx.decoder.search.Token;
import edu.cmu.sphinx.frontend.*;
import edu.cmu.sphinx.frontend.endpoint.SpeechEndSignal;
import edu.cmu.sphinx.frontend.util.DataUtil;
import edu.cmu.sphinx.util.props.ConfigurableAdapter;
import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;
import edu.cmu.sphinx.util.props.S4Component;
import java.util.LinkedList;
import java.util.List;
/**
* Implements some basic scorer functionality, including a simple default
* acoustic scoring implementation which scores within the current thread, that
* can be changed by overriding the {@link #doScoring} method.
*
* <p>
* Note that all scores are maintained in LogMath log base.
*
* @author Holger Brandl
*/
public class SimpleAcousticScorer extends ConfigurableAdapter implements AcousticScorer {
/** Property that defines the frontend to retrieve features from for scoring */
@S4Component(type = BaseDataProcessor.class)
public final static String FEATURE_FRONTEND = "frontend";
protected BaseDataProcessor frontEnd;
/**
* An optional post-processor for computed scores that will normalize
* scores. If not set, no normalization will be applied and the token scores
* will be returned unchanged.
*/
@S4Component(type = ScoreNormalizer.class, mandatory = false)
public final static String SCORE_NORMALIZER = "scoreNormalizer";
protected ScoreNormalizer scoreNormalizer;
private LinkedList<Data> storedData;
private boolean seenEnd = false;
@Override
public void newProperties(PropertySheet ps) throws PropertyException {
super.newProperties(ps);
this.frontEnd = (BaseDataProcessor) ps.getComponent(FEATURE_FRONTEND);
this.scoreNormalizer = (ScoreNormalizer) ps.getComponent(SCORE_NORMALIZER);
storedData = new LinkedList<Data>();
}
/**
* @param frontEnd
* the frontend to retrieve features from for scoring
* @param scoreNormalizer
* optional post-processor for computed scores that will
* normalize scores. If not set, no normalization will be applied
* and the token scores will be returned unchanged.
*/
public SimpleAcousticScorer(BaseDataProcessor frontEnd, ScoreNormalizer scoreNormalizer) {
initLogger();
this.frontEnd = frontEnd;
this.scoreNormalizer = scoreNormalizer;
storedData = new LinkedList<Data>();
}
public SimpleAcousticScorer() {
}
/**
* Scores the given set of states.
*
* @param scoreableList
* A list containing scoreable objects to be scored
* @return The best scoring scoreable, or <code>null</code> if there are no
* more features to score
*/
public Data calculateScores(List<? extends Scoreable> scoreableList) {
Data data;
if (storedData.isEmpty()) {
while ((data = getNextData()) instanceof Signal) {
if (data instanceof SpeechEndSignal) {
seenEnd = true;
break;
}
if (data instanceof DataEndSignal) {
if (seenEnd)
return null;
else
break;
}
}
if (data == null)
return null;
} else {
data = storedData.poll();
}
return calculateScoresForData(scoreableList, data);
}
public Data calculateScoresAndStoreData(List<? extends Scoreable> scoreableList) {
Data data;
while ((data = getNextData()) instanceof Signal) {
if (data instanceof SpeechEndSignal) {
seenEnd = true;
break;
}
if (data instanceof DataEndSignal) {
if (seenEnd)
return null;
else
break;
}
}
if (data == null)
return null;
storedData.add(data);
return calculateScoresForData(scoreableList, data);
}
protected Data calculateScoresForData(List<? extends Scoreable> scoreableList, Data data) {
if (data instanceof SpeechEndSignal || data instanceof DataEndSignal) {
return data;
}
if (scoreableList.isEmpty())
return null;
// convert the data to FloatData if not yet done
if (data instanceof DoubleData)
data = DataUtil.DoubleData2FloatData((DoubleData) data);
Scoreable bestToken = doScoring(scoreableList, data);
// apply optional score normalization
if (scoreNormalizer != null && bestToken instanceof Token)
bestToken = scoreNormalizer.normalize(scoreableList, bestToken);
return bestToken;
}
protected Data getNextData() {
Data data = frontEnd.getData();
return data;
}
public void startRecognition() {
storedData.clear();
}
public void stopRecognition() {
// nothing needs to be done here
}
/**
* Scores a list of <code>Scoreable</code>s given a <code>Data</code>
* -object.
*
* @param scoreableList
* The list of Scoreables to be scored
* @param data
* The <code>Data</code>-object to be used for scoring.
* @param <T> type of the scoreables
* @return the best scoring <code>Scoreable</code> or <code>null</code> if
* the list of scoreables was empty.
*/
protected <T extends Scoreable> T doScoring(List<T> scoreableList, Data data) {
T best = null;
float bestScore = -Float.MAX_VALUE;
for (T item : scoreableList) {
item.calculateScore(data);
if (item.getScore() > bestScore) {
bestScore = item.getScore();
best = item;
}
}
return best;
}
// Even if we don't do any meaningful allocation here, we implement these
// methods because most extending scorers do need them.
public void allocate() {
}
public void deallocate() {
}
}

View file

@ -0,0 +1,200 @@
/*
* Copyright 1999-2002 Carnegie Mellon University.
* Portions Copyright 2002 Sun Microsystems, Inc.
* Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.decoder.scorer;
import edu.cmu.sphinx.frontend.Data;
import edu.cmu.sphinx.frontend.BaseDataProcessor;
import edu.cmu.sphinx.frontend.DataProcessingException;
import edu.cmu.sphinx.util.CustomThreadFactory;
import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;
import edu.cmu.sphinx.util.props.S4Boolean;
import edu.cmu.sphinx.util.props.S4Integer;
import java.util.*;
import java.util.concurrent.*;
/**
* An acoustic scorer that breaks the scoring up into a configurable number of separate threads.
* <p>
* All scores are maintained in LogMath log base
*/
public class ThreadedAcousticScorer extends SimpleAcousticScorer {
/**
* The property that controls the thread priority of scoring threads.
* Must be a value between {@link Thread#MIN_PRIORITY} and {@link Thread#MAX_PRIORITY}, inclusive.
* The default is {@link Thread#NORM_PRIORITY}.
*/
@S4Integer(defaultValue = Thread.NORM_PRIORITY)
public final static String PROP_THREAD_PRIORITY = "threadPriority";
/**
* The property that controls the number of threads that are used to score HMM states. If the isCpuRelative
* property is false, then this is the exact number of threads that are used to score HMM states. If the isCpuRelative
* property is true, then this value is combined with the number of available processors on the system. If you want
* to have one thread per CPU available to score states, set the NUM_THREADS property to 0 and the isCpuRelative to
* true. If you want exactly one thread to process scores set NUM_THREADS to 1 and isCpuRelative to false.
* <p>
* If the value is 1 and isCpuRelative is false, no additional thread will be instantiated, and all computation will be
* done in the calling thread itself. The default value is 0.
*/
@S4Integer(defaultValue = 0)
public final static String PROP_NUM_THREADS = "numThreads";
/**
* The property that controls whether the number of available CPUs on the system is used when determining
* the number of threads to use for scoring. If true, the NUM_THREADS property is combined with the available number
* of CPUS to determine the number of threads. Note that the number of threads is constrained to be never lower than
* zero. Also, if the number of threads is 0, the states are scored on the calling thread, no separate threads are
* started. The default value is true.
*/
@S4Boolean(defaultValue = true)
public final static String PROP_IS_CPU_RELATIVE = "isCpuRelative";
/**
* The property that controls the minimum number of scoreables sent to a thread. This is used to prevent
* over threading of the scoring that could happen if the number of threads is high compared to the size of the
* active list. The default is 10
*/
@S4Integer(defaultValue = 10)
public final static String PROP_MIN_SCOREABLES_PER_THREAD = "minScoreablesPerThread";
private final static String className = ThreadedAcousticScorer.class.getSimpleName();
private int numThreads; // number of threads in use
private int threadPriority;
private int minScoreablesPerThread; // min scoreables sent to a thread
private ExecutorService executorService;
/**
* @param frontEnd
* the frontend to retrieve features from for scoring
* @param scoreNormalizer
* optional post-processor for computed scores that will
* normalize scores. If not set, no normalization will applied
* and the token scores will be returned unchanged.
* @param numThreads
* the number of threads that are used to score HMM states. If
* the isCpuRelative property is false, then this is the exact
* number of threads that are used to score HMM states. If the
* isCpuRelative property is true, then this value is combined
* with the number of available processors on the system. If you
* want to have one thread per CPU available to score states, set
* the NUM_THREADS property to 0 and the isCpuRelative to true.
* If you want exactly one thread to process scores set
* NUM_THREADS to 1 and isCpuRelative to false.
* <p>
* If the value is 1 and isCpuRelative is false, no additional thread
* will be instantiated, and all computation will be done in the
* calling thread itself. The default value is 0.
* @param cpuRelative
* controls whether the number of available CPUs on the system is
* used when determining the number of threads to use for
* scoring. If true, the NUM_THREADS property is combined with
* the available number of CPUS to determine the number of
* threads. Note that the number of threads is constrained to be
* never lower than zero. Also, if the number of threads is 0,
* the states are scored on the calling thread, no separate
* threads are started. The default value is false.
* @param minScoreablesPerThread
* the minimum number of scoreables sent to a thread. This is
* used to prevent over threading of the scoring that could
* happen if the number of threads is high compared to the size
* of the active list. The default is 10
* @param threadPriority
* the thread priority of scoring threads. Must be a value between
* {@link Thread#MIN_PRIORITY} and {@link Thread#MAX_PRIORITY}, inclusive.
* The default is {@link Thread#NORM_PRIORITY}.
*/
public ThreadedAcousticScorer(BaseDataProcessor frontEnd, ScoreNormalizer scoreNormalizer,
int minScoreablesPerThread, boolean cpuRelative, int numThreads, int threadPriority) {
super(frontEnd, scoreNormalizer);
init(minScoreablesPerThread, cpuRelative, numThreads, threadPriority);
}
public ThreadedAcousticScorer() {
}
@Override
public void newProperties(PropertySheet ps) throws PropertyException {
super.newProperties(ps);
init(ps.getInt(PROP_MIN_SCOREABLES_PER_THREAD), ps.getBoolean(PROP_IS_CPU_RELATIVE),
ps.getInt(PROP_NUM_THREADS), ps.getInt(PROP_THREAD_PRIORITY));
}
private void init(int minScoreablesPerThread, boolean cpuRelative, int numThreads, int threadPriority) {
this.minScoreablesPerThread = minScoreablesPerThread;
if (cpuRelative) {
numThreads += Runtime.getRuntime().availableProcessors();
}
this.numThreads = numThreads;
this.threadPriority = threadPriority;
}
@Override
public void allocate() {
super.allocate();
if (executorService == null) {
if (numThreads > 1) {
logger.fine("# of scoring threads: " + numThreads);
executorService = Executors.newFixedThreadPool(numThreads,
new CustomThreadFactory(className, true, threadPriority));
} else {
logger.fine("no scoring threads");
}
}
}
@Override
public void deallocate() {
super.deallocate();
if (executorService != null) {
executorService.shutdown();
executorService = null;
}
}
@Override
protected <T extends Scoreable> T doScoring(List<T> scoreableList, final Data data) {
if (numThreads > 1) {
int totalSize = scoreableList.size();
int jobSize = Math.max((totalSize + numThreads - 1) / numThreads, minScoreablesPerThread);
if (jobSize < totalSize) {
List<Callable<T>> tasks = new ArrayList<Callable<T>>();
for (int from = 0, to = jobSize; from < totalSize; from = to, to += jobSize) {
final List<T> scoringJob = scoreableList.subList(from, Math.min(to, totalSize));
tasks.add(new Callable<T>() {
public T call() throws Exception {
return ThreadedAcousticScorer.super.doScoring(scoringJob, data);
}
});
}
List<T> finalists = new ArrayList<T>(tasks.size());
try {
for (Future<T> result : executorService.invokeAll(tasks))
finalists.add(result.get());
} catch (Exception e) {
throw new DataProcessingException("No scoring jobs ended", e);
}
return Collections.min(finalists, Scoreable.COMPARATOR);
}
}
// if no additional threads are necessary, do the scoring in the calling thread
return super.doScoring(scoreableList, data);
}
}
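To see the partitioning in doScoring at work: with totalSize = 1000, numThreads = 4 and minScoreablesPerThread = 10, jobSize = max(ceil(1000 / 4), 10) = 250, so four scoring jobs of 250 scoreables each are submitted and the best of the partial winners is picked with Scoreable.COMPARATOR. With totalSize = 8 under the same settings, jobSize = 10 is not smaller than totalSize, so the whole list is scored on the calling thread.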

View file

@ -0,0 +1,117 @@
/*
* Copyright 1999-2002 Carnegie Mellon University.
* Portions Copyright 2002 Sun Microsystems, Inc.
* Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.decoder.search;
import java.util.List;
import edu.cmu.sphinx.util.props.*;
/**
* An active list is maintained as a sorted list
* <p>
* Note that all scores are represented in LogMath logbase
*/
public interface ActiveList extends Iterable<Token> {
/**
* Property that sets the desired (or target) size for this active list. This is sometimes referred to as the beam
* size.
*/
@S4Integer(defaultValue = 2000)
public final static String PROP_ABSOLUTE_BEAM_WIDTH = "absoluteBeamWidth";
/**
* Property that sets the minimum score relative to the maximum score in the list for pruning. Tokens with a score
* less than relativeBeamWidth * maximumScore will be pruned from the list
*/
@S4Double(defaultValue = 0.0)
public final static String PROP_RELATIVE_BEAM_WIDTH = "relativeBeamWidth";
/**
* Property that indicates whether or not the active list will implement 'strict pruning'. When strict pruning is
* enabled, the active list will not remove tokens from the active list until they have been completely scored. If
* strict pruning is not enabled, tokens can be removed from the active list based upon their entry scores. The
* default setting is true (enabled).
*/
@S4Boolean(defaultValue = true)
public final static String PROP_STRICT_PRUNING = "strictPruning";
/**
* Adds the given token to the list, keeping track of the best scoring token
*
* @param token the token to add
*/
public void add(Token token);
/**
* Purges the active list of excess members returning a (potentially new) active list
*
* @return a purged active list
*/
public ActiveList purge();
/**
* Returns the size of this list
*
* @return the size
*/
public int size();
/**
* Gets the list of all tokens
*
* @return the set of tokens
*/
public List<Token> getTokens();
/**
* Gets the beam threshold based upon the best scoring token
*
* @return the beam threshold
*/
public float getBeamThreshold();
/**
* gets the best score in the list
*
* @return the best score
*/
public float getBestScore();
/**
* Sets the best scoring token for this active list
*
* @param token the best scoring token
*/
public void setBestToken(Token token);
/**
* Gets the best scoring token for this active list
*
* @return the best scoring token
*/
public Token getBestToken();
/**
* Creates a new empty version of this active list with the same general properties.
*
* @return a new active list.
*/
public ActiveList newInstance();
}
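Putting the two beam properties together, here is a hedged sketch of how relative beam pruning falls out of this interface. The fragment below is illustrative only; it is not one of the real pruner implementations in edu.cmu.sphinx.decoder.pruner.

// Relative beam pruning in the LogMath log base: the linear factor
// becomes an additive offset, so the threshold is simply
// bestScore + logRelativeBeamWidth (what getBeamThreshold() returns).
ActiveList pruneRelative(ActiveList list) {
    ActiveList survivors = list.newInstance();
    float threshold = list.getBeamThreshold();
    for (Token token : list) {
        if (token.getScore() >= threshold) {
            survivors.add(token);
        }
    }
    return survivors;
}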

View file

@@ -0,0 +1,79 @@
/*
*
* Copyright 1999-2004 Carnegie Mellon University.
* Portions Copyright 2004 Sun Microsystems, Inc.
* Portions Copyright 2004 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.decoder.search;
import edu.cmu.sphinx.util.LogMath;
import edu.cmu.sphinx.util.props.*;
/** Creates new active lists. */
public abstract class ActiveListFactory implements Configurable {
/**
* Property that sets the desired (or target) size for this active list. This is sometimes referred to as the beam
* size.
*/
@S4Integer(defaultValue = -1)
public final static String PROP_ABSOLUTE_BEAM_WIDTH = "absoluteBeamWidth";
/**
* Property that sets the minimum score relative to the maximum score in the list for pruning. Tokens with a score
* less than relativeBeamWidth * maximumScore will be pruned from the list
*/
@S4Double(defaultValue = 1E-80)
public final static String PROP_RELATIVE_BEAM_WIDTH = "relativeBeamWidth";
/**
* Property that indicates whether or not the active list will implement 'strict pruning'. When strict pruning is
* enabled, the active list will not remove tokens from the active list until they have been completely scored. If
* strict pruning is not enabled, tokens can be removed from the active list based upon their entry scores. The
* default setting is true (enabled).
*/
@S4Boolean(defaultValue = true)
public final static String PROP_STRICT_PRUNING = "strictPruning";
protected LogMath logMath;
protected int absoluteBeamWidth;
protected float logRelativeBeamWidth;
/**
*
* @param absoluteBeamWidth beam for absolute pruning
* @param relativeBeamWidth beam for relative pruning
*/
public ActiveListFactory(int absoluteBeamWidth,double relativeBeamWidth){
logMath = LogMath.getLogMath();
this.absoluteBeamWidth = absoluteBeamWidth;
this.logRelativeBeamWidth = logMath.linearToLog(relativeBeamWidth);
}
public ActiveListFactory() {
}
public void newProperties(PropertySheet ps) throws PropertyException {
logMath = LogMath.getLogMath();
absoluteBeamWidth = ps.getInt(PROP_ABSOLUTE_BEAM_WIDTH);
double relativeBeamWidth = ps.getDouble(PROP_RELATIVE_BEAM_WIDTH);
logRelativeBeamWidth = logMath.linearToLog(relativeBeamWidth);
}
/**
* Creates a new active list of a particular type
*
* @return the active list
*/
public abstract ActiveList newInstance();
}
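Note that the factory converts the linear relative beam into the log domain once, at configuration time. A small sketch of that conversion, using the LogMath calls exactly as they appear in newProperties() above:

LogMath logMath = LogMath.getLogMath();
// A linear relative beam of 1E-80 becomes a large negative additive
// offset; "score < bestScore * 1E-80" in linear space turns into the
// cheaper "logScore < logBestScore + logRelativeBeamWidth".
float logRelativeBeamWidth = logMath.linearToLog(1E-80);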

View file

@@ -0,0 +1,77 @@
/*
* Copyright 1999-2002 Carnegie Mellon University.
* Portions Copyright 2002 Sun Microsystems, Inc.
* Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.decoder.search;
import edu.cmu.sphinx.util.props.Configurable;
import edu.cmu.sphinx.util.props.S4Double;
import edu.cmu.sphinx.util.props.S4Integer;
import java.util.Iterator;
/** Manages a set of active lists, one per search state order. */
public interface ActiveListManager extends Configurable {
/** The property that specifies the absolute word beam width */
@S4Integer(defaultValue = 2000)
public final static String PROP_ABSOLUTE_WORD_BEAM_WIDTH =
"absoluteWordBeamWidth";
/** The property that specifies the relative word beam width */
@S4Double(defaultValue = 0.0)
public final static String PROP_RELATIVE_WORD_BEAM_WIDTH =
"relativeWordBeamWidth";
/**
* Adds the given token to the list
*
* @param token the token to add
*/
public void add(Token token);
/**
* Returns an Iterator of all the non-emitting ActiveLists. The iteration order is the same as the search state
* order.
*
* @return an Iterator of non-emitting ActiveLists
*/
public Iterator<ActiveList> getNonEmittingListIterator();
/**
* Returns the emitting ActiveList from the manager
*
* @return the emitting ActiveList
*/
public ActiveList getEmittingList();
/**
* Clears emitting list in manager
*/
public void clearEmittingList();
/** Dumps out debug info for the active list manager */
public void dump();
/**
* Sets the total number of state types to be managed
*
* @param numStateOrder the total number of state types
*/
public void setNumStateOrder(int numStateOrder);
}
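A hedged sketch of how a search manager would typically drain these lists once per frame, following the contract above; the fragment is illustrative only, and the grow/score steps are placeholders.

void processFrame(ActiveListManager manager) {
    // Non-emitting lists first, in search state order; in
    // SimpleActiveListManager (below) remove() resets each list
    // for the next frame.
    Iterator<ActiveList> it = manager.getNonEmittingListIterator();
    while (it.hasNext()) {
        ActiveList nonEmitting = it.next();
        // ... grow the tokens in 'nonEmitting' ...
        it.remove();
    }
    // Then the emitting list: score, prune, grow, and clear.
    ActiveList emitting = manager.getEmittingList();
    // ... score and grow 'emitting' ...
    manager.clearEmittingList();
}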

View file

@@ -0,0 +1,87 @@
/*
* Copyright 1999-2002 Carnegie Mellon University.
* Portions Copyright 2002 Sun Microsystems, Inc.
* Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.decoder.search;
import edu.cmu.sphinx.decoder.scorer.Scoreable;
import java.util.*;
/**
* Manager for pruned hypotheses
*
* @author Joe Woelfel
*/
public class AlternateHypothesisManager {
private final Map<Token, List<Token>> viterbiLoserMap = new HashMap<Token, List<Token>>();
private final int maxEdges;
/**
* Creates an alternate hypotheses manager
*
* @param maxEdges the maximum edges allowed
*/
public AlternateHypothesisManager(int maxEdges) {
this.maxEdges = maxEdges;
}
/**
* Adds an alternate predecessor for a token that would otherwise have been lost to Viterbi recombination.
*
* @param token - a token that has an alternate lower scoring predecessor that still might be of interest
* @param predecessor - a predecessor that scores lower than token.getPredecessor().
*/
public void addAlternatePredecessor(Token token, Token predecessor) {
assert predecessor != token.getPredecessor();
List<Token> list = viterbiLoserMap.get(token);
if (list == null) {
list = new ArrayList<Token>();
viterbiLoserMap.put(token, list);
}
list.add(predecessor);
}
/**
* Returns a list of alternate predecessors for a token.
*
* @param token - a token that may have alternate lower scoring predecessors that still might be of interest
* @return A list of predecessors that score lower than token.getPredecessor().
*/
public List<Token> getAlternatePredecessors(Token token) {
return viterbiLoserMap.get(token);
}
/** Purge all but max number of alternate preceding token hypotheses. */
public void purge() {
int max = maxEdges - 1;
for (Map.Entry<Token, List<Token>> entry : viterbiLoserMap.entrySet()) {
List<Token> list = entry.getValue();
Collections.sort(list, Scoreable.COMPARATOR);
List<Token> newList = list.subList(0, list.size() > max ? max : list.size());
viterbiLoserMap.put(entry.getKey(), newList);
}
}
public boolean hasAlternatePredecessors(Token token) {
return viterbiLoserMap.containsKey(token);
}
}
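A hedged usage sketch: during token recombination the winner stays on the lattice and the Viterbi loser is recorded here, then trimmed once per frame. The tokens named winner and loser are hypothetical; only the manager calls are real.

AlternateHypothesisManager ahm = new AlternateHypothesisManager(3);
// 'loser' reached the same search state as 'winner' with a worse
// score; record it so the lattice can later offer alternate paths.
if (loser != winner.getPredecessor()) {
    ahm.addAlternatePredecessor(winner, loser);
}
// Once per frame: keep at most maxEdges - 1 losers per token.
ahm.purge();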

View file

@@ -0,0 +1,270 @@
/*
*
* Copyright 1999-2004 Carnegie Mellon University.
* Portions Copyright 2004 Sun Microsystems, Inc.
* Portions Copyright 2004 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.decoder.search;
import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
/** A factory for PartitionActiveLists */
public class PartitionActiveListFactory extends ActiveListFactory {
/**
*
* @param absoluteBeamWidth beam for absolute pruning
* @param relativeBeamWidth beam for relative pruning
*/
public PartitionActiveListFactory(int absoluteBeamWidth, double relativeBeamWidth) {
super(absoluteBeamWidth, relativeBeamWidth);
}
public PartitionActiveListFactory() {
}
/*
* (non-Javadoc)
*
* @see edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util.props.PropertySheet)
*/
@Override
public void newProperties(PropertySheet ps) throws PropertyException {
super.newProperties(ps);
}
/*
* (non-Javadoc)
*
* @see edu.cmu.sphinx.decoder.search.ActiveListFactory#newInstance()
*/
@Override
public ActiveList newInstance() {
return new PartitionActiveList(absoluteBeamWidth, logRelativeBeamWidth);
}
/**
* An active list that performs absolute beam pruning by partitioning the
* token list around the absolute beam width, instead of sorting the token
* list and then truncating it at the absolute beam width. The expected
* run time of this partitioning algorithm is O(n), instead of O(n log n)
* for merge sort.
* <p>
* This class is not thread safe and should only be used by a single thread.
* <p>
* Note that all scores are maintained in the LogMath log base.
*/
class PartitionActiveList implements ActiveList {
private int size;
private final int absoluteBeamWidth;
private final float logRelativeBeamWidth;
private Token bestToken;
// when the list is changed these things should be
// changed/updated as well
private Token[] tokenList;
private final Partitioner partitioner = new Partitioner();
/** Creates an empty active list
* @param absoluteBeamWidth beam for absolute pruning
* @param logRelativeBeamWidth beam for relative pruning
*/
public PartitionActiveList(int absoluteBeamWidth,
float logRelativeBeamWidth) {
this.absoluteBeamWidth = absoluteBeamWidth;
this.logRelativeBeamWidth = logRelativeBeamWidth;
int listSize = 2000;
if (absoluteBeamWidth > 0) {
listSize = absoluteBeamWidth / 3;
}
this.tokenList = new Token[listSize];
}
/**
* Adds the given token to the list
*
* @param token the token to add
*/
public void add(Token token) {
if (size < tokenList.length) {
tokenList[size] = token;
size++;
} else {
// token array too small, double the capacity
doubleCapacity();
add(token);
}
if (bestToken == null || token.getScore() > bestToken.getScore()) {
bestToken = token;
}
}
/** Doubles the capacity of the Token array. */
private void doubleCapacity() {
tokenList = Arrays.copyOf(tokenList, tokenList.length * 2);
}
/**
* Purges excess members. Remove all nodes that fall below the relativeBeamWidth
*
* @return a (possible new) active list
*/
public ActiveList purge() {
// if the absolute beam is zero, this means there
// should be no constraint on the abs beam size at all
// so we will only be relative beam pruning, which means
// that we don't have to sort the list
if (absoluteBeamWidth > 0) {
// if we have an absolute beam, then we will
// need to sort the tokens to apply the beam
if (size > absoluteBeamWidth) {
size = partitioner.partition(tokenList, size,
absoluteBeamWidth) + 1;
}
}
return this;
}
/**
* Gets the beam threshold based upon the best scoring token
*
* @return the beam threshold
*/
public float getBeamThreshold() {
return getBestScore() + logRelativeBeamWidth;
}
/**
* gets the best score in the list
*
* @return the best score
*/
public float getBestScore() {
float bestScore = -Float.MAX_VALUE;
if (bestToken != null) {
bestScore = bestToken.getScore();
}
// A sanity check
// for (Token t : this) {
// if (t.getScore() > bestScore) {
// System.out.println("GBS: found better score "
// + t + " vs. " + bestScore);
// }
// }
return bestScore;
}
/**
* Sets the best scoring token for this active list
*
* @param token the best scoring token
*/
public void setBestToken(Token token) {
bestToken = token;
}
/**
* Gets the best scoring token for this active list
*
* @return the best scoring token
*/
public Token getBestToken() {
return bestToken;
}
/**
* Retrieves the iterator for this tree.
*
* @return the iterator for this token list
*/
public Iterator<Token> iterator() {
return (new TokenArrayIterator(tokenList, size));
}
/**
* Gets the list of all tokens
*
* @return the list of tokens
*/
public List<Token> getTokens() {
return Arrays.asList(tokenList).subList(0, size);
}
/**
* Returns the number of tokens on this active list
*
* @return the size of the active list
*/
public final int size() {
return size;
}
/* (non-Javadoc)
* @see edu.cmu.sphinx.decoder.search.ActiveList#createNew()
*/
public ActiveList newInstance() {
return PartitionActiveListFactory.this.newInstance();
}
}
}
class TokenArrayIterator implements Iterator<Token> {
private final Token[] tokenArray;
private final int size;
private int pos;
TokenArrayIterator(Token[] tokenArray, int size) {
this.tokenArray = tokenArray;
this.pos = 0;
this.size = size;
}
/** Returns true if the iteration has more tokens. */
public boolean hasNext() {
return pos < size;
}
/** Returns the next token in the iteration. */
public Token next() throws NoSuchElementException {
if (pos >= tokenArray.length) {
throw new NoSuchElementException();
}
return tokenArray[pos++];
}
/** Unimplemented, throws an Error if called. */
public void remove() {
throw new Error("TokenArrayIterator.remove() unimplemented");
}
}

View file

@@ -0,0 +1,180 @@
/*
* Copyright 1999-2002 Carnegie Mellon University.
* Portions Copyright 2002 Sun Microsystems, Inc.
* Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.decoder.search;
import java.util.Arrays;
import edu.cmu.sphinx.decoder.scorer.Scoreable;
/**
* Partitions a list of tokens according to the token score, used
* in {@link PartitionActiveListFactory}. The partitioning is supposed
* to provide O(n) performance, so it is preferable to sorting the
* list, which costs O(n log n).
*/
public class Partitioner {
/** Max recursion depth **/
final private int MAX_DEPTH = 50;
/**
* Partitions a sub-array of tokens around its last element (the pivot).
* Tokens scoring at least as high as the pivot are moved to the start
* of the sub-array, and the pivot is then placed just after them.
*
* @param tokens the token array to partition
* @param start the starting index of the subarray
* @param end the pivot and the ending index of the subarray, inclusive
* @return the index (after partitioning) of the element around which the array is partitioned
*/
private int endPointPartition(Token[] tokens, int start, int end) {
Token pivot = tokens[end];
float pivotScore = pivot.getScore();
int i = start;
int j = end - 1;
while (true) {
while (i < end && tokens[i].getScore() >= pivotScore)
i++;
while (j > i && tokens[j].getScore() < pivotScore)
j--;
if (j <= i)
break;
Token current = tokens[j];
setToken(tokens, j, tokens[i]);
setToken(tokens, i, current);
}
setToken(tokens, end, tokens[i]);
setToken(tokens, i, pivot);
return i;
}
/**
* Partitions a sub-array of tokens by first swapping the midpoint of the sub-array into the pivot position.
* This partially avoids the slow worst-case behavior on already sorted arrays.
*
* @param tokens the token array to partition
* @param start the starting index of the subarray
* @param end the ending index of the subarray, inclusive
* @return the index of the element around which the array is partitioned
*/
private int midPointPartition(Token[] tokens, int start, int end) {
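// the unsigned shift averages start and end without the int overflow
// that (start + end) / 2 could suffer for very large arrays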
int middle = (start + end) >>> 1;
Token temp = tokens[end];
setToken(tokens, end, tokens[middle]);
setToken(tokens, middle, temp);
return endPointPartition(tokens, start, end);
}
/**
* Partitions the given array of tokens in place, so that the n highest scoring tokens will be at the beginning of
* the array, in no particular order.
*
* @param tokens the array of tokens to partition
* @param size the number of tokens to partition
* @param n the number of tokens in the final partition
* @return the index of the last element in the partition
*/
public int partition(Token[] tokens, int size, int n) {
if (tokens.length > n) {
return midPointSelect(tokens, 0, size - 1, n, 0);
} else {
return findBest(tokens, size);
}
}
/**
* Finds the lowest scoring token and swaps it into the last slot, so that the remaining tokens all score at
* least as well
*
* @param tokens array of tokens
* @param size the number of tokens to partition
* @return the index of the last element in the partition
*/
private int findBest(Token[] tokens, int size) {
int r = -1;
float lowestScore = Float.MAX_VALUE;
// scan only the filled portion of the array; slots past 'size' may be null
for (int i = 0; i < size; i++) {
float currentScore = tokens[i].getScore();
if (currentScore <= lowestScore) {
lowestScore = currentScore;
r = i; // "r" is the returned index
}
}
// exchange tokens[r] <=> last token,
// where tokens[r] has the lowest score
int last = size - 1;
if (last >= 0) {
Token lastToken = tokens[last];
setToken(tokens, last, tokens[r]);
setToken(tokens, r, lastToken);
}
// return the last index
return last;
}
private void setToken(Token[] list, int index, Token token) {
list[index] = token;
}
/**
* Selects the token with the targetSize-th largest token score.
*
* @param tokens the token array to partition
* @param start the starting index of the subarray
* @param end the ending index of the subarray, inclusive
* @param targetSize target size of the partition
* @param depth current recursion depth; past MAX_DEPTH the code falls back to the sort-based simple partition to avoid stack overflow
* @return the index of the token with the targetSize-th largest score
*/
private int midPointSelect(Token[] tokens, int start, int end, int targetSize, int depth) {
if (depth > MAX_DEPTH) {
return simplePointSelect (tokens, start, end, targetSize);
}
if (start == end) {
return start;
}
int partitionToken = midPointPartition(tokens, start, end);
int newSize = partitionToken - start + 1;
if (targetSize == newSize) {
return partitionToken;
} else if (targetSize < newSize) {
return midPointSelect(tokens, start, partitionToken - 1, targetSize, depth + 1);
} else {
return midPointSelect(tokens, partitionToken + 1, end, targetSize - newSize, depth + 1);
}
}
/**
* Fallback method to get the partition
*
* @param tokens the token array to partition
* @param start the starting index of the subarray
* @param end the ending index of the subarray, inclusive
* @param targetSize target size of the partition
* @return the index of the token with the targetSize-th largest score
*/
private int simplePointSelect(Token[] tokens, int start, int end, int targetSize) {
Arrays.sort(tokens, start, end + 1, Scoreable.COMPARATOR);
return start + targetSize - 1;
}
}
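The same midpoint quickselect, reduced to primitives: a minimal sketch over a float[] in descending order (like token scores) that mirrors endPointPartition and midPointSelect above. It is illustrative only and omits the recursion-depth fallback; QuickselectSketch is an invented name.

class QuickselectSketch {
    // Partition scores[start..end] around scores[end]: values >= pivot
    // move to the front; returns the pivot's final index.
    static int partition(float[] scores, int start, int end) {
        float pivot = scores[end];
        int i = start;
        for (int j = start; j < end; j++) {
            if (scores[j] >= pivot) {
                float tmp = scores[i]; scores[i] = scores[j]; scores[j] = tmp;
                i++;
            }
        }
        float tmp = scores[i]; scores[i] = scores[end]; scores[end] = tmp;
        return i;
    }

    // Moves the n largest values to the front, in expected O(n) time,
    // and returns the index of the last of them.
    static int select(float[] scores, int start, int end, int n) {
        while (start < end) {
            int mid = (start + end) >>> 1; // midpoint pivot, as above
            float tmp = scores[mid]; scores[mid] = scores[end]; scores[end] = tmp;
            int p = partition(scores, start, end);
            int size = p - start + 1;
            if (n == size) {
                return p;
            } else if (n < size) {
                end = p - 1;
            } else {
                start = p + 1;
                n -= size;
            }
        }
        return start;
    }
}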

View file

@@ -0,0 +1,64 @@
/*
* Copyright 1999-2002 Carnegie Mellon University.
* Portions Copyright 2002 Sun Microsystems, Inc.
* Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.decoder.search;
import edu.cmu.sphinx.result.Result;
import edu.cmu.sphinx.util.props.Configurable;
/**
* Defines the interface for the SearchManager. The SearchManager's primary role is to execute the search for a given
* number of frames. The SearchManager will return interim results as the recognition proceeds and when recognition
* completes a final result will be returned.
*/
public interface SearchManager extends Configurable {
/**
* Allocates the resources necessary for this search. This should be called once before any recognitions are
* performed
*/
public void allocate();
/**
* Deallocates resources necessary for this search. This should be called once after all recognitions are completed
* and the search manager is no longer needed.
*/
public void deallocate();
/**
* Prepares the SearchManager for recognition. This method must be called before <code> recognize </code> is
* called. Typically, <code> start </code> and <code> stop </code> are called bracketing an utterance.
*/
public void startRecognition();
/** Performs post-recognition cleanup. This method should be called after recognize returns a final result. */
public void stopRecognition();
/**
* Performs recognition. Processes no more than the given number of frames before returning. This method returns a
* partial result after nFrames have been processed, or a final result if recognition completes while processing
* frames. If a final result is returned, the actual number of frames processed can be retrieved from the result.
* This method may block while waiting for frames to arrive.
*
* @param nFrames the maximum number of frames to process. A final result may be returned before all nFrames are
* processed.
* @return the recognition result; the result may be a partial or a final result, or null if no frames have
* arrived
*/
public Result recognize(int nFrames);
}
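The contract above implies a fixed call sequence. A hedged sketch of a caller, assuming a configured SearchManager instance named manager; the 50-frame granularity is an arbitrary choice for illustration.

manager.allocate();
manager.startRecognition();
Result result;
do {
    // Process up to 50 frames; a partial Result is returned until
    // recognition completes, or null if no frames have arrived.
    result = manager.recognize(50);
} while (result != null && !result.isFinal());
manager.stopRecognition();
manager.deallocate();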

View file

@@ -0,0 +1,222 @@
/*
*
* Copyright 1999-2004 Carnegie Mellon University.
* Portions Copyright 2004 Sun Microsystems, Inc.
* Portions Copyright 2004 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.decoder.search;
import edu.cmu.sphinx.decoder.scorer.Scoreable;
import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
/** A factory for simple active lists */
public class SimpleActiveListFactory extends ActiveListFactory {
/**
* Creates factory for simple active lists
* @param absoluteBeamWidth absolute pruning beam
* @param relativeBeamWidth relative pruning beam
*/
public SimpleActiveListFactory(int absoluteBeamWidth,
double relativeBeamWidth)
{
super(absoluteBeamWidth, relativeBeamWidth);
}
public SimpleActiveListFactory() {
}
/*
* (non-Javadoc)
*
* @see edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util.props.PropertySheet)
*/
@Override
public void newProperties(PropertySheet ps) throws PropertyException {
super.newProperties(ps);
}
/*
* (non-Javadoc)
*
* @see edu.cmu.sphinx.decoder.search.ActiveListFactory#newInstance()
*/
@Override
public ActiveList newInstance() {
return new SimpleActiveList(absoluteBeamWidth, logRelativeBeamWidth);
}
/**
* An active list that tries to be simple and correct. This type of active list will be slow, but should exhibit
* correct behavior. Faster versions of the ActiveList exist (HeapActiveList, TreeActiveList).
* <p>
* This class is not thread safe and should only be used by a single thread.
* <p>
* Note that all scores are maintained in the LogMath log domain
*/
class SimpleActiveList implements ActiveList {
private int absoluteBeamWidth = 2000;
private final float logRelativeBeamWidth;
private Token bestToken;
private List<Token> tokenList = new LinkedList<Token>();
/**
* Creates an empty active list
*
* @param absoluteBeamWidth the absolute beam width
* @param logRelativeBeamWidth the relative beam width (in the log domain)
*/
public SimpleActiveList(int absoluteBeamWidth,
float logRelativeBeamWidth) {
this.absoluteBeamWidth = absoluteBeamWidth;
this.logRelativeBeamWidth = logRelativeBeamWidth;
}
/**
* Adds the given token to the list
*
* @param token the token to add
*/
public void add(Token token) {
tokenList.add(token);
if (bestToken == null || token.getScore() > bestToken.getScore()) {
bestToken = token;
}
}
/**
* Replaces an old token with a new token
*
* @param oldToken the token to replace (or null in which case, replace works like add).
* @param newToken the new token to be placed in the list.
*/
public void replace(Token oldToken, Token newToken) {
add(newToken);
if (oldToken != null) {
if (!tokenList.remove(oldToken)) {
// Some optional debugging code here to dump out the paths
// when this "should never happen" error happens
// System.out.println("SimpleActiveList: remove "
// + oldToken + " missing, but replaced by "
// + newToken);
// oldToken.dumpTokenPath(true);
// newToken.dumpTokenPath(true);
}
}
}
/**
* Purges excess members. Remove all nodes that fall below the relativeBeamWidth
*
* @return a (possible new) active list
*/
public ActiveList purge() {
if (absoluteBeamWidth > 0 && tokenList.size() > absoluteBeamWidth) {
Collections.sort(tokenList, Scoreable.COMPARATOR);
tokenList = tokenList.subList(0, absoluteBeamWidth);
}
return this;
}
/**
* Retrieves the iterator for this tree.
*
* @return the iterator for this token list
*/
public Iterator<Token> iterator() {
return tokenList.iterator();
}
/**
* Gets the set of all tokens
*
* @return the set of tokens
*/
public List<Token> getTokens() {
return tokenList;
}
/**
* Returns the number of tokens on this active list
*
* @return the size of the active list
*/
public final int size() {
return tokenList.size();
}
/**
* Gets the beam threshold based upon the best scoring token
*
* @return the beam threshold
*/
public float getBeamThreshold() {
return getBestScore() + logRelativeBeamWidth;
}
/**
* gets the best score in the list
*
* @return the best score
*/
public float getBestScore() {
float bestScore = -Float.MAX_VALUE;
if (bestToken != null) {
bestScore = bestToken.getScore();
}
return bestScore;
}
/**
* Sets the best scoring token for this active list
*
* @param token the best scoring token
*/
public void setBestToken(Token token) {
bestToken = token;
}
/**
* Gets the best scoring token for this active list
*
* @return the best scoring token
*/
public Token getBestToken() {
return bestToken;
}
/* (non-Javadoc)
* @see edu.cmu.sphinx.decoder.search.ActiveList#createNew()
*/
public ActiveList newInstance() {
return SimpleActiveListFactory.this.newInstance();
}
}
}

View file

@@ -0,0 +1,244 @@
/*
* Copyright 1999-2002 Carnegie Mellon University.
* Portions Copyright 2002 Sun Microsystems, Inc.
* Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.decoder.search;
import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;
import edu.cmu.sphinx.util.props.S4Boolean;
import edu.cmu.sphinx.util.props.S4ComponentList;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.logging.Logger;
/**
* A list of ActiveLists. Different token types are placed in different lists.
* <p>
* This class is not thread safe and should only be used by a single thread.
*/
public class SimpleActiveListManager implements ActiveListManager {
/**
* This property is used in the Iterator returned by the getNonEmittingListIterator() method. When the
* Iterator.next() method is called, this property determines whether the lists prior to the one returned are
* checked to be empty (as they should be). If a prior list is not empty, an Error will be thrown.
*/
@S4Boolean(defaultValue = false)
public static final String PROP_CHECK_PRIOR_LISTS_EMPTY = "checkPriorListsEmpty";
/** The property that defines the list of active list factories to be used by this search manager. */
@S4ComponentList(type = ActiveListFactory.class)
public final static String PROP_ACTIVE_LIST_FACTORIES = "activeListFactories";
// --------------------------------------
// Configuration data
// --------------------------------------
private Logger logger;
private boolean checkPriorLists;
private List<ActiveListFactory> activeListFactories;
private ActiveList[] currentActiveLists;
/**
* Create a simple list manager
* @param activeListFactories factories
* @param checkPriorLists check prior lists during operation
*/
public SimpleActiveListManager(List<ActiveListFactory> activeListFactories, boolean checkPriorLists) {
this.logger = Logger.getLogger( getClass().getName() );
this.activeListFactories = activeListFactories;
this.checkPriorLists = checkPriorLists;
}
public SimpleActiveListManager() {
}
/*
* (non-Javadoc)
*
* @see edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util.props.PropertySheet)
*/
public void newProperties(PropertySheet ps) throws PropertyException {
logger = ps.getLogger();
activeListFactories = ps.getComponentList(PROP_ACTIVE_LIST_FACTORIES, ActiveListFactory.class);
checkPriorLists = ps.getBoolean(PROP_CHECK_PRIOR_LISTS_EMPTY);
}
/*
* (non-Javadoc)
*
* @see edu.cmu.sphinx.decoder.search.ActiveListManager#setNumStateOrder(java.lang.Class[])
*/
public void setNumStateOrder(int numStateOrder) {
// check to make sure that we have the correct
// number of active list factories for the given search states
currentActiveLists = new ActiveList[numStateOrder];
if (activeListFactories.isEmpty()) {
logger.severe("No active list factories configured");
throw new Error("No active list factories configured");
}
if (activeListFactories.size() != currentActiveLists.length) {
logger.warning("Need " + currentActiveLists.length +
" active list factories, found " +
activeListFactories.size());
}
createActiveLists();
}
/**
* Creates the emitting and non-emitting active lists. When creating the non-emitting active lists, we will look at
* their respective beam widths (e.g., word beam, unit beam, state beam).
*/
private void createActiveLists() {
int nlists = activeListFactories.size();
for (int i = 0; i < currentActiveLists.length; i++) {
int which = i;
if (which >= nlists) {
which = nlists - 1;
}
ActiveListFactory alf = activeListFactories.get(which);
currentActiveLists[i] = alf.newInstance();
}
}
/**
* Adds the given token to the list
*
* @param token the token to add
*/
public void add(Token token) {
ActiveList activeList = findListFor(token);
if (activeList == null) {
throw new Error("Cannot find ActiveList for "
+ token.getSearchState().getClass());
}
activeList.add(token);
}
/**
* Given a token find the active list associated with the token type
*
* @param token the token whose active list is wanted
* @return the active list
*/
private ActiveList findListFor(Token token) {
return currentActiveLists[token.getSearchState().getOrder()];
}
/**
* Returns the emitting ActiveList from the manager
*
* @return the emitting ActiveList
*/
public ActiveList getEmittingList() {
ActiveList list = currentActiveLists[currentActiveLists.length - 1];
return list;
}
/**
* Clears emitting list in manager
*/
public void clearEmittingList() {
ActiveList list = currentActiveLists[currentActiveLists.length - 1];
currentActiveLists[currentActiveLists.length - 1] = list.newInstance();
}
/**
* Returns an Iterator of all the non-emitting ActiveLists. The iteration order is the same as the search state
* order.
*
* @return an Iterator of non-emitting ActiveLists
*/
public Iterator<ActiveList> getNonEmittingListIterator() {
return (new NonEmittingListIterator());
}
private class NonEmittingListIterator implements Iterator<ActiveList> {
private int listPtr;
public NonEmittingListIterator() {
listPtr = -1;
}
public boolean hasNext() {
return listPtr + 1 < currentActiveLists.length - 1;
}
public ActiveList next() throws NoSuchElementException {
listPtr++;
if (listPtr >= currentActiveLists.length) {
throw new NoSuchElementException();
}
if (checkPriorLists) {
checkPriorLists();
}
return currentActiveLists[listPtr];
}
/** Checks that all lists prior to listPtr are empty. */
private void checkPriorLists() {
for (int i = 0; i < listPtr; i++) {
ActiveList activeList = currentActiveLists[i];
if (activeList.size() > 0) {
throw new Error("At while processing state order"
+ listPtr + ", state order " + i + " not empty");
}
}
}
public void remove() {
currentActiveLists[listPtr] =
currentActiveLists[listPtr].newInstance();
}
}
/** Outputs debugging info for this list manager */
public void dump() {
System.out.println("--------------------");
for (ActiveList al : currentActiveLists) {
dumpList(al);
}
}
/**
* Dumps out debugging info for the given active list
*
* @param al the active list to dump
*/
private void dumpList(ActiveList al) {
System.out.println("Size: " + al.size() + " Best token: " + al.getBestToken());
}
}
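A hypothetical construction showing the factory-reuse rule in createActiveLists() above: with fewer factories than state orders, the last factory covers every remaining order. All beam values below are invented for illustration.

// Two factories for six state orders: the second is reused for
// orders 1..5 (invented beam values).
List<ActiveListFactory> factories = Arrays.asList(
        (ActiveListFactory) new SortingActiveListFactory(1500, 1E-60),
        new PartitionActiveListFactory(2000, 1E-80));
ActiveListManager manager = new SimpleActiveListManager(factories, false);
manager.setNumStateOrder(6); // logs a warning: 6 needed, only 2 configured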

View file

@@ -0,0 +1,680 @@
/*
* Copyright 1999-2002 Carnegie Mellon University.
* Portions Copyright 2002 Sun Microsystems, Inc.
* Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.decoder.search;
import edu.cmu.sphinx.decoder.pruner.Pruner;
import edu.cmu.sphinx.decoder.scorer.AcousticScorer;
import edu.cmu.sphinx.frontend.Data;
import edu.cmu.sphinx.linguist.Linguist;
import edu.cmu.sphinx.linguist.SearchState;
import edu.cmu.sphinx.linguist.SearchStateArc;
import edu.cmu.sphinx.linguist.WordSearchState;
import edu.cmu.sphinx.result.Result;
import edu.cmu.sphinx.util.LogMath;
import edu.cmu.sphinx.util.StatisticsVariable;
import edu.cmu.sphinx.util.Timer;
import edu.cmu.sphinx.util.TimerPool;
import edu.cmu.sphinx.util.props.*;
import java.util.*;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.io.IOException;
/**
* Provides the breadth first search. To perform recognition an application should call initialize before recognition
* begins, and repeatedly call <code> recognize </code> until Result.isFinal() returns true. Once a final result has
* been obtained, <code> terminate </code> should be called.
* <p>
* All scores and probabilities are maintained in the log math log domain.
* <p>
* For information about breadth first search please refer to "Spoken Language Processing", X. Huang, PTR
*/
// TODO - need to add in timing code.
public class SimpleBreadthFirstSearchManager extends TokenSearchManager {
/** The property that defines the name of the linguist to be used by this search manager. */
@S4Component(type = Linguist.class)
public final static String PROP_LINGUIST = "linguist";
/** The property that defines the name of the pruner to be used by this search manager. */
@S4Component(type = Pruner.class)
public final static String PROP_PRUNER = "pruner";
/** The property that defines the name of the scorer to be used by this search manager. */
@S4Component(type = AcousticScorer.class)
public final static String PROP_SCORER = "scorer";
/** The property that defines the name of the active list factory to be used by this search manager. */
@S4Component(type = ActiveListFactory.class)
public final static String PROP_ACTIVE_LIST_FACTORY = "activeListFactory";
/**
* The property that when set to <code>true</code> will cause the recognizer to count up all the tokens in the
* active list after every frame.
*/
@S4Boolean(defaultValue = false)
public final static String PROP_SHOW_TOKEN_COUNT = "showTokenCount";
/**
* The property that sets the minimum score relative to the maximum score in the word list for pruning. Words with a
* score less than relativeBeamWidth * maximumScore will be pruned from the list
*/
@S4Double(defaultValue = 0.0)
public final static String PROP_RELATIVE_WORD_BEAM_WIDTH = "relativeWordBeamWidth";
/**
* The property that controls whether or not relative beam pruning will be performed on the entry into a
* state.
*/
@S4Boolean(defaultValue = false)
public final static String PROP_WANT_ENTRY_PRUNING = "wantEntryPruning";
/**
* The property that controls the number of frames processed for every time the decode growth step is skipped.
* Setting this property to zero disables grow skipping. Setting this number to a small integer will increase the
* speed of the decoder but will also decrease its accuracy. The higher the number, the less often the grow code is
* skipped.
*/
@S4Integer(defaultValue = 0)
public final static String PROP_GROW_SKIP_INTERVAL = "growSkipInterval";
protected Linguist linguist; // Provides grammar/language info
private Pruner pruner; // used to prune the active list
private AcousticScorer scorer; // used to score the active list
protected int currentFrameNumber; // the current frame number
protected long currentCollectTime; // the collection time of the current frame
protected ActiveList activeList; // the list of active tokens
protected List<Token> resultList; // the current set of results
protected LogMath logMath;
private Logger logger;
private String name;
// ------------------------------------
// monitoring data
// ------------------------------------
private Timer scoreTimer; // TODO move these timers out
private Timer pruneTimer;
protected Timer growTimer;
private StatisticsVariable totalTokensScored;
private StatisticsVariable tokensPerSecond;
private StatisticsVariable curTokensScored;
private StatisticsVariable tokensCreated;
private StatisticsVariable viterbiPruned;
private StatisticsVariable beamPruned;
// ------------------------------------
// Working data
// ------------------------------------
protected boolean showTokenCount;
private boolean wantEntryPruning;
protected Map<SearchState, Token> bestTokenMap;
private float logRelativeWordBeamWidth;
private int totalHmms;
private double startTime;
private float threshold;
private float wordThreshold;
private int growSkipInterval;
protected ActiveListFactory activeListFactory;
protected boolean streamEnd;
public SimpleBreadthFirstSearchManager() {
}
/**
* Creates a manager for simple search
*
* @param linguist linguist to configure search space
* @param pruner pruner to prune extra paths
* @param scorer scorer to estimate token probability
* @param activeListFactory factory for list of tokens
* @param showTokenCount show count of the tokens during decoding
* @param relativeWordBeamWidth relative pruning beam for lookahead
* @param growSkipInterval interval to skip growth step
* @param wantEntryPruning entry pruning
*/
public SimpleBreadthFirstSearchManager(Linguist linguist, Pruner pruner,
AcousticScorer scorer, ActiveListFactory activeListFactory,
boolean showTokenCount, double relativeWordBeamWidth,
int growSkipInterval, boolean wantEntryPruning) {
this.name = getClass().getName();
this.logger = Logger.getLogger(name);
this.logMath = LogMath.getLogMath();
this.linguist = linguist;
this.pruner = pruner;
this.scorer = scorer;
this.activeListFactory = activeListFactory;
this.showTokenCount = showTokenCount;
this.growSkipInterval = growSkipInterval;
this.wantEntryPruning = wantEntryPruning;
this.logRelativeWordBeamWidth = logMath.linearToLog(relativeWordBeamWidth);
this.keepAllTokens = true;
}
@Override
public void newProperties(PropertySheet ps) throws PropertyException {
super.newProperties(ps);
logMath = LogMath.getLogMath();
logger = ps.getLogger();
name = ps.getInstanceName();
linguist = (Linguist) ps.getComponent(PROP_LINGUIST);
pruner = (Pruner) ps.getComponent(PROP_PRUNER);
scorer = (AcousticScorer) ps.getComponent(PROP_SCORER);
activeListFactory = (ActiveListFactory) ps.getComponent(PROP_ACTIVE_LIST_FACTORY);
showTokenCount = ps.getBoolean(PROP_SHOW_TOKEN_COUNT);
double relativeWordBeamWidth = ps.getDouble(PROP_RELATIVE_WORD_BEAM_WIDTH);
growSkipInterval = ps.getInt(PROP_GROW_SKIP_INTERVAL);
wantEntryPruning = ps.getBoolean(PROP_WANT_ENTRY_PRUNING);
logRelativeWordBeamWidth = logMath.linearToLog(relativeWordBeamWidth);
this.keepAllTokens = true;
}
/** Called at the start of recognition. Gets the search manager ready to recognize */
public void startRecognition() {
logger.finer("starting recognition");
linguist.startRecognition();
pruner.startRecognition();
scorer.startRecognition();
localStart();
if (startTime == 0.0) {
startTime = System.currentTimeMillis();
}
}
/**
* Performs the recognition for the given number of frames.
*
* @param nFrames the number of frames to recognize
* @return the current result or null if there is no Result (due to the lack of frames to recognize)
*/
public Result recognize(int nFrames) {
boolean done = false;
Result result = null;
streamEnd = false;
for (int i = 0; i < nFrames && !done; i++) {
done = recognize();
}
// generate a new temporary result if the current token is based on a final search state
// remark: the null check is necessary in case the search space does not contain scoreable tokens.
if (activeList.getBestToken() != null) {
// to make the current result as correct as possible we undo the last search graph expansion here
ActiveList fixedList = undoLastGrowStep();
// Now create the result using the fixed active-list.
if (!streamEnd)
result =
new Result(fixedList, resultList, currentFrameNumber, done, linguist.getSearchGraph().getWordTokenFirst(), false);
}
if (showTokenCount) {
showTokenCount();
}
return result;
}
/**
* Because growBranches() is called even when no data is left after the last speech frame, the ordering of the
* active list might depend only on the transition probabilities and (penalty) scores. Therefore we need to undo the
* last grow step, back to final states or the last emitting state, in order to fix the list.
* @return newly created list
*/
protected ActiveList undoLastGrowStep() {
ActiveList fixedList = activeList.newInstance();
for (Token token : activeList) {
Token curToken = token.getPredecessor();
// remove the final states that are not the real final ones, because they just hide prior final tokens:
while (curToken.getPredecessor() != null && (
(curToken.isFinal() && curToken.getPredecessor() != null && !curToken.getPredecessor().isFinal())
|| (curToken.isEmitting() && curToken.getData() == null) // tokens that have not been scored yet
|| (!curToken.isFinal() && !curToken.isEmitting()))) {
curToken = curToken.getPredecessor();
}
fixedList.add(curToken);
}
return fixedList;
}
/** Terminates a recognition */
public void stopRecognition() {
localStop();
scorer.stopRecognition();
pruner.stopRecognition();
linguist.stopRecognition();
logger.finer("recognition stopped");
}
/**
* Performs recognition for one frame. Returns true if recognition has been completed.
*
* @return <code>true</code> if recognition is completed.
*/
protected boolean recognize() {
boolean more = scoreTokens(); // score emitting tokens
if (more) {
pruneBranches(); // eliminate poor branches
currentFrameNumber++;
if (growSkipInterval == 0
|| (currentFrameNumber % growSkipInterval) != 0) {
growBranches(); // extend remaining branches
}
}
return !more;
}
/** Gets the initial grammar node from the linguist and creates a GrammarNodeToken */
protected void localStart() {
currentFrameNumber = 0;
curTokensScored.value = 0;
ActiveList newActiveList = activeListFactory.newInstance();
SearchState state = linguist.getSearchGraph().getInitialState();
newActiveList.add(new Token(state, -1));
activeList = newActiveList;
growBranches();
}
/** Local cleanup for this search manager */
protected void localStop() {
}
/**
* Goes through the active list of tokens and expands each token, finding the set of successor tokens until all the
* successor tokens are emitting tokens.
*/
protected void growBranches() {
int mapSize = activeList.size() * 10;
if (mapSize == 0) {
mapSize = 1;
}
growTimer.start();
bestTokenMap = new HashMap<SearchState, Token>(mapSize);
ActiveList oldActiveList = activeList;
resultList = new LinkedList<Token>();
activeList = activeListFactory.newInstance();
threshold = oldActiveList.getBeamThreshold();
wordThreshold = oldActiveList.getBestScore() + logRelativeWordBeamWidth;
for (Token token : oldActiveList) {
collectSuccessorTokens(token);
}
growTimer.stop();
if (logger.isLoggable(Level.FINE)) {
int hmms = activeList.size();
totalHmms += hmms;
logger.fine("Frame: " + currentFrameNumber + " Hmms: "
+ hmms + " total " + totalHmms);
}
}
/**
* Calculate the acoustic scores for the active list. The active list should contain only emitting tokens.
*
* @return <code>true</code> if there are more frames to score, otherwise, false
*/
protected boolean scoreTokens() {
boolean hasMoreFrames = false;
scoreTimer.start();
Data data = scorer.calculateScores(activeList.getTokens());
scoreTimer.stop();
Token bestToken = null;
if (data instanceof Token) {
bestToken = (Token)data;
} else if (data == null) {
streamEnd = true;
}
if (bestToken != null) {
hasMoreFrames = true;
currentCollectTime = bestToken.getCollectTime();
activeList.setBestToken(bestToken);
}
// update statistics
curTokensScored.value += activeList.size();
totalTokensScored.value += activeList.size();
tokensPerSecond.value = totalTokensScored.value / getTotalTime();
// if (logger.isLoggable(Level.FINE)) {
// logger.fine(currentFrameNumber + " " + activeList.size()
// + " " + curTokensScored.value + " "
// + (int) tokensPerSecond.value);
// }
return hasMoreFrames;
}
/**
* Returns the total time since we started
*
* @return the total time (in seconds)
*/
private double getTotalTime() {
return (System.currentTimeMillis() - startTime) / 1000.0;
}
/** Removes unpromising branches from the active list */
protected void pruneBranches() {
int startSize = activeList.size();
pruneTimer.start();
activeList = pruner.prune(activeList);
beamPruned.value += startSize - activeList.size();
pruneTimer.stop();
}
/**
* Gets the best token for this state
*
* @param state the state of interest
* @return the best token
*/
protected Token getBestToken(SearchState state) {
Token best = bestTokenMap.get(state);
if (logger.isLoggable(Level.FINER) && best != null) {
logger.finer("BT " + best + " for state " + state);
}
return best;
}
/**
* Sets the best token for a given state
*
* @param token the best token
* @param state the state
* @return the previous best token for the given state, or null if no previous best token
*/
protected Token setBestToken(Token token, SearchState state) {
return bestTokenMap.put(state, token);
}
public ActiveList getActiveList() {
return activeList;
}
/**
* Collects the next set of emitting tokens from a token and accumulates them in the active or result lists
*
* @param token the token to collect successors from
*/
protected void collectSuccessorTokens(Token token) {
SearchState state = token.getSearchState();
// If this is a final state, add it to the final list
if (token.isFinal()) {
resultList.add(token);
}
if (token.getScore() < threshold) {
return;
}
if (state instanceof WordSearchState
&& token.getScore() < wordThreshold) {
return;
}
SearchStateArc[] arcs = state.getSuccessors();
// For each successor
// calculate the entry score for the token based upon the
// predecessor token score and the transition probabilities
// if the score is better than the best score encountered for
// the SearchState and frame then create a new token, add
// it to the lattice and the SearchState.
// If the token is an emitting token add it to the list,
// otherwise recursively collect the new tokens successors.
for (SearchStateArc arc : arcs) {
SearchState nextState = arc.getState();
// We're actually multiplying the variables, but since
// these come in log(), multiply gets converted to add
float logEntryScore = token.getScore() + arc.getProbability();
if (wantEntryPruning) { // false by default
if (logEntryScore < threshold) {
continue;
}
if (nextState instanceof WordSearchState
&& logEntryScore < wordThreshold) {
continue;
}
}
Token predecessor = getResultListPredecessor(token);
// if not emitting, check to see if we've already visited
// this state during this frame. Expand the token only if we
// haven't visited it already. This prevents the search
// from getting stuck in a loop of states with no
// intervening emitting nodes. This can happen with nasty
// jsgf grammars such as ((foo*)*)*
if (!nextState.isEmitting()) {
Token newToken = new Token(predecessor, nextState, logEntryScore,
arc.getInsertionProbability(),
arc.getLanguageProbability(),
currentCollectTime);
tokensCreated.value++;
if (!isVisited(newToken)) {
collectSuccessorTokens(newToken);
}
continue;
}
Token bestToken = getBestToken(nextState);
if (bestToken == null) {
Token newToken = new Token(predecessor, nextState, logEntryScore,
arc.getInsertionProbability(),
arc.getLanguageProbability(),
currentFrameNumber);
tokensCreated.value++;
setBestToken(newToken, nextState);
activeList.add(newToken);
} else {
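// Viterbi recombination: two paths reached the same state in the
// same frame; keep the better one (the existing token is updated
// in place) and count the discarded path as pruned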
if (bestToken.getScore() <= logEntryScore) {
bestToken.update(predecessor, nextState, logEntryScore,
arc.getInsertionProbability(),
arc.getLanguageProbability(),
currentCollectTime);
viterbiPruned.value++;
} else {
viterbiPruned.value++;
}
}
}
}
/**
* Determines whether or not we've visited the state associated with this token since the previous frame.
*
* @param t the token to check
* @return true if we've visited the search state since the last frame
*/
private boolean isVisited(Token t) {
SearchState curState = t.getSearchState();
t = t.getPredecessor();
while (t != null && !t.isEmitting()) {
if (curState.equals(t.getSearchState())) {
return true;
}
t = t.getPredecessor();
}
return false;
}
/** Counts all the tokens in the active list (and displays them). This is an expensive operation. */
protected void showTokenCount() {
if (logger.isLoggable(Level.INFO)) {
Set<Token> tokenSet = new HashSet<Token>();
for (Token token : activeList) {
while (token != null) {
tokenSet.add(token);
token = token.getPredecessor();
}
}
logger.info("Token Lattice size: " + tokenSet.size());
tokenSet = new HashSet<Token>();
for (Token token : resultList) {
while (token != null) {
tokenSet.add(token);
token = token.getPredecessor();
}
}
logger.info("Result Lattice size: " + tokenSet.size());
}
}
/**
* Returns the best token map.
*
* @return the best token map
*/
protected Map<SearchState, Token> getBestTokenMap() {
return bestTokenMap;
}
/**
* Sets the best token Map.
*
* @param bestTokenMap the new best token Map
*/
protected void setBestTokenMap(Map<SearchState, Token> bestTokenMap) {
this.bestTokenMap = bestTokenMap;
}
/**
* Returns the result list.
*
* @return the result list
*/
public List<Token> getResultList() {
return resultList;
}
/**
* Returns the current frame number.
*
* @return the current frame number
*/
public int getCurrentFrameNumber() {
return currentFrameNumber;
}
/**
* Returns the Timer for growing.
*
* @return the Timer for growing
*/
public Timer getGrowTimer() {
return growTimer;
}
/**
* Returns the tokensCreated StatisticsVariable.
*
* @return the tokensCreated StatisticsVariable.
*/
public StatisticsVariable getTokensCreated() {
return tokensCreated;
}
/*
* (non-Javadoc)
*
* @see edu.cmu.sphinx.decoder.search.SearchManager#allocate()
*/
public void allocate() {
totalTokensScored = StatisticsVariable
.getStatisticsVariable("totalTokensScored");
tokensPerSecond = StatisticsVariable
.getStatisticsVariable("tokensScoredPerSecond");
curTokensScored = StatisticsVariable
.getStatisticsVariable("curTokensScored");
tokensCreated = StatisticsVariable
.getStatisticsVariable("tokensCreated");
viterbiPruned = StatisticsVariable
.getStatisticsVariable("viterbiPruned");
beamPruned = StatisticsVariable.getStatisticsVariable("beamPruned");
try {
linguist.allocate();
pruner.allocate();
scorer.allocate();
} catch (IOException e) {
throw new RuntimeException("Allocation of search manager resources failed", e);
}
scoreTimer = TimerPool.getTimer(this, "Score");
pruneTimer = TimerPool.getTimer(this, "Prune");
growTimer = TimerPool.getTimer(this, "Grow");
}
/*
* (non-Javadoc)
*
* @see edu.cmu.sphinx.decoder.search.SearchManager#deallocate()
*/
public void deallocate() {
try {
scorer.deallocate();
pruner.deallocate();
linguist.deallocate();
} catch (IOException e) {
throw new RuntimeException("Deallocation of search manager resources failed", e);
}
}
@Override
public String toString() {
return name;
}
}

View file

@@ -0,0 +1,207 @@
/*
* Copyright 1999-2004 Carnegie Mellon University.
* Portions Copyright 2004 Sun Microsystems, Inc.
* Portions Copyright 2004 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.decoder.search;
import edu.cmu.sphinx.decoder.scorer.Scoreable;
import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
/**
* @author plamere
*/
public class SortingActiveListFactory extends ActiveListFactory {
/**
* @param absoluteBeamWidth absolute pruning beam
* @param relativeBeamWidth relative pruning beam
*/
public SortingActiveListFactory(int absoluteBeamWidth,
double relativeBeamWidth)
{
super(absoluteBeamWidth, relativeBeamWidth);
}
public SortingActiveListFactory() {
}
/*
* (non-Javadoc)
*
* @see edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util.props.PropertySheet)
*/
@Override
public void newProperties(PropertySheet ps) throws PropertyException {
super.newProperties(ps);
}
/*
* (non-Javadoc)
*
* @see edu.cmu.sphinx.decoder.search.ActiveListFactory#newInstance()
*/
@Override
public ActiveList newInstance() {
return new SortingActiveList(absoluteBeamWidth, logRelativeBeamWidth);
}
/**
* An active list that tries to be simple and correct. This type of active list will be slow, but should exhibit
* correct behavior. Faster versions of the ActiveList exist (HeapActiveList, TreeActiveList).
* <p>
* This class is not thread safe and should only be used by a single thread.
* <p>
* Note that all scores are maintained in the LogMath log base.
*/
class SortingActiveList implements ActiveList {
private final static int DEFAULT_SIZE = 1000;
private final int absoluteBeamWidth;
private final float logRelativeBeamWidth;
private Token bestToken;
// when the list is changed these things should be
// changed/updated as well
private List<Token> tokenList;
/**
* Creates an empty active list
*
* @param absoluteBeamWidth beam for absolute pruning
* @param logRelativeBeamWidth beam for relative pruning
*/
public SortingActiveList(int absoluteBeamWidth, float logRelativeBeamWidth) {
this.absoluteBeamWidth = absoluteBeamWidth;
this.logRelativeBeamWidth = logRelativeBeamWidth;
int initListSize = absoluteBeamWidth > 0 ? absoluteBeamWidth : DEFAULT_SIZE;
this.tokenList = new ArrayList<Token>(initListSize);
}
/**
* Adds the given token to the list
*
* @param token the token to add
*/
public void add(Token token) {
tokenList.add(token);
if (bestToken == null || token.getScore() > bestToken.getScore()) {
bestToken = token;
}
}
/**
* Purges excess members. Reduce the size of the token list to the absoluteBeamWidth
*
* @return a (possible new) active list
*/
public ActiveList purge() {
// if the absolute beam is zero, this means there
// should be no constraint on the abs beam size at all
// so we will only be relative beam pruning, which means
// that we don't have to sort the list
if (absoluteBeamWidth > 0 && tokenList.size() > absoluteBeamWidth) {
Collections.sort(tokenList, Scoreable.COMPARATOR);
tokenList = tokenList.subList(0, absoluteBeamWidth);
}
return this;
}
/**
* Gets the beam threshold based upon the best scoring token
*
* @return the beam threshold
*/
public float getBeamThreshold() {
return getBestScore() + logRelativeBeamWidth;
}
/**
* gets the best score in the list
*
* @return the best score
*/
public float getBestScore() {
float bestScore = -Float.MAX_VALUE;
if (bestToken != null) {
bestScore = bestToken.getScore();
}
return bestScore;
}
/**
* Sets the best scoring token for this active list
*
* @param token the best scoring token
*/
public void setBestToken(Token token) {
bestToken = token;
}
/**
* Gets the best scoring token for this active list
*
* @return the best scoring token
*/
public Token getBestToken() {
return bestToken;
}
/**
* Retrieves the iterator for this token list.
*
* @return the iterator for this token list
*/
public Iterator<Token> iterator() {
return tokenList.iterator();
}
/**
* Gets the list of all tokens
*
* @return the list of tokens
*/
public List<Token> getTokens() {
return tokenList;
}
/**
* Returns the number of tokens on this active list
*
* @return the size of the active list
*/
public final int size() {
return tokenList.size();
}
/* (non-Javadoc)
* @see edu.cmu.sphinx.decoder.search.ActiveList#newInstance()
*/
public ActiveList newInstance() {
return SortingActiveListFactory.this.newInstance();
}
}
}
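A minimal standalone sketch of the sort-and-truncate pruning performed by SortingActiveList.purge(); the Candidate type, scores, and class name here are hypothetical stand-ins for Token, not part of this commit:

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;

public class BeamPruneSketch {
    // Hypothetical stand-in for a scored token.
    static final class Candidate {
        final float logScore;
        Candidate(float logScore) { this.logScore = logScore; }
    }

    // Keep only the absoluteBeamWidth highest-scoring candidates, mirroring
    // SortingActiveList.purge(): sort descending by score, then truncate.
    static List<Candidate> purge(List<Candidate> list, int absoluteBeamWidth) {
        if (absoluteBeamWidth > 0 && list.size() > absoluteBeamWidth) {
            list.sort(Comparator.comparingDouble((Candidate c) -> c.logScore).reversed());
            return new ArrayList<>(list.subList(0, absoluteBeamWidth));
        }
        return list;
    }

    public static void main(String[] args) {
        List<Candidate> list = new ArrayList<>();
        for (float s : new float[]{-10f, -3f, -7f, -1f, -20f}) list.add(new Candidate(s));
        for (Candidate c : purge(list, 3)) System.out.println(c.logScore); // -1.0, -3.0, -7.0
    }
}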


@ -0,0 +1,477 @@
/*
* Copyright 1999-2002 Carnegie Mellon University.
* Portions Copyright 2002 Sun Microsystems, Inc.
* Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.decoder.search;
import edu.cmu.sphinx.decoder.scorer.Scoreable;
import edu.cmu.sphinx.decoder.scorer.ScoreProvider;
import edu.cmu.sphinx.frontend.Data;
import edu.cmu.sphinx.frontend.FloatData;
import edu.cmu.sphinx.linguist.HMMSearchState;
import edu.cmu.sphinx.linguist.SearchState;
import edu.cmu.sphinx.linguist.UnitSearchState;
import edu.cmu.sphinx.linguist.WordSearchState;
import edu.cmu.sphinx.linguist.acoustic.Unit;
import edu.cmu.sphinx.linguist.dictionary.Pronunciation;
import edu.cmu.sphinx.linguist.dictionary.Word;
import java.text.DecimalFormat;
import java.util.*;
/**
* Represents a single state in the recognition trellis. Subclasses of Token are used to represent the various
* emitting states.
* <p>
* All scores are maintained in LogMath log base
*/
public class Token implements Scoreable {
private static int curCount;
private static int lastCount;
private static final DecimalFormat scoreFmt = new DecimalFormat("0.0000000E00");
private static final DecimalFormat numFmt = new DecimalFormat("0000");
private Token predecessor;
private float logLanguageScore;
private float logTotalScore;
private float logInsertionScore;
private float logAcousticScore;
private SearchState searchState;
private long collectTime;
private Data data;
/**
* Internal constructor for a token. Used by classes Token, CombineToken, ParallelToken
*
* @param predecessor the predecessor for this token
* @param state the SentenceHMMState associated with this token
* @param logTotalScore the total entry score for this token (in LogMath log base)
* @param logInsertionScore the insertion score associated with this token (in LogMath log base)
* @param logLanguageScore the language score associated with this token (in LogMath log base)
* @param collectTime the frame collection time
*/
public Token(Token predecessor,
SearchState state,
float logTotalScore,
float logInsertionScore,
float logLanguageScore,
long collectTime) {
this.predecessor = predecessor;
this.searchState = state;
this.logTotalScore = logTotalScore;
this.logInsertionScore = logInsertionScore;
this.logLanguageScore = logLanguageScore;
this.collectTime = collectTime;
curCount++;
}
/**
* Creates the initial token with the given word history depth
*
* @param state the SearchState associated with this token
* @param collectTime collection time of this token
*/
public Token(SearchState state, long collectTime) {
this(null, state, 0.0f, 0.0f, 0.0f, collectTime);
}
/**
* Creates a Token with the given acoustic and language scores and predecessor.
*
* @param predecessor previous token
* @param logTotalScore total score
* @param logAcousticScore the log acoustic score
* @param logInsertionScore the log insertion score
* @param logLanguageScore the log language score
*/
public Token(Token predecessor,
float logTotalScore,
float logAcousticScore,
float logInsertionScore,
float logLanguageScore) {
this(predecessor, null, logTotalScore, logInsertionScore, logLanguageScore, 0);
this.logAcousticScore = logAcousticScore;
}
/**
* Returns the predecessor for this token, or null if this token has no predecessors
*
* @return the predecessor
*/
public Token getPredecessor() {
return predecessor;
}
/**
* Collect time is different from the frame number because some frames might be skipped by the silence detector
*
* @return collection time in milliseconds
*/
public long getCollectTime() {
return collectTime;
}
/** Sets the feature for this Token.
* @param data features
*/
public void setData(Data data) {
this.data = data;
if (data instanceof FloatData) {
collectTime = ((FloatData)data).getCollectTime();
}
}
/**
* Returns the feature for this Token.
*
* @return the feature for this Token
*/
public Data getData() {
return data;
}
/**
* Returns the score for the token. The score is a combination of language and acoustic scores
*
* @return the score of this frame (in logMath log base)
*/
public float getScore() {
return logTotalScore;
}
/**
* Calculates a score against the given feature. The score can be retrieved
* with getScore(). The token will keep a reference to the scored feature vector.
*
* @param feature the feature to be scored
* @return the score for the feature
*/
public float calculateScore(Data feature) {
logAcousticScore = ((ScoreProvider) searchState).getScore(feature);
logTotalScore += logAcousticScore;
setData(feature);
return logTotalScore;
}
public float[] calculateComponentScore(Data feature){
return ((ScoreProvider) searchState).getComponentScore(feature);
}
/**
* Normalizes a previously calculated score
*
* @param maxLogScore the score to normalize this score with
* @return the normalized score
*/
public float normalizeScore(float maxLogScore) {
logTotalScore -= maxLogScore;
logAcousticScore -= maxLogScore;
return logTotalScore;
}
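// Worked example (illustrative): if this token scored logTotalScore = -1005.0
// and the frame's best score is maxLogScore = -1000.0, normalization yields
// -1005.0 - (-1000.0) = -5.0. Keeping scores anchored near zero this way
// avoids exhausting float range over long utterances.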
/**
* Sets the score for this token
*
* @param logScore the new score for the token (in logMath log base)
*/
public void setScore(float logScore) {
this.logTotalScore = logScore;
}
/**
* Returns the language score associated with this token
*
* @return the language score (in logMath log base)
*/
public float getLanguageScore() {
return logLanguageScore;
}
/**
* Returns the insertion score associated with this token.
* Insertion score is the score of the transition between
* states. It might be transition score from the acoustic model,
* phone insertion score or word insertion probability from
* the linguist.
*
* @return the insertion score (in logMath log base)
*/
public float getInsertionScore() {
return logInsertionScore;
}
/**
* Returns the acoustic score for this token (in logMath log base).
* The acoustic score is the sum of the per-frame GMM scores.
*
* @return score
*/
public float getAcousticScore() {
return logAcousticScore;
}
/**
* Returns the SearchState associated with this token
*
* @return the searchState
*/
public SearchState getSearchState() {
return searchState;
}
/**
* Determines if this token is associated with an emitting state. An emitting state is a state that can be scored
* acoustically.
*
* @return <code>true</code> if this token is associated with an emitting state
*/
public boolean isEmitting() {
return searchState.isEmitting();
}
/**
* Determines if this token is associated with a final SentenceHMM state.
*
* @return <code>true</code> if this token is associated with a final state
*/
public boolean isFinal() {
return searchState.isFinal();
}
/**
* Determines if this token marks the end of a word
*
* @return <code>true</code> if this token marks the end of a word
*/
public boolean isWord() {
return searchState instanceof WordSearchState;
}
/**
* Retrieves the string representation of this object
*
* @return the string representation of this object
*/
@Override
public String toString() {
return
numFmt.format(getCollectTime()) + ' ' +
scoreFmt.format(getScore()) + ' ' +
scoreFmt.format(getAcousticScore()) + ' ' +
scoreFmt.format(getLanguageScore()) + ' ' +
getSearchState();
}
/** dumps a branch of tokens */
public void dumpTokenPath() {
dumpTokenPath(true);
}
/**
* dumps a branch of tokens
*
* @param includeHMMStates if true include all sentence hmm states
*/
public void dumpTokenPath(boolean includeHMMStates) {
Token token = this;
List<Token> list = new ArrayList<Token>();
while (token != null) {
list.add(token);
token = token.getPredecessor();
}
for (int i = list.size() - 1; i >= 0; i--) {
token = list.get(i);
if (includeHMMStates ||
(!(token.getSearchState() instanceof HMMSearchState))) {
System.out.println(" " + token);
}
}
System.out.println();
}
/**
* Returns the string of words leading up to this token.
*
* @param wantFiller if true, filler words are added
* @param wantPronunciations if true append [ phoneme phoneme ... ] after each word
* @return the word path
*/
public String getWordPath(boolean wantFiller, boolean wantPronunciations) {
StringBuilder sb = new StringBuilder();
Token token = this;
while (token != null) {
if (token.isWord()) {
WordSearchState wordState =
(WordSearchState) token.getSearchState();
Pronunciation pron = wordState.getPronunciation();
Word word = wordState.getPronunciation().getWord();
// System.out.println(token.getFrameNumber() + " " + word + " " + token.logLanguageScore + " " + token.logAcousticScore);
if (wantFiller || !word.isFiller()) {
if (wantPronunciations) {
sb.insert(0, ']');
Unit[] u = pron.getUnits();
for (int i = u.length - 1; i >= 0; i--) {
if (i < u.length - 1) sb.insert(0, ',');
sb.insert(0, u[i].getName());
}
sb.insert(0, '[');
}
sb.insert(0, word.getSpelling());
sb.insert(0, ' ');
}
}
token = token.getPredecessor();
}
return sb.toString().trim();
}
/**
* Returns the string of words for this token, with no embedded filler words
*
* @return the string of words
*/
public String getWordPathNoFiller() {
return getWordPath(false, false);
}
/**
* Returns the string of words for this token, with embedded silences
*
* @return the string of words
*/
public String getWordPath() {
return getWordPath(true, false);
}
/**
* Returns the string of words and units for this token, with embedded silences.
*
* @return the string of words and units
*/
public String getWordUnitPath() {
StringBuilder sb = new StringBuilder();
Token token = this;
while (token != null) {
SearchState searchState = token.getSearchState();
if (searchState instanceof WordSearchState) {
WordSearchState wordState = (WordSearchState) searchState;
Word word = wordState.getPronunciation().getWord();
sb.insert(0, ' ' + word.getSpelling());
} else if (searchState instanceof UnitSearchState) {
UnitSearchState unitState = (UnitSearchState) searchState;
Unit unit = unitState.getUnit();
sb.insert(0, ' ' + unit.getName());
}
token = token.getPredecessor();
}
return sb.toString().trim();
}
/**
* Returns the word of this Token if the search state is a WordSearchState. If the search state is not a
* WordSearchState, returns null.
*
* @return the word of this Token, or null if this is not a word token
*/
public Word getWord() {
if (isWord()) {
WordSearchState wordState = (WordSearchState) searchState;
return wordState.getPronunciation().getWord();
} else {
return null;
}
}
/** Shows the token count */
public static void showCount() {
System.out.println("Cur count: " + curCount + " new " +
(curCount - lastCount));
lastCount = curCount;
}
/**
* Determines if this branch is valid
*
* @return true if the token and its predecessors are valid
*/
public boolean validate() {
return true;
}
/**
* Return the DecimalFormat object for formatting the print out of scores.
*
* @return the DecimalFormat object for formatting score print outs
*/
protected static DecimalFormat getScoreFormat() {
return scoreFmt;
}
/**
* Return the DecimalFormat object for formatting the print out of numbers
*
* @return the DecimalFormat object for formatting number print outs
*/
protected static DecimalFormat getNumberFormat() {
return numFmt;
}
public void update(Token predecessor, SearchState nextState,
float logEntryScore, float insertionProbability,
float languageProbability, long collectTime) {
this.predecessor = predecessor;
this.searchState = nextState;
this.logTotalScore = logEntryScore;
this.logInsertionScore = insertionProbability;
this.logLanguageScore = languageProbability;
this.collectTime = collectTime;
}
}
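A standalone sketch of the backward predecessor walk that getWordPath() and getWordUnitPath() perform; the Link type is a hypothetical stand-in for Token and is not part of this commit:

import java.util.ArrayList;
import java.util.List;

public class WordPathSketch {
    // Hypothetical stand-in for a token that may mark a word boundary.
    static final class Link {
        final Link predecessor;
        final String word; // null when the link is not a word token
        Link(Link predecessor, String word) { this.predecessor = predecessor; this.word = word; }
    }

    // Walk the predecessor chain from the most recent link back to the start,
    // collecting words, then emit them in utterance order - the same backward
    // walk Token.getWordPath() performs with StringBuilder.insert(0, ...).
    static String wordPath(Link last) {
        List<String> words = new ArrayList<>();
        for (Link l = last; l != null; l = l.predecessor) {
            if (l.word != null) words.add(l.word);
        }
        StringBuilder sb = new StringBuilder();
        for (int i = words.size() - 1; i >= 0; i--) {
            if (sb.length() > 0) sb.append(' ');
            sb.append(words.get(i));
        }
        return sb.toString();
    }

    public static void main(String[] args) {
        Link a = new Link(null, "one");
        Link b = new Link(a, null);      // non-word state
        Link c = new Link(b, "two");
        System.out.println(wordPath(c)); // "one two"
    }
}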


@ -0,0 +1,172 @@
/*
* Copyright 1999-2002 Carnegie Mellon University.
* Portions Copyright 2002 Sun Microsystems, Inc.
* Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.decoder.search;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import edu.cmu.sphinx.decoder.scorer.Scoreable;
import edu.cmu.sphinx.linguist.SearchState;
/**
* The token heap search manager that maintains a heap of the best tokens for each
* search state instead of a single best token
*
*/
public class TokenHeapSearchManager extends WordPruningBreadthFirstSearchManager {
protected final int maxTokenHeapSize = 3;
Map<Object, TokenHeap> bestTokenMap;
@Override
protected void createBestTokenMap() {
int mapSize = activeList.size() << 2;
if (mapSize == 0) {
mapSize = 1;
}
bestTokenMap = new HashMap<Object, TokenHeap>(mapSize, 0.3F);
}
@Override
protected void setBestToken(Token token, SearchState state) {
TokenHeap th = bestTokenMap.get(state);
if (th == null) {
th = new TokenHeap(maxTokenHeapSize);
bestTokenMap.put(state, th);
}
th.add(token);
}
@Override
protected Token getBestToken(SearchState state) {
// new way... if the heap for this state isn't full return
// null, otherwise return the worst scoring token
TokenHeap th = bestTokenMap.get(state);
Token t;
if (th == null) {
return null;
} else if ((t = th.get(state)) != null) {
return t;
} else if (!th.isFull()) {
return null;
} else {
return th.getSmallest();
}
}
/**
* A quick and dirty token heap that allows us to perform token stack
* experiments. It is not very efficient. We will likely replace this with
* something better once we figure out how we want to prune things.
*/
class TokenHeap {
final Token[] tokens;
int curSize;
/**
* Creates a token heap with the maximum size
*
* @param maxSize
* the maximum size of the heap
*/
TokenHeap(int maxSize) {
tokens = new Token[maxSize];
}
/**
* Adds a token to the heap
*
* @param token
* the token to add
*/
void add(Token token) {
// first, if an identical state exists, replace
// it.
if (!tryReplace(token)) {
if (curSize < tokens.length) {
tokens[curSize++] = token;
} else if (token.getScore() > tokens[curSize - 1].getScore()) {
tokens[curSize - 1] = token;
}
}
fixupInsert();
}
/**
* Returns the smallest scoring token on the heap
*
* @return the smallest scoring token
*/
Token getSmallest() {
if (curSize == 0) {
return null;
} else {
return tokens[curSize - 1];
}
}
/**
* Determines if the heap is full
*
* @return <code>true</code> if the heap is full
*/
boolean isFull() {
return curSize == tokens.length;
}
/**
* Checks to see if there is already a token t on the heap that has the
* same search state. If so, this token replaces that one
*
* @param t
* the token to try to add to the heap
* @return <code>true</code> if the token was added
*/
private boolean tryReplace(Token t) {
for (int i = 0; i < curSize; i++) {
if (t.getSearchState().equals(tokens[i].getSearchState())) {
assert t.getScore() > tokens[i].getScore();
tokens[i] = t;
return true;
}
}
return false;
}
/** Orders the heap after an insert */
private void fixupInsert() {
// toIndex is exclusive, so sort the whole occupied range [0, curSize)
Arrays.sort(tokens, 0, curSize, Scoreable.COMPARATOR);
}
/**
* returns a token on the heap that matches the given search state
*
* @param s
* the search state
* @return the token that matches, or null
*/
Token get(SearchState s) {
for (int i = 0; i < curSize; i++) {
if (tokens[i].getSearchState().equals(s)) {
return tokens[i];
}
}
return null;
}
}
}
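A standalone sketch of the bounded best-N idea behind TokenHeap, using plain scores instead of tokens; the TopN type is hypothetical, and the eviction rule mirrors add()/getSmallest():

import java.util.Arrays;

public class TopNSketch {
    // Fixed-capacity best-N tracker in the spirit of TokenHeap: keep the
    // occupied prefix sorted in descending order so the worst entry sits
    // at index size - 1 and is the one evicted when the array is full.
    static final class TopN {
        final double[] scores;
        int size;

        TopN(int capacity) { scores = new double[capacity]; }

        void add(double score) {
            if (size < scores.length) {
                scores[size++] = score;
            } else if (score > scores[size - 1]) {
                scores[size - 1] = score; // replace the current worst
            } else {
                return; // not good enough to enter
            }
            // restore descending order over the occupied range [0, size)
            Arrays.sort(scores, 0, size);
            for (int i = 0; i < size / 2; i++) { // reverse ascending -> descending
                double tmp = scores[i];
                scores[i] = scores[size - 1 - i];
                scores[size - 1 - i] = tmp;
            }
        }

        double worst() { return scores[size - 1]; }
        boolean isFull() { return size == scores.length; }
    }

    public static void main(String[] args) {
        TopN top = new TopN(3);
        for (double s : new double[]{-9, -2, -5, -1, -7}) top.add(s);
        System.out.println(Arrays.toString(Arrays.copyOf(top.scores, top.size))); // [-1.0, -2.0, -5.0]
    }
}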


@ -0,0 +1,86 @@
package edu.cmu.sphinx.decoder.search;
import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;
import edu.cmu.sphinx.util.props.S4Boolean;
abstract public class TokenSearchManager implements SearchManager {
/** The property that specifies whether to build a word lattice. */
@S4Boolean(defaultValue = true)
public final static String PROP_BUILD_WORD_LATTICE = "buildWordLattice";
/**
* The property that controls whether or not we keep all tokens. If this is
* set to false, only word tokens are retained, otherwise all tokens are
* retained.
*/
@S4Boolean(defaultValue = false)
public final static String PROP_KEEP_ALL_TOKENS = "keepAllTokens";
protected boolean buildWordLattice;
protected boolean keepAllTokens;
/*
* (non-Javadoc)
*
* @see
* edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util
* .props.PropertySheet)
*/
public void newProperties(PropertySheet ps) throws PropertyException {
buildWordLattice = ps.getBoolean(PROP_BUILD_WORD_LATTICE);
keepAllTokens = ps.getBoolean(PROP_KEEP_ALL_TOKENS);
}
/**
* Find the token to use as a predecessor in resultList given a candidate
* predecessor. There are three cases here:
*
* <ul>
* <li>We want to store everything in resultList. In that case
* {@link #keepAllTokens} is set to true and we just store everything that
* was built before.
* <li>We are only interested in sequence of words. In this case we just
* keep word tokens and ignore everything else. In this case timing and
* scoring information is lost since we keep scores in emitting tokens.
* <li>We want to keep words but we want to keep scores to build a lattice
* from the result list later and {@link #buildWordLattice} is set to true.
* In this case we want to insert intermediate token to store the score and
* this token will be used during lattice path collapse to get score on
* edge. See {@link edu.cmu.sphinx.result.Lattice} for details of resultList
* compression.
* </ul>
*
* @param token
* the token of interest
* @return the token to use as a predecessor in the result list
*/
protected Token getResultListPredecessor(Token token) {
if (keepAllTokens) {
return token;
}
if(!buildWordLattice) {
if (token.isWord())
return token;
else
return token.getPredecessor();
}
float logAcousticScore = 0.0f;
float logLanguageScore = 0.0f;
float logInsertionScore = 0.0f;
while (token != null && !token.isWord()) {
logAcousticScore += token.getAcousticScore();
logLanguageScore += token.getLanguageScore();
logInsertionScore += token.getInsertionScore();
token = token.getPredecessor();
}
// match the Token(predecessor, total, acoustic, insertion, language) constructor order
return new Token(token, token.getScore(), logAcousticScore, logInsertionScore, logLanguageScore);
}
}
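A standalone sketch of the score-collapsing walk in getResultListPredecessor(): non-word entries are skipped while their log scores accumulate into what the bridging token would carry. The Node type and values are hypothetical:

public class CollapseSketch {
    // Hypothetical stand-in for a token chain entry.
    static final class Node {
        final Node predecessor;
        final boolean isWord;
        final float acoustic, language, insertion;
        Node(Node p, boolean w, float a, float l, float i) {
            predecessor = p; isWord = w; acoustic = a; language = l; insertion = i;
        }
    }

    // Skip back over non-word nodes, summing their log scores, and report
    // the accumulated scores a bridging token would carry on the lattice edge.
    static void collapse(Node node) {
        float acoustic = 0f, language = 0f, insertion = 0f;
        while (node != null && !node.isWord) {
            acoustic += node.acoustic;
            language += node.language;
            insertion += node.insertion;
            node = node.predecessor;
        }
        System.out.printf("reachedWord=%s acoustic=%.1f language=%.1f insertion=%.1f%n",
                node != null, acoustic, language, insertion);
    }

    public static void main(String[] args) {
        Node word = new Node(null, true, -1f, -1f, -1f);
        Node hmm1 = new Node(word, false, -2f, 0f, -0.5f);
        Node hmm2 = new Node(hmm1, false, -3f, 0f, -0.5f);
        collapse(hmm2); // reachedWord=true acoustic=-5.0 language=0.0 insertion=-1.0
    }
}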


@ -0,0 +1,259 @@
/*
*
* Copyright 1999-2004 Carnegie Mellon University.
* Portions Copyright 2004 Sun Microsystems, Inc.
* Portions Copyright 2004 Mitsubishi Electronic Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.decoder.search;
import edu.cmu.sphinx.decoder.scorer.Scoreable;
import edu.cmu.sphinx.linguist.WordSearchState;
import edu.cmu.sphinx.linguist.dictionary.Word;
import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;
import edu.cmu.sphinx.util.props.S4Integer;
import java.util.*;
/**
* A factory for WordActiveList. The word active list is an active list designed to hold word tokens only. In addition
* to the usual active list properties such as absolute and relative beams, the word active list allows restricting the
* number of copies of any particular word in the word beam. It can also restrict the number of fillers in the beam.
*/
public class WordActiveListFactory extends ActiveListFactory {
/** property that sets the max paths for a single word. (zero disables this feature) */
@S4Integer(defaultValue = 0)
public final static String PROP_MAX_PATHS_PER_WORD = "maxPathsPerWord";
/** property that sets the max filler words allowed in the beam. (zero disables this feature) */
@S4Integer(defaultValue = 1)
public final static String PROP_MAX_FILLER_WORDS = "maxFillerWords";
private int maxPathsPerWord;
private int maxFiller;
/**
* Create factory for word active list
* @param absoluteBeamWidth beam for absolute pruning
* @param relativeBeamWidth beam for relative pruning
* @param maxPathsPerWord maximum number of path to keep per word
* @param maxFiller maximum number of fillers
*/
public WordActiveListFactory(int absoluteBeamWidth,
double relativeBeamWidth, int maxPathsPerWord, int maxFiller )
{
super(absoluteBeamWidth, relativeBeamWidth);
this.maxPathsPerWord = maxPathsPerWord;
this.maxFiller = maxFiller;
}
public WordActiveListFactory() {
}
/*
* (non-Javadoc)
*
* @see edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util.props.PropertySheet)
*/
@Override
public void newProperties(PropertySheet ps) throws PropertyException {
super.newProperties(ps);
maxPathsPerWord = ps.getInt(PROP_MAX_PATHS_PER_WORD);
maxFiller = ps.getInt(PROP_MAX_FILLER_WORDS);
}
/*
* (non-Javadoc)
*
* @see edu.cmu.sphinx.decoder.search.ActiveListFactory#newInstance()
*/
@Override
public ActiveList newInstance() {
return new WordActiveList();
}
/**
* An active list that manages words. Guarantees only one version of a word.
* <p>
* Note that all scores are maintained in the LogMath log domain
*/
class WordActiveList implements ActiveList {
private Token bestToken;
private List<Token> tokenList = new LinkedList<Token>();
/**
* Adds the given token to the list
*
* @param token the token to add
*/
public void add(Token token) {
tokenList.add(token);
if (bestToken == null || token.getScore() > bestToken.getScore()) {
bestToken = token;
}
}
/**
* Replaces an old token with a new token
*
* @param oldToken the token to replace (or null in which case, replace works like add).
* @param newToken the new token to be placed in the list.
*/
public void replace(Token oldToken, Token newToken) {
add(newToken);
if (oldToken != null) {
tokenList.remove(oldToken);
}
}
/**
* Purges excess members. Removes word duplicates beyond maxPathsPerWord, caps the number of filler words, and trims
* the list to the absoluteBeamWidth.
*
* @return a (possibly new) active list
*/
public ActiveList purge() {
int fillerCount = 0;
Map<Word, Integer> countMap = new HashMap<Word, Integer>();
Collections.sort(tokenList, Scoreable.COMPARATOR);
// remove word duplicates
for (Iterator<Token> i = tokenList.iterator(); i.hasNext();) {
Token token = i.next();
WordSearchState wordState = (WordSearchState)token.getSearchState();
Word word = wordState.getPronunciation().getWord();
// only allow maxFiller words
if (maxFiller > 0) {
if (word.isFiller()) {
if (fillerCount < maxFiller) {
fillerCount++;
} else {
i.remove();
continue;
}
}
}
if (maxPathsPerWord > 0) {
Integer count = countMap.get(word);
int c = count == null ? 0 : count;
// Since the tokens are sorted by score we only
// keep the n tokens for a particular word
if (c < maxPathsPerWord) { // counts 0..maxPathsPerWord-1 keep the token
countMap.put(word, c + 1);
} else {
i.remove();
}
}
}
if (tokenList.size() > absoluteBeamWidth) {
tokenList = tokenList.subList(0, absoluteBeamWidth);
}
return this;
}
/**
* Retrieves the iterator for this token list.
*
* @return the iterator for this token list
*/
public Iterator<Token> iterator() {
return tokenList.iterator();
}
/**
* Gets the set of all tokens
*
* @return the set of tokens
*/
public List<Token> getTokens() {
return tokenList;
}
/**
* Returns the number of tokens on this active list
*
* @return the size of the active list
*/
public final int size() {
return tokenList.size();
}
/**
* Gets the beam threshold based upon the best scoring token
*
* @return the beam threshold
*/
public float getBeamThreshold() {
return getBestScore() + logRelativeBeamWidth;
}
/**
* gets the best score in the list
*
* @return the best score
*/
public float getBestScore() {
float bestScore = -Float.MAX_VALUE;
if (bestToken != null) {
bestScore = bestToken.getScore();
}
return bestScore;
}
/**
* Sets the best scoring token for this active list
*
* @param token the best scoring token
*/
public void setBestToken(Token token) {
bestToken = token;
}
/**
* Gets the best scoring token for this active list
*
* @return the best scoring token
*/
public Token getBestToken() {
return bestToken;
}
/* (non-Javadoc)
* @see edu.cmu.sphinx.decoder.search.ActiveList#createNew()
*/
public ActiveList newInstance() {
return WordActiveListFactory.this.newInstance();
}
}
}
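A standalone sketch of the per-word counting applied in WordActiveList.purge(), on a list already sorted by score; strings stand in for word tokens and the helper is hypothetical:

import java.util.*;

public class WordCapSketch {
    // Keep at most maxPathsPerWord entries per word from a score-sorted
    // list - the counting scheme WordActiveList.purge() applies.
    static List<String> capPerWord(List<String> sortedWords, int maxPathsPerWord) {
        Map<String, Integer> counts = new HashMap<>();
        List<String> kept = new ArrayList<>();
        for (String w : sortedWords) {
            int c = counts.getOrDefault(w, 0);
            if (c < maxPathsPerWord) {
                counts.put(w, c + 1);
                kept.add(w); // best-scoring copies survive, later ones are dropped
            }
        }
        return kept;
    }

    public static void main(String[] args) {
        List<String> sorted = Arrays.asList("the", "the", "the", "a", "a", "cat");
        System.out.println(capPerWord(sorted, 2)); // [the, the, a, a, cat]
    }
}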


@ -0,0 +1,497 @@
/*
* Copyright 2014 Carnegie Mellon University.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.decoder.search;
// a test search manager.
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Map;
import edu.cmu.sphinx.decoder.pruner.Pruner;
import edu.cmu.sphinx.decoder.scorer.AcousticScorer;
import edu.cmu.sphinx.frontend.Data;
import edu.cmu.sphinx.linguist.Linguist;
import edu.cmu.sphinx.linguist.SearchState;
import edu.cmu.sphinx.linguist.SearchStateArc;
import edu.cmu.sphinx.linguist.WordSearchState;
import edu.cmu.sphinx.linguist.acoustic.tiedstate.Loader;
import edu.cmu.sphinx.linguist.acoustic.tiedstate.Sphinx3Loader;
import edu.cmu.sphinx.linguist.allphone.PhoneHmmSearchState;
import edu.cmu.sphinx.linguist.lextree.LexTreeLinguist.LexTreeHMMState;
import edu.cmu.sphinx.linguist.lextree.LexTreeLinguist.LexTreeNonEmittingHMMState;
import edu.cmu.sphinx.linguist.lextree.LexTreeLinguist.LexTreeWordState;
import edu.cmu.sphinx.linguist.lextree.LexTreeLinguist.LexTreeEndUnitState;
import edu.cmu.sphinx.result.Result;
import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;
import edu.cmu.sphinx.util.props.S4Component;
import edu.cmu.sphinx.util.props.S4Double;
import edu.cmu.sphinx.util.props.S4Integer;
/**
* Provides the breadth first search with fast match heuristic included to
* reduce amount of tokens created.
* <p>
* All scores and probabilities are maintained in the log math log domain.
*/
public class WordPruningBreadthFirstLookaheadSearchManager extends WordPruningBreadthFirstSearchManager {
/** The property used to get direct access to the Gaussian loader for score caching control. */
@S4Component(type = Loader.class)
public final static String PROP_LOADER = "loader";
/**
* The property that defines the name of the linguist to be used for fast
* match.
*/
@S4Component(type = Linguist.class)
public final static String PROP_FASTMATCH_LINGUIST = "fastmatchLinguist";
/** The property that defines the type of active list factory for fast match */
@S4Component(type = ActiveListFactory.class)
public final static String PROP_FM_ACTIVE_LIST_FACTORY = "fastmatchActiveListFactory";
@S4Double(defaultValue = 1.0)
public final static String PROP_LOOKAHEAD_PENALTY_WEIGHT = "lookaheadPenaltyWeight";
/**
* The property that controls the size of the lookahead window. Acceptable values
* are in range [1..10].
*/
@S4Integer(defaultValue = 5)
public final static String PROP_LOOKAHEAD_WINDOW = "lookaheadWindow";
// -----------------------------------
// Configured Subcomponents
// -----------------------------------
private Linguist fastmatchLinguist; // Provides phones info for fastmatch
private Loader loader;
private ActiveListFactory fastmatchActiveListFactory;
// -----------------------------------
// Lookahead data
// -----------------------------------
private int lookaheadWindow;
private float lookaheadWeight;
private HashMap<Integer, Float> penalties;
private LinkedList<FrameCiScores> ciScores;
// -----------------------------------
// Working data
// -----------------------------------
private int currentFastMatchFrameNumber; // the current frame number for
// lookahead matching
protected ActiveList fastmatchActiveList; // the list of active tokens for
// fast match
protected Map<SearchState, Token> fastMatchBestTokenMap;
private boolean fastmatchStreamEnd;
/**
* Creates a pruning manager with lookahead
* @param linguist a linguist for search space
* @param fastmatchLinguist a linguist for fast search space
* @param pruner pruner to drop tokens
* @param loader model loader
* @param scorer scorer to estimate token probability
* @param activeListManager active list manager to store tokens
* @param fastmatchActiveListFactory fast match active list factory to store phoneloop tokens
* @param showTokenCount show count during decoding
* @param relativeWordBeamWidth relative beam for lookahead pruning
* @param growSkipInterval skip interval for growth
* @param checkStateOrder check order of states during growth
* @param buildWordLattice build a lattice during decoding
* @param maxLatticeEdges max edges to keep in lattice
* @param acousticLookaheadFrames frames to do lookahead
* @param keepAllTokens keep tokens including emitting tokens
* @param lookaheadWindow window for lookahead
* @param lookaheadWeight weight for lookahead pruning
*/
public WordPruningBreadthFirstLookaheadSearchManager(Linguist linguist, Linguist fastmatchLinguist, Loader loader,
Pruner pruner, AcousticScorer scorer, ActiveListManager activeListManager,
ActiveListFactory fastmatchActiveListFactory, boolean showTokenCount, double relativeWordBeamWidth,
int growSkipInterval, boolean checkStateOrder, boolean buildWordLattice, int lookaheadWindow, float lookaheadWeight,
int maxLatticeEdges, float acousticLookaheadFrames, boolean keepAllTokens) {
super(linguist, pruner, scorer, activeListManager, showTokenCount, relativeWordBeamWidth, growSkipInterval,
checkStateOrder, buildWordLattice, maxLatticeEdges, acousticLookaheadFrames, keepAllTokens);
this.loader = loader;
this.fastmatchLinguist = fastmatchLinguist;
this.fastmatchActiveListFactory = fastmatchActiveListFactory;
this.lookaheadWindow = lookaheadWindow;
this.lookaheadWeight = lookaheadWeight;
if (lookaheadWindow < 1 || lookaheadWindow > 10)
throw new IllegalArgumentException("Unsupported lookahead window size: " + lookaheadWindow
+ ". Value in range [1..10] is expected");
this.ciScores = new LinkedList<FrameCiScores>();
this.penalties = new HashMap<Integer, Float>();
if (loader instanceof Sphinx3Loader && ((Sphinx3Loader) loader).hasTiedMixtures())
((Sphinx3Loader) loader).setGauScoresQueueLength(lookaheadWindow + 2);
}
public WordPruningBreadthFirstLookaheadSearchManager() {
}
/*
* (non-Javadoc)
*
* @see
* edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util
* .props.PropertySheet)
*/
@Override
public void newProperties(PropertySheet ps) throws PropertyException {
super.newProperties(ps);
fastmatchLinguist = (Linguist) ps.getComponent(PROP_FASTMATCH_LINGUIST);
fastmatchActiveListFactory = (ActiveListFactory) ps.getComponent(PROP_FM_ACTIVE_LIST_FACTORY);
loader = (Loader) ps.getComponent(PROP_LOADER);
lookaheadWindow = ps.getInt(PROP_LOOKAHEAD_WINDOW);
lookaheadWeight = ps.getFloat(PROP_LOOKAHEAD_PENALTY_WEIGHT);
if (lookaheadWindow < 1 || lookaheadWindow > 10)
throw new PropertyException(WordPruningBreadthFirstLookaheadSearchManager.class.getName(), PROP_LOOKAHEAD_WINDOW,
"Unsupported lookahead window size: " + lookaheadWindow + ". Value in range [1..10] is expected");
ciScores = new LinkedList<FrameCiScores>();
penalties = new HashMap<Integer, Float>();
if (loader instanceof Sphinx3Loader && ((Sphinx3Loader) loader).hasTiedMixtures())
((Sphinx3Loader) loader).setGauScoresQueueLength(lookaheadWindow + 2);
}
/**
* Performs the recognition for the given number of frames.
*
* @param nFrames
* the number of frames to recognize
* @return the current result
*/
@Override
public Result recognize(int nFrames) {
boolean done = false;
Result result = null;
streamEnd = false;
for (int i = 0; i < nFrames && !done; i++) {
if (!fastmatchStreamEnd)
fastMatchRecognize();
penalties.clear();
ciScores.poll();
done = recognize();
}
if (!streamEnd) {
result = new Result(loserManager, activeList, resultList, currentCollectTime, done, linguist.getSearchGraph()
.getWordTokenFirst(), true);
}
// tokenTypeTracker.show();
if (showTokenCount) {
showTokenCount();
}
return result;
}
private void fastMatchRecognize() {
boolean more = scoreFastMatchTokens();
if (more) {
pruneFastMatchBranches();
currentFastMatchFrameNumber++;
createFastMatchBestTokenMap();
growFastmatchBranches();
}
}
/**
* Creates a new best token map sized to the current fast match active list
*/
protected void createFastMatchBestTokenMap() {
int mapSize = fastmatchActiveList.size() * 10;
if (mapSize == 0) {
mapSize = 1;
}
fastMatchBestTokenMap = new HashMap<SearchState, Token>(mapSize);
}
/**
* Gets the initial grammar node from the linguist and creates a
* GrammarNodeToken
*/
@Override
protected void localStart() {
currentFastMatchFrameNumber = 0;
if (loader instanceof Sphinx3Loader && ((Sphinx3Loader) loader).hasTiedMixtures())
((Sphinx3Loader) loader).clearGauScores();
// prepare fast match active list
fastmatchActiveList = fastmatchActiveListFactory.newInstance();
SearchState fmInitState = fastmatchLinguist.getSearchGraph().getInitialState();
fastmatchActiveList.add(new Token(fmInitState, currentFastMatchFrameNumber));
createFastMatchBestTokenMap();
growFastmatchBranches();
fastmatchStreamEnd = false;
for (int i = 0; (i < lookaheadWindow - 1) && !fastmatchStreamEnd; i++)
fastMatchRecognize();
super.localStart();
}
/**
* Goes through the fast match active list of tokens and expands each token,
* finding the set of successor tokens until all the successor tokens are
* emitting tokens.
*/
protected void growFastmatchBranches() {
growTimer.start();
ActiveList oldActiveList = fastmatchActiveList;
fastmatchActiveList = fastmatchActiveListFactory.newInstance();
float fastmatchThreshold = oldActiveList.getBeamThreshold();
// TODO more precise range of baseIds, remove magic number
float[] frameCiScores = new float[100];
Arrays.fill(frameCiScores, -Float.MAX_VALUE);
float frameMaxCiScore = -Float.MAX_VALUE;
for (Token token : oldActiveList) {
float tokenScore = token.getScore();
if (tokenScore < fastmatchThreshold)
continue;
// fill the max CI score array that will be used when composing
// token scores in the general search
if (token.getSearchState() instanceof PhoneHmmSearchState) {
int baseId = ((PhoneHmmSearchState) token.getSearchState()).getBaseId();
if (frameCiScores[baseId] < tokenScore)
frameCiScores[baseId] = tokenScore;
if (frameMaxCiScore < tokenScore)
frameMaxCiScore = tokenScore;
}
collectFastMatchSuccessorTokens(token);
}
ciScores.add(new FrameCiScores(frameCiScores, frameMaxCiScore));
growTimer.stop();
}
protected boolean scoreFastMatchTokens() {
boolean moreTokens;
scoreTimer.start();
Data data = scorer.calculateScoresAndStoreData(fastmatchActiveList.getTokens());
scoreTimer.stop();
Token bestToken = null;
if (data instanceof Token) {
bestToken = (Token) data;
} else {
fastmatchStreamEnd = true;
}
moreTokens = (bestToken != null);
fastmatchActiveList.setBestToken(bestToken);
// monitorWords(activeList);
monitorStates(fastmatchActiveList);
// System.out.println("BEST " + bestToken);
curTokensScored.value += fastmatchActiveList.size();
totalTokensScored.value += fastmatchActiveList.size();
return moreTokens;
}
/** Removes unpromising branches from the fast match active list */
protected void pruneFastMatchBranches() {
pruneTimer.start();
fastmatchActiveList = pruner.prune(fastmatchActiveList);
pruneTimer.stop();
}
protected Token getFastMatchBestToken(SearchState state) {
return fastMatchBestTokenMap.get(state);
}
protected void setFastMatchBestToken(Token token, SearchState state) {
fastMatchBestTokenMap.put(state, token);
}
protected void collectFastMatchSuccessorTokens(Token token) {
SearchState state = token.getSearchState();
SearchStateArc[] arcs = state.getSuccessors();
// For each successor
// calculate the entry score for the token based upon the
// predecessor token score and the transition probabilities
// if the score is better than the best score encountered for
// the SearchState and frame then create a new token, add
// it to the lattice and the SearchState.
// If the token is an emitting token add it to the list,
// otherwise recursively collect the new tokens successors.
for (SearchStateArc arc : arcs) {
SearchState nextState = arc.getState();
// We're actually multiplying the variables, but since
// these come in log(), multiply gets converted to add
float logEntryScore = token.getScore() + arc.getProbability();
Token predecessor = getResultListPredecessor(token);
// if not emitting, check to see if we've already visited
// this state during this frame. Expand the token only if we
// haven't visited it already. This prevents the search
// from getting stuck in a loop of states with no
// intervening emitting nodes. This can happen with nasty
// jsgf grammars such as ((foo*)*)*
if (!nextState.isEmitting()) {
Token newToken = new Token(predecessor, nextState, logEntryScore, arc.getInsertionProbability(),
arc.getLanguageProbability(), currentFastMatchFrameNumber);
tokensCreated.value++;
if (!isVisited(newToken)) {
collectFastMatchSuccessorTokens(newToken);
}
continue;
}
Token bestToken = getFastMatchBestToken(nextState);
if (bestToken == null) {
Token newToken = new Token(predecessor, nextState, logEntryScore, arc.getInsertionProbability(),
arc.getLanguageProbability(), currentFastMatchFrameNumber);
tokensCreated.value++;
setFastMatchBestToken(newToken, nextState);
fastmatchActiveList.add(newToken);
} else {
if (bestToken.getScore() <= logEntryScore) {
bestToken.update(predecessor, nextState, logEntryScore, arc.getInsertionProbability(),
arc.getLanguageProbability(), currentFastMatchFrameNumber);
}
}
}
}
/**
* Collects the next set of emitting tokens from a token and accumulates
* them in the active or result lists
*
* @param token
* the token to collect successors from
*/
@Override
protected void collectSuccessorTokens(Token token) {
// tokenTracker.add(token);
// tokenTypeTracker.add(token);
// If this is a final state, add it to the final list
if (token.isFinal()) {
resultList.add(getResultListPredecessor(token));
return;
}
// if this is a non-emitting token and we've already
// visited the same state during this frame, then we
// are in a grammar loop, so we don't continue to expand.
// This check only works properly if we have kept all of the
// tokens (instead of skipping the non-word tokens).
// Note that certain linguists will never generate grammar loops
// (lextree linguist for example). For these cases, it is perfectly
// fine to disable this check by setting keepAllTokens to false
if (!token.isEmitting() && (keepAllTokens && isVisited(token))) {
return;
}
SearchState state = token.getSearchState();
SearchStateArc[] arcs = state.getSuccessors();
Token predecessor = getResultListPredecessor(token);
// For each successor
// calculate the entry score for the token based upon the
// predecessor token score and the transition probabilities
// if the score is better than the best score encountered for
// the SearchState and frame then create a new token, add
// it to the lattice and the SearchState.
// If the token is an emitting token add it to the list,
// otherwise recursively collect the new tokens successors.
float tokenScore = token.getScore();
float beamThreshold = activeList.getBeamThreshold();
boolean stateProducesPhoneHmms = state instanceof LexTreeNonEmittingHMMState || state instanceof LexTreeWordState
|| state instanceof LexTreeEndUnitState;
for (SearchStateArc arc : arcs) {
SearchState nextState = arc.getState();
// prune states using lookahead heuristics
if (stateProducesPhoneHmms) {
if (nextState instanceof LexTreeHMMState) {
Float penalty;
int baseId = ((LexTreeHMMState) nextState).getHMMState().getHMM().getBaseUnit().getBaseID();
if ((penalty = penalties.get(baseId)) == null)
penalty = updateLookaheadPenalty(baseId);
if ((tokenScore + lookaheadWeight * penalty) < beamThreshold)
continue;
}
}
if (checkStateOrder) {
checkStateOrder(state, nextState);
}
// We're actually multiplying the variables, but since
// these come in log(), multiply gets converted to add
float logEntryScore = tokenScore + arc.getProbability();
Token bestToken = getBestToken(nextState);
if (bestToken == null) {
Token newBestToken = new Token(predecessor, nextState, logEntryScore, arc.getInsertionProbability(),
arc.getLanguageProbability(), currentCollectTime);
tokensCreated.value++;
setBestToken(newBestToken, nextState);
activeListAdd(newBestToken);
} else if (bestToken.getScore() < logEntryScore) {
// System.out.println("Updating " + bestToken + " with " +
// newBestToken);
Token oldPredecessor = bestToken.getPredecessor();
bestToken.update(predecessor, nextState, logEntryScore, arc.getInsertionProbability(),
arc.getLanguageProbability(), currentCollectTime);
if (buildWordLattice && nextState instanceof WordSearchState) {
loserManager.addAlternatePredecessor(bestToken, oldPredecessor);
}
} else if (buildWordLattice && nextState instanceof WordSearchState) {
if (predecessor != null) {
loserManager.addAlternatePredecessor(bestToken, predecessor);
}
}
}
}
private Float updateLookaheadPenalty(int baseId) {
if (ciScores.isEmpty())
return 0.0f;
float penalty = -Float.MAX_VALUE;
for (FrameCiScores frameCiScores : ciScores) {
float diff = frameCiScores.scores[baseId] - frameCiScores.maxScore;
if (diff > penalty)
penalty = diff;
}
penalties.put(baseId, penalty);
return penalty;
}
private class FrameCiScores {
public final float[] scores;
public final float maxScore;
public FrameCiScores(float[] scores, float maxScore) {
this.scores = scores;
this.maxScore = maxScore;
}
}
}
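A standalone sketch of updateLookaheadPenalty(): over the lookahead window, a CI phone's penalty is the best (closest to zero) gap between its score and the frame's best score. The arrays, values, and class name are illustrative:

public class LookaheadPenaltySketch {
    // scores[frame][baseId] holds CI phone scores in the log domain;
    // maxScore[frame] is that frame's best CI score (cf. FrameCiScores).
    static float penalty(float[][] scores, float[] maxScore, int baseId) {
        float penalty = -Float.MAX_VALUE;
        for (int f = 0; f < scores.length; f++) {
            float diff = scores[f][baseId] - maxScore[f];
            if (diff > penalty) {
                penalty = diff; // keep the frame where this phone looked best
            }
        }
        return penalty;
    }

    public static void main(String[] args) {
        float[][] scores = {{-10f, -2f}, {-8f, -4f}};
        float[] maxScore = {-2f, -4f};
        System.out.println(penalty(scores, maxScore, 0)); // -4.0 (phone 0 trails the best)
        System.out.println(penalty(scores, maxScore, 1)); // 0.0 (phone 1 is the best somewhere)
    }
}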


@ -0,0 +1,796 @@
/*
* Copyright 1999-2002 Carnegie Mellon University.
* Portions Copyright 2002 Sun Microsystems, Inc.
* Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.decoder.search;
// a test search manager.
import edu.cmu.sphinx.decoder.pruner.Pruner;
import edu.cmu.sphinx.decoder.scorer.AcousticScorer;
import edu.cmu.sphinx.frontend.Data;
import edu.cmu.sphinx.linguist.*;
import edu.cmu.sphinx.result.Result;
import edu.cmu.sphinx.util.LogMath;
import edu.cmu.sphinx.util.StatisticsVariable;
import edu.cmu.sphinx.util.Timer;
import edu.cmu.sphinx.util.TimerPool;
import edu.cmu.sphinx.util.props.*;
import java.io.IOException;
import java.util.*;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
* Provides the breadth first search. To perform recognition an application
* should call initialize before recognition begins, and repeatedly call
* <code> recognize </code> until Result.isFinal() returns true. Once a final
* result has been obtained, <code> stopRecognition </code> should be called.
* <p>
* All scores and probabilities are maintained in the log math log domain.
*/
public class WordPruningBreadthFirstSearchManager extends TokenSearchManager {
/**
* The property that defines the name of the linguist to be used by this
* search manager.
*/
@S4Component(type = Linguist.class)
public final static String PROP_LINGUIST = "linguist";
/**
* The property that defines the name of the pruner to be used by this
* search manager.
*/
@S4Component(type = Pruner.class)
public final static String PROP_PRUNER = "pruner";
/**
* The property that defines the name of the scorer to be used by this
* search manager.
*/
@S4Component(type = AcousticScorer.class)
public final static String PROP_SCORER = "scorer";
/**
* The property that, when set to <code>true</code>, will cause the
* recognizer to count up all the tokens in the active list after every
* frame.
*/
@S4Boolean(defaultValue = false)
public final static String PROP_SHOW_TOKEN_COUNT = "showTokenCount";
/**
* The property that controls the number of frames processed for every time
* the decode growth step is skipped. Setting this property to zero disables
* grow skipping. Setting this number to a small integer will increase the
* speed of the decoder but will also decrease its accuracy. The higher the
* number, the less often the grow code is skipped. Values of 6-8 are known
* to be good enough for large vocabulary tasks; a value of 6 means that one
* out of every 6 frames is skipped.
*/
@S4Integer(defaultValue = 0)
public final static String PROP_GROW_SKIP_INTERVAL = "growSkipInterval";
/** The property that defines the type of active list to use */
@S4Component(type = ActiveListManager.class)
public final static String PROP_ACTIVE_LIST_MANAGER = "activeListManager";
/** The property for checking if the order of states is valid. */
@S4Boolean(defaultValue = false)
public final static String PROP_CHECK_STATE_ORDER = "checkStateOrder";
/** The property that specifies the maximum lattice edges */
@S4Integer(defaultValue = 100)
public final static String PROP_MAX_LATTICE_EDGES = "maxLatticeEdges";
/**
* The property that controls the amount of simple acoustic lookahead
* performed. Setting the property to zero (the default) disables simple
* acoustic lookahead. The lookahead need not be an integer.
*/
@S4Double(defaultValue = 0)
public final static String PROP_ACOUSTIC_LOOKAHEAD_FRAMES = "acousticLookaheadFrames";
/** The property that specifies the relative beam width */
@S4Double(defaultValue = 0.0)
// TODO: this should be a more meaningful default e.g. the common 1E-80
public final static String PROP_RELATIVE_BEAM_WIDTH = "relativeBeamWidth";
// -----------------------------------
// Configured Subcomponents
// -----------------------------------
protected Linguist linguist; // Provides grammar/language info
protected Pruner pruner; // used to prune the active list
protected AcousticScorer scorer; // used to score the active list
private ActiveListManager activeListManager;
protected LogMath logMath;
// -----------------------------------
// Configuration data
// -----------------------------------
protected Logger logger;
protected boolean showTokenCount;
protected boolean checkStateOrder;
private int growSkipInterval;
protected float relativeBeamWidth;
protected float acousticLookaheadFrames;
private int maxLatticeEdges = 100;
// -----------------------------------
// Instrumentation
// -----------------------------------
protected Timer scoreTimer;
protected Timer pruneTimer;
protected Timer growTimer;
protected StatisticsVariable totalTokensScored;
protected StatisticsVariable curTokensScored;
protected StatisticsVariable tokensCreated;
private long tokenSum;
private int tokenCount;
// -----------------------------------
// Working data
// -----------------------------------
protected int currentFrameNumber; // the current frame number
protected long currentCollectTime; // the collect time of the current frame
protected ActiveList activeList; // the list of active tokens
protected List<Token> resultList; // the current set of results
protected Map<SearchState, Token> bestTokenMap;
protected AlternateHypothesisManager loserManager;
private int numStateOrder;
// private TokenTracker tokenTracker;
// private TokenTypeTracker tokenTypeTracker;
protected boolean streamEnd;
/**
* Creates a pruning manager with separate lists for tokens
* @param linguist a linguist for search space
* @param pruner pruner to drop tokens
* @param scorer scorer to estimate token probability
* @param activeListManager active list manager to store tokens
* @param showTokenCount show count during decoding
* @param relativeWordBeamWidth relative beam for lookahead pruning
* @param growSkipInterval skip interval for growth
* @param checkStateOrder check order of states during growth
* @param buildWordLattice build a lattice during decoding
* @param maxLatticeEdges max edges to keep in lattice
* @param acousticLookaheadFrames frames to do lookahead
* @param keepAllTokens keep tokens including emitting tokens
*/
public WordPruningBreadthFirstSearchManager(Linguist linguist, Pruner pruner, AcousticScorer scorer,
ActiveListManager activeListManager, boolean showTokenCount, double relativeWordBeamWidth, int growSkipInterval,
boolean checkStateOrder, boolean buildWordLattice, int maxLatticeEdges, float acousticLookaheadFrames,
boolean keepAllTokens) {
this.logger = Logger.getLogger(getClass().getName());
this.logMath = LogMath.getLogMath();
this.linguist = linguist;
this.pruner = pruner;
this.scorer = scorer;
this.activeListManager = activeListManager;
this.showTokenCount = showTokenCount;
this.growSkipInterval = growSkipInterval;
this.checkStateOrder = checkStateOrder;
this.buildWordLattice = buildWordLattice;
this.maxLatticeEdges = maxLatticeEdges;
this.acousticLookaheadFrames = acousticLookaheadFrames;
this.keepAllTokens = keepAllTokens;
this.relativeBeamWidth = logMath.linearToLog(relativeWordBeamWidth);
}
public WordPruningBreadthFirstSearchManager() {
}
/*
* (non-Javadoc)
*
* @see
* edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util
* .props.PropertySheet)
*/
@Override
public void newProperties(PropertySheet ps) throws PropertyException {
super.newProperties(ps);
logMath = LogMath.getLogMath();
logger = ps.getLogger();
linguist = (Linguist) ps.getComponent(PROP_LINGUIST);
pruner = (Pruner) ps.getComponent(PROP_PRUNER);
scorer = (AcousticScorer) ps.getComponent(PROP_SCORER);
activeListManager = (ActiveListManager) ps.getComponent(PROP_ACTIVE_LIST_MANAGER);
showTokenCount = ps.getBoolean(PROP_SHOW_TOKEN_COUNT);
growSkipInterval = ps.getInt(PROP_GROW_SKIP_INTERVAL);
checkStateOrder = ps.getBoolean(PROP_CHECK_STATE_ORDER);
maxLatticeEdges = ps.getInt(PROP_MAX_LATTICE_EDGES);
acousticLookaheadFrames = ps.getFloat(PROP_ACOUSTIC_LOOKAHEAD_FRAMES);
relativeBeamWidth = logMath.linearToLog(ps.getDouble(PROP_RELATIVE_BEAM_WIDTH));
}
/*
* (non-Javadoc)
*
* @see edu.cmu.sphinx.decoder.search.SearchManager#allocate()
*/
public void allocate() {
// tokenTracker = new TokenTracker();
// tokenTypeTracker = new TokenTypeTracker();
scoreTimer = TimerPool.getTimer(this, "Score");
pruneTimer = TimerPool.getTimer(this, "Prune");
growTimer = TimerPool.getTimer(this, "Grow");
totalTokensScored = StatisticsVariable.getStatisticsVariable("totalTokensScored");
curTokensScored = StatisticsVariable.getStatisticsVariable("curTokensScored");
tokensCreated = StatisticsVariable.getStatisticsVariable("tokensCreated");
try {
linguist.allocate();
pruner.allocate();
scorer.allocate();
} catch (IOException e) {
throw new RuntimeException("Allocation of search manager resources failed", e);
}
}
/*
* (non-Javadoc)
*
* @see edu.cmu.sphinx.decoder.search.SearchManager#deallocate()
*/
public void deallocate() {
try {
scorer.deallocate();
pruner.deallocate();
linguist.deallocate();
} catch (IOException e) {
throw new RuntimeException("Deallocation of search manager resources failed", e);
}
}
/**
* Called at the start of recognition. Gets the search manager ready to
* recognize
*/
public void startRecognition() {
linguist.startRecognition();
pruner.startRecognition();
scorer.startRecognition();
localStart();
}
/**
* Performs the recognition for the given number of frames.
*
* @param nFrames
* the number of frames to recognize
* @return the current result
*/
public Result recognize(int nFrames) {
boolean done = false;
Result result = null;
streamEnd = false;
for (int i = 0; i < nFrames && !done; i++) {
done = recognize();
}
if (!streamEnd) {
result = new Result(loserManager, activeList, resultList, currentCollectTime, done, linguist.getSearchGraph()
.getWordTokenFirst(), true);
}
// tokenTypeTracker.show();
if (showTokenCount) {
showTokenCount();
}
return result;
}
protected boolean recognize() {
activeList = activeListManager.getEmittingList();
boolean more = scoreTokens();
if (more) {
pruneBranches();
currentFrameNumber++;
if (growSkipInterval == 0 || (currentFrameNumber % growSkipInterval) != 0) {
clearCollectors();
growEmittingBranches();
growNonEmittingBranches();
}
}
return !more;
}
/**
* Clears lists and maps before next expansion stage
*/
private void clearCollectors() {
resultList = new LinkedList<Token>();
createBestTokenMap();
activeListManager.clearEmittingList();
}
/**
* creates a new best token map with the best size
*/
protected void createBestTokenMap() {
int mapSize = activeList.size() * 10;
if (mapSize == 0) {
mapSize = 1;
}
bestTokenMap = new HashMap<SearchState, Token>(mapSize, 0.3F);
}
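// Sizing note (interpretation, not from the original comments): allocating
// roughly 10x the active-list size with a 0.3 load factor trades memory for
// fewer rehashes while successors are collected for the next frame.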
/** Terminates a recognition */
public void stopRecognition() {
localStop();
scorer.stopRecognition();
pruner.stopRecognition();
linguist.stopRecognition();
}
/**
* Gets the initial grammar node from the linguist and creates a
* GrammarNodeToken
*/
protected void localStart() {
SearchGraph searchGraph = linguist.getSearchGraph();
currentFrameNumber = 0;
curTokensScored.value = 0;
numStateOrder = searchGraph.getNumStateOrder();
activeListManager.setNumStateOrder(numStateOrder);
if (buildWordLattice) {
loserManager = new AlternateHypothesisManager(maxLatticeEdges);
}
SearchState state = searchGraph.getInitialState();
activeList = activeListManager.getEmittingList();
activeList.add(new Token(state, -1));
clearCollectors();
growBranches();
growNonEmittingBranches();
// tokenTracker.setEnabled(false);
// tokenTracker.startUtterance();
}
/** Local cleanup for this search manager */
protected void localStop() {
// tokenTracker.stopUtterance();
}
/**
* Goes through the active list of tokens and expands each token, finding
* the set of successor tokens until all the successor tokens are emitting
* tokens.
*/
protected void growBranches() {
growTimer.start();
float relativeBeamThreshold = activeList.getBeamThreshold();
if (logger.isLoggable(Level.FINE)) {
logger.fine("Frame: " + currentFrameNumber + " thresh : " + relativeBeamThreshold + " bs "
+ activeList.getBestScore() + " tok " + activeList.getBestToken());
}
for (Token token : activeList) {
if (token.getScore() >= relativeBeamThreshold && allowExpansion(token)) {
collectSuccessorTokens(token);
}
}
growTimer.stop();
}
/**
* Grows the emitting branches. This version applies a simple acoustic
* lookahead based upon the rate of change in the current acoustic score.
*/
protected void growEmittingBranches() {
if (acousticLookaheadFrames <= 0.0f) {
growBranches();
return;
}
growTimer.start();
float bestScore = -Float.MAX_VALUE;
for (Token t : activeList) {
float score = t.getScore() + t.getAcousticScore() * acousticLookaheadFrames;
if (score > bestScore) {
bestScore = score;
}
}
float relativeBeamThreshold = bestScore + relativeBeamWidth;
for (Token t : activeList) {
if (t.getScore() + t.getAcousticScore() * acousticLookaheadFrames > relativeBeamThreshold)
collectSuccessorTokens(t);
}
growTimer.stop();
}
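// Worked example (illustrative): with acousticLookaheadFrames = 1.5, a token
// scoring -1000 whose last acoustic score was -8 is ranked at
// -1000 + (-8 * 1.5) = -1012, so tokens whose acoustic scores are dropping
// fastest are extrapolated downward before the beam comparison.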
/**
* Grow the non-emitting branches, until the tokens reach an emitting state.
*/
private void growNonEmittingBranches() {
for (Iterator<ActiveList> i = activeListManager.getNonEmittingListIterator(); i.hasNext();) {
activeList = i.next();
if (activeList != null) {
i.remove();
pruneBranches();
growBranches();
}
}
}
/**
* Calculate the acoustic scores for the active list. The active list should
* contain only emitting tokens.
*
* @return <code>true</code> if there are more frames to score, otherwise,
* false
*/
protected boolean scoreTokens() {
boolean moreTokens;
scoreTimer.start();
Data data = scorer.calculateScores(activeList.getTokens());
scoreTimer.stop();
Token bestToken = null;
if (data instanceof Token) {
bestToken = (Token) data;
} else if (data == null) {
streamEnd = true;
}
if (bestToken != null) {
currentCollectTime = bestToken.getCollectTime();
}
moreTokens = (bestToken != null);
activeList.setBestToken(bestToken);
// monitorWords(activeList);
monitorStates(activeList);
// System.out.println("BEST " + bestToken);
curTokensScored.value += activeList.size();
totalTokensScored.value += activeList.size();
return moreTokens;
}
/**
* Keeps track of and reports all of the active word histories for the given
* active list
*
* @param activeList
* the active list to track
*/
@SuppressWarnings("unused")
private void monitorWords(ActiveList activeList) {
// WordTracker tracker1 = new WordTracker(currentFrameNumber);
//
// for (Token t : activeList) {
// tracker1.add(t);
// }
// tracker1.dump();
//
// TokenTracker tracker2 = new TokenTracker();
//
// for (Token t : activeList) {
// tracker2.add(t);
// }
// tracker2.dumpSummary();
// tracker2.dumpDetails();
//
// TokenTypeTracker tracker3 = new TokenTypeTracker();
//
// for (Token t : activeList) {
// tracker3.add(t);
// }
// tracker3.dump();
// StateHistoryTracker tracker4 = new
// StateHistoryTracker(currentFrameNumber);
// for (Token t : activeList) {
// tracker4.add(t);
// }
// tracker4.dump();
}
/**
* Keeps track of and reports statistics about the number of active states
*
* @param activeList
* the active list of states
*/
protected void monitorStates(ActiveList activeList) {
tokenSum += activeList.size();
tokenCount++;
if ((tokenCount % 1000) == 0) {
logger.info("Average Tokens/State: " + (tokenSum / tokenCount));
}
}
/** Removes unpromising branches from the active list */
protected void pruneBranches() {
pruneTimer.start();
activeList = pruner.prune(activeList);
pruneTimer.stop();
}
/**
* Gets the best token for this state
*
* @param state
* the state of interest
* @return the best token
*/
protected Token getBestToken(SearchState state) {
return bestTokenMap.get(state);
}
/**
* Sets the best token for a given state
*
* @param token
* the best token
* @param state
* the state
*/
protected void setBestToken(Token token, SearchState state) {
bestTokenMap.put(state, token);
}
/**
* Checks that the two given states are in a legitimate order.
*
* @param fromState parent state
* @param toState child state
*/
protected void checkStateOrder(SearchState fromState, SearchState toState) {
if (fromState.getOrder() == numStateOrder - 1) {
return;
}
if (fromState.getOrder() > toState.getOrder()) {
throw new Error("IllegalState order: from " + fromState.getClass().getName() + ' ' + fromState.toPrettyString()
+ " order: " + fromState.getOrder() + " to " + toState.getClass().getName() + ' ' + toState.toPrettyString()
+ " order: " + toState.getOrder());
}
}
/**
* Collects the next set of emitting tokens from a token and accumulates
* them in the active or result lists
*
* @param token
* the token to collect successors from
*/
protected void collectSuccessorTokens(Token token) {
// tokenTracker.add(token);
// tokenTypeTracker.add(token);
// If this is a final state, add it to the final list
if (token.isFinal()) {
resultList.add(getResultListPredecessor(token));
return;
}
// if this is a non-emitting token and we've already
// visited the same state during this frame, then we
// are in a grammar loop, so we don't continue to expand.
// This check only works properly if we have kept all of the
// tokens (instead of skipping the non-word tokens).
// Note that certain linguists will never generate grammar loops
// (lextree linguist for example). For these cases, it is perfectly
// fine to disable this check by setting keepAllTokens to false
if (!token.isEmitting() && (keepAllTokens && isVisited(token))) {
return;
}
SearchState state = token.getSearchState();
SearchStateArc[] arcs = state.getSuccessors();
Token predecessor = getResultListPredecessor(token);
// For each successor
// calculate the entry score for the token based upon the
// predecessor token score and the transition probabilities
// if the score is better than the best score encountered for
// the SearchState and frame then create a new token, add
// it to the lattice and the SearchState.
// If the token is an emitting token add it to the list,
// otherwise recursively collect the new tokens successors.
for (SearchStateArc arc : arcs) {
SearchState nextState = arc.getState();
if (checkStateOrder) {
checkStateOrder(state, nextState);
}
// We are conceptually multiplying these probabilities, but since
// they are stored as log values the multiplication becomes addition
float logEntryScore = token.getScore() + arc.getProbability();
Token bestToken = getBestToken(nextState);
if (bestToken == null) {
Token newBestToken = new Token(predecessor, nextState, logEntryScore, arc.getInsertionProbability(),
arc.getLanguageProbability(), currentCollectTime);
tokensCreated.value++;
setBestToken(newBestToken, nextState);
activeListAdd(newBestToken);
} else if (bestToken.getScore() < logEntryScore) {
// System.out.println("Updating " + bestToken + " with " +
// newBestToken);
Token oldPredecessor = bestToken.getPredecessor();
bestToken.update(predecessor, nextState, logEntryScore, arc.getInsertionProbability(),
arc.getLanguageProbability(), currentCollectTime);
if (buildWordLattice && nextState instanceof WordSearchState) {
loserManager.addAlternatePredecessor(bestToken, oldPredecessor);
}
} else if (buildWordLattice && nextState instanceof WordSearchState) {
if (predecessor != null) {
loserManager.addAlternatePredecessor(bestToken, predecessor);
}
}
}
}
/**
* Determines whether or not we've visited the state associated with this
* token since the previous frame.
*
* @param t token to check
* @return true if we've visited the search state since the last frame
*/
protected boolean isVisited(Token t) {
SearchState curState = t.getSearchState();
t = t.getPredecessor();
while (t != null && !t.isEmitting()) {
if (curState.equals(t.getSearchState())) {
System.out.println("CS " + curState + " match " + t.getSearchState());
return true;
}
t = t.getPredecessor();
}
return false;
}
protected void activeListAdd(Token token) {
activeListManager.add(token);
}
/**
* Determine if the given token should be expanded
*
* @param t
* the token to test
* @return <code>true</code> if the token should be expanded
*/
protected boolean allowExpansion(Token t) {
return true; // expansion filtering is currently disabled, so all tokens are expanded
}
/**
* Counts all the tokens in the active list (and displays them). This is an
* expensive operation.
*/
protected void showTokenCount() {
Set<Token> tokenSet = new HashSet<Token>();
for (Token token : activeList) {
while (token != null) {
tokenSet.add(token);
token = token.getPredecessor();
}
}
System.out.println("Token Lattice size: " + tokenSet.size());
tokenSet = new HashSet<Token>();
for (Token token : resultList) {
while (token != null) {
tokenSet.add(token);
token = token.getPredecessor();
}
}
System.out.println("Result Lattice size: " + tokenSet.size());
}
/**
* Returns the ActiveList.
*
* @return the ActiveList
*/
public ActiveList getActiveList() {
return activeList;
}
/**
* Sets the ActiveList.
*
* @param activeList
* the new ActiveList
*/
public void setActiveList(ActiveList activeList) {
this.activeList = activeList;
}
/**
* Returns the result list.
*
* @return the result list
*/
public List<Token> getResultList() {
return resultList;
}
/**
* Sets the result list.
*
* @param resultList
* the new result list
*/
public void setResultList(List<Token> resultList) {
this.resultList = resultList;
}
/**
* Returns the current frame number.
*
* @return the current frame number
*/
public int getCurrentFrameNumber() {
return currentFrameNumber;
}
/**
* Returns the Timer for growing.
*
* @return the Timer for growing
*/
public Timer getGrowTimer() {
return growTimer;
}
/**
* Returns the tokensCreated StatisticsVariable.
*
* @return the tokensCreated StatisticsVariable.
*/
public StatisticsVariable getTokensCreated() {
return tokensCreated;
}
}
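The acoustic-lookahead beam in growEmittingBranches() above is the subtlest part of this class, so here is a minimal, self-contained sketch of the same two-pass computation on plain floats. The Hyp class and all numeric constants below are illustrative assumptions, not Sphinx-4 API.

import java.util.Arrays;
import java.util.List;

public class LookaheadBeamSketch {

    // Hypothetical stand-in for a Token: total log score plus the last
    // frame's acoustic log score.
    static final class Hyp {
        final float score;
        final float acousticScore;
        Hyp(float score, float acousticScore) {
            this.score = score;
            this.acousticScore = acousticScore;
        }
    }

    public static void main(String[] args) {
        float lookaheadFrames = 1.7f;     // plays the role of acousticLookaheadFrames
        float relativeBeamWidth = -40.0f; // log-domain offset, hence negative
        List<Hyp> activeList = Arrays.asList(
                new Hyp(-100f, -5f), new Hyp(-120f, -2f), new Hyp(-160f, -9f));

        // Pass 1: find the best score after extrapolating each hypothesis forward.
        float bestScore = -Float.MAX_VALUE;
        for (Hyp h : activeList) {
            float extrapolated = h.score + h.acousticScore * lookaheadFrames;
            if (extrapolated > bestScore) {
                bestScore = extrapolated;
            }
        }

        // Pass 2: expand only hypotheses whose extrapolated score stays
        // inside the relative beam around the best one.
        float threshold = bestScore + relativeBeamWidth;
        for (Hyp h : activeList) {
            if (h.score + h.acousticScore * lookaheadFrames > threshold) {
                System.out.println("expand hypothesis with score " + h.score);
            }
        }
    }
}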

View file

@ -0,0 +1,140 @@
package edu.cmu.sphinx.decoder.search.stats;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import edu.cmu.sphinx.decoder.search.Token;
import edu.cmu.sphinx.linguist.WordSequence;
/** A class that keeps track of word histories */
public class StateHistoryTracker {
final Map<WordSequence, WordStats> statMap;
final int frameNumber;
int stateCount;
int maxWordHistories;
/**
* Creates a word tracker for the given frame number
*
* @param frameNumber the frame number
*/
public StateHistoryTracker(int frameNumber) {
statMap = new HashMap<WordSequence, WordStats>();
this.frameNumber = frameNumber;
}
/**
* Adds a word history for the given token to the word tracker
*
* @param t the token to add
*/
public void add(Token t) {
stateCount++;
WordSequence ws = getWordSequence(t);
WordStats stats = statMap.get(ws);
if (stats == null) {
stats = new WordStats(ws);
statMap.put(ws, stats);
}
stats.update(t);
}
/** Dumps the word histories in the tracker */
public void dump() {
dumpSummary();
List<WordStats> stats = new ArrayList<WordStats>(statMap.values());
Collections.sort(stats, WordStats.COMPARATOR);
for (WordStats stat : stats) {
System.out.println(" " + stat);
}
}
/** Dumps summary information in the tracker */
void dumpSummary() {
System.out.println("Frame: " + frameNumber + " states: " + stateCount
+ " histories " + statMap.size());
}
/**
* Given a token, gets the history sequence
*
* @param token the token of interest
* @return the word sequence for the token
*/
private WordSequence getWordSequence(Token token) {
return token.getSearchState().getWordHistory();
}
/** Keeps track of statistics for a particular word sequence */
static class WordStats {
public final static Comparator<WordStats> COMPARATOR = new Comparator<WordStats>() {
public int compare(WordStats ws1, WordStats ws2) {
if (ws1.maxScore > ws2.maxScore) {
return -1;
} else if (ws1.maxScore == ws2.maxScore) {
return 0;
} else {
return 1;
}
}
};
private int size;
private float maxScore;
private float minScore;
private final WordSequence ws;
/**
* Creates word statistics for the given sequence
*
* @param ws the word sequence
*/
WordStats(WordSequence ws) {
size = 0;
maxScore = -Float.MAX_VALUE;
minScore = Float.MAX_VALUE;
this.ws = ws;
}
/**
* Updates the statistics based upon the scores for the given token
*
* @param t the token
*/
void update(Token t) {
size++;
if (t.getScore() > maxScore) {
maxScore = t.getScore();
}
if (t.getScore() < minScore) {
minScore = t.getScore();
}
}
/**
* Returns a string representation of the statistics
*
* @return a string representation
*/
@Override
public String toString() {
return "states:" + size + " max:" + maxScore + " min:" + minScore + ' '
+ ws;
}
}
}
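For readers who want the aggregation pattern in isolation: StateHistoryTracker keeps one WordStats record per distinct history and folds every token score into it. A hedged, self-contained sketch of that pattern follows, with plain strings standing in for WordSequence keys; everything here is illustrative only.

import java.util.HashMap;
import java.util.Map;

public class HistoryStatsSketch {

    static final class Stats {
        int size;
        float maxScore = -Float.MAX_VALUE;
        float minScore = Float.MAX_VALUE;

        void update(float score) {
            size++;
            if (score > maxScore) maxScore = score;
            if (score < minScore) minScore = score;
        }
    }

    public static void main(String[] args) {
        // Strings stand in for WordSequence keys; scores are log values.
        Map<String, Stats> statMap = new HashMap<String, Stats>();
        String[] histories = {"<s> hello", "<s> hello", "<s> yellow"};
        float[] scores = {-10f, -12f, -30f};
        for (int i = 0; i < histories.length; i++) {
            Stats stats = statMap.get(histories[i]);
            if (stats == null) {
                stats = new Stats();
                statMap.put(histories[i], stats);
            }
            stats.update(scores[i]);
        }
        for (Map.Entry<String, Stats> e : statMap.entrySet()) {
            Stats s = e.getValue();
            System.out.println(e.getKey() + " states:" + s.size
                    + " max:" + s.maxScore + " min:" + s.minScore);
        }
    }
}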

View file

@ -0,0 +1,198 @@
package edu.cmu.sphinx.decoder.search.stats;
import java.util.HashMap;
import java.util.Map;
import edu.cmu.sphinx.decoder.search.Token;
import edu.cmu.sphinx.linguist.HMMSearchState;
/** This debugging class is used to track the number of active tokens per state */
public class TokenTracker {
private Map<Object, TokenStats> stateMap;
private boolean enabled;
private int frame;
private int utteranceStateCount;
private int utteranceMaxStates;
private int utteranceSumStates;
/**
* Enables or disables the token tracker
*
* @param enabled if <code>true</code> the tracker is enabled
*/
void setEnabled(boolean enabled) {
this.enabled = enabled;
}
/** Starts the per-utterance tracking */
void startUtterance() {
if (enabled) {
frame = 0;
utteranceStateCount = 0;
utteranceMaxStates = -Integer.MAX_VALUE;
utteranceSumStates = 0;
}
}
/** Stops the per-utterance tracking */
void stopUtterance() {
if (enabled) {
dumpSummary();
}
}
/** Starts the per-frame tracking */
void startFrame() {
if (enabled) {
stateMap = new HashMap<Object, TokenStats>();
}
}
/**
* Adds a new token to the tracker
*
* @param t the token to add.
*/
public void add(Token t) {
if (enabled) {
TokenStats stats = getStats(t);
stats.update(t);
}
}
/** Stops the per-frame tracking */
void stopFrame() {
if (enabled) {
frame++;
dumpDetails();
}
}
/** Dumps summary info about the tokens */
public void dumpSummary() {
if (enabled) {
float avgStates = 0f;
if (utteranceStateCount > 0) {
avgStates = ((float) utteranceSumStates) / utteranceStateCount;
}
System.out.print("# Utterance stats ");
System.out.print(" States: " + utteranceStateCount / frame);
if (utteranceStateCount > 0) {
System.out.print(" Paths: " + utteranceSumStates / frame);
System.out.print(" Max: " + utteranceMaxStates);
System.out.print(" Avg: " + avgStates);
}
System.out.println();
}
}
/** Dumps detailed info about the tokens */
public void dumpDetails() {
if (enabled) {
int maxStates = -Integer.MAX_VALUE;
int hmmCount = 0;
int sumStates = 0;
for (TokenStats stats : stateMap.values()) {
if (stats.isHMM) {
hmmCount++;
}
sumStates += stats.count;
utteranceSumStates += stats.count;
if (stats.count > maxStates) {
maxStates = stats.count;
}
if (stats.count > utteranceMaxStates) {
utteranceMaxStates = stats.count;
}
}
utteranceStateCount += stateMap.size();
float avgStates = 0f;
if (!stateMap.isEmpty()) {
avgStates = ((float) sumStates) / stateMap.size();
}
System.out.print("# Frame " + frame);
System.out.print(" States: " + stateMap.size());
if (!stateMap.isEmpty()) {
System.out.print(" Paths: " + sumStates);
System.out.print(" Max: " + maxStates);
System.out.print(" Avg: " + avgStates);
System.out.print(" HMM: " + hmmCount);
}
System.out.println();
}
}
/**
* Gets the statistics for a particular token
*
* @param t the token of interest
* @return the token statistics associated with the given token
*/
private TokenStats getStats(Token t) {
TokenStats stats = stateMap.get(t.getSearchState()
.getLexState());
if (stats == null) {
stats = new TokenStats();
stateMap.put(t.getSearchState().getLexState(), stats);
}
return stats;
}
/**
* A class for keeping track of statistics about tokens. Tracks the count,
* minimum and maximum score for a particular state.
*/
class TokenStats {
int count;
float maxScore;
float minScore;
boolean isHMM;
TokenStats() {
count = 0;
maxScore = -Float.MAX_VALUE;
minScore = Float.MAX_VALUE; // start above any real score so the first update() lowers it
}
/**
* Updates this state with the given token.
*
* @param t the token
*/
public void update(Token t) {
count++;
if (t.getScore() > maxScore) {
maxScore = t.getScore();
}
if (t.getScore() < minScore) {
minScore = t.getScore();
}
isHMM = t.getSearchState() instanceof HMMSearchState;
}
}
}

View file

@ -0,0 +1,80 @@
package edu.cmu.sphinx.decoder.search.stats;
import edu.cmu.sphinx.decoder.search.Token;
import edu.cmu.sphinx.linguist.HMMSearchState;
import edu.cmu.sphinx.linguist.SearchState;
import edu.cmu.sphinx.linguist.UnitSearchState;
import edu.cmu.sphinx.linguist.WordSearchState;
import edu.cmu.sphinx.linguist.acoustic.HMM;
/**
* A tool for tracking the types of tokens created and placed in the beam
* <p>
* TODO: Develop a mechanism for adding trackers such as these in a more general fashion.
*/
public class TokenTypeTracker {
// keep track of the various types of states
private int numWords;
private int numUnits;
private int numOthers;
private int numHMMBegin;
private int numHMMEnd;
private int numHMMSingle;
private int numHMMInternal;
private int numTokens;
/**
* Adds a token to this tracker. Records statistics about the type of token.
*
* @param t the token to track
*/
public void add(Token t) {
numTokens++;
SearchState s = t.getSearchState();
if (s instanceof WordSearchState) {
numWords++;
} else if (s instanceof UnitSearchState) {
numUnits++;
} else if (s instanceof HMMSearchState) {
HMM hmm = ((HMMSearchState) s).getHMMState().getHMM();
switch (hmm.getPosition()) {
case BEGIN: numHMMBegin++; break;
case END: numHMMEnd++; break;
case SINGLE: numHMMSingle++; break;
case INTERNAL: numHMMInternal++; break;
default: break;
}
} else {
numOthers++;
}
}
/** Shows the accumulated statistics */
public void dump() {
System.out.println("TotalTokens: " + numTokens);
System.out.println(" Words: " + numWords + pc(numWords));
System.out.println(" Units: " + numUnits + pc(numUnits));
System.out.println(" HMM-b: " + numHMMBegin + pc(numHMMBegin));
System.out.println(" HMM-e: " + numHMMEnd + pc(numHMMEnd));
System.out.println(" HMM-s: " + numHMMSingle + pc(numHMMSingle));
System.out.println(" HMM-i: " + numHMMInternal +
pc(numHMMInternal));
System.out.println(" Others: " + numOthers + pc(numOthers));
}
/**
* Utility method for generating integer percents
*
* @param num the value to be converted into percent
* @return a string representation as a percent
*/
private String pc(int num) {
int percent = numTokens == 0 ? 0 : (100 * num) / numTokens; // avoid division by zero
return " (" + percent + "%)";
}
}
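As a usage note: this tracker, like the other stats classes in this package, is meant to be fed every token of a frame's active list and then dumped. The sketch below shows one plausible wiring; the dumpTokenTypes helper, its activeList parameter, and the per-frame call site are assumptions, since the search manager in this commit only invokes the trackers from commented-out debug code.

import edu.cmu.sphinx.decoder.search.Token;
import edu.cmu.sphinx.decoder.search.stats.TokenTypeTracker;

public class TrackerUsageSketch {

    // Hypothetical per-frame debug hook: classify every active token and
    // print the type breakdown for the frame.
    static void dumpTokenTypes(Iterable<Token> activeList) {
        TokenTypeTracker tracker = new TokenTypeTracker();
        for (Token t : activeList) {
            tracker.add(t); // counts words, units and HMM positions
        }
        tracker.dump(); // prints totals and percentages
    }
}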

View file

@ -0,0 +1,155 @@
package edu.cmu.sphinx.decoder.search.stats;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import edu.cmu.sphinx.decoder.search.Token;
import edu.cmu.sphinx.linguist.WordSearchState;
import edu.cmu.sphinx.linguist.WordSequence;
import edu.cmu.sphinx.linguist.dictionary.Word;
/** A class that keeps track of word histories */
public class WordTracker {
final Map<WordSequence, WordStats> statMap;
final int frameNumber;
int stateCount;
int maxWordHistories;
/**
* Creates a word tracker for the given frame number
*
* @param frameNumber the frame number
*/
public WordTracker(int frameNumber) {
statMap = new HashMap<WordSequence, WordStats>();
this.frameNumber = frameNumber;
}
/**
* Adds a word history for the given token to the word tracker
*
* @param t the token to add
*/
public void add(Token t) {
stateCount++;
WordSequence ws = getWordSequence(t);
WordStats stats = statMap.get(ws);
if (stats == null) {
stats = new WordStats(ws);
statMap.put(ws, stats);
}
stats.update(t);
}
/** Dumps the word histories in the tracker */
public void dump() {
dumpSummary();
List<WordStats> stats = new ArrayList<WordStats>(statMap.values());
Collections.sort(stats, WordStats.COMPARATOR);
for (WordStats stat : stats) {
System.out.println(" " + stat);
}
}
/** Dumps summary information in the tracker */
void dumpSummary() {
System.out.println("Frame: " + frameNumber + " states: " + stateCount
+ " histories " + statMap.size());
}
/**
* Given a token, gets the word sequence represented by the token
*
* @param token the token of interest
* @return the word sequence for the token
*/
private WordSequence getWordSequence(Token token) {
List<Word> wordList = new LinkedList<Word>();
while (token != null) {
if (token.isWord()) {
WordSearchState wordState = (WordSearchState) token
.getSearchState();
Word word = wordState.getPronunciation().getWord();
wordList.add(0, word);
}
token = token.getPredecessor();
}
return new WordSequence(wordList);
}
/** Keeps track of statistics for a particular word sequence */
static class WordStats {
public final static Comparator<WordStats> COMPARATOR = new Comparator<WordStats>() {
public int compare(WordStats ws1, WordStats ws2) {
if (ws1.maxScore > ws2.maxScore) {
return -1;
} else if (ws1.maxScore == ws2.maxScore) {
return 0;
} else {
return 1;
}
}
};
private int size;
private float maxScore;
private float minScore;
private final WordSequence ws;
/**
* Creates word statistics for the given sequence
*
* @param ws the word sequence
*/
WordStats(WordSequence ws) {
size = 0;
maxScore = -Float.MAX_VALUE;
minScore = Float.MAX_VALUE;
this.ws = ws;
}
/**
* Updates the statistics based upon the scores for the given token
*
* @param t the token
*/
void update(Token t) {
size++;
if (t.getScore() > maxScore) {
maxScore = t.getScore();
}
if (t.getScore() < minScore) {
minScore = t.getScore();
}
}
/**
* Returns a string representation of the statistics
*
* @return a string representation
*/
@Override
public String toString() {
return "states:" + size + " max:" + maxScore + " min:" + minScore + ' '
+ ws;
}
}
}

View file

@ -0,0 +1,274 @@
/*
* Copyright 2013 Carnegie Mellon University. All Rights Reserved. Use is
* subject to license terms. See the file "license.terms" for information on
* usage and redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*/
package edu.cmu.sphinx.frontend;
import static java.lang.Double.parseDouble;
import static java.lang.Integer.parseInt;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import edu.cmu.sphinx.frontend.denoise.Denoise;
import edu.cmu.sphinx.frontend.frequencywarp.MelFrequencyFilterBank;
import edu.cmu.sphinx.frontend.frequencywarp.MelFrequencyFilterBank2;
import edu.cmu.sphinx.frontend.transform.*;
import edu.cmu.sphinx.linguist.acoustic.tiedstate.KaldiLoader;
import edu.cmu.sphinx.linguist.acoustic.tiedstate.Loader;
import edu.cmu.sphinx.util.props.*;
/**
* Cepstrum is an auto-configurable DataProcessor which is used to compute a
* specific cepstrum (for a target acoustic model) given the spectrum. The
* Cepstrum is computed using a pipeline of front end components which are
* selected, customized or ignored depending on the feat.params file which
* characterizes the target acoustic model for which this cepstrum is computed.
* A typical legacy MFCC Cepstrum will use a MelFrequencyFilterBank, followed
* by a DiscreteCosineTransform. A typical denoised MFCC Cepstrum will use a
* MelFrequencyFilterBank, followed by a Denoise component, followed by a
* DiscreteCosineTransform2, followed by a Lifter component. The
* MelFrequencyFilterBank parameters (numberFilters, minimumFrequency and
* maximumFrequency) are auto-configured based on the values found in
* feat.params.
*
* @author Horia Cucu
*/
public class AutoCepstrum extends BaseDataProcessor {
/**
* The property specifying the acoustic model for which this cepstrum will
* be configured. For this acoustic model (AM) it is mandatory to specify a
* location in the configuration file. The Cepstrum will be configured
* based on the feat.params file that will be found in the specified AM
* location.
*/
@S4Component(type = Loader.class)
public final static String PROP_LOADER = "loader";
protected Loader loader;
/**
* The filter bank which will be used for creating the cepstrum. The filter
* bank is always inserted in the pipeline and its minimum frequency,
* maximum frequency and number of filters are configured based on the
* "lowerf", "upperf" and "nfilt" values in the feat.params file of the
* target acoustic model.
*/
protected BaseDataProcessor filterBank;
/**
* The denoise component which could be used for creating the cepstrum. The
* denoise component is inserted in the pipeline only if
* "-remove_noise yes" is specified in the feat.params file of the target
* acoustic model.
*/
protected Denoise denoise;
/**
* The property specifying the DCT which will be used for creating the
* cepstrum. If "-transform legacy" is specified in the feat.params file of
* the target acoustic model or if the "-transform" parameter does not
* appear in this file at all, the legacy DCT component is inserted in the
* pipeline. If "-transform dct" is specified in the feat.params file of
* the target acoustic model, then the current DCT component is inserted in
* the pipeline.
*/
protected DiscreteCosineTransform dct;
/**
* The lifter component which could be used for creating the cepstrum. The
* lifter component is inserted in the pipeline only if
* "-lifter &lt;lifterValue&gt;" is specified in the feat.params file of the
* target acoustic model.
*/
protected Lifter lifter;
/**
* The list of <code>DataProcessor</code>s which were auto-configured for
* this Cepstrum component.
*/
protected List<DataProcessor> selectedDataProcessors;
public AutoCepstrum(Loader loader) throws IOException {
initLogger();
this.loader = loader;
loader.load();
initDataProcessors();
}
public AutoCepstrum() {
}
/*
* (non-Javadoc)
* @see
* edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util
* .props.PropertySheet)
*/
@Override
public void newProperties(PropertySheet ps) throws PropertyException {
super.newProperties(ps);
loader = (Loader) ps.getComponent(PROP_LOADER);
try {
loader.load();
} catch (IOException e) {
throw new PropertyException(e);
}
initDataProcessors();
}
private void initDataProcessors() {
try {
Properties featParams = loader.getProperties();
selectedDataProcessors = new ArrayList<DataProcessor>();
double lowFreq = parseDouble(featParams.getProperty("-lowerf"));
double hiFreq = parseDouble(featParams.getProperty("-upperf"));
int numFilter = parseInt(featParams.getProperty("-nfilt"));
// TODO: should not be there, but for now we must preserve
// backward compatibility with the legacy code.
if (loader instanceof KaldiLoader)
filterBank = new MelFrequencyFilterBank2(lowFreq,
hiFreq,
numFilter);
else
filterBank = new MelFrequencyFilterBank(lowFreq,
hiFreq,
numFilter);
selectedDataProcessors.add(filterBank);
if ((featParams.get("-remove_noise") == null)
|| (featParams.get("-remove_noise").equals("yes"))) {
denoise = new Denoise(Denoise.class.getField("LAMBDA_POWER")
.getAnnotation(S4Double.class)
.defaultValue(),
Denoise.class.getField("LAMBDA_A")
.getAnnotation(S4Double.class)
.defaultValue(),
Denoise.class.getField("LAMBDA_B")
.getAnnotation(S4Double.class)
.defaultValue(),
Denoise.class.getField("LAMBDA_T")
.getAnnotation(S4Double.class)
.defaultValue(),
Denoise.class.getField("MU_T")
.getAnnotation(S4Double.class)
.defaultValue(),
Denoise.class.getField("MAX_GAIN")
.getAnnotation(S4Double.class)
.defaultValue(),
Denoise.class.getField("SMOOTH_WINDOW")
.getAnnotation(S4Integer.class)
.defaultValue());
// denoise.newProperties();
denoise.setPredecessor(selectedDataProcessors
.get(selectedDataProcessors.size() - 1));
selectedDataProcessors.add(denoise);
}
if ((featParams.get("-transform") != null)
&& (featParams.get("-transform").equals("dct"))) {
dct = new DiscreteCosineTransform2(
numFilter,
DiscreteCosineTransform.class
.getField("PROP_CEPSTRUM_LENGTH")
.getAnnotation(S4Integer.class)
.defaultValue());
} else if ((featParams.get("-transform") != null)
&& (featParams.get("-transform").equals("kaldi")))
{
dct = new KaldiDiscreteCosineTransform(
numFilter,
DiscreteCosineTransform.class
.getField("PROP_CEPSTRUM_LENGTH")
.getAnnotation(S4Integer.class)
.defaultValue());
} else {
dct = new DiscreteCosineTransform(numFilter,
DiscreteCosineTransform.class
.getField("PROP_CEPSTRUM_LENGTH")
.getAnnotation(S4Integer.class)
.defaultValue());
}
dct.setPredecessor(selectedDataProcessors
.get(selectedDataProcessors.size() - 1));
selectedDataProcessors.add(dct);
if (featParams.get("-lifter") != null) {
lifter = new Lifter(Integer.parseInt((String) featParams
.get("-lifter")));
lifter.setPredecessor(selectedDataProcessors
.get(selectedDataProcessors.size() - 1));
selectedDataProcessors.add(lifter);
}
logger.info("Cepstrum component auto-configured as follows: "
+ toString());
} catch (NoSuchFieldException exc) {
throw new RuntimeException(exc);
}
}
/*
* (non-Javadoc)
* @see
* edu.cmu.sphinx.frontend.DataProcessor#initialize(edu.cmu.sphinx.frontend
* .CommonConfig)
*/
@Override
public void initialize() {
super.initialize();
for (DataProcessor dataProcessor : selectedDataProcessors)
dataProcessor.initialize();
}
/**
* Returns the processed Data output, basically calls
* <code>getData()</code> on the last processor.
*
* @return a Data object that has been processed by the cepstrum
* @throws DataProcessingException if a data processor error occurs
*/
@Override
public Data getData() throws DataProcessingException {
DataProcessor dp;
dp = selectedDataProcessors.get(selectedDataProcessors.size() - 1);
return dp.getData();
}
/**
* Sets the predecessor for this DataProcessor. The predecessor is actually
* the spectrum builder.
*
* @param predecessor the predecessor of this DataProcessor
*/
@Override
public void setPredecessor(DataProcessor predecessor) {
filterBank.setPredecessor(predecessor);
}
/**
* Returns a description of this Cepstrum component in the format:
* &lt;cepstrum name&gt; {&lt;DataProcessor1&gt;, &lt;DataProcessor2&gt; ...
* &lt;DataProcessorN&gt;}
*
* @return a description of this Cepstrum
*/
@Override
public String toString() {
StringBuilder description = new StringBuilder(super.toString())
.append(" {");
for (DataProcessor dp : selectedDataProcessors)
description.append(dp).append(", ");
description.setLength(description.length() - 2);
return description.append('}').toString();
}
}
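A hedged usage sketch of the class above: construct an AutoCepstrum from any Loader whose acoustic model ships a feat.params file, then point it at a spectrum-producing processor. The buildCepstrum helper and its parameter names are illustrative, not part of Sphinx-4.

import java.io.IOException;
import edu.cmu.sphinx.frontend.AutoCepstrum;
import edu.cmu.sphinx.frontend.DataProcessor;
import edu.cmu.sphinx.linguist.acoustic.tiedstate.Loader;

public class AutoCepstrumSketch {

    // buildCepstrum and its parameter names are hypothetical.
    static AutoCepstrum buildCepstrum(Loader acousticModelLoader,
            DataProcessor spectrumSource) throws IOException {
        // The constructor loads the model and reads its feat.params to select
        // the filter bank, optional denoiser, DCT variant and optional lifter.
        AutoCepstrum cepstrum = new AutoCepstrum(acousticModelLoader);
        cepstrum.setPredecessor(spectrumSource); // the spectrum feeds the filter bank
        return cepstrum;
    }
}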

View file

@ -0,0 +1,61 @@
/*
* Copyright 2004 Carnegie Mellon University.
* Portions Copyright 2004 Sun Microsystems, Inc.
* Portions Copyright 2004 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.frontend;
import edu.cmu.sphinx.util.props.ConfigurableAdapter;
/**
* An abstract DataProcessor implementing elements common to all concrete DataProcessors, such as name, predecessor, and
* timer.
*/
public abstract class BaseDataProcessor extends ConfigurableAdapter implements DataProcessor {
private DataProcessor predecessor;
public BaseDataProcessor() {
}
/**
* Returns the processed Data output.
*
* @return a Data object that has been processed by this DataProcessor
* @throws DataProcessingException if a data processor error occurs
*/
public abstract Data getData() throws DataProcessingException;
/** Initializes this DataProcessor. This is typically called after the DataProcessor has been configured. */
public void initialize() {
}
/**
* Returns the predecessor DataProcessor.
*
* @return the predecessor
*/
public DataProcessor getPredecessor() {
return predecessor;
}
/**
* Sets the predecessor DataProcessor. This method allows dynamic reconfiguration of the front end.
*
* @param predecessor the new predecessor of this DataProcessor
*/
public void setPredecessor(DataProcessor predecessor) {
this.predecessor = predecessor;
}
}
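Since BaseDataProcessor leaves only getData() abstract, a custom pipeline stage reduces to one method. The pass-through processor below is a minimal illustrative sketch, not part of Sphinx-4.

import edu.cmu.sphinx.frontend.BaseDataProcessor;
import edu.cmu.sphinx.frontend.Data;
import edu.cmu.sphinx.frontend.DataProcessingException;

// Illustrative only: forwards every Data object unchanged.
public class PassThroughProcessor extends BaseDataProcessor {

    @Override
    public Data getData() throws DataProcessingException {
        Data data = getPredecessor().getData(); // pull from the upstream processor
        // ... inspect or transform the data here ...
        return data;
    }
}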

View file

@ -0,0 +1,28 @@
/*
* Copyright 1999-2002 Carnegie Mellon University.
* Portions Copyright 2002 Sun Microsystems, Inc.
* Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.frontend;
/**
* Implements the interface for all Data objects that pass between
* DataProcessors.
*
* Subclasses of Data can contain the actual data, or be a signal
* (e.g., data start, data end, speech start, speech end).
*
* @see Data
* @see FrontEnd
*/
public interface Data {
}

View file

@ -0,0 +1,113 @@
package edu.cmu.sphinx.frontend;
import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;
import edu.cmu.sphinx.util.props.S4Double;
import java.util.LinkedList;
/**
* A <code>DataProcessor</code> which wraps incoming <code>DoubleData</code>-objects into equally sized blocks of a
* defined length.
*/
public class DataBlocker extends BaseDataProcessor {
/** The property for the block size of generated data-blocks in milliseconds. */
@S4Double(defaultValue = 10)
public static final String PROP_BLOCK_SIZE_MS = "blockSizeMs";
private double blockSizeMs;
private int blockSizeSamples = Integer.MAX_VALUE;
private int curFirstSamplePos;
private int sampleRate = -1;
private final LinkedList<DoubleData> inBuffer = new LinkedList<DoubleData>();
private int curInBufferSize;
public DataBlocker() {
}
/**
* @param blockSizeMs block size in milliseconds
*/
public DataBlocker(double blockSizeMs) {
initLogger();
this.blockSizeMs = blockSizeMs;
}
@Override
public void newProperties(PropertySheet propertySheet) throws PropertyException {
super.newProperties(propertySheet);
blockSizeMs = propertySheet.getDouble(PROP_BLOCK_SIZE_MS);
}
public double getBlockSizeMs() {
return blockSizeMs;
}
@Override
public Data getData() throws DataProcessingException {
while (curInBufferSize < blockSizeSamples || curInBufferSize == 0) {
Data data = getPredecessor().getData();
if (data instanceof DataStartSignal) {
sampleRate = ((DataStartSignal) data).getSampleRate();
blockSizeSamples = (int) Math.round(sampleRate * blockSizeMs / 1000);
curInBufferSize = 0;
curFirstSamplePos = 0;
inBuffer.clear();
}
if (!(data instanceof DoubleData)) {
return data;
}
DoubleData dd = (DoubleData) data;
inBuffer.add(dd);
curInBufferSize += dd.getValues().length;
}
// now we are ready to merge all data blocks into one
double[] newSampleBlock = new double[blockSizeSamples];
int copiedSamples = 0;
long firstSample = inBuffer.get(0).getFirstSampleNumber() + curFirstSamplePos;
while (!inBuffer.isEmpty()) {
DoubleData dd = inBuffer.remove(0);
double[] values = dd.getValues();
int copyLength = Math.min(blockSizeSamples - copiedSamples, values.length - curFirstSamplePos);
System.arraycopy(values, curFirstSamplePos, newSampleBlock, copiedSamples, copyLength);
// does the current data-object contain more samples than necessary? -> keep the rest for the next block
if (copyLength < (values.length - curFirstSamplePos)) {
assert inBuffer.isEmpty();
curFirstSamplePos += copyLength;
inBuffer.add(0, dd);
break;
} else {
copiedSamples += copyLength;
curFirstSamplePos = 0;
}
}
curInBufferSize = inBuffer.isEmpty() ? 0 : inBuffer.get(0).getValues().length - curFirstSamplePos;
// for (int i = 0; i < newSampleBlock.length; i++) {
// newSampleBlock[i] *= 10;
// }
return new DoubleData(newSampleBlock, sampleRate, firstSample);
}
}
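To make the block-size arithmetic in getData() concrete: at a 16 kHz sample rate with the default 10 ms block size, blockSizeSamples = round(16000 * 10 / 1000) = 160 samples per block. The helper below is an illustrative sketch of wiring a DataBlocker behind an audio source; the tenMsBlocks name is hypothetical.

import edu.cmu.sphinx.frontend.DataBlocker;
import edu.cmu.sphinx.frontend.DataProcessor;

public class DataBlockerSketch {

    // tenMsBlocks is a hypothetical helper name.
    static DataBlocker tenMsBlocks(DataProcessor audioSource) {
        DataBlocker blocker = new DataBlocker(10.0); // 10 ms -> 160 samples at 16 kHz
        blocker.setPredecessor(audioSource);
        return blocker;
    }
}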

View file

@ -0,0 +1,70 @@
/*
* Copyright 1999-2002 Carnegie Mellon University.
* Portions Copyright 2002 Sun Microsystems, Inc.
* Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.frontend;
/**
* A signal that indicates the end of data.
*
* @see Data
* @see DataProcessor
* @see Signal
*/
public class DataEndSignal extends Signal {
private final long duration;
/**
* Constructs a DataEndSignal.
*
* @param duration the duration of the entire data stream in milliseconds
*/
public DataEndSignal(long duration) {
this(duration, System.currentTimeMillis());
}
/**
* Constructs a DataEndSignal with the given creation time.
*
* @param duration the duration of the entire data stream in milliseconds
* @param time the creation time of the DataEndSignal
*/
public DataEndSignal(long duration, long time) {
super(time);
this.duration = duration;
}
/**
* Returns the duration of the entire data stream in milliseconds
*
* @return the duration of the entire data stream in milliseconds
*/
public long getDuration() {
return duration;
}
/**
* Returns the string "DataEndSignal".
*
* @return the string "DataEndSignal"
*/
@Override
public String toString() {
return ("DataEndSignal: creation time: " + getTime() + ", duration: " +
getDuration() + "ms");
}
}

View file

@ -0,0 +1,51 @@
/*
* Copyright 1999-2002 Carnegie Mellon University.
* Portions Copyright 2002 Sun Microsystems, Inc.
* Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.frontend;
/** Thrown to indicate that a DataProcessor has problems processing incoming Data objects. */
@SuppressWarnings("serial")
public class DataProcessingException extends RuntimeException {
/** Constructs a DataProcessingException with no detailed message. */
public DataProcessingException() {
super();
}
/**
* Constructs a DataProcessingException with the specified detail message.
*
* @param message the detail message
*/
public DataProcessingException(String message) {
super(message);
}
/**
* Constructs a DataProcessingException with the specified detail message and cause.
*
* @param message the detail message
* @param cause the cause
*/
public DataProcessingException(String message, Throwable cause) {
super(message, cause);
}
/**
* Constructs a DataProcessingException with the specified cause.
*
* @param cause the cause
*/
public DataProcessingException(Throwable cause) {
super(cause);
}
}

View file

@ -0,0 +1,68 @@
/*
* Copyright 2004 Carnegie Mellon University.
* Portions Copyright 2004 Sun Microsystems, Inc.
* Portions Copyright 2004 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
* @see FrontEnd
*/
package edu.cmu.sphinx.frontend;
import edu.cmu.sphinx.util.props.Configurable;
/**
* A processor that performs a signal processing function.
*
* Since a DataProcessor usually belongs to a particular front end pipeline,
* you can name the pipeline it belongs to in the {@link #initialize()
* initialize} method. (Note, however, that it is not always the case that a
* DataProcessor belongs to a particular pipeline. For example, the {@link
* edu.cmu.sphinx.frontend.util.Microphone Microphone} class is a DataProcessor,
* but it usually does not belong to any particular pipeline.)
* <p>
* Each DataProcessor usually has a predecessor as well. This is the previous
* DataProcessor in the pipeline. Again, not all DataProcessors have
* predecessors.
* <p>
* Calling {@link #getData() getData} will return the
* processed Data object.
*/
public interface DataProcessor extends Configurable {
/**
* Initializes this DataProcessor.
*
* This is typically called after the DataProcessor has been configured.
*/
public void initialize();
/**
* Returns the processed Data output.
*
* @return a Data object that has been processed by this DataProcessor
* @throws DataProcessingException if a data processor error occurs
*/
public abstract Data getData() throws DataProcessingException;
/**
* Returns the predecessor DataProcessor.
*
* @return the predecessor
*/
public DataProcessor getPredecessor();
/**
* Sets the predecessor DataProcessor. This method allows dynamic reconfiguration of the front end.
*
* @param predecessor the new predecessor of this DataProcessor
*/
public void setPredecessor(DataProcessor predecessor);
}
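The pull model described above boils down to one loop on the consumer side: call getData() on the last processor until the stream reports its end. A hedged sketch, assuming the stream terminates with a DataEndSignal:

import edu.cmu.sphinx.frontend.Data;
import edu.cmu.sphinx.frontend.DataEndSignal;
import edu.cmu.sphinx.frontend.DataProcessor;

public class PullLoopSketch {

    // Drains a front end chain by pulling on its last processor.
    static void drain(DataProcessor lastProcessor) {
        Data data;
        while ((data = lastProcessor.getData()) != null) {
            if (data instanceof DataEndSignal) {
                break; // the stream is finished
            }
            // ... consume the Data object here ...
        }
    }
}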

View file

@ -0,0 +1,62 @@
/*
* Copyright 1999-2002 Carnegie Mellon University.
* Portions Copyright 2002 Sun Microsystems, Inc.
* Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.frontend;
/**
* A signal that indicates the start of data.
*
* @see Data
* @see DataProcessor
* @see Signal
*/
public class DataStartSignal extends Signal {
private final int sampleRate;
/**
* Constructs a DataStartSignal at the given time.
*
* @param sampleRate the sampling rate of the started data stream.
* @param time the time this DataStartSignal is created
*/
public DataStartSignal(int sampleRate, long time) {
super(time);
this.sampleRate = sampleRate;
}
/**
* Constructs a DataStartSignal with the current time.
*
* @param sampleRate the sampling rate of the started data stream.
*/
public DataStartSignal(int sampleRate) {
this(sampleRate, System.currentTimeMillis());
}
/**
* Returns the string "DataStartSignal".
*
* @return the string "DataStartSignal"
*/
@Override
public String toString() {
return "DataStartSignal: creation time: " + getTime();
}
/** @return the sampling rate of the started data stream. */
public int getSampleRate() {
return sampleRate;
}
}

View file

@ -0,0 +1,119 @@
/*
* Copyright 1999-2002 Carnegie Mellon University.
* Portions Copyright 2002 Sun Microsystems, Inc.
* Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.frontend;
import edu.cmu.sphinx.util.machlearn.OVector;
/** A Data object that holds data of primitive type double. */
@SuppressWarnings("serial")
public class DoubleData extends OVector implements Data {
private int sampleRate;
private long firstSampleNumber;
private long collectTime;
/**
* Constructs a new <code>Data</code> object with values only. All other
* internal fields, such as the sample rate, keep their default values.
* @param values source values
*/
public DoubleData(double[] values) {
super(values);
}
/**
* Constructs a Data object with the given values, sample rate, and first sample number; the collect time is derived from them.
*
* @param values the data values
* @param sampleRate the sample rate of the data
* @param firstSampleNumber the position of the first sample in the original data
*/
public DoubleData(double[] values, int sampleRate,
long firstSampleNumber) {
super(values);
this.sampleRate = sampleRate;
this.collectTime = firstSampleNumber * 1000 / sampleRate;
this.firstSampleNumber = firstSampleNumber;
}
/**
* Constructs a Data object with the given values, collect time, and first sample number.
*
* @param values the data values
* @param sampleRate the sample rate of the data
* @param collectTime the time at which this data is collected
* @param firstSampleNumber the position of the first sample in the original data
*/
public DoubleData(double[] values, int sampleRate,
long collectTime, long firstSampleNumber) {
super(values);
this.sampleRate = sampleRate;
this.collectTime = collectTime;
this.firstSampleNumber = firstSampleNumber;
}
/**
* @return a string that describes the data.
*/
@Override
public String toString() {
return ("DoubleData: " + sampleRate + "Hz, first sample #: " +
firstSampleNumber + ", collect time: " + collectTime);
}
/**
* @return the sample rate of the data.
*/
public int getSampleRate() {
return sampleRate;
}
/**
* @return the position of the first sample in the original data. The very first sample number
* is zero.
*/
public long getFirstSampleNumber() {
return firstSampleNumber;
}
/**
* Returns the time in milliseconds at which the audio data is collected.
*
* @return the difference, in milliseconds, between the time the audio data is collected and
* midnight, January 1, 1970
*/
public long getCollectTime() {
return collectTime;
}
@Override
public DoubleData clone() throws CloneNotSupportedException {
try {
DoubleData data = (DoubleData)super.clone();
data.sampleRate = sampleRate;
data.collectTime = collectTime;
data.firstSampleNumber = firstSampleNumber;
return data;
} catch (CloneNotSupportedException e) {
throw new InternalError(e.toString());
}
}
}
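A worked example of the collect-time derivation in the three-argument constructor above: the sample at position 32000 of a 16 kHz stream was collected at 32000 * 1000 / 16000 = 2000 ms. The sketch below checks exactly that; the class name is illustrative only.

import edu.cmu.sphinx.frontend.DoubleData;

public class DoubleDataSketch {
    public static void main(String[] args) {
        double[] samples = new double[160]; // one 10 ms frame at 16 kHz
        DoubleData frame = new DoubleData(samples, 16000, 32000L);
        System.out.println(frame.getCollectTime()); // prints 2000
    }
}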

View file

@ -0,0 +1,119 @@
/*
* Copyright 1999-2002 Carnegie Mellon University.
* Portions Copyright 2002 Sun Microsystems, Inc.
* Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*/
package edu.cmu.sphinx.frontend;
import edu.cmu.sphinx.util.MatrixUtils;
/**
* A Data object that holds data of primitive type float.
*
* @see Data
*/
public class FloatData implements Data, Cloneable {
private final float[] values;
private final int sampleRate;
private final long firstSampleNumber;
private final long collectTime;
/**
* Constructs a Data object with the given values, sample rate, and first sample number; the collect time is derived from them.
*
* @param values the data values
* @param sampleRate the sample rate of the data
* @param firstSampleNumber the position of the first sample in the original data
*/
public FloatData(float[] values, int sampleRate, long firstSampleNumber) {
this(values, sampleRate, firstSampleNumber * 1000 / sampleRate, firstSampleNumber);
}
/**
* Constructs a Data object with the given values, sample rate, collect time, and first sample number.
*
* @param values the data values
* @param sampleRate the sample rate of the data
* @param collectTime the time at which this data is collected
* @param firstSampleNumber the position of the first sample in the original data
*/
public FloatData(float[] values, int sampleRate,
long collectTime, long firstSampleNumber) {
this.values = values;
this.sampleRate = sampleRate;
this.collectTime = collectTime;
this.firstSampleNumber = firstSampleNumber;
}
/**
* @return the values of this data.
*/
public float[] getValues() {
return values;
}
/**
* @return the sample rate of this data.
*/
public int getSampleRate() {
return sampleRate;
}
/**
* @return the position of the first sample in the original data. The very first sample number is zero.
*/
public long getFirstSampleNumber() {
return firstSampleNumber;
}
/**
* Returns the time in milliseconds at which the audio data is collected.
*
* @return the difference, in milliseconds, between the time the audio data is collected and midnight, January 1,
* 1970
*/
public long getCollectTime() {
return collectTime;
}
@Override
public FloatData clone() throws CloneNotSupportedException {
try {
FloatData data = (FloatData)super.clone();
return data;
} catch (CloneNotSupportedException e) {
throw new InternalError(e.toString());
}
}
/**
* Converts a given Data-object into a <code>FloatData</code> if possible.
* @param data data to convert
* @return converted data
*/
public static FloatData toFloatData(Data data) {
FloatData convertData;
if (data instanceof FloatData)
convertData = (FloatData) data;
else if (data instanceof DoubleData) {
DoubleData dd = (DoubleData) data;
convertData = new FloatData(MatrixUtils.double2float(dd.getValues()), dd.getSampleRate(),
dd.getFirstSampleNumber());
} else
throw new IllegalArgumentException("data type '" + data.getClass() + "' is not supported");
return convertData;
}
}
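A short sketch of the toFloatData() conversion above: DoubleData values are copied into a new FloatData, while a FloatData argument is returned unchanged. Illustrative only.

import edu.cmu.sphinx.frontend.Data;
import edu.cmu.sphinx.frontend.DoubleData;
import edu.cmu.sphinx.frontend.FloatData;

public class FloatDataSketch {
    public static void main(String[] args) {
        Data d = new DoubleData(new double[] {0.5, -0.25}, 16000, 0L);
        FloatData f = FloatData.toFloatData(d); // values copied as floats
        System.out.println(f.getValues().length + " samples at " + f.getSampleRate() + " Hz");
    }
}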

View file

@ -0,0 +1,303 @@
/*
* Copyright 1999-2002 Carnegie Mellon University.
* Portions Copyright 2002 Sun Microsystems, Inc.
* Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.frontend;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import edu.cmu.sphinx.util.Timer;
import edu.cmu.sphinx.util.TimerPool;
import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;
import edu.cmu.sphinx.util.props.S4ComponentList;
/**
* FrontEnd is a wrapper class for the chain of front end processors. It provides methods for manipulating and
* navigating the processors.
* <p>
* The front end is modeled as a series of data processors, each of which performs a specific signal processing
* function. For example, one processor performs a Fast Fourier Transform (FFT) on the input data, while another
* performs high-pass filtering. Figure 1 below shows what the front end looks like:
* <img alt="Frontend" src="doc-files/frontend.jpg"> <br> <b>Figure 1: The Sphinx4 front end.</b>
* <p>
* Each such data processor implements the {@link edu.cmu.sphinx.frontend.DataProcessor} interface. Objects that
* implement the {@link edu.cmu.sphinx.frontend.Data} interface enter and exit the front end, and pass between the
* processors in the front end. The input data to the front end is typically audio data, but this front end allows any
* input type. Similarly, the output data is typically features, but this front end allows any output type. You can
* configure the front end to accept any input type and return any output type. We will describe the configuration of
* the front end in more detail below.
* <p>
* <b>The Pull Model of the Front End</b>
* <p>
* The front end uses a pull model. To obtain output from the front end, one would call the method:
* <p>
* <code> FrontEnd frontend = ... // see how to obtain the front end below <br>Data output = frontend.getData();
* </code>
* <p>
* Calling {@link #getData() getData} on the front end would in turn call the getData() method on the last
* DataProcessor, which in turn calls the getData() method on the second last DataProcessor, and so on, until the
* getData() method on the first DataProcessor is called, which reads Data objects from the input. The input to the
* front end is actually another DataProcessor, and is usually (though not necessarily) part of the front end and is not
* shown in the figure above. If you want to maintain some control of the input DataProcessor, you can create it
* separately, and use the {@link #setDataSource(edu.cmu.sphinx.frontend.DataProcessor) setDataSource} method to set it
* as the input DataProcessor. In that case, the input DataProcessor will be prepended to the existing chain of
* DataProcessors. One common input DataProcessor is the {@link edu.cmu.sphinx.frontend.util.Microphone}, which
* implements the DataProcessor interface.
* <p>
* <code> DataProcessor microphone = new Microphone(); <br>microphone.initialize(...);
* <br>frontend.setDataSource(microphone); </code>
* <p>
* Another common input DataProcessor is the {@link edu.cmu.sphinx.frontend.util.StreamDataSource}. It turns a Java
* {@link java.io.InputStream} into Data objects. It is usually used in batch mode decoding.
* <p>
* <b>Configuring the front end</b>
* <p>
* The front end must be configured through the Sphinx properties file. For details about configuring the front end,
* refer to the document <a href="doc-files/FrontEndConfiguration.html">Configuring the Front End</a>.
* <p>
* Current state-of-the-art front ends generate features that contain Mel-frequency cepstral coefficients (MFCC). To
* specify such a front end (called a 'pipeline') in Sphinx-4, insert the following lines in the Sphinx-4 configuration
* file:
* <pre>
* &lt;component name="mfcFrontEnd" type="edu.cmu.sphinx.frontend.FrontEnd"&gt;
* &lt;propertylist name="pipeline"&gt;
* &lt;item&gt;preemphasizer&lt;/item&gt;
* &lt;item&gt;windower&lt;/item&gt;
* &lt;item&gt;dft&lt;/item&gt;
* &lt;item&gt;melFilterBank&lt;/item&gt;
* &lt;item&gt;dct&lt;/item&gt;
* &lt;item&gt;batchCMN&lt;/item&gt;
* &lt;item&gt;featureExtractor&lt;/item&gt;
* &lt;/propertylist&gt;
* &lt;/component&gt;
*
* &lt;component name="preemphasizer" type="{@link edu.cmu.sphinx.frontend.filter.Preemphasizer
* edu.cmu.sphinx.frontend.filter.Preemphasizer}"/&gt;
* &lt;component name="windower" type="{@link edu.cmu.sphinx.frontend.window.RaisedCosineWindower
* edu.cmu.sphinx.frontend.window.RaisedCosineWindower}"/&gt;
* &lt;component name="dft" type="{@link edu.cmu.sphinx.frontend.transform.DiscreteFourierTransform
* edu.cmu.sphinx.frontend.transform.DiscreteFourierTransform}"/&gt;
* &lt;component name="melFilterBank" type="{@link edu.cmu.sphinx.frontend.frequencywarp.MelFrequencyFilterBank2
* edu.cmu.sphinx.frontend.frequencywarp.MelFrequencyFilterBank}"/&gt;
* &lt;component name="dct" type="{@link edu.cmu.sphinx.frontend.transform.DiscreteCosineTransform
* edu.cmu.sphinx.frontend.transform.DiscreteCosineTransform}"/&gt;
* &lt;component name="batchCMN" type="{@link edu.cmu.sphinx.frontend.feature.BatchCMN
* edu.cmu.sphinx.frontend.feature.BatchCMN}"/&gt;
* &lt;component name="featureExtractor" type="{@link edu.cmu.sphinx.frontend.feature.DeltasFeatureExtractor
* edu.cmu.sphinx.frontend.feature.DeltasFeatureExtractor}"/&gt;
* </pre>
* Note: In this example, 'mfcFrontEnd' becomes the name of the front end.
* <p>
* Sphinx-4 also allows you to: <ul> <li>specify multiple front end pipelines</li> <li>specify multiple instance of the
* same DataProcessor in the same pipeline</li> </ul>
* <p>
* For details on how to do this, refer to the document <a href="doc-files/FrontEndConfiguration.html">Configuring the
* Front End</a>.
* <p>
* <b>Obtaining a Front End</b>
* <p>
* In order to obtain a front end, it must be specified in the configuration file. The Sphinx-4 front end is connected
* to the rest of the system via the scorer. We will continue with the above example to show how the scorer will obtain
* the front end. In the configuration file, the scorer should be specified as follows:
* <pre>
* &lt;component name="scorer" type="edu.cmu.sphinx.decoder.scorer.SimpleAcousticScorer"&gt;
* &lt;property name="frontend" value="mfcFrontEnd"/&gt;
* &lt;/component&gt;
* </pre>
* In the SimpleAcousticScorer, the front end is obtained in the {@link edu.cmu.sphinx.util.props.Configurable#newProperties
* newProperties} method as follows:
* <pre>
* public void newProperties(PropertySheet ps) throws PropertyException {
* FrontEnd frontend = (FrontEnd) ps.getComponent("frontend", FrontEnd.class);
* }
* </pre>
*/
public class FrontEnd extends BaseDataProcessor {
/** the name of the property list of all the components of the front end pipeline */
@S4ComponentList(type = DataProcessor.class)
public final static String PROP_PIPELINE = "pipeline";
// ----------------------------
// Configuration data
// -----------------------------
private List<DataProcessor> frontEndList;
private Timer timer;
private DataProcessor first;
private DataProcessor last;
private final List<SignalListener> signalListeners = new ArrayList<SignalListener>();
public FrontEnd(List<DataProcessor> frontEndList) {
initLogger();
this.frontEndList = frontEndList;
init();
}
public FrontEnd() {
}
/* (non-Javadoc)
* @see edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util.props.PropertySheet)
*/
@Override
public void newProperties(PropertySheet ps) throws PropertyException {
super.newProperties(ps);
frontEndList = ps.getComponentList(PROP_PIPELINE, DataProcessor.class);
init();
}
private void init() {
this.timer = TimerPool.getTimer(this, "Frontend");
last = null;
for (DataProcessor dp : frontEndList) {
assert dp != null;
if (last != null)
dp.setPredecessor(last);
if (first == null) {
first = dp;
}
last = dp;
}
initialize();
}
/* (non-Javadoc)
* @see edu.cmu.sphinx.frontend.DataProcessor#initialize(edu.cmu.sphinx.frontend.CommonConfig)
*/
@Override
public void initialize() {
super.initialize();
for (DataProcessor dp : frontEndList) {
dp.initialize();
}
}
/**
* Sets the source of data for this front end. It basically sets the predecessor of the first DataProcessor of this
* front end.
*
* @param dataSource the source of data
*/
public void setDataSource(DataProcessor dataSource) {
first.setPredecessor(dataSource);
}
/** Returns the collection of <code>DataProcessor</code>s of this <code>FrontEnd</code>.
* @return list of processors
*/
public List<DataProcessor> getElements() {
return frontEndList;
}
/**
* Returns the processed Data output, basically calls <code>getData()</code> on the last processor.
*
* @return Data object that has been processed by this front end
* @throws DataProcessingException if a data processor error occurs
*/
@Override
public Data getData() throws DataProcessingException {
timer.start();
Data data = last.getData();
// fire the signal listeners if it's a signal
if (data instanceof Signal) {
fireSignalListeners((Signal) data);
}
timer.stop();
return data;
}
/**
* Sets the source of data for this front end. It basically calls <code>setDataSource(dataSource)</code>.
*
* @param dataSource the source of data
*/
@Override
public void setPredecessor(DataProcessor dataSource) {
setDataSource(dataSource);
}
/**
* Add a listener to be called when a signal is detected.
*
* @param listener the listener to be added
*/
public void addSignalListener(SignalListener listener) {
signalListeners.add(listener);
}
/**
* Removes a listener for signals.
*
* @param listener the listener to be removed
*/
public void removeSignalListener(SignalListener listener) {
signalListeners.remove(listener);
}
/**
* Fire all listeners for signals.
*
* @param signal the signal that occurred
*/
protected void fireSignalListeners(Signal signal) {
for (SignalListener listener : new ArrayList<SignalListener>(signalListeners))
listener.signalOccurred(signal);
}
/** Returns the last data processor within the <code>DataProcessor</code> chain of this <code>FrontEnd</code>.
* @return last processor
*/
public DataProcessor getLastDataProcessor() {
return last;
}
/**
* Returns a description of this FrontEnd in the format: &lt;front end name&gt; {&lt;DataProcessor1&gt;, &lt;DataProcessor2&gt; ...
* &lt;DataProcessorN&gt;}
*
* @return a description of this FrontEnd
*/
@Override
public String toString() {
if (last == null)
return super.toString() + " {}";
LinkedList<DataProcessor> list = new LinkedList<DataProcessor>();
for (DataProcessor current = last; current != null; current = current.getPredecessor())
list.addFirst(current); // add processors in their correct order
StringBuilder description = new StringBuilder(super.toString()).append(" {");
for (DataProcessor dp : list)
description.append(dp).append(", ");
description.setLength(description.length() - 2);
return description.append('}').toString();
}
}
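Besides the XML configuration shown in the class comment, a FrontEnd can be assembled programmatically through the list constructor. The sketch below is deliberately tiny and illustrative; a real MFCC pipeline would list the preemphasizer, windower, DFT, filter bank, DCT, CMN and feature extractor instead.

import java.util.Arrays;
import edu.cmu.sphinx.frontend.DataBlocker;
import edu.cmu.sphinx.frontend.DataProcessor;
import edu.cmu.sphinx.frontend.FrontEnd;
import edu.cmu.sphinx.frontend.GainControlProcessor;

public class FrontEndSketch {

    static FrontEnd build(DataProcessor audioSource) {
        FrontEnd frontEnd = new FrontEnd(Arrays.<DataProcessor>asList(
                new DataBlocker(10.0),            // 10 ms blocks
                new GainControlProcessor(1.0)));  // unity gain, passes data through
        frontEnd.setDataSource(audioSource);      // prepended as the input processor
        return frontEnd;
    }
}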

View file

@ -0,0 +1,77 @@
package edu.cmu.sphinx.frontend;
import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;
import edu.cmu.sphinx.util.props.S4Double;
/**
* Allows the gain of an audio signal to be modified. If the gainFactor is 1, the signal passes through this
* <code>DataProcessor</code> unchanged.
*
* @author Holger Brandl
*/
public class GainControlProcessor extends BaseDataProcessor {
@S4Double(defaultValue = 1.0)
public static final String GAIN_FACTOR = "gainFactor";
private double gainFactor;
public GainControlProcessor(double gainFactor) {
initLogger();
this.gainFactor = gainFactor;
}
public GainControlProcessor() {
}
/*
* (non-Javadoc)
*
* @see edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util.props.PropertySheet)
*/
@Override
public void newProperties(PropertySheet ps) throws PropertyException {
super.newProperties(ps);
gainFactor = ps.getDouble(GAIN_FACTOR);
}
@Override
public Data getData() throws DataProcessingException {
Data data = getPredecessor().getData();
if (data instanceof FloatData) {
float[] values = ((FloatData) data).getValues();
if (gainFactor != 1.0) {
// apply the gain-factor
for (int i = 0; i < values.length; i++) {
values[i] *= gainFactor;
}
}
} else if (data instanceof DoubleData) {
double[] values = ((DoubleData) data).getValues();
if (gainFactor != 1.0) {
// apply the gain-factor
for (int i = 0; i < values.length; i++) {
values[i] *= gainFactor;
}
}
}
return data;
}
public double getGainFactor() {
return gainFactor;
}
public void setGainFactor(double gainFactor) {
this.gainFactor = gainFactor;
}
}

View file

@ -0,0 +1,68 @@
/*
* Copyright 1999-2002 Carnegie Mellon University.
* Portions Copyright 2002 Sun Microsystems, Inc.
* Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.frontend;
import java.util.HashMap;
import java.util.Map;
/**
* Indicates events like the beginning or end of data, data dropped, quality changed, etc. It implements the Data
* interface and is passed along the chain of DataProcessors to inform them about the Data that flows
* between them.
*
* @see Data
* @see DataProcessor
*/
public class Signal implements Data {
/** the time this Signal was issued. */
private final long time;
/**
* A (lazily initialized) collection of named properties of this signal. This collection might contain information
* about the file being processed, the shift size or frame length of the windowing process, etc.
*/
private Map<String, Object> props;
/**
* Constructs a Signal with the given creation time.
*
* @param time the time this Signal is created
*/
protected Signal(long time) {
this.time = time;
}
/**
* Returns the time this Signal was created.
*
* @return the time this Signal was created
*/
public long getTime() {
return time;
}
/**
* @return the properties associated with this signal.
*/
public synchronized Map<String, Object> getProps() {
if (props == null)
props = new HashMap<String, Object>();
return props;
}
}

View file

@ -0,0 +1,26 @@
/*
* Copyright 1999-2002 Carnegie Mellon University.
* Portions Copyright 2002 Sun Microsystems, Inc.
* Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.frontend;
/** The listener interface for being informed when a {@link Signal Signal} is generated. */
public interface SignalListener {
/**
* Method called when a signal is detected
*
* @param signal the signal
*/
public void signalOccurred(Signal signal);
}

View file

@ -0,0 +1,172 @@
package edu.cmu.sphinx.frontend.databranch;
import edu.cmu.sphinx.frontend.BaseDataProcessor;
import edu.cmu.sphinx.frontend.Data;
import edu.cmu.sphinx.frontend.DataProcessingException;
import edu.cmu.sphinx.util.props.*;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
/**
* A FIFO buffer for <code>Data</code> elements.
* <p>
* <code>Data</code> objects are inserted into the buffer using the <code>processDataFrame</code> method.
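* <p>
* A minimal usage sketch; <code>frame</code> stands for an assumed Data object obtained elsewhere:
* <pre>
* DataBufferProcessor buffer = new DataBufferProcessor(); // normally configured via properties
* buffer.processDataFrame(frame); // push side: append to the FIFO
* Data next = buffer.getData();   // pull side: remove the oldest element
* </pre>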
*/
public class DataBufferProcessor extends BaseDataProcessor implements DataListener {
/** The FIFO- data buffer. */
private final List<Data> featureBuffer = new LinkedList<Data>();
/**
* If this property is set to <code>true</code>, the buffer will wait for new data before returning from a
* <code>getData</code> call. Enable this flag if the buffer should serve as the starting point of a new
* feature pull chain.
*/
@S4Boolean(defaultValue = false)
public static final String PROP_WAIT_IF_EMPTY = "waitIfEmpty";
private boolean waitIfEmpty;
/**
* The time in milliseconds to wait between two attempts to read a data element from the buffer when in
* <code>waitIfEmpty</code> mode.
*/
@S4Integer(defaultValue = 10)
public static final String PROP_WAIT_TIME_MS = "waitTimeMs";
private long waitTime;
/** The maximum size of the buffer in frames. The oldest frames are removed if the buffer grows out of bounds. */
@S4Integer(defaultValue = 50000)
public static final String PROP_BUFFER_SIZE = "maxBufferSize";
private int maxBufferSize;
@S4ComponentList(type = Configurable.class, beTolerant = true)
public static final String DATA_LISTENERS = "dataListeners";
private List<DataListener> dataListeners = new ArrayList<DataListener>();
/**
* @param maxBufferSize the maximum size of the buffer in frames; the oldest frames are removed if the buffer grows out of bounds
* @param waitIfEmpty if set to <code>true</code>, the buffer will wait for new data before returning from a
*        <code>getData</code> call; enable this flag if the buffer should serve as the starting point of a new
*        feature pull chain
* @param waitTime the time in milliseconds to wait between two attempts to read a data element from the buffer
*        when in <code>waitIfEmpty</code> mode
* @param listeners listeners to get notified
* @param listeners listeners to get notified
*/
public DataBufferProcessor(int maxBufferSize, boolean waitIfEmpty, int waitTime, List<? extends Configurable> listeners) {
initLogger();
this.maxBufferSize = maxBufferSize;
this.waitIfEmpty = waitIfEmpty;
if (waitIfEmpty) // if false we don't need the value
this.waitTime = waitTime;
for (Configurable configurable : listeners) {
assert configurable instanceof DataListener;
addDataListener((DataListener) configurable);
}
}
public DataBufferProcessor() {
}
@Override
public void newProperties(PropertySheet ps) throws PropertyException {
super.newProperties(ps);
maxBufferSize = ps.getInt(PROP_BUFFER_SIZE);
waitIfEmpty = ps.getBoolean(PROP_WAIT_IF_EMPTY);
if (waitIfEmpty) // if false we don't need the value
waitTime = ps.getInt(PROP_WAIT_TIME_MS);
dataListeners = ps.getComponentList(DATA_LISTENERS, DataListener.class);
}
public void processDataFrame(Data data) {
featureBuffer.add(data);
// inform data-listeners if necessary
for (DataListener dataListener : dataListeners) {
dataListener.processDataFrame(data);
}
//reduce the buffer-size if necessary
while (featureBuffer.size() > maxBufferSize) {
featureBuffer.remove(0);
}
}
/**
* Returns the processed Data output.
*
* @return a Data object that has been processed by this DataProcessor
* @throws edu.cmu.sphinx.frontend.DataProcessingException
* if a data processor error occurs
*/
@Override
public Data getData() throws DataProcessingException {
Data data = null;
while (waitIfEmpty && featureBuffer.isEmpty()) {
try {
Thread.sleep(waitTime);
} catch (InterruptedException e) {
e.printStackTrace();
}
}
if (!featureBuffer.isEmpty()) {
data = featureBuffer.remove(0);
} else {
assert !waitIfEmpty;
}
return data;
}
public int getBufferSize() {
return featureBuffer.size();
}
public void clearBuffer() {
featureBuffer.clear();
}
public List<Data> getBuffer() {
return Collections.unmodifiableList(featureBuffer);
}
/** Adds a new listener.
* @param l listener to add
*/
public void addDataListener(DataListener l) {
if (l == null)
return;
dataListeners.add(l);
}
/** Removes a listener.
* @param l listener to remove
*/
public void removeDataListener(DataListener l) {
if (l == null)
return;
dataListeners.remove(l);
}
}

View file

@ -0,0 +1,16 @@
package edu.cmu.sphinx.frontend.databranch;
import edu.cmu.sphinx.frontend.Data;
/**
* Defines the API for Data observer classes.
*/
public interface DataListener {
/** This method is invoked when a new {@link Data} object becomes available.
* @param data feature frame
*/
public void processDataFrame(Data data);
}

View file

@ -0,0 +1,18 @@
package edu.cmu.sphinx.frontend.databranch;
import edu.cmu.sphinx.util.props.Configurable;
/** API elements shared by components that can generate {@link edu.cmu.sphinx.frontend.Data}s. */
public interface DataProducer extends Configurable {
/** Registers a new listener for <code>Data</code>s.
* @param l listener to add
*/
void addDataListener(DataListener l);
/** Unregisters a listener for <code>Data</code>s.
* @param l listener to remove
*/
void removeDataListener(DataListener l);
}

View file

@ -0,0 +1,71 @@
package edu.cmu.sphinx.frontend.databranch;
import edu.cmu.sphinx.frontend.BaseDataProcessor;
import edu.cmu.sphinx.frontend.Data;
import edu.cmu.sphinx.frontend.DataProcessingException;
import edu.cmu.sphinx.util.props.Configurable;
import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;
import edu.cmu.sphinx.util.props.S4ComponentList;
import java.util.ArrayList;
import java.util.List;
/**
* Creates push branches out of a front end. This might be used for push decoding or to create new pull streams.
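* <p>
* A minimal wiring sketch; <code>frontEnd</code> and <code>buffer</code> are assumed, application-provided
* components:
* <pre>
* FrontEndSplitter splitter = new FrontEndSplitter();
* splitter.setPredecessor(frontEnd);    // pull frames from the main chain
* splitter.addDataListener(buffer);     // e.g. a DataBufferProcessor
* Data frame = splitter.getData();      // the frame is also pushed to all listeners
* </pre>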
*
* @see edu.cmu.sphinx.decoder.FrameDecoder
* @see edu.cmu.sphinx.frontend.databranch.DataBufferProcessor
*/
public class FrontEndSplitter extends BaseDataProcessor implements DataProducer {
@S4ComponentList(type = Configurable.class, beTolerant = true)
public static final String PROP_DATA_LISTENERS = "dataListeners";
private List<DataListener> listeners = new ArrayList<DataListener>();
public FrontEndSplitter() {
}
@Override
public void newProperties(PropertySheet ps) throws PropertyException {
super.newProperties(ps);
listeners = ps.getComponentList(PROP_DATA_LISTENERS, DataListener.class);
}
/**
* Reads and returns the next Data frame, or returns <code>null</code> if no data is available.
*
* @return the next Data or <code>null</code> if none is available
* @throws edu.cmu.sphinx.frontend.DataProcessingException
* if there is a data processing error
*/
@Override
public Data getData() throws DataProcessingException {
Data input = getPredecessor().getData();
for (DataListener l : listeners)
l.processDataFrame(input);
return input;
}
public void addDataListener(DataListener l) {
if (l == null) {
return;
}
listeners.add(l);
}
public void removeDataListener(DataListener l) {
if (l == null) {
return;
}
listeners.remove(l);
}
}

View file

@ -0,0 +1,220 @@
/*
* Copyright 2013 Carnegie Mellon University.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.frontend.denoise;
import java.util.Arrays;
import edu.cmu.sphinx.frontend.BaseDataProcessor;
import edu.cmu.sphinx.frontend.Data;
import edu.cmu.sphinx.frontend.DataProcessingException;
import edu.cmu.sphinx.frontend.DataStartSignal;
import edu.cmu.sphinx.frontend.DoubleData;
import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;
import edu.cmu.sphinx.util.props.S4Double;
import edu.cmu.sphinx.util.props.S4Integer;
/**
* The noise filter, same as implemented in sphinxbase/sphinxtrain/pocketsphinx.
*
* The noise removal algorithm is inspired by the following papers: "Computationally
* Efficient Speech Enhancement by Spectral Minima Tracking" by G. Doblinger, and
*
* "Power-Normalized Cepstral Coefficients (PNCC) for Robust Speech Recognition"
* by C. Kim.
*
* For recent research and the state of the art, see papers about IMCRA and "A
* Minimum-Mean-Square-Error Noise Reduction Algorithm on Mel-Frequency Cepstra
* for Robust Speech Recognition" by Dong Yu and others.
*
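* <p>
* Per-coefficient processing sketch, paraphrasing the getData method below:
* <pre>
* power[i]  = lambdaPower * power[i] + (1 - lambdaPower) * input[i];
* signal[i] = Math.max(power[i] - noise[i], 0.0);
* gain[i]   = Math.min(Math.max(signal[i] / (power[i] + EPS), 1.0 / maxGain), maxGain);
* input[i] *= smoothGain[i]; // smoothGain is gain averaged over smoothWindow frames
* </pre>
*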
*/
public class Denoise extends BaseDataProcessor {
double[] power;
double[] noise;
double[] floor;
double[] peak;
@S4Double(defaultValue = 0.7)
public final static String LAMBDA_POWER = "lambdaPower";
double lambdaPower;
@S4Double(defaultValue = 0.995)
public final static String LAMBDA_A = "lambdaA";
double lambdaA;
@S4Double(defaultValue = 0.5)
public final static String LAMBDA_B = "lambdaB";
double lambdaB;
@S4Double(defaultValue = 0.85)
public final static String LAMBDA_T = "lambdaT";
double lambdaT;
@S4Double(defaultValue = 0.2)
public final static String MU_T = "muT";
double muT;
@S4Double(defaultValue = 20.0)
public final static String MAX_GAIN = "maxGain";
double maxGain;
@S4Integer(defaultValue = 4)
public final static String SMOOTH_WINDOW = "smoothWindow";
int smoothWindow;
final static double EPS = 1e-10;
public Denoise(double lambdaPower, double lambdaA, double lambdaB,
double lambdaT, double muT,
double maxGain, int smoothWindow) {
this.lambdaPower = lambdaPower;
this.lambdaA = lambdaA;
this.lambdaB = lambdaB;
this.lambdaT = lambdaT;
this.muT = muT;
this.maxGain = maxGain;
this.smoothWindow = smoothWindow;
}
public Denoise() {
}
/*
* (non-Javadoc)
*
* @see
* edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util
* .props.PropertySheet)
*/
@Override
public void newProperties(PropertySheet ps) throws PropertyException {
super.newProperties(ps);
lambdaPower = ps.getDouble(LAMBDA_POWER);
lambdaA = ps.getDouble(LAMBDA_A);
lambdaB = ps.getDouble(LAMBDA_B);
lambdaT = ps.getDouble(LAMBDA_T);
muT = ps.getDouble(MU_T);
maxGain = ps.getDouble(MAX_GAIN);
smoothWindow = ps.getInt(SMOOTH_WINDOW);
}
@Override
public Data getData() throws DataProcessingException {
Data inputData = getPredecessor().getData();
int i;
if (inputData instanceof DataStartSignal) {
power = null;
noise = null;
floor = null;
peak = null;
return inputData;
}
if (!(inputData instanceof DoubleData)) {
return inputData;
}
DoubleData inputDoubleData = (DoubleData) inputData;
double[] input = inputDoubleData.getValues();
int length = input.length;
if (power == null)
initStatistics(input, length);
updatePower(input);
estimateEnvelope(power, noise);
double[] signal = new double[length];
for (i = 0; i < length; i++) {
signal[i] = Math.max(power[i] - noise[i], 0.0);
}
estimateEnvelope(signal, floor);
tempMasking(signal);
powerBoosting(signal);
double[] gain = new double[length];
for (i = 0; i < length; i++) {
gain[i] = signal[i] / (power[i] + EPS);
gain[i] = Math.min(Math.max(gain[i], 1.0 / maxGain), maxGain);
}
double[] smoothGain = smooth(gain);
for (i = 0; i < length; i++) {
input[i] *= smoothGain[i];
}
return inputData;
}
private double[] smooth(double[] gain) {
double[] result = new double[gain.length];
for (int i = 0; i < gain.length; i++) {
int start = Math.max(i - smoothWindow, 0);
int end = Math.min(i + smoothWindow + 1, gain.length);
double sum = 0.0;
for (int j = start; j < end; j++) {
sum += gain[j];
}
result[i] = sum / (end - start);
}
return result;
}
private void powerBoosting(double[] signal) {
for (int i = 0; i < signal.length; i++) {
if (signal[i] < floor[i])
signal[i] = floor[i];
}
}
private void tempMasking(double[] signal) {
for (int i = 0; i < signal.length; i++) {
double in = signal[i];
peak[i] *= lambdaT;
if (signal[i] < lambdaT * peak[i])
signal[i] = peak[i] * muT;
if (in > peak[i])
peak[i] = in;
}
}
private void updatePower(double[] input) {
for (int i = 0; i < input.length; i++) {
power[i] = lambdaPower * power[i] + (1 - lambdaPower) * input[i];
}
}
private void estimateEnvelope(double[] signal, double[] envelope) {
for (int i = 0; i < signal.length; i++) {
if (signal[i] > envelope[i])
envelope[i] = lambdaA * envelope[i] + (1 - lambdaA) * signal[i];
else
envelope[i] = lambdaB * envelope[i] + (1 - lambdaB) * signal[i];
}
}
private void initStatistics(double[] input, int length) {
/* no previous data, initialize the statistics */
power = Arrays.copyOf(input, length);
noise = Arrays.copyOf(input, length);
floor = new double[length];
peak = new double[length];
for (int i = 0; i < length; i++) {
floor[i] = input[i] / maxGain;
}
}
}

View file

@ -0,0 +1,27 @@
/*
* Copyright 2010 Carnegie Mellon University.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.frontend.endpoint;
import edu.cmu.sphinx.frontend.BaseDataProcessor;
/**
* An abstract analyzer that signals the presence of speech in the last processed frame.
* This information is used, for example, in noise filtering components to estimate
* the noise spectrum.
*/
public abstract class AbstractVoiceActivityDetector extends BaseDataProcessor {
/**
* Returns the state of speech detected.
*
* @return true if the last processed data object was classified as speech.
*/
public abstract boolean isSpeech();
}

View file

@ -0,0 +1,117 @@
/*
* Copyright 1999-2004 Carnegie Mellon University.
* Portions Copyright 2002-2004 Sun Microsystems, Inc.
* Portions Copyright 2002-2004 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.frontend.endpoint;
import edu.cmu.sphinx.frontend.Data;
import edu.cmu.sphinx.frontend.DoubleData;
/** A container for DoubleData class that indicates whether the contained DoubleData is speech or not. */
public class SpeechClassifiedData implements Data {
private boolean isSpeech;
private final DoubleData data;
/**
* Constructs a SpeechClassifiedData object.
*
* @param doubleData the DoubleData
* @param isSpeech indicates whether the DoubleData is speech
*/
public SpeechClassifiedData(DoubleData doubleData, boolean isSpeech) {
this.data = doubleData;
this.isSpeech = isSpeech;
}
/**
* Sets whether this SpeechClassifiedData is speech or not.
*
* @param isSpeech true if this is speech, false otherwise
*/
public void setSpeech(boolean isSpeech) {
this.isSpeech = isSpeech;
}
/**
* Returns whether this is classified as speech.
*
* @return true if this is classified as speech, false otherwise
*/
public boolean isSpeech() {
return isSpeech;
}
/**
* Returns the data values.
*
* @return the data values
*/
public double[] getValues() {
return data.getValues();
}
/**
* Returns the sample rate of the data.
*
* @return the sample rate of the data
*/
public int getSampleRate() {
return data.getSampleRate();
}
/**
* Returns the time in milliseconds at which the audio data is collected.
*
* @return the difference, in milliseconds, between the time the audio data is collected and midnight, January 1,
* 1970
*/
public long getCollectTime() {
return data.getCollectTime();
}
/**
* Returns the position of the first sample in the original data. The very first sample number is zero.
*
* @return the position of the first sample in the original data
*/
public long getFirstSampleNumber() {
return data.getFirstSampleNumber();
}
/**
* Returns the DoubleData contained by this SpeechClassifiedData.
*
* @return the DoubleData contained by this SpeechClassifiedData
*/
public DoubleData getDoubleData() {
return data;
}
/**
* @return a string that describes the data.
*/
@Override
public String toString() {
return "SpeechClassifiedData containing " + data.toString() + " classified as " + (isSpeech ? "speech" : "non-speech");
}
}

View file

@ -0,0 +1,267 @@
/*
* Copyright 1999-2002 Carnegie Mellon University.
* Portions Copyright 2002 Sun Microsystems, Inc.
* Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.frontend.endpoint;
import edu.cmu.sphinx.frontend.*;
import edu.cmu.sphinx.util.LogMath;
import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;
import edu.cmu.sphinx.util.props.S4Double;
import edu.cmu.sphinx.util.props.S4Integer;
import java.util.logging.Level;
/**
* Implements a level tracking endpointer invented by Bent Schmidt Nielsen.
* <p>This endpointer is composed of two main steps.
* <ol>
* <li>classification of audio into speech and non-speech
* <li>inserting SPEECH_START and SPEECH_END signals around speech and removing non-speech regions
* </ol>
* <p>
* The first step, classification of audio into speech and non-speech, uses Bent Schmidt Nielsen's algorithm. Each
* time audio comes in, the average signal level and the background noise level are updated, using the signal level of
* the current audio. If the average signal level is greater than the background noise level by a certain threshold
* value (configurable), then the current audio is marked as speech. Otherwise, it is marked as non-speech.
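* <p>
* The per-frame update, paraphrasing the classify method below:
* <pre>
* level = ((level * averageNumber) + current) / (averageNumber + 1);
* // the background drops instantly to quieter input and rises slowly otherwise
* background = Math.min(current, background + (current - background) * adjustment);
* isSpeech = (level - background > threshold);
* </pre>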
* <p>
* The second step of this endpointer is documented in the class {@link SpeechMarker SpeechMarker}.
*
* @see SpeechMarker
*/
public class SpeechClassifier extends AbstractVoiceActivityDetector {
/** The property specifying the endpointing frame length in milliseconds. */
@S4Integer(defaultValue = 10)
public static final String PROP_FRAME_LENGTH_MS = "frameLengthInMs";
/** The property specifying the minimum signal level used to update the background signal level. */
@S4Double(defaultValue = 0)
public static final String PROP_MIN_SIGNAL = "minSignal";
/**
* The property specifying the threshold. If the current signal level is greater than the background level by
* this threshold, then the current signal is marked as speech. Therefore, a lower threshold will make the
* endpointer more sensitive, that is, mark more audio as speech. A higher threshold will make the endpointer less
* sensitive, that is, mark less audio as speech.
*/
@S4Double(defaultValue = 10)
public static final String PROP_THRESHOLD = "threshold";
/** The property specifying the adjustment. */
@S4Double(defaultValue = 0.003)
public static final String PROP_ADJUSTMENT = "adjustment";
protected final double averageNumber = 1;
protected double adjustment;
/** average signal level. */
protected double level;
/** background signal level. */
protected double background;
/** minimum valid signal level. */
protected double minSignal;
protected double threshold;
protected float frameLengthSec;
protected boolean isSpeech;
/* Statistics */
protected long speechFrames;
protected long backgroundFrames;
protected double totalBackgroundLevel;
protected double totalSpeechLevel;
public SpeechClassifier(int frameLengthMs, double adjustment, double threshold, double minSignal) {
initLogger();
this.frameLengthSec = frameLengthMs / 1000.f;
this.adjustment = adjustment;
this.threshold = threshold;
this.minSignal = minSignal;
initialize();
}
public SpeechClassifier() {
}
@Override
public void newProperties(PropertySheet ps) throws PropertyException {
super.newProperties(ps);
int frameLengthMs = ps.getInt(PROP_FRAME_LENGTH_MS);
frameLengthSec = frameLengthMs / 1000.f;
adjustment = ps.getDouble(PROP_ADJUSTMENT);
threshold = ps.getDouble(PROP_THRESHOLD);
minSignal = ps.getDouble(PROP_MIN_SIGNAL);
logger = ps.getLogger();
//logger.setLevel(Level.FINEST);
initialize();
}
/** Initializes this SpeechClassifier and its DataProcessor predecessor. */
@Override
public void initialize() {
super.initialize();
reset();
}
/** Resets this SpeechClassifier to a starting state. */
protected void reset() {
level = 0;
background = 300;
resetStats();
}
/**
* Returns the root mean square of the given samples on a logarithmic scale, computed as 20 * log10(rms).
*
* @param samples the samples
* @return 20 times the base-10 logarithm of the root mean square
*/
public static double logRootMeanSquare(double[] samples) {
assert samples.length > 0;
double sumOfSquares = 0.0;
for (double sample : samples) {
sumOfSquares += sample * sample;
}
double rootMeanSquare = Math.sqrt(sumOfSquares / samples.length);
rootMeanSquare = Math.max(rootMeanSquare, 1);
return (LogMath.log10((float) rootMeanSquare) * 20);
}
/**
* Classifies the given audio frame as speech or not, and updates the endpointing parameters.
*
* @param audio the audio frame
* @return Data with classification flag
*/
protected SpeechClassifiedData classify(DoubleData audio) {
double current = logRootMeanSquare(audio.getValues());
isSpeech = false;
if (current >= minSignal) {
level = ((level * averageNumber) + current) / (averageNumber + 1);
if (current < background) {
background = current;
} else {
background += (current - background) * adjustment;
}
if (level < background) {
level = background;
}
isSpeech = (level - background > threshold);
}
SpeechClassifiedData labeledAudio = new SpeechClassifiedData(audio, isSpeech);
if (logger.isLoggable(Level.FINEST)) {
String speech = "";
if (labeledAudio.isSpeech())
speech = "*";
logger.finest("Bkg: " + background + ", level: " + level +
", current: " + current + ' ' + speech);
}
collectStats (isSpeech);
return labeledAudio;
}
/**
* Resets the statistics.
*/
private void resetStats () {
backgroundFrames = 1;
speechFrames = 1;
totalSpeechLevel = 0;
totalBackgroundLevel = 0;
}
/**
* Collects statistics to provide information about the signal-to-noise ratio in the channel
*
* @param isSpeech if the current frame is classified as speech
*/
private void collectStats(boolean isSpeech) {
if (isSpeech) {
totalSpeechLevel = totalSpeechLevel + level;
speechFrames = speechFrames + 1;
} else {
totalBackgroundLevel = totalBackgroundLevel + background;
backgroundFrames = backgroundFrames + 1;
}
}
/**
* Returns the next Data object.
*
* @return the next Data object, or null if none available
* @throws DataProcessingException if a data processing error occurs
*/
@Override
public Data getData() throws DataProcessingException {
Data audio = getPredecessor().getData();
if (audio instanceof DataStartSignal)
reset();
if (audio instanceof DoubleData) {
DoubleData data = (DoubleData) audio;
audio = classify(data);
}
return audio;
}
/**
* Returns whether the most recently returned frame contains speech.
* It could be used, for example, by a noise filter to adjust its noise
* spectrum estimation.
*
* @return true if the current frame is speech
*/
@Override
public boolean isSpeech() {
return isSpeech;
}
/**
* Retrieves the accumulated signal-to-noise ratio in dB scale.
*
* @return signal to noise ratio
*/
public double getSNR () {
double snr = (totalBackgroundLevel / backgroundFrames - totalSpeechLevel / speechFrames);
logger.fine ("Background " + totalBackgroundLevel / backgroundFrames);
logger.fine ("Speech " + totalSpeechLevel / speechFrames);
logger.fine ("SNR is " + snr);
return snr;
}
/**
* Returns an estimate of whether the input data was noisy enough to break
* recognition. The audio is counted as noisy if the signal-to-noise ratio
* is less than -20 dB.
*
* @return estimation of data being noisy
*/
public boolean getNoisy () {
return (getSNR() > -20);
}
}

View file

@ -0,0 +1,46 @@
/*
* Copyright 1999-2002 Carnegie Mellon University.
* Portions Copyright 2002 Sun Microsystems, Inc.
* Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.frontend.endpoint;
import edu.cmu.sphinx.frontend.Signal;
/** A signal that indicates the end of speech. */
public class SpeechEndSignal extends Signal {
/** Constructs a SpeechEndSignal. */
public SpeechEndSignal() {
this(System.currentTimeMillis());
}
/**
* Constructs a SpeechEndSignal with the given creation time.
*
* @param time the creation time of the SpeechEndSignal
*/
public SpeechEndSignal(long time) {
super(time);
}
/**
* Returns the string "SpeechEndSignal".
*
* @return the string "SpeechEndSignal"
*/
@Override
public String toString() {
return "SpeechEndSignal";
}
}

View file

@ -0,0 +1,206 @@
/*
* Copyright 1999-2002 Carnegie Mellon University.
* Portions Copyright 2002 Sun Microsystems, Inc.
* Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.frontend.endpoint;
import edu.cmu.sphinx.frontend.*;
import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;
import edu.cmu.sphinx.util.props.S4Integer;
import java.util.LinkedList;
/**
* Takes a stream of SpeechClassifiedData objects, marked as speech and
* non-speech, and marks out the regions that are considered speech. This is done
* by inserting SPEECH_START and SPEECH_END signals into the stream.
* <p>
* The algorithm for inserting the two signals is as follows.
* <p>
* The algorithm is always in one of two states: 'in-speech' and
* 'out-of-speech'. If 'out-of-speech', it will read in audio until we hit audio
* that is speech. If we have read more than 'startSpeech' amount of
* <i>continuous</i> speech, we consider that speech has started, and insert a
* SPEECH_START at 'speechLeader' time before speech first started. The state of
* the algorithm changes to 'in-speech'.
* <p>
* Now consider the case when the algorithm is in 'in-speech' state. If it read
* an audio that is speech, it is scheduled for output. If the audio is
* non-speech, we read ahead until we have 'endSilence' amount of
* <i>continuous</i> non-speech. At the point we consider that speech has ended.
* A SPEECH_END signal is inserted at 'speechTrailer' time after the first
* non-speech audio. The algorithm returns to 'out-of-speech' state. If any
* speech audio is encountered in-between, the accounting starts all over again.
*
* While speech audio is being processed, the delay is lowered to some minimal
* amount. This helps to segment both slow speech with noticeable pauses and
* fast speech where pauses are minimal.
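* <p>
* Timing properties are converted to frame counts assuming 10 ms frames
* (see the reset method below):
* <pre>
* startSpeechFrames  = startSpeechTime / 10;
* endSilenceFrames   = endSilenceTime / 10;
* speechLeaderFrames = speechLeader / 10;
* </pre>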
*/
public class SpeechMarker extends BaseDataProcessor {
/**
* The property for the minimum amount of time in speech (in milliseconds)
* to be considered as utterance start.
*/
@S4Integer(defaultValue = 200)
public static final String PROP_START_SPEECH = "startSpeech";
private int startSpeechTime;
/**
* The property for the amount of time in silence (in milliseconds) to be
* considered as utterance end.
*/
@S4Integer(defaultValue = 200)
public static final String PROP_END_SILENCE = "endSilence";
private int endSilenceTime;
/**
* The property for the amount of time (in milliseconds) before speech start
* to be included as speech data.
*/
@S4Integer(defaultValue = 50)
public static final String PROP_SPEECH_LEADER = "speechLeader";
private int speechLeader;
private LinkedList<Data> inputQueue; // Audio objects are added to the end
private LinkedList<Data> outputQueue; // Audio objects are added to the end
private boolean inSpeech;
private int speechCount;
private int silenceCount;
private int startSpeechFrames;
private int endSilenceFrames;
private int speechLeaderFrames;
public SpeechMarker(int startSpeechTime, int endSilenceTime, int speechLeader) {
initLogger();
this.startSpeechTime = startSpeechTime;
this.speechLeader = speechLeader;
this.endSilenceTime = endSilenceTime;
}
public SpeechMarker() {
}
@Override
public void newProperties(PropertySheet ps) throws PropertyException {
super.newProperties(ps);
startSpeechTime = ps.getInt(PROP_START_SPEECH);
endSilenceTime = ps.getInt(PROP_END_SILENCE);
speechLeader = ps.getInt(PROP_SPEECH_LEADER);
}
/**
* Initializes this SpeechMarker
*/
@Override
public void initialize() {
super.initialize();
reset();
}
/**
* Resets this SpeechMarker to a starting state.
*/
private void reset() {
inSpeech = false;
speechCount = 0;
silenceCount = 0;
startSpeechFrames = startSpeechTime / 10;
endSilenceFrames = endSilenceTime / 10;
speechLeaderFrames = speechLeader / 10;
this.inputQueue = new LinkedList<Data>();
this.outputQueue = new LinkedList<Data>();
}
/**
* Returns the next Data object.
*
* @return the next Data object, or null if none available
* @throws DataProcessingException
* if a data processing error occurs
*/
@Override
public Data getData() throws DataProcessingException {
while (outputQueue.isEmpty()) {
Data data = getPredecessor().getData();
if (data == null)
break;
if (data instanceof DataStartSignal) {
reset();
outputQueue.add(data);
break;
}
if (data instanceof DataEndSignal) {
if (inSpeech) {
outputQueue.add(new SpeechEndSignal());
}
outputQueue.add(data);
break;
}
if (data instanceof SpeechClassifiedData) {
SpeechClassifiedData cdata = (SpeechClassifiedData) data;
if (cdata.isSpeech()) {
speechCount++;
silenceCount = 0;
} else {
speechCount = 0;
silenceCount++;
}
if (inSpeech) {
outputQueue.add(data);
} else {
inputQueue.add(data);
if (inputQueue.size() > startSpeechFrames + speechLeaderFrames) {
inputQueue.remove(0);
}
}
if (!inSpeech && speechCount == startSpeechFrames) {
inSpeech = true;
outputQueue.add(new SpeechStartSignal(cdata.getCollectTime() - speechLeader - startSpeechFrames));
outputQueue.addAll(inputQueue.subList(
Math.max(0, inputQueue.size() - startSpeechFrames - speechLeaderFrames), inputQueue.size()));
inputQueue.clear();
}
if (inSpeech && silenceCount == endSilenceFrames) {
inSpeech = false;
outputQueue.add(new SpeechEndSignal(cdata.getCollectTime()));
}
}
}
// If we have something left, return that
if (!outputQueue.isEmpty()) {
Data audio = outputQueue.remove(0);
if (audio instanceof SpeechClassifiedData) {
SpeechClassifiedData data = (SpeechClassifiedData) audio;
audio = data.getDoubleData();
}
return audio;
} else {
return null;
}
}
public boolean inSpeech() {
return inSpeech;
}
}

View file

@ -0,0 +1,46 @@
/*
* Copyright 1999-2002 Carnegie Mellon University.
* Portions Copyright 2002 Sun Microsystems, Inc.
* Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.frontend.endpoint;
import edu.cmu.sphinx.frontend.Signal;
/** A signal that indicates the start of speech. */
public class SpeechStartSignal extends Signal {
/** Constructs a SpeechStartSignal. */
public SpeechStartSignal() {
this(System.currentTimeMillis());
}
/**
* Constructs a SpeechStartSignal at the given time.
*
* @param time the time this SpeechStartSignal is created
*/
public SpeechStartSignal(long time) {
super(time);
}
/**
* Returns the string "SpeechStartSignal".
*
* @return the string "SpeechStartSignal"
*/
@Override
public String toString() {
return "SpeechStartSignal";
}
}

View file

@ -0,0 +1,243 @@
/*
* Copyright 1999-2002 Carnegie Mellon University.
* Portions Copyright 2002 Sun Microsystems, Inc.
* Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.frontend.feature;
import edu.cmu.sphinx.frontend.*;
import edu.cmu.sphinx.frontend.endpoint.*;
import edu.cmu.sphinx.util.props.*;
import java.util.*;
/**
* Abstract base class for windowed feature extractors like DeltasFeatureExtractor, ConcatFeatureExtractor
* or S3FeatureExtractor. Its main purpose is to collect windowSize cepstra frames in a buffer
* and let the extractor compute the feature frame with them.
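* <p>
* The cepstra are kept in a fixed-size circular buffer (see initialize below):
* <pre>
* cepstraBufferSize = 256;
* cepstraBuffer     = new DoubleData[cepstraBufferSize];
* cepstraBufferEdge = cepstraBufferSize - (window * 2 + 2);
* </pre>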
*/
public abstract class AbstractFeatureExtractor extends BaseDataProcessor {
/** The property for the window of the DeltasFeatureExtractor. */
@S4Integer(defaultValue = 3)
public static final String PROP_FEATURE_WINDOW = "windowSize";
private int bufferPosition;
private Signal pendingSignal;
private LinkedList<Data> outputQueue;
protected int cepstraBufferEdge;
protected int window;
protected int currentPosition;
protected int cepstraBufferSize;
protected DoubleData[] cepstraBuffer;
public AbstractFeatureExtractor(int window) {
initLogger();
this.window = window;
}
public AbstractFeatureExtractor() {
}
/*
* (non-Javadoc)
*
* @see edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util.props.PropertySheet)
*/
@Override
public void newProperties(PropertySheet ps) throws PropertyException {
super.newProperties(ps);
window = ps.getInt(PROP_FEATURE_WINDOW);
}
/*
* (non-Javadoc)
*
* @see edu.cmu.sphinx.frontend.DataProcessor#initialize(edu.cmu.sphinx.frontend.CommonConfig)
*/
@Override
public void initialize() {
super.initialize();
cepstraBufferSize = 256;
cepstraBuffer = new DoubleData[cepstraBufferSize];
cepstraBufferEdge = cepstraBufferSize - (window * 2 + 2);
outputQueue = new LinkedList<Data>();
reset();
}
/** Resets this feature extractor to be ready to read the next segment of data. */
private void reset() {
bufferPosition = 0;
currentPosition = 0;
}
/**
* Returns the next Data object produced by this feature extractor.
*
* @return the next available Data object, returns null if no Data is available
* @throws DataProcessingException if there is a data processing error
*/
@Override
public Data getData() throws DataProcessingException {
if (outputQueue.isEmpty()) {
Data input = getNextData();
if (input != null) {
if (input instanceof DoubleData) {
addCepstrum((DoubleData) input);
computeFeatures(1);
} else if (input instanceof DataStartSignal) {
pendingSignal = null;
outputQueue.add(input);
Data start = getNextData();
int n = processFirstCepstrum(start);
computeFeatures(n);
if (pendingSignal != null) {
outputQueue.add(pendingSignal);
}
} else if (input instanceof SpeechEndSignal) {
// the SpeechEndSignal is right at the boundary; pad with the last cepstrum
int n = replicateLastCepstrum();
computeFeatures(n);
outputQueue.add(input);
} else if (input instanceof DataEndSignal) {
outputQueue.add(input);
}
}
}
return outputQueue.isEmpty() ? null : outputQueue.removeFirst();
}
private Data getNextData() throws DataProcessingException {
Data d = getPredecessor().getData();
while (d != null && !(d instanceof DoubleData || d instanceof DataEndSignal || d instanceof DataStartSignal || d instanceof SpeechEndSignal)) {
outputQueue.add(d);
d = getPredecessor().getData();
}
return d;
}
/**
* Replicate the given cepstrum Data object into the first window+1 number of frames in the cepstraBuffer. This is
* the first cepstrum in the segment.
*
* @param cepstrum the Data to replicate
* @return the number of Features that can be computed
* @throws edu.cmu.sphinx.frontend.DataProcessingException if a data processing error occurs
*/
private int processFirstCepstrum(Data cepstrum)
throws DataProcessingException {
if (cepstrum instanceof DataEndSignal) {
outputQueue.add(cepstrum);
return 0;
} else if (cepstrum instanceof DataStartSignal) {
throw new Error("Too many UTTERANCE_START");
} else {
// At the start of an utterance, we replicate the first frame
// into window+1 frames, and then read the next "window" number
// of frames. This will allow us to compute the delta-
// double-delta of the first frame.
Arrays.fill(cepstraBuffer, 0, window + 1, cepstrum);
bufferPosition = window + 1;
bufferPosition %= cepstraBufferSize;
currentPosition = window;
currentPosition %= cepstraBufferSize;
int numberFeatures = 1;
pendingSignal = null;
for (int i = 0; i < window; i++) {
Data next = getNextData();
if (next != null) {
if (next instanceof DoubleData) {
// just a cepstra
addCepstrum((DoubleData) next);
} else if (next instanceof DataEndSignal || next instanceof SpeechEndSignal) {
// end of segment cepstrum
pendingSignal = (Signal) next;
replicateLastCepstrum();
numberFeatures += i;
break;
} else if (next instanceof DataStartSignal) {
throw new Error("Too many UTTERANCE_START");
}
}
}
return numberFeatures;
}
}
/**
* Adds the given DoubleData object to the cepstraBuffer.
*
* @param cepstrum the DoubleData object to add
*/
private void addCepstrum(DoubleData cepstrum) {
cepstraBuffer[bufferPosition++] = cepstrum;
bufferPosition %= cepstraBufferSize;
}
/**
* Replicate the last frame into the last window number of frames in the cepstraBuffer.
*
* @return the number of replicated Cepstrum
*/
private int replicateLastCepstrum() {
DoubleData last;
if (bufferPosition > 0) {
last = cepstraBuffer[bufferPosition - 1];
} else if (bufferPosition == 0) {
last = cepstraBuffer[cepstraBuffer.length - 1];
} else {
throw new Error("BufferPosition < 0");
}
for (int i = 0; i < window; i++) {
addCepstrum(last);
}
return window;
}
/**
* Converts the cepstrum data in the cepstraBuffer into feature Data objects.
*
* @param totalFeatures the number of Features that will be produced
*/
private void computeFeatures(int totalFeatures) {
// create the features
for (int i = 0; i < totalFeatures; i++) {
computeFeature();
}
}
/** Computes the next Feature. */
private void computeFeature() {
Data feature = computeNextFeature();
outputQueue.add(feature);
}
/**
* Computes the next feature. Advances the pointers as well.
*
* @return the feature Data computed
*/
protected abstract Data computeNextFeature();
}

View file

@ -0,0 +1,115 @@
/*
* Copyright 1999-2002 Carnegie Mellon University.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.frontend.feature;
import edu.cmu.sphinx.frontend.*;
import edu.cmu.sphinx.frontend.endpoint.*;
import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;
import java.util.*;
/**
* Applies batch automatic gain control (AGC): the maximum C0 (power) component over the utterance is subtracted from every frame.
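* <p>
* In code form, paraphrasing readUtterance and normalizeList below:
* <pre>
* agc = Math.max(agc, cepstrum[0]); // taken over all frames of the utterance
* cepstrum[0] -= agc;               // then applied to every frame
* </pre>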
*/
public class BatchAGC extends BaseDataProcessor {
private List<Data> cepstraList;
private double agc;
public BatchAGC() {
initLogger();
}
/* (non-Javadoc)
* @see edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util.props.PropertySheet)
*/
@Override
public void newProperties(PropertySheet ps) throws PropertyException {
super.newProperties(ps);
}
/** Initializes this BatchAGC. */
@Override
public void initialize() {
super.initialize();
cepstraList = new LinkedList<Data>();
}
/**
* Returns the next Data object, which is a normalized cepstrum. Signal objects are returned unmodified.
*
* @return the next available Data object, returns null if no Data object is available
* @throws DataProcessingException if there is an error processing data
*/
@Override
public Data getData() throws DataProcessingException {
Data output = null;
if (!cepstraList.isEmpty()) {
output = cepstraList.remove(0);
} else {
agc = 0.0;
cepstraList.clear();
// read the cepstra of the entire utterance, calculate and subtract the gain
if (readUtterance() > 0) {
normalizeList();
output = cepstraList.remove(0);
}
}
return output;
}
/**
* Reads the cepstra of the entire Utterance into the cepstraList.
*
* @return the number of cepstra read
* @throws DataProcessingException if an error occurred reading the Data
*/
private int readUtterance() throws DataProcessingException {
Data input = null;
int numFrames = 0;
while (true) {
input = getPredecessor().getData();
if (input == null) {
break;
} else if (input instanceof DataEndSignal || input instanceof SpeechEndSignal) {
cepstraList.add(input);
break;
} else if (input instanceof DoubleData) {
cepstraList.add(input);
double c0 = ((DoubleData)input).getValues()[0];
if (agc < c0)
agc = c0;
} else { // DataStartSignal or other Signal
cepstraList.add(input);
}
numFrames++;
}
return numFrames;
}
/** Normalizes the list of Data. */
private void normalizeList() {
for (Data data : cepstraList) {
if (data instanceof DoubleData) {
((DoubleData)data).getValues()[0] -= agc;
}
}
}
}

View file

@ -0,0 +1,183 @@
/*
* Copyright 1999-2002 Carnegie Mellon University.
* Portions Copyright 2002 Sun Microsystems, Inc.
* Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.frontend.feature;
import edu.cmu.sphinx.frontend.*;
import edu.cmu.sphinx.frontend.endpoint.*;
import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.util.*;
/**
* Applies cepstral mean normalization (CMN), sometimes called channel mean normalization, to incoming cepstral data.
*
* Its goal is to reduce the distortion caused by the transmission channel. The output is mean normalized cepstral
* data.
* <p>
* The CMN processing subtracts the mean from all the {@link Data} objects between a {@link
* edu.cmu.sphinx.frontend.DataStartSignal} and a {@link DataEndSignal} or between a {@link
* edu.cmu.sphinx.frontend.endpoint.SpeechStartSignal} and a {@link SpeechEndSignal}. BatchCMN will read in all the {@link Data}
* objects, calculate the mean, and subtract this mean from all the {@link Data} objects. For a given utterance, it will
* only produce an output after reading all the incoming data for the utterance. As a result, this process can introduce
* a significant processing delay, which is acceptable for batch processing, but not for live mode. In the latter case,
* one should use the {@link LiveCMN}.
* <p>
* CMN is a technique used to reduce distortions that are introduced by the transfer function of the transmission
* channel (e.g., the microphone). Using a transmission channel to transmit the input speech translates to multiplying
* the spectrum of the input speech with the transfer function of the channel (the distortion). Since the cepstrum is
* the Fourier Transform of the log spectrum, the logarithm turns the multiplication into a summation. Averaging over
* time, the mean is an estimate of the channel, which remains roughly constant. The channel is thus removed from the
* cepstrum by subtracting the mean cepstral vector. Intuitively, the mean cepstral vector approximately describes the
* spectral characteristics of the transmission channel (e.g., microphone).
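* <p>
* In code form, paraphrasing normalizeList below (sums[] is reused to hold the means):
* <pre>
* mean[j]      = sums[j] / numberDataCepstra;
* cepstrum[j] -= mean[j]; // applied to every frame of the utterance
* </pre>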
*
* @see LiveCMN
*/
public class BatchCMN extends BaseDataProcessor {
private double[] sums; // array of current sums
private List<Data> cepstraList;
private int numberDataCepstra;
private DecimalFormat formatter = new DecimalFormat("0.00;-0.00", new DecimalFormatSymbols(Locale.US));
public BatchCMN() {
initLogger();
}
/* (non-Javadoc)
* @see edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util.props.PropertySheet)
*/
@Override
public void newProperties(PropertySheet ps) throws PropertyException {
super.newProperties(ps);
}
/** Initializes this BatchCMN. */
@Override
public void initialize() {
super.initialize();
sums = null;
cepstraList = new LinkedList<Data>();
}
/** Initializes the sums array and clears the cepstra list. */
private void reset() {
sums = null; // clears the sums array
cepstraList.clear();
numberDataCepstra = 0;
}
/**
* Returns the next Data object, which is a normalized cepstrum. Signal objects are returned unmodified.
*
* @return the next available Data object, returns null if no Data object is available
* @throws DataProcessingException if there is an error processing data
*/
@Override
public Data getData() throws DataProcessingException {
Data output = null;
if (!cepstraList.isEmpty()) {
output = cepstraList.remove(0);
} else {
reset();
// read the cepstra of the entire utterance, calculate
// and apply the cepstral mean
if (readUtterance() > 0) {
normalizeList();
output = cepstraList.remove(0);
}
}
return output;
}
/**
* Reads the cepstra of the entire Utterance into the cepstraList.
*
* @return the number of cepstra read
* @throws DataProcessingException if an error occurred reading the Data
*/
private int readUtterance() throws DataProcessingException {
Data input = null;
do {
input = getPredecessor().getData();
if (input != null) {
if (input instanceof DoubleData) {
double[] cepstrumData = ((DoubleData) input).getValues();
if (sums == null) {
sums = new double[cepstrumData.length];
} else {
if (sums.length != cepstrumData.length) {
throw new Error("Inconsistent cepstrum lengths: sums: "
+ sums.length + ", cepstrum: " + cepstrumData.length);
}
}
if (cepstrumData[0] >= 0) {
// add the cepstrum data to the sums
for (int j = 0; j < cepstrumData.length; j++) {
sums[j] += cepstrumData[j];
}
numberDataCepstra++;
}
cepstraList.add(input);
} else if (input instanceof DataEndSignal || input instanceof SpeechEndSignal) {
cepstraList.add(input);
break;
} else { // DataStartSignal or other Signal
cepstraList.add(input);
}
}
} while (input != null);
return numberDataCepstra;
}
/** Normalizes the list of Data. */
private void normalizeList() {
StringBuilder cmn = new StringBuilder();
// calculate the mean first
for (int i = 0; i < sums.length; i++) {
sums[i] /= numberDataCepstra;
cmn.append (formatter.format(sums[i]));
cmn.append(' ');
}
logger.info(cmn.toString());
for (Data data : cepstraList) {
if (data instanceof DoubleData) {
double[] cepstrum = ((DoubleData)data).getValues();
for (int j = 0; j < cepstrum.length; j++) {
cepstrum[j] -= sums[j]; // sums[] is now the means[]
}
}
}
}
}

View file

@ -0,0 +1,156 @@
/*
* Copyright 2010 PC-NG Inc..
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.frontend.feature;
import edu.cmu.sphinx.frontend.*;
import edu.cmu.sphinx.frontend.endpoint.*;
import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;
import java.util.*;
/**
* Applies cepstral variance normalization (CVN), so that each coefficient
* will have unit variance. You need to put this element after the mean
* normalizer in the frontend pipeline.
* <p>
* CVN is reported to improve the stability of decoding in the presence of
* additive noise, so it might be useful in some situations.
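* <p>
* In code form, paraphrasing normalizeList below (variances[] first accumulates
* the sums of squares and is then turned into scale factors):
* <pre>
* scale[j]     = Math.sqrt(numberDataCepstra / variances[j]);
* cepstrum[j] *= scale[j]; // applied to every frame of the utterance
* </pre>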
*
* @see LiveCMN
*/
public class BatchVarNorm extends BaseDataProcessor {
private double[] variances; // accumulated sums of squares, later the scale factors
private List<Data> cepstraList;
private int numberDataCepstra;
public BatchVarNorm() {
initLogger();
}
/* (non-Javadoc)
* @see edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util.props.PropertySheet)
*/
@Override
public void newProperties(PropertySheet ps) throws PropertyException {
super.newProperties(ps);
}
/** Initializes this BatchVarNorm. */
@Override
public void initialize() {
super.initialize();
variances = null;
cepstraList = new LinkedList<Data>();
}
/** Clears the variances array and the cepstra list. */
private void reset() {
variances = null; // clears the variances array
cepstraList.clear();
numberDataCepstra = 0;
}
/**
* Returns the next Data object, which is a normalized cepstrum. Signal objects are returned unmodified.
*
* @return the next available Data object, returns null if no Data object is available
* @throws DataProcessingException if there is an error processing data
*/
@Override
public Data getData() throws DataProcessingException {
Data output = null;
if (!cepstraList.isEmpty()) {
output = cepstraList.remove(0);
} else {
reset();
// read the cepstra of the entire utterance, calculate
// and apply variance normalization
if (readUtterance() > 0) {
normalizeList();
output = cepstraList.remove(0);
}
}
return output;
}
/**
* Reads the cepstra of the entire Utterance into the cepstraList.
*
* @return the number of cepstra read
* @throws DataProcessingException if an error occurred reading the Data
*/
private int readUtterance() throws DataProcessingException {
Data input = null;
do {
input = getPredecessor().getData();
if (input != null) {
if (input instanceof DoubleData) {
numberDataCepstra++;
double[] cepstrumData = ((DoubleData) input).getValues();
if (variances == null) {
variances = new double[cepstrumData.length];
} else {
if (variances.length != cepstrumData.length) {
throw new Error("Inconsistent cepstrum lengths: variances: "
+ variances.length + ", cepstrum: " + cepstrumData.length);
}
}
// add the squared cepstrum data to the sums
for (int j = 0; j < cepstrumData.length; j++) {
variances[j] += cepstrumData[j] * cepstrumData[j];
}
cepstraList.add(input);
} else if (input instanceof DataEndSignal || input instanceof SpeechEndSignal) {
cepstraList.add(input);
break;
} else { // DataStartSignal or other Signal
cepstraList.add(input);
}
}
} while (input != null);
return numberDataCepstra;
}
/** Normalizes the list of Data. */
private void normalizeList() {
// turn the accumulated sums of squares into per-coefficient scale factors
for (int i = 0; i < variances.length; i++) {
variances[i] = Math.sqrt(numberDataCepstra / variances[i]);
}
for (Data data : cepstraList) {
if (data instanceof DoubleData) {
double[] cepstrum = ((DoubleData)data).getValues();
for (int j = 0; j < cepstrum.length; j++) {
cepstrum[j] *= variances[j];
}
}
}
}
}

View file

@ -0,0 +1,56 @@
/*
* Copyright 2002-2009 Carnegie Mellon University.
* Copyright 2009 PC-NG Inc.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.frontend.feature;
import edu.cmu.sphinx.frontend.*;
/**
* This component concatenates the cepstra from a sequence of frames according to the window size.
* It is not supposed to give high accuracy alone, but combined with an LDA transform it can give the same
* or even better results than conventional delta and delta-delta coefficients. The idea is that
* the delta-delta computation is also a matrix multiplication, so by using a matrix automatically
* generated with LDA/MLLT we can obtain better results.
* The model for this feature extractor should be trained with SphinxTrain with the 1s_c feature type and
* with the cepwin option enabled. Don't forget to set the window size accordingly.
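* <p>
* Output geometry, matching computeNextFeature below (featureLength and cepstrumLength
* are illustrative names): frames from t-window to t+window are concatenated, so
* <pre>
* featureLength = (window * 2 + 1) * cepstrumLength;
* </pre>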
*/
public class ConcatFeatureExtractor extends AbstractFeatureExtractor {
public ConcatFeatureExtractor(int window) {
super(window);
}
public ConcatFeatureExtractor() {
}
/**
* Computes the next feature. Advances the pointers as well.
*
* @return the feature Data computed
*/
@Override
protected Data computeNextFeature() {
DoubleData currentCepstrum = cepstraBuffer[currentPosition];
float[] feature = new float[(window * 2 + 1) * currentCepstrum.getValues().length];
int j = 0;
for (int k = -window; k <= window; k++) {
int position = (currentPosition + k + cepstraBufferSize) % cepstraBufferSize;
double[] buffer = cepstraBuffer[position].getValues();
for (double val : buffer) {
feature[j++] = (float)val;
}
}
currentPosition = (currentPosition + 1) % cepstraBufferSize ;
return (new FloatData(feature,
currentCepstrum.getSampleRate(),
currentCepstrum.getFirstSampleNumber()));
}
}

View file

@ -0,0 +1,93 @@
/*
* Copyright 1999-2002 Carnegie Mellon University.
* Portions Copyright 2002 Sun Microsystems, Inc.
* Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.frontend.feature;
import edu.cmu.sphinx.frontend.*;
/**
* Computes the delta and double delta of input cepstrum (or plp or ...). The delta is the first order derivative and
* the double delta (a.k.a. delta delta) is the second order derivative of the original cepstrum. They help model the
* speech signal dynamics. The output data is a {@link FloatData} object with a float array of size three times the
* original cepstrum, formed by the concatenation of cepstra, delta cepstra, and double delta cepstra. The output is the
* feature vector used by the decoder. Figure 1 shows the arrangement of the output feature data array:
* <p>
* <img alt="Layout of features" src="doc-files/feature.jpg"> <br> <b>Figure 1: Layout of the returned features. </b>
* <p>
* Suppose that the original cepstrum has a length of N, the first N elements of the feature are just the original
* cepstrum, the second N elements are the delta of the cepstrum, and the last N elements are the double delta of the
* cepstrum.
* <p>
* Figure 2 below shows pictorially the computation of the delta and double delta of a cepstrum vector, using the last 3
* cepstra and the next 3 cepstra. <img alt="Delta computation" src="doc-files/deltas.jpg"> <br> <b>Figure 2: Delta and double delta vector
* computation. </b>
* <p>
* Referring to Figure 2, the delta is computed by subtracting the cepstrum that is two frames behind of the current
* cepstrum from the cepstrum that is two frames ahead of the current cepstrum. The computation of the double delta is
* similar. It is computed by subtracting the delta cepstrum one time frame behind from the delta cepstrum one time
* frame ahead. Replacing delta cepstra with cepstra, this works out to a formula involving the cepstra that are one and
* three behind and after the current cepstrum.
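* <p>
* In formula form, matching computeNextFeature below (mfc[i] is shorthand for the
* cepstrum i frames away from the current one, as in the inline comments):
* <pre>
* dcep[k]  = mfc[2][k] - mfc[-2][k]
* d2cep[k] = (mfc[3][k] - mfc[-1][k]) - (mfc[1][k] - mfc[-3][k])
* </pre>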
*/
public class DeltasFeatureExtractor extends AbstractFeatureExtractor {
public DeltasFeatureExtractor(int window) {
super(window);
}
public DeltasFeatureExtractor() {
}
/**
* Computes the next feature. Advances the pointers as well.
*
* @return the feature Data computed
*/
@Override
protected Data computeNextFeature() {
int jp1 = (currentPosition - 1 + cepstraBufferSize) % cepstraBufferSize;
int jp2 = (currentPosition - 2 + cepstraBufferSize) % cepstraBufferSize;
int jp3 = (currentPosition - 3 + cepstraBufferSize) % cepstraBufferSize;
int jf1 = (currentPosition + 1) % cepstraBufferSize;
int jf2 = (currentPosition + 2) % cepstraBufferSize;
int jf3 = (currentPosition + 3) % cepstraBufferSize;
DoubleData currentCepstrum = cepstraBuffer[currentPosition];
double[] mfc3f = cepstraBuffer[jf3].getValues();
double[] mfc2f = cepstraBuffer[jf2].getValues();
double[] mfc1f = cepstraBuffer[jf1].getValues();
double[] current = currentCepstrum.getValues();
double[] mfc1p = cepstraBuffer[jp1].getValues();
double[] mfc2p = cepstraBuffer[jp2].getValues();
double[] mfc3p = cepstraBuffer[jp3].getValues();
float[] feature = new float[current.length * 3];
currentPosition = (currentPosition + 1) % cepstraBufferSize;
// CEP; copy all the cepstrum data
int j = 0;
for (double val : current) {
feature[j++] = (float)val;
}
// DCEP: mfc[2] - mfc[-2]
for (int k = 0; k < mfc2f.length; k++) {
feature[j++] = (float) (mfc2f[k] - mfc2p[k]);
}
// D2CEP: (mfc[3] - mfc[-1]) - (mfc[1] - mfc[-3])
for (int k = 0; k < mfc3f.length; k++) {
feature[j++] = (float) ((mfc3f[k] - mfc1p[k]) - (mfc1f[k] - mfc3p[k]));
}
return (new FloatData(feature,
currentCepstrum.getSampleRate(),
currentCepstrum.getFirstSampleNumber()));
}
}

Some files were not shown because too many files have changed in this diff.