Added voice control

Former-commit-id: 6f69079bf44f0d8f9ae40de6b0f1638d103464c2
This commit is contained in:
Ziver Koc 2015-05-13 21:14:10 +00:00
parent 35c92407a3
commit 53da641909
863 changed files with 192681 additions and 0 deletions

View file

@ -0,0 +1,58 @@
<project
xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>edu.cmu.sphinx</groupId>
<artifactId>sphinx4-parent</artifactId>
<version>1.0-SNAPSHOT</version>
</parent>
<artifactId>sphinx4-samples</artifactId>
<packaging>jar</packaging>
<name>Sphinx4 demo applications</name>
<dependencies>
<dependency>
<groupId>edu.cmu.sphinx</groupId>
<artifactId>sphinx4-core</artifactId>
<version>1.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>edu.cmu.sphinx</groupId>
<artifactId>sphinx4-data</artifactId>
<version>1.0-SNAPSHOT</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<archive>
<manifest>
<addClasspath>true</addClasspath>
<mainClass>edu.cmu.sphinx.demo.DemoRunner</mainClass>
</manifest>
</archive>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

View file

@ -0,0 +1,49 @@
package edu.cmu.sphinx.demo;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.Map;
import java.util.TreeMap;
import edu.cmu.sphinx.demo.aligner.AlignerDemo;
import edu.cmu.sphinx.demo.dialog.DialogDemo;
import edu.cmu.sphinx.demo.speakerid.SpeakerIdentificationDemo;
import edu.cmu.sphinx.demo.transcriber.TranscriberDemo;
import static java.util.Arrays.copyOfRange;
/**
 * Command-line dispatcher for the bundled Sphinx-4 demos. The first argument
 * selects a demo by name; the remaining arguments are forwarded verbatim to
 * that demo's own {@code main} method via reflection.
 */
public class DemoRunner {

    /** Signature shared by every demo entry point: a single String[] parameter. */
    static final Class<?>[] paramTypes = new Class<?>[] {String[].class};

    /** Demo name -> demo class; TreeMap keeps the usage listing sorted. */
    private static final Map<String, Class<?>> classes =
            new TreeMap<String, Class<?>>();

    static {
        classes.put("aligner", AlignerDemo.class);
        classes.put("dialog", DialogDemo.class);
        classes.put("speakerid", SpeakerIdentificationDemo.class);
        classes.put("transcriber", TranscriberDemo.class);
    }

    /** Writes the usage banner and the list of known demo names to stderr. */
    public static void printUsage() {
        System.err.println("Usage: DemoRunner <DEMO> [<ARG> ...]\n");
        System.err.println("Demo names:");
        for (String name : classes.keySet()) {
            System.err.println(" " + name);
        }
    }

    /**
     * Dispatches to the demo named by {@code args[0]}.
     *
     * @param args demo name followed by that demo's own arguments
     * @throws Throwable whatever the selected demo's main method throws
     */
    public static void main(String[] args) throws Throwable {
        boolean validSelection = args.length != 0 && classes.containsKey(args[0]);
        if (!validSelection) {
            printUsage();
            System.exit(1);
        }
        String[] demoArgs = copyOfRange(args, 1, args.length);
        try {
            Method entryPoint = classes.get(args[0]).getMethod("main", paramTypes);
            entryPoint.invoke(null, new Object[] {demoArgs});
        } catch (InvocationTargetException e) {
            // Surface the demo's own exception instead of the reflection wrapper.
            throw e.getCause();
        }
    }
}

View file

@ -0,0 +1,108 @@
/*
* Copyright 1999-2013 Carnegie Mellon University.
* Portions Copyright 2004 Sun Microsystems, Inc.
* Portions Copyright 2004 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.demo.aligner;
import java.io.File;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
import edu.cmu.sphinx.alignment.LongTextAligner;
import edu.cmu.sphinx.api.SpeechAligner;
import edu.cmu.sphinx.result.WordResult;
/**
* This class demonstrates how to align audio to existing transcription and
* receive word timestamps.
* <br>
* In order to initialize the aligner you need to specify several data files
* which might be available on the CMUSphinx website. There should be an
* acoustic model for your language, a dictionary, an optional G2P model to
* convert word strings to pronunciation.
* <br>
* Currently the audio must have specific format (16khz, 16bit, mono), but in
* the future other formats will be supported.
* <br>
* Text should be a clean text in lower case. It should be cleaned from
* punctuation marks, numbers and other non-speakable things. In the future
* automatic cleanup will be supported.
*/
public class AlignerDemo {

    // Default US English acoustic model shipped in the sphinx4-data jar.
    private static final String ACOUSTIC_MODEL_PATH =
            "resource:/edu/cmu/sphinx/models/en-us/en-us";
    // Default pronunciation dictionary shipped in the sphinx4-data jar.
    private static final String DICTIONARY_PATH =
            "resource:/edu/cmu/sphinx/models/en-us/cmudict-en-us.dict";
    // Reference transcript matching the bundled demo recording.
    private static final String TEXT = "one zero zero zero one nine oh two "
            + "one oh zero one eight zero three";

    /**
     * Runs the alignment demo.
     *
     * Optional positional arguments:
     *   args[0] - path to an audio file
     *   args[1] - path to a text file containing the transcript
     *   args[2] - acoustic model path (defaults to ACOUSTIC_MODEL_PATH)
     *   args[3] - dictionary path (defaults to DICTIONARY_PATH)
     *   args[4] - G2P model path (defaults to null, i.e. no G2P model)
     * With fewer than two arguments the bundled demo recording and the TEXT
     * transcript are used instead.
     *
     * @param args command-line arguments as described above
     * @throws Exception on I/O or recognition failure
     */
    public static void main(String args[]) throws Exception {
        URL audioUrl;
        String transcript;
        if (args.length > 1) {
            audioUrl = new File(args[0]).toURI().toURL();
            // Delimiter "\\Z" makes the scanner return the whole file
            // as a single token.
            Scanner scanner = new Scanner(new File(args[1]));
            scanner.useDelimiter("\\Z");
            transcript = scanner.next();
            scanner.close();
        } else {
            audioUrl = AlignerDemo.class.getResource("10001-90210-01803.wav");
            transcript = TEXT;
        }
        String acousticModelPath =
                (args.length > 2) ? args[2] : ACOUSTIC_MODEL_PATH;
        String dictionaryPath = (args.length > 3) ? args[3] : DICTIONARY_PATH;
        String g2pPath = (args.length > 4) ? args[4] : null;
        SpeechAligner aligner =
                new SpeechAligner(acousticModelPath, dictionaryPath, g2pPath);

        // Recognize the audio constrained by the transcript; each WordResult
        // carries a recognized word plus its time frame.
        List<WordResult> results = aligner.align(audioUrl, transcript);
        List<String> stringResults = new ArrayList<String>();
        for (WordResult wr : results) {
            stringResults.add(wr.getWord().getSpelling());
        }

        // Align the recognized word sequence against the reference transcript
        // (expanded to sentences, then tokenized to words).
        LongTextAligner textAligner =
                new LongTextAligner(stringResults, 2);
        List<String> sentences = aligner.getTokenizer().expand(transcript);
        List<String> words = aligner.sentenceToWords(sentences);

        // aid[i] is the index into `results` matched to reference word i,
        // or -1 when reference word i has no match in the recognition.
        int[] aid = textAligner.align(words);

        // Print a diff-style report:
        //   "- word"      reference word with no matching recognized word
        //   "+ word [t]"  recognized word not matched to the reference
        //   "  word [t]"  aligned word with its time frame
        int lastId = -1;
        for (int i = 0; i < aid.length; ++i) {
            if (aid[i] == -1) {
                System.out.format("- %s\n", words.get(i));
            } else {
                if (aid[i] - lastId > 1) {
                    // Recognized words that fell between two aligned ones.
                    for (WordResult result : results.subList(lastId + 1,
                            aid[i])) {
                        System.out.format("+ %-25s [%s]\n", result.getWord()
                                .getSpelling(), result.getTimeFrame());
                    }
                }
                System.out.format(" %-25s [%s]\n", results.get(aid[i])
                        .getWord().getSpelling(), results.get(aid[i])
                        .getTimeFrame());
                lastId = aid[i];
            }
        }

        // Flush recognized words remaining after the last aligned one.
        // NOTE(review): skipped entirely when nothing aligned (lastId == -1).
        if (lastId >= 0 && results.size() - lastId > 1) {
            for (WordResult result : results.subList(lastId + 1,
                    results.size())) {
                System.out.format("+ %-25s [%s]\n", result.getWord()
                        .getSpelling(), result.getTimeFrame());
            }
        }
    }
}

View file

@ -0,0 +1,68 @@
/*
* Copyright 2014 Carnegie Mellon University.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*/
package edu.cmu.sphinx.demo.allphone;
import java.io.InputStream;
import edu.cmu.sphinx.api.Configuration;
import edu.cmu.sphinx.api.Context;
import edu.cmu.sphinx.api.SpeechResult;
import edu.cmu.sphinx.recognizer.Recognizer;
import edu.cmu.sphinx.result.Result;
import edu.cmu.sphinx.result.WordResult;
import edu.cmu.sphinx.util.TimeFrame;
/**
* A simple example that shows how to transcribe a continuous audio file that
* has multiple utterances in it.
*/
public class AllphoneDemo {

    /** Classpath location of the demo recording. */
    private static final String AUDIO_RESOURCE =
            "/edu/cmu/sphinx/demo/aligner/10001-90210-01803.wav";

    /** Size in bytes of the RIFF/WAV header preceding the raw PCM samples. */
    private static final int WAV_HEADER_SIZE = 44;

    /**
     * Runs phone-level recognition over the bundled demo recording and prints
     * each hypothesis with per-word (phone) timings and the lattice size.
     *
     * @param args unused
     * @throws Exception on configuration, I/O, or recognition failure
     */
    public static void main(String[] args) throws Exception {
        System.out.println("Loading models...");

        Configuration configuration = new Configuration();
        // Load model from the jar
        configuration
                .setAcousticModelPath("resource:/edu/cmu/sphinx/models/en-us/en-us");
        // You can also load model from folder
        // configuration.setAcousticModelPath("file:en-us");
        configuration
                .setDictionaryPath("resource:/edu/cmu/sphinx/models/en-us/cmudict-en-us.dict");

        Context context = new Context(configuration);
        // Swap in the all-phone search manager so the decoder searches over
        // phones rather than the default word search.
        context.setLocalProperty("decoder->searchManager", "allphoneSearchManager");
        Recognizer recognizer = context.getInstance(Recognizer.class);

        // try-with-resources: the original version never closed this stream.
        try (InputStream stream = AllphoneDemo.class.getResourceAsStream(AUDIO_RESOURCE)) {
            if (stream == null) {
                throw new IllegalStateException(
                        "audio resource not found: " + AUDIO_RESOURCE);
            }
            // InputStream.skip may skip fewer bytes than requested, so loop
            // until the whole WAV header has been consumed.
            long remaining = WAV_HEADER_SIZE;
            while (remaining > 0) {
                long skipped = stream.skip(remaining);
                if (skipped <= 0) {
                    break; // end of stream
                }
                remaining -= skipped;
            }

            // Simple recognition with generic model
            recognizer.allocate();
            context.setSpeechSource(stream, TimeFrame.INFINITE);
            Result result;
            while ((result = recognizer.recognize()) != null) {
                SpeechResult speechResult = new SpeechResult(result);
                System.out.format("Hypothesis: %s\n", speechResult.getHypothesis());
                System.out.println("List of recognized words and their times:");
                for (WordResult r : speechResult.getWords()) {
                    System.out.println(r);
                }
                System.out.println("Lattice contains "
                        + speechResult.getLattice().getNodes().size() + " nodes");
            }
            recognizer.deallocate();
        }
    }
}

View file

@ -0,0 +1,186 @@
/*
* Copyright 2013 Carnegie Mellon University.
* Portions Copyright 2004 Sun Microsystems, Inc.
* Portions Copyright 2004 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*/
package edu.cmu.sphinx.demo.dialog;
import java.util.HashMap;
import java.util.Map;
import edu.cmu.sphinx.api.Configuration;
import edu.cmu.sphinx.api.LiveSpeechRecognizer;
/**
 * Voice-menu demo that switches between three live recognizers: a JSGF
 * grammar for the top-level menu and bank account, a GrXML grammar for
 * digits, and an n-gram language model for free-form weather phrases.
 */
public class DialogDemo {

    private static final String ACOUSTIC_MODEL =
            "resource:/edu/cmu/sphinx/models/en-us/en-us";
    private static final String DICTIONARY_PATH =
            "resource:/edu/cmu/sphinx/models/en-us/cmudict-en-us.dict";
    private static final String GRAMMAR_PATH =
            "resource:/edu/cmu/sphinx/demo/dialog/";
    private static final String LANGUAGE_MODEL =
            "resource:/edu/cmu/sphinx/demo/dialog/weather.lm";

    /** Spoken digit word -> numeric value; "oh" and "zero" both map to 0. */
    private static final Map<String, Integer> DIGITS =
            new HashMap<String, Integer>();

    static {
        DIGITS.put("oh", 0);
        DIGITS.put("zero", 0);
        DIGITS.put("one", 1);
        DIGITS.put("two", 2);
        DIGITS.put("three", 3);
        DIGITS.put("four", 4);
        DIGITS.put("five", 5);
        DIGITS.put("six", 6);
        DIGITS.put("seven", 7);
        DIGITS.put("eight", 8);
        DIGITS.put("nine", 9);
    }

    /**
     * Converts a spoken amount such as "deposit one two point five" to a
     * double. tokens[0] is the command word and is skipped; "point" becomes
     * the decimal separator; every other token must be a digit word.
     *
     * @param tokens whitespace-split utterance, command word first
     * @return the numeric value of the spoken digits
     * @throws IllegalArgumentException if a token is not a known digit word
     */
    private static double parseNumber(String[] tokens) {
        StringBuilder sb = new StringBuilder();
        for (int i = 1; i < tokens.length; ++i) {
            if (tokens[i].equals("point")) {
                sb.append(".");
            } else {
                Integer digit = DIGITS.get(tokens[i]);
                if (digit == null) {
                    // Fail fast with a clear message; the original appended
                    // the string "null" here and crashed later inside
                    // Double.parseDouble with an opaque NumberFormatException.
                    throw new IllegalArgumentException(
                            "unrecognized digit token: " + tokens[i]);
                }
                sb.append(digit);
            }
        }
        return Double.parseDouble(sb.toString());
    }

    /**
     * Echoes recognized digit strings until the user says "101"
     * (one zero one / one oh one).
     */
    private static void recognizeDigits(LiveSpeechRecognizer recognizer) {
        System.out.println("Digits recognition (using GrXML)");
        System.out.println("--------------------------------");
        System.out.println("Example: one two three");
        System.out.println("Say \"101\" to exit");
        System.out.println("--------------------------------");
        recognizer.startRecognition(true);
        while (true) {
            String utterance = recognizer.getResult().getHypothesis();
            if (utterance.equals("one zero one")
                    || utterance.equals("one oh one"))
                break;
            else
                System.out.println(utterance);
        }
        recognizer.stopRecognition();
    }

    /**
     * Simple spoken bank-account menu: "deposit"/"withdraw" amounts update a
     * running balance, "balance" just reports it, "back" returns to the menu.
     */
    private static void recognizerBankAccount(LiveSpeechRecognizer recognizer) {
        System.out.println("This is bank account voice menu");
        System.out.println("-------------------------------");
        System.out.println("Example: balance");
        System.out.println("Example: withdraw zero point five");
        System.out.println("Example: deposit one two three");
        System.out.println("Example: back");
        System.out.println("-------------------------------");
        double savings = .0;
        recognizer.startRecognition(true);
        while (true) {
            String utterance = recognizer.getResult().getHypothesis();
            if (utterance.endsWith("back")) {
                break;
            } else if (utterance.startsWith("deposit")) {
                double deposit = parseNumber(utterance.split("\\s"));
                savings += deposit;
                System.out.format("Deposited: $%.2f\n", deposit);
            } else if (utterance.startsWith("withdraw")) {
                double withdraw = parseNumber(utterance.split("\\s"));
                savings -= withdraw;
                System.out.format("Withdrawn: $%.2f\n", withdraw);
            } else if (!utterance.endsWith("balance")) {
                System.out.println("Unrecognized command: " + utterance);
            }
            // The balance is reported after every command, including errors.
            System.out.format("Your savings: $%.2f\n", savings);
        }
        recognizer.stopRecognition();
    }

    /**
     * Echoes free-form weather-forecast phrases (language-model recognizer)
     * until the user says "the end".
     */
    private static void recognizeWeather(LiveSpeechRecognizer recognizer) {
        System.out.println("Try some forecast. End with \"the end\"");
        System.out.println("-------------------------------------");
        System.out.println("Example: mostly dry some fog patches tonight");
        System.out.println("Example: sunny spells on wednesday");
        System.out.println("-------------------------------------");
        recognizer.startRecognition(true);
        while (true) {
            String utterance = recognizer.getResult().getHypothesis();
            if (utterance.equals("the end"))
                break;
            else
                System.out.println(utterance);
        }
        recognizer.stopRecognition();
    }

    /**
     * Builds the three recognizers, then runs the top-level menu loop until
     * the user asks to exit. Only one recognizer runs at a time, so the menu
     * recognizer is stopped before entering a sub-menu and restarted after.
     *
     * @param args unused
     * @throws Exception on configuration or microphone failure
     */
    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        configuration.setAcousticModelPath(ACOUSTIC_MODEL);
        configuration.setDictionaryPath(DICTIONARY_PATH);
        configuration.setGrammarPath(GRAMMAR_PATH);

        // Menu + bank account: JSGF grammar "dialog".
        configuration.setUseGrammar(true);
        configuration.setGrammarName("dialog");
        LiveSpeechRecognizer jsgfRecognizer =
                new LiveSpeechRecognizer(configuration);

        // Digits: GrXML grammar.
        configuration.setGrammarName("digits.grxml");
        LiveSpeechRecognizer grxmlRecognizer =
                new LiveSpeechRecognizer(configuration);

        // Weather: statistical language model instead of a grammar.
        configuration.setUseGrammar(false);
        configuration.setLanguageModelPath(LANGUAGE_MODEL);
        LiveSpeechRecognizer lmRecognizer =
                new LiveSpeechRecognizer(configuration);

        jsgfRecognizer.startRecognition(true);
        while (true) {
            System.out.println("Choose menu item:");
            System.out.println("Example: go to the bank account");
            System.out.println("Example: exit the program");
            System.out.println("Example: weather forecast");
            System.out.println("Example: digits\n");
            String utterance = jsgfRecognizer.getResult().getHypothesis();
            if (utterance.startsWith("exit"))
                break;
            if (utterance.equals("digits")) {
                jsgfRecognizer.stopRecognition();
                recognizeDigits(grxmlRecognizer);
                jsgfRecognizer.startRecognition(true);
            }
            if (utterance.equals("bank account")) {
                jsgfRecognizer.stopRecognition();
                // The "dialog" grammar also covers the bank commands, so the
                // same JSGF recognizer is reused for the bank sub-menu.
                recognizerBankAccount(jsgfRecognizer);
                jsgfRecognizer.startRecognition(true);
            }
            if (utterance.endsWith("weather forecast")) {
                jsgfRecognizer.stopRecognition();
                recognizeWeather(lmRecognizer);
                jsgfRecognizer.startRecognition(true);
            }
        }
        jsgfRecognizer.stopRecognition();
    }
}

View file

@ -0,0 +1,123 @@
package edu.cmu.sphinx.demo.speakerid;
import java.net.URL;
import java.util.ArrayList;
import edu.cmu.sphinx.api.Configuration;
import edu.cmu.sphinx.api.SpeechResult;
import edu.cmu.sphinx.api.StreamSpeechRecognizer;
import edu.cmu.sphinx.decoder.adaptation.Stats;
import edu.cmu.sphinx.decoder.adaptation.Transform;
import edu.cmu.sphinx.speakerid.Segment;
import edu.cmu.sphinx.speakerid.SpeakerCluster;
import edu.cmu.sphinx.speakerid.SpeakerIdentification;
import edu.cmu.sphinx.util.TimeFrame;
public class SpeakerIdentificationDemo {

    /**
     * Returns string version of the given time in milliseconds.
     *
     * @param milliseconds time in milliseconds
     * @return time in format m:ss, seconds zero-padded to two digits
     */
    public static String time(int milliseconds) {
        int minutes = milliseconds / 60000;
        int seconds = (int) Math.round((double) (milliseconds % 60000) / 1000);
        // Rounding can yield 60 seconds (e.g. 59 950 ms); roll it over into
        // the minutes so we never print "0:60". The original also printed
        // unpadded seconds ("1:5"), violating the documented mm:ss format.
        if (seconds == 60) {
            minutes++;
            seconds = 0;
        }
        return String.format("%d:%02d", minutes, seconds);
    }

    /**
     * Prints, for each speaker cluster, the intervals during which that
     * speaker talks.
     *
     * @param speakers
     *            An array of clusters for which it is needed to be printed the
     *            speakers intervals
     * @param fileName
     *            The name of file we are processing
     */
    public static void printSpeakerIntervals(
            ArrayList<SpeakerCluster> speakers, String fileName) {
        int idx = 0;
        for (SpeakerCluster spk : speakers) {
            idx++;
            ArrayList<Segment> segments = spk.getSpeakerIntervals();
            for (Segment seg : segments)
                System.out.println(fileName + " " + " "
                        + time(seg.getStartTime()) + " "
                        + time(seg.getLength()) + " Speaker" + idx);
        }
    }

    /**
     * Decodes the audio twice per speaker: a first pass over that speaker's
     * segments collects adaptation statistics, then a second pass decodes
     * again with the resulting transform applied.
     *
     * @param speakers
     *            An array of clusters for which it is needed to get the
     *            speakers intervals for decoding with per-speaker adaptation
     *            with diarization.
     * @param url
     *            Url for the audio
     * @throws Exception if something went wrong
     */
    public static void speakerAdaptiveDecoding(ArrayList<SpeakerCluster> speakers,
            URL url) throws Exception {
        Configuration configuration = new Configuration();
        // Models are loaded from the sphinx4-data jar on the classpath.
        configuration
                .setAcousticModelPath("resource:/edu/cmu/sphinx/models/en-us/en-us");
        configuration
                .setDictionaryPath("resource:/edu/cmu/sphinx/models/en-us/cmudict-en-us.dict");
        configuration
                .setLanguageModelPath("resource:/edu/cmu/sphinx/models/en-us/en-us.lm.dmp");
        StreamSpeechRecognizer recognizer = new StreamSpeechRecognizer(
                configuration);
        TimeFrame t;
        SpeechResult result;
        for (SpeakerCluster spk : speakers) {
            Stats stats = recognizer.createStats(1);
            ArrayList<Segment> segments = spk.getSpeakerIntervals();
            // Pass 1: decode only this speaker's segments to gather stats.
            for (Segment s : segments) {
                long startTime = s.getStartTime();
                long endTime = s.getStartTime() + s.getLength();
                t = new TimeFrame(startTime, endTime);
                recognizer.startRecognition(url.openStream(), t);
                while ((result = recognizer.getResult()) != null) {
                    stats.collect(result);
                }
                recognizer.stopRecognition();
            }
            // Create the Transformation from the collected statistics and
            // apply it so the next pass is adapted to this speaker.
            Transform profile = stats.createTransform();
            recognizer.setTransform(profile);
            // Pass 2: decode again with the updated SpeakerProfile.
            for (Segment seg : segments) {
                long startTime = seg.getStartTime();
                long endTime = seg.getStartTime() + seg.getLength();
                t = new TimeFrame(startTime, endTime);
                recognizer.startRecognition(url.openStream(), t);
                while ((result = recognizer.getResult()) != null) {
                    System.out.format("Hypothesis: %s\n",
                            result.getHypothesis());
                }
                recognizer.stopRecognition();
            }
        }
    }

    /**
     * Clusters the bundled test recording by speaker, prints each speaker's
     * intervals, then runs per-speaker adaptive decoding.
     *
     * @param args unused
     * @throws Exception on I/O or recognition failure
     */
    public static void main(String[] args) throws Exception {
        SpeakerIdentification sd = new SpeakerIdentification();
        URL url = SpeakerIdentificationDemo.class.getResource("test.wav");
        ArrayList<SpeakerCluster> clusters = sd.cluster(url.openStream());
        printSpeakerIntervals(clusters, url.getPath());
        speakerAdaptiveDecoding(clusters, url);
    }
}

View file

@ -0,0 +1,100 @@
/*
* Copyright 1999-2013 Carnegie Mellon University.
* Portions Copyright 2004 Sun Microsystems, Inc.
* Portions Copyright 2004 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*/
package edu.cmu.sphinx.demo.transcriber;
import java.io.InputStream;
import edu.cmu.sphinx.api.Configuration;
import edu.cmu.sphinx.api.SpeechResult;
import edu.cmu.sphinx.api.StreamSpeechRecognizer;
import edu.cmu.sphinx.decoder.adaptation.Stats;
import edu.cmu.sphinx.decoder.adaptation.Transform;
import edu.cmu.sphinx.result.WordResult;
/**
* A simple example that shows how to transcribe a continuous audio file that
* has multiple utterances in it.
*/
public class TranscriberDemo {

    /** Classpath location of the demo recording. */
    private static final String AUDIO_RESOURCE =
            "/edu/cmu/sphinx/demo/aligner/10001-90210-01803.wav";

    /** Size in bytes of the RIFF/WAV header preceding the raw PCM samples. */
    private static final int WAV_HEADER_SIZE = 44;

    /**
     * Opens the demo audio resource and positions the stream just past the
     * WAV header so only raw PCM data remains. The caller owns the stream
     * and must close it.
     */
    private static InputStream openAudioStream() throws java.io.IOException {
        InputStream stream = TranscriberDemo.class.getResourceAsStream(AUDIO_RESOURCE);
        if (stream == null) {
            throw new IllegalStateException(
                    "audio resource not found: " + AUDIO_RESOURCE);
        }
        // InputStream.skip may skip fewer bytes than requested, so loop
        // until the whole header has been consumed.
        long remaining = WAV_HEADER_SIZE;
        while (remaining > 0) {
            long skipped = stream.skip(remaining);
            if (skipped <= 0) {
                break; // end of stream
            }
            remaining -= skipped;
        }
        return stream;
    }

    /**
     * Transcribes the demo recording three times: a plain pass, a pass that
     * collects speaker statistics, and a final pass decoded with the speaker
     * transform applied. The original version opened the same resource three
     * times inline and never closed any of the streams.
     *
     * @param args unused
     * @throws Exception on configuration, I/O, or recognition failure
     */
    public static void main(String[] args) throws Exception {
        System.out.println("Loading models...");

        Configuration configuration = new Configuration();
        // Load model from the jar
        configuration
                .setAcousticModelPath("resource:/edu/cmu/sphinx/models/en-us/en-us");
        // You can also load model from folder
        // configuration.setAcousticModelPath("file:en-us");
        configuration
                .setDictionaryPath("resource:/edu/cmu/sphinx/models/en-us/cmudict-en-us.dict");
        configuration
                .setLanguageModelPath("resource:/edu/cmu/sphinx/models/en-us/en-us.lm.dmp");

        StreamSpeechRecognizer recognizer = new StreamSpeechRecognizer(
                configuration);
        SpeechResult result;

        // Pass 1: simple recognition with the generic model.
        try (InputStream stream = openAudioStream()) {
            recognizer.startRecognition(stream);
            while ((result = recognizer.getResult()) != null) {
                System.out.format("Hypothesis: %s\n", result.getHypothesis());
                System.out.println("List of recognized words and their times:");
                for (WordResult r : result.getWords()) {
                    System.out.println(r);
                }
                System.out.println("Best 3 hypothesis:");
                for (String s : result.getNbest(3)) {
                    System.out.println(s);
                }
            }
            recognizer.stopRecognition();
        }

        // Pass 2: live adaptation to the speaker. The Stats object collects
        // speaker-specific data from the recognition results.
        Stats stats = recognizer.createStats(1);
        try (InputStream stream = openAudioStream()) {
            recognizer.startRecognition(stream);
            while ((result = recognizer.getResult()) != null) {
                stats.collect(result);
            }
            recognizer.stopRecognition();
        }

        // Transform represents the speech profile; apply it and decode again.
        Transform transform = stats.createTransform();
        recognizer.setTransform(transform);
        try (InputStream stream = openAudioStream()) {
            recognizer.startRecognition(stream);
            while ((result = recognizer.getResult()) != null) {
                System.out.format("Hypothesis: %s\n", result.getHypothesis());
            }
            recognizer.stopRecognition();
        }
    }
}

View file

@ -0,0 +1,42 @@
<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
<!--
/**
* Copyright 1999-2004 Carnegie Mellon University.
* Portions Copyright 2004 Sun Microsystems, Inc.
* Portions Copyright 2004 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
-->
<html>
<head><title>Sphinx-4 Aligner Demo</title>
<style TYPE="text/css">
pre { font-size: medium; background: #f0f8ff; padding: 2mm;
border-style: ridge ; color: teal }
code { font-size: medium; color: teal }
</style></head>
<body>
<span style="font-family: Times New Roman; ">
<div style="text-align: center;">
<table bgcolor="#99CCFF" width="100%">
<tr>
<td align=center width="100%">
<h1><i>Sphinx-4</i> Aligner Demo</h1>
</td>
</tr>
</table>
</div>
</span>
Aligns audio file to transcription and get times of
words. Can be useful for closed captioning.
</body>
</html>

View file

@ -0,0 +1,29 @@
#JSGF V1.0;
grammar dialog;
<digit> = oh |
zero |
one |
two |
three |
four |
five |
six |
seven |
eight |
nine ;
<number> = <digit>+ [point <digit>+];
<menu_command> = digits |
[go to [the]] bank account |
weather forecast |
exit [[the] program] ;
<bank_command> = [show | check] balance |
deposit <number> |
withdraw <number> |
back ;
public <command> = <menu_command> | <bank_command>;

View file

@ -0,0 +1,21 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE grammar PUBLIC "-//W3C//DTD GRAMMAR 1.0//EN" "http://www.w3.org/TR/speech-grammar/grammar.dtd">
<grammar xmlns="http://www.w3.org/2001/06/grammar" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xml:lang="en" xsi:schemaLocation="http://www.w3.org/2001/06/grammar http://www.w3.org/TR/speech-grammar/grammar.xsd" version="1.0" mode="voice" root="digits">
<rule id="digits" scope="public">
<item repeat="1-">
<one-of>
<item> one </item>
<item> two </item>
<item> three </item>
<item> four </item>
<item> five </item>
<item> six </item>
<item> seven </item>
<item> eight </item>
<item> nine </item>
<item> zero </item>
<item> oh </item>
</one-of>
</item>
</rule>
</grammar>

View file

@ -0,0 +1 @@
ec4cda3a0b3a0fcaa4d8685188f1f79f6d7f5bcf

View file

@ -0,0 +1,80 @@
Will remain dry apart from perhaps a little drizzle near the northwest
coast.
Extensive mist and fog patches expected also.
Some fog also.
Frost likely in many places.
Any rain or drizzle will die out tomorrow and sunny spells will develop.
Cloudy tonight in the north and northeast with some light rain or drizzle
in places.
Fresh or strong gusty southwest to west winds gradually veering
northwesterly.
Mostly dry calm and clear overnight with little or no wind.
Severe ground frost developing.
Rain or drizzle in the north and northeast will clear tomorrow leaving
a dry day with sunny spells in most areas.
Scattered blustery showers largely dying out later.
Fair weather in all areas, rain in the south and west later.
Showers will slowly become more isolated tonight.
A very cold night.
Continuing mild with a moderate to fresh southerly breeze.
Scattered showers with a risk of thunder later.
Becoming windy overnight but very mild.
Widespread haze and mist with scattered outbreaks of rain.
A few sunny breaks will develop by afternoon principally in the western
half of the country.
A second low pressure centre will move in across northern areas tomorrow.
Cold and windy with occasional showers.
Continuing rather cloudy over most parts of the country.
Tomorrow any fog, low cloud and drizzle will clear with most places dry
and sunny.
It will be cold overnight in most parts of the country with some clear
spells.
A weak ridge of high pressure will cross the country tonight.
Rain spreading to all areas from the west followed later by heavy showers.
Weather mainly fair if rather hazy.
The rest of the night will be dry in most areas though a few showers
are still possibly across northern and northeastern counties.
Windy with rain extending to all parts during the morning.
The rain becoming heavy in many areas.
Very windy with strong southerly winds gusting up to fifty or sixty mph.
Rain in the south and west will spread to remaining north east areas
overnight, heavy and persistent in places.
Clouds will increase from the west as the day goes on.
Misty in the south and southeast with patchy drizzle and a risk of fog.
The rest of the country will be overcast with outbreaks of rain heaviest
and most persistent in the south and southwest.

View file

@ -0,0 +1,104 @@
<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
<!--
/**
* Copyright 1999-2004 Carnegie Mellon University.
* Portions Copyright 2004 Sun Microsystems, Inc.
* Portions Copyright 2004 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
-->
<html>
<head><title>Sphinx-4 Transcriber Demo</title>
<style TYPE="text/css">
pre { font-size: medium; background: #f0f8ff; padding: 2mm;
border-style: ridge ; color: teal }
code { font-size: medium; color: teal }
</style></head>
<body>
<span style="font-family: Times New Roman; ">
<div style="text-align: center;">
<table bgcolor="#99CCFF" width="100%">
<tr>
<td align=center width="100%">
<h1><i>Sphinx-4</i> Transcriber Demo</h1>
</td>
</tr>
</table>
</div>
</span>
<span style="font-family: Arial; font-size: x-small; ">
<p>
A simple Sphinx-4 application that transcribes a continuous audio file
that has multiple utterances. The audio file should contain connected
digits data. The default file, called "10001-90210-01803.wav", contains
three utterances, separated by silences.
People who want to transcribe non-digits data should
modify the <code>config.xml</code> file to use the correct grammar,
language model, and linguist to do so. Please refer to the
<a href="../../../../../../../doc/ProgrammersGuide.html">Programmer's Guide</a>
on how to modify the configuration file for your purposes.
</p>
<h3>Building</h3>
<p>
Check if the <code>bin</code> directory already has the
<code>Transcriber.jar</code> file. If not, type the following in the top
level directory:
</p>
<code>ant -find demo.xml</code>
<h3>Running</h3>
<p>
To run the demo, type:
</p>
<code>sphinx4 &gt; java -jar bin/Transcriber.jar</code>
<p>
You will see the following result, with each utterance on its own line:
<pre>
one zero zero zero one
nine oh two one oh
zero one eight zero three
</pre>
<p>
<span style="color: #FF0000; "><b>NOTE:</b></span>
<ol>
<li>
Make sure that you are using Java<sup>TM</sup> 2 SDK, Standard Edition,
v1.4 or higher.
</li>
<li>
If you have the source distribution, make sure that the JAR file
<code>lib/sphinx4.jar</code> is built. If not, go to the top level
directory and type: <code>ant</code>
</li>
<li>
You can supply your own test files, but they must be digits data.
Just make sure that the audio format is the same as in the
config.xml file, which is 16-bit signed PCM-linear, 16kHz, little-endian.
The audio file format can be any format readable by Java Sound,
e.g., .wav, .au. To test your own file, supply it as an argument.
Suppose your test file is called <code>test.wav</code>, then:
<p><code>java -jar bin/Transcriber.jar test.wav</code>
</li>
</ol>
</p>
</span>
<hr>
Copyright 1999-2004 Carnegie Mellon University.
<br>
Portions Copyright 2002-2004 Sun Microsystems, Inc.
<br>
Portions Copyright 2002-2004 Mitsubishi Electric Research Laboratories.
<br>
All Rights Reserved. Usage is subject to <a href="../../../../../../../license.terms">license terms</a>.
</body>
</html>