Added voice control

Former-commit-id: 6f69079bf44f0d8f9ae40de6b0f1638d103464c2
This commit is contained in:
Ziver Koc 2015-05-13 21:14:10 +00:00
parent 35c92407a3
commit 53da641909
863 changed files with 192681 additions and 0 deletions

View file

@@ -0,0 +1,224 @@
package com.darkprograms.speech.microphone;
import javax.sound.sampled.*;
import java.io.Closeable;
import java.io.File;
/***************************************************************************
* Microphone class that contains methods to capture audio from microphone
*
* @author Luke Kuza, Aaron Gokaslan
***************************************************************************/
public class Microphone implements Closeable{
/**
* TargetDataLine variable to receive data from microphone
*/
private TargetDataLine targetDataLine;
/**
* Enum for current Microphone state
*/
public enum CaptureState {
PROCESSING_AUDIO, STARTING_CAPTURE, CLOSED
}
/**
* Variable for enum
*/
CaptureState state;
/**
* Variable for the audio's saved file type
*/
private AudioFileFormat.Type fileType;
/**
* Variable that holds the saved audio file
*/
private File audioFile;
/**
* Gets the current state of Microphone
*
* @return PROCESSING_AUDIO is returned when the Thread is recording Audio and/or saving it to a file<br>
* STARTING_CAPTURE is returned if the Thread is setting variables<br>
* CLOSED is returned if the Thread is not doing anything/not capturing audio
*/
public CaptureState getState() {
return state;
}
/**
* Sets the current state of Microphone
*
* @param state State from enum
*/
private void setState(CaptureState state) {
this.state = state;
}
public File getAudioFile() {
return audioFile;
}
public void setAudioFile(File audioFile) {
this.audioFile = audioFile;
}
public AudioFileFormat.Type getFileType() {
return fileType;
}
public void setFileType(AudioFileFormat.Type fileType) {
this.fileType = fileType;
}
public TargetDataLine getTargetDataLine() {
return targetDataLine;
}
public void setTargetDataLine(TargetDataLine targetDataLine) {
this.targetDataLine = targetDataLine;
}
/**
* Constructor
*
* @param fileType File type to save the audio in<br>
* Example, to save as WAVE use AudioFileFormat.Type.WAVE
*/
public Microphone(AudioFileFormat.Type fileType) {
setState(CaptureState.CLOSED);
setFileType(fileType);
initTargetDataLine();
}
/**
* Initializes the target data line.
*/
private void initTargetDataLine(){
DataLine.Info dataLineInfo = new DataLine.Info(TargetDataLine.class, getAudioFormat());
try {
setTargetDataLine((TargetDataLine) AudioSystem.getLine(dataLineInfo));
} catch (LineUnavailableException e) {
// TODO Auto-generated catch block
e.printStackTrace();
return;
}
}
/**
* Captures audio from the microphone and saves it to a file
*
* @param audioFile The File to save the audio to
* @throws LineUnavailableException If the microphone line cannot be opened
*/
public void captureAudioToFile(File audioFile) throws LineUnavailableException {
setState(CaptureState.STARTING_CAPTURE);
setAudioFile(audioFile);
if(getTargetDataLine() == null){
initTargetDataLine();
}
//Get Audio
new Thread(new CaptureThread()).start();
}
/**
* Captures audio from the microphone and saves it to a file
*
* @param audioFile The full path (String) of the file you want to save the audio to
* @throws LineUnavailableException If the microphone line cannot be opened
*/
public void captureAudioToFile(String audioFile) throws LineUnavailableException {
File file = new File(audioFile);
captureAudioToFile(file);
}
/**
* The audio format to save in
*
* @return Returns AudioFormat to be used later when capturing audio from microphone
*/
public AudioFormat getAudioFormat() {
float sampleRate = 8000.0F;
//8000,11025,16000,22050,44100
int sampleSizeInBits = 16;
//8,16
int channels = 1;
//1,2
boolean signed = true;
//true,false
boolean bigEndian = false;
//true,false
return new AudioFormat(sampleRate, sampleSizeInBits, channels, signed, bigEndian);
}
/**
* Opens the microphone, starting the targetDataLine.
* If it's already open, it does nothing.
*/
public void open(){
if(getTargetDataLine()==null){
initTargetDataLine();
}
if(!getTargetDataLine().isOpen() && !getTargetDataLine().isRunning() && !getTargetDataLine().isActive()){
try {
setState(CaptureState.PROCESSING_AUDIO);
getTargetDataLine().open(getAudioFormat());
getTargetDataLine().start();
} catch (LineUnavailableException e) {
// TODO Auto-generated catch block
e.printStackTrace();
return;
}
}
}
/**
* Close the microphone capture, saving all processed audio to the specified file.<br>
* If already closed, this does nothing
*/
public void close() {
if (getState() != CaptureState.CLOSED) {
getTargetDataLine().stop();
getTargetDataLine().close();
setState(CaptureState.CLOSED);
}
}
/**
* Thread to capture the audio from the microphone and save it to a file
*/
private class CaptureThread implements Runnable {
/**
* Run method for thread
*/
public void run() {
try {
AudioFileFormat.Type fileType = getFileType();
File audioFile = getAudioFile();
open();
AudioSystem.write(new AudioInputStream(getTargetDataLine()), fileType, audioFile);
//Will write to File until it's closed.
} catch (Exception ex) {
ex.printStackTrace();
}
}
}
}
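
A minimal usage sketch for the class above; the output path and the five-second recording window are arbitrary choices for illustration, not part of the library:

import com.darkprograms.speech.microphone.Microphone;
import javax.sound.sampled.AudioFileFormat;
import java.io.File;

public class MicrophoneDemo {
    public static void main(String[] args) throws Exception {
        Microphone mic = new Microphone(AudioFileFormat.Type.WAVE);
        mic.captureAudioToFile(new File("test.wav")); // hypothetical output path; starts the capture thread
        Thread.sleep(5000);                           // record for roughly five seconds
        mic.close();                                  // stops the line, letting the WAVE file finish writing
    }
}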

View file

@@ -0,0 +1,288 @@
package com.darkprograms.speech.microphone;
import javax.sound.sampled.AudioFileFormat;
import com.darkprograms.speech.util.*;
/********************************************************************************************
* Microphone Analyzer class, detects pitch and volume while extending the microphone class.
* Implemented as a precursor to a Voice Activity Detection (VAD) algorithm.
* Currently can be used for audio data analysis.
* Dependencies: FFT.java & Complex.java. Both found in the utility package.
* @author Aaron Gokaslan
********************************************************************************************/
public class MicrophoneAnalyzer extends Microphone {
/**
* Constructor
* @param fileType The file type you want to save in. FLAC recommended.
*/
public MicrophoneAnalyzer(AudioFileFormat.Type fileType){
super(fileType);
}
/**
* Gets the volume of the microphone input
* The default interval is 100 ms, so allow 100 ms for this method to run, or specify a smaller interval.
* @return The volume of the microphone input or -1 if data-line is not available
*/
public int getAudioVolume(){
return getAudioVolume(100);
}
/**
* Gets the volume of the microphone input
* @param interval: The length of time you would like to calculate the volume over in milliseconds.
* @return The volume of the microphone input or -1 if data-line is not available.
*/
public int getAudioVolume(int interval){
return calculateAudioVolume(this.getNumOfBytes(interval/1000d));
}
/**
* Gets the volume of microphone input
* @param numOfBytes The number of bytes you want for volume interpretation
* @return The volume over the specified number of bytes or -1 if data-line is unavailable.
*/
private int calculateAudioVolume(int numOfBytes){
byte[] data = getBytes(numOfBytes);
if(data==null)
return -1;
return calculateRMSLevel(data);
}
/**
* Calculates the volume of AudioData which may be buffered data from a data-line.
* @param audioData The byte[] you want to determine the volume of
* @return the calculated volume of audioData
*/
public static int calculateRMSLevel(byte[] audioData){
long lSum = 0;
for(int i=0; i<audioData.length; i++)
lSum = lSum + audioData[i];
double dAvg = lSum / audioData.length;
double sumMeanSquare = 0d;
for(int j=0; j<audioData.length; j++)
sumMeanSquare = sumMeanSquare + Math.pow(audioData[j] - dAvg, 2d);
double averageMeanSquare = sumMeanSquare / audioData.length;
return (int)(Math.pow(averageMeanSquare,0.5d) + 0.5);
}
/**
* Returns the number of bytes recorded over the given interval; useful when figuring out how long to record.
* @param seconds The length in seconds
* @return The number of bytes the microphone will output over the specified time.
*/
public int getNumOfBytes(int seconds){
return getNumOfBytes((double)seconds);
}
/**
* Returns the number of bytes recorded over the given interval; useful when figuring out how long to record.
* @param seconds The length in seconds
* @return the number of bytes the microphone will output over the specified time.
*/
public int getNumOfBytes(double seconds){
return (int)(seconds*getAudioFormat().getSampleRate()*getAudioFormat().getFrameSize()+.5);
}
/**
* Returns a byte[] containing the specified number of bytes read from the data line
* @param numOfBytes The length of the returned array.
* @return The requested array, or null if the data line is unavailable.
*/
private byte[] getBytes(int numOfBytes){
if(getTargetDataLine()!=null){
byte[] data = new byte[numOfBytes];
this.getTargetDataLine().read(data, 0, numOfBytes);
return data;
}
return null;//If data cannot be read, returns a null array.
}
/**
* Calculates the fundamental frequency. In other words, it calculates pitch,
* except pitch is far more subjective and subtle. Also note that readings may occasionally
* be in error due to the complex nature of sound. This feature is in beta.
* @return The frequency of the sound in Hertz.
*/
public int getFrequency(){
try {
return getFrequency(4096);
} catch (Exception e) {
//This will never happen. Ever...
return -666;
}
}
/**
* Calculates the frequency based off of the number of bytes.
* CAVEAT: THE NUMBER OF BYTES MUST BE A MULTIPLE OF 2!!!
* @param numOfBytes The number of bytes which must be a multiple of 2!!!
* @return The calculated frequency in Hertz.
*/
public int getFrequency(int numOfBytes) throws Exception{
if(getTargetDataLine() == null){
return -1;
}
byte[] data = new byte[numOfBytes+1];//One byte is lost during conversion
this.getTargetDataLine().read(data, 0, numOfBytes);
return getFrequency(data);
}
/**
* Calculates the frequency based off of the byte array,
* @param bytes The audioData you want to analyze
* @return The calculated frequency in Hertz.
*/
public int getFrequency(byte[] bytes){
double[] audioData = this.bytesToDoubleArray(bytes);
audioData = applyHanningWindow(audioData);
Complex[] complex = new Complex[audioData.length];
for(int i = 0; i<complex.length; i++){
complex[i] = new Complex(audioData[i], 0);
}
Complex[] fftTransformed = FFT.fft(complex);
return this.calculateFundamentalFrequency(fftTransformed, 4);
}
/**
* Applies a Hanning Window to the data set.
* Hanning Windows are used to increase the accuracy of the FFT.
* One should always apply a window to a dataset before applying an FFT
* @param data The data you want to apply the window to
* @return The windowed data set
*/
private double[] applyHanningWindow(double[] data){
return applyHanningWindow(data, 0, data.length);
}
/**
* Applies a Hanning Window to the data set.
* Hanning Windows are used to increase the accuracy of the FFT.
* One should always apply a window to a dataset before applying an FFT
* @param signal_in The data you want to apply the window to
* @param pos The starting index you want to apply the window from
* @param size The size of the window
* @return The windowed data set
*/
private double[] applyHanningWindow(double[] signal_in, int pos, int size){
for (int i = pos; i < pos + size; i++){
int j = i - pos; // j = index into Hann window function
signal_in[i] = (double)(signal_in[i] * 0.5 * (1.0 - Math.cos(2.0 * Math.PI * j / size)));
}
return signal_in;
}
/**
* This method calculates the fundamental frequency using the Harmonic Product Spectrum.
* It downsamples the FFT data N times and multiplies the arrays
* together to determine the fundamental frequency. This is slightly more computationally
* expensive, but much more accurate. In simpler terms, the function will remove the harmonic frequencies
* which occur at every N value by finding the lowest common divisor among them.
* @param fftData The array returned by the FFT
* @param N the number of times you wish to downsample.
* WARNING: The more times you downsample, the lower the maximum detectable frequency is.
* @return The fundamental frequency in Hertz
*/
private int calculateFundamentalFrequency(Complex[] fftData, int N){
if(N<=0 || fftData == null){ return -1; } //error case
final int LENGTH = fftData.length;//Used to calculate bin size
fftData = removeNegativeFrequencies(fftData);
Complex[][] data = new Complex[N][fftData.length/N];
for(int i = 0; i<N; i++){
for(int j = 0; j<data[0].length; j++){
data[i][j] = fftData[j*(i+1)];
}
}
Complex[] result = new Complex[fftData.length/N];//Combines the arrays
for(int i = 0; i<result.length; i++){
Complex tmp = new Complex(1,0);
for(int j = 0; j<N; j++){
tmp = tmp.times(data[j][i]);
}
result[i] = tmp;
}
int index = this.findMaxMagnitude(result);
return index*getFFTBinSize(LENGTH);
}
/**
* Removes the negative-frequency half of the transform, which is redundant for the real-valued audio signal.
* @param c The Complex[] you want to remove the redundant half from
* @return The cleaned data
*/
private Complex[] removeNegativeFrequencies(Complex[] c){
Complex[] out = new Complex[c.length/2];
for(int i = 0; i<out.length; i++){
out[i] = c[i];
}
return out;
}
/**
* Calculates the FFT bin size based off the length of the array.
* Each FFT bin represents the range of frequencies treated as one.
* For example, if the bin size is 5, then the algorithm is precise to within 5 Hz.
* Precondition: length cannot be 0.
* @param fftDataLength The length of the array used to feed the FFT algorithm
* @return FFTBin size
*/
private int getFFTBinSize(int fftDataLength){
return (int)(getAudioFormat().getSampleRate()/fftDataLength+.5);
}
/**
* Calculates index of the maximum magnitude in a complex array.
* @param input The Complex[] you want to get the max magnitude from.
* @return The index of the max magnitude
*/
private int findMaxMagnitude(Complex[] input){
//Calculates Maximum Magnitude of the array
double max = Double.MIN_VALUE;
int index = -1;
for(int i = 0; i<input.length; i++){
Complex c = input[i];
double tmp = c.getMagnitude();
if(tmp>max){
max = tmp;
index = i;
}
}
return index;
}
/**
* Converts bytes from a TargetDataLine into a double[] allowing the information to be read.
* NOTE: One byte is lost in the conversion so don't expect the arrays to be the same length!
* @param bufferData The buffer read in from the target data line
* @return The double[] that the buffer has been converted into.
*/
private double[] bytesToDoubleArray(byte[] bufferData){
final int bytesRecorded = bufferData.length;
final int bytesPerSample = getAudioFormat().getSampleSizeInBits()/8;
final double amplification = 100.0; // choose a number as you like
double[] micBufferData = new double[bytesRecorded - bytesPerSample +1];
for (int index = 0, floatIndex = 0; index < bytesRecorded - bytesPerSample + 1; index += bytesPerSample, floatIndex++) {
double sample = 0;
for (int b = 0; b < bytesPerSample; b++) {
int v = bufferData[index + b];
if (b < bytesPerSample - 1 || bytesPerSample == 1) {
v &= 0xFF;
}
sample += v << (b * 8);
}
double sample32 = amplification * (sample / 32768.0);
micBufferData[floatIndex] = sample32;
}
return micBufferData;
}
}
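
A brief sketch of how the analyzer above might be polled. FLACFileWriter.FLAC comes from the javaFlacEncoder dependency already used elsewhere in this commit, and the ten-iteration loop is arbitrary:

import com.darkprograms.speech.microphone.MicrophoneAnalyzer;
import javaFlacEncoder.FLACFileWriter;

public class AnalyzerDemo {
    public static void main(String[] args) {
        MicrophoneAnalyzer mic = new MicrophoneAnalyzer(FLACFileWriter.FLAC);
        mic.open(); // opens and starts the TargetDataLine
        for (int i = 0; i < 10; i++) {
            int volume = mic.getAudioVolume();  // RMS level over ~100 ms of samples
            int frequency = mic.getFrequency(); // estimated fundamental frequency in Hz
            System.out.println("volume=" + volume + " frequency=" + frequency + " Hz");
        }
        mic.close();
    }
}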

View file

@@ -0,0 +1,120 @@
package com.darkprograms.speech.recognizer;
import javaFlacEncoder.FLACEncoder;
import javaFlacEncoder.FLACFileOutputStream;
import javaFlacEncoder.StreamConfiguration;
import javax.sound.sampled.AudioFormat;
import javax.sound.sampled.AudioInputStream;
import javax.sound.sampled.AudioSystem;
import java.io.File;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
/*************************************************************************************************************
* Class that contains methods to encode Wave files to FLAC files
* THIS IS THANKS TO THE javaFlacEncoder Project created here: http://sourceforge.net/projects/javaflacencoder/
************************************************************************************************************/
public class FlacEncoder {
/**
* Constructor
*/
public FlacEncoder() {
}
/**
* Converts a wave file to a FLAC file (in order to POST the data to Google and retrieve a response) <br>
* Sample Rate is 8000 by default
*
* @param inputFile Input wave file
* @param outputFile Output FLAC file
*/
public void convertWaveToFlac(File inputFile, File outputFile) {
StreamConfiguration streamConfiguration = new StreamConfiguration();
streamConfiguration.setSampleRate(8000);
streamConfiguration.setBitsPerSample(16);
streamConfiguration.setChannelCount(1);
try {
AudioInputStream audioInputStream = AudioSystem.getAudioInputStream(inputFile);
AudioFormat format = audioInputStream.getFormat();
int frameSize = format.getFrameSize();
FLACEncoder flacEncoder = new FLACEncoder();
FLACFileOutputStream flacOutputStream = new FLACFileOutputStream(outputFile);
flacEncoder.setStreamConfiguration(streamConfiguration);
flacEncoder.setOutputStream(flacOutputStream);
flacEncoder.openFLACStream();
int frameLength = (int) audioInputStream.getFrameLength();
if(frameLength <= AudioSystem.NOT_SPECIFIED){
frameLength = 16384;//Arbitrary file size
}
int[] sampleData = new int[frameLength];
byte[] samplesIn = new byte[frameSize];
int i = 0;
while (audioInputStream.read(samplesIn, 0, frameSize) != -1) {
if (i == sampleData.length) {//Grow the buffer if the guessed frame length was too small
int[] grown = new int[sampleData.length * 2];
System.arraycopy(sampleData, 0, grown, 0, sampleData.length);
sampleData = grown;
}
if (frameSize != 1) {
ByteBuffer bb = ByteBuffer.wrap(samplesIn);
bb.order(ByteOrder.LITTLE_ENDIAN);
short shortVal = bb.getShort();
sampleData[i] = shortVal;
} else {
sampleData[i] = samplesIn[0];
}
i++;
}
sampleData = truncateNullData(sampleData, i);
flacEncoder.addSamples(sampleData, i);
flacEncoder.encodeSamples(i, false);
flacEncoder.encodeSamples(flacEncoder.samplesAvailableToEncode(), true);
audioInputStream.close();
flacOutputStream.close();
} catch (Exception ex) {
ex.printStackTrace();
}
}
/**
* Converts a wave file to a FLAC file (in order to POST the data to Google and retrieve a response) <br>
* Sample Rate is 8000 by default
*
* @param inputFile Input wave file
* @param outputFile Output FLAC file
*/
public void convertWaveToFlac(String inputFile, String outputFile) {
convertWaveToFlac(new File(inputFile), new File(outputFile));
}
/**
* Used when the frame length is unknown; shortens the array to prevent a large blank section at the end
* @param sampleData The int[] array you want to shorten
* @param index The index you want to shorten it to
* @return The shortened array
*/
private int[] truncateNullData(int[] sampleData, int index){
if(index == sampleData.length) return sampleData;
int[] out = new int[index];
for(int i = 0; i<index; i++){
out[i] = sampleData[i];
}
return out;
}
}
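
A short usage sketch for the encoder above, assuming an 8 kHz, 16-bit, mono WAVE recording at a hypothetical path:

FlacEncoder encoder = new FlacEncoder();
// "recording.wav" and "recording.flac" are placeholder paths
encoder.convertWaveToFlac("recording.wav", "recording.flac");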

View file

@@ -0,0 +1,524 @@
package com.darkprograms.speech.recognizer;
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
import javaFlacEncoder.FLACFileWriter;
import javax.net.ssl.HttpsURLConnection;
import javax.sound.sampled.AudioFormat;
import javax.sound.sampled.AudioInputStream;
import javax.sound.sampled.AudioSystem;
import javax.sound.sampled.LineUnavailableException;
import javax.sound.sampled.TargetDataLine;
import com.darkprograms.speech.util.ChunkedOutputStream;
import com.darkprograms.speech.util.StringUtil;
/**
* A class for using Google's Duplex Speech API. Allows for continuous recognition. Requires an API-Key.
* A duplex API opens two connections. One to an upstream and one to a downstream. The system allows
* for continuous chunking on both up and downstream. This, in turn, allows for Google to return data
* as data is sent to it. For this reason, this class uses listeners.
* @author Skylion (Aaron Gokaslan), Robert Rowntree.
*/
public class GSpeechDuplex{
//TODO Cleanup Printlns
/**
* Minimum value for ID
*/
private static final long MIN = 10000000;
/**
* Maximum value for ID
*/
private static final long MAX = 900000009999999L;
/**
* The base URL for the API
*/
private static final String GOOGLE_DUPLEX_SPEECH_BASE = "https://www.google.com/speech-api/full-duplex/v1/";
/**
* Stores listeners
*/
private List<GSpeechResponseListener> responseListeners = new ArrayList<GSpeechResponseListener>();
/**
* User defined API-KEY
*/
private final String API_KEY;
/**
* User-defined language
*/
private String language = "auto";
/**
* The maximum size the API will tolerate
*/
private final static int MAX_SIZE = 1048576;
/**
* Per specification, the final chunk in a ChunkedOutputStream
*/
private final static byte[] FINAL_CHUNK = new byte[] { '0', '\r', '\n', '\r', '\n' };
/**
* Constructor
* @param API_KEY The API-Key for Google's Speech API. An API key can be obtained by requesting
* one by following the process shown at this
* <a href="http://www.chromium.org/developers/how-tos/api-keys">url</a>.
*/
public GSpeechDuplex(String API_KEY){
this.API_KEY = API_KEY;
}
/**
* Temporary; will be deprecated before release
*/
public String getLanguage(){
return language;
}
/**
* Temporary; will be deprecated before release
*/
public void setLanguage(String language){
this.language = language;
}
/**
* Send a FLAC file with the specified sampleRate to the Duplex API
* @param flacFile The file you wish to upload.
* NOTE: Segment the file if duration is greater than 15 seconds.
* @param sampleRate The sample rate of the file.
* @throws IOException If something has gone wrong with reading the file
*/
public void recognize(File flacFile, int sampleRate) throws IOException{
recognize(mapFileIn(flacFile), sampleRate);
}
/**
* Send a byte[] to the URL with a specified sampleRate.
* NOTE: The byte[] should contain no more than 15 seconds of audio.
* Chunking is not fully implemented yet; chunks are not strung together for context.
* @param data The byte[] you want to send.
* @param sampleRate The sample rate of aforementioned byte array.
*/
public void recognize(byte[] data, int sampleRate){
if(data.length >= MAX_SIZE){//Temporary chunking. Does not allow Google to gather context.
System.out.println("Chunking the audio into smaller parts...");
byte[][] dataArray = chunkAudio(data);
for(byte[] array: dataArray){
recognize(array, sampleRate);
}
return;//Each chunk has already been sent; prevents the oversized data from also being sent below.
}
//Generates a unique ID for the response.
final long PAIR = MIN + (long)(Math.random() * ((MAX - MIN) + 1L));
//Generates the Downstream URL
final String API_DOWN_URL = GOOGLE_DUPLEX_SPEECH_BASE + "down?maxresults=1&pair=" + PAIR;
//Generates the Upstream URL
final String API_UP_URL = GOOGLE_DUPLEX_SPEECH_BASE +
"up?lang=" + language + "&lm=dictation&client=chromium&pair=" + PAIR +
"&key=" + API_KEY ;
//Opens downChannel
this.downChannel(API_DOWN_URL);
//Opens upChannel
this.upChannel(API_UP_URL, chunkAudio(data), sampleRate);
}
/**
* This method allows you to stream a continuous stream of data to the API.
* <p>Note: This feature is experimental.</p>
* @param tl
* @param af
* @throws IOException
* @throws LineUnavailableException
*/
public void recognize(TargetDataLine tl, AudioFormat af) throws IOException, LineUnavailableException{
//Generates a unique ID for the response.
final long PAIR = MIN + (long)(Math.random() * ((MAX - MIN) + 1L));
//Generates the Downstream URL
final String API_DOWN_URL = GOOGLE_DUPLEX_SPEECH_BASE + "down?maxresults=1&pair=" + PAIR;
//Generates the Upstream URL
final String API_UP_URL = GOOGLE_DUPLEX_SPEECH_BASE +
"up?lang=" + language + "&lm=dictation&client=chromium&pair=" + PAIR +
"&key=" + API_KEY + "&continuous"; //Tells Google to constantly monitor the stream;
//TODO Add implementation that sends feedback in real time. Protocol buffers will be necessary.
//Opens downChannel
this.downChannel(API_DOWN_URL);
//Opens upChannel
this.upChannel(API_UP_URL, tl, af);
}
/**
* This code opens a new Thread that connects to the downstream URL. Due to threading,
* the best way to handle this is through the use of listeners.
* @param urlStr The URL you want to connect to.
*/
private void downChannel(String urlStr) {
final String url = urlStr;
new Thread ("Downstream Thread") {
public void run() {
// handler for DOWN channel http response stream - httpsUrlConn
// response handler should manage the connection.... ??
// assign a TIMEOUT Value that exceeds by a safe factor
// the amount of time that it will take to write the bytes
// to the UPChannel in a fashion that mimics a liveStream
// of the audio at the applicable Bitrate. BR=sampleRate * bits per sample
// Note that the TLS session uses "* SSLv3, TLS alert, Client hello (1): "
// to wake up the listener when there are additional bytes.
// The mechanics of the TLS session should be transparent. Just use
// httpsUrlConn and allow it enough time to do its work.
Scanner inStream = openHttpsConnection(url);
if(inStream == null){
System.err.println("An error has occurred: could not open the downstream connection.");
return;//Prevents a NullPointerException below.
}
while(inStream.hasNextLine()){
String response = inStream.nextLine();
System.out.println("Response: "+response);
if(response.length()>17){//Prevents blank responses from Firing
GoogleResponse gr = new GoogleResponse();
parseResponse(response, gr);
fireResponseEvent(gr);
}
}
inStream.close();
System.out.println("Finished write on down stream...");
}
}.start();
}
/**
* Used to initiate the URL chunking for the upChannel.
* @param urlStr The URL string you want to upload to
* @param data The data you want to send to the URL
* @param sampleRate The specified sample rate of the data.
*/
private void upChannel(String urlStr, byte[][] data, int sampleRate) {
final String murl = urlStr;
final byte[][] mdata = data;
final int mSampleRate = sampleRate;
new Thread ("Upstream File Thread") {
public void run() {
openHttpsPostConnection(murl, mdata, mSampleRate);
//Google does not return data via this URL
}
}.start();
}
/**
* Streams data from the TargetDataLine to the API.
* @param urlStr The URL to stream to
* @param tl The target data line to stream from.
* @param af The AudioFormat to stream with.
* @throws LineUnavailableException If cannot open or stream the TargetDataLine.
*/
private void upChannel(String urlStr, TargetDataLine tl, AudioFormat af) throws LineUnavailableException{
final String murl = urlStr;
final TargetDataLine mtl = tl;
final AudioFormat maf = af;
if(!mtl.isOpen()){
mtl.open(maf);
mtl.start();
}
new Thread ("Upstream Thread") {
public void run() {
openHttpsPostConnection(murl, mtl, maf);
}
}.start();
}
/**
* Opens a HTTPS connection to the specified URL string
* @param urlStr The URL you want to visit
* @return The Scanner to access aforementioned data.
*/
private Scanner openHttpsConnection(String urlStr) {
int resCode = -1;
try {
URL url = new URL(urlStr);
URLConnection urlConn = url.openConnection();
if (!(urlConn instanceof HttpsURLConnection)) {
throw new IOException ("URL is not an Https URL");
}
HttpsURLConnection httpConn = (HttpsURLConnection)urlConn;
httpConn.setAllowUserInteraction(false);
// TIMEOUT is required
httpConn.setInstanceFollowRedirects(true);
httpConn.setRequestMethod("GET");
httpConn.connect();
resCode = httpConn.getResponseCode();
if (resCode == HttpsURLConnection.HTTP_OK) {
return new Scanner(httpConn.getInputStream());
}
else{
System.out.println("Error: " + resCode);
}
} catch (MalformedURLException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
return null;
}
/**
* Opens a HTTPSPostConnection that posts data from a TargetDataLine input
* @param murl The URL you want to post to.
* @param mtl The TargetDataLine you want to post data from. <b>Note should be open</b>
* @param maf The AudioFormat of the data you want to post
*/
private void openHttpsPostConnection(final String murl,
final TargetDataLine mtl, final AudioFormat maf) {
URL url;
try {
url = new URL(murl);
URLConnection urlConn = url.openConnection();
if (!(urlConn instanceof HttpsURLConnection)) {
throw new IOException ("URL is not an Https URL");
}
HttpsURLConnection httpConn = (HttpsURLConnection)urlConn;
httpConn.setAllowUserInteraction(false);
httpConn.setInstanceFollowRedirects(true);
httpConn.setRequestMethod("POST");
httpConn.setDoOutput(true);
httpConn.setChunkedStreamingMode(0);
httpConn.setRequestProperty("Transfer-Encoding", "chunked");
httpConn.setRequestProperty("Content-Type", "audio/x-flac; rate=" + (int)maf.getSampleRate());
// also worked with ("Content-Type", "audio/amr; rate=8000");
httpConn.connect();
// this opens a connection, then sends POST & headers.
OutputStream out = httpConn.getOutputStream();
//Note : if the audio is more than 15 seconds
// dont write it to UrlConnInputStream all in one block as this sample does.
// Rather, segment the byteArray and on intermittently, sleeping thread
// supply bytes to the urlConn Stream at a rate that approaches
// the bitrate ( =30K per sec. in this instance ).
System.out.println("Starting to write data to output...");
AudioInputStream ais = new AudioInputStream(mtl);
ChunkedOutputStream os = new ChunkedOutputStream(out);
AudioSystem.write(ais, FLACFileWriter.FLAC, os);
out.write(FINAL_CHUNK);
System.out.println("IO WRITE DONE");
out.close();
// do you need the trailer?
// NOW you can look at the status.
int resCode = httpConn.getResponseCode();
if (resCode / 100 != 2) {
System.out.println("ERROR");
}
}catch(Exception ex){
ex.printStackTrace();
}
}
/**
* Opens a chunked HTTPS post connection and returns a Scanner with incoming data from Google Server
* Used for the upstream connection.
* Chunked HTTPS ensures unlimited file size.
* @param urlStr The String for the URL
* @param data The data you want to send the server
* @param sampleRate The sample rate of the flac file.
* @return A Scanner to access the server response. (Probably will never be used)
*/
private Scanner openHttpsPostConnection(String urlStr, byte[][] data, int sampleRate){
byte[][] mextrad = data;
int resCode = -1;
OutputStream out = null;
// int http_status;
try {
URL url = new URL(urlStr);
URLConnection urlConn = url.openConnection();
if (!(urlConn instanceof HttpsURLConnection)) {
throw new IOException ("URL is not an Https URL");
}
HttpsURLConnection httpConn = (HttpsURLConnection)urlConn;
httpConn.setAllowUserInteraction(false);
httpConn.setInstanceFollowRedirects(true);
httpConn.setRequestMethod("POST");
httpConn.setDoOutput(true);
httpConn.setChunkedStreamingMode(0);
httpConn.setRequestProperty("Transfer-Encoding", "chunked");
httpConn.setRequestProperty("Content-Type", "audio/x-flac; rate=" + sampleRate);
// also worked with ("Content-Type", "audio/amr; rate=8000");
httpConn.connect();
try {
// this opens a connection, then sends POST & headers.
out = httpConn.getOutputStream();
//Note : if the audio is more than 15 seconds
// dont write it to UrlConnInputStream all in one block as this sample does.
// Rather, segment the byteArray and on intermittently, sleeping thread
// supply bytes to the urlConn Stream at a rate that approaches
// the bitrate ( =30K per sec. in this instance ).
System.out.println("Starting to write");
for(byte[] dataArray: mextrad){
out.write(dataArray); // one big block supplied instantly to the underlying chunker wont work for duration > 15 s.
try {
Thread.sleep(1000);//Delays the audio so Google thinks it's a mic.
} catch (InterruptedException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
out.write(FINAL_CHUNK);
System.out.println("IO WRITE DONE");
// do you need the trailer?
// NOW you can look at the status.
resCode = httpConn.getResponseCode();
if (resCode / 100 != 2) {
System.out.println("ERROR");
}
} catch (IOException e) {
e.printStackTrace();//Do not swallow write errors silently.
}
if (resCode == HttpsURLConnection.HTTP_OK) {
return new Scanner(httpConn.getInputStream());
}
else{
System.out.println("HELP: " + resCode);
}
} catch (MalformedURLException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
return null;
}
/**
* Converts the file into a byte[]. Also Android compatible. :)
* @param infile The File you want to get the byte[] from.
* @return The byte[]
* @throws IOException if something goes wrong in reading the file.
*/
private byte[] mapFileIn(File infile) throws IOException{
return Files.readAllBytes(infile.toPath());
}
/**
* Parses the String into a GoogleResponse object
* @param rawResponse The String you want to parse
* @param gr the GoogleResponse object to save the data into.
*/
private void parseResponse(String rawResponse, GoogleResponse gr){
if(rawResponse == null || !rawResponse.contains("\"result\"")
|| rawResponse.equals("{\"result\":[]}")){ return; }
if(rawResponse.contains("\"confidence\":")){
String confidence = StringUtil.substringBetween(rawResponse, "\"confidence\":", "}");
gr.setConfidence(confidence);
}
else{
gr.setConfidence(String.valueOf(1d));
}
String array = StringUtil.trimString(rawResponse, "[", "]");
if(array.contains("[")){
array = StringUtil.trimString(array, "[", "]");
}
if(array.contains("\"confidence\":")){//Removes confidence phrase if it exists.
array = array.substring(0, array.lastIndexOf(','));
}
String[] parts = array.split(",");
gr.setResponse(parseTranscript(parts[0]));
for(int i = 1; i<parts.length; i++){
gr.getOtherPossibleResponses().add(parseTranscript(parts[i]));
}
}
/**
* Parses each individual "transcript" phrase
* @param s The string fragment to parse
* @return The parsed String
*/
private String parseTranscript(String s){
String tmp = s.substring(s.indexOf(":")+1);
if(s.endsWith("}")){
tmp = tmp.substring(0, tmp.length()-1);
}
tmp = StringUtil.stripQuotes(tmp);
if(tmp.charAt(0)==' '){//Removes space at beginning if it exists
tmp = tmp.substring(1);
}
return tmp;
}
/**
* Adds GSpeechResponse Listeners that fire when Google sends a response.
* @param rl The listener you want to add
*/
public synchronized void addResponseListener(GSpeechResponseListener rl){
responseListeners.add(rl);
}
/**
* Removes GSpeechResponseListeners that fire when Google sends a response.
* @param rl The listener you want to remove
*/
public synchronized void removeResponseListener(GSpeechResponseListener rl){
responseListeners.remove(rl);
}
/**
* Fires responseListeners
* @param gr The Google Response (in this case the response event).
*/
private synchronized void fireResponseEvent(GoogleResponse gr){
for(GSpeechResponseListener gl: responseListeners){
gl.onResponse(gr);
}
}
/**
* Chunks audio into smaller chunks to stream to the duplex API
* @param data The data you want to break into smaller pieces
* @return The byte[][] containing the array of chunks.
*/
private byte[][] chunkAudio(byte[] data) {
if(data.length >= MAX_SIZE){//If larger than 1MB
int frame = MAX_SIZE/2;
int numOfChunks = (int)(data.length/((double)frame)) + 1;
byte[][] data2D = new byte[numOfChunks][];
for(int i = 0, j = 0; i<data.length && j<data2D.length; i+=frame, j++){
int length = (data.length - i < frame)? data.length - i: frame;
System.out.println("LENGTH: " + length);
data2D[j] = new byte[length];
System.arraycopy(data, i, data2D[j], 0, length);
}
return data2D;
}
else{
byte[][] tmpData = new byte[1][data.length];
System.arraycopy(data, 0, tmpData[0], 0, data.length);
return tmpData;
}
}
}
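
A hedged sketch of continuous recognition with the class above, combining it with the Microphone class from this commit; the API key is a placeholder and error handling is omitted:

import com.darkprograms.speech.microphone.Microphone;
import com.darkprograms.speech.recognizer.GSpeechDuplex;
import com.darkprograms.speech.recognizer.GSpeechResponseListener;
import com.darkprograms.speech.recognizer.GoogleResponse;
import javaFlacEncoder.FLACFileWriter;

public class DuplexDemo {
    public static void main(String[] args) throws Exception {
        Microphone mic = new Microphone(FLACFileWriter.FLAC);
        GSpeechDuplex duplex = new GSpeechDuplex("YOUR_API_KEY"); // placeholder key
        duplex.setLanguage("en");
        duplex.addResponseListener(new GSpeechResponseListener() {
            public void onResponse(GoogleResponse gr) {
                System.out.println("Heard: " + gr.getResponse());
            }
        });
        // Streams audio from the microphone's TargetDataLine to the duplex API.
        duplex.recognize(mic.getTargetDataLine(), mic.getAudioFormat());
    }
}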

View file

@@ -0,0 +1,12 @@
package com.darkprograms.speech.recognizer;
/**
* Response listeners for URL connections.
* @author Skylion
*
*/
public interface GSpeechResponseListener {
public void onResponse(GoogleResponse gr);
}
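
A minimal implementing-class fragment, purely illustrative, that prints each transcript with its confidence as responses arrive:

public class PrintingListener implements GSpeechResponseListener {
    public void onResponse(GoogleResponse gr) {
        System.out.println(gr.getResponse() + " (confidence " + gr.getConfidence() + ")");
    }
}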

View file

@@ -0,0 +1,89 @@
package com.darkprograms.speech.recognizer;
import java.util.ArrayList;
import java.util.List;
/******************************************************************************
* Class that holds the response and confidence of a Google recognizer request
*
* @author Luke Kuza, Duncan Jauncey, Aaron Gokaslan
******************************************************************************/
public class GoogleResponse {
/**
* Variable that holds the response
*/
private String response;
/**
* Variable that holds the confidence score
*/
private String confidence;
/**
* List that holds other possible responses for this request.
*/
private List<String> otherPossibleResponses = new ArrayList<String>(20);
/**
* Constructor
*/
public GoogleResponse() {
}
/**
* Gets the response text of what was said in the submitted Audio to Google
*
* @return String representation of what was said
*/
public String getResponse() {
return response;
}
/**
* Set the response
*
* @param response The response
*/
protected void setResponse(String response) {
this.response = response;
}
/**
* Gets the confidence score for the specific request
*
* @return The confidence score, e.g. 0.922343324323
*/
public String getConfidence() {
return confidence;
}
/**
* Set the confidence score for this request
*
* @param confidence The confidence score
*/
protected void setConfidence(String confidence) {
this.confidence = confidence;
}
/**
* Get other possible responses for this request.
* @return other possible responses
*/
public List<String> getOtherPossibleResponses() {
return otherPossibleResponses;
}
/**
* Gets all returned responses for this request
* @return All returned responses
*/
public List<String> getAllPossibleResponses() {
List<String> tmp = new ArrayList<String>(otherPossibleResponses);//Copy so the internal list is not modified
tmp.add(0, response);
return tmp;
}
}
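
A small fragment showing how a response object might be consumed once one of the recognizer classes returns it; the variable name gr is assumed, and the confidence is only parsed when present:

// gr is a GoogleResponse obtained from one of the recognizer classes
double confidence = Double.parseDouble(gr.getConfidence()); // confidence is stored as a String
System.out.println("Best guess: " + gr.getResponse() + " (" + confidence + ")");
for (String alternative : gr.getOtherPossibleResponses()) {
    System.out.println("Alternative: " + alternative);
}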

View file

@@ -0,0 +1,466 @@
package com.darkprograms.speech.recognizer;
import java.io.*;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import com.darkprograms.speech.util.StringUtil;
/***************************************************************
* Class that submits FLAC audio and retrieves recognized text
*
* @author Luke Kuza, Duncan Jauncey, Aaron Gokaslan
**************************************************************/
@Deprecated
public class Recognizer {
@Deprecated
public enum Languages{
AUTO_DETECT("auto"),//tells Google to auto-detect the language
ARABIC_JORDAN("ar-JO"),
ARABIC_LEBANON("ar-LB"),
ARABIC_QATAR("ar-QA"),
ARABIC_UAE("ar-AE"),
ARABIC_MOROCCO("ar-MA"),
ARABIC_IRAQ("ar-IQ"),
ARABIC_ALGERIA("ar-DZ"),
ARABIC_BAHRAIN("ar-BH"),
ARABIC_LYBIA("ar-LY"),
ARABIC_OMAN("ar-OM"),
ARABIC_SAUDI_ARABIA("ar-SA"),
ARABIC_TUNISIA("ar-TN"),
ARABIC_YEMEN("ar-YE"),
BASQUE("eu"),
CATALAN("ca"),
CZECH("cs"),
DUTCH("nl-NL"),
ENGLISH_AUSTRALIA("en-AU"),
ENGLISH_CANADA("en-CA"),
ENGLISH_INDIA("en-IN"),
ENGLISH_NEW_ZEALAND("en-NZ"),
ENGLISH_SOUTH_AFRICA("en-ZA"),
ENGLISH_UK("en-GB"),
ENGLISH_US("en-US"),
FINNISH("fi"),
FRENCH("fr-FR"),
GALICIAN("gl"),
GERMAN("de-DE"),
HEBREW("he"),
HUNGARIAN("hu"),
ICELANDIC("is"),
ITALIAN("it-IT"),
INDONESIAN("id"),
JAPANESE("ja"),
KOREAN("ko"),
LATIN("la"),
CHINESE_SIMPLIFIED("zh-CN"),
CHINESE_TRANDITIONAL("zh-TW"),
CHINESE_HONGKONG("zh-HK"),
CHINESE_CANTONESE("zh-yue"),
MALAYSIAN("ms-MY"),
NORWEGIAN("no-NO"),
POLISH("pl"),
PIG_LATIN("xx-piglatin"),
PORTUGUESE("pt-PT"),
PORTUGUESE_BRASIL("pt-BR"),
ROMANIAN("ro-RO"),
RUSSIAN("ru"),
SERBIAN("sr-SP"),
SLOVAK("sk"),
SPANISH_ARGENTINA("es-AR"),
SPANISH_BOLIVIA("es-BO"),
SPANISH_CHILE("es-CL"),
SPANISH_COLOMBIA("es-CO"),
SPANISH_COSTA_RICA("es-CR"),
SPANISH_DOMINICAN_REPUBLIC("es-DO"),
SPANISH_ECUADOR("es-EC"),
SPANISH_EL_SALVADOR("es-SV"),
SPANISH_GUATEMALA("es-GT"),
SPANISH_HONDURAS("es-HN"),
SPANISH_MEXICO("es-MX"),
SPANISH_NICARAGUA("es-NI"),
SPANISH_PANAMA("es-PA"),
SPANISH_PARAGUAY("es-PY"),
SPANISH_PERU("es-PE"),
SPANISH_PUERTO_RICO("es-PR"),
SPANISH_SPAIN("es-ES"),
SPANISH_US("es-US"),
SPANISH_URUGUAY("es-UY"),
SPANISH_VENEZUELA("es-VE"),
SWEDISH("sv-SE"),
TURKISH("tr"),
ZULU("zu");
//TODO Clean Up JavaDoc for Overloaded Methods using @link
/**
*Stores the LanguageCode
*/
private final String languageCode;
/**
*Constructor
*/
private Languages(final String languageCode){
this.languageCode = languageCode;
}
public String toString(){
return languageCode;
}
}
/**
* URL to POST audio data and retrieve results
*/
private static final String GOOGLE_RECOGNIZER_URL = "https://www.google.com/speech-api/v1/recognize?xjerr=1&client=chromium";
private boolean profanityFilter = true;
private String language = null;
/**
* Constructor
*/
public Recognizer() {
this.setLanguage(Languages.AUTO_DETECT);
}
/**
* Constructor
* @param language The language code, e.g. en-US
*/
@Deprecated
public Recognizer(String language) {
this.language = language;
}
/**
* Constructor
* @param language The Languages class for the language you want to designate
*/
public Recognizer(Languages language){
this.language = language.languageCode;
}
/**
* Constructor
* @param profanityFilter Whether to enable Google's profanity filter (on by default)
*/
public Recognizer(boolean profanityFilter){
this.profanityFilter = profanityFilter;
}
/**
* Constructor
* @param language The language code, e.g. en-US
* @param profanityFilter Whether to enable Google's profanity filter (on by default)
*/
@Deprecated
public Recognizer(String language, boolean profanityFilter){
this.language = language;
this.profanityFilter = profanityFilter;
}
/**
* Constructor
* @param language The Languages constant for the language you want to designate
* @param profanityFilter Whether to enable Google's profanity filter (on by default)
*/
public Recognizer(Languages language, boolean profanityFilter){
this.language = language.languageCode;
this.profanityFilter = profanityFilter;
}
/**
* Language: Contains all supported languages for Google Speech to Text.
* Setting this to null will make Google use its own language detection.
* This value is null by default.
* @param language The Languages constant you want to use
*/
public void setLanguage(Languages language) {
this.language = language.languageCode;
}
/**Language code. This language code must match the language of the speech to be recognized. ex. en-US ru-RU
* This value is null by default.
* @param language The language code.
*/
@Deprecated
public void setLanguage(String language) {
this.language = language;
}
/**
* Returns the state of profanityFilter
* which enables/disables Google's profanity filter (on by default).
* @return profanityFilter
*/
public boolean getProfanityFilter(){
return profanityFilter;
}
/**
* Language code. This language code must match the language of the speech to be recognized. ex. en-US ru-RU
* This value is null by default.
* @return language the Google language
*/
public String getLanguage(){
return language;
}
/**
* Get recognized data from a Wave file. This method will encode the wave file to a FLAC file
*
* @param waveFile Wave file to recognize
* @param maxResults Maximum number of results to return in response
* @return Returns a GoogleResponse, with the response and confidence score
* @throws IOException Throws exception if something goes wrong
*/
public GoogleResponse getRecognizedDataForWave(File waveFile, int maxResults) throws IOException{
FlacEncoder flacEncoder = new FlacEncoder();
File flacFile = new File(waveFile + ".flac");
flacEncoder.convertWaveToFlac(waveFile, flacFile);
GoogleResponse googleResponse = getRecognizedDataForFlac(flacFile, maxResults, 8000);
//Delete converted FLAC data
flacFile.delete();
return googleResponse;
}
/**
* Get recognized data from a Wave file. This method will encode the wave file to a FLAC
*
* @param waveFile Wave file to recognize
* @param maxResults the maximum number of results to return in the response
* NOTE: Sample rate of file must be 8000 unless a custom sample rate is specified.
* @return Returns a GoogleResponse, with the response and confidence score
* @throws IOException Throws exception if something goes wrong
*/
public GoogleResponse getRecognizedDataForWave(String waveFile, int maxResults) throws IOException {
return getRecognizedDataForWave(new File(waveFile), maxResults);
}
/**
* Get recognized data from a FLAC file.
*
* @param flacFile FLAC file to recognize
* @param maxResults the maximum number of results to return in the response
* NOTE: Sample rate of file must be 8000 unless a custom sample rate is specified.
* @return Returns a GoogleResponse, with the response and confidence score
* @throws IOException Throws exception if something goes wrong
*/
public GoogleResponse getRecognizedDataForFlac(File flacFile, int maxResults) throws IOException {
return getRecognizedDataForFlac(flacFile, maxResults, 8000);
}
/**
* Get recognized data from a FLAC file.
*
* @param flacFile FLAC file to recognize
* @param maxResults the maximum number of results to return in the response
* @param sampleRate The sample rate of the file. Default is 8000.
* @return Returns a GoogleResponse, with the response and confidence score
* @throws IOException Throws exception if something goes wrong
*/
public GoogleResponse getRecognizedDataForFlac(File flacFile, int maxResults, int sampleRate) throws IOException{
String response = rawRequest(flacFile, maxResults, sampleRate);
GoogleResponse googleResponse = new GoogleResponse();
parseResponse(response, googleResponse);
return googleResponse;
}
/**
* Get recognized data from a FLAC file.
*
* @param flacFile FLAC file to recognize
* @param maxResults the maximum number of results to return in the response
* @param sampleRate The sample rate of the file. Default is 8000.
* @return Returns a GoogleResponse, with the response and confidence score
* @throws IOException Throws exception if something goes wrong
*/
public GoogleResponse getRecognizedDataForFlac(String flacFile, int maxResults, int sampleRate) throws IOException{
return getRecognizedDataForFlac(new File(flacFile), maxResults, sampleRate);
}
/**
* Get recognized data from a FLAC file.
*
* @param flacFile FLAC file to recognize
* @param maxResults the maximum number of results to return in the response
* @return Returns a GoogleResponse, with the response and confidence score
* @throws IOException Throws exception if something goes wrong
*/
public GoogleResponse getRecognizedDataForFlac(String flacFile, int maxResults) throws IOException {
return getRecognizedDataForFlac(new File(flacFile), maxResults);
}
/**
* Get recognized data from a Wave file. This method will encode the wave file to a FLAC.
* This method uses a default of one result (maxResults = 1).
*
* @param waveFile Wave file to recognize
* @return Returns a GoogleResponse, with the response and confidence score
* @throws IOException Throws exception if something goes wrong
*/
public GoogleResponse getRecognizedDataForWave(File waveFile) throws IOException {
return getRecognizedDataForWave(waveFile, 1);
}
/**
* Get recognized data from a Wave file. This method will encode the wave file to a FLAC.
* This method uses a default of one result (maxResults = 1).
*
* @param waveFile Wave file to recognize
* @return Returns a GoogleResponse, with the response and confidence score
* @throws IOException Throws exception if something goes wrong
*/
public GoogleResponse getRecognizedDataForWave(String waveFile) throws IOException {
return getRecognizedDataForWave(waveFile, 1);
}
/**
* Get recognized data from a FLAC file.
* This method uses a default of one result (maxResults = 1).
*
* @param flacFile FLAC file to recognize
* @return Returns a GoogleResponse, with the response and confidence score
* @throws IOException Throws exception if something goes wrong
*/
public GoogleResponse getRecognizedDataForFlac(File flacFile) throws IOException {
return getRecognizedDataForFlac(flacFile, 1);
}
/**
* Get recognized data from a FLAC file.
* This method uses a default of one result (maxResults = 1).
*
* @param flacFile FLAC file to recognize
* @return Returns a GoogleResponse, with the response and confidence score
* @throws IOException Throws exception if something goes wrong
*/
public GoogleResponse getRecognizedDataForFlac(String flacFile) throws IOException {
return getRecognizedDataForFlac(flacFile, 1);
}
/**
* Parses the raw response from Google
*
* @param rawResponse The raw, unparsed response from Google
* @param googleResponse The GoogleResponse object to populate with the parsed data.
*/
private void parseResponse(String rawResponse, GoogleResponse googleResponse) {
if (rawResponse == null || !rawResponse.contains("utterance"))
return;
String array = StringUtil.substringBetween(rawResponse, "[", "]");
String[] parts = array.split("}");
boolean first = true;
for( String s : parts ) {
if( first ) {
first = false;
String utterancePart = s.split(",")[0];
String confidencePart = s.split(",")[1];
String utterance = utterancePart.split(":")[1];
String confidence = confidencePart.split(":")[1];
utterance = StringUtil.stripQuotes(utterance);
confidence = StringUtil.stripQuotes(confidence);
if( utterance.equals("null") ) {
utterance = null;
}
if( confidence.equals("null") ) {
confidence = null;
}
googleResponse.setResponse(utterance);
googleResponse.setConfidence(confidence);
} else {
String utterance = s.split(":")[1];
utterance = StringUtil.stripQuotes(utterance);
if( utterance.equals("null") ) {
utterance = null;
}
googleResponse.getOtherPossibleResponses().add(utterance);
}
}
}
/**
* Performs the request to Google with a file <br>
* Request is buffered
*
* @param inputFile Input file to recognize
* @param maxResults The maximum number of results to request
* @param sampleRate The sample rate of the FLAC file
* @return Returns the raw, unparsed response from Google
* @throws IOException Throws exception if something went wrong
*/
private String rawRequest(File inputFile, int maxResults, int sampleRate) throws IOException{
URL url;
URLConnection urlConn;
OutputStream outputStream;
BufferedReader br;
StringBuilder sb = new StringBuilder(GOOGLE_RECOGNIZER_URL);
if( language != null ) {
sb.append("&lang=");
sb.append(language);
}
else{
sb.append("&lang=auto");
}
if( !profanityFilter ) {
sb.append("&pfilter=0");
}
sb.append("&maxresults=");
sb.append(maxResults);
// URL of Remote Script.
url = new URL(sb.toString());
// Open New URL connection channel.
urlConn = url.openConnection();
// we want to do output.
urlConn.setDoOutput(true);
// No caching
urlConn.setUseCaches(false);
// Specify the header content type.
urlConn.setRequestProperty("Content-Type", "audio/x-flac; rate=" + sampleRate);
// Send POST output.
outputStream = urlConn.getOutputStream();
FileInputStream fileInputStream = new FileInputStream(inputFile);
byte[] buffer = new byte[256];
int bytesRead;
while ((bytesRead = fileInputStream.read(buffer, 0, 256)) != -1) {
outputStream.write(buffer, 0, bytesRead);//Only write the bytes actually read; the final block may be short
}
fileInputStream.close();
outputStream.close();
// Get response data.
br = new BufferedReader(new InputStreamReader(urlConn.getInputStream(), Charset.forName("UTF-8")));
String response = br.readLine();
br.close();
return response;
}
}
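
A usage sketch for the (deprecated) recognizer above; "command.wav" is a placeholder for an 8 kHz WAVE recording, and the request asks for up to three results:

import com.darkprograms.speech.recognizer.GoogleResponse;
import com.darkprograms.speech.recognizer.Recognizer;
import java.io.IOException;

public class RecognizerDemo {
    public static void main(String[] args) throws IOException {
        Recognizer recognizer = new Recognizer(Recognizer.Languages.ENGLISH_US);
        GoogleResponse response = recognizer.getRecognizedDataForWave("command.wav", 3);
        System.out.println("Google thinks you said: " + response.getResponse());
        System.out.println("Confidence: " + response.getConfidence());
    }
}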

View file

@@ -0,0 +1,282 @@
package com.darkprograms.speech.recognizer;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.ByteBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.List;
import javax.net.ssl.HttpsURLConnection;
import javax.xml.ws.http.HTTPException;
import com.darkprograms.speech.util.StringUtil;
/**
* This class uses Google's V2 hook. The class returns a chunked response, so listeners must be used.
* The class also requires an API key (see the constructor for details). This class is experimental and
* subject to change as we restructure the API.
* @author Aaron Gokaslan (Skylion)
*/
public class RecognizerChunked {
/**
* Google's API V2 URL
*/
private static final String GOOGLE_SPEECH_URL_V2 = "https://www.google.com/speech-api/v2/recognize";
/**
* API-Key used for requests
*/
private final String API_KEY;
/**
* The language code Google uses to determine the language
* Default value is "auto"
*/
private String language;
/**
* Stores the Response Listeners
*/
private List<GSpeechResponseListener> responseListeners = new ArrayList<GSpeechResponseListener>();
/**
* Constructor
* @param API_KEY The API-Key for Google's Speech API. An API key can be obtained by requesting
* one by following the process shown at this
* <a href="http://www.chromium.org/developers/how-tos/api-keys">url</a>.
*/
public RecognizerChunked(String API_KEY){
this.API_KEY = API_KEY;
this.language = "auto";
}
/**
* Constructor
* @param API_KEY The API-Key for Google's Speech API. An API key can be obtained by requesting
* one by following the process shown at this
* <a href="http://www.chromium.org/developers/how-tos/api-keys">url</a>.
* @param language The language you want to use (ISO code)
* Note: This function will most likely be deprecated.
*/
public RecognizerChunked(String API_KEY, String language){
this(API_KEY);
this.language = language;
}
/**
* The current language the recognizer is set to use.
* @return The ISO code, or "auto" if the language is not specified.
*/
public String getLanguage(){
return language;
}
/**
* Sets the language used for recognition.
* @param language The language as an ISO-Code
*/
public void setLanguage(String language){
this.language = language;
}
/**
* Analyzes the file for speech
* @param infile The file you want to analyze for speech.
* @param sampleRate The sample rate of the audioFile.
* @throws IOException if something goes wrong reading the file.
*/
public void getRecognizedDataForFlac(File infile, int sampleRate) throws IOException{
byte[] data = mapFileIn(infile);
getRecognizedDataForFlac(data, sampleRate);
}
/**
* Analyzes the file for speech
* @param inFile The file you want to analyze for speech.
* @param sampleRate The sample rate of the audioFile.
* @throws IOException if something goes wrong reading the file.
*/
public void getRecognizedDataForFlac(String inFile, int sampleRate) throws IOException{
getRecognizedDataForFlac(new File(inFile), sampleRate);
}
/**
* Recognizes the byte data.
* @param data The byte[] of FLAC-encoded audio you want to recognize
* @param sampleRate The sample rate of the audio
*/
public void getRecognizedDataForFlac(byte[] data, int sampleRate){
StringBuilder sb = new StringBuilder(GOOGLE_SPEECH_URL_V2);
sb.append("?output=json");
sb.append("&client=chromium");
sb.append("&lang=" + language);
sb.append("&key=" + API_KEY);
String url = sb.toString();
openHttpsPostConnection(url, data, sampleRate);
}
/**
* Opens a chunked response HTTPS line to the specified URL
* @param urlStr The URL string to connect for chunking
* @param data The data you want to send to Google. Speech files under 15 seconds long recommended.
* @param sampleRate The sample rate for your audio file.
*/
private void openHttpsPostConnection(final String urlStr, final byte[] data, final int sampleRate) {
new Thread () {
public void run() {
HttpsURLConnection httpConn = null;
ByteBuffer buff = ByteBuffer.wrap(data);
byte[] destdata = new byte[2048];
int resCode = -1;
OutputStream out = null;
try {
URL url = new URL(urlStr);
URLConnection urlConn = url.openConnection();
if (!(urlConn instanceof HttpsURLConnection)) {
throw new IOException ("URL must be HTTPS");
}
httpConn = (HttpsURLConnection)urlConn;
httpConn.setAllowUserInteraction(false);
httpConn.setInstanceFollowRedirects(true);
httpConn.setRequestMethod("POST");
httpConn.setDoOutput(true);
httpConn.setChunkedStreamingMode(0); //TransferType: chunked
httpConn.setRequestProperty("Content-Type", "audio/x-flac; rate=" + sampleRate);
// this opens a connection, then sends POST & headers.
out = httpConn.getOutputStream();
//beyond 15 sec duration just simply writing the file
// does not seem to work. So buffer it and delay to simulate
// bufferd microphone delivering stream of speech
// re: net.http.ChunkedOutputStream.java
while(buff.remaining() >= destdata.length){
buff.get(destdata);
out.write(destdata);
}
byte[] lastr = new byte[buff.remaining()];
buff.get(lastr, 0, lastr.length);
out.write(lastr);
out.close();
resCode = httpConn.getResponseCode();
if(resCode >= HttpURLConnection.HTTP_UNAUTHORIZED){//Stops here if Google rejects the request
throw new HTTPException(HttpURLConnection.HTTP_UNAUTHORIZED);//Throws
}
String line;//Each line that is read back from Google.
BufferedReader br = new BufferedReader(new InputStreamReader(httpConn.getInputStream()));
while ((line = br.readLine( )) != null) {
if(line.length()>19 && resCode > 100 && resCode < HttpURLConnection.HTTP_UNAUTHORIZED){
GoogleResponse gr = new GoogleResponse();
parseResponse(line, gr);
fireResponseEvent(gr);
}
}
} catch (MalformedURLException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
finally { if (httpConn != null) { httpConn.disconnect(); } }//Guards against a null connection if the URL was malformed
}
}.start();
}
/**
* Converts the file into a byte[].
* @param infile The File you want to specify
* @return a byte array
* @throws IOException if something goes wrong reading the file.
*/
private byte[] mapFileIn(File infile) throws IOException{
FileInputStream fis = new FileInputStream(infile);
try{
FileChannel fc = fis.getChannel(); // Get the file's size and then map it into memory
int sz = (int)fc.size();
MappedByteBuffer bb = fc.map(FileChannel.MapMode.READ_ONLY, 0, sz);
byte[] data2 = new byte[bb.remaining()];
bb.get(data2);
return data2;
}
finally{//Ensures resources are closed regardless of whether the action succeeded
fis.close();
}
}
/**
* Parses the response into a Google Response
* @param rawResponse The raw String you want to parse
* @param gr The GoogleResponse you want to parse the data into.
*/
private void parseResponse(String rawResponse, GoogleResponse gr){
if(rawResponse == null || !rawResponse.contains("\"result\"")){ return; }
if(rawResponse.contains("\"confidence\":")){
String confidence = StringUtil.substringBetween(rawResponse, "\"confidence\":", "}");
gr.setConfidence(confidence);
}
else{
gr.setConfidence(String.valueOf(1d));
}
String array = StringUtil.trimString(rawResponse, "[", "]");
if(array.contains("[")){
array = StringUtil.trimString(array, "[", "]");
}
String[] parts = array.split(",");
gr.setResponse(parseTranscript(parts[0]));
for(int i = 1; i<parts.length; i++){
gr.getOtherPossibleResponses().add(parseTranscript(parts[i]));
}
}
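	/*
	 * Worked example (illustrative; the JSON layout below is an assumption based on
	 * Google's v2 speech responses, not taken from this source):
	 *   {"result":[{"alternative":[{"transcript":"hello"},{"transcript":"hallo"}],"final":true}],"result_index":0}
	 * For a line like this, parseResponse sets the confidence to "1.0" (no "confidence"
	 * field is present), sets the main response to "hello", and adds "hallo" to the
	 * list of other possible responses.
	 */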
/**
* Cleans up the transcript portion of the String
* @param s The string you want to process.
* @return The reformatted string.
*/
private String parseTranscript(String s){
String tmp = s.substring(s.indexOf(":")+1);
if(s.endsWith("}")){
tmp = tmp.substring(0, tmp.length()-1);
}
tmp = StringUtil.stripQuotes(tmp);
return tmp;
}
/**
* Adds a response listener that is triggered when a response from Google is received
* @param rl The response listener you want to add
*/
public synchronized void addResponseListener(GSpeechResponseListener rl){
responseListeners.add(rl);
}
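	/*
	 * Usage sketch (illustrative only; "recognizer" stands for an instance of this class,
	 * and getResponse() is assumed to mirror the setResponse call made in parseResponse):
	 *
	 *   recognizer.addResponseListener(new GSpeechResponseListener() {
	 *       public void onResponse(GoogleResponse gr) {
	 *           System.out.println("Heard: " + gr.getResponse());
	 *       }
	 *   });
	 */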
/**
* Removes the specified response listener
* @param rl The response listener
*/
public synchronized void removeResponseListener(GSpeechResponseListener rl){
responseListeners.remove(rl);
}
/**
* Fires the response listener
* @param gr The GoogleResponse as the event object.
*/
private synchronized void fireResponseEvent(GoogleResponse gr){
for(GSpeechResponseListener gl: responseListeners){
gl.onResponse(gr);
}
}
}

View file

@@ -0,0 +1,261 @@
package com.darkprograms.speech.synthesiser;
import java.io.IOException;
import java.io.InputStream;
import java.io.SequenceInputStream;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import com.darkprograms.speech.translator.GoogleTranslate;
/*******************************************************************************
* Synthesiser class that connects to Google's unofficial API to retrieve data
*
* @author Luke Kuza, Aaron Gokaslan (Skylion)
*******************************************************************************/
public class Synthesiser {
/**
* URL to query for Google synthesiser
*/
private final static String GOOGLE_SYNTHESISER_URL = "http://translate.google.com/translate_tts?tl=";
/**
* language of the Text you want to translate
*/
private String languageCode;
/**
* LANG_XX_XXXX Variables are language codes.
*/
public static final String LANG_AU_ENGLISH = "en-AU";
public static final String LANG_US_ENGLISH = "en-US";
public static final String LANG_UK_ENGLISH = "en-GB";
public static final String LANG_ES_SPANISH = "es";
public static final String LANG_FR_FRENCH = "fr";
public static final String LANG_DE_GERMAN = "de";
public static final String LANG_PT_PORTUGUESE = "pt-pt";
public static final String LANG_PT_BRAZILIAN = "pt-br";
	//Please add more regional languages as you find them. Also try to include the accent code if you can.
/**
* Constructor
*/
public Synthesiser() {
languageCode = "auto";
}
/**
* Constructor that takes a language code parameter. Set to "auto" to enable automatic language detection.
* @param languageCode The language code to synthesise in, e.g. "en-US", or "auto"
*/
public Synthesiser(String languageCode){
this.languageCode = languageCode;
}
/**
* Returns the current language code for the Synthesiser.
* Example: English (generic) = en, English (US) = en-US, English (UK) = en-GB, Spanish = es.
* @return the current language code parameter
*/
public String getLanguage(){
return languageCode;
}
/**
* Note: set language to auto to enable automatic language detection.
* Setting to null will also implement Google's automatic language detection
* @param languageCode The language code you would like to modify languageCode to.
*/
public void setLanguage(String languageCode){
this.languageCode = languageCode;
}
/**
* Gets an input stream to MP3 data for the returned information from a request
*
* @param synthText Text you want to be synthesized into MP3 data
* @return Returns an input stream of the MP3 data that is returned from Google
* @throws IOException Throws exception if it can not complete the request
*/
public InputStream getMP3Data(String synthText) throws IOException{
String languageCode = this.languageCode;//Ensures retention of language settings if set to auto
if(languageCode == null || languageCode.equals("") || languageCode.equalsIgnoreCase("auto")){
try{
languageCode = detectLanguage(synthText);//Detects language
if(languageCode == null){
languageCode = "en-us";//Reverts to Default Language if it can't detect it.
}
}
catch(Exception ex){
ex.printStackTrace();
languageCode = "en-us";//Reverts to Default Language if it can't detect it.
}
}
if(synthText.length()>100){
List<String> fragments = parseString(synthText);//parses String if too long
String tmp = getLanguage();
setLanguage(languageCode);//Keeps it from autodetecting each fragment.
InputStream out = getMP3Data(fragments);
			setLanguage(tmp);//Reverts it to its previous language, such as auto.
return out;
}
String encoded = URLEncoder.encode(synthText, "UTF-8"); //Encode
URL url = new URL(GOOGLE_SYNTHESISER_URL + languageCode + "&q=" + encoded); //create url
// Open New URL connection channel.
URLConnection urlConn = url.openConnection(); //Open connection
urlConn.addRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:2.0) Gecko/20100101 Firefox/4.0"); //Adding header for user agent is required
return urlConn.getInputStream();
}
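	/*
	 * Usage sketch (illustrative, not part of the original API): fetch the MP3 stream
	 * for a phrase and copy it to a file. The file name is an arbitrary example and the
	 * caller would need the usual java.io imports.
	 *
	 *   Synthesiser synth = new Synthesiser(Synthesiser.LANG_US_ENGLISH);
	 *   InputStream mp3 = synth.getMP3Data("Hello world");
	 *   OutputStream out = new FileOutputStream("hello.mp3");
	 *   byte[] buffer = new byte[4096];
	 *   int read;
	 *   while ((read = mp3.read(buffer)) != -1) {
	 *       out.write(buffer, 0, read);
	 *   }
	 *   out.close();
	 *   mp3.close();
	 */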
/**
* Gets an InputStream to MP3Data for the returned information from a request
* @param synthText List of Strings you want to be synthesized into MP3 data
* @return Returns an input stream of all the MP3 data that is returned from Google
* @throws IOException Throws exception if it cannot complete the request
*/
public InputStream getMP3Data(List<String> synthText) throws IOException{
//Uses an executor service pool for concurrency. Limit to 1000 threads max.
ExecutorService pool = Executors.newFixedThreadPool(1000);
//Stores the Future (Data that will be returned in the future)
Set<Future<InputStream>> set = new LinkedHashSet<Future<InputStream>>(synthText.size());
for(String part: synthText){ //Iterates through the list
Callable<InputStream> callable = new MP3DataFetcher(part);//Creates Callable
Future<InputStream> future = pool.submit(callable);//Begins to run Callable
set.add(future);//Adds the response that will be returned to a set.
}
List<InputStream> inputStreams = new ArrayList<InputStream>(set.size());
for(Future<InputStream> future: set){
try {
inputStreams.add(future.get());//Gets the returned data from the future.
} catch (ExecutionException e) {//Thrown if the MP3DataFetcher encountered an error.
Throwable ex = e.getCause();
if(ex instanceof IOException){
throw (IOException)ex;//Downcasts and rethrows it.
}
} catch (InterruptedException e){//Will probably never be called, but just in case...
Thread.currentThread().interrupt();//Interrupts the thread since something went wrong.
}
}
return new SequenceInputStream(Collections.enumeration(inputStreams));//Sequences the stream.
}
/**
* Separates a string into smaller parts so that Google will not reject the request.
* @param input The string you want to separate
* @return A List<String> of the String fragments from your input.
*/
private List<String> parseString(String input){
return parseString (input, new ArrayList<String>());
}
/**
* Separates a string into smaller parts so that Google will not reject the request.
* @param input The string you want to break up into smaller parts
* @param fragments The List<String> that the fragments are added to.
* If you don't have a List<String> already constructed "new ArrayList<String>()" works well.
* @return A list of the fragments of the original String
*/
private List<String> parseString(String input, List<String> fragments){
if(input.length()<=100){//Base Case
fragments.add(input);
return fragments;
}
else{
int lastWord = findLastWord(input);//Checks if a space exists
if(lastWord<=0){
fragments.add(input.substring(0,100));//In case you sent gibberish to Google.
return parseString(input.substring(100), fragments);
}else{
fragments.add(input.substring(0,lastWord));//Otherwise, adds the last word to the list for recursion.
return parseString(input.substring(lastWord), fragments);
}
}
}
/**
* Finds the last word in your String (before the index of 99) by searching for spaces and ending punctuation.
* Will preferably parse on punctuation to alleviate mid-sentence pausing
* @param input The String you want to search through.
* @return The index of where the last word of the string ends before the index of 99.
*/
private int findLastWord(String input){
if(input.length()<100)
return input.length();
int space = -1;
for(int i = 99; i>0; i--){
char tmp = input.charAt(i);
if(isEndingPunctuation(tmp)){
return i+1;
}
if(space==-1 && tmp == ' '){
space = i;
}
}
if(space>0){
return space;
}
return -1;
}
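	/*
	 * Illustrative example of the splitting logic above: for a 130-character input with
	 * no ending punctuation whose last space before index 100 sits at index 85,
	 * findLastWord returns 85, so parseString emits input.substring(0, 85) as one
	 * fragment and recurses on input.substring(85). If an ending punctuation mark sits
	 * at index 90, the cut happens just after it, at index 91, even if spaces occur at
	 * higher indices.
	 */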
/**
* Checks if char is an ending character
* Ending punctuation for all languages according to Wikipedia (Except for Sanskrit non-unicode)
* @param input The char you want to check
* @return True if it is, false if not.
*/
private boolean isEndingPunctuation(char input){
return input == '.' || input == '!' || input == '?' || input == ';' || input == ':' || input == '|';
}
/**
* Automatically determines the language of the original text
* @param text represents the text you want to check the language of
* @return the languageCode in ISO-639
* @throws IOException if it cannot complete the request
*/
public String detectLanguage(String text) throws IOException{
return GoogleTranslate.detectLanguage(text);
}
/**
* This class is a callable.
* A callable is like a runnable except that it can return data and throw exceptions.
* Useful when using futures. Dramatically improves the speed of execution.
* @author Aaron Gokaslan (Skylion)
*/
private class MP3DataFetcher implements Callable<InputStream>{
private String synthText;
public MP3DataFetcher(String synthText){
this.synthText = synthText;
}
public InputStream call() throws IOException{
return getMP3Data(synthText);
}
}
}

View file

@@ -0,0 +1,303 @@
package com.darkprograms.speech.synthesiser;
import java.io.IOException;
import java.io.InputStream;
import java.io.SequenceInputStream;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import com.darkprograms.speech.translator.GoogleTranslate;
/**
* This class uses the V2 version of Google's Text to Speech API. While this class requires an API key,
* the endpoint allows for additional specification of parameters including speed and pitch.
* See the constructor for instructions regarding the API_Key.
* @author Skylion (Aaron Gokaslan)
*/
public class SynthesiserV2 {
private static final String GOOGLE_SYNTHESISER_URL = "https://www.google.com/speech-api/v2/synthesize?enc=mpeg" +
"&client=chromium";
/**
* API_KEY used for requests
*/
private final String API_KEY;
/**
* language of the Text you want to translate
*/
private String languageCode;
/**
* The pitch of the generated audio
*/
private double pitch = 1.0;
/**
* The speed of the generated audio
*/
private double speed = 1.0;
/**
* Constructor
* @param API_KEY The API-Key for Google's Speech API. An API key can be obtained by requesting
* one by following the process shown at this
* <a href="http://www.chromium.org/developers/how-tos/api-keys">url</a>.
*/
public SynthesiserV2(String API_KEY){
this.API_KEY = API_KEY;
}
/**
* Returns the current language code for the Synthesiser.
* Example: English (generic) = en, English (US) = en-US, English (UK) = en-GB, Spanish = es.
* @return the current language code parameter
*/
public String getLanguage(){
return languageCode;
}
/**
* Note: set language to auto to enable automatic language detection.
* Setting to null will also implement Google's automatic language detection
* @param languageCode The language code you would like to modify languageCode to.
*/
public void setLanguage(String languageCode){
this.languageCode = languageCode;
}
/**
* @return the pitch
*/
public double getPitch() {
return pitch;
}
/**
* Sets the pitch of the audio.
* Valid values range from 0 to 2 inclusive.
* Values above 1 correspond to higher pitch, values below 1 correspond to lower pitch.
* @param pitch the pitch to set
*/
public void setPitch(double pitch) {
this.pitch = pitch;
}
/**
* @return the speed
*/
public double getSpeed() {
return speed;
}
/**
* Sets the speed of audio.
* Valid values range from 0 to 2 inclusive.
* Values above 1 correspond to faster speech; values below 1 correspond to slower speech.
* @param speed the speed to set
*/
public void setSpeed(double speed) {
this.speed = speed;
}
/**
* Gets an input stream to MP3 data for the returned information from a request
*
* @param synthText Text you want to be synthesized into MP3 data
* @return Returns an input stream of the MP3 data that is returned from Google
* @throws IOException Throws exception if it can not complete the request
*/
public InputStream getMP3Data(String synthText) throws IOException{
String languageCode = this.languageCode;//Ensures retention of language settings if set to auto
if(languageCode == null || languageCode.equals("") || languageCode.equalsIgnoreCase("auto")){
try{
languageCode = detectLanguage(synthText);//Detects language
if(languageCode == null){
languageCode = "en-us";//Reverts to Default Language if it can't detect it.
}
}
catch(Exception ex){
ex.printStackTrace();
languageCode = "en-us";//Reverts to Default Language if it can't detect it.
}
}
if(synthText.length()>100){
List<String> fragments = parseString(synthText);//parses String if too long
String tmp = getLanguage();
setLanguage(languageCode);//Keeps it from autodetecting each fragment.
InputStream out = getMP3Data(fragments);
			setLanguage(tmp);//Reverts it to its previous language, such as auto.
return out;
}
String encoded = URLEncoder.encode(synthText, "UTF-8"); //Encode
StringBuilder sb = new StringBuilder(GOOGLE_SYNTHESISER_URL);
sb.append("&key=" + API_KEY);
sb.append("&text=" + encoded);
sb.append("&lang=" + languageCode);
if(speed>=0 && speed<=2.0){
sb.append("&speed=" + speed/2.0);
}
if(pitch>=0 && pitch<=2.0){
sb.append("&pitch=" + pitch/2.0);
}
URL url = new URL(sb.toString()); //create url
// Open New URL connection channel.
URLConnection urlConn = url.openConnection(); //Open connection
urlConn.addRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:2.0) Gecko/20100101 Firefox/4.0"); //Adding header for user agent is required
return urlConn.getInputStream();
}
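	/*
	 * Usage sketch (illustrative; "YOUR_API_KEY" is a placeholder, not a real key):
	 *
	 *   SynthesiserV2 synth = new SynthesiserV2("YOUR_API_KEY");
	 *   synth.setLanguage("en-GB");
	 *   synth.setSpeed(1.2);  // slightly faster than normal
	 *   synth.setPitch(0.8);  // slightly lower than normal
	 *   InputStream mp3 = synth.getMP3Data("Good morning");
	 *   // copy or play the stream as needed, then close it
	 */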
/**
* Gets an InputStream to MP3Data for the returned information from a request
* @param synthText List of Strings you want to be synthesized into MP3 data
* @return Returns an input stream of all the MP3 data that is returned from Google
* @throws IOException Throws exception if it cannot complete the request
*/
public InputStream getMP3Data(List<String> synthText) throws IOException{
//Uses an executor service pool for concurrency. Limit to 1000 threads max.
ExecutorService pool = Executors.newFixedThreadPool(1000);
//Stores the Future (Data that will be returned in the future)
Set<Future<InputStream>> set = new LinkedHashSet<Future<InputStream>>(synthText.size());
for(String part: synthText){ //Iterates through the list
Callable<InputStream> callable = new MP3DataFetcher(part);//Creates Callable
Future<InputStream> future = pool.submit(callable);//Begins to run Callable
set.add(future);//Adds the response that will be returned to a set.
}
List<InputStream> inputStreams = new ArrayList<InputStream>(set.size());
for(Future<InputStream> future: set){
try {
inputStreams.add(future.get());//Gets the returned data from the future.
} catch (ExecutionException e) {//Thrown if the MP3DataFetcher encountered an error.
Throwable ex = e.getCause();
if(ex instanceof IOException){
throw (IOException)ex;//Downcasts and rethrows it.
}
} catch (InterruptedException e){//Will probably never be called, but just in case...
Thread.currentThread().interrupt();//Interrupts the thread since something went wrong.
}
}
return new SequenceInputStream(Collections.enumeration(inputStreams));//Sequences the stream.
}
/**
* Separates a string into smaller parts so that Google will not reject the request.
* @param input The string you want to separate
* @return A List<String> of the String fragments from your input.
*/
private List<String> parseString(String input){
return parseString (input, new ArrayList<String>());
}
/**
* Separates a string into smaller parts so that Google will not reject the request.
* @param input The string you want to break up into smaller parts
* @param fragments The List<String> that the fragments are added to.
* If you don't have a List<String> already constructed "new ArrayList<String>()" works well.
* @return A list of the fragments of the original String
*/
private List<String> parseString(String input, List<String> fragments){
if(input.length()<=100){//Base Case
fragments.add(input);
return fragments;
}
else{
int lastWord = findLastWord(input);//Checks if a space exists
if(lastWord<=0){
fragments.add(input.substring(0,100));//In case you sent gibberish to Google.
return parseString(input.substring(100), fragments);
}else{
fragments.add(input.substring(0,lastWord));//Otherwise, adds the last word to the list for recursion.
return parseString(input.substring(lastWord), fragments);
}
}
}
/**
* Finds the last word in your String (before the index of 99) by searching for spaces and ending punctuation.
* Will preferably parse on punctuation to alleviate mid-sentence pausing
* @param input The String you want to search through.
* @return The index of where the last word of the string ends before the index of 99.
*/
private int findLastWord(String input){
if(input.length()<100)
return input.length();
int space = -1;
for(int i = 99; i>0; i--){
char tmp = input.charAt(i);
if(isEndingPunctuation(tmp)){
return i+1;
}
if(space==-1 && tmp == ' '){
space = i;
}
}
if(space>0){
return space;
}
return -1;
}
/**
* Checks if char is an ending character
* Ending punctuation for all languages according to Wikipedia (Except for Sanskrit non-unicode)
* @param input The char you want to check
* @return True if it is, false if not.
*/
private boolean isEndingPunctuation(char input){
return input == '.' || input == '!' || input == '?' || input == ';' || input == ':' || input == '|';
}
/**
* Automatically determines the language of the original text
* @param text represents the text you want to check the language of
* @return the languageCode in ISO-639
* @throws IOException if it cannot complete the request
*/
public String detectLanguage(String text) throws IOException{
return GoogleTranslate.detectLanguage(text);
}
/**
* This class is a callable.
* A callable is like a runnable except that it can return data and throw exceptions.
* Useful when using futures. Dramatically improves the speed of execution.
* @author Aaron Gokaslan (Skylion)
*/
private class MP3DataFetcher implements Callable<InputStream>{
private String synthText;
public MP3DataFetcher(String synthText){
this.synthText = synthText;
}
public InputStream call() throws IOException{
return getMP3Data(synthText);
}
}
}

View file

@@ -0,0 +1,168 @@
package com.darkprograms.speech.translator;
import java.io.IOException;
import java.io.Reader;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.nio.charset.Charset;
import java.util.Locale;
/***************************************************************************************************************
* An API for a Google Translation service in Java.
* Please Note: This API is unofficial and is not supported by Google. Subject to breakage at any time.
* The translator allows for language detection and translation.
* Recommended for translation of user interfaces or speech commands.
* All translation services provided via Google Translate
* @author Aaron Gokaslan (Skylion)
***************************************************************************************************************/
public final class GoogleTranslate { //Class marked as final since all methods are static
/**
* URL to query for Translation
*/
private final static String GOOGLE_TRANSLATE_URL = "http://translate.google.com/translate_a/t?client=t";
/**
* Private to prevent instantiation
*/
private GoogleTranslate(){};
/**
* Converts the ISO-639 code into a friendly language code in the user's default language
* For example, if the language is English and the default locale is French, it will return "anglais"
* Useful for UI Strings
* @param languageCode The ISO-639-1 language code
* @return The language in the user's default language
* @see {@link #detectLanguage}
*/
public static String getDisplayLanguage(String languageCode){
return (new Locale(languageCode)).getDisplayLanguage();
}
/**
* Automatically determines the language of the original text
* @param text represents the text you want to check the language of
* @return The ISO-639 code for the language
* @throws IOException if it cannot complete the request
*/
public static String detectLanguage(String text) throws IOException{
String encoded = URLEncoder.encode(text, "UTF-8"); //Encodes the string
URL url = new URL(GOOGLE_TRANSLATE_URL + "&text=" + encoded); //Generates URL
String rawData = urlToText(url);//Gets text from Google
return findLanguage(rawData);
}
/**
* Automatically translates text to a system's default language according to its locale
* Useful for creating international applications as you can translate UI strings
* @param text The text you want to translate
* @return The translated text
* @throws IOException if cannot complete request
*/
public static String translate(String text) throws IOException{
return translate(Locale.getDefault().getLanguage(), text);
}
/**
* Automatically detects language and translate to the targetLanguage
* @param targetLanguage The language you want to translate into in ISO-639 format
* @param text The text you actually want to translate
* @return The translated text.
* @throws IOException if it cannot complete the request
*/
public static String translate(String targetLanguage, String text) throws IOException{
return translate("auto",targetLanguage, text);
}
/**
* Translate text from sourceLanguage to targetLanguage
* Specifying the sourceLanguage greatly improves accuracy over short Strings
* @param sourceLanguage The language you want to translate from in ISO-639 format
* @param targetLanguage The language you want to translate into in ISO-639 format
* @param text The text you actually want to translate
* @return the translated text.
* @throws IOException if it cannot complete the request
*/
public static String translate(String sourceLanguage, String targetLanguage, String text) throws IOException{
String encoded = URLEncoder.encode(text, "UTF-8"); //Encode
//Generates URL
URL url = new URL(GOOGLE_TRANSLATE_URL + "&sl=" + sourceLanguage + "&tl=" + targetLanguage + "&text=" + encoded);
String rawData = urlToText(url);//Gets text from Google
if(rawData==null){
return null;
}
String[] raw = rawData.split("\"");//Parses the JSON
if(raw.length<2){
return null;
}
return raw[1];//Returns the translation
}
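	/*
	 * Usage sketch (illustrative; the detected code "fr" is an expected value, not a
	 * guarantee). All methods are static, so no instance is needed.
	 *
	 *   String detected = GoogleTranslate.detectLanguage("Bonjour tout le monde"); // e.g. "fr"
	 *   String english  = GoogleTranslate.translate("en", "Bonjour tout le monde");
	 *   String explicit = GoogleTranslate.translate("fr", "en", "Bonjour tout le monde");
	 */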
/**
* Converts a URL to Text
* @param url that you want to generate a String from
* @return The generated String
* @throws IOException if it cannot complete the request
*/
private static String urlToText(URL url) throws IOException{
URLConnection urlConn = url.openConnection(); //Open connection
//Adding header for user agent is required. Otherwise, Google rejects the request
urlConn.addRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:2.0) Gecko/20100101 Firefox/4.0");
Reader r = new java.io.InputStreamReader(urlConn.getInputStream(), Charset.forName("UTF-8"));//Gets Data Converts to string
StringBuilder buf = new StringBuilder();
while (true) {//Reads String from buffer
int ch = r.read();
if (ch < 0)
break;
buf.append((char) ch);
}
String str = buf.toString();
return str;
}
/**
* Searches the raw response data for the language code.
* @param rawData the raw String directly from Google that you want to search through
* @return The language code parsed from the rawData, or null if it cannot be determined.
*/
private static String findLanguage(String rawData){
for(int i = 0; i+5<rawData.length(); i++){
boolean dashDetected = rawData.charAt(i+4)=='-';
if(rawData.charAt(i)==',' && rawData.charAt(i+1)== '"'
&& ((rawData.charAt(i+4)=='"' && rawData.charAt(i+5)==',')
|| dashDetected)){
if(dashDetected){
int lastQuote = rawData.substring(i+2).indexOf('"');
if(lastQuote>0)
return rawData.substring(i+2,i+2+lastQuote);
}
else{
String possible = rawData.substring(i+2,i+4);
if(containsLettersOnly(possible)){//Required due to Google's inconsistent formatting.
return possible;
}
}
}
}
return null;
}
/**
* Checks if all characters in text are letters.
* @param text The text you want to determine the validity of.
* @return True if all characters are letters, otherwise false.
*/
private static boolean containsLettersOnly(String text){
for(int i = 0; i<text.length(); i++){
if(!Character.isLetter(text.charAt(i))){
return false;
}
}
return true;
}
}

View file

@@ -0,0 +1,190 @@
package com.darkprograms.speech.util;
//TODO Replace this class with something that isn't 20 years old.
//ChunkedOutputStream - an OutputStream that implements HTTP/1.1 chunking
//
//Copyright (C) 1996 by Jef Poskanzer <jef@acme.com>. All rights reserved.
//
//Redistribution and use in source and binary forms, with or without
//modification, are permitted provided that the following conditions
//are met:
//1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
//THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
//ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
//IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
//ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
//FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
//DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
//OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
//HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
//LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
//OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
//SUCH DAMAGE.
//
//Visit the ACME Labs Java page for up-to-date versions of this and other
//fine Java utilities: http://www.acme.com/java/
import java.io.*;
import java.util.*;
/// An OutputStream that implements HTTP/1.1 chunking.
//<P>
//This class lets a Servlet send its response data as an HTTP/1.1 chunked
//stream. Chunked streams are a way to send arbitrary-length data without
//having to know beforehand how much you're going to send. They are
//introduced by a "Transfer-Encoding: chunked" header, so you have to
//set that header when you make one of these streams.
//<P>
//Sample usage:
//<BLOCKQUOTE><PRE><CODE>
//res.setHeader( "Transfer-Encoding", "chunked" );
//OutputStream out = res.getOutputStream();
//ChunkedOutputStream chunkOut = new ChunkedOutputStream( out );
//(write data to chunkOut instead of out)
//(optionally set footers)
//chunkOut.done();
//</CODE></PRE></BLOCKQUOTE>
//<P>
//Every time the stream gets flushed, a chunk is sent. When done()
//is called, an empty chunk is sent, marking the end of the chunked
//stream as per the chunking spec.
//<P>
//<A HREF="/resources/classes/Acme/Serve/servlet/http/ChunkedOutputStream.java">Fetch the software.</A><BR>
//<A HREF="/resources/classes/Acme.tar.Z">Fetch the entire Acme package.</A>
public class ChunkedOutputStream extends BufferedOutputStream
{
/// Make a ChunkedOutputStream with a default buffer size.
// @param out the underlying output stream
public ChunkedOutputStream( OutputStream out )
{
super( out );
}
/// Make a ChunkedOutputStream with a specified buffer size.
// @param out the underlying output stream
// @param size the buffer size
public ChunkedOutputStream( OutputStream out, int size )
{
super( out, size );
}
/// Flush the stream. This will write any buffered output
// bytes as a chunk.
// @exception IOException if an I/O error occurred
public synchronized void flush() throws IOException
{
if ( count != 0 )
{
writeBuf( buf, 0, count );
count = 0;
}
}
private Vector footerNames = new Vector();
private Vector footerValues = new Vector();
/// Set a footer. Footers are much like HTTP headers, except that
// they come at the end of the data instead of at the beginning.
public void setFooter( String name, String value )
{
footerNames.addElement( name );
footerValues.addElement( value );
}
/// Indicate the end of the chunked data by sending a zero-length chunk,
// possible including footers.
// @exception IOException if an I/O error occurred
public void done() throws IOException
{
flush();
PrintStream pout = new PrintStream( out );
pout.println( "0" );
if ( footerNames.size() > 0 )
{
// Send footers.
for ( int i = 0; i < footerNames.size(); ++i )
{
String name = (String) footerNames.elementAt( i );
String value = (String) footerValues.elementAt( i );
pout.println( name + ": " + value );
}
}
footerNames = null;
footerValues = null;
pout.println( "" );
pout.flush();
}
/// Make sure that calling close() terminates the chunked stream.
public void close() throws IOException
{
if ( footerNames != null )
done();
super.close();
}
/// Write a sub-array of bytes.
// <P>
// The only reason we have to override the BufferedOutputStream version
// of this is that it writes the array directly to the output stream
	// if it doesn't fit in the buffer. So we make it use our own chunk-write
// routine instead. Otherwise this is identical to the parent-class
// version.
// @param b the data to be written
// @param off the start offset in the data
// @param len the number of bytes that are written
// @exception IOException if an I/O error occurred
public synchronized void write( byte b[], int off, int len ) throws IOException
{
int avail = buf.length - count;
if ( len <= avail )
{
System.arraycopy( b, off, buf, count, len );
count += len;
return;
}
flush();
writeBuf( b, off, len );
}
private static final byte[] crlf = { 13, 10 };
private byte[] lenBytes = new byte[20]; // big enough for any number in hex
/// The only routine that actually writes to the output stream.
// This is where chunking semantics are implemented.
// @exception IOException if an I/O error occurred
private void writeBuf( byte b[], int off, int len ) throws IOException
{
// Write the chunk length as a hex number.
String lenStr = Integer.toString( len, 16 );
		lenStr.getBytes( 0, lenStr.length(), lenBytes, 0 );
		out.write( lenBytes, 0, lenStr.length() );  // write only the hex digits, not the whole 20-byte buffer
// Write a CRLF.
out.write( crlf );
// Write the data.
if ( len != 0 )
out.write( b, off, len );
// Write a CRLF.
out.write( crlf );
// And flush the real stream.
out.flush();
}
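	// Concrete example of the wire format produced above (for illustration): a call with
	// len == 2048 emits the ASCII hex length "800", a CRLF, the 2048 data bytes, and a
	// trailing CRLF, as required by the HTTP/1.1 chunked transfer coding.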
}

View file

@@ -0,0 +1,120 @@
package com.darkprograms.speech.util;
/*************************************************************************
* Compilation: javac Complex.java
* Execution: java Complex
*
* Data type for complex numbers.
*
* The data type is "immutable" so once you create and initialize
* a Complex object, you cannot change it. The "final" keyword
* when declaring re and im enforces this rule, making it a
* compile-time error to change the .re or .im fields after
* they've been initialized.
*
* Class based off of Princeton University's Complex.java class
* @author Aaron Gokaslan, Princeton University
*************************************************************************/
public class Complex {
private final double re; // the real part
private final double im; // the imaginary part
// create a new object with the given real and imaginary parts
public Complex(double real, double imag) {
re = real;
im = imag;
}
// return a string representation of the invoking Complex object
public String toString() {
if (im == 0) return re + "";
if (re == 0) return im + "i";
if (im < 0) return re + " - " + (-im) + "i";
return re + " + " + im + "i";
}
// return abs/modulus/magnitude and angle/phase/argument
public double abs() { return Math.hypot(re, im); } // Math.sqrt(re*re + im*im)
public double phase() { return Math.atan2(im, re); } // between -pi and pi
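    // Quick worked example (illustrative): for z = new Complex(3, 4), i.e. 3 + 4i,
    //   z.abs()       == 5.0
    //   z.conjugate() is 3.0 - 4.0i
    //   z.plus(new Complex(1, -2)) is 4.0 + 2.0i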
// return a new Complex object whose value is (this + b)
public Complex plus(Complex b) {
Complex a = this; // invoking object
double real = a.re + b.re;
double imag = a.im + b.im;
return new Complex(real, imag);
}
// return a new Complex object whose value is (this - b)
public Complex minus(Complex b) {
Complex a = this;
double real = a.re - b.re;
double imag = a.im - b.im;
return new Complex(real, imag);
}
// return a new Complex object whose value is (this * b)
public Complex times(Complex b) {
Complex a = this;
double real = a.re * b.re - a.im * b.im;
double imag = a.re * b.im + a.im * b.re;
return new Complex(real, imag);
}
// scalar multiplication
// return a new object whose value is (this * alpha)
public Complex times(double alpha) {
return new Complex(alpha * re, alpha * im);
}
// return a new Complex object whose value is the conjugate of this
public Complex conjugate() { return new Complex(re, -im); }
// return a new Complex object whose value is the reciprocal of this
public Complex reciprocal() {
double scale = re*re + im*im;
return new Complex(re / scale, -im / scale);
}
// return the real or imaginary part
public double re() { return re; }
public double im() { return im; }
// return a / b
public Complex divides(Complex b) {
Complex a = this;
return a.times(b.reciprocal());
}
// return a new Complex object whose value is the complex exponential of this
public Complex exp() {
return new Complex(Math.exp(re) * Math.cos(im), Math.exp(re) * Math.sin(im));
}
// return a new Complex object whose value is the complex sine of this
public Complex sin() {
return new Complex(Math.sin(re) * Math.cosh(im), Math.cos(re) * Math.sinh(im));
}
// return a new Complex object whose value is the complex cosine of this
public Complex cos() {
return new Complex(Math.cos(re) * Math.cosh(im), -Math.sin(re) * Math.sinh(im));
}
// return a new Complex object whose value is the complex tangent of this
public Complex tan() {
return sin().divides(cos());
}
    // returns the magnitude of the complex number (equivalent to abs())
public double getMagnitude(){
return Math.sqrt(re*re+im*im);
}
public boolean equals(Complex other){
return (re==other.re) && (im==other.im);
}
}

View file

@@ -0,0 +1,133 @@
package com.darkprograms.speech.util;
/*************************************************************************
* Compilation: javac FFT.java
* Execution: java FFT N
* Dependencies: Complex.java
*
* Compute the FFT and inverse FFT of a length N complex sequence.
* Bare bones implementation that runs in O(N log N) time. Our goal
* is to optimize the clarity of the code, rather than performance.
*
* Limitations
* -----------
* - assumes N is a power of 2
*
* - not the most memory efficient algorithm (because it uses
* an object type for representing complex numbers and because
* it re-allocates memory for the subarray, instead of doing
* in-place or reusing a single temporary array)
*
*************************************************************************/
/*************************************************************************
* @author Aaron Gokaslan (Skylion), implementation
* @author Princeton University, original algorithm
************************************************************************/
public class FFT {
// compute the FFT of x[], assuming its length is a power of 2
public static Complex[] fft(Complex[] x) {
int N = x.length;
// base case
if (N == 1) return new Complex[] { x[0] };
// radix 2 Cooley-Tukey FFT
if (N % 2 != 0) { throw new RuntimeException("N is not a power of 2"); }
// fft of even terms
Complex[] even = new Complex[N/2];
for (int k = 0; k < N/2; k++) {
even[k] = x[2*k];
}
Complex[] q = fft(even);
// fft of odd terms
Complex[] odd = even; // reuse the array
for (int k = 0; k < N/2; k++) {
odd[k] = x[2*k + 1];
}
Complex[] r = fft(odd);
// combine
Complex[] y = new Complex[N];
for (int k = 0; k < N/2; k++) {
double kth = -2 * k * Math.PI / N;
Complex wk = new Complex(Math.cos(kth), Math.sin(kth));
y[k] = q[k].plus(wk.times(r[k]));
y[k + N/2] = q[k].minus(wk.times(r[k]));
}
return y;
}
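    // Quick sanity check (illustrative): the FFT of the constant sequence {1, 1, 1, 1}
    // is {4, 0, 0, 0} -- all of the energy lands in the DC bin.
    //   Complex one = new Complex(1, 0);
    //   Complex[] y = FFT.fft(new Complex[] { one, one, one, one });
    //   // y[0] == 4 + 0i, y[1] == y[2] == y[3] == 0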
// compute the inverse FFT of x[], assuming its length is a power of 2
public static Complex[] ifft(Complex[] x) {
int N = x.length;
Complex[] y = new Complex[N];
// take conjugate
for (int i = 0; i < N; i++) {
y[i] = x[i].conjugate();
}
// compute forward FFT
y = fft(y);
// take conjugate again
for (int i = 0; i < N; i++) {
y[i] = y[i].conjugate();
}
// divide by N
for (int i = 0; i < N; i++) {
y[i] = y[i].times(1.0 / N);
}
return y;
}
// compute the circular convolution of x and y
public static Complex[] cconvolve(Complex[] x, Complex[] y) {
// should probably pad x and y with 0s so that they have same length
// and are powers of 2
if (x.length != y.length) { throw new RuntimeException("Dimensions don't agree"); }
int N = x.length;
// compute FFT of each sequence
Complex[] a = fft(x);
Complex[] b = fft(y);
// point-wise multiply
Complex[] c = new Complex[N];
for (int i = 0; i < N; i++) {
c[i] = a[i].times(b[i]);
}
// compute inverse FFT
return ifft(c);
}
// compute the linear convolution of x and y
public static Complex[] convolve(Complex[] x, Complex[] y) {
Complex ZERO = new Complex(0, 0);
Complex[] a = new Complex[2*x.length];
for (int i = 0; i < x.length; i++) a[i] = x[i];
for (int i = x.length; i < 2*x.length; i++) a[i] = ZERO;
Complex[] b = new Complex[2*y.length];
for (int i = 0; i < y.length; i++) b[i] = y[i];
for (int i = y.length; i < 2*y.length; i++) b[i] = ZERO;
return cconvolve(a, b);
}
}

View file

@@ -0,0 +1,69 @@
package com.darkprograms.speech.util;
/**
* A string utility class for commonly used methods.
* These methods are particularly useful for parsing.
* @author Skylion
*/
public class StringUtil {
private StringUtil() {};//Prevents instantiation
/**
* Removes quotation marks from beginning and end of string.
* @param s The string you want to remove the quotation marks from.
* @return The modified String.
*/
public static String stripQuotes(String s) {
int start = 0;
if( s.startsWith("\"") ) {
start = 1;
}
int end = s.length();
if( s.endsWith("\"") ) {
end = s.length() - 1;
}
return s.substring(start, end);
}
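	// Example (illustrative): stripQuotes("\"hello\"") returns "hello";
	// a string without surrounding quotes is returned unchanged.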
/**
* Returns the first instance of String found exclusively between part1 and part2.
* @param s The String you want to substring.
* @param part1 The beginning of the String you want to search for.
* @param part2 The end of the String you want to search for.
* @return The String between part1 and part2.
* If the s does not contain part1 or part2, the method returns null.
*/
public static String substringBetween(String s, String part1, String part2) {
String sub = null;
int i = s.indexOf(part1);
int j = s.indexOf(part2, i + part1.length());
if (i != -1 && j != -1) {
int nStart = i + part1.length();
sub = s.substring(nStart, j);
}
return sub;
}
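	// Example (illustrative): substringBetween("rate=16000;", "rate=", ";") returns "16000";
	// if either marker is missing, the method returns null.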
/**
* Gets the string exclusively between the first instance of part1 and the last instance of part2.
* @param s The string you want to trim.
* @param part1 The term to trim after the first instance of.
* @param part2 The term to trim before the last instance of.
* @return The trimmed String
*/
public static String trimString(String s, String part1, String part2){
if(!s.contains(part1) || !s.contains(part2)){
return null;
}
int first = s.indexOf(part1) + part1.length() + 1;
String tmp = s.substring(first);
int last = tmp.lastIndexOf(part2);
tmp = tmp.substring(0, last);
return tmp;
}
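	// Example (illustrative): trimString("x[abc]y", "[", "]") returns "bc" -- the "+ 1"
	// above skips one extra character after part1 (here the "a"), which lets callers that
	// pass "[" drop the "{" that typically follows it in Google's JSON responses.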
}