package com.darkprograms.speech.recognizer; import java.io.File; import java.io.IOException; import java.io.OutputStream; import java.net.MalformedURLException; import java.net.URL; import java.net.URLConnection; import java.nio.file.Files; import java.util.ArrayList; import java.util.List; import java.util.Scanner; import javaFlacEncoder.FLACFileWriter; import javax.net.ssl.HttpsURLConnection; import javax.sound.sampled.AudioFormat; import javax.sound.sampled.AudioInputStream; import javax.sound.sampled.AudioSystem; import javax.sound.sampled.LineUnavailableException; import javax.sound.sampled.TargetDataLine; import com.darkprograms.speech.util.ChunkedOutputStream; import com.darkprograms.speech.util.StringUtil; /** * A class for using Google's Duplex Speech API. Allows for continuous recognition. Requires an API-Key. * A duplex API opens two connections. One to an upstream and one to a downstream. The system allows * for continuous chunking on both up and downstream. This, in turn, allows for Google to return data * as data is sent to it. For this reason, this class uses listeners. * @author Skylion (Aaron Gokaslan), Robert Rowntree. 
*/ public class GSpeechDuplex{ //TODO Cleanup Printlns /** * Minimum value for ID */ private static final long MIN = 10000000; /** * Maximum value for ID */ private static final long MAX = 900000009999999L; /** * The base URL for the API */ private static final String GOOGLE_DUPLEX_SPEECH_BASE = "https://www.google.com/speech-api/full-duplex/v1/"; /** * Stores listeners */ private List responseListeners = new ArrayList(); /** * User defined API-KEY */ private final String API_KEY; /** * User-defined language */ private String language = "auto"; /** * The maximum size the API will tolerate */ private final static int MAX_SIZE = 1048576; /** * Per specification, the final chunk of in a ChunkedOutputStream */ private final static byte[] FINAL_CHUNK = new byte[] { '0', '\r', '\n', '\r', '\n' }; /** * Constructor * @param API_KEY The API-Key for Google's Speech API. An API key can be obtained by requesting * one by following the process shown at this * url. */ public GSpeechDuplex(String API_KEY){ this.API_KEY = API_KEY; } /** * Temporary will be deprecated before release */ public String getLanguage(){ return language; } /** * Temporary will be deprecated before release */ public void setLanguage(String language){ this.language = language; } /** * Send a FLAC file with the specified sampleRate to the Duplex API * @param flacFile The file you wish to upload. * NOTE: Segment the file if duration is greater than 15 seconds. * @param sampleRate The sample rate of the file. * @throws IOException If something has gone wrong with reading the file */ public void recognize(File flacFile, int sampleRate) throws IOException{ recognize(mapFileIn(flacFile), sampleRate); } /** * Send a byte[] to the URL with a specified sampleRate. * NOTE: The byte[] should contain no more than 15 seconds of audio. * Chunking is not fully implemented as of yet. Will not string data together for context yet. * @param data The byte[] you want to send. 
* @param sampleRate The sample rate of aforementioned byte array. */ public void recognize(byte[] data, int sampleRate){ if(data.length >= MAX_SIZE){//Temporary Chunking. Does not allow for Google to gather context. System.out.println("Chunking the audio into smaller parts..."); byte[][] dataArray = chunkAudio(data); for(byte[]array: dataArray){ recognize(array, sampleRate); } } //Generates a unique ID for the response. final long PAIR = MIN + (long)(Math.random() * ((MAX - MIN) + 1L)); //Generates the Downstream URL final String API_DOWN_URL = GOOGLE_DUPLEX_SPEECH_BASE + "down?maxresults=1&pair=" + PAIR; //Generates the Upstream URL final String API_UP_URL = GOOGLE_DUPLEX_SPEECH_BASE + "up?lang=" + language + "&lm=dictation&client=chromium&pair=" + PAIR + "&key=" + API_KEY ; //Opens downChannel this.downChannel(API_DOWN_URL); //Opens upChannel this.upChannel(API_UP_URL, chunkAudio(data), sampleRate); } /** * This method allows you to stream a continuous stream of data to the API. *

* <p>Note: This feature is experimental.

* @param tl * @param af * @throws IOException * @throws LineUnavailableException */ public void recognize(TargetDataLine tl, AudioFormat af) throws IOException, LineUnavailableException{ //Generates a unique ID for the response. final long PAIR = MIN + (long)(Math.random() * ((MAX - MIN) + 1L)); //Generates the Downstream URL final String API_DOWN_URL = GOOGLE_DUPLEX_SPEECH_BASE + "down?maxresults=1&pair=" + PAIR; //Generates the Upstream URL final String API_UP_URL = GOOGLE_DUPLEX_SPEECH_BASE + "up?lang=" + language + "&lm=dictation&client=chromium&pair=" + PAIR + "&key=" + API_KEY + "&continuous"; //Tells Google to constantly monitor the stream; //TODO Add implementation that sends feedback in real time. Protocol buffers will be necessary. //Opens downChannel this.downChannel(API_DOWN_URL); //Opens upChannel this.upChannel(API_UP_URL, tl, af); } /** * This code opens a new Thread that connects to the downstream URL. Due to threading, * the best way to handle this is through the use of listeners. * @param The URL you want to connect to. */ private void downChannel(String urlStr) { final String url = urlStr; new Thread ("Downstream Thread") { public void run() { // handler for DOWN channel http response stream - httpsUrlConn // response handler should manage the connection.... ?? // assign a TIMEOUT Value that exceeds by a safe factor // the amount of time that it will take to write the bytes // to the UPChannel in a fashion that mimics a liveStream // of the audio at the applicable Bitrate. BR=sampleRate * bits per sample // Note that the TLS session uses "* SSLv3, TLS alert, Client hello (1): " // to wake up the listener when there are additional bytes. // The mechanics of the TLS session should be transparent. Just use // httpsUrlConn and allow it enough time to do its work. 
Scanner inStream = openHttpsConnection(url); if(inStream == null){ //ERROR HAS OCCURED } while(inStream.hasNextLine()){ String response = inStream.nextLine(); System.out.println("Response: "+response); if(response.length()>17){//Prevents blank responses from Firing GoogleResponse gr = new GoogleResponse(); parseResponse(response, gr); fireResponseEvent(gr); } } inStream.close(); System.out.println("Finished write on down stream..."); } }.start(); } /** * Used to initiate the URL chunking for the upChannel. * @param urlStr The URL string you want to upload 2 * @param data The data you want to send to the URL * @param sampleRate The specified sample rate of the data. */ private void upChannel(String urlStr, byte[][] data, int sampleRate) { final String murl = urlStr; final byte[][] mdata = data; final int mSampleRate = sampleRate; new Thread ("Upstream File Thread") { public void run() { openHttpsPostConnection(murl, mdata, mSampleRate); //Google does not return data via this URL } }.start(); } /** * Streams data from the TargetDataLine to the API. * @param urlStr The URL to stream to * @param tl The target data line to stream from. * @param af The AudioFormat to stream with. * @throws LineUnavailableException If cannot open or stream the TargetDataLine. */ private void upChannel(String urlStr, TargetDataLine tl, AudioFormat af) throws LineUnavailableException{ final String murl = urlStr; final TargetDataLine mtl = tl; final AudioFormat maf = af; if(!mtl.isOpen()){ mtl.open(maf); mtl.start(); } new Thread ("Upstream Thread") { public void run() { openHttpsPostConnection(murl, mtl, maf); } }.start(); } /** * Opens a HTTPS connection to the specified URL string * @param urlStr The URL you want to visit * @return The Scanner to access aforementioned data. 
*/ private Scanner openHttpsConnection(String urlStr) { int resCode = -1; try { URL url = new URL(urlStr); URLConnection urlConn = url.openConnection(); if (!(urlConn instanceof HttpsURLConnection)) { throw new IOException ("URL is not an Https URL"); } HttpsURLConnection httpConn = (HttpsURLConnection)urlConn; httpConn.setAllowUserInteraction(false); // TIMEOUT is required httpConn.setInstanceFollowRedirects(true); httpConn.setRequestMethod("GET"); httpConn.connect(); resCode = httpConn.getResponseCode(); if (resCode == HttpsURLConnection.HTTP_OK) { return new Scanner(httpConn.getInputStream()); } else{ System.out.println("Error: " + resCode); } } catch (MalformedURLException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } return null; } /** * Opens a HTTPSPostConnection that posts data from a TargetDataLine input * @param murl The URL you want to post to. * @param mtl The TargetDataLine you want to post data from. Note should be open * @param maf The AudioFormat of the data you want to post */ private void openHttpsPostConnection(final String murl, final TargetDataLine mtl, final AudioFormat maf) { URL url; try { url = new URL(murl); URLConnection urlConn = url.openConnection(); if (!(urlConn instanceof HttpsURLConnection)) { throw new IOException ("URL is not an Https URL"); } HttpsURLConnection httpConn = (HttpsURLConnection)urlConn; httpConn.setAllowUserInteraction(false); httpConn.setInstanceFollowRedirects(true); httpConn.setRequestMethod("POST"); httpConn.setDoOutput(true); httpConn.setChunkedStreamingMode(0); httpConn.setRequestProperty("Transfer-Encoding", "chunked"); httpConn.setRequestProperty("Content-Type", "audio/x-flac; rate=" + (int)maf.getSampleRate()); // also worked with ("Content-Type", "audio/amr; rate=8000"); httpConn.connect(); // this opens a connection, then sends POST & headers. 
OutputStream out = httpConn.getOutputStream(); //Note : if the audio is more than 15 seconds // dont write it to UrlConnInputStream all in one block as this sample does. // Rather, segment the byteArray and on intermittently, sleeping thread // supply bytes to the urlConn Stream at a rate that approaches // the bitrate ( =30K per sec. in this instance ). System.out.println("Starting to write data to output..."); AudioInputStream ais = new AudioInputStream(mtl); ChunkedOutputStream os = new ChunkedOutputStream(out); AudioSystem.write(ais, FLACFileWriter.FLAC, os); out.write(FINAL_CHUNK); System.out.println("IO WRITE DONE"); out.close(); // do you need the trailer? // NOW you can look at the status. int resCode = httpConn.getResponseCode(); if (resCode / 100 != 2) { System.out.println("ERROR"); } }catch(Exception ex){ ex.printStackTrace(); } } /** * Opens a chunked HTTPS post connection and returns a Scanner with incoming data from Google Server * Used for to get UPStream * Chunked HTTPS ensures unlimited file size. * @param urlStr The String for the URL * @param data The data you want to send the server * @param sampleRate The sample rate of the flac file. * @return A Scanner to access the server response. 
(Probably will never be used) {@code null} if the upload failed or the response code was not 200.
     */
    private Scanner openHttpsPostConnection(String urlStr, byte[][] data, int sampleRate){
        int resCode = -1;
        try {
            URL url = new URL(urlStr);
            URLConnection urlConn = url.openConnection();
            if (!(urlConn instanceof HttpsURLConnection)) {
                throw new IOException ("URL is not an Https URL");
            }
            HttpsURLConnection httpConn = (HttpsURLConnection)urlConn;
            httpConn.setAllowUserInteraction(false);
            httpConn.setInstanceFollowRedirects(true);
            httpConn.setRequestMethod("POST");
            httpConn.setDoOutput(true);
            httpConn.setChunkedStreamingMode(0);
            httpConn.setRequestProperty("Transfer-Encoding", "chunked");
            httpConn.setRequestProperty("Content-Type", "audio/x-flac; rate=" + sampleRate);
            // also worked with ("Content-Type", "audio/amr; rate=8000");
            httpConn.connect();

            try {
                // this opens a connection, then sends POST & headers.
                // The request stream is closed before the status is read, the same
                // order the TargetDataLine variant of this method uses.
                try (OutputStream out = httpConn.getOutputStream()) {
                    //Note : if the audio is more than 15 seconds
                    // dont write it to UrlConnInputStream all in one block as this sample does.
                    // Rather, segment the byteArray and on intermittently, sleeping thread
                    // supply bytes to the urlConn Stream at a rate that approaches
                    // the bitrate ( =30K per sec. in this instance ).
                    System.out.println("Starting to write");
                    for(byte[] dataArray: data){
                        out.write(dataArray); // one big block supplied instantly to the underlying chunker wont work for duration > 15 s.
                        try {
                            Thread.sleep(1000);//Delays the Audio so Google thinks its a mic.
                        } catch (InterruptedException e) {
                            e.printStackTrace();
                        }
                    }
                    out.write(FINAL_CHUNK);
                    System.out.println("IO WRITE DONE");
                }
                // do you need the trailer?
                // NOW you can look at the status.
                resCode = httpConn.getResponseCode();
                if (resCode / 100 != 2) {
                    System.out.println("ERROR");
                }
            } catch (IOException e) {
                // Report the upload failure instead of hiding it. resCode keeps its
                // previous value, so the "HELP" branch below still runs.
                e.printStackTrace();
            }

            if (resCode == HttpsURLConnection.HTTP_OK) {
                // Decode explicitly as UTF-8 instead of the platform default charset.
                // NOTE(review): assumes the service replies in UTF-8 - confirm.
                return new Scanner(httpConn.getInputStream(), "UTF-8");
            }
            else{
                System.out.println("HELP: " + resCode);
            }
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Converts the file into a byte[] by reading it fully into memory.
     * @param infile The File you want to get the byte[] from.
     * @return The byte[]
     * @throws IOException if something goes wrong in reading the file.
     */
    private byte[] mapFileIn(File infile) throws IOException{
        return Files.readAllBytes(infile.toPath());
    }

    // NOTE(review): the line below is reproduced unchanged from the original; parseResponse and chunkAudio are garbled/truncated in this view and were not edited.
    /** * Parses the String into a GoogleResponse object * @param rawResponse The String you want to parse * @param gr the GoogleResponse object to save the data into. */ private void parseResponse(String rawResponse, GoogleResponse gr){ if(rawResponse == null || !rawResponse.contains("\"result\"") || rawResponse.equals("{\"result\":[]}")){ return; } if(rawResponse.contains("\"confidence\":")){ String confidence = StringUtil.substringBetween(rawResponse, "\"confidence\":", "}"); gr.setConfidence(confidence); } else{ gr.setConfidence(String.valueOf(1d)); } String array = StringUtil.trimString(rawResponse, "[", "]"); if(array.contains("[")){ array = StringUtil.trimString(array, "[", "]"); } if(array.contains("\"confidence\":")){//Removes confidence phrase if it exists. array = array.substring(0, array.lastIndexOf(',')); } String[] parts = array.split(","); gr.setResponse(parseTranscript(parts[0])); for(int i = 1; i= MAX_SIZE){//If larger than 1MB int frame = MAX_SIZE/2; int numOfChunks = (int)(data.length/((double)frame)) + 1; byte[][] data2D = new byte[numOfChunks][]; for(int i = 0, j = 0; i