Added voice control

Former-commit-id: 6f69079bf44f0d8f9ae40de6b0f1638d103464c2
2015-05-13 21:14:10 +00:00 · 2015-05-13 21:14:10 +00:00 · 53da641909
commit 53da641909
parent 35c92407a3
863 changed files with 192681 additions and 0 deletions
--- a/lib/java-speech-api-master/src/com/darkprograms/speech/microphone/Microphone.java
+++ b/lib/java-speech-api-master/src/com/darkprograms/speech/microphone/Microphone.java
@ -0,0 +1,224 @@
+package com.darkprograms.speech.microphone;
+
+import javax.sound.sampled.*;
+
+import java.io.Closeable;
+import java.io.File;
+
+/***************************************************************************
+ * Microphone class that contains methods to capture audio from microphone
+ *
+ * @author Luke Kuza, Aaron Gokaslan
+ ***************************************************************************/
+public class Microphone implements Closeable{
+	
+    /**
+     * TargetDataLine variable to receive data from microphone
+     */
+    private TargetDataLine targetDataLine;
+
+    /**
+     * Lifecycle states reported by {@link #getState()}.
+     */
+    public enum CaptureState {
+        /** The capture thread is recording audio and/or saving it to a file. */
+        PROCESSING_AUDIO,
+
+        /** The capture thread is setting variables before recording starts. */
+        STARTING_CAPTURE,
+
+        /** Nothing is being done; no audio is being captured. */
+        CLOSED
+    }
+
+    /**
+     * Variable for enum
+     */
+    CaptureState state;
+
+    /**
+     * Variable for the audios saved file type
+     */
+    private AudioFileFormat.Type fileType;
+
+    /**
+     * Variable that holds the saved audio file
+     */
+    private File audioFile;
+
+    /**
+     * Gets the current state of Microphone.
+     * <p>
+     * The state is written by the constructor (CLOSED), by
+     * {@code captureAudioToFile} (STARTING_CAPTURE), by {@code open}
+     * (PROCESSING_AUDIO) and by {@code close} (CLOSED).
+     *
+     * @return PROCESSING_AUDIO is returned when the Thread is recording Audio and/or saving it to a file<br>
+     *         STARTING_CAPTURE is returned if the Thread is setting variables<br>
+     *         CLOSED is returned if the Thread is not doing anything/not capturing audio
+     */
+    public CaptureState getState() {
+        return state;
+    }
+
+    /**
+     * Sets the current state of Microphone.
+     * Private: only this class's own lifecycle methods change the state.
+     *
+     * @param state State from enum
+     */
+    private void setState(CaptureState state) {
+        this.state = state;
+    }
+
+    /**
+     * Gets the file that captured audio is saved to.
+     *
+     * @return The destination file, or null if none has been set yet
+     */
+    public File getAudioFile() {
+        return audioFile;
+    }
+
+    /**
+     * Sets the file that captured audio is saved to.
+     * Note that captureAudioToFile(File) calls this with its own argument.
+     *
+     * @param audioFile The destination file
+     */
+    public void setAudioFile(File audioFile) {
+        this.audioFile = audioFile;
+    }
+
+    /**
+     * Gets the file type the audio is saved as.
+     *
+     * @return The file type passed to the constructor or to setFileType
+     */
+    public AudioFileFormat.Type getFileType() {
+        return fileType;
+    }
+
+    /**
+     * Sets the file type the audio is saved as.
+     *
+     * @param fileType File type to save the audio in, e.g. AudioFileFormat.Type.WAVE
+     */
+    public void setFileType(AudioFileFormat.Type fileType) {
+        this.fileType = fileType;
+    }
+
+    /**
+     * Gets the line that microphone data is read from.
+     *
+     * @return The current TargetDataLine, or null if none has been set
+     */
+    public TargetDataLine getTargetDataLine() {
+        return targetDataLine;
+    }
+
+    /**
+     * Replaces the line that microphone data is read from.
+     * Only the reference is stored; neither the old nor the new line is
+     * opened, stopped or closed by this call.
+     *
+     * @param targetDataLine The line to read microphone data from
+     */
+    public void setTargetDataLine(TargetDataLine targetDataLine) {
+        this.targetDataLine = targetDataLine;
+    }
+    
+    
+    /**
+     * Creates a microphone in the CLOSED state and tries to obtain a capture line.
+     * <p>
+     * The state and file type are assigned directly rather than through the
+     * public setter, so an overridden setFileType in a subclass is not invoked
+     * while the object is still being constructed.
+     *
+     * @param fileType File type to save the audio in<br>
+     *                 Example, to save as WAVE use AudioFileFormat.Type.WAVE
+     */
+    public Microphone(AudioFileFormat.Type fileType) {
+        this.state = CaptureState.CLOSED;
+        this.fileType = fileType;
+        initTargetDataLine();
+    }
+
+    /**
+     * Initializes the target data line by asking the AudioSystem for a
+     * TargetDataLine that matches {@link #getAudioFormat()}.
+     * <p>
+     * This is best-effort: if no line can be obtained the failure is printed
+     * and the target data line is left unchanged (null on first use), which
+     * callers check for before reading.
+     */
+    private void initTargetDataLine(){
+        DataLine.Info dataLineInfo = new DataLine.Info(TargetDataLine.class, getAudioFormat());
+        try {
+            setTargetDataLine((TargetDataLine) AudioSystem.getLine(dataLineInfo));
+        } catch (LineUnavailableException e) {
+            // A matching line exists but cannot be handed out right now.
+            e.printStackTrace();
+        } catch (IllegalArgumentException e) {
+            // The system supports no line matching dataLineInfo (e.g. no capture device).
+            // Without this catch the unchecked exception would escape the constructor.
+            e.printStackTrace();
+        }
+    }
+
+
+    /**
+     * Captures audio from the microphone and saves it to a file.
+     * The capture runs on a newly started background thread; this method
+     * returns as soon as that thread has been started.
+     *
+     * @param audioFile The File to save the audio to
+     * @throws LineUnavailableException if no capture line could be obtained;
+     *         the state is reset to CLOSED and no thread is started
+     */
+    public void captureAudioToFile(File audioFile) throws LineUnavailableException {
+        setState(CaptureState.STARTING_CAPTURE);
+        setAudioFile(audioFile);
+
+        if(getTargetDataLine() == null){
+            initTargetDataLine();
+        }
+        if(getTargetDataLine() == null){
+            // Fail here with the declared exception instead of letting the
+            // capture thread die on a NullPointerException with the state stuck.
+            setState(CaptureState.CLOSED);
+            throw new LineUnavailableException("No microphone line is available for capture");
+        }
+
+        //Get Audio
+        new Thread(new CaptureThread()).start();
+    }
+
+    /**
+     * Convenience overload of captureAudioToFile(File) that takes the
+     * destination as a path string.
+     *
+     * @param audioFile The full path (String) to the file you want to save the audio in
+     * @throws LineUnavailableException declared by the File overload this delegates to
+     */
+    public void captureAudioToFile(String audioFile) throws LineUnavailableException {
+        captureAudioToFile(new File(audioFile));
+    }
+
+	
+    /**
+     * The audio format to capture and save in: 8000 Hz, 16-bit, mono,
+     * signed, little-endian.
+     *
+     * @return Returns AudioFormat to be used later when capturing audio from microphone
+     */
+    public AudioFormat getAudioFormat() {
+        final float sampleRate = 8000.0F;   // options noted by the author: 8000,11025,16000,22050,44100
+        final int sampleSizeInBits = 16;    // 8 or 16
+        final int channels = 1;             // 1 or 2
+        final boolean signed = true;
+        final boolean bigEndian = false;
+        return new AudioFormat(sampleRate, sampleSizeInBits, channels, signed, bigEndian);
+    }
+
+    /**
+     * Opens the microphone, starting the targetDataLine.
+     * If it's already open, it does nothing.
+     * <p>
+     * If no capture line can be obtained, or the line cannot be opened, the
+     * state is set to CLOSED and the method returns without throwing.
+     * The state becomes PROCESSING_AUDIO only after the line has been opened
+     * and started.
+     */
+    public void open(){
+        if(getTargetDataLine()==null){
+            initTargetDataLine();
+        }
+        TargetDataLine line = getTargetDataLine();
+        if(line == null){
+            // initTargetDataLine() is best-effort and may leave the line unset.
+            setState(CaptureState.CLOSED);
+            return;
+        }
+        if(!line.isOpen() && !line.isRunning() && !line.isActive()){
+            try {
+                line.open(getAudioFormat());
+                line.start();
+                setState(CaptureState.PROCESSING_AUDIO);
+            } catch (LineUnavailableException e) {
+                e.printStackTrace();
+                // Do not report PROCESSING_AUDIO for a line that never opened.
+                setState(CaptureState.CLOSED);
+            }
+        }
+    }
+
+    /**
+     * Close the microphone capture by stopping and closing the target data line.
+     * The capture thread writes to the file until the line is closed.<br>
+     * If already closed, this does nothing.
+     */
+    @Override
+    public void close() {
+        if (getState() == CaptureState.CLOSED) {
+            return;
+        }
+        TargetDataLine line = getTargetDataLine();
+        // The line is null when it could never be obtained; there is nothing to stop then.
+        if (line != null) {
+            line.stop();
+            line.close();
+        }
+        setState(CaptureState.CLOSED);
+    }
+
+    /**
+     * Runnable that opens the microphone and streams its audio into the
+     * configured file using the configured file type.
+     */
+    private class CaptureThread implements Runnable {
+
+        /**
+         * Opens the line and writes from it to the file. Any exception is
+         * printed and ends the run.
+         */
+        public void run() {
+            try {
+                final AudioFileFormat.Type outputType = getFileType();
+                final File destination = getAudioFile();
+                open();
+                final AudioInputStream micStream = new AudioInputStream(getTargetDataLine());
+                //Will write to File until it's closed.
+                AudioSystem.write(micStream, outputType, destination);
+            } catch (Exception ex) {
+                ex.printStackTrace();
+            }
+        }
+    }
+
+}
--- a/lib/java-speech-api-master/src/com/darkprograms/speech/microphone/MicrophoneAnalyzer.java
+++ b/lib/java-speech-api-master/src/com/darkprograms/speech/microphone/MicrophoneAnalyzer.java
@ -0,0 +1,288 @@
+package com.darkprograms.speech.microphone;
+
+import javax.sound.sampled.AudioFileFormat;
+import com.darkprograms.speech.util.*;
+
+/********************************************************************************************
+ * Microphone Analyzer class, detects pitch and volume while extending the microphone class.
+ * Implemented as a precursor to a Voice Activity Detection (VAD) algorithm.
+ * Currently can be used for audio data analysis.
+ * Dependencies: FFT.java & Complex.java. Both found in the utility package.
+ * @author Aaron Gokaslan
+ ********************************************************************************************/
+
+public class MicrophoneAnalyzer extends Microphone {
+
+	/**
+	 * Constructor. Delegates entirely to the Microphone constructor; the
+	 * analyzer adds no state of its own.
+	 * @param fileType The file type you want to save in. FLAC recommended.
+	 */
+	public MicrophoneAnalyzer(AudioFileFormat.Type fileType){
+		super(fileType);
+	}
+	
+    /**
+     * Gets the volume of the microphone input over the default interval.
+     * The interval is 100ms, so allow 100ms for this method to run in your
+     * code or call the overload with a smaller interval.
+     * @return The volume of the microphone input or -1 if data-line is not available
+     */
+    public int getAudioVolume(){
+        final int defaultIntervalMillis = 100;
+        return getAudioVolume(defaultIntervalMillis);
+    }
+    
+    /**
+     * Gets the volume of the microphone input over a caller-chosen interval.
+     * @param interval The length of time, in milliseconds, to calculate the volume over.
+     * @return The volume of the microphone input or -1 if data-line is not available.
+     */
+    public int getAudioVolume(int interval){
+        final double seconds = interval/1000d;
+        final int byteCount = this.getNumOfBytes(seconds);
+        return calculateAudioVolume(byteCount);
+    }
+    
+    /**
+     * Reads the requested number of bytes from the microphone and returns
+     * their volume.
+     * @param numOfBytes The number of bytes you want for volume interpretation
+     * @return The volume over the specified number of bytes or -1 if data-line is unavailable.
+     */
+    private int calculateAudioVolume(int numOfBytes){
+        final byte[] captured = getBytes(numOfBytes);
+        return (captured == null) ? -1 : calculateRMSLevel(captured);
+    }
+    
+    /**
+     * Calculates the volume of AudioData which may be buffered data from a data-line.
+     * <p>
+     * The volume is the root-mean-square deviation of the byte values from
+     * their mean, rounded to the nearest integer. Every byte is treated as one
+     * signed sample.
+     * @param audioData The byte[] you want to determine the volume of
+     * @return the calculated volume of audioData, or 0 if audioData is empty
+     */
+	public static int calculateRMSLevel(byte[] audioData){
+		if(audioData.length == 0){
+			return 0; // no samples: avoids a division by zero below
+		}
+
+		long sum = 0;
+		for(byte value : audioData){
+			sum += value;
+		}
+		// Divide in floating point; long/int division would truncate the mean
+		// toward zero and skew every deviation computed from it.
+		final double mean = (double) sum / audioData.length;
+
+		double sumOfSquares = 0d;
+		for(byte value : audioData){
+			final double deviation = value - mean;
+			sumOfSquares += deviation * deviation;
+		}
+
+		final double meanSquare = sumOfSquares / audioData.length;
+		return (int)(Math.sqrt(meanSquare) + 0.5); // +0.5 rounds to nearest
+	}
+	
+	/**
+	 * Returns the number of bytes the microphone produces over a whole number
+	 * of seconds; useful when figuring out how long to record.
+	 * @param seconds The length in seconds
+	 * @return the number of bytes the microphone will save.
+	 */
+	public int getNumOfBytes(int seconds){
+		final double wholeSeconds = seconds;
+		return getNumOfBytes(wholeSeconds);
+	}
+	
+	/**
+	 * Returns the number of bytes the microphone produces over an interval;
+	 * useful when figuring out how long to record. Computed as
+	 * seconds x sample rate x frame size, rounded to the nearest integer.
+	 * @param seconds The length in seconds
+	 * @return the number of bytes the microphone will output over the specified time.
+	 */
+	public int getNumOfBytes(double seconds){
+		final double byteCount =
+				seconds*getAudioFormat().getSampleRate()*getAudioFormat().getFrameSize();
+		return (int)(byteCount+.5);
+	}
+	
+	/**
+	 * Reads up to the specified number of bytes from the target data line.
+	 * <p>
+	 * The line may hand back fewer bytes than requested. The returned array
+	 * holds only the bytes actually read, so callers never analyse zero padding.
+	 * @param numOfBytes The number of bytes to request.
+	 * @return The bytes read (length at most numOfBytes), or null if there is
+	 *         no data line or nothing could be read.
+	 */
+	private byte[] getBytes(int numOfBytes){
+		if(getTargetDataLine()==null){
+			return null;//If data cannot be read, returns a null array.
+		}
+		byte[] data = new byte[numOfBytes];
+		int bytesRead = this.getTargetDataLine().read(data, 0, numOfBytes);
+		if(bytesRead <= 0){
+			return null;//Nothing was delivered, so there is nothing to analyse.
+		}
+		if(bytesRead < numOfBytes){
+			byte[] trimmed = new byte[bytesRead];
+			System.arraycopy(data, 0, trimmed, 0, bytesRead);
+			return trimmed;
+		}
+		return data;
+	}
+	
+
+	/**
+	 * Calculates the fundamental frequency from 4096 bytes of microphone data.
+	 * In other words, it calculates pitch, except pitch is far more subjective
+	 * and subtle. Readings may occasionally be in error due to the complex
+	 * nature of sound. This feature is in Beta.
+	 * @return The frequency of the sound in Hertz, or -666 if the underlying
+	 *         calculation threw an exception.
+	 */
+	public int getFrequency(){
+		final int defaultByteCount = 4096;
+		try {
+			return getFrequency(defaultByteCount);
+		} catch (Exception unexpected) {
+			//The original author does not expect this to happen.
+			return -666;
+		}
+	}
+
+	/**
+	 * Calculates the frequency based off of the number of bytes. 
+	 * CAVEAT: THE NUMBER OF BYTES MUST BE A MULTIPLE OF 2!!!
+	 * NOTE(review): the FFT helper may need a power of two rather than just an
+	 * even number - confirm against FFT.fft.
+	 * <p>
+	 * The buffer is allocated one byte larger than numOfBytes, but only
+	 * numOfBytes bytes are read into it, so the final byte stays 0. The extra
+	 * byte compensates for the one element bytesToDoubleArray drops.
+	 * @param numOfBytes The number of bytes which must be a multiple of 2!!!
+	 * @return The calculated frequency in Hertz, or -1 if there is no target data line.
+	 * @throws Exception declared for callers; no checked exception is thrown
+	 *         directly in this method body
+	 */
+	public int getFrequency(int numOfBytes) throws Exception{
+		if(getTargetDataLine() == null){
+			return -1;
+		}
+		byte[] data = new byte[numOfBytes+1];//One byte is lost during conversion
+    	this.getTargetDataLine().read(data, 0, numOfBytes);
+		return getFrequency(data);
+	}
+	
+	/**
+	 * Calculates the frequency of raw audio bytes: converts them to doubles,
+	 * applies a Hanning window, runs the FFT and picks the fundamental
+	 * frequency using four harmonics.
+	 * @param bytes The audioData you want to analyze
+	 * @return The calculated frequency in Hertz.
+	 */
+	public int getFrequency(byte[] bytes){
+		final double[] windowed = applyHanningWindow(this.bytesToDoubleArray(bytes));
+		final Complex[] timeDomain = new Complex[windowed.length];
+		for(int k = 0; k<timeDomain.length; k++){
+			timeDomain[k] = new Complex(windowed[k], 0);
+		}
+		final Complex[] spectrum = FFT.fft(timeDomain);
+		return this.calculateFundamentalFrequency(spectrum, 4);
+	}
+	
+	/**
+	 * Applies a Hanning Window to the whole data set, in place.
+	 * Hanning Windows are used to increase the accuracy of the FFT.
+	 * One should always apply a window to a dataset before applying an FFT
+	 * @param data The data you want to apply the window to
+	 * @return The windowed data set (the same array that was passed in)
+	 */
+	private double[] applyHanningWindow(double[] data){
+		final int windowSize = data.length;
+		return applyHanningWindow(data, 0, windowSize);
+	}
+
+	/**
+	 * Applies a Hanning Window to part of the data set, in place.
+	 * Hanning Windows are used to increase the accuracy of the FFT.
+	 * One should always apply a window to a dataset before applying an FFT
+	 * @param signal_in The data you want to apply the window to
+	 * @param pos The starting index you want to apply a window from
+	 * @param size The size of the window
+	 * @return The windowed data set (the same array that was passed in)
+	 */
+	private double[] applyHanningWindow(double[] signal_in, int pos, int size){
+		final int end = pos + size;
+		// i walks the signal, j walks the Hann window function
+		for (int i = pos, j = 0; i < end; i++, j++){
+			final double taper = 1.0 - Math.cos(2.0 * Math.PI * j / size);
+			signal_in[i] = signal_in[i] * 0.5 * taper;
+		}
+		return signal_in;
+	}
+
+
+	/**
+	 * This method calculates the fundamental frequency using Harmonic Product Spectrum.
+	 * It down samples the FFT data N times and multiplies the resulting arrays
+	 * together element by element; the index of the largest product is taken
+	 * as the fundamental. This is slightly more computationally expensive, but
+	 * according to the original author much more accurate.
+	 * <p>
+	 * The returned value is that index multiplied by the integer bin size from
+	 * getFFTBinSize, which is computed from the length of fftData as passed in
+	 * (before the upper half is dropped). If findMaxMagnitude reports -1, the
+	 * result is the negated bin size.
+	 * @param fftData The array returned by the FFT
+	 * @param N the number of times you wish to downsample.
+	 * WARNING: The more times you downsample, the lower the maximum detectable frequency is.
+	 * @return The fundamental frequency in Hertz, or -1 if N is not positive or fftData is null
+	 */
+	private int calculateFundamentalFrequency(Complex[] fftData, int N){
+		if(N<=0 || fftData == null){ return -1; } //error case
+		
+		final int LENGTH = fftData.length;//Used to calculate bin size
+		// Keep only the first half of the transform.
+		fftData = removeNegativeFrequencies(fftData);
+		// Row i holds every (i+1)-th bin: row 0 is the spectrum itself,
+		// row 1 every second bin, and so on. Each row has fftData.length/N entries.
+		Complex[][] data = new Complex[N][fftData.length/N];
+		for(int i = 0; i<N; i++){
+			for(int j = 0; j<data[0].length; j++){
+				data[i][j] = fftData[j*(i+1)];
+			}
+		}
+		Complex[] result = new Complex[fftData.length/N];//Combines the arrays
+		for(int i = 0; i<result.length; i++){
+			// Product of the i-th entry of every row, starting from 1+0i.
+			Complex tmp = new Complex(1,0);
+			for(int j = 0; j<N; j++){
+				tmp = tmp.times(data[j][i]);
+			}
+			result[i] = tmp;
+		}
+		int index = this.findMaxMagnitude(result);
+		// Convert the winning bin index to Hertz.
+		return index*getFFTBinSize(LENGTH);
+	}
+
+	/**
+	 * Returns a new array holding only the first half of the transform
+	 * (c.length/2 elements). The elements themselves are not copied.
+	 * @param c The transform data you want to trim
+	 * @return The first half of c as a new array
+	 */
+	private Complex[] removeNegativeFrequencies(Complex[] c){
+		final int keep = c.length/2;
+		final Complex[] firstHalf = new Complex[keep];
+		System.arraycopy(c, 0, firstHalf, 0, keep);
+		return firstHalf;
+	}
+	
+	/**
+	 * Calculates the FFT bin size based off the length of the array:
+	 * the sample rate divided by the length, rounded to the nearest integer.
+	 * Each FFT bin represents the range of frequencies treated as one.
+	 * For example, if the bin size is 5 then the algorithm is precise to within 5hz.
+	 * Precondition: length cannot be 0.
+	 * @param fftDataLength The length of the array used to feed the FFT algorithm
+	 * @return FFTBin size
+	 */
+	private int getFFTBinSize(int fftDataLength){
+		final float hertzPerBin = getAudioFormat().getSampleRate()/fftDataLength;
+		return (int)(hertzPerBin+.5);
+	}
+
+	/**
+	 * Calculates index of the maximum magnitude in a complex array.
+	 * When several elements share the maximum, the first one wins.
+	 * @param input The Complex[] you want to get max magnitude from.
+	 * @return The index of the max magnitude, or -1 if input is empty
+	 */
+	private int findMaxMagnitude(Complex[] input){
+		// Start below every possible magnitude. Double.MIN_VALUE is the smallest
+		// POSITIVE double, so starting there made an all-zero array return -1.
+		double max = Double.NEGATIVE_INFINITY;
+		int index = -1;
+		for(int i = 0; i<input.length; i++){
+			double magnitude = input[i].getMagnitude();
+			if(magnitude>max){
+				max = magnitude;
+				index = i;
+			}
+		}
+		return index;
+	}
+	
+	/**
+	 * Converts bytes from a TargetDataLine into a double[] allowing the information to be read.
+	 * <p>
+	 * Samples are assembled least-significant byte first, scaled by 1/32768
+	 * and multiplied by a fixed amplification of 100.
+	 * <p>
+	 * NOTE: One byte is lost in the conversion so don't expect the arrays to be the same length!
+	 * The result has (bufferData.length - bytesPerSample + 1) elements, but only
+	 * one element is written per bytesPerSample input bytes; the remaining
+	 * trailing elements keep their default value of 0.0.
+	 * @param bufferData The buffer read in from the target data line
+	 * @return The double[] that the buffer has been converted into.
+	 */
+	private double[] bytesToDoubleArray(byte[] bufferData){
+	    final int bytesRecorded = bufferData.length;
+		final int bytesPerSample = getAudioFormat().getSampleSizeInBits()/8; 
+	    final double amplification = 100.0; // choose a number as you like
+	    double[] micBufferData = new double[bytesRecorded - bytesPerSample +1];
+	    // index steps through the bytes one sample at a time; floatIndex is the output slot.
+	    for (int index = 0, floatIndex = 0; index < bytesRecorded - bytesPerSample + 1; index += bytesPerSample, floatIndex++) {
+	        double sample = 0;
+	        for (int b = 0; b < bytesPerSample; b++) {
+	            int v = bufferData[index + b];
+	            // Mask every byte except the most significant one so only the top
+	            // byte contributes the sign; single-byte samples are masked too.
+	            if (b < bytesPerSample - 1 || bytesPerSample == 1) {
+	                v &= 0xFF;
+	            }
+	            // Byte b sits b*8 bits up: the first byte is the least significant.
+	            sample += v << (b * 8);
+	        }
+	        // The divisor is 32768 whatever the sample size.
+	        double sample32 = amplification * (sample / 32768.0);
+	        micBufferData[floatIndex] = sample32;
+	        
+	    }
+	    return micBufferData;
+	}
+	
+}