hal/lib/java-speech-api-master/src/com/darkprograms/speech/microphone/MicrophoneAnalyzer.java

package com.darkprograms.speech.microphone;

import javax.sound.sampled.AudioFileFormat;
import com.darkprograms.speech.util.*;

/********************************************************************************************
 * Microphone Analyzer class, detects pitch and volume while extending the microphone class.
 * Implemented as a precursor to a Voice Activity Detection (VAD) algorithm.
 * Currently can be used for audio data analysis.
 * Dependencies: FFT.java & Complex.java. Both found in the utility package.
 * @author Aaron Gokaslan
 ********************************************************************************************/

public class MicrophoneAnalyzer extends Microphone {

    /** Number of bytes {@link #getFrequency()} reads from the data line for one analysis. */
    private static final int DEFAULT_FREQUENCY_BYTES = 4096;

    /** Number of harmonics multiplied together by the harmonic product spectrum. */
    private static final int HARMONIC_COUNT = 4;

    /**
     * Value returned by {@link #getFrequency()} when the analysis throws.
     * Kept at its historical value so existing callers that test for it keep working.
     */
    private static final int FREQUENCY_ANALYSIS_FAILED = -666;

    /**
     * Constructor
     * @param fileType The file type you want to save in. FLAC recommended.
     */
    public MicrophoneAnalyzer(AudioFileFormat.Type fileType){
        super(fileType);
    }

    /**
     * Gets the volume of the microphone input over a 100ms interval.
     * Allow 100ms for this method to run in your code or use
     * {@link #getAudioVolume(int)} with a smaller interval.
     * @return The volume of the microphone input or -1 if data-line is not available
     */
    public int getAudioVolume(){
        return getAudioVolume(100);
    }

    /**
     * Gets the volume of the microphone input.
     * @param interval The length of time you would like to calculate the volume over in milliseconds.
     * @return The volume of the microphone input or -1 if data-line is not available.
     */
    public int getAudioVolume(int interval){
        return calculateAudioVolume(this.getNumOfBytes(interval/1000d));
    }

    /**
     * Reads the requested number of bytes from the data line and computes their volume.
     * @param numOfBytes The number of bytes you want for volume interpretation
     * @return The volume over the specified number of bytes or -1 if data-line is unavailable.
     */
    private int calculateAudioVolume(int numOfBytes){
        byte[] data = getBytes(numOfBytes);
        if(data == null){
            return -1;
        }
        return calculateRMSLevel(data);
    }

    /**
     * Calculates the volume of audio data, which may be buffered data from a data-line.
     * The result is the root-mean-square deviation of the individual byte values
     * from their mean, rounded to the nearest integer.
     * @param audioData The byte[] you want to determine the volume of
     * @return the calculated volume of audioData, or 0 if audioData is null or empty
     */
    public static int calculateRMSLevel(byte[] audioData){
        if(audioData == null || audioData.length == 0){
            return 0; // nothing to measure; also avoids a division by zero below
        }

        long sum = 0;
        for(byte value : audioData){
            sum += value;
        }
        // Divide in floating point: long/int division would truncate the mean.
        double mean = (double) sum / audioData.length;

        double sumOfSquares = 0d;
        for(byte value : audioData){
            double deviation = value - mean;
            sumOfSquares += deviation * deviation;
        }

        double meanSquare = sumOfSquares / audioData.length;
        return (int)(Math.sqrt(meanSquare) + 0.5);
    }

    /**
     * Returns the number of bytes produced over the given interval; useful when
     * figuring out how long to record.
     * @param seconds The length in seconds
     * @return the number of bytes the microphone will save.
     */
    public int getNumOfBytes(int seconds){
        return getNumOfBytes((double)seconds);
    }

    /**
     * Returns the number of bytes produced over the given interval; useful when
     * figuring out how long to record. The interval is rounded to a whole number of
     * sample frames first, so the result is always a multiple of the frame size and
     * can be handed to the data line's read method, which only accepts whole frames.
     * @param seconds The length in seconds
     * @return the number of bytes the microphone will output over the specified time.
     */
    public int getNumOfBytes(double seconds){
        int frames = (int)(seconds * getAudioFormat().getSampleRate() + .5);
        return frames * getAudioFormat().getFrameSize();
    }

    /**
     * Reads the specified number of bytes from the target data line.
     * The byte count returned by the line's read call is not checked, so any bytes
     * the line did not deliver are left at zero.
     * @param numOfBytes The length of the returned array.
     * @return The filled array, or null if no target data line is available.
     */
    private byte[] getBytes(int numOfBytes){
        if(getTargetDataLine() == null){
            return null; // If data cannot be read, returns a null array.
        }
        byte[] data = new byte[numOfBytes];
        this.getTargetDataLine().read(data, 0, numOfBytes);
        return data;
    }

    /**
     * Calculates the fundamental frequency. In other words, it calculates pitch,
     * except pitch is far more subjective and subtle. Also note that readings may
     * occasionally be in error due to the complex nature of sound. This feature is in Beta.
     * @return The frequency of the sound in Hertz, -1 if the data line is not available,
     * or -666 if the analysis failed with an exception.
     */
    public int getFrequency(){
        try {
            return getFrequency(DEFAULT_FREQUENCY_BYTES);
        } catch (Exception e) {
            // Best effort: report failure through the historical sentinel instead of throwing.
            return FREQUENCY_ANALYSIS_FAILED;
        }
    }

    /**
     * Reads the given number of bytes from the data line and calculates their frequency.
     * CAVEAT: THE NUMBER OF BYTES MUST BE A MULTIPLE OF 2!!!
     * NOTE(review): the sample count handed to FFT.fft is numOfBytes divided by the
     * bytes per sample; a radix-2 FFT would need that count to be a power of two,
     * not just even. Confirm against the FFT utility class.
     * @param numOfBytes The number of bytes to read, which must be a multiple of 2!!!
     * @return The calculated frequency in Hertz, or -1 if the data line is not available.
     * @throws Exception kept in the signature for compatibility with existing callers
     */
    public int getFrequency(int numOfBytes) throws Exception{
        if(getTargetDataLine() == null){
            return -1;
        }
        byte[] data = new byte[numOfBytes];
        this.getTargetDataLine().read(data, 0, numOfBytes);
        return getFrequency(data);
    }

    /**
     * Calculates the frequency of the audio held in the byte array.
     * The bytes are decoded into samples, a Hanning window is applied, the result is
     * run through the FFT and the peak of the harmonic product spectrum is returned.
     * Trailing bytes that do not make up a complete sample are ignored.
     * @param bytes The audioData you want to analyze
     * @return The calculated frequency in Hertz, or -1 if bytes is null or holds no
     * complete sample.
     */
    public int getFrequency(byte[] bytes){
        if(bytes == null){
            return -1;
        }
        double[] audioData = applyHanningWindow(this.bytesToDoubleArray(bytes));
        if(audioData.length == 0){
            return -1;
        }
        Complex[] complex = new Complex[audioData.length];
        for(int i = 0; i < complex.length; i++){
            complex[i] = new Complex(audioData[i], 0);
        }
        Complex[] fftTransformed = FFT.fft(complex);
        return this.calculateFundamentalFrequency(fftTransformed, HARMONIC_COUNT);
    }

    /**
     * Applies a Hanning Window to the whole data set, in place.
     * Hanning Windows are used to increase the accuracy of the FFT.
     * One should always apply a window to a dataset before applying an FFT.
     * @param data The data you want to apply the window to
     * @return The windowed data set (the same array instance as data)
     */
    private double[] applyHanningWindow(double[] data){
        return applyHanningWindow(data, 0, data.length);
    }

    /**
     * Applies a Hanning Window to part of the data set, in place.
     * Hanning Windows are used to increase the accuracy of the FFT.
     * One should always apply a window to a dataset before applying an FFT.
     * @param signal_in The data you want to apply the window to
     * @param pos The starting index you want to apply a window from
     * @param size The size of the window
     * @return The windowed data set (the same array instance as signal_in)
     */
    private double[] applyHanningWindow(double[] signal_in, int pos, int size){
        for(int i = pos; i < pos + size; i++){
            int j = i - pos; // j = index into Hann window function
            signal_in[i] = signal_in[i] * 0.5 * (1.0 - Math.cos(2.0 * Math.PI * j / size));
        }
        return signal_in;
    }

    /**
     * Calculates the fundamental frequency using the Harmonic Product Spectrum.
     * For every candidate bin the spectrum values at 1x, 2x, ... Nx that bin are
     * multiplied together; the bin with the largest product magnitude is taken as the
     * fundamental. This is slightly more computationally expensive than picking the
     * raw maximum, but much more accurate, because it suppresses harmonic peaks.
     * @param fftData The array returned by the FFT
     * @param N the number of harmonics to multiply (how often the spectrum is downsampled).
     * WARNING: The more times you downsample, the lower the maximum detectable frequency is.
     * @return The fundamental frequency in Hertz, or -1 if the arguments are invalid or
     * no peak could be determined.
     */
    private int calculateFundamentalFrequency(Complex[] fftData, int N){
        if(N <= 0 || fftData == null || fftData.length == 0){ return -1; } //error case

        final int fftLength = fftData.length; // bin width depends on the full FFT length
        Complex[] spectrum = removeNegativeFrequencies(fftData);

        // Only bins whose N-th multiple is still inside the spectrum can be evaluated.
        Complex[] product = new Complex[spectrum.length / N];
        for(int bin = 0; bin < product.length; bin++){
            Complex accumulated = new Complex(1, 0);
            for(int harmonic = 1; harmonic <= N; harmonic++){
                accumulated = accumulated.times(spectrum[bin * harmonic]);
            }
            product[bin] = accumulated;
        }

        int peak = this.findMaxMagnitude(product);
        if(peak < 0){
            return -1;
        }
        // Multiply by the exact bin width and round once; rounding the bin width first
        // would scale the rounding error by the peak index.
        return (int) Math.round(peak * getFFTBinWidth(fftLength));
    }

    /**
     * Keeps the first half of the transform and drops the second half, which is
     * redundant for real-valued input such as sound.
     * @param c The data you want to remove the redundant half from
     * @return A new array holding the first c.length/2 elements of c
     */
    private Complex[] removeNegativeFrequencies(Complex[] c){
        Complex[] out = new Complex[c.length / 2];
        System.arraycopy(c, 0, out, 0, out.length);
        return out;
    }

    /**
     * Calculates the width of one FFT bin in Hertz from the length of the FFT input.
     * Each bin represents a range of frequencies treated as one. For example, if the
     * bin width is 5 then the algorithm is precise to within 5hz.
     * Precondition: fftDataLength cannot be 0.
     * @param fftDataLength The length of the array used to feed the FFT algorithm
     * @return The exact (unrounded) bin width in Hertz
     */
    private double getFFTBinWidth(int fftDataLength){
        return (double) getAudioFormat().getSampleRate() / fftDataLength;
    }

    /**
     * Calculates the index of the maximum magnitude in a complex array.
     * When several elements share the maximum, the first one wins.
     * @param input The Complex[] you want to get max magnitude from.
     * @return The index of the max magnitude, or -1 if the array is empty or no
     * element has a comparable magnitude
     */
    private int findMaxMagnitude(Complex[] input){
        // Start below every possible magnitude. Double.MIN_VALUE is the smallest
        // POSITIVE double, so using it would skip an all-zero (silent) spectrum.
        double max = Double.NEGATIVE_INFINITY;
        int index = -1;
        for(int i = 0; i < input.length; i++){
            double magnitude = input[i].getMagnitude();
            if(magnitude > max){
                max = magnitude;
                index = i;
            }
        }
        return index;
    }

    /**
     * Converts bytes from a TargetDataLine into a double[] allowing the information to be read.
     * Each group of bytesPerSample bytes is combined low byte first into one sample,
     * scaled by 1/32768 and amplified. Trailing bytes that do not form a complete
     * sample are ignored, so the result holds bufferData.length / bytesPerSample values.
     * @param bufferData The buffer read in from the target data line
     * @return The double[] that the buffer has been converted into; empty if the audio
     * format reports less than one byte per sample.
     */
    private double[] bytesToDoubleArray(byte[] bufferData){
        final int bytesPerSample = getAudioFormat().getSampleSizeInBits() / 8;
        if(bytesPerSample <= 0){
            return new double[0]; // cannot step through the buffer without a sample size
        }
        final double amplification = 100.0; // choose a number as you like
        final int sampleCount = bufferData.length / bytesPerSample;
        double[] micBufferData = new double[sampleCount];
        for(int sampleIndex = 0; sampleIndex < sampleCount; sampleIndex++){
            final int offset = sampleIndex * bytesPerSample;
            double sample = 0;
            for(int b = 0; b < bytesPerSample; b++){
                int v = bufferData[offset + b];
                // Only the most significant byte of a multi-byte sample keeps its sign;
                // every other byte (and a lone 8-bit byte) is taken as unsigned.
                if(b < bytesPerSample - 1 || bytesPerSample == 1){
                    v &= 0xFF;
                }
                sample += v << (b * 8);
            }
            micBufferData[sampleIndex] = amplification * (sample / 32768.0);
        }
        return micBufferData;
    }

}
Added voice control Former-commit-id: 6f69079bf44f0d8f9ae40de6b0f1638d103464c2 2015-05-13 21:14:10 +00:00			`package com.darkprograms.speech.microphone;`

			`import javax.sound.sampled.AudioFileFormat;`
			`import com.darkprograms.speech.util.*;`

			`/********************************************************************************************`
			`* Microphone Analyzer class, detects pitch and volume while extending the microphone class.`
			`* Implemented as a precursor to a Voice Activity Detection (VAD) algorithm.`
			`* Currently can be used for audio data analysis.`
			`* Dependencies: FFT.java & Complex.java. Both found in the utility package.`
			`* @author Aaron Gokaslan`
			`********************************************************************************************/`

			`public class MicrophoneAnalyzer extends Microphone {`

			`/**`
			`* Constructor`
			`* @param fileType The file type you want to save in. FLAC recommended.`
			`*/`
			`public MicrophoneAnalyzer(AudioFileFormat.Type fileType){`
			`super(fileType);`
			`}`

			`/**`
			`* Gets the volume of the microphone input`
			`* Interval is 100ms so allow 100ms for this method to run in your code or specify smaller interval.`
			`* @return The volume of the microphone input or -1 if data-line is not available`
			`*/`
			`public int getAudioVolume(){`
			`return getAudioVolume(100);`
			`}`

			`/**`
			`* Gets the volume of the microphone input`
			`* @param interval: The length of time you would like to calculate the volume over in milliseconds.`
			`* @return The volume of the microphone input or -1 if data-line is not available.`
			`*/`
			`public int getAudioVolume(int interval){`
			`return calculateAudioVolume(this.getNumOfBytes(interval/1000d));`
			`}`

			`/**`
			`* Gets the volume of microphone input`
			`* @param numOfBytes The number of bytes you want for volume interpretation`
			`* @return The volume over the specified number of bytes or -1 if data-line is unavailable.`
			`*/`
			`private int calculateAudioVolume(int numOfBytes){`
			`byte[] data = getBytes(numOfBytes);`
			`if(data==null)`
			`return -1;`
			`return calculateRMSLevel(data);`
			`}`

			`/**`
			`* Calculates the volume of AudioData which may be buffered data from a data-line.`
			`* @param audioData The byte[] you want to determine the volume of`
			`* @return the calculated volume of audioData`
			`*/`
			`public static int calculateRMSLevel(byte[] audioData){`
			`long lSum = 0;`
			`for(int i=0; i<audioData.length; i++)`
			`lSum = lSum + audioData[i];`

			`double dAvg = lSum / audioData.length;`

			`double sumMeanSquare = 0d;`
			`for(int j=0; j<audioData.length; j++)`
			`sumMeanSquare = sumMeanSquare + Math.pow(audioData[j] - dAvg, 2d);`

			`double averageMeanSquare = sumMeanSquare / audioData.length;`
			`return (int)(Math.pow(averageMeanSquare,0.5d) + 0.5);`
			`}`

			`/**`
			`* Returns the number of bytes over interval for useful when figuring out how long to record.`
			`* @param seconds The length in seconds`
			`* @return the number of bytes the microphone will save.`
			`*/`
			`public int getNumOfBytes(int seconds){`
			`return getNumOfBytes((double)seconds);`
			`}`

			`/**`
			`* Returns the number of bytes over interval for useful when figuring out how long to record.`
			`* @param seconds The length in seconds`
			`* @return the number of bytes the microphone will output over the specified time.`
			`*/`
			`public int getNumOfBytes(double seconds){`
			`return (int)(seconds*getAudioFormat().getSampleRate()*getAudioFormat().getFrameSize()+.5);`
			`}`

			`/**`
			`* Returns the a byte[] containing the specified number of bytes`
			`* @param numOfBytes The length of the returned array.`
			`* @return The specified array or null if it cannot.`
			`*/`
			`private byte[] getBytes(int numOfBytes){`
			`if(getTargetDataLine()!=null){`
			`byte[] data = new byte[numOfBytes];`
			`this.getTargetDataLine().read(data, 0, numOfBytes);`
			`return data;`
			`}`
			`return null;//If data cannot be read, returns a null array.`
			`}`


			`/**`
			`* Calculates the fundamental frequency. In other words, it calculates pitch,`
			`* except pitch is far more subjective and subtle. Also note, that readings may occasionally,`
			`* be in error due to the complex nature of sound. This feature is in Beta`
			`* @return The frequency of the sound in Hertz.`
			`*/`
			`public int getFrequency(){`
			`try {`
			`return getFrequency(4096);`
			`} catch (Exception e) {`
			`//This will never happen. Ever...`
			`return -666;`
			`}`
			`}`

			`/**`
			`* Calculates the frequency based off of the number of bytes.`
			`* CAVEAT: THE NUMBER OF BYTES MUST BE A MULTIPLE OF 2!!!`
			`* @param numOfBytes The number of bytes which must be a multiple of 2!!!`
			`* @return The calculated frequency in Hertz.`
			`*/`
			`public int getFrequency(int numOfBytes) throws Exception{`
			`if(getTargetDataLine() == null){`
			`return -1;`
			`}`
			`byte[] data = new byte[numOfBytes+1];//One byte is lost during conversion`
			`this.getTargetDataLine().read(data, 0, numOfBytes);`
			`return getFrequency(data);`
			`}`

			`/**`
			`* Calculates the frequency based off of the byte array,`
			`* @param bytes The audioData you want to analyze`
			`* @return The calculated frequency in Hertz.`
			`*/`
			`public int getFrequency(byte[] bytes){`
			`double[] audioData = this.bytesToDoubleArray(bytes);`
			`audioData = applyHanningWindow(audioData);`
			`Complex[] complex = new Complex[audioData.length];`
			`for(int i = 0; i<complex.length; i++){`
			`complex[i] = new Complex(audioData[i], 0);`
			`}`
			`Complex[] fftTransformed = FFT.fft(complex);`
			`return this.calculateFundamentalFrequency(fftTransformed, 4);`
			`}`

			`/**`
			`* Applies a Hanning Window to the data set.`
			`* Hanning Windows are used to increase the accuracy of the FFT.`
			`* One should always apply a window to a dataset before applying an FFT`
			`* @param The data you want to apply the window to`
			`* @return The windowed data set`
			`*/`
			`private double[] applyHanningWindow(double[] data){`
			`return applyHanningWindow(data, 0, data.length);`
			`}`

			`/**`
			`* Applies a Hanning Window to the data set.`
			`* Hanning Windows are used to increase the accuracy of the FFT.`
			`* One should always apply a window to a dataset before applying an FFT`
			`* @param The data you want to apply the window to`
			`* @param The starting index you want to apply a window from`
			`* @param The size of the window`
			`* @return The windowed data set`
			`*/`
			`private double[] applyHanningWindow(double[] signal_in, int pos, int size){`
			`for (int i = pos; i < pos + size; i++){`
			`int j = i - pos; // j = index into Hann window function`
			`signal_in[i] = (double)(signal_in[i] * 0.5 * (1.0 - Math.cos(2.0 * Math.PI * j / size)));`
			`}`
			`return signal_in;`
			`}`


			`/**`
			`* This method calculates the fundamental frequency using Harmonic Product Specturm`
			`* It down samples the FFTData four times and multiplies the arrays`
			`* together to determine the fundamental frequency. This is slightly more computationally`
			`* expensive, but much more accurate. In simpler terms, the function will remove the harmonic frequencies`
			`* which occur at every N value by finding the lowest common divisor among them.`
			`* @param fftData The array returned by the FFT`
			`* @param N the number of times you wish to downsample.`
			`* WARNING: The more times you downsample, the lower the maximum detectable frequency is.`
			`* @return The fundamental frequency in Hertz`
			`*/`
			`private int calculateFundamentalFrequency(Complex[] fftData, int N){`
			`if(N<=0 \|\| fftData == null){ return -1; } //error case`

			`final int LENGTH = fftData.length;//Used to calculate bin size`
			`fftData = removeNegativeFrequencies(fftData);`
			`Complex[][] data = new Complex[N][fftData.length/N];`
			`for(int i = 0; i<N; i++){`
			`for(int j = 0; j<data[0].length; j++){`
			`data[i][j] = fftData[j*(i+1)];`
			`}`
			`}`
			`Complex[] result = new Complex[fftData.length/N];//Combines the arrays`
			`for(int i = 0; i<result.length; i++){`
			`Complex tmp = new Complex(1,0);`
			`for(int j = 0; j<N; j++){`
			`tmp = tmp.times(data[j][i]);`
			`}`
			`result[i] = tmp;`
			`}`
			`int index = this.findMaxMagnitude(result);`
			`return index*getFFTBinSize(LENGTH);`
			`}`

			`/**`
			`* Removes useless data from transform since sound doesn't use complex numbers.`
			`* @param The data you want to remove the complex transforms from`
			`* @return The cleaned data`
			`*/`
			`private Complex[] removeNegativeFrequencies(Complex[] c){`
			`Complex[] out = new Complex[c.length/2];`
			`for(int i = 0; i<out.length; i++){`
			`out[i] = c[i];`
			`}`
			`return out;`
			`}`

			`/**`
			`* Calculates the FFTbin size based off the length of the the array`
			`* Each FFTBin size represents the range of frequencies treated as one.`
			`* For example, if the bin size is 5 then the algorithm is precise to within 5hz.`
			`* Precondition: length cannot be 0.`
			`* @param fftDataLength The length of the array used to feed the FFT algorithm`
			`* @return FFTBin size`
			`*/`
			`private int getFFTBinSize(int fftDataLength){`
			`return (int)(getAudioFormat().getSampleRate()/fftDataLength+.5);`
			`}`

			`/**`
			`* Calculates index of the maximum magnitude in a complex array.`
			`* @param The Complex[] you want to get max magnitude from.`
			`* @return The index of the max magnitude`
			`*/`
			`private int findMaxMagnitude(Complex[] input){`
			`//Calculates Maximum Magnitude of the array`
			`double max = Double.MIN_VALUE;`
			`int index = -1;`
			`for(int i = 0; i<input.length; i++){`
			`Complex c = input[i];`
			`double tmp = c.getMagnitude();`
			`if(tmp>max){`
			`max = tmp;;`
			`index = i;`
			`}`
			`}`
			`return index;`
			`}`

			`/**`
			`* Converts bytes from a TargetDataLine into a double[] allowing the information to be read.`
			`* NOTE: One byte is lost in the conversion so don't expect the arrays to be the same length!`
			`* @param bufferData The buffer read in from the target data line`
			`* @return The double[] that the buffer has been converted into.`
			`*/`
			`private double[] bytesToDoubleArray(byte[] bufferData){`
			`final int bytesRecorded = bufferData.length;`
			`final int bytesPerSample = getAudioFormat().getSampleSizeInBits()/8;`
			`final double amplification = 100.0; // choose a number as you like`
			`double[] micBufferData = new double[bytesRecorded - bytesPerSample +1];`
			`for (int index = 0, floatIndex = 0; index < bytesRecorded - bytesPerSample + 1; index += bytesPerSample, floatIndex++) {`
			`double sample = 0;`
			`for (int b = 0; b < bytesPerSample; b++) {`
			`int v = bufferData[index + b];`
			`if (b < bytesPerSample - 1 \|\| bytesPerSample == 1) {`
			`v &= 0xFF;`
			`}`
			`sample += v << (b * 8);`
			`}`
			`double sample32 = amplification * (sample / 32768.0);`
			`micBufferData[floatIndex] = sample32;`

			`}`
			`return micBufferData;`
			`}`

			`}`