Added voice control
Former-commit-id: 6f69079bf44f0d8f9ae40de6b0f1638d103464c2
This commit is contained in:
parent
35c92407a3
commit
53da641909
863 changed files with 192681 additions and 0 deletions
|
|
@ -0,0 +1,224 @@
|
|||
package com.darkprograms.speech.microphone;
|
||||
|
||||
import javax.sound.sampled.*;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.File;
|
||||
|
||||
/***************************************************************************
 * Microphone class that contains methods to capture audio from the microphone.
 * <p>
 * A {@link TargetDataLine} matching {@link #getAudioFormat()} is looked up when
 * the object is constructed. If the system offers no such line the object is
 * still created, {@link #getTargetDataLine()} returns {@code null}, and the
 * lookup is retried by {@link #open()} and {@link #captureAudioToFile(File)}.
 * <p>
 * Recording to a file happens on a background thread, so {@link #getState()}
 * may be polled from a different thread than the one doing the capture.
 *
 * @author Luke Kuza, Aaron Gokaslan
 ***************************************************************************/
public class Microphone implements Closeable{

    /**
     * TargetDataLine variable to receive data from microphone.
     * May be {@code null} when no matching line could be obtained.
     */
    private TargetDataLine targetDataLine;

    /**
     * Enum for current Microphone state
     */
    public enum CaptureState {
        PROCESSING_AUDIO, STARTING_CAPTURE, CLOSED
    }

    /**
     * Current capture state. Volatile because it is written by the capture
     * thread (through {@link #open()}) and read by callers of {@link #getState()}.
     */
    volatile CaptureState state;

    /**
     * Variable for the audios saved file type
     */
    private AudioFileFormat.Type fileType;

    /**
     * Variable that holds the saved audio file
     */
    private File audioFile;

    /**
     * Gets the current state of Microphone
     *
     * @return PROCESSING_AUDIO is returned when the Thread is recording Audio and/or saving it to a file<br>
     *         STARTING_CAPTURE is returned if the Thread is setting variables<br>
     *         CLOSED is returned if the Thread is not doing anything/not capturing audio
     */
    public CaptureState getState() {
        return state;
    }

    /**
     * Sets the current state of Microphone
     *
     * @param state State from enum
     */
    private void setState(CaptureState state) {
        this.state = state;
    }

    /**
     * @return the file the most recent capture was (or is being) written to, or {@code null} if none was set
     */
    public File getAudioFile() {
        return audioFile;
    }

    /**
     * @param audioFile the file subsequent captures should be written to
     */
    public void setAudioFile(File audioFile) {
        this.audioFile = audioFile;
    }

    /**
     * @return the audio file format used when saving a capture
     */
    public AudioFileFormat.Type getFileType() {
        return fileType;
    }

    /**
     * @param fileType the audio file format to use when saving a capture
     */
    public void setFileType(AudioFileFormat.Type fileType) {
        this.fileType = fileType;
    }

    /**
     * @return the line audio is read from, or {@code null} if none could be obtained
     */
    public TargetDataLine getTargetDataLine() {
        return targetDataLine;
    }

    /**
     * @param targetDataLine the line audio should be read from
     */
    public void setTargetDataLine(TargetDataLine targetDataLine) {
        this.targetDataLine = targetDataLine;
    }

    /**
     * Constructor
     *
     * @param fileType File type to save the audio in<br>
     *                 Example, to save as WAVE use AudioFileFormat.Type.WAVE
     */
    public Microphone(AudioFileFormat.Type fileType) {
        setState(CaptureState.CLOSED);
        setFileType(fileType);
        initTargetDataLine();
    }

    /**
     * Initializes the target data line.
     * <p>
     * On failure the stack trace is printed and the current line is left
     * untouched (normally {@code null}); no exception escapes.
     */
    private void initTargetDataLine(){
        DataLine.Info dataLineInfo = new DataLine.Info(TargetDataLine.class, getAudioFormat());
        try {
            setTargetDataLine((TargetDataLine) AudioSystem.getLine(dataLineInfo));
        } catch (LineUnavailableException e) {
            // A matching line exists but is currently in use.
            e.printStackTrace();
        } catch (IllegalArgumentException e) {
            // No installed mixer supports a line of this format (e.g. no capture device).
            e.printStackTrace();
        } catch (SecurityException e) {
            // Access to the line was denied.
            e.printStackTrace();
        }
    }

    /**
     * Captures audio from the microphone and saves it to a file.
     * The capture runs on a newly started background thread and continues
     * until {@link #close()} is called.
     *
     * @param audioFile The File to save the audio to
     * @throws LineUnavailableException if no microphone line matching {@link #getAudioFormat()} could be obtained
     */
    public void captureAudioToFile(File audioFile) throws LineUnavailableException {
        setState(CaptureState.STARTING_CAPTURE);
        setAudioFile(audioFile);

        if(getTargetDataLine() == null){
            initTargetDataLine();
        }
        if(getTargetDataLine() == null){
            // Starting the thread would be pointless; report the problem instead
            // of leaving the state stuck at STARTING_CAPTURE.
            setState(CaptureState.CLOSED);
            throw new LineUnavailableException(
                    "No microphone line available for format " + getAudioFormat());
        }

        //Get Audio
        new Thread(new CaptureThread()).start();
    }

    /**
     * Captures audio from the microphone and saves it to a file.
     *
     * @param audioFile The full path (String) to a file you want to save the audio in
     * @throws LineUnavailableException if no microphone line matching {@link #getAudioFormat()} could be obtained
     */
    public void captureAudioToFile(String audioFile) throws LineUnavailableException {
        captureAudioToFile(new File(audioFile));
    }

    /**
     * The audio format to capture and save in.
     *
     * @return signed 16-bit, mono, little-endian PCM at 8000 Hz
     */
    public AudioFormat getAudioFormat() {
        float sampleRate = 8000.0F;
        //8000,11025,16000,22050,44100
        int sampleSizeInBits = 16;
        //8,16
        int channels = 1;
        //1,2
        boolean signed = true;
        //true,false
        boolean bigEndian = false;
        //true,false
        return new AudioFormat(sampleRate, sampleSizeInBits, channels, signed, bigEndian);
    }

    /**
     * Opens the microphone, starting the targetDataLine.
     * If it's already open, it does nothing.
     * <p>
     * If no line can be obtained this method returns without changing the
     * state. If the line cannot be opened the stack trace is printed and the
     * state is set to {@link CaptureState#CLOSED}.
     */
    public void open(){
        if(getTargetDataLine() == null){
            initTargetDataLine();
        }
        TargetDataLine line = getTargetDataLine();
        if(line == null){
            return; // still no capture line available
        }
        if(!line.isOpen() && !line.isRunning() && !line.isActive()){
            try {
                line.open(getAudioFormat());
                line.start();
                // Only report PROCESSING_AUDIO once the line is really delivering data.
                setState(CaptureState.PROCESSING_AUDIO);
            } catch (LineUnavailableException e) {
                setState(CaptureState.CLOSED);
                e.printStackTrace();
            }
        }
    }

    /**
     * Close the microphone capture, saving all processed audio to the specified file.<br>
     * If already closed, this does nothing
     */
    @Override
    public void close() {
        if (getState() == CaptureState.CLOSED) {
            return;
        }
        TargetDataLine line = getTargetDataLine();
        if (line != null) {
            line.stop();
            line.close();
        }
        setState(CaptureState.CLOSED);
    }

    /**
     * Thread to capture the audio from the microphone and save it to a file
     */
    private class CaptureThread implements Runnable {

        /**
         * Opens the line and streams it into the configured file.
         * Any failure is printed and the microphone is closed again so the
         * line is not left open.
         */
        @Override
        public void run() {
            try {
                AudioFileFormat.Type fileType = getFileType();
                File audioFile = getAudioFile();
                open();
                TargetDataLine line = getTargetDataLine();
                if (line == null || !line.isOpen()) {
                    close(); // nothing to record from; make sure the state is CLOSED
                    return;
                }
                AudioSystem.write(new AudioInputStream(line), fileType, audioFile);
                //Will write to File until it's closed.
            } catch (Exception ex) {
                ex.printStackTrace();
                close();
            }
        }
    }

}
|
||||
|
|
@ -0,0 +1,288 @@
|
|||
package com.darkprograms.speech.microphone;
|
||||
|
||||
import javax.sound.sampled.AudioFileFormat;
|
||||
import com.darkprograms.speech.util.*;
|
||||
|
||||
/********************************************************************************************
|
||||
* Microphone Analyzer class, detects pitch and volume while extending the microphone class.
|
||||
* Implemented as a precursor to a Voice Activity Detection (VAD) algorithm.
|
||||
* Currently can be used for audio data analysis.
|
||||
* Dependencies: FFT.java & Complex.java. Both found in the utility package.
|
||||
* @author Aaron Gokaslan
|
||||
********************************************************************************************/
|
||||
|
||||
public class MicrophoneAnalyzer extends Microphone {
|
||||
|
||||
/**
|
||||
* Constructor
|
||||
* @param fileType The file type you want to save in. FLAC recommended.
|
||||
*/
|
||||
public MicrophoneAnalyzer(AudioFileFormat.Type fileType){
|
||||
super(fileType);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the volume of the microphone input
|
||||
* Interval is 100ms so allow 100ms for this method to run in your code or specify smaller interval.
|
||||
* @return The volume of the microphone input or -1 if data-line is not available
|
||||
*/
|
||||
public int getAudioVolume(){
|
||||
return getAudioVolume(100);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the volume of the microphone input
|
||||
* @param interval: The length of time you would like to calculate the volume over in milliseconds.
|
||||
* @return The volume of the microphone input or -1 if data-line is not available.
|
||||
*/
|
||||
public int getAudioVolume(int interval){
|
||||
return calculateAudioVolume(this.getNumOfBytes(interval/1000d));
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the volume of microphone input
|
||||
* @param numOfBytes The number of bytes you want for volume interpretation
|
||||
* @return The volume over the specified number of bytes or -1 if data-line is unavailable.
|
||||
*/
|
||||
private int calculateAudioVolume(int numOfBytes){
|
||||
byte[] data = getBytes(numOfBytes);
|
||||
if(data==null)
|
||||
return -1;
|
||||
return calculateRMSLevel(data);
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates the volume of AudioData which may be buffered data from a data-line.
|
||||
* @param audioData The byte[] you want to determine the volume of
|
||||
* @return the calculated volume of audioData
|
||||
*/
|
||||
public static int calculateRMSLevel(byte[] audioData){
|
||||
long lSum = 0;
|
||||
for(int i=0; i<audioData.length; i++)
|
||||
lSum = lSum + audioData[i];
|
||||
|
||||
double dAvg = lSum / audioData.length;
|
||||
|
||||
double sumMeanSquare = 0d;
|
||||
for(int j=0; j<audioData.length; j++)
|
||||
sumMeanSquare = sumMeanSquare + Math.pow(audioData[j] - dAvg, 2d);
|
||||
|
||||
double averageMeanSquare = sumMeanSquare / audioData.length;
|
||||
return (int)(Math.pow(averageMeanSquare,0.5d) + 0.5);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the number of bytes over interval for useful when figuring out how long to record.
|
||||
* @param seconds The length in seconds
|
||||
* @return the number of bytes the microphone will save.
|
||||
*/
|
||||
public int getNumOfBytes(int seconds){
|
||||
return getNumOfBytes((double)seconds);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the number of bytes over interval for useful when figuring out how long to record.
|
||||
* @param seconds The length in seconds
|
||||
* @return the number of bytes the microphone will output over the specified time.
|
||||
*/
|
||||
public int getNumOfBytes(double seconds){
|
||||
return (int)(seconds*getAudioFormat().getSampleRate()*getAudioFormat().getFrameSize()+.5);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the a byte[] containing the specified number of bytes
|
||||
* @param numOfBytes The length of the returned array.
|
||||
* @return The specified array or null if it cannot.
|
||||
*/
|
||||
private byte[] getBytes(int numOfBytes){
|
||||
if(getTargetDataLine()!=null){
|
||||
byte[] data = new byte[numOfBytes];
|
||||
this.getTargetDataLine().read(data, 0, numOfBytes);
|
||||
return data;
|
||||
}
|
||||
return null;//If data cannot be read, returns a null array.
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Calculates the fundamental frequency. In other words, it calculates pitch,
|
||||
* except pitch is far more subjective and subtle. Also note, that readings may occasionally,
|
||||
* be in error due to the complex nature of sound. This feature is in Beta
|
||||
* @return The frequency of the sound in Hertz.
|
||||
*/
|
||||
public int getFrequency(){
|
||||
try {
|
||||
return getFrequency(4096);
|
||||
} catch (Exception e) {
|
||||
//This will never happen. Ever...
|
||||
return -666;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates the frequency based off of the number of bytes.
|
||||
* CAVEAT: THE NUMBER OF BYTES MUST BE A MULTIPLE OF 2!!!
|
||||
* @param numOfBytes The number of bytes which must be a multiple of 2!!!
|
||||
* @return The calculated frequency in Hertz.
|
||||
*/
|
||||
public int getFrequency(int numOfBytes) throws Exception{
|
||||
if(getTargetDataLine() == null){
|
||||
return -1;
|
||||
}
|
||||
byte[] data = new byte[numOfBytes+1];//One byte is lost during conversion
|
||||
this.getTargetDataLine().read(data, 0, numOfBytes);
|
||||
return getFrequency(data);
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates the frequency based off of the byte array,
|
||||
* @param bytes The audioData you want to analyze
|
||||
* @return The calculated frequency in Hertz.
|
||||
*/
|
||||
public int getFrequency(byte[] bytes){
|
||||
double[] audioData = this.bytesToDoubleArray(bytes);
|
||||
audioData = applyHanningWindow(audioData);
|
||||
Complex[] complex = new Complex[audioData.length];
|
||||
for(int i = 0; i<complex.length; i++){
|
||||
complex[i] = new Complex(audioData[i], 0);
|
||||
}
|
||||
Complex[] fftTransformed = FFT.fft(complex);
|
||||
return this.calculateFundamentalFrequency(fftTransformed, 4);
|
||||
}
|
||||
|
||||
/**
|
||||
* Applies a Hanning Window to the data set.
|
||||
* Hanning Windows are used to increase the accuracy of the FFT.
|
||||
* One should always apply a window to a dataset before applying an FFT
|
||||
* @param The data you want to apply the window to
|
||||
* @return The windowed data set
|
||||
*/
|
||||
private double[] applyHanningWindow(double[] data){
|
||||
return applyHanningWindow(data, 0, data.length);
|
||||
}
|
||||
|
||||
/**
|
||||
* Applies a Hanning Window to the data set.
|
||||
* Hanning Windows are used to increase the accuracy of the FFT.
|
||||
* One should always apply a window to a dataset before applying an FFT
|
||||
* @param The data you want to apply the window to
|
||||
* @param The starting index you want to apply a window from
|
||||
* @param The size of the window
|
||||
* @return The windowed data set
|
||||
*/
|
||||
private double[] applyHanningWindow(double[] signal_in, int pos, int size){
|
||||
for (int i = pos; i < pos + size; i++){
|
||||
int j = i - pos; // j = index into Hann window function
|
||||
signal_in[i] = (double)(signal_in[i] * 0.5 * (1.0 - Math.cos(2.0 * Math.PI * j / size)));
|
||||
}
|
||||
return signal_in;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This method calculates the fundamental frequency using Harmonic Product Specturm
|
||||
* It down samples the FFTData four times and multiplies the arrays
|
||||
* together to determine the fundamental frequency. This is slightly more computationally
|
||||
* expensive, but much more accurate. In simpler terms, the function will remove the harmonic frequencies
|
||||
* which occur at every N value by finding the lowest common divisor among them.
|
||||
* @param fftData The array returned by the FFT
|
||||
* @param N the number of times you wish to downsample.
|
||||
* WARNING: The more times you downsample, the lower the maximum detectable frequency is.
|
||||
* @return The fundamental frequency in Hertz
|
||||
*/
|
||||
private int calculateFundamentalFrequency(Complex[] fftData, int N){
|
||||
if(N<=0 || fftData == null){ return -1; } //error case
|
||||
|
||||
final int LENGTH = fftData.length;//Used to calculate bin size
|
||||
fftData = removeNegativeFrequencies(fftData);
|
||||
Complex[][] data = new Complex[N][fftData.length/N];
|
||||
for(int i = 0; i<N; i++){
|
||||
for(int j = 0; j<data[0].length; j++){
|
||||
data[i][j] = fftData[j*(i+1)];
|
||||
}
|
||||
}
|
||||
Complex[] result = new Complex[fftData.length/N];//Combines the arrays
|
||||
for(int i = 0; i<result.length; i++){
|
||||
Complex tmp = new Complex(1,0);
|
||||
for(int j = 0; j<N; j++){
|
||||
tmp = tmp.times(data[j][i]);
|
||||
}
|
||||
result[i] = tmp;
|
||||
}
|
||||
int index = this.findMaxMagnitude(result);
|
||||
return index*getFFTBinSize(LENGTH);
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes useless data from transform since sound doesn't use complex numbers.
|
||||
* @param The data you want to remove the complex transforms from
|
||||
* @return The cleaned data
|
||||
*/
|
||||
private Complex[] removeNegativeFrequencies(Complex[] c){
|
||||
Complex[] out = new Complex[c.length/2];
|
||||
for(int i = 0; i<out.length; i++){
|
||||
out[i] = c[i];
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates the FFTbin size based off the length of the the array
|
||||
* Each FFTBin size represents the range of frequencies treated as one.
|
||||
* For example, if the bin size is 5 then the algorithm is precise to within 5hz.
|
||||
* Precondition: length cannot be 0.
|
||||
* @param fftDataLength The length of the array used to feed the FFT algorithm
|
||||
* @return FFTBin size
|
||||
*/
|
||||
private int getFFTBinSize(int fftDataLength){
|
||||
return (int)(getAudioFormat().getSampleRate()/fftDataLength+.5);
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates index of the maximum magnitude in a complex array.
|
||||
* @param The Complex[] you want to get max magnitude from.
|
||||
* @return The index of the max magnitude
|
||||
*/
|
||||
private int findMaxMagnitude(Complex[] input){
|
||||
//Calculates Maximum Magnitude of the array
|
||||
double max = Double.MIN_VALUE;
|
||||
int index = -1;
|
||||
for(int i = 0; i<input.length; i++){
|
||||
Complex c = input[i];
|
||||
double tmp = c.getMagnitude();
|
||||
if(tmp>max){
|
||||
max = tmp;;
|
||||
index = i;
|
||||
}
|
||||
}
|
||||
return index;
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts bytes from a TargetDataLine into a double[] allowing the information to be read.
|
||||
* NOTE: One byte is lost in the conversion so don't expect the arrays to be the same length!
|
||||
* @param bufferData The buffer read in from the target data line
|
||||
* @return The double[] that the buffer has been converted into.
|
||||
*/
|
||||
private double[] bytesToDoubleArray(byte[] bufferData){
|
||||
final int bytesRecorded = bufferData.length;
|
||||
final int bytesPerSample = getAudioFormat().getSampleSizeInBits()/8;
|
||||
final double amplification = 100.0; // choose a number as you like
|
||||
double[] micBufferData = new double[bytesRecorded - bytesPerSample +1];
|
||||
for (int index = 0, floatIndex = 0; index < bytesRecorded - bytesPerSample + 1; index += bytesPerSample, floatIndex++) {
|
||||
double sample = 0;
|
||||
for (int b = 0; b < bytesPerSample; b++) {
|
||||
int v = bufferData[index + b];
|
||||
if (b < bytesPerSample - 1 || bytesPerSample == 1) {
|
||||
v &= 0xFF;
|
||||
}
|
||||
sample += v << (b * 8);
|
||||
}
|
||||
double sample32 = amplification * (sample / 32768.0);
|
||||
micBufferData[floatIndex] = sample32;
|
||||
|
||||
}
|
||||
return micBufferData;
|
||||
}
|
||||
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue