Implemented Gradient Ascent (TCs failing)

This commit is contained in:
Ziver Koc 2018-04-17 17:03:34 +02:00
parent 8050170ee3
commit 2160976406
4 changed files with 187 additions and 15 deletions

View file

@ -172,6 +172,40 @@ public class Matrix {
* Vector * Vector
**********************************************************************/ **********************************************************************/
/**
 * Vector addition, every element in the first vector will be added
 * with the corresponding element in the second vector.
 *
 * @param vector1 the first operand vector
 * @param vector2 the second operand vector, must be the same length as vector1
 * @return a new vector containing the element-wise sums
 */
public static double[] add(double[] vector1, double[] vector2){
    vectorPreCheck(vector1, vector2);
    double[] result = new double[vector1.length];
    for (int i=0; i < result.length; ++i) {
        result[i] = vector1[i] + vector2[i];
    }
    return result;
}
/**
 * Matrix Vector addition, the vector will be added to every row
 * of the matrix (element x of the vector is added to column x).
 *
 * @param matrix the matrix operand
 * @param vector the vector operand, must match the matrix column count
 * @return a new matrix containing the element-wise sums
 */
public static double[][] add(double[][] matrix, double[] vector){
    vectorPreCheck(matrix, vector);
    double[][] result = new double[matrix.length][matrix[0].length];
    for (int y=0; y < result.length; ++y) {
        // Bug fix: iterate over the row's columns (result[y].length),
        // not the row count (result.length) — the original only worked
        // for square matrices and indexed out of bounds otherwise.
        for (int x=0; x < result[y].length; ++x) {
            result[y][x] = matrix[y][x] + vector[x];
        }
    }
    return result;
}
/** /**
* Vector subtraction, every element in the first vector will be subtracted * Vector subtraction, every element in the first vector will be subtracted
* with the corresponding element in the second vector. * with the corresponding element in the second vector.
@ -188,19 +222,40 @@ public class Matrix {
return result; return result;
} }
/**
 * Matrix Vector subtraction, the vector will be subtracted from every
 * row of the matrix (element x of the vector is subtracted from column x).
 *
 * @param matrix the matrix operand
 * @param vector the vector operand, must match the matrix column count
 * @return a new matrix containing the element-wise differences
 */
public static double[][] subtract(double[][] matrix, double[] vector){
    vectorPreCheck(matrix, vector);
    double[][] result = new double[matrix.length][matrix[0].length];
    for (int y=0; y < result.length; ++y) {
        // Bug fix: iterate over the row's columns (result[y].length),
        // not the row count (result.length) — the original only worked
        // for square matrices and indexed out of bounds otherwise.
        for (int x=0; x < result[y].length; ++x) {
            result[y][x] = matrix[y][x] - vector[x];
        }
    }
    return result;
}
/** /**
* Matrix Vector multiplication, each element column in the matrix will be * Matrix Vector multiplication, each element column in the matrix will be
* multiplied with the corresponding element row in the vector. * multiplied with the corresponding element row in the vector.
* *
* @return a new vector with the result * @return a new vector with the result
*/ */
public static double[] multiply(double[][] matrix, double[] vector){ public static double[][] multiply(double[][] matrix, double[] vector){
vectorPreCheck(matrix, vector); vectorPreCheck(matrix, vector);
double[] result = new double[matrix.length]; double[][] result = new double[matrix.length][1];
for (int y=0; y < matrix.length; ++y) { for (int y=0; y < result.length; ++y) {
for (int x=0; x < matrix[y].length; ++x){ for (int x=0; x<matrix[0].length; ++x) {
result[y] += matrix[y][x] * vector[x]; for (int i=0; i < result[y].length; ++i){
result[y][i] += matrix[y][x] * vector[x];
}
} }
} }
return result; return result;
@ -263,10 +318,10 @@ public class Matrix {
matrixPreCheck(matrix1, matrix2); matrixPreCheck(matrix1, matrix2);
double[][] result = new double[matrix1.length][matrix2[0].length]; double[][] result = new double[matrix1.length][matrix2[0].length];
for (int i=0; i < result.length; ++i) { for (int y=0; y < result.length; ++y) {
for (int k=0; k<matrix1[0].length; ++k) { for (int x=0; x<matrix1[0].length; ++x) {
for (int j=0; j < result[i].length; ++j){ for (int i=0; i < result[y].length; ++i){
result[i][j] += matrix1[i][k] * matrix2[k][j]; result[y][i] += matrix1[y][x] * matrix2[x][i];
} }
} }
} }
@ -323,4 +378,15 @@ public class Matrix {
return result; return result;
} }
/**
 * Extracts a single column from a matrix.
 *
 * @param x the source matrix
 * @param j the zero-based index of the column to extract
 * @return a single dimension double array containing the elements of the j:th column
 */
public static double[] getColumn(double[][] x, int j) {
    double[] column = new double[x.length];
    for (int row = 0; row < column.length; ++row) {
        column[row] = x[row][j];
    }
    return column;
}
} }

View file

@ -16,7 +16,7 @@ public class LinearRegression {
* h(x) = theta0 * x0 + theta1 * x1 + ... + thetan * xn => transpose(theta) * x * h(x) = theta0 * x0 + theta1 * x1 + ... + thetan * xn => transpose(theta) * x
* </i> * </i>
*/ */
protected static double[] calculateHypotesis(double[][] x, double[] theta){ protected static double[][] calculateHypotesis(double[][] x, double[] theta){
return Matrix.multiply(x, theta); return Matrix.multiply(x, theta);
} }
@ -24,15 +24,36 @@ public class LinearRegression {
* Linear Regresion cost method. * Linear Regresion cost method.
* <br /><br /> * <br /><br />
* <i> * <i>
* J(O) = 1 / (2 * m) * Σ { ( h(xi) - yi )^2 } * J(O) = 1 / (2 * m) * Σ { ( h(Xi) - Yi )^2 }
* </i><br> * </i><br>
* m = learning data size (rows) * m = learning data size (rows)
* @return a number indicating the error rate * @return a number indicating the error rate
*/ */
protected static double calculateCost(double[][] x, double[] y, double[] theta){ protected static double calculateCost(double[][] x, double[] y, double[] theta){
return 1 / (2 * x.length) * Matrix.sum( return 1.0 / (2.0 * x.length) * Matrix.sum(
Matrix.Elemental.pow( Matrix.Elemental.pow(
Matrix.subtract(calculateHypotesis(x, theta), y), Matrix.subtract(calculateHypotesis(x, theta), y),
2)); 2));
} }
/**
 * Performs one batch gradient descent update of the linear regression
 * parameters. (NOTE(review): the method is named gradientAscent but the
 * update rule below is the standard descent rule "theta - alpha * grad";
 * the name is kept for caller compatibility.)
 * <br /><br />
 * <i>
 * Oj = Oj - α * (1 / m) * Σ { ( h(Xi) - Yi ) * Xij }
 * </i><br />
 *
 * @param x     training inputs, one row per example, one column per feature
 * @param y     expected output for each training example
 * @param theta current parameter vector, one entry per feature
 * @param alpha the learning rate
 * @return the updated theta vector after a single descent step
 */
public static double[] gradientAscent(double[][] x, double[] y, double[] theta, double alpha){
    double m = y.length;

    // error[i] = h(Xi) - Yi, where h(Xi) = theta · Xi
    double[] error = new double[y.length];
    for (int i = 0; i < x.length; ++i) {
        double hypothesis = 0;
        for (int k = 0; k < theta.length; ++k) {
            hypothesis += theta[k] * x[i][k];
        }
        error[i] = hypothesis - y[i];
    }

    double[] newTheta = new double[theta.length];
    for (int j = 0; j < theta.length; ++j) {
        // Bug fix: the gradient term multiplies the error by Xij;
        // the original added the column to the error instead
        // (Matrix.add(hypotesisCache, getColumn(x, j))), which is why
        // the test cases were failing.
        double sum = 0;
        for (int i = 0; i < x.length; ++i) {
            sum += error[i] * x[i][j];
        }
        newTheta[j] = theta[j] - alpha * (1.0 / m) * sum;
    }
    return newTheta;
}
} }

View file

@ -70,14 +70,46 @@ public class MatrixTest {
@Test @Test
public void vectorMultiply(){ public void vectorAddition(){
assertArrayEquals( assertArrayEquals(
new double[]{8,14}, new double[]{3,5,-1,13},
Matrix.multiply(new double[][]{{2,3},{-4,9}}, new double[]{1,2}), Matrix.add(new double[]{2,3,-4,9}, new double[]{1,2,3,4}),
0.0 0.0
); );
} }
@Test
public void vectorMatrixAddition(){
    // Adding the vector {1,1,1,1} to each row should increment every element.
    assertArrayEquals(
            new double[][]{{2,3,4,5},{2,3,4,5},{2,3,4,5},{2,3,4,5}},
            Matrix.add(new double[][]{{1,2,3,4},{1,2,3,4},{1,2,3,4},{1,2,3,4}}, new double[]{1,1,1,1})
    );
}
@Test
public void vectorSubtraction(){
    // Element-wise vector difference: {2-1, 3-2, -4-3, 9-4}.
    assertArrayEquals(
            new double[]{1,1,-7,5},
            Matrix.subtract(new double[]{2,3,-4,9}, new double[]{1,2,3,4}),
            0.0
    );
}
@Test
public void vectorMatrixSubtraction(){
    // Subtracting the vector {1,1,1,1} from each row should decrement every element.
    assertArrayEquals(
            new double[][]{{0,1,2,3},{0,1,2,3},{0,1,2,3},{0,1,2,3}},
            Matrix.subtract(new double[][]{{1,2,3,4},{1,2,3,4},{1,2,3,4},{1,2,3,4}}, new double[]{1,1,1,1})
    );
}
@Test
public void vectorMultiply(){
    // Matrix-vector product returned as an m x 1 matrix:
    // row 0: 2*1 + 3*2 = 8; row 1: -4*1 + 9*2 = 14.
    assertArrayEquals(
            new double[][]{{8},{14}},
            Matrix.multiply(new double[][]{{2,3},{-4,9}}, new double[]{1,2}));
}
@Test @Test
public void vectorDivision(){ public void vectorDivision(){
assertArrayEquals( assertArrayEquals(
@ -138,4 +170,13 @@ public class MatrixTest {
new double[][]{{1,0,0,0},{0,1,0,0},{0,0,1,0},{0,0,0,1}}, new double[][]{{1,0,0,0},{0,1,0,0},{0,0,1,0},{0,0,0,1}},
Matrix.identity(4)); Matrix.identity(4));
} }
@Test
public void getColumn(){
    // Column 1 of the matrix, read top to bottom.
    assertArrayEquals(
            new double[]{2,3,4,1},
            Matrix.getColumn(new double[][]{{1,2,3,4},{2,3,4,1},{3,4,1,2},{4,1,2,3}}, 1),
            0.0
    );
}
} }

View file

@ -0,0 +1,44 @@
package zutil.ml;
import org.junit.Test;
import static org.junit.Assert.*;
/**
 * Test cases are from the Machine Learning course on coursera.
 * https://www.coursera.org/learn/machine-learning/discussions/all/threads/0SxufTSrEeWPACIACw4G5w
 */
public class LinearRegressionTest {

    @Test
    public void calculateHypotesis() {
        double[][] x = {{1, 2}, {1, 3}, {1, 4}, {1, 5}};
        double[] theta = {0.1, 0.2};

        double[][] hypotesis = LinearRegression.calculateHypotesis(x, theta);

        // h(Xi) = 0.1 * 1 + 0.2 * Xi1 for each row of x.
        assertArrayEquals(new double[][]{{0.5}, {0.7}, {0.9}, {1.1}}, hypotesis);
    }

    @Test
    public void calculateCost() {
        double[][] x = {{1, 2}, {1, 3}, {1, 4}, {1, 5}};
        double[] y = {7, 6, 5, 4};
        double[] theta = {0.1, 0.2};

        double cost = LinearRegression.calculateCost(x, y, theta);

        assertEquals(11.9450, cost, 0.0001);
    }

    @Test
    public void gradientAscent() {
        double[][] x = {{1, 5}, {1, 2}, {1, 4}, {1, 5}};
        double[] y = {1, 6, 4, 2};
        double[] initialTheta = {0, 0};

        double[] theta = LinearRegression.gradientAscent(x, y, initialTheta, /* alpha */ 0.01);

        assertArrayEquals(new double[]{0.032500, 0.107500}, theta, 0.000001);
    }
}