Implemented Gradient Ascent (TCs failing)
This commit is contained in:
parent
8050170ee3
commit
2160976406
4 changed files with 187 additions and 15 deletions
|
|
@ -172,6 +172,40 @@ public class Matrix {
|
||||||
* Vector
|
* Vector
|
||||||
**********************************************************************/
|
**********************************************************************/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Vector addition, every element in the first vector will be added
|
||||||
|
* with the corresponding element in the second vector.
|
||||||
|
*
|
||||||
|
* @return a new vector with subtracted elements
|
||||||
|
*/
|
||||||
|
public static double[] add(double[] vector1, double[] vector2){
|
||||||
|
vectorPreCheck(vector1, vector2);
|
||||||
|
double[] result = new double[vector1.length];
|
||||||
|
|
||||||
|
for (int i=0; i < result.length; ++i) {
|
||||||
|
result[i] = vector1[i] + vector2[i];
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Matrix Vector addition, every column in the matrix will be added
|
||||||
|
* with the vector.
|
||||||
|
*
|
||||||
|
* @return a new matrix with subtracted elements
|
||||||
|
*/
|
||||||
|
public static double[][] add(double[][] matrix, double[] vector){
|
||||||
|
vectorPreCheck(matrix, vector);
|
||||||
|
double[][] result = new double[matrix.length][matrix[0].length];
|
||||||
|
|
||||||
|
for (int y=0; y < result.length; ++y) {
|
||||||
|
for (int x=0; x < result.length; ++x) {
|
||||||
|
result[y][x] = matrix[y][x] + vector[x];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Vector subtraction, every element in the first vector will be subtracted
|
* Vector subtraction, every element in the first vector will be subtracted
|
||||||
* with the corresponding element in the second vector.
|
* with the corresponding element in the second vector.
|
||||||
|
|
@ -188,19 +222,40 @@ public class Matrix {
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Matrix Vector subtraction, each column in the matrix will be subtracted
|
||||||
|
* with the vector.
|
||||||
|
*
|
||||||
|
* @return a new vector with subtracted elements
|
||||||
|
*/
|
||||||
|
public static double[][] subtract(double[][] matrix, double[] vector){
|
||||||
|
vectorPreCheck(matrix, vector);
|
||||||
|
double[][] result = new double[matrix.length][matrix[0].length];
|
||||||
|
|
||||||
|
for (int y=0; y < result.length; ++y) {
|
||||||
|
for (int x=0; x < result.length; ++x) {
|
||||||
|
result[y][x] = matrix[y][x] - vector[x];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Matrix Vector multiplication, each element column in the matrix will be
|
* Matrix Vector multiplication, each element column in the matrix will be
|
||||||
* multiplied with the corresponding element row in the vector.
|
* multiplied with the corresponding element row in the vector.
|
||||||
*
|
*
|
||||||
* @return a new vector with the result
|
* @return a new vector with the result
|
||||||
*/
|
*/
|
||||||
public static double[] multiply(double[][] matrix, double[] vector){
|
public static double[][] multiply(double[][] matrix, double[] vector){
|
||||||
vectorPreCheck(matrix, vector);
|
vectorPreCheck(matrix, vector);
|
||||||
double[] result = new double[matrix.length];
|
double[][] result = new double[matrix.length][1];
|
||||||
|
|
||||||
for (int y=0; y < matrix.length; ++y) {
|
for (int y=0; y < result.length; ++y) {
|
||||||
for (int x=0; x < matrix[y].length; ++x){
|
for (int x=0; x<matrix[0].length; ++x) {
|
||||||
result[y] += matrix[y][x] * vector[x];
|
for (int i=0; i < result[y].length; ++i){
|
||||||
|
result[y][i] += matrix[y][x] * vector[x];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
|
|
@ -263,10 +318,10 @@ public class Matrix {
|
||||||
matrixPreCheck(matrix1, matrix2);
|
matrixPreCheck(matrix1, matrix2);
|
||||||
double[][] result = new double[matrix1.length][matrix2[0].length];
|
double[][] result = new double[matrix1.length][matrix2[0].length];
|
||||||
|
|
||||||
for (int i=0; i < result.length; ++i) {
|
for (int y=0; y < result.length; ++y) {
|
||||||
for (int k=0; k<matrix1[0].length; ++k) {
|
for (int x=0; x<matrix1[0].length; ++x) {
|
||||||
for (int j=0; j < result[i].length; ++j){
|
for (int i=0; i < result[y].length; ++i){
|
||||||
result[i][j] += matrix1[i][k] * matrix2[k][j];
|
result[y][i] += matrix1[y][x] * matrix2[x][i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -323,4 +378,15 @@ public class Matrix {
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return a single dimension double array containing the elements of the j:th column
|
||||||
|
*/
|
||||||
|
public static double[] getColumn(double[][] x, int j) {
|
||||||
|
double[] col = new double[x.length];
|
||||||
|
|
||||||
|
for (int i = 0; i<x.length; i++)
|
||||||
|
col[i] = x[i][j];
|
||||||
|
|
||||||
|
return col;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -16,7 +16,7 @@ public class LinearRegression {
|
||||||
* h(x) = theta0 * x0 + theta1 * x1 + ... + thetan * xn => transpose(theta) * x
|
* h(x) = theta0 * x0 + theta1 * x1 + ... + thetan * xn => transpose(theta) * x
|
||||||
* </i>
|
* </i>
|
||||||
*/
|
*/
|
||||||
protected static double[] calculateHypotesis(double[][] x, double[] theta){
|
protected static double[][] calculateHypotesis(double[][] x, double[] theta){
|
||||||
return Matrix.multiply(x, theta);
|
return Matrix.multiply(x, theta);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -24,15 +24,36 @@ public class LinearRegression {
|
||||||
* Linear Regresion cost method.
|
* Linear Regresion cost method.
|
||||||
* <br /><br />
|
* <br /><br />
|
||||||
* <i>
|
* <i>
|
||||||
* J(O) = 1 / (2 * m) * Σ { ( h(xi) - yi )^2 }
|
* J(O) = 1 / (2 * m) * Σ { ( h(Xi) - Yi )^2 }
|
||||||
* </i><br>
|
* </i><br>
|
||||||
* m = learning data size (rows)
|
* m = learning data size (rows)
|
||||||
* @return a number indicating the error rate
|
* @return a number indicating the error rate
|
||||||
*/
|
*/
|
||||||
protected static double calculateCost(double[][] x, double[] y, double[] theta){
|
protected static double calculateCost(double[][] x, double[] y, double[] theta){
|
||||||
return 1 / (2 * x.length) * Matrix.sum(
|
return 1.0 / (2.0 * x.length) * Matrix.sum(
|
||||||
Matrix.Elemental.pow(
|
Matrix.Elemental.pow(
|
||||||
Matrix.subtract(calculateHypotesis(x, theta), y),
|
Matrix.subtract(calculateHypotesis(x, theta), y),
|
||||||
2));
|
2));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gradient Descent algorithm
|
||||||
|
* <br /><br />
|
||||||
|
* <i>
|
||||||
|
* Oj = Oj - α * (1 / m) * Σ { ( h(Xi) - Yi ) * Xij }
|
||||||
|
* </i><br />
|
||||||
|
*
|
||||||
|
* @return the theta that was found to minimize the cost function
|
||||||
|
*/
|
||||||
|
public static double[] gradientAscent(double[][] x, double[] y, double[] theta, double alpha){
|
||||||
|
double[] newTheta = new double[theta.length];
|
||||||
|
double m = y.length;
|
||||||
|
double[][] hypotesisCache = Matrix.subtract(calculateHypotesis(x, theta), y);
|
||||||
|
|
||||||
|
for (int j= 0; j < theta.length; j++) {
|
||||||
|
newTheta[j] = theta[j] - alpha * (1.0/m) * Matrix.sum(Matrix.add(hypotesisCache, Matrix.getColumn(x, j)));
|
||||||
|
}
|
||||||
|
|
||||||
|
return newTheta;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -70,14 +70,46 @@ public class MatrixTest {
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void vectorMultiply(){
|
public void vectorAddition(){
|
||||||
assertArrayEquals(
|
assertArrayEquals(
|
||||||
new double[]{8,14},
|
new double[]{3,5,-1,13},
|
||||||
Matrix.multiply(new double[][]{{2,3},{-4,9}}, new double[]{1,2}),
|
Matrix.add(new double[]{2,3,-4,9}, new double[]{1,2,3,4}),
|
||||||
0.0
|
0.0
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void vectorMatrixAddition(){
|
||||||
|
assertArrayEquals(
|
||||||
|
new double[][]{{2,3,4,5},{2,3,4,5},{2,3,4,5},{2,3,4,5}},
|
||||||
|
Matrix.add(new double[][]{{1,2,3,4},{1,2,3,4},{1,2,3,4},{1,2,3,4}}, new double[]{1,1,1,1})
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void vectorSubtraction(){
|
||||||
|
assertArrayEquals(
|
||||||
|
new double[]{1,1,-7,5},
|
||||||
|
Matrix.subtract(new double[]{2,3,-4,9}, new double[]{1,2,3,4}),
|
||||||
|
0.0
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void vectorMatrixSubtraction(){
|
||||||
|
assertArrayEquals(
|
||||||
|
new double[][]{{0,1,2,3},{0,1,2,3},{0,1,2,3},{0,1,2,3}},
|
||||||
|
Matrix.subtract(new double[][]{{1,2,3,4},{1,2,3,4},{1,2,3,4},{1,2,3,4}}, new double[]{1,1,1,1})
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void vectorMultiply(){
|
||||||
|
assertArrayEquals(
|
||||||
|
new double[][]{{8},{14}},
|
||||||
|
Matrix.multiply(new double[][]{{2,3},{-4,9}}, new double[]{1,2}));
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void vectorDivision(){
|
public void vectorDivision(){
|
||||||
assertArrayEquals(
|
assertArrayEquals(
|
||||||
|
|
@ -138,4 +170,13 @@ public class MatrixTest {
|
||||||
new double[][]{{1,0,0,0},{0,1,0,0},{0,0,1,0},{0,0,0,1}},
|
new double[][]{{1,0,0,0},{0,1,0,0},{0,0,1,0},{0,0,0,1}},
|
||||||
Matrix.identity(4));
|
Matrix.identity(4));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void getColumn(){
|
||||||
|
assertArrayEquals(
|
||||||
|
new double[]{2,3,4,1},
|
||||||
|
Matrix.getColumn(new double[][]{{1,2,3,4},{2,3,4,1},{3,4,1,2},{4,1,2,3}}, 1),
|
||||||
|
0.0
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
44
test/zutil/ml/LinearRegressionTest.java
Executable file
44
test/zutil/ml/LinearRegressionTest.java
Executable file
|
|
@ -0,0 +1,44 @@
|
||||||
|
package zutil.ml;
|
||||||
|
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import static org.junit.Assert.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test cases are from the Machine Learning course on coursera.
|
||||||
|
* https://www.coursera.org/learn/machine-learning/discussions/all/threads/0SxufTSrEeWPACIACw4G5w
|
||||||
|
*/
|
||||||
|
public class LinearRegressionTest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void calculateHypotesis() {
|
||||||
|
double[][] hypotesis = LinearRegression.calculateHypotesis(
|
||||||
|
/* x */ new double[][]{{1, 2}, {1, 3}, {1, 4}, {1, 5}},
|
||||||
|
/* theta */ new double[]{0.1, 0.2}
|
||||||
|
);
|
||||||
|
|
||||||
|
assertArrayEquals(new double[][]{{0.5}, {0.7}, {0.9}, {1.1}}, hypotesis);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void calculateCost() {
|
||||||
|
double cost = LinearRegression.calculateCost(
|
||||||
|
/* x */ new double[][]{{1, 2}, {1, 3}, {1, 4}, {1, 5}},
|
||||||
|
/* y */ new double[]{7, 6, 5, 4},
|
||||||
|
/* theta */ new double[]{0.1, 0.2}
|
||||||
|
);
|
||||||
|
|
||||||
|
assertEquals(11.9450, cost, 0.0001);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void gradientAscent() {
|
||||||
|
double[] theta = LinearRegression.gradientAscent(
|
||||||
|
/* x */ new double[][]{{1, 5},{1, 2},{1, 4},{1, 5}},
|
||||||
|
/* y */ new double[]{1, 6, 4, 2},
|
||||||
|
/* theta */ new double[]{0, 0},
|
||||||
|
/* alpha */0.01);
|
||||||
|
|
||||||
|
assertArrayEquals(new double[]{0.032500, 0.107500}, theta, 0.000001);
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue