Implemented Gradient Ascent (TCs failing)

This commit is contained in:
Ziver Koc 2018-04-17 17:03:34 +02:00
parent 8050170ee3
commit 2160976406
4 changed files with 187 additions and 15 deletions

View file

@ -172,6 +172,40 @@ public class Matrix {
* Vector
**********************************************************************/
/**
 * Element-wise vector addition.
 * <p>
 * Every element of the result is the sum of the elements found at the same
 * index in the two operands. Both operands are handed to
 * {@code vectorPreCheck} first and neither of them is modified.
 *
 * @param vector1 the first operand
 * @param vector2 the second operand
 * @return a new vector containing the element-wise sums
 */
public static double[] add(double[] vector1, double[] vector2){
    vectorPreCheck(vector1, vector2);
    // Start from a copy of the first operand and accumulate the second into it.
    double[] sum = vector1.clone();
    for (int idx = 0; idx < sum.length; idx++) {
        sum[idx] += vector2[idx];
    }
    return sum;
}
/**
 * Matrix-vector addition. The vector is added element-wise to every row of
 * the matrix: {@code result[y][x] = matrix[y][x] + vector[x]}.
 * <p>
 * Both operands are handed to {@code vectorPreCheck} first and neither of
 * them is modified.
 *
 * @param matrix the matrix operand, indexed as {@code matrix[row][column]}
 * @param vector the vector operand, indexed by matrix column
 * @return a new matrix, with the same dimensions as {@code matrix},
 *         containing the summed elements
 */
public static double[][] add(double[][] matrix, double[] vector){
    vectorPreCheck(matrix, vector);
    double[][] result = new double[matrix.length][matrix[0].length];
    for (int y=0; y < result.length; ++y) {
        // The inner loop must run over the columns of the row. Bounding it by
        // the row count only works for square matrices: it overruns the row
        // when rows > columns and skips columns when rows < columns.
        for (int x=0; x < result[y].length; ++x) {
            result[y][x] = matrix[y][x] + vector[x];
        }
    }
    return result;
}
/**
* Vector subtraction, every element in the first vector will be subtracted
* with the corresponding element in the second vector.
@ -188,19 +222,40 @@ public class Matrix {
return result;
}
/**
 * Matrix-vector subtraction. The vector is subtracted element-wise from
 * every row of the matrix: {@code result[y][x] = matrix[y][x] - vector[x]}.
 * <p>
 * Both operands are handed to {@code vectorPreCheck} first and neither of
 * them is modified.
 *
 * @param matrix the matrix operand, indexed as {@code matrix[row][column]}
 * @param vector the vector operand, indexed by matrix column
 * @return a new matrix, with the same dimensions as {@code matrix},
 *         containing the subtracted elements
 */
public static double[][] subtract(double[][] matrix, double[] vector){
    vectorPreCheck(matrix, vector);
    double[][] result = new double[matrix.length][matrix[0].length];
    for (int y=0; y < result.length; ++y) {
        // The inner loop must run over the columns of the row. Bounding it by
        // the row count only works for square matrices: it overruns the row
        // when rows > columns and skips columns when rows < columns.
        for (int x=0; x < result[y].length; ++x) {
            result[y][x] = matrix[y][x] - vector[x];
        }
    }
    return result;
}
/**
* Matrix Vector multiplication, each element column in the matrix will be
* multiplied with the corresponding element row in the vector.
*
* @return a new single-column matrix (matrix.length x 1) with the result
*/
public static double[] multiply(double[][] matrix, double[] vector){
public static double[][] multiply(double[][] matrix, double[] vector){
vectorPreCheck(matrix, vector);
double[] result = new double[matrix.length];
double[][] result = new double[matrix.length][1];
for (int y=0; y < matrix.length; ++y) {
for (int x=0; x < matrix[y].length; ++x){
result[y] += matrix[y][x] * vector[x];
for (int y=0; y < result.length; ++y) {
for (int x=0; x<matrix[0].length; ++x) {
for (int i=0; i < result[y].length; ++i){
result[y][i] += matrix[y][x] * vector[x];
}
}
}
return result;
@ -263,10 +318,10 @@ public class Matrix {
matrixPreCheck(matrix1, matrix2);
double[][] result = new double[matrix1.length][matrix2[0].length];
for (int i=0; i < result.length; ++i) {
for (int k=0; k<matrix1[0].length; ++k) {
for (int j=0; j < result[i].length; ++j){
result[i][j] += matrix1[i][k] * matrix2[k][j];
for (int y=0; y < result.length; ++y) {
for (int x=0; x<matrix1[0].length; ++x) {
for (int i=0; i < result[y].length; ++i){
result[y][i] += matrix1[y][x] * matrix2[x][i];
}
}
}
@ -323,4 +378,15 @@ public class Matrix {
return result;
}
/**
 * Copies one column of a two dimensional array into a flat array.
 *
 * @param x the matrix to read from, indexed as {@code x[row][column]}
 * @param j the zero based index of the column to extract
 * @return a new single dimension array with one element per row of
 *         {@code x}, holding the value found in the j:th column of that row
 */
public static double[] getColumn(double[][] x, int j) {
    double[] column = new double[x.length];
    int row = 0;
    for (double[] values : x) {
        column[row++] = values[j];
    }
    return column;
}
}

View file

@ -16,7 +16,7 @@ public class LinearRegression {
* h(x) = theta0 * x0 + theta1 * x1 + ... + thetan * xn => transpose(theta) * x
* </i>
*/
protected static double[] calculateHypotesis(double[][] x, double[] theta){
protected static double[][] calculateHypotesis(double[][] x, double[] theta){
    // The hypothesis is the matrix-vector product of the data and theta;
    // the product is handed back to the caller unchanged.
    final double[][] hypothesis = Matrix.multiply(x, theta);
    return hypothesis;
}
@ -24,15 +24,36 @@ public class LinearRegression {
* Linear Regression cost method.
* <br /><br />
* <i>
* J(O) = 1 / (2 * m) * Σ { ( h(xi) - yi )^2 }
* J(O) = 1 / (2 * m) * Σ { ( h(Xi) - Yi )^2 }
* </i><br>
* m = learning data size (rows)
* @return a number indicating the error rate
*/
protected static double calculateCost(double[][] x, double[] y, double[] theta){
return 1 / (2 * x.length) * Matrix.sum(
return 1.0 / (2.0 * x.length) * Matrix.sum(
Matrix.Elemental.pow(
Matrix.subtract(calculateHypotesis(x, theta), y),
2));
}
/**
 * Performs one batch Gradient Descent update of theta for linear regression.
 * <br /><br />
 * <i>
 * Oj = Oj - α * (1 / m) * Σ { ( h(Xi) - Yi ) * Xij }
 * </i><br />
 * where h(Xi) = Σ { Oj * Xij } and m = learning data size (rows).
 * <p>
 * Note that, despite its name, this method steps <b>against</b> the gradient
 * of the cost function (descent). The name is kept so existing callers
 * continue to work.
 *
 * @param x     the learning data, one row per sample with at least
 *              {@code theta.length} columns
 * @param y     the expected output, one element per row in {@code x}
 * @param theta the current parameters; this array is not modified
 * @param alpha the learning rate
 * @return a new array with the theta values after a single update step
 * @throws IllegalArgumentException if {@code x} and {@code y} do not have
 *         the same number of rows
 */
public static double[] gradientAscent(double[][] x, double[] y, double[] theta, double alpha){
    if (x.length != y.length) {
        throw new IllegalArgumentException(
                "x has " + x.length + " rows but y has " + y.length + " elements");
    }
    int m = y.length;
    if (m == 0) {
        // No learning data means no gradient; avoid the 1/0 factor below.
        return theta.clone();
    }

    // residual[i] = h(Xi) - Yi, computed once and shared by every theta element.
    // It is calculated row by row here so that Yi is matched with row i.
    double[] residual = new double[m];
    for (int i = 0; i < m; i++) {
        double hypothesis = 0.0;
        for (int j = 0; j < theta.length; j++) {
            hypothesis += theta[j] * x[i][j];
        }
        residual[i] = hypothesis - y[i];
    }

    double[] newTheta = new double[theta.length];
    for (int j = 0; j < theta.length; j++) {
        // Partial derivative of the cost for theta j: the residuals are
        // multiplied (not added) with the j:th feature before being summed.
        double sum = 0.0;
        for (int i = 0; i < m; i++) {
            sum += residual[i] * x[i][j];
        }
        newTheta[j] = theta[j] - alpha * (1.0 / m) * sum;
    }
    return newTheta;
}
}