I tried to follow this crash course to create a neural network from scratch. It seems to be working, which is great, but as I kept running the simulation I noticed that the cost of the network sometimes behaves, continuously decreasing until it reaches a minimum. Other times, it will hit a low, then go back up and rest at that higher position. Other times, it always increases!
Why is it happening?
I wrote it in C# as a Visual Studio Console App.
int[] layerLengths = { 2, 30, 30, 30, 1 };
double[][,] weights = new double[layerLengths.Length - 1][,];
double[][,] biases = new double[layerLengths.Length - 1][,];
double[][,] layers = new double[layerLengths.Length][,];
Random rand = new Random();
double[,] input = {
{ 142, 64, 27 },
{ 185, 71, 42 },
{ 128, 62, 23 },
{ 210, 74, 51 },
{ 167, 68, 35 },
{ 154, 66, 29 },
{ 198, 72, 46 },
{ 135, 63, 21 },
{ 176, 70, 38 },
{ 221, 75, 54 },
{ 149, 65, 31 },
{ 162, 67, 33 },
{ 193, 73, 48 },
{ 124, 61, 20 },
{ 181, 69, 41 },
{ 205, 76, 57 },
{ 157, 66, 30 },
{ 170, 68, 36 },
{ 138, 64, 25 },
{ 214, 74, 53 },
{ 146, 65, 28 },
{ 189, 72, 44 },
{ 132, 62, 22 },
{ 173, 69, 37 },
{ 201, 73, 49 },
{ 159, 67, 32 },
{ 144, 64, 26 },
{ 178, 70, 39 },
{ 226, 77, 60 },
{ 151, 65, 29 },
{ 166, 68, 34 },
{ 196, 74, 47 },
{ 127, 61, 19 },
{ 183, 71, 43 },
{ 208, 75, 55 },
{ 155, 66, 31 },
{ 171, 69, 36 },
{ 140, 63, 24 },
{ 217, 76, 58 },
{ 148, 65, 27 },
{ 191, 73, 45 },
{ 130, 62, 21 },
{ 175, 70, 38 },
{ 203, 74, 50 },
{ 160, 67, 33 },
{ 145, 64, 26 },
{ 180, 71, 40 },
{ 223, 77, 59 },
{ 153, 66, 30 },
{ 168, 68, 35 } };
double[,] desiredOutput = { { 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0 } };
double[,] transposedInput = scaleInput(transpose(input));
double[,][,] backpropResults = new double[layerLengths.Length - 1, 3][,];
string printMatrix(double[,] matrix)
{
string retString = "";
for(int i = 0; i < matrix.GetLength(0); i++)
{
for(int j = 0; j < matrix.GetLength(1); j++)
{
retString += matrix[i, j].ToString("F2") + " ";
}
retString += "\n";
}
return retString;
}
double[,] initializeWeightOrBias(int rows, int cols)
{
double[,] ret = new double[rows, cols];
int b = 0;
for (int i = 0; i < rows; i++)
{
for (int j = 0; j < cols; j++)
{
ret[i, j] = randd();
}
}
return ret;
}
double[,] transpose(double[,] arr)
{
double[,] output = new double[arr.GetLength(1), arr.GetLength(0)];
for (int i = 0; i < arr.GetLength(1); i++)
{
for (int j = 0; j < arr.GetLength(0); j++)
{
output[i, j] = arr[j, i];
}
}
return output;
}
double randd()
{
return (rand.NextDouble() - 0.5) * 6;
}
double[,] sigmoid(double[,] arr)
{
for (int i = 0; i < arr.GetLength(0); i++)
{
for (int j = 0; j < arr.GetLength(1); j++)
{
arr[i, j] = 1 / (1 + Math.Pow(double.E, arr[i, j] * -1));
}
}
return arr;
}
double[,] scaleInput(double[,] arr)
{
for(int h = 0; h < arr.GetLength(0); h++)
{
double total = 0;
for (int i = 0; i < arr.GetLength(1); i++)
{
total += arr[h, i];
}
double mean = total / arr.GetLength(1);
total = 0;
for (int i = 0; i < arr.GetLength(1); i++)
{
total += Math.Pow(mean - arr[h, i], 2);
}
double std = Math.Sqrt(total / arr.GetLength(1));
for (int i = 0; i < arr.GetLength(1); i++)
{
arr[h, i] = (arr[h, i] - mean) / std;
}
}
return arr;
}
double[,] matrixMult(double[,] m1, double[,] m2)
{
double[,] result = new double[m1.GetLength(0), m2.GetLength(1)];
for (int i = 0; i < m1.GetLength(0); i++)
{
for (int j = 0; j < m2.GetLength(1); j++)
{
result[i, j] = 0;
for (int k = 0; k < m1.GetLength(1); k++)
{
result[i, j] += m1[i, k] * m2[k, j];
}
}
}
return result;
}
double[,] matrixAdd(double[,] m1, double[,] m2)
{
for(int i = 0; i < m2.GetLength(0); i++)
{
for(int j = 0; j < m1.GetLength(1); j++)
{
m1[i, j] += m2[i, 0];
}
}
return m1;
}
double cost(double[,] test, double[,] real)
{
double result = 0;
for(int i = 0; i < test.GetLength(1); i++)
{
result += -1 * ((real[0,i] * Math.Log(test[0,i])) + ((1 - real[0,i]) * Math.Log(1 - test[0,i])));
}
return result;
}
void backpropRecursive(int layer)
{
if(layer == layerLengths.Length - 1)
{
//final layer
double[,] dcdz = new double[layers[layer].GetLength(0), layers[layer].GetLength(1)];
for (int i = 0; i < layers[layer].GetLength(1); i++)
{
dcdz[0, i] = layers[layer][0, i] - desiredOutput[0, i];
}
double[,] dzdw = transpose(layers[layer - 1]);
double[,] finalWeight = matrixMult(dcdz, dzdw);
double[,] finalBias = new double[1, 1];
foreach (double d in dcdz)
{
finalBias[0, 0] += d;
}
double[,] nextPropagator = matrixMult(transpose(weights[layer - 1]), dcdz);
backpropResults[(layerLengths.Length - 1) - layer, 0] = finalWeight;
backpropResults[(layerLengths.Length - 1) - layer, 1] = finalBias;
backpropResults[(layerLengths.Length - 1) - layer, 2] = nextPropagator;
backpropRecursive(layer - 1);
}
else if(layer == 1)
{
//first hidden layer
double[,] dadz = new double[layers[layer].GetLength(0), layers[layer].GetLength(1)];
for (int j = 0; j < layers[layer].GetLength(0); j++)
{
for (int k = 0; k < layers[layer].GetLength(1); k++)
{
dadz[j, k] = layers[layer][j, k] * (1 - layers[layer][j, k]);
}
}
double[,] dcdz = new double[dadz.GetLength(0), dadz.GetLength(1)];
for (int l = 0; l < dadz.GetLength(0); l++)
{
for (int m = 0; m < dadz.GetLength(1); m++)
{
dcdz[l, m] = backpropResults[layerLengths.Length - 2 - layer, 2][l, m] * dadz[l, m];
}
}
double[,] finalWeight = matrixMult(dcdz, input);
double[,] finalBias = new double[dadz.GetLength(0), 1];
for (int a = 0; a < dcdz.GetLength(0); a++)
{
for (int b = 0; b < dcdz.GetLength(1); b++)
{
finalBias[a, 0] += dcdz[a, b];
}
}
backpropResults[(layerLengths.Length - 1) - layer, 0] = finalWeight;
backpropResults[(layerLengths.Length - 1) - layer, 1] = finalBias;
}
else
{
//other hidden layers
double[,] dadz = new double[layers[layer].GetLength(0), layers[layer].GetLength(1)];
for (int j = 0; j < layers[layer].GetLength(0); j++)
{
for (int k = 0; k < layers[layer].GetLength(1); k++)
{
dadz[j, k] = layers[layer][j, k] * (1 - layers[layer][j, k]);
}
}
double[,] dcdz = new double[dadz.GetLength(0), dadz.GetLength(1)];
for (int l = 0; l < dadz.GetLength(0); l++)
{
for (int m = 0; m < dadz.GetLength(1); m++)
{
dcdz[l, m] = backpropResults[layerLengths.Length - 2 - layer, 2][l, m] * dadz[l, m];
}
}
double[,] finalWeight = matrixMult(dcdz, transpose(layers[1]));
double[,] finalBias = new double[dadz.GetLength(0), 1];
for (int a = 0; a < dcdz.GetLength(0); a++)
{
for (int b = 0; b < dcdz.GetLength(1); b++)
{
finalBias[a, 0] += dcdz[a, b];
}
}
double[,] nextPropagator = matrixMult(transpose(weights[layer - 1]), dcdz);
backpropResults[(layerLengths.Length - 1) - layer, 0] = finalWeight;
backpropResults[(layerLengths.Length - 1) - layer, 1] = finalBias;
backpropResults[(layerLengths.Length - 1) - layer, 2] = nextPropagator;
backpropRecursive(layer - 1);
}
}
//the actual executing code
for (int i = 0; i < weights.Length; i++)
{
weights[i] = initializeWeightOrBias(layerLengths[i + 1], layerLengths[i]);
biases[i] = initializeWeightOrBias(layerLengths[i + 1], 1);
}
double sensitivity = 500;
int iterations = 800;
double[] costs = new double[iterations];
for (int loops = 0; loops < iterations; loops++)
{
layers[0] = transposedInput;
for(int layerNum = 1; layerNum < layerLengths.Length; layerNum++)
{
layers[layerNum] = sigmoid(matrixAdd(matrixMult(weights[layerNum - 1], layers[layerNum - 1]), biases[layerNum - 1]));
}
costs[loops] = cost(layers[3], desiredOutput)/input.GetLength(0);
backpropRecursive(layerLengths.Length - 1);
for(int i = 0; i < layerLengths.Length - 1; i++)
{
for(int j = 0; j < weights[i].GetLength(0); j++)
{
for(int k = 0; k < weights[i].GetLength(1); k++)
{
weights[i][j, k] -= backpropResults[layerLengths.Length - 2 - i, 0][j, k] / sensitivity;
}
}
for (int j = 0; j < biases[i].GetLength(0); j++)
{
for (int k = 0; k < biases[i].GetLength(1); k++)
{
biases[i][j, k] -= backpropResults[layerLengths.Length - 2 - i, 0][j, k] / sensitivity;
}
}
}
Console.WriteLine("Cost " + loops + ": " + costs[loops] + "\n");
Console.WriteLine("Guess: " + printMatrix(layers[layerLengths.Length - 1]));
Console.WriteLine("Actual:" + printMatrix(desiredOutput));
Console.WriteLine();
}
Console.WriteLine("Best: Gen " + costs.IndexOf(costs.Min()) + " | " + costs.Min());