I tried to follow this crash course to create a neural network from scratch. It seems to be working, which is great, but as I kept running the simulation I noticed that the cost of the network sometimes behaves, continuously decreasing until it reaches a minimum. Other times, it will hit a low, then go back up and rest at that higher position. Other times, it always increases!

Why is it happening?

I wrote it in C# as a Visual Studio Console App.

int[] layerLengths = { 2, 30, 30, 30, 1 };
double[][,] weights = new double[layerLengths.Length - 1][,];
double[][,] biases = new double[layerLengths.Length - 1][,];
double[][,] layers = new double[layerLengths.Length][,];
Random rand = new Random();
double[,] input = { 
{ 142, 64, 27 },
{ 185, 71, 42 },
{ 128, 62, 23 },
{ 210, 74, 51 },
{ 167, 68, 35 },
{ 154, 66, 29 },
{ 198, 72, 46 },
{ 135, 63, 21 },
{ 176, 70, 38 },
{ 221, 75, 54 },
{ 149, 65, 31 },
{ 162, 67, 33 },
{ 193, 73, 48 },
{ 124, 61, 20 },
{ 181, 69, 41 },
{ 205, 76, 57 },
{ 157, 66, 30 },
{ 170, 68, 36 },
{ 138, 64, 25 },
{ 214, 74, 53 },
{ 146, 65, 28 },
{ 189, 72, 44 },
{ 132, 62, 22 },
{ 173, 69, 37 },
{ 201, 73, 49 },
{ 159, 67, 32 },
{ 144, 64, 26 },
{ 178, 70, 39 },
{ 226, 77, 60 },
{ 151, 65, 29 },
{ 166, 68, 34 },
{ 196, 74, 47 },
{ 127, 61, 19 },
{ 183, 71, 43 },
{ 208, 75, 55 },
{ 155, 66, 31 },
{ 171, 69, 36 },
{ 140, 63, 24 },
{ 217, 76, 58 },
{ 148, 65, 27 },
{ 191, 73, 45 },
{ 130, 62, 21 },
{ 175, 70, 38 },
{ 203, 74, 50 },
{ 160, 67, 33 },
{ 145, 64, 26 },
{ 180, 71, 40 },
{ 223, 77, 59 },
{ 153, 66, 30 },
{ 168, 68, 35 } };
double[,] desiredOutput = { { 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0 } };
double[,] transposedInput = scaleInput(transpose(input));
double[,][,] backpropResults = new double[layerLengths.Length - 1, 3][,];

string printMatrix(double[,] matrix)
{
    string retString = "";
    for(int i = 0; i < matrix.GetLength(0); i++)
    {
        for(int j = 0; j < matrix.GetLength(1); j++)
        {
            retString += matrix[i, j].ToString("F2") + " ";
        }
        retString += "\n";
    }
    return retString;
}

double[,] initializeWeightOrBias(int rows, int cols)
{
    double[,] ret = new double[rows, cols];
    int b = 0;
    for (int i = 0; i < rows; i++)
    {
        for (int j = 0; j < cols; j++)
        {
            ret[i, j] = randd();
        }
    }
    return ret;
}

double[,] transpose(double[,] arr)
{
    double[,] output = new double[arr.GetLength(1), arr.GetLength(0)];
    for (int i = 0; i < arr.GetLength(1); i++)
    {
        for (int j = 0; j < arr.GetLength(0); j++)
        {
            output[i, j] = arr[j, i];
        }
    }
    return output;
}
double randd()
{
    return (rand.NextDouble() - 0.5) * 6;
}

double[,] sigmoid(double[,] arr)
{
    for (int i = 0; i < arr.GetLength(0); i++)
    {
        for (int j = 0; j < arr.GetLength(1); j++)
        {
            arr[i, j] = 1 / (1 + Math.Pow(double.E, arr[i, j] * -1));
        }
    }
    return arr;
}

double[,] scaleInput(double[,] arr)
{
    for(int h = 0; h < arr.GetLength(0); h++)
    {
        double total = 0;
        for (int i = 0; i < arr.GetLength(1); i++)
        {
            total += arr[h, i];
        }
        double mean = total / arr.GetLength(1);
        total = 0;
        for (int i = 0; i < arr.GetLength(1); i++)
        {
            total += Math.Pow(mean - arr[h, i], 2);
        }
        double std = Math.Sqrt(total / arr.GetLength(1));
        for (int i = 0; i < arr.GetLength(1); i++)
        {
            arr[h, i] = (arr[h, i] - mean) / std;
        }
    }
    return arr;
}

double[,] matrixMult(double[,] m1, double[,] m2)
{
    double[,] result = new double[m1.GetLength(0), m2.GetLength(1)];
    for (int i = 0; i < m1.GetLength(0); i++)
    {
        for (int j = 0; j < m2.GetLength(1); j++)
        {
            result[i, j] = 0;
            for (int k = 0; k < m1.GetLength(1); k++)
            {
                result[i, j] += m1[i, k] * m2[k, j];
            }
        }
    }
    return result;
}

double[,] matrixAdd(double[,] m1, double[,] m2)
{
    for(int i = 0; i < m2.GetLength(0); i++)
    {
        for(int j = 0; j < m1.GetLength(1); j++)
        {
            m1[i, j] += m2[i, 0];
        }
    }
    return m1;
}

double cost(double[,] test, double[,] real)
{
    double result = 0;
    for(int i = 0; i < test.GetLength(1); i++)
    {
        result += -1 * ((real[0,i] * Math.Log(test[0,i])) + ((1 - real[0,i]) * Math.Log(1 - test[0,i])));
    }
    return result;
}

void backpropRecursive(int layer)
{
    if(layer == layerLengths.Length - 1)
    {
        //final layer
        double[,] dcdz = new double[layers[layer].GetLength(0), layers[layer].GetLength(1)];
        for (int i = 0; i < layers[layer].GetLength(1); i++)
        {
            dcdz[0, i] = layers[layer][0, i] - desiredOutput[0, i];
        }
        double[,] dzdw = transpose(layers[layer - 1]);
        double[,] finalWeight = matrixMult(dcdz, dzdw);
        double[,] finalBias = new double[1, 1];
        foreach (double d in dcdz)
        {
            finalBias[0, 0] += d;
        }
        double[,] nextPropagator = matrixMult(transpose(weights[layer - 1]), dcdz);
        backpropResults[(layerLengths.Length - 1) - layer, 0] = finalWeight;
        backpropResults[(layerLengths.Length - 1) - layer, 1] = finalBias;
        backpropResults[(layerLengths.Length - 1) - layer, 2] = nextPropagator;
        backpropRecursive(layer - 1);

    }
    else if(layer == 1)
    {
        //first hidden layer
        double[,] dadz = new double[layers[layer].GetLength(0), layers[layer].GetLength(1)];
        for (int j = 0; j < layers[layer].GetLength(0); j++)
        {
            for (int k = 0; k < layers[layer].GetLength(1); k++)
            {
                dadz[j, k] = layers[layer][j, k] * (1 - layers[layer][j, k]);
            }
        }
        double[,] dcdz = new double[dadz.GetLength(0), dadz.GetLength(1)];
        for (int l = 0; l < dadz.GetLength(0); l++)
        {
            for (int m = 0; m < dadz.GetLength(1); m++)
            {
                dcdz[l, m] = backpropResults[layerLengths.Length - 2 - layer, 2][l, m] * dadz[l, m];
            }
        }
        double[,] finalWeight = matrixMult(dcdz, input);
        double[,] finalBias = new double[dadz.GetLength(0), 1];
        for (int a = 0; a < dcdz.GetLength(0); a++)
        {
            for (int b = 0; b < dcdz.GetLength(1); b++)
            {
                finalBias[a, 0] += dcdz[a, b];
            }
        }
        backpropResults[(layerLengths.Length - 1) - layer, 0] = finalWeight;
        backpropResults[(layerLengths.Length - 1) - layer, 1] = finalBias;
    }
    else
    {
        //other hidden layers
        double[,] dadz = new double[layers[layer].GetLength(0), layers[layer].GetLength(1)];
        for (int j = 0; j < layers[layer].GetLength(0); j++)
        {
            for (int k = 0; k < layers[layer].GetLength(1); k++)
            {
                dadz[j, k] = layers[layer][j, k] * (1 - layers[layer][j, k]);
            }
        }
        double[,] dcdz = new double[dadz.GetLength(0), dadz.GetLength(1)];
        for (int l = 0; l < dadz.GetLength(0); l++)
        {
            for (int m = 0; m < dadz.GetLength(1); m++)
            {
                dcdz[l, m] = backpropResults[layerLengths.Length - 2 - layer, 2][l, m] * dadz[l, m];
            }
        }
        double[,] finalWeight = matrixMult(dcdz, transpose(layers[1]));
        double[,] finalBias = new double[dadz.GetLength(0), 1];
        for (int a = 0; a < dcdz.GetLength(0); a++)
        {
            for (int b = 0; b < dcdz.GetLength(1); b++)
            {
                finalBias[a, 0] += dcdz[a, b];
            }
        }
        double[,] nextPropagator = matrixMult(transpose(weights[layer - 1]), dcdz);
        backpropResults[(layerLengths.Length - 1) - layer, 0] = finalWeight;
        backpropResults[(layerLengths.Length - 1) - layer, 1] = finalBias;
        backpropResults[(layerLengths.Length - 1) - layer, 2] = nextPropagator;
        backpropRecursive(layer - 1);
    }
}


//the actual executing code

for (int i = 0; i < weights.Length; i++)
{
    weights[i] = initializeWeightOrBias(layerLengths[i + 1], layerLengths[i]);
    biases[i] = initializeWeightOrBias(layerLengths[i + 1], 1);
}

double sensitivity = 500;
int iterations = 800;
double[] costs = new double[iterations];

for (int loops = 0; loops < iterations; loops++)
{
    layers[0] = transposedInput;
    for(int layerNum = 1; layerNum < layerLengths.Length; layerNum++)
    {
        layers[layerNum] = sigmoid(matrixAdd(matrixMult(weights[layerNum - 1], layers[layerNum - 1]), biases[layerNum - 1]));
    }
    costs[loops] = cost(layers[3], desiredOutput)/input.GetLength(0);
    backpropRecursive(layerLengths.Length - 1);
    for(int i = 0; i < layerLengths.Length - 1; i++)
    {
        for(int j = 0; j < weights[i].GetLength(0); j++)
        {
            for(int k = 0; k < weights[i].GetLength(1); k++)
            {
                weights[i][j, k] -= backpropResults[layerLengths.Length - 2 - i, 0][j, k] / sensitivity;
            }
        }
        for (int j = 0; j < biases[i].GetLength(0); j++)
        {
            for (int k = 0; k < biases[i].GetLength(1); k++)
            {
                biases[i][j, k] -= backpropResults[layerLengths.Length - 2 - i, 0][j, k] / sensitivity;
            }
        }
    }
    Console.WriteLine("Cost " + loops + ": " + costs[loops] + "\n");
    Console.WriteLine("Guess: " + printMatrix(layers[layerLengths.Length - 1]));
    Console.WriteLine("Actual:" + printMatrix(desiredOutput));
    Console.WriteLine();
}
Console.WriteLine("Best: Gen " + costs.IndexOf(costs.Min()) + " | " + costs.Min());