#Multiple Layers

import numpy as np

class NeuralNetwork:

    def __init__(self, input_size, hiddenLayerSizes, output_size):

        self.input_size = input_size
        self.hiddenLayerSizes = hiddenLayerSizes
        self.output_size = output_size

        self.hiddenLayerWeights = []
        self.hiddenLayerBiases =[]

        self.weights_input_hidden1 = np.random.randn(self.input_size, self.hiddenLayerSizes[0])

        for i in range(0, len(hiddenLayerSizes)-1):

            self.hiddenLayerWeights.append(np.random.randn(self.hiddenLayerSizes[i], self.hiddenLayerSizes[i+1]))
            self.hiddenLayerBiases.append(np.zeros((1, self.hiddenLayerSizes[i])))

        self.weights_hidden_output = np.random.randn(self.hiddenLayerSizes[len(self.hiddenLayerSizes)-1], self.output_size)
        self.bias_output = np.zeros((1, self.output_size))

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def sigmoid_derivative(self, x):
        return x * (1 - x)

    def feedforward(self, x):

        self.hidden_activations = []
        self.hidden_outputs = []

        self.hidden_activations.append(np.dot(X, self.weights_input_hidden1) + self.hiddenLayerBiases[0])
        self.hidden_outputs.append(self.sigmoid(self.hidden_activations[0]))

        for i in range(0, len(self.hiddenLayerSizes)-1):

            self.hidden_activations.append(np.dot(self.hidden_outputs[i], self.hiddenLayerWeights[i]) + self.hiddenLayerBiases[i])
            self.hidden_outputs.append(self.sigmoid(self.hidden_activations[i]))

        self.output_activation = np.dot(self.hidden_outputs[len(self.hidden_outputs)-1], self.weights_hidden_output) + self.bias_output
        self.predicted_output = self.sigmoid(self.output_activation)

        return self.predicted_output

    def backward(self, X, y, learning_rate):

        self.output_error = y - self.predicted_output
        self.output_delta = self.output_error * self.sigmoid_derivative(self.predicted_output)

        self.hidden_errors = [np.dot(self.output_delta, self.weights_hidden_output.T)]
        self.hidden_deltas = [self.hidden_errors[0]*self.sigmoid_derivative(self.hidden_outputs[len(self.hidden_outputs)-1])]

        for i in range(0, len(self.hiddenLayerSizes)-1):

            self.hidden_errors.append(np.dot(self.hidden_deltas[i], self.hiddenLayerWeights[len(self.hiddenLayerWeights)-i-1].T))
            self.hidden_deltas.append(self.hidden_errors[i+1] * self.sigmoid_derivative(self.hidden_outputs[len(self.hidden_outputs)-i-1]))

        #Weight Updates

        self.weights_hidden_output += np.dot(self.hidden_outputs[len(self.hidden_outputs)-1].T, self.output_delta) * learning_rate
        self.bias_output += np.sum(self.output_delta, axis=0, keepdims=True) * learning_rate

        for i in range(len(self.hiddenLayerSizes)-1, 0):

            self.hiddenLayerWeights[i] += np.dot(self.hidden_outputs[i].T, self.hidden_deltas[len(self.hidden_deltas)-i]) * learning_rate
            self.hiddenLayerBiases[i+1] += np.sum(self.hidden_deltas[len(self.hidden_deltas)-i], axis=0, keepdims=True) * learning_rate

        self.weights_input_hidden1 += np.dot(X.T, self.hidden_deltas[len(self.hidden_deltas)-1]) * learning_rate
        self.hiddenLayerBiases[0] += np.sum(self.hidden_deltas[len(self.hidden_deltas)-1], axis=0, keepdims=True) * learning_rate

    def train(self, X, y, epochs, learning_rate):

        for epoch in range(epochs):
            output = self.feedforward(X)
            self.backward(X, y, learning_rate)
            if epoch % 4000 == 0:
                loss = np.mean(np.square(y-output))
                print(f"Epoch {epoch}, Loss:{loss}")

nn1 = NeuralNetwork(input_size=2, hiddenLayerSizes=[4, 4], output_size=1)

X = np.array([[0,0], [0,1], [1,0], [1,1]])

y = np.array([[1], [1], [0], [0]])

nn1.train(X, y, epochs=80000, learning_rate=0.01)

output = nn1.feedforward([[1,1], [0,0], [1,1], [0,1]])

print(output)

Unless I am using it incorrectly, it doesn't seem to work as desired. I expect the outputs to be near to [0,1,1,0] however i am getting:

[[0.97068419]

[0.96361902]

[0.02130181]

[0.03903314]]

which is [1,1,0,0]

Also when trying to use different hidden layer sizes such as [4, 8] it runs into a sizing error such as:

"operands could not be broadcast together with shapes (8,8) (1,4)"

How would I need to change my code in order to use different layers sizes?