#Multiple Layers
import numpy as np
class NeuralNetwork:
def __init__(self, input_size, hiddenLayerSizes, output_size):
self.input_size = input_size
self.hiddenLayerSizes = hiddenLayerSizes
self.output_size = output_size
self.hiddenLayerWeights = []
self.hiddenLayerBiases =[]
self.weights_input_hidden1 = np.random.randn(self.input_size, self.hiddenLayerSizes[0])
for i in range(0, len(hiddenLayerSizes)-1):
self.hiddenLayerWeights.append(np.random.randn(self.hiddenLayerSizes[i], self.hiddenLayerSizes[i+1]))
self.hiddenLayerBiases.append(np.zeros((1, self.hiddenLayerSizes[i])))
self.weights_hidden_output = np.random.randn(self.hiddenLayerSizes[len(self.hiddenLayerSizes)-1], self.output_size)
self.bias_output = np.zeros((1, self.output_size))
def sigmoid(self, x):
return 1 / (1 + np.exp(-x))
def sigmoid_derivative(self, x):
return x * (1 - x)
def feedforward(self, x):
self.hidden_activations = []
self.hidden_outputs = []
self.hidden_activations.append(np.dot(X, self.weights_input_hidden1) + self.hiddenLayerBiases[0])
self.hidden_outputs.append(self.sigmoid(self.hidden_activations[0]))
for i in range(0, len(self.hiddenLayerSizes)-1):
self.hidden_activations.append(np.dot(self.hidden_outputs[i], self.hiddenLayerWeights[i]) + self.hiddenLayerBiases[i])
self.hidden_outputs.append(self.sigmoid(self.hidden_activations[i]))
self.output_activation = np.dot(self.hidden_outputs[len(self.hidden_outputs)-1], self.weights_hidden_output) + self.bias_output
self.predicted_output = self.sigmoid(self.output_activation)
return self.predicted_output
def backward(self, X, y, learning_rate):
self.output_error = y - self.predicted_output
self.output_delta = self.output_error * self.sigmoid_derivative(self.predicted_output)
self.hidden_errors = [np.dot(self.output_delta, self.weights_hidden_output.T)]
self.hidden_deltas = [self.hidden_errors[0]*self.sigmoid_derivative(self.hidden_outputs[len(self.hidden_outputs)-1])]
for i in range(0, len(self.hiddenLayerSizes)-1):
self.hidden_errors.append(np.dot(self.hidden_deltas[i], self.hiddenLayerWeights[len(self.hiddenLayerWeights)-i-1].T))
self.hidden_deltas.append(self.hidden_errors[i+1] * self.sigmoid_derivative(self.hidden_outputs[len(self.hidden_outputs)-i-1]))
#Weight Updates
self.weights_hidden_output += np.dot(self.hidden_outputs[len(self.hidden_outputs)-1].T, self.output_delta) * learning_rate
self.bias_output += np.sum(self.output_delta, axis=0, keepdims=True) * learning_rate
for i in range(len(self.hiddenLayerSizes)-1, 0):
self.hiddenLayerWeights[i] += np.dot(self.hidden_outputs[i].T, self.hidden_deltas[len(self.hidden_deltas)-i]) * learning_rate
self.hiddenLayerBiases[i+1] += np.sum(self.hidden_deltas[len(self.hidden_deltas)-i], axis=0, keepdims=True) * learning_rate
self.weights_input_hidden1 += np.dot(X.T, self.hidden_deltas[len(self.hidden_deltas)-1]) * learning_rate
self.hiddenLayerBiases[0] += np.sum(self.hidden_deltas[len(self.hidden_deltas)-1], axis=0, keepdims=True) * learning_rate
def train(self, X, y, epochs, learning_rate):
for epoch in range(epochs):
output = self.feedforward(X)
self.backward(X, y, learning_rate)
if epoch % 4000 == 0:
loss = np.mean(np.square(y-output))
print(f"Epoch {epoch}, Loss:{loss}")
nn1 = NeuralNetwork(input_size=2, hiddenLayerSizes=[4, 4], output_size=1)
X = np.array([[0,0], [0,1], [1,0], [1,1]])
y = np.array([[1], [1], [0], [0]])
nn1.train(X, y, epochs=80000, learning_rate=0.01)
output = nn1.feedforward([[1,1], [0,0], [1,1], [0,1]])
print(output)
Unless I am using it incorrectly, it doesn't seem to work as desired. I expect the outputs to be near to [0,1,1,0] however i am getting:
[[0.97068419]
[0.96361902]
[0.02130181]
[0.03903314]]
which is [1,1,0,0]
Also when trying to use different hidden layer sizes such as [4, 8] it runs into a sizing error such as:
"operands could not be broadcast together with shapes (8,8) (1,4)"
How would I need to change my code in order to use different layers sizes?