I wanted to create a supervised NN that recognises numbers on images from scratch, pretty ambitious project for someone like me. But when I test it with a simple XOr gate, it completely fails. It just sarts with a random value (as it should) but then the value of the last neuron always goes down. Have I made a mistake in my derivatives with respect to the cost? Note: I take as refference 3blue1brown's video about this.
This is my program. The comments are there to explain what each function does.
I do not know where the mistake is located, but, as I suppose its a learning mistake, I think its either in the trainOneExample() or Training().
import random
import math
import numpy as np
def DerivativeSigmoid(z):
if z < -100:
return 0
if z > 100:
return 1
return 4.9 / (math.exp(-4.9 * z))
#Derivative of (output-expected_output-1)**2
def DrtValueVsCostLastLayer(a, y):
return 2 * (a - y)
class Weight:
def __init__(self):
#Initialise randomly
self.weight = random.uniform(-1, 1)
#Variables that will store the sum of the wished changes for each example and then do the average later
self.total_wished_changes = 0
self.number_of_wished_changes = 0
class Neuron:
def __init__(self, layer, number, num_output_weights=0):
#Classifying variables
self.num_output_weights = num_output_weights
self.layer = layer
self.number = number
#Value of the neuron before the sigmoid squishification function
self.z = 0
#Value of the neuron after the signmoid function
self.value = 0
#Initialise the weights comming from this neuron
self.output_weights = []
for i in range(self.num_output_weights):
self.output_weights.append(Weight())
#Value of the bias
self.bias_weight = 0
# Variables that will store the sum of the wished changes for the bias for each example and then do the average later
self.total_wished_bias_changes = 0
self.number_wished_bias_changes = 0
#Calculated value of the derivative of the neuron with respect to the cost
self.actual_derivative = 0
class Net:
def __init__(self, topology, bias=1):
#Number of neurons for each layer
self.topology = topology
#Value that will multiply the bias_weight value of each neuron, 1 by default
self.bias = bias
#Self.layers is where all the neurons are stored. This is just to initialise it
self.topology.append(0)
self.layers = []
for layer in range(len(self.topology) - 1):
#Create a layer, add it a number of neurons and append it to self.layers
layer_toAdd = []
for i in range(self.topology[layer]):
layer_toAdd.append(Neuron(layer, i, self.topology[layer + 1]))
self.layers.append(layer_toAdd)
self.topology.pop()
#Function that goes throught each of the neurons of a certain layer and calculates z, then its actual value
def modifLayer(self, layer):
for neuron in range(len(self.layers[layer])):
#Z= sum of all the previous layers values * the weights + the bias
self.layers[layer][neuron].z = sum(
[inputNeuron.value * inputNeuron.output_weights[neuron].weight for inputNeuron in
self.layers[layer - 1]]) + self.bias * self.layers[layer][neuron].bias_weight
#Apply the sigmoid function to z to get the actual value
self.layers[layer][neuron].value = self.sigmoid(self.layers[layer][neuron].z)
#Function that initialises the first layer according to the inputs, then calculates each layer according to its previous one
def calculateResult(self, inputs):
#Input layer
for i in range(len(inputs)):
self.layers[0][i].value = inputs[i]
#Calculate values of each layer
for layer in range(1, len(self.topology)):
self.modifLayer(layer)
#return the values of the output layer
return [neuron.value for neuron in self.layers[-1]]
#Sigmoid function. To avoid OverflowError, when x is too high or too low it makes automatically 1 or 0
def sigmoid(self, x):
if x < -100:
return 0
if x > 100:
return 1
return 1 / (1 + np.exp(-4.9 * x))
#Funtion that takes inputs and expected outputs and calculates the wished changes for each bias and weight
def trainOnOneExample(self, inputs, expected_outputs):
outputs_got = self.calculateResult(inputs)
errors = [(expected_outputs[i] - outputs_got[i]) ** 2 for i in range(len(expected_outputs))]
#Derivative of a neuron value: sum of ( (all the weights its got) x (derivative of the signmoid function) x (wished output of the neuron the weight leads to) )
#Derivative of a weight: (value of the neuron it comes from) x (the derivative of the sigmoid function) x (the wished output of the neuron the weight leads to)
#Derivative of the bias: (derivative of the sigmoid function) x (the wished change of that neuron)
#Last layer only: the wished neuron changes are calculated accroding to the expected outputs
for neuron_idx in range(self.topology[-1]):
neuron = self.layers[-1][neuron_idx]
#Derivative: 2(output got - expected output)
neuron.actual_derivative = DrtValueVsCostLastLayer(neuron.value, expected_outputs[neuron_idx])
#Line 117
neuron.total_wished_bias_changes += DerivativeSigmoid(neuron.z) * neuron.actual_derivative
neuron.number_wished_bias_changes += 1
#All layers but last one
for layer_idx in range(len(self.topology)-2,-1,-1):
for neuron in self.layers[layer_idx]:
#Line 115
neuron.actual_derivative = sum([neuron.output_weights[i].weight * DerivativeSigmoid(self.layers[layer_idx+1][i].z) * self.layers[layer_idx+1][i].actual_derivative for i in range(self.topology[layer_idx+1])])
#Line 116
neuron.total_wished_bias_changes += DerivativeSigmoid(neuron.z) * neuron.actual_derivative
neuron.number_wished_bias_changes += 1
#Go throught each weight
for weight in range(len(neuron.output_weights)):
this_weight = neuron.output_weights[weight]
#Line 117
this_weight.total_wished_changes += neuron.value * \
DerivativeSigmoid(self.layers[layer_idx+1][weight].z) \
* self.layers[layer_idx+1][weight].actual_derivative
this_weight.number_of_wished_changes += 1
print()
#Function that takes the training data and test data, trains the NN, modifies the values and test to see the improvement
def Training(self, training_data, test_data):
#Divide in mini batches for stochastic gradeint descend
batches = np.array_split(training_data, 5)
#lr = np.arange(10)
for i in range(1000):
for batch in batches:
for sample in batch:
# expectedoutput = (lr==sample[0]).astype(np.int)
#Trains an example
expectedoutput = sample[0]
self.trainOnOneExample(sample[1], expectedoutput)
#Modify each bias and weight as "wished" by the training examples
for layer in range(len(self.layers)):
for neuron in self.layers[layer]:
if layer != 0:
#Does the average wished changes and adds it to the value of the bias
neuron.bias_weight += neuron.total_wished_bias_changes / neuron.number_wished_bias_changes
#Always remain beatween -1 and 1
neuron.bias_weight = max(-1, min(1, neuron.bias_weight))
# print((neuron.total_wished_bias_changes)/neuron.number_of_changers)
neuron.total_wished_bias_changes = 0
neuron.number_wished_bias_changes = 0
for weight in neuron.output_weights:
# Does the average wished changes and adds it to the value of the weight
weight.weight += weight.total_wished_changes / weight.number_of_wished_changes
#print(weight.total_wished_changes / weight.number_of_wished_changes)
# Always remain beatween -1 and 1
weight.weight = max(-1, min(1, weight.weight))
weight.total_wished_changes = 0
weight.number_of_wished_changes = 0
#Randomly select a test sample and print its result
test_sample = test_data[random.randint(0, len(test_data) - 1)]
expectedoutput_test = test_sample[0]
actualoutput = self.calculateResult(test_sample[1])
print("Expected: ",expectedoutput_test," Got: ",actualoutput)
net = Net([2, 3, 2, 1])
net.trainOnOneExample([1,0],[0])
#net.Training([[[0], [1, 0]], [[0], [0, 1]], [[1], [1, 1]], [[1], [1, 1]], [[1], [0, 0]], [[0], [1, 0]]],[[[0], [0, 0]], [[1], [0, 0]]])
```