Supervised NN not learning

Question

I wanted to create a supervised NN that recognises numbers on images from scratch, pretty ambitious project for someone like me. But when I test it with a simple XOr gate, it completely fails. It just sarts with a random value (as it should) but then the value of the last neuron always goes down. Have I made a mistake in my derivatives with respect to the cost? Note: I take as refference 3blue1brown's video about this.

This is my program. The comments are there to explain what each function does.

I do not know where the mistake is located, but, as I suppose its a learning mistake, I think its either in the trainOneExample() or Training().

import random
import math
import numpy as np


def DerivativeSigmoid(z):
    if z < -100:
        return 0
    if z > 100:
        return 1
    return 4.9 / (math.exp(-4.9 * z))

#Derivative of (output-expected_output-1)**2
def DrtValueVsCostLastLayer(a, y):
    return 2 * (a - y)


class Weight:
    def __init__(self):

        #Initialise randomly
        self.weight = random.uniform(-1, 1)

        #Variables that will store the sum of the wished changes for each example and then do the average later
        self.total_wished_changes = 0
        self.number_of_wished_changes = 0


class Neuron:
    def __init__(self, layer, number, num_output_weights=0):
        #Classifying variables
        self.num_output_weights = num_output_weights
        self.layer = layer
        self.number = number

        #Value of the neuron before the sigmoid squishification function
        self.z = 0
        #Value of the neuron after the signmoid function
        self.value = 0

        #Initialise the weights comming from this neuron
        self.output_weights = []
        for i in range(self.num_output_weights):
            self.output_weights.append(Weight())

        #Value of the bias
        self.bias_weight = 0

        # Variables that will store the sum of the wished changes for the bias for each example and then do the average later
        self.total_wished_bias_changes = 0
        self.number_wished_bias_changes = 0

        #Calculated value of the derivative of the neuron with respect to the cost
        self.actual_derivative = 0


class Net:
    def __init__(self, topology, bias=1):
        #Number of neurons for each layer
        self.topology = topology

        #Value that will multiply the bias_weight value of each neuron, 1 by default
        self.bias = bias

        #Self.layers is where all the neurons are stored. This is just to initialise it
        self.topology.append(0)
        self.layers = []
        for layer in range(len(self.topology) - 1):
            #Create a layer, add it a number of neurons and append it to self.layers
            layer_toAdd = []
            for i in range(self.topology[layer]):
                layer_toAdd.append(Neuron(layer, i, self.topology[layer + 1]))
            self.layers.append(layer_toAdd)
        self.topology.pop()

    #Function that goes throught each of the neurons of a certain layer and calculates z, then its actual value
    def modifLayer(self, layer):
        for neuron in range(len(self.layers[layer])):
            #Z= sum of all the previous layers values * the weights + the bias
            self.layers[layer][neuron].z = sum(
                [inputNeuron.value * inputNeuron.output_weights[neuron].weight for inputNeuron in
                 self.layers[layer - 1]]) + self.bias * self.layers[layer][neuron].bias_weight

            #Apply the sigmoid function to z to get the actual value
            self.layers[layer][neuron].value = self.sigmoid(self.layers[layer][neuron].z)

    #Function that initialises the first layer according to the inputs, then calculates each layer according to its previous one
    def calculateResult(self, inputs):
        #Input layer
        for i in range(len(inputs)):
            self.layers[0][i].value = inputs[i]

        #Calculate values of each layer
        for layer in range(1, len(self.topology)):
            self.modifLayer(layer)

        #return the values of the output layer
        return [neuron.value for neuron in self.layers[-1]]

    #Sigmoid function. To avoid OverflowError, when x is too high or too low it makes automatically 1 or 0
    def sigmoid(self, x):
        if x < -100:
            return 0
        if x > 100:
            return 1
        return 1 / (1 + np.exp(-4.9 * x))

    #Funtion that takes inputs and expected outputs and calculates the wished changes for each bias and weight
    def trainOnOneExample(self, inputs, expected_outputs):

        outputs_got = self.calculateResult(inputs)

        errors = [(expected_outputs[i] - outputs_got[i]) ** 2 for i in range(len(expected_outputs))]

        #Derivative of a neuron value: sum of ( (all the weights its got) x (derivative of the signmoid function) x (wished output of the neuron the weight leads to) )
        #Derivative of a weight: (value of the neuron it comes from) x (the derivative of the sigmoid function) x (the wished output of the neuron the weight leads to)
        #Derivative of the bias: (derivative of the sigmoid function) x (the wished change of that neuron)


        #Last layer only: the wished neuron changes are calculated accroding to the expected outputs
        for neuron_idx in range(self.topology[-1]):
            neuron = self.layers[-1][neuron_idx]

            #Derivative: 2(output got - expected output)
            neuron.actual_derivative = DrtValueVsCostLastLayer(neuron.value, expected_outputs[neuron_idx])

            #Line 117
            neuron.total_wished_bias_changes += DerivativeSigmoid(neuron.z) * neuron.actual_derivative
            neuron.number_wished_bias_changes += 1

        #All layers but last one
        for layer_idx in range(len(self.topology)-2,-1,-1):
            for neuron in self.layers[layer_idx]:
                #Line 115
                neuron.actual_derivative = sum([neuron.output_weights[i].weight * DerivativeSigmoid(self.layers[layer_idx+1][i].z) * self.layers[layer_idx+1][i].actual_derivative for i in range(self.topology[layer_idx+1])])

                #Line 116
                neuron.total_wished_bias_changes += DerivativeSigmoid(neuron.z) * neuron.actual_derivative
                neuron.number_wished_bias_changes += 1

                #Go throught each weight
                for weight in range(len(neuron.output_weights)):
                    this_weight = neuron.output_weights[weight]
                    #Line 117
                    this_weight.total_wished_changes += neuron.value * \
                                                       DerivativeSigmoid(self.layers[layer_idx+1][weight].z) \
                                                       * self.layers[layer_idx+1][weight].actual_derivative

                    this_weight.number_of_wished_changes += 1

        print()

    #Function that takes the training data and test data, trains the NN, modifies the values and test to see the improvement
    def Training(self, training_data, test_data):

        #Divide in mini batches for stochastic gradeint descend
        batches = np.array_split(training_data, 5)

        #lr = np.arange(10)

        for i in range(1000):

            for batch in batches:
                for sample in batch:
                    # expectedoutput = (lr==sample[0]).astype(np.int)

                    #Trains an example
                    expectedoutput = sample[0]
                    self.trainOnOneExample(sample[1], expectedoutput)

                #Modify each bias and weight as "wished" by the training examples
                for layer in range(len(self.layers)):
                    for neuron in self.layers[layer]:
                        if layer != 0:
                            #Does the average wished changes and adds it to the value of the bias
                            neuron.bias_weight += neuron.total_wished_bias_changes / neuron.number_wished_bias_changes

                            #Always remain beatween -1 and 1
                            neuron.bias_weight = max(-1, min(1, neuron.bias_weight))

                            # print((neuron.total_wished_bias_changes)/neuron.number_of_changers)

                            neuron.total_wished_bias_changes = 0
                            neuron.number_wished_bias_changes = 0

                        for weight in neuron.output_weights:
                            # Does the average wished changes and adds it to the value of the weight
                            weight.weight += weight.total_wished_changes / weight.number_of_wished_changes

                            #print(weight.total_wished_changes / weight.number_of_wished_changes)

                            # Always remain beatween -1 and 1
                            weight.weight = max(-1, min(1, weight.weight))

                            weight.total_wished_changes = 0
                            weight.number_of_wished_changes = 0

                #Randomly select a test sample and print its result
                test_sample = test_data[random.randint(0, len(test_data) - 1)]
                expectedoutput_test = test_sample[0]
                actualoutput = self.calculateResult(test_sample[1])
                print("Expected: ",expectedoutput_test,"  Got: ",actualoutput)


net = Net([2, 3, 2, 1])

net.trainOnOneExample([1,0],[0])

#net.Training([[[0], [1, 0]], [[0], [0, 1]], [[1], [1, 1]], [[1], [1, 1]], [[1], [0, 0]], [[0], [1, 0]]],[[[0], [0, 0]], [[1], [0, 0]]])


```

Supervised NN not learning

0 Answers0