
Here is a neural network I've been working on. It takes in an array of four zeros and ones and predicts whether that pattern is a backslash.
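For reference, the four values read as a 2x2 grid; the lone positive training example [1,0,0,1] then forms the main diagonal, i.e. a backslash. A quick sketch of that reading (assuming row-major order, which the post does not state explicitly):

import numpy as np

# [1, 0, 0, 1] read row-major as a 2x2 grid is the main diagonal (a backslash):
print(np.array([1, 0, 0, 1]).reshape(2, 2))
# [[1 0]
#  [0 1]]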

import numpy as np

class NeuralNetwork():

    correct = 0
    num_predictions = 10
    epochs = 5000
    learningRate = 0.1

    def __init__(self, sizes, sizeOfEpoch):
        self.sizeOfEpoch = sizeOfEpoch 
        self.dimensions = sizes

        self.secondLayerNeurons = np.empty(sizes[1])
        self.outputNeurons = np.empty(sizes[2])

        self.firstLayerWeights = np.random.rand(sizes[1], sizes[0])
        self.secondLayerWeights = np.random.rand(sizes[2], sizes[1])
        self.firstLayerBiases = np.random.rand(sizes[1])
        self.secondLayerBiases = np.random.rand(sizes[2])

        self.firstLayerWeightsSummations = np.zeros([sizes[1], sizes[0]])
        self.secondLayerWeightsSummations = np.zeros([sizes[2], sizes[1]])
        self.firstLayerBiasesSummations = np.zeros([sizes[1]])
        self.secondLayerBiasesSummations = np.zeros([sizes[2]])

        self.hiddenLayerErrors = np.empty(sizes[1])
        self.outputLayerErrors = np.empty(sizes[2])

    def sigmoid(self, x):
        return 1/(1+np.exp(-x))

    def sigmoidDerivative(self, x):
        # Expects x to already be a sigmoid output, since s'(z) = s(z)*(1 - s(z))
        return np.multiply(x, (1-x))

    def forwardProp(self, inputs):
        for i in range (self.dimensions[1]):
            self.secondLayerNeurons[i] = self.sigmoid(np.dot(self.firstLayerWeights[i], inputs)+self.firstLayerBiases[i])
        for i in range (self.dimensions[2]):
            self.outputNeurons[i] = self.sigmoid(np.dot(self.secondLayerWeights[i], self.secondLayerNeurons)+self.secondLayerBiases[i])

    def backProp(self, inputs, correct_output):
        self.outputLayerErrors = np.subtract(self.outputNeurons, correct_output)
        self.hiddenLayerErrors = np.multiply(np.dot(self.secondLayerWeights.T, self.outputLayerErrors), self.sigmoidDerivative(self.secondLayerNeurons))

        # Accumulate gradients over the whole epoch; the j == 0 guard adds each
        # bias gradient exactly once per unit.
        for i in range (self.dimensions[2]):
            for j in range (self.dimensions[1]):
                if j==0:
                    self.secondLayerBiasesSummations[i] += self.outputLayerErrors[i]
                self.secondLayerWeightsSummations[i][j] += self.outputLayerErrors[i]*self.secondLayerNeurons[j]
        for i in range (self.dimensions[1]):
            for j in range (self.dimensions[0]):
                if j==0:
                    self.firstLayerBiasesSummations[i] += self.hiddenLayerErrors[i]
                self.firstLayerWeightsSummations[i][j] += self.hiddenLayerErrors[i]*inputs[j]

    def train(self, trainImages, trainLabels):
        size = str(self.sizeOfEpoch)

        for m in range (self.sizeOfEpoch):
            correct_output = trainLabels[m]

            self.forwardProp(trainImages[m].flatten())
            self.backProp(trainImages[m].flatten(), correct_output)

            if (self.outputNeurons[0] > 0.90 and trainLabels[m] == 1) or (self.outputNeurons[0] < 0.1 and trainLabels[m] == 0):
                self.correct+=1
            accuracy = str(int((self.correct/(m+1))*100)) + '%'
            percent = str(int((m/self.sizeOfEpoch)*100)) + '%'
            print ("Progress: " + percent + " -- Accuracy: " + accuracy, end="\r")
        self.change()
        self.correct = 0

        print (size+'/'+size+" -- Accuracy: "+accuracy+" -- Error: "+str(np.amax(np.absolute(self.outputLayerErrors))),end="\r")

    def change(self):
        self.secondLayerBiases -= self.learningRate*self.secondLayerBiasesSummations
        self.firstLayerBiases -= self.learningRate*self.firstLayerBiasesSummations
        self.secondLayerWeights -= self.learningRate*self.secondLayerWeightsSummations
        self.firstLayerWeights -= self.learningRate*self.firstLayerWeightsSummations
        self.firstLayerWeightsSummations = np.zeros([self.dimensions[1], self.dimensions[0]])
        self.secondLayerWeightsSummations = np.zeros([self.dimensions[2], self.dimensions[1]])
        self.firstLayerBiasesSummations = np.zeros(self.dimensions[1])
        self.secondLayerBiasesSummations = np.zeros(self.dimensions[2])
            
    def predict(self, testImage):
        secondLayerAnsNodes = np.empty([self.dimensions[1]])
        outputAns = np.empty([self.dimensions[2]])
        for i in range (self.dimensions[1]):
            secondLayerAnsNodes[i] = self.sigmoid(np.dot(self.firstLayerWeights[i], testImage)+self.firstLayerBiases[i])
        for i in range (self.dimensions[2]):
            outputAns[i] = self.sigmoid(np.dot(self.secondLayerWeights[i], secondLayerAnsNodes)+self.secondLayerBiases[i])
        return outputAns

if __name__ == "__main__":
    train_images = np.array([[1,0,0,1],[1,0,1,0],[0,1,0,1],[1,1,1,1],[0,0,0,0]])
    train_labels = np.array([1, 0, 0, 0, 0])

    neural_network = NeuralNetwork([4, 2, 1], train_images.shape[0])

    for i in range (neural_network.epochs):
        print ("\nEpoch", str(i+1) + "/" + str(neural_network.epochs))
        neural_network.train(train_images, train_labels)

    for i in range (neural_network.num_predictions):
        print("\n\n\nNew Situations: " + str(i+1) + "/" + str(neural_network.num_predictions))
        try:
            A = list(map(int, input("Enter the numbers : ").strip().split()))[:4]
            result = neural_network.predict(A)
        except ValueError:
            print("\nValueError, try again")
            continue

        print("\nOutput Data:", result[0])
        if result[0] > 0.95:
            print("Result: Back Slash")
        else:
            print("Result: Not Back Slash")

This program only predicts the training examples correctly. When I give it [0,0,0,1] or [1,0,0,0], it predicts that the pattern is a backslash. Also, I have run the classic XOR problem with this exact code (just changing the dimensions of the network and the training inputs/outputs), and it works perfectly. I have also made a logistic regression program that is almost identical to this one, except that it has no hidden layer; that program predicts all inputs correctly.
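A minimal way to reproduce the misbehavior, assuming the trained `neural_network` instance from the main block above:

# Both unseen patterns reportedly score above the 0.95 backslash threshold:
for pattern in ([0, 0, 0, 1], [1, 0, 0, 0]):
    print(pattern, "->", neural_network.predict(pattern)[0])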

How do I fix/change this NN so that it can correctly predict inputs it hasn't seen before?

UPDATE: It works now. I just had to add a regularization term when changing the weights and biases. My lambda was originally 0.01, but it works when I use a larger lambda.

New code for the `change` method:

for i in range (self.dimensions[2]):
    for j in range (self.dimensions[1]):
        if j == 0:
            self.secondLayerBiases[i] -= self.learningRate*(self.secondLayerBiasesSummations[i]/self.sizeOfEpoch)
        self.secondLayerWeights[i][j] -= self.learningRate*(self.secondLayerWeightsSummations[i][j]/self.sizeOfEpoch+self.Lambda*self.secondLayerWeights[i][j])
for i in range (self.dimensions[1]):
    for j in range (self.dimensions[0]):
        if j == 0:
            self.firstLayerBiases[i] -= self.learningRate*(self.firstLayerBiasesSummations[i]/self.sizeOfEpoch)
        self.firstLayerWeights[i][j] -= self.learningRate*(self.firstLayerWeightsSummations[i][j]/self.sizeOfEpoch+self.Lambda*self.firstLayerWeights[i][j])
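
Note that `self.Lambda` is not defined in the class as posted. A minimal sketch of wiring it in would be to set it in `__init__`; the value below is a placeholder, since the post only says the working lambda is larger than 0.01:

# In __init__ (placeholder value; the post only states lambda was raised above 0.01):
self.Lambda = 0.1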
  • Hi again. You can probably plot the decision boundaries for your model by evaluating [x, 0, 0, y] for `x=y=np.linspace(0, 1, 100)` in a 3D plot. This might clear up how this can happen, if not why. See: https://matplotlib.org/3.1.0/gallery/mplot3d/surface3d.html (a sketch of this plot follows the comments below) – Multihunter Aug 06 '20 at 03:59
  • @Multihunter I'm not very familiar with making graphs but could you please explain how to do this in python and why it would help? Thanks a lot! – Joey Aug 06 '20 at 04:07
  • You are probably encountering overfitting, i.e. good performance on the training data and bad performance on unseen data. – Michael M Aug 06 '20 at 06:03
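
Following Multihunter's suggestion, a minimal sketch of that surface plot, assuming a trained `neural_network` instance from the main block above:

import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # needed on older matplotlib versions

# Evaluate the trained network on inputs of the form [x, 0, 0, y]
xs = np.linspace(0, 1, 100)
ys = np.linspace(0, 1, 100)
X, Y = np.meshgrid(xs, ys)
Z = np.array([[neural_network.predict([x, 0, 0, y])[0] for x in xs] for y in ys])

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.plot_surface(X, Y, Z)
ax.set_xlabel('input[0]')
ax.set_ylabel('input[3]')
ax.set_zlabel('network output')
plt.show()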
