I am trying to train a simple model following (http://colah.github.io/posts/2014-03-NN-Manifolds-Topology/) to understand neural networks better, but my model is unable to learn simple curves to segment the data. My training loss keeps decreasing, but the validation loss does not. Even if I make my model more complex, its validation loss becomes constant very quickly (at 0.029) and the graph looks like this:
I don't understand why a multi-layer network can't learn the simple upward curve after x=4. I would love some help and insights. Ideally this should work with one or two layers, as the function it is learning is not very complex.
Here is my basic code:
# Number of training epochs; forwarded to model.fit() as nb_epoch.
NB_EPOCH = 300
def prepare_data(n_points=256):
    """Generate two vertically shifted cosine curves as a 2-class dataset.

    Both curves are cos(1.2 * x) sampled at n_points evenly spaced x values
    in [0, 2*pi]; class 0 is shifted up by 0.8 and class 1 down by 0.8.

    Args:
        n_points: number of samples per curve.

    Returns:
        A (features, labels) tuple. features has shape (2 * n_points, 2)
        holding (x, y) pairs, labels has shape (2 * n_points,) with 0.0 for
        the upper curve and 1.0 for the lower one. The rows are ordered:
        all class-0 points first, then all class-1 points.
    """
    x = np.linspace(0, 2 * np.pi, n_points)
    shift1 = 0.8
    shift2 = -0.8
    y1 = np.cos(x * 1.2) + shift1
    y2 = np.cos(x * 1.2) + shift2
    # column_stack builds the (x, y) pairs directly as an array. The previous
    # zip() only worked on Python 2, where zip returns a list; on Python 3 it
    # is a lazy iterator that np.concatenate cannot consume.
    features1 = np.column_stack([x, y1])
    features2 = np.column_stack([x, y2])
    labels1 = np.zeros((n_points,))
    labels2 = np.ones((n_points,))
    return (np.concatenate([features1, features2]),
            np.concatenate([labels1, labels2]))
def create_model():
    """Build and compile the small tanh classifier for 2-D points.

    The network is four Dense(4) layers, each followed by a tanh activation,
    and a Dense(2) softmax output. It is compiled with categorical
    cross-entropy loss, the 'sgd' optimizer and an accuracy metric.

    Returns:
        The compiled Sequential model.
    """
    model = Sequential()
    # Only the first layer declares the input shape. The hidden layers used
    # to repeat input_shape=(2,), which was misleading: their input is the
    # 4-unit output of the previous layer, not the 2-D feature vector.
    model.add(Dense(4, input_shape=(2,)))
    model.add(Activation('tanh'))
    # Three further identical hidden layers.
    for _ in range(3):
        model.add(Dense(4))
        model.add(Activation('tanh'))
    model.add(Dense(2))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='sgd',
                  metrics=['accuracy'])
    return model
if __name__ == "__main__":
    # Train the classifier on the two cosine curves, then plot the curves
    # together with the class the model predicts on a regular grid.
    features, labels = prepare_data()
    f_len = features.shape[0]
    # Integer division: a float is not a valid slice index or array size.
    half = f_len // 2
    labels = np_utils.to_categorical(labels, 2)

    # prepare_data() returns every class-0 point followed by every class-1
    # point, each sorted by increasing x, and validation_split holds out the
    # *last* fraction of the arrays without shuffling them first. Unshuffled,
    # the validation set was therefore only the class-1 points with the
    # largest x values, and the model never trained on that region. Shuffle
    # a copy so that both the training and validation parts cover the whole
    # dataset; the ordered `features` array is kept for plotting below.
    order = np.random.permutation(f_len)
    train_features = features[order]
    train_labels = labels[order]

    model = create_model()
    # summary() prints the table itself; wrapping it in print() only added
    # a stray "None" line.
    model.summary()
    model.fit(train_features, train_labels, nb_epoch=NB_EPOCH, batch_size=8,
              validation_split=0.2)

    fig = plt.figure(1)
    ax = Axes3D(fig)  # fig.add_subplot(311, projection='3d')
    # The first half of the ordered features is class 0, the second class 1.
    ax.plot(features[:half, 0], np.zeros(half), features[:half, 1])
    ax.plot(features[half:, 0], np.zeros(f_len - half), features[half:, 1])

    # ###########################################
    # Separation curve
    # ###########################################
    grid_x, grid_y = np.meshgrid(np.linspace(0, 3 * np.pi, 50),
                                 np.linspace(-1, 1, 36))
    # One (x, y) row per grid node, in row-major order.
    grid_points = np.column_stack([grid_x.ravel(), grid_y.ravel()])
    classes = model.predict(grid_points)

    # Split the grid by which class the softmax output favours.
    is_first = classes[:, 0] > 0.5
    first_class = grid_points[is_first]
    second_class = grid_points[~is_first]
    ax.scatter(first_class[:, 0], np.zeros(len(first_class)),
               first_class[:, 1], c='r')
    ax.scatter(second_class[:, 0], np.zeros(len(second_class)),
               second_class[:, 1], c='y')
    plt.show()