I can't manage to train the MLPRegressor with the 'lbfgs' solver to an R2 score better than about -14. Why is that? First I tried guessing the hidden layer sizes at random, then I even tried using GridSearchCV, but it doesn't help much. How can I train it better? How can I know that the model has been trained to its best accuracy?
How come a simple linear regression gets an R2 score of at least 0.4 (40%), while the model above gets about -14?
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import r2_score
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error
import math
import numpy as np
from scipy.stats import uniform
# Training features: 5 samples, 3 numeric columns each.
X = [
    [390973, 262345, 324807],
    [188322, 120766, 174883],
    [185967, 173290, 175605],
    [179309, 117915, 169950],
    [166298, 40042, 153851],
]
# Held-out features: 2 samples with the same 3 columns.
X_test = [
    [164077, 73041, 147249],
    [152734, 52099, 77967],
]
# Training targets, one per row of X.
y = [
    8080000,
    1940000,
    3300000,
    1970000,
    624000,
]
# Held-out targets, one per row of X_test.
y_test = [
    1580000,
    118000,
]
# NOTE(review): the targets stay in raw units (roughly 1e5 to 1e7) while the
# inputs are standardized; scaling y as well may help the optimizer -- confirm.

# Learn the per-column mean and standard deviation from the training data only.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
# Reuse the training statistics for the test set. Calling fit_transform() here
# would refit the scaler on the two test rows, so the model would be scored on
# inputs standardized differently from the ones it was trained on (and with
# only two rows every column would collapse to exactly -1 / +1).
X_test_scaled = scaler.transform(X_test)
# https://stackoverflow.com/questions/52032019/sklearn-mlp-classifier-hidden-layers-optimization-randomizedsearchcv
class RandIntMatrix(object):
    """Sampler of random integer arrays exposing a scipy-style ``rvs`` method.

    Parameters
    ----------
    low : int
        Lowest value that can be drawn (inclusive).
    high : int
        Upper bound of the drawn values (exclusive).
    shape : int or tuple of int, default 1
        Shape of the array returned by ``rvs``. The default is the plain
        integer 1 (``(1)`` is not a tuple), which yields a one-element
        1-D array.
    """

    def __init__(self, low, high, shape=1):
        self.low = low
        self.high = high
        self.shape = shape

    def rvs(self, random_state=None):
        """Draw one array of integers in ``[low, high)`` with shape ``shape``.

        Parameters
        ----------
        random_state : None, int or numpy.random.RandomState, default None
            ``None`` seeds from fresh entropy, an int gives reproducible
            draws, and an existing ``RandomState`` is used (and advanced)
            directly.

        Returns
        -------
        numpy.ndarray
            Integer array of shape ``shape``.
        """
        # Use a private generator instead of np.random.seed(): reseeding the
        # global RNG would silently change every later np.random call in the
        # program, and np.random.seed() cannot take a RandomState instance.
        if isinstance(random_state, np.random.RandomState):
            rng = random_state
        else:
            rng = np.random.RandomState(random_state)
        return rng.randint(self.low, self.high, self.shape)
# Candidate architectures: 500 rows of 2 random layer widths, each in [1, 50).
candidate_layers = RandIntMatrix(1, 50, (500, 2)).rvs().tolist()
parameters = {
    'hidden_layer_sizes': candidate_layers,
    'solver': ['lbfgs'],
}

# Exhaustively evaluate every candidate with 2-fold cross-validation.
base_model = MLPRegressor(random_state=1, max_iter=500)
grid = GridSearchCV(base_model, parameters, cv=2)
grid.fit(X_scaled, y)

# Report the held-out score of the selected model and the chosen parameters.
test_score = grid.score(X_test_scaled, y_test)
print(test_score)
print(grid.best_params_)
R2 score and best params output (for shape (500, 2), i.e. two hidden layers):
-14.423559240364366
{'hidden_layer_sizes': [11, 24], 'solver': 'lbfgs'}
R2 score and best params output (for shape (500, 3), i.e. three hidden layers):
-14.024830648388866
{'hidden_layer_sizes': [7, 19, 49], 'solver': 'lbfgs'}