I am trying to do feature selection with a genetic algorithm on a neural network built with Keras. For feature selection I am using
GAFeatureSelectionCV
from sklearn_genetic.genetic_search. This is my code, which works fine so far:
import keras
from keras.layers import Dense
from keras.models import Sequential
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.model_selection import TimeSeriesSplit
from sklearn_genetic import ExponentialAdapter
from sklearn_genetic.genetic_search import GAFeatureSelectionCV
# Adaptive GA schedules: mutation probability decays 0.8 -> 0.2 while
# crossover probability grows 0.2 -> 0.8 over the generations.
mutation_adapter = ExponentialAdapter(initial_value=0.8, end_value=0.2, adaptive_rate=0.01)
crossover_adapter = ExponentialAdapter(initial_value=0.2, end_value=0.8, adaptive_rate=0.01)
# Forward-chaining splitter for time-ordered data (train folds are prefixes).
# NOTE(review): TimeSeriesSplit comes from sklearn.model_selection — make sure
# it is imported at the top of the file.
tscv=TimeSeriesSplit(n_splits=2, test_size=182, gap=0)
# Training hyper-parameters shared by every wrapped Keras model.
EPOCHS = 150
BATCH_SIZE = 8
def create_model():
    """Build and compile a small two-hidden-layer regression MLP.

    Returns a fresh, compiled ``Sequential`` model each call so that every
    cross-validation fold starts from newly initialized weights.
    """
    net = Sequential()
    net.add(Dense(32, activation='relu'))
    net.add(Dense(32, activation='relu'))
    net.add(Dense(1))
    net.compile(
        optimizer=keras.optimizers.RMSprop(learning_rate=0.001),
        loss=keras.losses.MeanSquaredError(),
    )
    return net
class KerasRegressorWrapper(BaseEstimator, RegressorMixin):
    """Scikit-learn compatible regressor wrapping a fresh Keras model per fit.

    Building the ``KerasRegressor`` inside :meth:`fit` guarantees that every
    cross-validation fold trains a brand-new network instead of continuing
    from the previous fold's weights.

    Parameters
    ----------
    callbacks : list or None, default None
        Keras callbacks (e.g. ``EarlyStopping(monitor='val_loss')``) forwarded
        to the underlying ``model.fit`` on every call.
    validation_split : float, default 0.0
        Fraction of the *training* data Keras holds out for validation. This
        is the hook for early stopping inside cross-validation: the CV test
        fold is never visible to the estimator's ``fit``, so the validation
        loss is monitored on the tail slice of each fold's training data
        (appropriate for time-ordered data, where the tail is the most
        recent portion).
    """

    def __init__(self, callbacks=None, validation_split=0.0):
        # sklearn convention: store constructor args verbatim so
        # get_params()/set_params() (and therefore cloning) work.
        self.callbacks = callbacks
        self.validation_split = validation_split
        self.model = None

    def fit(self, X, y):
        """Train a fresh Keras network on (X, y) and return ``self``."""
        fit_kwargs = {}
        # Only forward options that were actually set, keeping the default
        # behavior byte-identical to a plain KerasRegressor.
        if self.callbacks is not None:
            fit_kwargs['callbacks'] = self.callbacks
        if self.validation_split:
            fit_kwargs['validation_split'] = self.validation_split
        self.model = KerasRegressor(build_fn=create_model,
                                    epochs=EPOCHS,
                                    batch_size=BATCH_SIZE,
                                    verbose=0,
                                    **fit_kwargs)
        self.model.fit(X, y)
        return self

    def predict(self, X):
        """Predict with the most recently fitted model."""
        return self.model.predict(X)
class MakeModel(KerasRegressorWrapper):
    # Thin subclass adding nothing beyond KerasRegressorWrapper; kept so
    # GAFeatureSelectionCV receives a distinctly named estimator type.
    def __init__(self):
        super().__init__()
# Evolve a boolean feature mask: each candidate mask is scored by the
# cross-validated negative RMSE of a freshly trained network on the
# selected columns. 'max' criteria + neg-RMSE means lower error wins.
evolved_estimator = GAFeatureSelectionCV(estimator=MakeModel(),
                                         cv=tscv,  # forward-chaining time-series folds
                                         scoring='neg_root_mean_squared_error',
                                         population_size=20,
                                         generations=50,
                                         tournament_size=5,
                                         elitism=True,
                                         crossover_probability=crossover_adapter,  # 0.2 -> 0.8
                                         mutation_probability=mutation_adapter,    # 0.8 -> 0.2
                                         criteria='max',
                                         algorithm='eaMuPlusLambda',
                                         n_jobs=-1,
                                         verbose=True,
                                         keep_top_k=4)
# NOTE(review): X and y are not defined in this snippet — they must be the
# full feature matrix and target vector, loaded before this point.
eve = evolved_estimator.fit(X, y)
I would like to include an early-stopping callback. One thing I don't quite understand is how to access the validation data from the folds. From my understanding, I need to access the validation data used in the current fold, pass it as the validation_data argument to KerasRegressor, and include early stopping as a callback.
How can one adjust this dynamically? How can I ensure that whenever a neural net is trained for the current fold, the validation_data for that fold is used for monitoring the validation loss?
I couldn't find anything in the docs for sklearn-genetic or in any related discussions. I have already tried some approaches where this is done for GridSearchCV, but I haven't been able to adapt them to GAFeatureSelectionCV. I tried something like the following, which doesn't work:
def fit(self, X, y):
    """Per-fold training attempt: scale on the training prefix, train with
    early stopping, and monitor the fold's test slice as validation data.

    NOTE(review): relies on module-level ``tscv`` and an EarlyStopping
    callback named ``stopping`` that must be defined before calling this.
    Only the last fold's model survives (``self.model`` is overwritten each
    iteration) — TODO confirm that is intended.
    """
    for train_index, test_index in tscv.split(X):
        # Fit scaling statistics on the training slice only (for
        # TimeSeriesSplit the train indices are a prefix, matching the
        # original X[:len(train_index)]), and scale a COPY: the original
        # code mutated X in place, so every later fold re-standardized
        # data that had already been standardized by the previous fold.
        mean = X[train_index].mean(axis=0)
        std = X[train_index].std(axis=0)
        X_scaled = (X - mean) / std

        X_train, X_test = X_scaled[train_index], X_scaled[test_index]
        y_train, y_test = y[train_index], y[test_index]

        self.model = KerasRegressor(build_fn=create_model,
                                    epochs=EPOCHS,
                                    batch_size=BATCH_SIZE,
                                    verbose=1,
                                    callbacks=[stopping],
                                    validation_data=(X_test, y_test))
        # BUG FIX: the original called fit(X_train, y_test) — the training
        # targets must be y_train, not the test-fold targets.
        self.model.fit(X_train, y_train)
    return self