Include early stopping in GAFeatureSelectionCV with keras model

83 Views Asked by At

I am trying to do feature selection with a genetic algorithm on a neural network built with Keras. For feature selection I am using GAFeatureSelectionCV from sklearn_genetic.genetic_search. This is my code, which works fine so far:

import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.model_selection import TimeSeriesSplit
from sklearn_genetic import ExponentialAdapter
from sklearn_genetic.genetic_search import GAFeatureSelectionCV
# Adaptive GA hyper-parameters: mutation probability decays 0.8 -> 0.2 while
# crossover probability grows 0.2 -> 0.8 as the search progresses.
mutation_adapter = ExponentialAdapter(initial_value=0.8, end_value=0.2, adaptive_rate=0.01)
crossover_adapter = ExponentialAdapter(initial_value=0.2, end_value=0.8, adaptive_rate=0.01)

# Walk-forward CV: 2 splits, each validation window is the next 182 rows.
# NOTE(review): ensure TimeSeriesSplit is imported (sklearn.model_selection).
tscv=TimeSeriesSplit(n_splits=2, test_size=182, gap=0)
EPOCHS = 150    # training epochs per fold
BATCH_SIZE = 8  # mini-batch size


def create_model():
    """Build and compile a small MLP regressor: two ReLU hidden layers
    of 32 units each and a single linear output, trained with RMSprop
    (lr=0.001) on mean-squared-error loss."""
    net = Sequential()
    net.add(Dense(32, activation='relu'))
    net.add(Dense(32, activation='relu'))
    net.add(Dense(1))

    net.compile(
        optimizer=keras.optimizers.RMSprop(learning_rate=0.001),
        loss=keras.losses.MeanSquaredError(),
    )
    return net

class KerasRegressorWrapper(BaseEstimator, RegressorMixin):
    """Scikit-learn compatible regressor around a freshly built Keras model.

    A brand-new KerasRegressor is constructed on every ``fit`` call, so
    successive fits (e.g. across CV folds) never share trained weights.
    """

    def __init__(self):
        # Set by fit(); None until the estimator has been fitted once.
        self.model = None

    def fit(self, X, y):
        """Build a new wrapped Keras model and train it on (X, y)."""
        wrapped = KerasRegressor(
            build_fn=create_model,
            epochs=EPOCHS,
            batch_size=BATCH_SIZE,
            verbose=0,
        )
        self.model = wrapped
        self.model.fit(X, y)
        return self

    def predict(self, X):
        """Predict with the most recently fitted model."""
        return self.model.predict(X)

class MakeModel(KerasRegressorWrapper):
    """Concrete estimator handed to GAFeatureSelectionCV; adds no behavior
    beyond KerasRegressorWrapper (the inherited __init__ is used as-is)."""

# Genetic-algorithm feature selection: each individual is a mask over the
# feature columns; fitness is the time-series CV score of MakeModel on the
# selected subset. Crossover/mutation probabilities adapt over generations
# via the ExponentialAdapter instances defined above.
evolved_estimator = GAFeatureSelectionCV(estimator=MakeModel(),
                                         cv=tscv,
                                         scoring='neg_root_mean_squared_error',
                                         population_size=20,
                                         generations=50,
                                         tournament_size=5,
                                         elitism=True,
                                         crossover_probability=crossover_adapter,
                                         mutation_probability=mutation_adapter,
                                         criteria='max',
                                         algorithm='eaMuPlusLambda',
                                         n_jobs=-1,
                                         verbose=True,
                                         keep_top_k=4)

# NOTE(review): X and y are not defined in this snippet — they must come from
# the surrounding script/notebook.
eve = evolved_estimator.fit(X, y)

I would like to include an early stopping callback. One thing I don't quite understand is how to access the validation data from the folds. From my understanding, I need to access the validation data used in the current fold, pass it as the validation_data argument to KerasRegressor, and include early stopping as a callback.

How can one dynamically adjust this? How can I ensure that whenever a neural net is trained for the current fold, the validation data for this fold is used for monitoring the validation loss?

I couldn't find anything in the docs for sklearn-genetic or in any discussions. I already tried some approaches where this is done for GridSearchCV, but I haven't been able to adapt them to GAFeatureSelectionCV. I tried something like this, which won't work:

 def fit(self, X, y):
     """Train a fresh Keras model per CV fold, with early stopping
     monitored on that fold's held-out validation split.

     Fixes vs. the original attempt:
     * the model was trained on (X_train, y_test); it must be y_train;
     * X was standardized in-place on every fold iteration, so the
       scaling compounded across folds and mutated the caller's array —
       stats are now computed from the training rows only and applied
       to copies.
     """
     for train_index, test_index in tscv.split(X):
         # Scaling statistics from the training rows only, to avoid
         # leaking validation data into the normalization.
         mean = X[train_index].mean(axis=0)
         std = X[train_index].std(axis=0)

         # Standardize copies; never mutate the caller's X.
         X_train = (X[train_index] - mean) / std
         X_test = (X[test_index] - mean) / std
         y_train = y[train_index]
         y_test = y[test_index]

         skwrapped_model = KerasRegressor(
             build_fn=create_model,
             epochs=EPOCHS,
             batch_size=BATCH_SIZE,
             verbose=1,
             callbacks=[stopping],
             # This fold's held-out split drives the val loss that the
             # early-stopping callback monitors.
             validation_data=(X_test, y_test),
         )
         self.model = skwrapped_model
         # Train on the training fold (was y_test in the original —
         # a bug: labels and features were misaligned).
         self.model.fit(X_train, y_train)
     # NOTE(review): only the model from the LAST fold is retained;
     # confirm that is the intended behavior.
     return self
0

There are 0 best solutions below