I wrote an architecture for anomaly detection and want to optimize its hyperparameters:
import tensorflow as tf
import keras_tuner
from tensorflow.keras.layers import Layer, Input, Bidirectional, GRU, Dense
from tensorflow.keras.models import Model

class GlobalAttention(Layer):
    def __init__(self, **kwargs):
        super(GlobalAttention, self).__init__(**kwargs)

    def build(self, input_shape):
        # Create trainable weight matrices for the query and key projections.
        self.W_q = self.add_weight(name='W_q',
                                   shape=(input_shape[-1], input_shape[-1]),
                                   initializer='uniform',
                                   trainable=True)
        self.W_k = self.add_weight(name='W_k',
                                   shape=(input_shape[-1], input_shape[-1]),
                                   initializer='uniform',
                                   trainable=True)
        super(GlobalAttention, self).build(input_shape)

    def call(self, inputs):
        # Project the inputs into query and key spaces.
        query = tf.matmul(inputs, self.W_q)
        key = tf.matmul(inputs, self.W_k)
        # Attention weights: softmax over the query-key dot products.
        attention_weights = tf.nn.softmax(tf.matmul(query, key, transpose_b=True))
        # Apply the attention weights to the inputs.
        attended_inputs = tf.matmul(attention_weights, inputs)
        return attended_inputs

    def compute_output_shape(self, input_shape):
        return input_shape
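The layer behaves as expected when I call it on its own (a quick check with dummy shapes, just for illustration):

# Quick sanity check of the layer outside the tuner (shapes are illustrative only):
dummy = tf.random.normal((4, 1, 122))        # (batch, sequence_length, features)
attended = GlobalAttention()(dummy)
print(attended.shape)                        # -> (4, 1, 122), same shape as the input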
def build_model(hp):
    input_layer = Input(shape=(sequence_length, X_train_reshaped.shape[2]), name='classifier_input')  # (None, 1, 122)
    lstm_layer_1 = Bidirectional(GRU(units=hp.Int('GRU_units_1', min_value=8, max_value=128, step=16),
                                     activation='tanh', return_sequences=True))(input_layer)
    attention_layer = GlobalAttention()(lstm_layer_1)
    # concatenated_features = tf.concat([attention_layer, maxpool_layer, avgpool_layer], axis=-1)
    dense_layer_1 = Dense(units=hp.Int('dense_1_units', min_value=16, max_value=128, step=16),
                          activation=hp.Choice('dense_activation_1', values=['relu', 'tanh']))(attention_layer)
    output_layer = Dense(num_classes, activation='softmax')(dense_layer_1)
    classifier = Model(input_layer, output_layer)
    classifier.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return classifier
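Building the model by hand with a default HyperParameters object also works fine (a minimal sketch; the summary prints without errors):

# Sanity check: build and compile the model once outside the tuner.
hp = keras_tuner.HyperParameters()           # uses the default values of hp.Int / hp.Choice
model = build_model(hp)
model.summary()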
tuner_test = keras_tuner.RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=50,
    directory='new_tuning_dir',            # changed the directory
    project_name='my_attention_model',     # changed the project name
    overwrite=True
)
tuner_test.search(X_train_reshaped, y_train, epochs=50, validation_data=(X_val_reshaped, y_val), batch_size=64)
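Without the custom attention layer, the same search runs to completion and I can pull out the best configuration afterwards, for example (a sketch using the standard Keras Tuner API):

# Only reachable when the attention layer is removed from build_model:
best_hp = tuner_test.get_best_hyperparameters(num_trials=1)[0]
print(best_hp.values)                        # e.g. {'GRU_units_1': ..., 'dense_1_units': ..., ...}
best_model = tuner_test.get_best_models(num_models=1)[0]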
But when I try to use it with my custom attention layer, I get this error:
Trial 2 Complete [00h 00m 18s]
Best val_accuracy So Far: None
Total elapsed time: 00h 00m 37s
Search: Running Trial #3
Value |Best Value So Far |Hyperparameter
8 |88 |GRU_units_1
16 |112 |dense_1_units
tanh |relu |dense_activation_1
Epoch 1/50
1571/1575 [============================>.] - ETA: 0s - loss: 0.1609 - accuracy: 0.9613
Traceback (most recent call last):
File "/usr/local/lib/python3.10/dist-packages/keras_tuner/src/engine/base_tuner.py", line 270, in _try_run_and_update_trial
self._run_and_update_trial(trial, *fit_args, **fit_kwargs)
File "/usr/local/lib/python3.10/dist-packages/keras_tuner/src/engine/base_tuner.py", line 235, in _run_and_update_trial
results = self.run_trial(trial, *fit_args, **fit_kwargs)
File "/usr/local/lib/python3.10/dist-packages/keras_tuner/src/engine/tuner.py", line 314, in run_trial
obj_value = self._build_and_fit_model(trial, *args, **copied_kwargs)
File "/usr/local/lib/python3.10/dist-packages/keras_tuner/src/engine/tuner.py", line 233, in _build_and_fit_model
results = self.hypermodel.fit(hp, model, *args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/keras_tuner/src/engine/hypermodel.py", line 144, in fit
return model.fit(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 70, in error_handler
raise e.with_traceback(filtered_tb) from None
File "/usr/local/lib/python3.10/dist-packages/keras_tuner/src/engine/tuner_utils.py", line 172, in on_epoch_end
self._save_model()
File "/usr/local/lib/python3.10/dist-packages/keras_tuner/src/engine/tuner_utils.py", line 182, in _save_model
self.model.save_weights(write_filepath)
AttributeError: 'NoneType' object has no attribute 'replace'
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-60-bf475b6b2efc> in <cell line: 1>()
----> 1 tuner_test.search(X_train_reshaped, y_train, epochs=50, validation_data=(X_val_reshaped, y_val), batch_size=64)
4 frames
/usr/local/lib/python3.10/dist-packages/keras_tuner/src/engine/oracle.py in _check_consecutive_failures(self)
384 consecutive_failures = 0
385 if consecutive_failures == self.max_consecutive_failed_trials:
--> 386 raise RuntimeError(
387 "Number of consecutive failures exceeded the limit "
388 f"of {self.max_consecutive_failed_trials}.\n"
RuntimeError: Number of consecutive failures exceeded the limit of 3.
Without the custom attention layer everything works fine: if I remove the attention layer from build_model, the tuner runs correctly, and if I train the model with the attention layer but without Keras Tuner, it also works correctly. So I think the problem is in how keras_tuner interacts with custom layers.
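For reference, training the same model directly (no tuner) runs without errors, roughly like this (the epoch count here is just for illustration):

classifier = build_model(keras_tuner.HyperParameters())   # default hyperparameter values, as above
classifier.fit(X_train_reshaped, y_train,
               epochs=5,                                   # illustrative; the real run uses more epochs
               validation_data=(X_val_reshaped, y_val),
               batch_size=64)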