I would like to train an autoencoder neural network.
Then in order to create a dataset, I split this signal into several thousands of (overlapping) segments (each with 1024 samples). Each segment is saved into a separate .txt file.
My code is following
import tensorflow as tf
import tensorflow.keras as keras
import matplotlib.pyplot as plt
import glob
import numpy as np
def load_dataset(dataset_path, train_val_ratio=0.8):
filenames = glob.glob(dataset_path+"\\*.txt")
windows_amount = len(filenames)
lines = [line.rstrip('\n') for line in open(filenames[0])]
window_length = len(lines)
train_windows_amount = int(np.ceil(windows_amount*train_val_ratio))
val_windows_amount = int(np.ceil(windows_amount*(1-train_val_ratio)))
train_windows = np.zeros((train_windows_amount, window_length), dtype=np.float)
val_windows = np.zeros((val_windows_amount, window_length), dtype=np.float)
for j in range(len(filenames)):
if j%100 == 0:
print(str(j/windows_amount*100)+"%")
lines = [line.rstrip('\n') for line in open(filenames[j])]
if j < train_windows_amount:
for i in range(len(lines)):
train_windows[j, i] = float(lines[i])
else:
for i in range(len(lines)):
val_windows[j-train_windows_amount, i] = float(lines[i])
return train_windows, val_windows
train_path = ".\\dataset\\dataset_1\\train"
train_windows, val_windows = load_dataset(train_path)
train_windows = train_windows/1000000
val_windows = val_windows/1000000
treshold_path = ".\\dataset\\dataset_1\\treshold"
treshold_windows, _ = load_dataset(treshold_path)
treshold_windows = treshold_windows/1000000
model = keras.Sequential([
keras.layers.Dense(1024, activation='relu', name="encoder"),
keras.layers.Dense(512, activation='relu'),
keras.layers.Dense(256, activation='relu', name="bottleneck"),
keras.layers.Dense(512, activation='relu'),
keras.layers.Dense(1024, activation='sigmoid', name="decoder")
])
model.compile(optimizer = "adam",
loss='binary_crossentropy',
metrics=['accuracy'])
history = model.fit(train_windows, train_windows,
batch_size=128,
epochs=200,
verbose=1,
shuffle=True,
validation_data=(val_windows, val_windows))
plt.plot(history.history['loss'], label='loss')
plt.plot(history.history['accuracy'], label='train')
plt.plot(history.history['val_accuracy'], label='val')
plt.grid()
plt.legend()
plt.show()
test_loss, test_acc = model.evaluate(treshold_windows, treshold_windows, verbose=2)
print('Test loss:', test_loss)
model.save_weights('model.h5')
reconstructed = model.predict(treshold_windows)
plt.plot(treshold_windows[10])
plt.plot(reconstructed[0])
plt.plot(reconstructed[10])
plt.plot(reconstructed[20])
plt.plot(reconstructed[30])
plt.plot(reconstructed[40])
plt.grid()
plt.legend()
plt.show()
After the autoencoder training I get the following history plot:
And when I try to use the trained autoencoder on a segment of the previously unseen signal, I get the following plot:
where the upper signal is the input segment, the yellow signal is the output signal of the neural network, the other color signals are neural network outputs for other input segments.
I would rather expect that the autoencoder produced the output that is similar to its input, but it is not the case here. What am I doing wrong?