I am currently working in a Kaggle notebook, so I am limited to 13 GB of RAM. This code used to work with a small dataset, but my dataset is now over a gigabyte. I do have a data generator in place, but it no longer helps: RAM maxes out as soon as I do anything. I am using Python with Keras on a text corpus containing my data.
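For scale, here is a rough back-of-the-envelope estimate of where the memory could be going (a minimal sketch; the ~100-character vocabulary and ~10^9-character corpus length are assumptions, not measured values):

# Assumed sizes, not measured; adjust to the real corpus and vocabulary
corpus_chars = 1_000_000_000    # the corpus is "over a gigabyte", so roughly 1e9 characters
vocab_size = 100                # rough guess at the number of distinct characters
max_length = 100
batch_size = 32
stride = 10                     # the `steps` value used when building `sentences`

# One one-hot batch from the generator (bool = 1 byte per entry)
print(batch_size * max_length * vocab_size / 1e6, "MB per batch")        # ~0.3 MB

# The pre-built `sentences` list: one (max_length + 1)-character string per stride
num_windows = (corpus_chars - max_length) // stride
print(num_windows * (max_length + 1) / 1e9, "GB of raw window text")     # ~10 GB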
I have tried setting batch_size, steps, and hidden_size as low as possible (except steps, which I raised, since a larger stride lowers RAM usage). I searched Google and even asked ChatGPT, but nothing worked. I would greatly appreciate any help. Code:
from __future__ import absolute_import, division, print_function, unicode_literals
import numpy as np
import tensorflow as tf
from keras.utils import Sequence
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import LSTM
from keras.optimizers import RMSprop
from keras.callbacks import LambdaCallback, ModelCheckpoint, ReduceLROnPlateau
import random
import sys
class TextDataGenerator(Sequence):
    def __init__(self, text, vocabulary, char_to_indices, indices_to_char, max_length, batch_size):
        self.text = text
        self.vocabulary = vocabulary
        self.char_to_indices = char_to_indices
        self.indices_to_char = indices_to_char
        self.max_length = max_length
        self.batch_size = batch_size
        self.steps = (len(text) - max_length) // batch_size

    def __len__(self):
        return self.steps

    def __getitem__(self, idx):
        batch_start = idx * self.batch_size
        batch_end = (idx + 1) * self.batch_size
        batches = self.text[batch_start:batch_end]
        # One-hot encode a single batch: X is (batch_size, max_length, vocab),
        # y is (batch_size, vocab) holding the character to predict
        X = np.zeros((self.batch_size, self.max_length, len(self.vocabulary)), dtype=bool)
        y = np.zeros((self.batch_size, len(self.vocabulary)), dtype=bool)
        for i, batch in enumerate(batches):
            for t, char in enumerate(batch[:-1]):
                X[i, t, self.char_to_indices[char]] = 1
            y[i, self.char_to_indices[batch[-1]]] = 1
        return X, y

    def on_epoch_end(self):
        random.shuffle(self.text)
with open('/kaggle/input/crptic-python/python.txt', 'r') as file:
    text = file.read()
# A preview of the text file
vocabulary = sorted(list(set(text)))
char_to_indices = dict((c, i) for i, c in enumerate(vocabulary))
indices_to_char = dict((i, c) for i, c in enumerate(vocabulary))
# Divide the text into overlapping windows of max_length + 1 characters
# (stride `steps`); the first max_length characters of each window are the
# input and the final character is the prediction target
max_length = 100
batch_size = 32
steps = 10
sentences = []
next_chars = []
for i in range(0, len(text) - max_length, steps):
    sentences.append(text[i: i + max_length + 1])
    next_chars.append(text[i + max_length + 1])
# Building the LSTM network for the task
model = Sequential()
model.add(LSTM(128, input_shape=(max_length, len(vocabulary))))
model.add(Dense(len(vocabulary)))
model.add(Activation('softmax'))
optimizer = RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)
# Helper function to sample an index from a probability array
def sample_index(preds, temperature=1.0):
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)
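# For example, with preds = [0.2, 0.5, 0.3] and temperature = 0.5, the rescaled
# probabilities come out to roughly [0.11, 0.66, 0.24] (values approximate), so
# lower temperatures sharpen the distribution toward the most likely character.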
# Helper function to generate text after the end of each epoch
def on_epoch_end(epoch, logs):
    if epoch % 1 == 0:
        print()
        print('----- Generating text after Epoch: % d' % epoch)
        start_index = random.randint(0, len(text) - max_length - 1)
        for diversity in [0.1, 0.3, 0.5]:
            print('----- diversity:', diversity)
            generated = ''
            sentence = text[start_index: start_index + max_length]
            generated += sentence
            print('----- Generating with seed: "' + sentence + '"')
            sys.stdout.write(generated)
            for i in range(400):
                x_pred = np.zeros((1, max_length, len(vocabulary)))
                for t, char in enumerate(sentence):
                    x_pred[0, t, char_to_indices[char]] = 1.
                preds = model.predict(x_pred, verbose=0)[0]
                next_index = sample_index(preds, diversity)
                next_char = indices_to_char[next_index]
                generated += next_char
                sentence = sentence[1:] + next_char
                sys.stdout.write(next_char)
                sys.stdout.flush()
            print()
print_callback = LambdaCallback(on_epoch_end=on_epoch_end)
# Save the model weights after any epoch in which the loss improves
filepath = "weights.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss',
                             verbose=1, save_best_only=True,
                             mode='min')
# Reduce the learning rate when the loss plateaus
reduce_alpha = ReduceLROnPlateau(monitor='loss', factor=0.2,
                                 patience=1, min_lr=0.001)
callbacks = [print_callback, checkpoint, reduce_alpha]
# Training the LSTM model
data_generator = TextDataGenerator(sentences, vocabulary, char_to_indices, indices_to_char, max_length, batch_size)
model.fit(data_generator, epochs=2, callbacks=callbacks)
def generate_text(length, diversity):
    # Get random starting text
    start_index = random.randint(0, len(text) - max_length - 1)
    generated = ''
    sentence = text[start_index: start_index + max_length]
    generated += sentence
    for i in range(length):
        x_pred = np.zeros((1, max_length, len(vocabulary)))
        for t, char in enumerate(sentence):
            x_pred[0, t, char_to_indices[char]] = 1.
        preds = model.predict(x_pred, verbose=0)[0]
        next_index = sample_index(preds, diversity)
        next_char = indices_to_char[next_index]
        generated += next_char
        sentence = sentence[1:] + next_char
    return generated
print(generate_text(500, 0.5))
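One variation I am wondering about (an untested sketch, not code I have run; the class name WindowDataGenerator is just a placeholder) would slice each window directly out of the raw text string inside __getitem__ instead of pre-building the sentences list, and shuffle the window start offsets instead of the data itself:

# Untested sketch, reusing the Sequence / numpy / random imports and the
# text, vocabulary, char_to_indices objects defined above
class WindowDataGenerator(Sequence):
    def __init__(self, text, vocabulary, char_to_indices, max_length, batch_size, stride=10):
        self.text = text
        self.vocabulary = vocabulary
        self.char_to_indices = char_to_indices
        self.max_length = max_length
        self.batch_size = batch_size
        # Start offsets of the windows; shuffling these replaces shuffling the text
        self.starts = list(range(0, len(text) - max_length - 1, stride))

    def __len__(self):
        return len(self.starts) // self.batch_size

    def __getitem__(self, idx):
        batch_starts = self.starts[idx * self.batch_size:(idx + 1) * self.batch_size]
        X = np.zeros((len(batch_starts), self.max_length, len(self.vocabulary)), dtype=bool)
        y = np.zeros((len(batch_starts), len(self.vocabulary)), dtype=bool)
        for i, s in enumerate(batch_starts):
            # Slice the window on the fly; first max_length chars are input, last char is target
            window = self.text[s:s + self.max_length + 1]
            for t, char in enumerate(window[:-1]):
                X[i, t, self.char_to_indices[char]] = 1
            y[i, self.char_to_indices[window[-1]]] = 1
        return X, y

    def on_epoch_end(self):
        random.shuffle(self.starts)

If something like this is on the right track, a drop-in usage would be data_generator = WindowDataGenerator(text, vocabulary, char_to_indices, max_length, batch_size, stride=steps), but I have not verified that it actually brings RAM usage down.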