Need help integrating a trained RNN model into the guess function of another program

The dataset is a list of about 250,000 random words of varying lengths. Here is the code I used to train the RNN model:

from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

# Load dataset
with open("/content/drive/MyDrive/Colab Notebooks/HangmanTest/words_250000_train.txt", "r") as file:
    dataset = file.read().splitlines()

# Function to prepare data
def prepare_data(dataset):
    # Preprocess dataset
    vocab = set()
    max_sequence_length = 0
    for word in dataset:
        word = word.lower()  # Convert to lowercase
        vocab.update(set(word))
        max_sequence_length = max(max_sequence_length, len(word))
    vocab_size = len(vocab)
    char_to_index = {char: index for index, char in enumerate(sorted(vocab))}
    index_to_char = {index: char for char, index in char_to_index.items()}

    # Generate sequences
    X, y = [], []
    for word in dataset:
        word = word.lower()  # Convert to lowercase
        X_word, y_word = [], []
        for char in word:
            X_word.append(char_to_index[char])
            y_word.append(char_to_index[char])
        X.append(X_word)
        y.append(y_word)

    # Pad sequences to the maximum sequence length
    X_padded = pad_sequences(X, maxlen=max_sequence_length, padding='post')
    y_padded = pad_sequences(y, maxlen=max_sequence_length, padding='post')

    return X_padded, y_padded, vocab_size, max_sequence_length, char_to_index, index_to_char

# Prepare data
X_train, y_train, vocab_size, max_sequence_length, char_to_index, index_to_char = prepare_data(dataset)


# Define model architecture
embedding_dim = 50
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_sequence_length),
    LSTM(units=64, return_sequences=True),
    Dense(units=vocab_size, activation='softmax')
])

# Compile model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train model
batch_size = 64
num_epochs = 10
model.fit(X_train, y_train, batch_size=batch_size, epochs=num_epochs, validation_split=0.2)

# Save the model
model.save("/content/drive/MyDrive/Colab Notebooks/HangmanTest/Trained Model/Model 2")
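
For context, here is roughly what prepare_data produces for a small, hypothetical toy dataset (the actual indices depend on the full vocabulary, but every encoded character is an integer in the range [0, vocab_size)):

# Minimal sketch using a hypothetical three-word dataset
toy_dataset = ["apple", "banana", "cherry"]
X_toy, y_toy, toy_vocab_size, toy_max_len, toy_char_to_index, _ = prepare_data(toy_dataset)

print(toy_vocab_size)     # 10 distinct characters in this toy dataset
print(toy_char_to_index)  # {'a': 0, 'b': 1, 'c': 2, 'e': 3, ...}
print(X_toy[0])           # "apple" as indices, padded with 0 (pad_sequences' default)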

Here is the code for a Hangman-style game in which I am trying to use the model:

import collections
import random
import re
import time

import numpy as np
import tensorflow as tf

class HangmanOffline(object):
    def __init__(self):
        self.full_dictionary = self.build_dictionary("/content/drive/MyDrive/Colab Notebooks/HangmanTest/words_250000_train.txt")
        self.guessed_letters = []
        self.full_dictionary_common_letter_sorted = collections.Counter("".join(self.full_dictionary)).most_common()
        self.current_dictionary = []
        self.model = tf.keras.models.load_model("/content/drive/MyDrive/Colab Notebooks/HangmanTest/Trained Model/Model 2")
        self.char_to_index = {}
        self.index_to_char = {}
        self.build_index_maps()

    model_path = "/content/drive/MyDrive/Colab Notebooks/HangmanTest/Trained Model/Model 2"
    model = tf.keras.models.load_model(model_path)

    def build_index_maps(self):
        vocab = set("".join(self.full_dictionary))
        self.char_to_index = {char: index for index, char in enumerate(sorted(vocab))}
        self.index_to_char = {index: char for char, index in self.char_to_index.items()}

    def preprocess_word(self, word):
        return [self.char_to_index[char] if char in self.char_to_index else -1 for char in word]



    def predict_next_letter(self, word):
        model_path = "/content/drive/MyDrive/Colab Notebooks/HangmanTest/Trained Model/Model 2"
        model = tf.keras.models.load_model(model_path)
        encoded_word = self.preprocess_word(word)
        padded_word = np.pad(encoded_word, (0, 29 - len(encoded_word)), mode='constant')
        padded_word = np.array([padded_word])  # Add batch dimension
        next_letter_index = np.argmax(model.predict(padded_word), axis=-1)
        return self.index_to_char[next_letter_index[0]]


    def guess(self, word):
        clean_word = word.replace("_", ".")
        len_word = len(clean_word)
        current_dictionary = self.current_dictionary
        new_dictionary = []

        for dict_word in current_dictionary:
            if len(dict_word) != len_word:
                continue
            if re.match(clean_word, dict_word):
                new_dictionary.append(dict_word)

        self.current_dictionary = new_dictionary

        guess_letter = self.predict_next_letter(word)
        return guess_letter

    # The rest of the class remains unchanged...
    def build_dictionary(self, dictionary_file_location):
        with open(dictionary_file_location, "r") as text_file:
            full_dictionary = text_file.read().splitlines()
        return full_dictionary

    def start_game(self, verbose=True):
        self.guessed_letters = []
        self.current_dictionary = self.full_dictionary

        word = random.choice(self.full_dictionary)
        hidden_word = '_' * len(word)  # use '_' so guess() can substitute it in the regex
        tries_remains = 6  # Adjusted to match the online version

        game_id = ''.join(random.choices('0123456789abcdef', k=12))  # Generate a random game ID

        if verbose:
            print(f"Successfully start a new game! Game ID: {game_id}. # of tries remaining: {tries_remains}. Word: {hidden_word}.")

        while tries_remains > 0:
            guess_letter = self.guess(hidden_word)

            if guess_letter is None:
                # No possible guess. Fall back to original logic.
                guess_letter = self.original_guess()

            self.guessed_letters.append(guess_letter)

            if verbose:
                print(f"Guessing letter: {guess_letter}")

            if guess_letter in word:
                new_hidden_word = ''
                for i, letter in enumerate(word):
                    if letter == guess_letter:
                        new_hidden_word += letter
                    else:
                        new_hidden_word += hidden_word[i]

                if new_hidden_word == word:
                    if verbose:
                        print(f"Word: {new_hidden_word}")
                        print(f"Congratulations! You won!")
                    return True

                hidden_word = new_hidden_word
                if verbose:
                    print(f"Word: {hidden_word}")

            else:
                tries_remains -= 1
                if verbose:
                    print(f"Wrong attempt! Word: {hidden_word} Tries remaining: {tries_remains}")
                if tries_remains == 0:
                    if verbose:
                        print(f"Failed game: {game_id}. Because of: # of tries exceeded!")
                    return False

# Play the game a certain number of times
total_recorded_runs = 10
total_recorded_successes = 0

hangman_game = HangmanOffline()

for i in range(total_recorded_runs):
    print(f'Playing game {i+1}')
    if hangman_game.start_game(verbose=True):
        total_recorded_successes += 1
    time.sleep(0.5)

# Calculate success rate
success_rate = (total_recorded_successes / total_recorded_runs) * 100
print(f'Total recorded successes = {total_recorded_successes}')
print(f'Overall success rate: {success_rate} %')

The problem I am facing is that whatever value I assign to the blanks that appear at the start of every game, that value gets fed to the model and raises an error like this:

indices[0,0] = -1 is not in [0, 26)
     [[{{node sequential/embedding/embedding_lookup}}]] [Op:__inference_predict_function_93797]
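
Here is a minimal snippet that isolates the failure (assuming the trained model and index maps above are loaded): preprocess_word maps the blank placeholder, which is not in char_to_index, to -1, and the Embedding layer rejects any index outside [0, 26).

game = HangmanOffline()

masked_word = "_____"                        # what guess() receives on the first turn
encoded = game.preprocess_word(masked_word)  # [-1, -1, -1, -1, -1]
print(encoded)

padded = np.array([np.pad(encoded, (0, 29 - len(encoded)), mode='constant')])
game.model.predict(padded)                   # InvalidArgumentError: indices[0,0] = -1 is not in [0, 26)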

How do I fix this issue?
