I am creating a GRU to do some classification for a project, and I'm relatively new to PyTorch and implementing GRUs. I know similar questions like this one have been answered already, but I can't seem to carry the same solution over to my own problem. I understand that there is an issue with the shape of the tensor going into my fc layer, but after trying to change things I can no longer see the wood for the trees. I would appreciate it if someone could point me in the right direction.
Below I have attached my code and the error. The datasets I'm using contain 24 features, with the label in the 25th column.
# Imports
import pandas as pd
import numpy as np
import torch
import torchvision # torch package for vision related things
import torch.nn.functional as F # Parameterless functions, like (some) activation functions
import torchvision.datasets as datasets # Standard datasets
import torchvision.transforms as transforms # Transformations we can perform on our dataset for augmentation
from torch import optim # For optimizers like SGD, Adam, etc.
from torch import nn # All neural network modules
from torch.utils.data import Dataset, DataLoader # Gives easier dataset management by creating mini batches etc.
from tqdm import tqdm # For a nice progress bar
from sklearn.preprocessing import StandardScaler
# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Hyperparameters
input_size = 24
hidden_size = 128
num_layers = 1
num_classes = 2
sequence_length = 1
learning_rate = 0.005
batch_size = 8
num_epochs = 3
# Recurrent neural network with GRU (many-to-one)
class RNN_GRU(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNN_GRU, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size * sequence_length, num_classes)

    def forward(self, x):
        # Set initial hidden state
        x = x.unsqueeze(0)
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)

        # Forward propagate GRU
        out, _ = self.gru(x, h0)
        out = out.reshape(out.shape[0], -1)

        # Decode the hidden state of the last time step
        out = self.fc(out)
        return out
class MyDataset(Dataset):
    def __init__(self, file_name):
        stats_df = pd.read_csv(file_name)
        x = stats_df.iloc[:, 0:24].values
        y = stats_df.iloc[:, 24].values
        self.x_train = torch.tensor(x, dtype=torch.float32)
        self.y_train = torch.tensor(y, dtype=torch.float32)

    def __len__(self):
        return len(self.y_train)

    def __getitem__(self, idx):
        return self.x_train[idx], self.y_train[idx]
nomDs = MyDataset("nomStats.csv")
atkDs = MyDataset("atkStats.csv")
train_loader = DataLoader(dataset=nomDs, batch_size=batch_size)
test_loader = DataLoader(dataset=atkDs, batch_size=batch_size)
# Initialize network (try out just using simple RNN, or GRU, and then compare with LSTM)
model = RNN_GRU(input_size, hidden_size, num_layers, num_classes).to(device)
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# Train Network
for epoch in range(num_epochs):
    for batch_idx, (data, targets) in enumerate(tqdm(train_loader)):
        # Get data to cuda if possible
        data = data.to(device=device).squeeze(1)
        targets = targets.to(device=device)

        # forward
        scores = model(data)
        loss = criterion(scores, targets)

        # backward
        optimizer.zero_grad()
        loss.backward()

        # gradient descent update step/adam step
        optimizer.step()
# Check accuracy on training & test to see how good our model is
def check_accuracy(loader, model):
    num_correct = 0
    num_samples = 0

    # Set model to eval
    model.eval()

    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device).squeeze(1)
            y = y.to(device=device)

            scores = model(x)
            _, predictions = scores.max(1)
            num_correct += (predictions == y).sum()
            num_samples += predictions.size(0)

    # Toggle model back to train
    model.train()
    return num_correct / num_samples
print(f"Accuracy on training set: {check_accuracy(train_loader, model)*100:2f}")
print(f"Accuracy on test set: {check_accuracy(test_loader, model)*100:.2f}")
Traceback (most recent call last):
File "TESTGRU.py", line 87, in <module>
scores = model(data)
File "C:\Users\steph\anaconda3\envs\FYP\lib\site-packages\torch\nn\modules\module.py", line 889, in _call_impl
result = self.forward(*input, **kwargs)
File "TESTGRU.py", line 47, in forward
out = self.fc(out)
File "C:\Users\steph\anaconda3\envs\FYP\lib\site-packages\torch\nn\modules\module.py", line 889, in _call_impl
result = self.forward(*input, **kwargs)
File "C:\Users\steph\anaconda3\envs\FYP\lib\site-packages\torch\nn\modules\linear.py", line 94, in forward
return F.linear(input, self.weight, self.bias)
File "C:\Users\steph\anaconda3\envs\FYP\lib\site-packages\torch\nn\functional.py", line 1753, in linear
return torch._C._nn.linear(input, weight, bias)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x1024 and 128x2)
It seems like these lines

out = out.reshape(out.shape[0], -1)
out = self.fc(out)

are the problem.

It appears that you only want to feed the hidden state of the last time step to the classifier. This can be read from the GRU's output in two ways:

1. If you want the output of all layers at the last time step, use the second return value of out, _ = self.gru(x, h0), not the first.
2. If you want just the last layer's output at the last time step (which seems to be the case here), use out[:, -1, :].

With this change, you may not need the reshape operation at all.
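For illustration, here is a minimal, standalone sketch of the difference between the two return values. The shapes are assumptions for the example only: each sample is treated as a sequence of length 1, i.e. input of shape (batch, 1, 24), rather than the unsqueeze(0) in your forward.

import torch
from torch import nn

# Dummy data: batch of 8 samples, sequence length 1, 24 features (illustrative shapes only)
gru = nn.GRU(input_size=24, hidden_size=128, num_layers=1, batch_first=True)
fc = nn.Linear(128, 2)

x = torch.randn(8, 1, 24)                      # (batch, seq_len, input_size)
out, h_n = gru(x)                              # out: (8, 1, 128), h_n: (num_layers, batch, hidden) = (1, 8, 128)

last_step = out[:, -1, :]                      # last layer's output at the last time step -> (8, 128)
last_hidden = h_n[-1]                          # h_n holds each layer's final hidden state; h_n[-1] is the last layer's
print(torch.allclose(last_step, last_hidden))  # True for a single-layer, unidirectional GRU

scores = fc(last_step)                         # (8, 2), which matches nn.CrossEntropyLoss with 8 targets

Either way, the tensor going into self.fc has hidden_size features, which is exactly what nn.Linear(hidden_size * sequence_length, num_classes) expects when sequence_length is 1, so the (1x1024 and 128x2) mismatch goes away.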