Matrix multiplication issue in a Bidirectional LSTM Model

21 Views Asked by At
class BiLSTMNERTagger(nn.Module):
    def __init__(self, emb_dim, hid_dim, n_layers, token_vocab_size, tag_vocab_size):
        super().__init__()

        self.embedding = nn.Embedding(token_vocab_size, emb_dim, padding_idx=0)
        self.rnn = nn.LSTM(emb_dim, hid_dim, num_layers=n_layers, batch_first=True, bidirectional=True)

        self.fc = nn.Linear(2*hid_dim, tag_vocab_size)

        self.softmax = nn.Softmax()

        self.dropout = nn.Dropout(0.1)
        self.hidden = None

    def forward(self, words, words_len):
      #YOUR CODE HERE
      out = self.dropout(self.embedding(words))
      out = nn.utils.rnn.pack_padded_sequence(out, words_len.cpu().numpy(), enforce_sorted=False, batch_first=True)
      out, (hidden, cell) = self.rnn(out)
      out = hidden[-1, :, :]
      self.hidden = hidden
      out = self.fc(out)
      prediction = self.softmax(out)
      return prediction

torch.manual_seed(42)
model = BiLSTMNERTagger(
    emb_dim=20,
    hid_dim=64,
    n_layers=2,
    token_vocab_size=len(tokens_vocab),
    tag_vocab_size=len(ner_vocab),)
model = model.to(device)

This is my error:

 115     def forward(self, input: Tensor) -> Tensor:
--> 116         return F.linear(input, self.weight, self.bias)
    117 
    118     def extra_repr(self) -> str:

RuntimeError: mat1 and mat2 shapes cannot be multiplied (32x64 and 128x10)

So the issue is that I thought that in a bidirectional model, the vector would double after I pass each word through my LSTM error, but that doesn't seem the case. And because of that, I cannot feed it to my linear layer.

Could you please tell me what the issue is, and how to fix it?

0

There are 0 best solutions below