Runtime error: mat1 and mat2 shapes cannot be multiplied (400x201 and 400x 200)

22 Views Asked by At

I have created a model using the code below for text summarization

class Attention(nn.Module):
    def __init__(self, hidden_size):
        super(Attention, self).__init__()
        self.hidden_size = hidden_size
        self.attn = nn.Linear(hidden_size * 2, hidden_size)  # Ensure correct input and output dimensions

 self.v = nn.Linear(hidden_size, 1, bias=False)  # Ensure correct input and output dimensions
def forward(self, hidden, encoder_outputs):
    # Expand hidden to match encoder_outputs's shape
    hidden_expanded = hidden.unsqueeze(1).repeat(1, encoder_outputs.size(1), 1)

    # Expand encoder_outputs to have the same number of dimensions as hidden_expanded
    encoder_outputs_expanded = encoder_outputs.unsqueeze(2)

    # Concatenate hidden_expanded and encoder_outputs_expanded along the last dimension
    combined = torch.cat((hidden_expanded, encoder_outputs_expanded), dim=2)
    
    # Debugging: Print out shapes
    print("Shapes:")
    print("hidden_expanded:", hidden_expanded.shape)
    print("encoder_outputs:", encoder_outputs_expanded.shape)

    # Calculate attention scores
    energy = torch.tanh(self.attn(combined))
    attention = self.v(energy).squeeze(2)

    # Apply softmax to get attention weights
    attention_weights = F.softmax(attention, dim=1)

    return attention_weights

The code gives the following runtime error

RuntimeError: mat1 and mat2 shapes cannot be multiplied (400x201 and 400x200)

How can this error of the shapes be resolved?

0

There are 0 best solutions below