I have created a model using the code below for text summarization
class Attention(nn.Module):
def __init__(self, hidden_size):
super(Attention, self).__init__()
self.hidden_size = hidden_size
self.attn = nn.Linear(hidden_size * 2, hidden_size) # Ensure correct input and output dimensions
self.v = nn.Linear(hidden_size, 1, bias=False) # Ensure correct input and output dimensions
def forward(self, hidden, encoder_outputs):
# Expand hidden to match encoder_outputs's shape
hidden_expanded = hidden.unsqueeze(1).repeat(1, encoder_outputs.size(1), 1)
# Expand encoder_outputs to have the same number of dimensions as hidden_expanded
encoder_outputs_expanded = encoder_outputs.unsqueeze(2)
# Concatenate hidden_expanded and encoder_outputs_expanded along the last dimension
combined = torch.cat((hidden_expanded, encoder_outputs_expanded), dim=2)
# Debugging: Print out shapes
print("Shapes:")
print("hidden_expanded:", hidden_expanded.shape)
print("encoder_outputs:", encoder_outputs_expanded.shape)
# Calculate attention scores
energy = torch.tanh(self.attn(combined))
attention = self.v(energy).squeeze(2)
# Apply softmax to get attention weights
attention_weights = F.softmax(attention, dim=1)
return attention_weights
The code gives the following runtime error
RuntimeError: mat1 and mat2 shapes cannot be multiplied (400x201 and 400x200)
How can this error of the shapes be resolved?