I have created a CBOW model:
import torch
import torch.nn as nn
import torch.optim as optim

class CBOW(nn.Module):
    def __init__(self, vocab_size, emb_dim, hidden_dim, context_window=CONTEXT_WINDOW):
        super(CBOW, self).__init__()
        self.embedding = nn.Embedding(vocab_size, emb_dim)
        # hidden_dim is accepted but not used: the model is a single linear layer
        # over the concatenated context embeddings
        self.linear1 = nn.Linear(emb_dim * (context_window * 2), vocab_size)

    def forward(self, inputs):
        # inputs: (batch, 2 * context_window) token indices
        embeds = self.embedding(inputs)                      # (batch, 2 * context_window, emb_dim)
        out = torch.flatten(embeds, start_dim=1, end_dim=2)  # (batch, 2 * context_window * emb_dim)
        out = self.linear1(out)                              # (batch, vocab_size)
        return out

model_fname = 'model1_cbow.pt'

# Define hyperparameters
vocab_size = len(vocab)
emb_dim = 10
hidden_dim = 32
lr = 0.001
num_epochs = 3

# Initialize model
model = CBOW(vocab_size, emb_dim, hidden_dim)
model = model.to(device)

# Define optimizer and loss function
optimizer = optim.Adam(model.parameters(), lr=lr)
loss_fn = nn.CrossEntropyLoss()
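For reference, this is the shape behaviour I expect from the forward pass (just a quick sanity check; CONTEXT_WINDOW, vocab, and device are defined earlier in my script, as in the snippet above):

# A dummy batch of 4 context windows, each with 2 * CONTEXT_WINDOW token indices,
# should produce one logit per vocabulary entry
dummy_contexts = torch.randint(0, vocab_size, (4, CONTEXT_WINDOW * 2), device=device)
logits = model(dummy_contexts)
print(logits.shape)  # torch.Size([4, vocab_size])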
I have also gotten its associated weights:
embeddings = list(model.parameters())[0]
embeddings = embeddings.detach().clone()  # torch.tensor(embeddings) warns on a parameter; detach + clone instead
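To double-check that the first parameter really is the embedding layer's weight matrix (nothing assumed beyond the model defined above), I print:

print(embeddings.shape)                                 # torch.Size([vocab_size, 10])
print(torch.equal(embeddings, model.embedding.weight))  # True if I grabbed the embedding matrix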
I now want to build the embedding matrix for the vocabulary, i.e. associate each token with its corresponding embedding vector. My question is: how do I make sure the rows are actually in the correct order? I have the parameters, but should I instead pass every word in the vocab through the frozen model and "retrieve" its embedding that way (roughly what I sketch below)? I'm somewhat confused about how to proceed.
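To make the question concrete, here are the two approaches I'm weighing (just a sketch; I'm assuming vocab maps each token to the same integer index that was used to build the training examples):

with torch.no_grad():
    # Option A: take the embedding matrix directly; row i would be the
    # embedding of the token whose index is i
    emb_matrix = model.embedding.weight.detach().clone()  # (vocab_size, emb_dim)

    # Option B: "retrieve" each embedding by feeding every index through
    # the frozen embedding layer, one lookup per vocabulary entry
    all_indices = torch.arange(vocab_size, device=device)
    emb_by_lookup = model.embedding(all_indices)           # (vocab_size, emb_dim)

Is option A guaranteed to give me the rows in vocabulary-index order, or do I need something like option B to be safe?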
Thanks!