This is my model, which works on batched data, and I want to deploy it to process a single input: a natural-language question. You can find the classes of the embedding layer, encoder, decoder, etc. in this repository: https://github.com/wangpinggl/TREQS/tree/master/LeafNATS/modules
import os
import time
import torch
from torch.autograd import Variable
from seq2sql.model_seq2seq_base import modelSeq2SeqBase
from LeafNATS.data.seq2sql.process_batch_cqa_v1 import process_batch
from LeafNATS.modules.embedding.nats_embedding import natsEmbedding
from LeafNATS.modules.encoder.encoder_rnn import EncoderRNN
from LeafNATS.modules.encoder2decoder.nats_encoder2decoder import natsEncoder2Decoder
from LeafNATS.modules.attention.nats_attention_encoder import AttentionEncoder
from LeafNATS.modules.attention.nats_attention_decoder import AttentionDecoder
from LeafNATS.utils.utils import *
class modelABS(modelSeq2SeqBase):
    '''
    Pointer-generator seq2seq model (TREQS-style): encoder-side temporal
    attention, intra-decoder attention, and a generation-probability switch
    mixing the vocabulary distribution with a copy (pointer) distribution.
    Source and target share one embedding matrix.
    '''

    def __init__(self, args):
        super().__init__(args=args)

    def build_scheduler(self, optimizer):
        '''
        Schedule the learning rate: multiply it by ``step_decay`` every
        ``step_size`` epochs (torch StepLR).

        optimizer: the optimizer whose learning rate is scheduled.
        Returns the StepLR scheduler instance.
        '''
        scheduler = torch.optim.lr_scheduler.StepLR(
            optimizer=optimizer, step_size=self.args.step_size,
            gamma=self.args.step_decay)
        return scheduler

    def build_batch(self, batch_id):
        '''
        Load one pre-processed batch from disk and move its tensors to the
        configured device, populating ``self.batch_data``.

        batch_id: index of the batch file under ../nats_results.
        '''
        output = process_batch(
            batch_id=batch_id,
            path_=os.path.join('..', 'nats_results'),
            fkey_=self.args.task,
            batch_size=self.args.batch_size,
            vocab2id=self.batch_data['vocab2id'],
            max_lens=[self.args.src_seq_len, self.args.trg_seq_len])
        # Mapping of extended (copy) ids back to OOV words.
        self.batch_data['ext_id2oov'] = output['ext_id2oov']
        self.batch_data['src_var'] = output['src_var'].to(self.args.device)
        self.batch_data['batch_size'] = self.batch_data['src_var'].size(0)
        self.batch_data['src_seq_len'] = self.batch_data['src_var'].size(1)
        self.batch_data['src_mask_pad'] = output['src_mask_pad'].to(self.args.device)
        if self.args.task in ('train', 'validate'):
            self.batch_data['trg_input'] = output['trg_input_var'].to(self.args.device)
            # different from seq2seq models.
            self.batch_data['trg_output'] = output['trg_output_var'].to(self.args.device)
            self.batch_data['trg_seq_len'] = self.batch_data['trg_input'].size(1)
        else:
            self.batch_data['src_mask_unk'] = output['src_mask_unk'].to(self.args.device)
            self.batch_data['src_txt'] = output['src_txt']
            self.batch_data['trg_txt'] = output['trg_txt']
            # At decode time generation proceeds one token at a time.
            self.batch_data['trg_seq_len'] = 1

    def build_models(self):
        '''
        build all models.
        in this model source and target share embeddings
        '''
        self.train_models['embedding'] = natsEmbedding(
            vocab_size=self.batch_data['vocab_size'],
            emb_dim=self.args.emb_dim,
            share_emb_weight=True
        ).to(self.args.device)
        self.train_models['encoder'] = EncoderRNN(
            self.args.emb_dim, self.args.src_hidden_dim,
            self.args.nLayers, 'lstm',
            device=self.args.device
        ).to(self.args.device)
        # Bridge the encoder final state to the decoder initial state.
        self.train_models['encoder2decoder'] = natsEncoder2Decoder(
            src_hidden_size=self.args.src_hidden_dim,
            trg_hidden_size=self.args.trg_hidden_dim,
            rnn_network='lstm',
            device=self.args.device
        ).to(self.args.device)
        # Decoder input is [word embedding ; previous attention output h_attn].
        self.train_models['decoderRNN'] = torch.nn.LSTMCell(
            self.args.emb_dim + self.args.trg_hidden_dim,
            self.args.trg_hidden_dim
        ).to(self.args.device)
        self.train_models['attnEncoder'] = AttentionEncoder(
            self.args.src_hidden_dim,
            self.args.trg_hidden_dim,
            attn_method='luong_general',
            repetition='temporal'
        ).to(self.args.device)
        self.train_models['attnDecoder'] = AttentionDecoder(
            self.args.trg_hidden_dim,
            attn_method='luong_general'
        ).to(self.args.device)
        # Combine encoder context (2*src_hidden -- presumably the encoder is
        # bidirectional, hence the doubling; confirm against EncoderRNN),
        # decoder context and decoder hidden state into one vector.
        self.train_models['wrapDecoder'] = torch.nn.Linear(
            self.args.src_hidden_dim*2 + self.args.trg_hidden_dim*2,
            self.args.trg_hidden_dim, bias=True
        ).to(self.args.device)
        # Scalar generation probability for the pointer-generator mixture.
        self.train_models['genPrb'] = torch.nn.Linear(
            self.args.emb_dim + self.args.src_hidden_dim*2 + self.args.trg_hidden_dim, 1
        ).to(self.args.device)
        # decoder to vocab
        self.train_models['decoder2proj'] = torch.nn.Linear(
            self.args.trg_hidden_dim, self.args.emb_dim, bias=False
        ).to(self.args.device)

    def build_encoder(self):
        '''
        Encoder Pipeline
        self.pipe_data = {
            'encoder': {},
            'decoderA': {}}
            'decoderB': {'accu_attn': [], 'last_word': word}}
        '''
        src_emb = self.train_models['embedding'].get_embedding(
            self.batch_data['src_var'])
        src_enc, hidden_encoder = self.train_models['encoder'](src_emb)
        trg_hidden0 = self.train_models['encoder2decoder'](hidden_encoder)
        # set up pipe_data pass to decoder
        self.pipe_data['encoder'] = {}
        self.pipe_data['encoder']['src_emb'] = src_emb
        self.pipe_data['encoder']['src_enc'] = src_enc
        self.pipe_data['decoderB'] = {}
        self.pipe_data['decoderB']['hidden'] = trg_hidden0
        # Initial attention output is all zeros (no context yet).
        # Plain tensors replace the deprecated torch.autograd.Variable wrapper.
        self.pipe_data['decoderB']['h_attn'] = torch.zeros(
            self.batch_data['batch_size'], self.args.trg_hidden_dim,
            device=self.args.device)
        # Uniform attention history for the temporal-attention normalizer.
        self.pipe_data['decoderB']['past_attn'] = (torch.ones(
            self.batch_data['batch_size'], self.batch_data['src_seq_len'],
            device=self.args.device) / float(self.batch_data['src_seq_len']))
        # Placeholder; overwritten with real decoder states after step 0.
        self.pipe_data['decoderB']['past_dech'] = torch.zeros(
            1, 1, device=self.args.device)
        self.pipe_data['decoderB']['accu_attn'] = []
        self.pipe_data['decoderFF'] = {}
        self.pipe_data['decoderFF']['h_attn'] = []
        self.pipe_data['decoderFF']['attn'] = []
        self.pipe_data['decoderFF']['genPrb'] = []
        # when training get target embedding at the same time.
        if self.args.task in ('train', 'validate'):
            trg_emb = self.train_models['embedding'].get_embedding(
                self.batch_data['trg_input'])
            self.pipe_data['decoderFF']['trg_seq_emb'] = trg_emb

    def build_decoder_one_step(self, k=0):
        '''
        Run one decoding step.

        k: index of the current decoding position (0-based).
        Reads the previous step's state from pipe_data['decoderA'/'decoderB']
        and writes the new state to pipe_data['decoderB'] (and, at train time,
        accumulates per-step outputs in pipe_data['decoderFF']).
        '''
        # embedding at current decoding step
        if self.args.task in ('train', 'validate'):
            # Teacher forcing: take the k-th gold target embedding.
            self.pipe_data['decoderA'] = self.pipe_data['decoderB']
            word_emb = self.pipe_data['decoderFF']['trg_seq_emb'][:, k]
        else:
            # Decoding: embed the previously generated word.
            word_emb = self.train_models['embedding'].get_embedding(
                self.pipe_data['decoderA']['last_word'])
        h_attn = self.pipe_data['decoderA']['h_attn']
        dec_input = torch.cat((word_emb, h_attn), 1)
        hidden = self.pipe_data['decoderA']['hidden']
        past_attn = self.pipe_data['decoderA']['past_attn']
        accu_attn = self.pipe_data['decoderA']['accu_attn']
        past_dech = self.pipe_data['decoderA']['past_dech']
        hidden = self.train_models['decoderRNN'](dec_input, hidden)
        ctx_enc, attn, attn_ee = self.train_models['attnEncoder'](
            hidden[0], self.pipe_data['encoder']['src_enc'],
            past_attn, self.batch_data['src_mask_pad'])
        # temporal attention
        past_attn = past_attn + attn_ee
        # decoder attention
        if k == 0:
            # No past decoder states yet -> zero decoder context.
            ctx_dec = torch.zeros(
                self.batch_data['batch_size'], self.args.trg_hidden_dim,
                device=self.args.device)
        else:
            ctx_dec, _ = self.train_models['attnDecoder'](
                hidden[0], past_dech)
        # Append the new decoder hidden state to the history of past states.
        past_dech = past_dech.transpose(0, 1)  # seqL*batch*hidden
        dec_idx = past_dech.size(0)
        if k == 0:
            past_dech = hidden[0].unsqueeze(0)  # seqL*batch*hidden
            past_dech = past_dech.transpose(0, 1)  # batch*seqL*hidden
        else:
            past_dech = past_dech.contiguous().view(
                -1, self.args.trg_hidden_dim)  # (seqL*batch)*hidden
            past_dech = torch.cat((past_dech, hidden[0]), 0)  # ((seqL+1)*batch)*hidden
            past_dech = past_dech.view(
                dec_idx+1, self.batch_data['batch_size'], self.args.trg_hidden_dim
            )  # (seqL+1)*batch*hidden
            past_dech = past_dech.transpose(0, 1)  # batch*(seqL+1)*hidden
        # wrap up.
        h_attn = self.train_models['wrapDecoder'](torch.cat((ctx_enc, ctx_dec, hidden[0]), 1))
        # pointer generator
        pt_input = torch.cat((word_emb, hidden[0], ctx_enc), 1)
        genPrb = torch.sigmoid(self.train_models['genPrb'](pt_input))
        # setup piped_data
        self.pipe_data['decoderB'] = {}
        self.pipe_data['decoderB']['h_attn'] = h_attn
        self.pipe_data['decoderB']['past_attn'] = past_attn
        self.pipe_data['decoderB']['hidden'] = hidden
        self.pipe_data['decoderB']['past_dech'] = past_dech
        self.pipe_data['decoderB']['accu_attn'] = [a for a in accu_attn]
        self.pipe_data['decoderB']['accu_attn'].append(attn)
        if self.args.task in ('train', 'validate'):
            self.pipe_data['decoderFF']['h_attn'].append(h_attn)
            self.pipe_data['decoderFF']['attn'].append(attn)
            self.pipe_data['decoderFF']['genPrb'].append(genPrb)
            if k == self.batch_data['trg_seq_len']-1:
                # Last step: stack the per-step lists into batch-first tensors.
                self.pipe_data['decoderFF']['h_attn'] = \
                    torch.cat(self.pipe_data['decoderFF']['h_attn'], 0).view(
                        self.batch_data['trg_seq_len'],
                        self.batch_data['batch_size'],
                        self.args.trg_hidden_dim).transpose(0, 1)
                self.pipe_data['decoderFF']['attn'] = \
                    torch.cat(self.pipe_data['decoderFF']['attn'], 0).view(
                        self.batch_data['trg_seq_len'],
                        self.batch_data['batch_size'],
                        self.args.src_seq_len).transpose(0, 1)
                self.pipe_data['decoderFF']['genPrb'] = \
                    torch.cat(self.pipe_data['decoderFF']['genPrb'], 0).view(
                        self.batch_data['trg_seq_len'],
                        self.batch_data['batch_size']).transpose(0, 1)
        else:
            self.pipe_data['decoderFF']['h_attn'] = h_attn
            self.pipe_data['decoderFF']['attn'] = attn.unsqueeze(0)
            self.pipe_data['decoderFF']['genPrb'] = genPrb

    def build_vocab_distribution(self):
        '''
        Mix the generator's vocabulary distribution with the copy (pointer)
        distribution and return the combined probabilities, shape
        (batch, trg_seq_len, vocab).
        '''
        trg_out = self.pipe_data['decoderFF']['h_attn']
        trg_out = self.train_models['decoder2proj'](trg_out)
        trg_out = self.train_models['embedding'].get_decode2vocab(trg_out)
        trg_out = trg_out.view(
            self.batch_data['batch_size'], self.batch_data['trg_seq_len'], -1)
        prb = torch.softmax(trg_out, dim=2)
        vocab_size = self.batch_data['vocab_size']
        batch_size = self.batch_data['batch_size']
        src_seq_len = self.batch_data['src_seq_len']
        # pointer-generator index matrix: one-hot of each source token over the
        # vocabulary, so bmm(attn, pt_idx) scatters attention mass onto words.
        pt_idx = torch.zeros(
            batch_size, src_seq_len, vocab_size, device=self.args.device)
        pt_idx.scatter_(2, self.batch_data['src_var'].unsqueeze(2), 1.0)
        p_gen = self.pipe_data['decoderFF']['genPrb']
        attn_ = self.pipe_data['decoderFF']['attn']
        prb_output = p_gen.unsqueeze(2)*prb + \
            (1.0-p_gen.unsqueeze(2))*torch.bmm(attn_, pt_idx)
        # Small epsilon keeps the subsequent log() finite.
        return prb_output + 1e-20

    def build_pipelines(self):
        '''
        Build pipeline from input to output.
        Output is loss.
        Input is word one-hot encoding.
        '''
        self.build_encoder()
        for k in range(self.args.trg_seq_len):
            self.build_decoder_one_step(k)
        prb = self.build_vocab_distribution()
        # Zero-weight <pad> so padding positions do not contribute to the loss.
        pad_mask = torch.ones(self.batch_data['vocab_size']).to(self.args.device)
        pad_mask[self.batch_data['vocab2id']['<pad>']] = 0
        self.loss_criterion = torch.nn.NLLLoss(pad_mask).to(self.args.device)
        prb = torch.log(prb)
        # Bug fix: NLLLoss takes (input, target). The original passed prb
        # twice (view and reshape) plus the target -- three positional
        # arguments -- which raises a TypeError at runtime.
        loss = self.loss_criterion(
            prb.view(-1, self.batch_data['vocab_size']),
            self.batch_data['trg_output'].view(-1))
        return loss
Below is my attempt to deploy the model for a single natural-language question (the app_worker method below).
Some methods are still missing attributes that I am working on.
'''
@author Tian Shi
Please contact [email protected]
'''
import glob
import json
import os
import pickle
import re
import shutil
import tokenize
import time
from pprint import pprint
import nltk
from nltk.tokenize import word_tokenize
import numpy as np
import torch
from torch.autograd import Variable
from LeafNATS.modules.embedding.nats_embedding import natsEmbedding
from LeafNATS.modules.encoder.encoder_rnn import EncoderRNN
from LeafNATS.modules.encoder2decoder.nats_encoder2decoder import natsEncoder2Decoder
from LeafNATS.modules.attention.nats_attention_encoder import AttentionEncoder
from LeafNATS.modules.attention.nats_attention_decoder import AttentionDecoder
from LeafNATS.modules.decoder.nats_decoder_pointer_generator import PointerGeneratorDecoder
#from LeafNATS.modules.attention import AttentionSelf
from LeafNATS.utils.utils import *
from LeafNATS.data.utils import *
from LeafNATS.utils.utils import show_progress
#nltk.download('punkt')
class End2EndBase(object):
    '''
    This engine is for the end2end training for seq2seq models.
    It is a new version of previous one.
    Our goal is to extend its application to all kinds of language generation tasks.
    '''
    def __init__(self, args=None):
        '''
        Initialize

        args: configuration / hyper-parameter namespace read by subclasses.
        '''
        self.args = args
        # Models restored from checkpoints; run in eval mode (see app2Go).
        self.base_models = {}
        # Models whose parameters are trained; copied into base_models by app2Go.
        self.train_models = {}
        # Per-batch tensors and metadata shared between pipeline stages.
        self.batch_data = {}
        self.test_data = {}
        # Counter of training steps taken so far.
        self.global_steps = 0
    def build_vocabulary(self):
        '''
        vocabulary

        Subclass hook: build vocab2id / id2vocab mappings.
        '''
        raise NotImplementedError
    def build_models(self):
        '''
        Models:
            self.base_models: models that will be trained
                Format: {'name1': model1, 'name2': model2}
            self.train_models: models that will be trained.
                Format: {'name1': model1, 'name2': model2}
        '''
        raise NotImplementedError
    def init_base_model_params(self):
        '''
        Initialize Base Model Parameters.
        self.base_models.

        Subclass hook: load checkpoint weights into self.base_models.
        '''
        raise NotImplementedError
    def build_pipelines(self):
        '''
        Pipelines and loss here.
        '''
        raise NotImplementedError
    def build_optimizer(self, params):
        '''
        define optimizer

        params: iterable of parameters to optimize.
        '''
        raise NotImplementedError
    def print_info_train(self):
        '''
        Print additional information on screen.
        '''
        print('NATS Message: ')
    def build_batch(self, batch_id):
        '''
        process batch data.

        batch_id: index of the batch to load (subclass hook).
        '''
        raise NotImplementedError
    def test_worker(self):
        '''
        Used in decoding.
        Users can define their own decoding process.
        You do not have to worry about path and prepare input.
        '''
        raise NotImplementedError
    def app_worker(self):
        '''
        For application.

        Reads one natural-language question from stdin, tokenizes it, maps
        tokens to vocabulary ids, and pushes the resulting tensor through the
        individual model components.
        NOTE(review): work in progress -- the components below are built with
        freshly initialized (untrained) weights, no checkpoint is loaded, and
        some calls are incomplete (flagged inline).
        '''
        question = input("ask a question: ")
        # NLTK tokenization -- presumably the same scheme used when the
        # vocabulary file was built; TODO confirm.
        quest_tok = word_tokenize(question)
        print("TOK",quest_tok)
        # Read the file containing the vocabulary IDs
        file_path = "./nats_results/model/vocab"
        # Now we have mapping words to their vocabulary IDs
        vocab2id, id2vocab = construct_vocab(file_path,
                                             max_size=200000,
                                             mincount=5)
        '''print("vocab2id",vocab2id)
        print("id2vocab",id2vocab)'''
        #ques_id is the list of word indices
        # NOTE(review): out-of-vocabulary tokens are silently dropped here;
        # the batch pipeline handles OOVs via extended ids instead -- verify
        # this matches the intended inference behavior.
        ques_id = []
        for e in quest_tok:
            if e in vocab2id:
                ques_id.append(vocab2id[e])
        print("the list of word indices: ", ques_id)
        # Add a batch dimension: shape (1, seq_len).
        input_tensor = torch.tensor(ques_id).unsqueeze(0)
        #====EMBEDDING==========
        # NOTE(review): hard-coded sizes; these should come from the loaded
        # vocabulary and args so they match the trained checkpoint.
        vocab_size = 2353
        emb_dim = 128
        # Initialize the embedding layer
        # NOTE(review): weights here are randomly initialized -- the trained
        # parameters are never loaded in this method.
        embedding_layer = natsEmbedding(vocab_size, emb_dim)
        # Access the weights of the embedding layer
        embedding_weights = embedding_layer.embedding.weight
        # Print the shape of the weights tensor
        print("Shape of embedding weights:", embedding_weights.shape)
        #print("Embedding weights:", embedding_weights)
        tensor_embedded = embedding_layer.get_embedding(input_tensor)
        print("tensor_embedded: ", tensor_embedded)
        print(tensor_embedded.size())
        print(input_tensor.size())
        #embedding.get_embedding(input)
        #====ENCODER=====================================================================
        encoder = EncoderRNN(
            emb_dim=128,
            hidden_size=256,
            nLayers=1,
            rnn_network='lstm',
            bidirectional=True
        )
        hy_encoder, (ht_encoder, ct_encoder) = encoder.forward(tensor_embedded)
        print(hy_encoder, (ht_encoder, ct_encoder))
        #====ENCODER2DECODER=====
        encoder2decoder_instance = natsEncoder2Decoder(
            src_hidden_size=256, # the source hidden size is 256(256*2=512)
            trg_hidden_size=128, # the target hidden size is 128
            rnn_network='lstm' # We are using LSTM networks
        )
        (decoder_h0, decoder_c0) = encoder2decoder_instance.forward((ht_encoder, ct_encoder))
        #====ATTDECODER==========
        decoder = AttentionDecoder(hidden_size=256, attn_method='luong_concat') # Hidden size of the decoder LSTM is 256
        # NOTE(review): forward() is called with no arguments -- this will
        # raise a TypeError; AttentionDecoder needs the decoder hidden state
        # and the past decoder states (see the training model's attnDecoder
        # call). TODO: supply arguments or remove this call.
        decoder.forward()
        #====ATTENCODER=================
        src_hidden_size = 256 #source side hidden size
        trg_hidden_size = 256 #target side hidden size
        attn_method = 'luong_general' #attention method
        repetition = 'temporal' #repetition handling (as mentioned in the build_models part)
        src_hidden_doubled = True #whether source hidden size is doubled
        # Instantiate the AttentionEncoder
        attention_encoder = AttentionEncoder(
            src_hidden_size=src_hidden_size,
            trg_hidden_size=trg_hidden_size,
            attn_method=attn_method,
            repetition=repetition,
            src_hidden_doubled=src_hidden_doubled
        )
        # NOTE(review): the training model calls attnEncoder with four
        # arguments (dec_hidden, src_enc, past_attn, src_mask_pad); only
        # three are passed here, and the third looks like an encoder state
        # rather than a past-attention accumulator -- verify this call.
        attention_encoder.forward((decoder_h0, decoder_c0)[0], hy_encoder, (ht_encoder, ct_encoder)[1][0])
        #==========PointerGeneratorDecoder====================
        # Instantiate the PointerGeneratorDecoder
        # NOTE(review): instantiated but never invoked on the encoded input.
        pointer_generator_decoder = PointerGeneratorDecoder(
            input_size=128, # Size of the input vector
            src_hidden_size=256, # Source side hidden size
            trg_hidden_size=128, # Target side hidden size
            attn_method='luong_general', # Alignment method
            repetition='temporal', # Repetition handling method
            pointer_net=True, # Turn on pointer network
            attn_decoder=True, # Turn on attention decoder
            rnn_network='lstm', # Type of RNN network
            device=torch.device("cpu") # Specify the device (CPU or GPU)
        )
        #raise NotImplementedError
    def app2Go(self):
        '''
        For the application.
        Don't overwrite.

        Builds the vocabulary and models, aliases every trained model as a
        base model, loads parameters, switches to eval mode, and then calls
        app_worker() in an endless loop with gradients disabled.
        '''
        self.build_vocabulary()
        self.build_models()
        for model_name in self.train_models:
            self.base_models[model_name] = self.train_models[model_name]
        pprint(self.base_models)
        if len(self.base_models) > 0:
            self.init_base_model_params()
        # Evaluation mode: disables dropout / batch-norm updates.
        for model_name in self.base_models:
            self.base_models[model_name].eval()
        with torch.no_grad():
            while 1:
                self.app_worker()