Wrong Output Shape from TensorFlow Model with Custom Layers


I have an encoder-decoder model that uses a custom encoder layer and a custom decoder layer. The problem is that the decoder layer returns a 2D output instead of a 3D one; the features dimension is missing. With a batch size of 128, a sequence length of 70, and 73 features, I expect an output shape of 128x70x73, but I get 128x70.

The encoder and decoder class definitions:

class encoder_layer(tf.keras.layers.Layer):
  def __init__(self):
    super(encoder_layer,self).__init__()

    # encoder block
    self.enc_input = Input(shape=(seq_length),name='enc_input')
    self.embed_layer = Embedding(input_dim=len(char2idx),output_dim=EMBEDDING_DIM,
                            weights=[embed_matrix],trainable=False,mask_zero=True)
    self.enc_lstm = Bidirectional(LSTM(lstm_size,return_state=True,return_sequences=True,name='enc_lstm'))

  def call(self,input_tensor,training=False):
    
    embed_data = self.embed_layer(input_tensor)
    enc_out,enc_state_h_f,enc_state_h_r,enc_state_c_f,enc_state_c_r = self.enc_lstm(embed_data)
    enc_state_h = tf.concat([enc_state_h_f,enc_state_h_r],axis=1)
    enc_state_c = tf.concat([enc_state_c_f,enc_state_c_r],axis=1)
    enc_state_h = tf.expand_dims(enc_state_h,axis = 1)
    enc_state_c = tf.expand_dims(enc_state_c,axis = 1)
    # self.states = [self.enc_state_h,self.enc_state_c]
    return tf.concat([enc_out,enc_state_h,enc_state_c],axis=1)


class decoder_layer(tf.keras.layers.Layer):
  def __init__(self):
    super(decoder_layer,self).__init__()
    # self.dec_input_layer = Input(shape=(1,seq_length+latentSpaceDimension))
    self.dec_lstm = LSTM(lstm_size*2,return_state=True,name='dec_lstm')

    self.attention = cust_attention(latentSpaceDimension)
    self.dec_dropout = Dropout(0.5)
    self.decoder_dense = Dense(num_features, activation='softmax',  name='decoder_dense')
    self.input_slice = Lambda(lambda x: tf.split(x, [-1,1,1],axis=1))
    self.final_dec_out = Lambda(lambda x: K.concatenate(x, axis=1))

  def compute_output_shape(self,input_shape):
    return(self.cur_batch_size,seq_length,len(char2idx))

  def call(self,input_tensor,training=False):
    # print('decoder input tensor shape:',input_tensor.shape)
    self.cur_batch_size = input_tensor.shape[0]
    # dec_input = self.dec_input_layer
    enc_out,enc_state_h,enc_state_c = self.input_slice(input_tensor)
    # print('enc_out shape - {},enc_state_h shape - {},enc_state_c shape - {}'\
    #       .format(enc_out.shape,enc_state_h.shape,enc_state_c.shape))
    if self.cur_batch_size == None:
      return tf.zeros((1,seq_length))
    enc_state_h = tf.squeeze(enc_state_h,axis=1)
    enc_state_c = tf.squeeze(enc_state_c,axis=1)
    dec_in_data = np.zeros((self.cur_batch_size,1,num_features)).astype(np.float32)
    states = [enc_state_h,enc_state_c]
    all_outputs = []
    # print('enc_out shape - {},enc_state_h shape - {},enc_state_c shape - {}'\
    #       .format(enc_out.shape,enc_state_h.shape,enc_state_c.shape))

    dec_out = enc_state_h

    for _ in range(seq_length):
      # print('******iter {}******'.format(_))

      context_vector, attention_weights = self.attention(dec_out,enc_out)
      context_vector = tf.expand_dims(context_vector,1)
      # if cur_batch_size != None:
      # print('pre concat context vector - {}, dec in data  - {}'\
      #       .format(context_vector.shape,dec_in_data.shape))
      dec_in_data = tf.concat([context_vector,dec_in_data],axis=-1)
      # print('post concat dec in data  - {}'.format(dec_in_data.shape))
    
      dec_out,dec_state_h,dec_state_c = self.dec_lstm(dec_in_data,initial_state=states)

      states = [dec_state_h,dec_state_c]
      dense_output = self.decoder_dense(self.dec_dropout(dec_out))
      dense_output = tf.expand_dims(dense_output,1)
      dec_in_data = dense_output
      all_outputs.append(dense_output)
      print('output shape:',dense_output.shape)
    # print('output shape:',out_val.shape)
    # if cur_batch_size != None:
    print('len all outputs:',len(all_outputs))
    print('output shape:',self.final_dec_out(all_outputs).shape)
    return self.final_dec_out(all_outputs)

The model code:

def make_model():
  enc_layer = encoder_layer()
  dec_layer = decoder_layer()

  input_layer = Input(shape=(MAX_SENT_LEN+10))
  x = enc_layer(input_layer)
  output_layer = dec_layer(x)

  model = models.Model(inputs=input_layer,outputs=output_layer)
  return model

Training code:

x_model = make_model()
x_model.compile(optimizer=tf.keras.optimizers.RMSprop(),loss=tf.keras.losses.CategoricalCrossentropy(),metrics='accuracy')

history = x_model.fit(x=x_train,y = y_train,validation_data=(x_val,y_val),epochs=120,callbacks=callbacks_list,batch_size=batch_size)
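For reference, a quick sanity check (assuming the names above) is to print the output shape Keras inferred when the model was built:

# Expected (None, 70, 73): batch x seq_length x num_features.
# Instead a 2D shape is reported -- the features axis is missing.
print(x_model.output_shape)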

What could the problem be?

There is 1 best solution below.

Answer from Tunde:

The error came from the tf.zeros declaration in the call method. It omitted the features dimension, so the rest of the layer assumed the erroneous 2D shape.
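A minimal sketch of the fix (assuming num_features == 73, matching the posted code): when Keras traces call with a symbolic input, input_tensor.shape[0] is None, so this early return defines the layer's output shape and must carry all three dimensions:

# Inside decoder_layer.call: the early return taken during tracing
# originally produced tf.zeros((1, seq_length)), a 2D tensor, so the
# model's output lost the features axis. A 3D placeholder restores
# the expected (batch, seq_length, num_features) shape.
if self.cur_batch_size is None:
  return tf.zeros((1, seq_length, num_features))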