Wrong Output Shape from TensorFlow Model with Custom Layers


I have an encoder-decoder model that uses a custom encoder layer and a custom decoder layer. The problem is that the decoder layer returns a 2D output instead of a 3D one; the features dimension is missing. With a batch size of 128, a sequence length of 70, and 73 features, I expect an output shape of 128x70x73, but I get 128x70.

The encoder and decoder class definitions:

class encoder_layer(tf.keras.layers.Layer):
  def __init__(self):
    super(encoder_layer,self).__init__()

    # encoder block
    self.enc_input = Input(shape=(seq_length),name='enc_input')
    self.embed_layer = Embedding(input_dim=len(char2idx),output_dim=EMBEDDING_DIM,
                            weights=[embed_matrix],trainable=False,mask_zero=True)
    self.enc_lstm = Bidirectional(LSTM(lstm_size,return_state=True,return_sequences=True,name='enc_lstm'))

  def call(self,input_tensor,training=False):
    
    embed_data = self.embed_layer(input_tensor)
    enc_out,enc_state_h_f,enc_state_h_r,enc_state_c_f,enc_state_c_r = self.enc_lstm(embed_data)
    enc_state_h = tf.concat([enc_state_h_f,enc_state_h_r],axis=1)
    enc_state_c = tf.concat([enc_state_c_f,enc_state_c_r],axis=1)
    enc_state_h = tf.expand_dims(enc_state_h,axis = 1)
    enc_state_c = tf.expand_dims(enc_state_c,axis = 1)
    # self.states = [self.enc_state_h,self.enc_state_c]
    return tf.concat([enc_out,enc_state_h,enc_state_c],axis=1)


class decoder_layer(tf.keras.layers.Layer):
  def __init__(self):
    super(decoder_layer,self).__init__()
    # self.dec_input_layer = Input(shape=(1,seq_length+latentSpaceDimension))
    self.dec_lstm = LSTM(lstm_size*2,return_state=True,name='dec_lstm')

    self.attention = cust_attention(latentSpaceDimension)
    self.dec_dropout = Dropout(0.5)
    self.decoder_dense = Dense(num_features, activation='softmax',  name='decoder_dense')
    self.input_slice = Lambda(lambda x: tf.split(x, [-1,1,1],axis=1))
    self.final_dec_out = Lambda(lambda x: K.concatenate(x, axis=1))

  def compute_output_shape(self,input_shape):
    return(self.cur_batch_size,seq_length,len(char2idx))

  def call(self,input_tensor,training=False):
    # print('decoder input tensor shape:',input_tensor.shape)
    self.cur_batch_size = input_tensor.shape[0]
    # dec_input = self.dec_input_layer
    enc_out,enc_state_h,enc_state_c = self.input_slice(input_tensor)
    # print('enc_out shape - {},enc_state_h shape - {},enc_state_c shape - {}'\
    #       .format(enc_out.shape,enc_state_h.shape,enc_state_c.shape))
    if self.cur_batch_size == None:
      return tf.zeros((1,seq_length))
    enc_state_h = tf.squeeze(enc_state_h,axis=1)
    enc_state_c = tf.squeeze(enc_state_c,axis=1)
    dec_in_data = np.zeros((self.cur_batch_size,1,num_features)).astype(np.float32)
    states = [enc_state_h,enc_state_c]
    all_outputs = []
    # print('enc_out shape - {},enc_state_h shape - {},enc_state_c shape - {}'\
    #       .format(enc_out.shape,enc_state_h.shape,enc_state_c.shape))

    dec_out = enc_state_h

    for _ in range(seq_length):
      # print('******iter {}******'.format(_))

      context_vector, attention_weights = self.attention(dec_out,enc_out)
      context_vector = tf.expand_dims(context_vector,1)
      # if cur_batch_size != None:
      # print('pre concat context vector - {}, dec in data  - {}'\
      #       .format(context_vector.shape,dec_in_data.shape))
      dec_in_data = tf.concat([context_vector,dec_in_data],axis=-1)
      # print('post concat dec in data  - {}'.format(dec_in_data.shape))
    
      dec_out,dec_state_h,dec_state_c = self.dec_lstm(dec_in_data,initial_state=states)

      states = [dec_state_h,dec_state_c]
      dense_output = self.decoder_dense(self.dec_dropout(dec_out))
      dense_output = tf.expand_dims(dense_output,1)
      dec_in_data = dense_output
      all_outputs.append(dense_output)
      print('output shape:',dense_output.shape)
    # print('output shape:',out_val.shape)
    # if cur_batch_size != None:
    print('len all outputs:',len(all_outputs))
    print('output shape:',self.final_dec_out(all_outputs).shape)
    return self.final_dec_out(all_outputs)

The model code:

def make_model():
  enc_layer = encoder_layer()
  dec_layer = decoder_layer()

  input_layer = Input(shape=(MAX_SENT_LEN+10))
  x = enc_layer(input_layer)
  output_layer = dec_layer(x)

  model = models.Model(inputs=input_layer,outputs=output_layer)
  return model

Training code:

x_model = make_model()
x_model.compile(optimizer=tf.keras.optimizers.RMSprop(),loss=tf.keras.losses.CategoricalCrossentropy(),metrics='accuracy')

history = x_model.fit(x=x_train,y = y_train,validation_data=(x_val,y_val),epochs=120,callbacks=callbacks_list,batch_size=batch_size)
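For reference, a quick sanity check (assuming the names above) is to print the output shape Keras inferred when the model was built:

# Expected (None, 70, 73): batch x seq_length x num_features.
# Instead a 2D shape is reported -- the features axis is missing.
print(x_model.output_shape)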

What could the problem be?

There is 1 best solution below.

Answer from Tunde:

The error came from the tf.zeros declaration in the call method. It omitted the features dimension, so the rest of the layer assumed the erroneous 2D shape.
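A minimal sketch of the fix (assuming num_features == 73, matching the posted code): when Keras traces call with a symbolic input, input_tensor.shape[0] is None, so this early return defines the layer's output shape and must carry all three dimensions:

# Inside decoder_layer.call: the early return taken during tracing
# originally produced tf.zeros((1, seq_length)), a 2D tensor, so the
# model's output lost the features axis. A 3D placeholder restores
# the expected (batch, seq_length, num_features) shape.
if self.cur_batch_size is None:
  return tf.zeros((1, seq_length, num_features))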