How to solve "ValueError: indices.shape[-1] must be <= params.rank" when training a BiLSTM-CRF model?


I have built a BiLSTM model for NER tagging and now I want to add a CRF layer to it. I modified the CRF layer as follows:

import numpy as np
import tensorflow as tf
import tensorflow.keras.backend as K
import tensorflow.keras.layers as L
from tensorflow_addons.text import crf_log_likelihood, crf_decode


class CRF(L.Layer):
    def __init__(self,
                 output_dim,
                 sparse_target=True,
                 use_mask=False,
                 label2idx_map=None,
                 **kwargs):
        """    
        Args:
            output_dim (int): the number of labels to tag each temporal input.
            sparse_target (bool): whether the ground-truth label is represented in one-hot.
            use_mask (bool): whether to use masking as specified in MaskedCRF.
            label2idx_map (dict): a mapping from labels to indices for masking.
        Input shape:
            (batch_size, sentence length, output_dim)
        Output shape:
            (batch_size, sentence length, output_dim)
        """
        super(CRF, self).__init__(**kwargs)
        self.output_dim = int(output_dim) 
        self.sparse_target = sparse_target
        self.use_mask = use_mask
        self.label2idx_map = label2idx_map
        self.input_spec = L.InputSpec(min_ndim=3)
        self.supports_masking = False
        self.sequence_lengths = None
        self.transitions = None
        self.mask_tran_matrix = None
        

    def build(self, input_shape):
        assert len(input_shape) == 3
        f_shape = tf.TensorShape(input_shape)
        input_spec = L.InputSpec(min_ndim=3, axes={-1: f_shape[-1]})

        if f_shape[-1] is None:
            raise ValueError('The last dimension of the inputs to `CRF` '
                             'should be defined. Found `None`.')
        if f_shape[-1] != self.output_dim:
            raise ValueError('The last dimension of the input shape must be equal to output'
                             ' shape. Use a linear layer if needed.')
        self.input_spec = input_spec

        # Initialize transitions
        initializer = tf.random_uniform_initializer(minval=-0.1, maxval=0.1, seed=None)
        self.transitions = tf.Variable(
            name="crf_transitions",
            initial_value=initializer(shape=[self.output_dim, self.output_dim])
        )

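        # Masked CRF: cap the learned transition scores so that transitions the
        # tagging scheme forbids (see get_mask_trans) end up with a large negative score.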
        if self.use_mask:
            self.mask_tran_matrix = self.get_mask_trans()
            self.transitions = tf.minimum(self.transitions, self.mask_tran_matrix)

        self.built = True

    def compute_mask(self, inputs, mask=None):
        # Just pass the received mask from previous layer, to the next layer or
        # manipulate it if this layer changes the shape of the input
        return mask

    def call(self, inputs, sequence_lengths=None, training=None, **kwargs):
        sequences = tf.convert_to_tensor(inputs, dtype=self.dtype)
        if sequence_lengths is not None:
            assert len(sequence_lengths.shape) == 2
            assert tf.convert_to_tensor(sequence_lengths).dtype == 'int32'
            seq_len_shape = tf.convert_to_tensor(sequence_lengths).get_shape().as_list()
            assert seq_len_shape[1] == 1
            self.sequence_lengths = K.flatten(sequence_lengths)
        else:
            self.sequence_lengths = tf.ones(tf.shape(inputs)[0], dtype=tf.int32) * (
                tf.shape(inputs)[1]
            )

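        # Decode the best tag path with the current transitions; return the raw
        # potentials while training (the CRF loss needs them) and the one-hot
        # decoded tags at inference time.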
        viterbi_sequence, _ = crf_decode(sequences,
                                         self.transitions,
                                         self.sequence_lengths)
        output = K.one_hot(viterbi_sequence, self.output_dim)
        return K.in_train_phase(sequences, output)

    @property
    def loss(self):
        def crf_loss(y_true, y_pred):
            if self.sparse_target:
                y_true = tf.argmax(y_true, axis=-1)
            y_pred = tf.convert_to_tensor(y_pred, dtype=self.dtype)
            log_likelihood, self.transitions = crf_log_likelihood(
                y_pred,
                tf.cast(K.argmax(y_true), dtype=tf.int32) if self.sparse_target else y_true,
                self.sequence_lengths,
                transition_params=self.transitions,
            )
            return tf.reduce_mean(-log_likelihood)
        return crf_loss

    @property
    def accuracy(self):
        def viterbi_accuracy(y_true, y_pred):
            # -1e10 to avoid zero at sum(mask)
            mask = K.cast(
                K.all(K.greater(y_pred, -1e10), axis=2), K.floatx())
            shape = tf.shape(y_pred)
            sequence_lengths = tf.ones(shape[0], dtype=tf.int32) * (shape[1])
            y_pred, _ = crf_decode(y_pred, self.transitions, sequence_lengths)
            if self.sparse_target:
                y_true = K.argmax(y_true, 2)
            y_pred = K.cast(y_pred, 'int32')
            y_true = K.cast(y_true, 'int32')
            corrects = K.cast(K.equal(y_true, y_pred), K.floatx())
            return K.sum(corrects * mask) / K.sum(mask)
        return viterbi_accuracy

    def compute_output_shape(self, input_shape):
        tf.TensorShape(input_shape).assert_has_rank(3)
        return input_shape[:2] + (self.output_dim,)

    def get_config(self):
        config = {
            'output_dim': self.output_dim,
            'sparse_target': self.sparse_target,
            'supports_masking': self.supports_masking,
            'transitions': K.eval(self.transitions)
        }
        base_config = super(CRF, self).get_config()
        return dict(base_config, **config)
    
    def get_mask_trans(self):
        size = len(self.label2idx_map)
        tag_lst = self.label2idx_map.keys()

        mask_mat = np.ones(shape=(size, size), dtype=np.float32)
        # analyse the tagging scheme: BIO or BIOES
        is_scheme_bioes = False
        flag_e = False
        flag_s = False
        for tag in tag_lst:
            if tag.startswith("E-"):
                flag_e = True

            if tag.startswith("S-"):
                flag_s = True

        if flag_e and flag_s:
            is_scheme_bioes = True
            print("BIOES format tagging scheme detected.")
        else:
            print("BIO format tagging scheme detected.")
        
        for col_tag, col_index in self.label2idx_map.items():
            col_index -= 1  # Adjust the index to start from 0
            if col_tag.startswith("I-"):
                slot_name = col_tag.replace("I-", "")
                begin_slot = "B-" + slot_name
                for row_tag, row_index in self.label2idx_map.items():
                    row_index -= 1  # Adjust the index to start from 0
                    # Print values for debugging
                    print("row_index:", row_index, "col_index:", col_index)
                    # I-city must follow B-city or I-city
                    if row_tag != begin_slot and row_tag != col_tag:
                        mask_mat[row_index, col_index] = -1.0

            if is_scheme_bioes:
                if col_tag.startswith("E-"):
                    slot_name = col_tag.replace("E-", "")
                    intermediate_slot = "I-" + slot_name
                    begin_slot = "B-" + slot_name
                    for row_tag, row_index in self.label2idx_map.items():
                        row_index -= 1  # Adjust the index to start from 0
                        # E-city must follow I-city or B-city
                        if row_tag != intermediate_slot and row_tag != begin_slot:
                            mask_mat[row_index, col_index] = -1.0

                if col_tag.startswith("S-") or col_tag.startswith("B-"):
                    for row_tag, row_index in self.label2idx_map.items():
                        row_index -= 1  # Adjust the index to start from 0
                        # S-xxx or B-xxx must not follow a B- or I- tag
                        if row_tag.startswith("B-") or row_tag.startswith("I-"):
                            mask_mat[row_index, col_index] = -1.0

        return 100 * mask_mat
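
For reference, as far as I understand from the tensorflow_addons documentation, crf_log_likelihood expects rank-3 potentials, rank-2 integer tag indices and rank-1 sequence lengths. A small standalone sketch of those shapes (independent of my model, with made-up sizes):

import tensorflow as tf
from tensorflow_addons.text import crf_log_likelihood

batch_size, max_len, num_tags = 2, 5, 4
potentials = tf.random.uniform((batch_size, max_len, num_tags))                       # unary scores, rank 3
tag_ids = tf.random.uniform((batch_size, max_len), maxval=num_tags, dtype=tf.int32)   # gold tags, rank 2
seq_lens = tf.fill([batch_size], max_len)                                             # lengths, rank 1

log_likelihood, transitions = crf_log_likelihood(potentials, tag_ids, seq_lens)
print(log_likelihood.shape)   # (2,) -- one log-likelihood per sequence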

I then built the BiLSTM model with the CRF layer above as follows:

import tensorflow as tf
import numpy as np
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, Dropout, TimeDistributed, Bidirectional
import tensorflow_addons as tfa
from tensorflow.keras.models import Model

# Input layer
input_layer = Input(shape=(MAX_LEN,), dtype='int32')

# Embedding layer
embedding_layer = Embedding(input_dim=number_words + 1,
                            input_length=MAX_LEN,
                            output_dim=DIM_EMBEDDINGS, trainable=True)(input_layer)

# BiLSTM layer
bilstm_layer = Bidirectional(LSTM(units=DIM_EMBEDDINGS,
                                  return_sequences=True,
                                  dropout=0.5,
                                  recurrent_dropout=0.5))(embedding_layer)

# TimeDistributed layer
kernel = TimeDistributed(Dense(number_tags, activation="relu"))(bilstm_layer)
 
crf = CRF(output_dim=number_tags, sparse_target=True, use_mask=True, label2idx_map=tag2idx)
output = crf(kernel)
model = Model(input_layer, output)
model.compile('adam', loss=crf.loss, metrics=[crf.accuracy])

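For context, since the CRF layer is created with sparse_target=True and its docstring says the ground truth is one-hot, I assume the labels have to be encoded roughly like this (a sketch; y_train_ids is just a placeholder name for the padded integer label sequences, it does not appear in my code above):

import numpy as np
from tensorflow.keras.utils import to_categorical

# Sketch: one-hot encode padded integer label sequences for sparse_target=True.
# y_train_ids is a placeholder for sequences of tag indices, shape (num_samples, MAX_LEN).
y_train = np.array([to_categorical(seq, num_classes=number_tags) for seq in y_train_ids])
print(y_train.shape)   # expected: (num_samples, MAX_LEN, number_tags)
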
A summary of this model is shown below: [model summary screenshot]

When I try to train this model, I get this error:

ValueError                                Traceback (most recent call last)
<ipython-input-16-fe62e47f1cc5> in <cell line: 1>()
----> 1 history = model.fit(X_train, np.array(y_train), epochs=40, batch_size=32, validation_split=0.2, verbose=1)

5 frames
/usr/local/lib/python3.10/dist-packages/tensorflow_addons/text/crf.py in _single_seq_fn()
     31                         batch_inds = ag__.converted_call(ag__.ld(tf).reshape, (ag__.converted_call(ag__.ld(tf).range, (ag__.ld(batch_size),), None, fscope_1), [-1, 1]), None, fscope_1)
     32                         indices = ag__.converted_call(ag__.ld(tf).concat, ([ag__.ld(batch_inds), ag__.converted_call(ag__.ld(tf).zeros_like, (ag__.ld(batch_inds),), None, fscope_1)],), dict(axis=1), fscope_1)
---> 33                         tag_inds = ag__.converted_call(ag__.ld(tf).gather_nd, (ag__.ld(tag_indices), ag__.ld(indices)), None, fscope_1)
     34                         tag_inds = ag__.converted_call(ag__.ld(tf).reshape, (ag__.ld(tag_inds), [-1, 1]), None, fscope_1)
     35                         indices = ag__.converted_call(ag__.ld(tf).concat, ([ag__.ld(indices), ag__.ld(tag_inds)],), dict(axis=1), fscope_1)

ValueError: in user code:

    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1401, in train_function  *
        return step_function(self, iterator)
    File "<ipython-input-12-10fd54fd5d04>", line 93, in crf_loss  *
        log_likelihood, self.transitions = crf_log_likelihood(
    File "/usr/local/lib/python3.10/dist-packages/tensorflow_addons/text/crf.py", line 241, in crf_log_likelihood  *
        sequence_scores = crf_sequence_score(
    File "/usr/local/lib/python3.10/dist-packages/tensorflow_addons/text/crf.py", line 82, in _single_seq_fn  *
        tag_inds = tf.gather_nd(tag_indices, indices)

    ValueError: indices.shape[-1] must be <= params.rank, but saw indices shape: [?,2] and params shape: [?] for '{{node crf_loss/cond/GatherNd}} = GatherNd[Tindices=DT_INT32, Tparams=DT_INT32](crf_loss/cond/GatherNd/crf_loss/Cast, crf_loss/cond/concat)' with input shapes: [?], [?,2].
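
From the message it looks like the tag indices reaching crf_log_likelihood have rank 1 (params shape: [?]) while the gathered batch indices are rank 2 ([?,2]), whereas as far as I know crf_log_likelihood expects tag_indices of shape [batch_size, max_seq_len]. If it helps, this is the kind of shape check I would add inside crf_loss just before the crf_log_likelihood call (a debugging sketch, using the names from my layer above):

# Debugging sketch inside crf_loss, right before crf_log_likelihood:
tf.print("y_true:", tf.shape(y_true))                      # expected [batch, MAX_LEN] integer tags
tf.print("y_pred:", tf.shape(y_pred))                      # expected [batch, MAX_LEN, number_tags]
tf.print("seq_lens:", tf.shape(self.sequence_lengths))     # expected [batch]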

How can I solve this problem?
