Is there any way to perform automatic hyperparameter tuning when using a custom TensorFlow model with a manual training loop?


I took the Transformer-XL model (TFTransfoXL) from Hugging Face and tried to perform automatic hyperparameter tuning, but I keep getting errors. The method I'm currently using is hyperopt.
The problem is that after the first training run finishes, the hyperparameters are changed and training starts again, and then the error below is raised from the code decorated with @tf.function.
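
For context, the objective passed to fmin further down is wired roughly like this (a simplified sketch, not the exact code; build_config is an illustrative placeholder for however the sampled values get copied into config_xl):

from hyperopt import STATUS_OK

def objective(params):
    # hp.quniform returns floats, so the sampled values are cast to int
    config = build_config(config_xl,
                          n_layer=int(params['num_layer']),
                          n_head=int(params['n_head']))   # build_config is an illustrative helper
    train_dataset, test_dataset, _ = load_TFdataset(config)
    model = train(train_dataset, config)        # train() calls the @tf.function-decorated train_step;
                                                # the second trial fails inside this call
    loss, acc, precision, recall, f1 = evaluate(model, test_dataset, config)
    return {'loss': loss, 'status': STATUS_OK}

The training code: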



@tf.function
def train_step(model, data1, data2, target, mems, optimizer):
    with tf.GradientTape() as tape:
        outputs = model(concepts=data1, responses=data2, labels=target, mems=mems)
        logit = outputs.logit
        mems = outputs.mems
        logit_mx = target != -100
        logit_value = logit[logit_mx]
        logit_value = tf.reshape(logit_value, [-1, config_xl.R_vocab_size])
        labels = target[logit_mx]

        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logit_value)
        # batch_loss = tf.reduce_sum(loss) / valid_samples
        mean_loss = tf.reduce_mean(loss)
        train_loss(loss)
        train_accuracy(labels, logit_value)
        predictions = tf.nn.softmax(logit_value)
        train_auc(tf.one_hot(labels, depth=predictions.shape[1]), predictions)

    gradients = tape.gradient(mean_loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    return mems, mean_loss


def evaluate(model,test_dataset,config_xl):
    total_loss = 0.0
    num_batches = 0
    evaluation_metrics = []
    test_mems = None

    for input_data, masked_responses, responses in tqdm(test_dataset, desc='eval'):

        outputs = model(concepts=input_data, responses=masked_responses, labels=responses, mems=test_mems, training=False)
        logit = outputs.logit
        test_mems = outputs.mems

        logit_mx = responses != -100
        logit_value = logit[logit_mx]
        logit_value = tf.reshape(logit_value, [-1, config_xl.R_vocab_size])
        labels = responses[logit_mx]

        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logit_value)
        mean_loss = tf.reduce_mean(loss)

        # Update the running metrics
        predicted_labels = tf.argmax(logit_value, axis=1)
        predictions = tf.nn.softmax(logit_value)

        test_auc(tf.one_hot(labels, depth=predictions.shape[1]), predictions)
        test_precision(labels, predicted_labels)
        test_recall(labels, predicted_labels)
        test_accuracy(labels, logit_value)
        test_loss(loss)

        precision = test_precision.result().numpy()
        recall = test_recall.result().numpy()
        f1_score = 2 * (precision * recall) / (precision + recall + 1e-7)

        evaluation_metrics.append(test_accuracy.result().numpy())

        total_loss += mean_loss.numpy()
        num_batches += 1

    # return the aggregated metrics in the order main() unpacks them
    return (test_loss.result().numpy(), test_accuracy.result().numpy(),
            test_precision.result().numpy(), test_recall.result().numpy(), f1_score)


def train(train_dataset,config_xl):
    try:
        learning_rate = CustomSchedule(config_xl.d_model)

        optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=0.9, beta_2=0.98, epsilon=1e-9)
        model = TFTransfoXLMLMHeadModel(config=config_xl)

        loss_values = []
        num_batches = 0

        for epoch in range(config_xl.epoch):
            start = time.time()
            total_loss = 0.0
            mems = None                   
            for input_data, masked_responses, responses in tqdm(train_dataset, desc='train'):
                mems, loss_value = train_step(model, input_data, masked_responses, responses, mems, optimizer)
                num_batches += 1
                total_loss += loss_value.numpy()

        return model

    except Exception as e:
        # logging the exception here produces the ERROR:root output shown below
        logging.error('Error: %s', e)



def main(config_xl):
    train_dataset, test_dataset, dkeyid2idx = load_TFdataset(config_xl)
    model = train(train_dataset.take(10), config_xl)
    test_loss, test_acc, test_precision, test_recall, test_f1_score = evaluate(model, test_dataset, config_xl)


if __name__ == "__main__":

    config_xl = TransfoXLConfig(
            d_embed=args.d_embed,
            d_head=args.d_head,
            d_model=args.d_model,
            mem_len=args.mem_len,
            n_head=args.n_head,
            n_layer=args.n_layer,
            eos_token=args.eos_token,
            mask_token=args.mask_token,
            batch_size=args.batch_size,
            tgt_len=args.tgt_len,
            C_vocab_size=args.C_vocab_size,
            Q_vocab_size=args.Q_vocab_size,
            R_vocab_size=args.R_vocab_size,
            epoch=args.epoch,
            mode=args.mode,  # concepts or questions
            tf_data_dir=args.tf_data_dir,
            tensorboard_log_dir=args.tensorboard_log_dir,
            tensorboard_emb_log_dir=args.tensorboard_emb_log_dir,
            model_save_dir=args.model_save_dir
        )
    
    # hyperparameter search space
    # d_inner, num_layer, n_head, and dropout may vary freely; d_embedding and d_model must stay equal
    space = {
        'num_layer': hp.quniform('num_layer', low=4, high=12, q=2),  # even values between 4 and 12
        'n_head': hp.quniform('n_head', low=6, high=12, q=2),        # even values between 6 and 12
    }
    


    logging.info('config_xl:  %s',config_xl)
    

    # Create a new MLflow Experiment
    mlflow.set_experiment("MLflow Test")

    # Start an MLflow run
    with mlflow.start_run():
        #set a run name
        mlflow.set_tag("mlflow.runName", '{}ep_{}mem_{}'.format(args.epoch,args.mem_len, args.mode))
        
        # Set a tag that we can use to remind ourselves what this run was for
        mlflow.set_tag("Training Info", '{}ep_{}mem_{}'.format(args.epoch,args.mem_len, args.mode))

        # Log the hyperparameters
        mlflow.log_params(config_xl.to_dict())
        # mlflow.tensorflow.autolog()


        # main(config_xl)
        trials = Trials()
        best = fmin(
            fn=objective,
            space=space,
            algo=tpe.suggest,
            max_evals=8,
            trials=trials,
        )
        train_dataset, test_dataset, dkeyid2idx = load_TFdataset(config_xl)
        input_data, masked_responses, responses = next(iter(test_dataset))

        input_schema = Schema(
            [
                TensorSpec(np.dtype(np.int32), (-1, len(input_data[1].numpy())), "input_data"),
                TensorSpec(np.dtype(np.int32), (-1, len(masked_responses[1].numpy())), "responses"),
            ]
        )

        signature = ModelSignature(input_schema)

      
        best_run = sorted(trials.results, key=lambda x: x["loss"])[0]

The full error from the second trial:

ERROR:root:Error: in user code:

    File "train_args_mlflows.py", line 113, in train_step  *
        outputs = model(concepts=data1,responses=data2, labels=target, mems=mems)
    File "/home/jun/miniconda3/envs/new1/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 70, in error_handler  **
        raise e.with_traceback(filtered_tb) from None
    File "/tmp/__autograph_generated_filej0_kgex_.py", line 26, in tf__call
        transformer_outputs = ag__.converted_call(ag__.ld(self).transformer, (ag__.ld(inputs)['concepts'], ag__.ld(inputs)['responses'], ag__.ld(inputs)['mems'], ag__.ld(inputs)['head_mask'], ag__.ld(inputs)['inputs_embeds'], ag__.ld(inputs)['output_attentions'], ag__.ld(inputs)['output_hidden_states'], ag__.ld(inputs)['return_dict']), dict(training=ag__.ld(inputs)['training']), fscope)
    File "/tmp/__autograph_generated_filekwj7svil.py", line 126, in tf__call
        ag__.if_stmt((ag__.ld(inputs)['inputs_embeds'] is not None), if_body_6, else_body_6, get_state_6, set_state_6, ('word_emb',), 1)
    File "/tmp/__autograph_generated_filekwj7svil.py", line 120, in else_body_6
        word_emb_C = ag__.converted_call(ag__.ld(self).word_emb_C, (ag__.ld(inputs)['concepts'],), None, fscope)

    ValueError: Exception encountered when calling layer 'tf_transfo_xlmlm_head_model_1' (type TFTransfoXLMLMHeadModel).
    
    in user code:
    
        File "/home/jun/workspace/KT/models/model_for_kt.py", line 1782, in call  *
            transformer_outputs = self.transformer(
        File "/home/jun/miniconda3/envs/new1/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 70, in error_handler  **
            raise e.with_traceback(filtered_tb) from None
        File "/tmp/__autograph_generated_filekwj7svil.py", line 126, in tf__call
            ag__.if_stmt((ag__.ld(inputs)['inputs_embeds'] is not None), if_body_6, else_body_6, get_state_6, set_state_6, ('word_emb',), 1)
        File "/tmp/__autograph_generated_filekwj7svil.py", line 120, in else_body_6
            word_emb_C = ag__.converted_call(ag__.ld(self).word_emb_C, (ag__.ld(inputs)['concepts'],), None, fscope)
    
        ValueError: Exception encountered when calling layer 'transformer' (type TFTransfoXLMLMMainLayer).
        
        in user code:
        
            File "/home/jun/workspace/KT/models/model_for_kt.py", line 1171, in call  *
                word_emb_C = self.word_emb_C(inputs["concepts"])
            File "/home/jun/miniconda3/envs/new1/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 70, in error_handler  **
                raise e.with_traceback(filtered_tb) from None
        
            ValueError: tf.function only supports singleton tf.Variables created on the first call. Make sure the tf.Variable is only created once or created outside tf.function. See https://www.tensorflow.org/guide/function#creating_tfvariables for more information.
        
        
        Call arguments received by layer 'transformer' (type TFTransfoXLMLMMainLayer):
          • concepts=tf.Tensor(shape=(65, 140), dtype=int32)
          • responses=tf.Tensor(shape=(65, 140), dtype=int32)
          • mems=None
          • head_mask=None
          • inputs_embeds=None
          • output_attentions=False
          • output_hidden_states=False
          • return_dict=True
          • labels=None
          • training=False
          • kwargs=<class 'inspect._empty'>
    
    
    Call arguments received by layer 'tf_transfo_xlmlm_head_model_1' (type TFTransfoXLMLMHeadModel):
      • concepts=tf.Tensor(shape=(65, 140), dtype=int32)
      • responses=tf.Tensor(shape=(65, 140), dtype=int32)
      • mems=None
      • head_mask=None
      • inputs_embeds=None
      • output_attentions=None
      • output_hidden_states=None
      • return_dict=None
      • labels=tf.Tensor(shape=(65, 140), dtype=int32)
      • training=False
      • kwargs=<class 'inspect._empty'>
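
As far as I can tell this is the documented tf.function restriction that tf.Variables may only be created on the first call (the link in the error message points to it). Below is a minimal sketch of what I believe is the same failure mode, illustrative only and not my actual code:

import tensorflow as tf

@tf.function
def step(model, x):
    # the layer builds and creates its variables inside the traced function
    return tf.reduce_mean(model(x))

step(tf.keras.layers.Dense(4), tf.ones((2, 3)))   # trial 1: variables created on the first call, OK
step(tf.keras.layers.Dense(8), tf.ones((2, 3)))   # trial 2 with new hyperparameters: ValueError about
                                                  # singleton tf.Variables, like the traceback above

That seems to match my situation: every hyperopt trial builds a fresh TFTransfoXLMLMHeadModel, but train_step stays the same @tf.function object across trials.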

Is there any automated hyperparameter tuning method that I can use in the code I provided, even if it's not hyperopt?
