I have worked on text classification and sentiment analysis projects, and now I am trying to build a question-answering project. I tokenize each question (keeping `input_ids` and `attention_mask`), and for the target I tokenize the answer, keeping only its `input_ids` and saving them as `labels`. After that, I load a RoBERTa model that was fine-tuned on QA datasets, add an adapter, and train only that adapter for a specific type of question. I use `AdapterTrainer` from the adapters library, but it does not accept my inputs and I don't know why. Can anyone tell me what is wrong here?
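For context, this is roughly how I build the tokenized columns before saving them to CSV (a simplified sketch; the `question`/`answer` column names and the max lengths are just placeholders for my real preprocessing):
'''
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("deepset/roberta-base-squad2")

def tokenize_row(row):
    # Tokenize the question: keep input_ids and attention_mask
    question_enc = tokenizer(
        row["question"], truncation=True, padding="max_length", max_length=384
    )
    # Tokenize the answer: keep only its input_ids and store them as labels
    answer_enc = tokenizer(
        row["answer"], truncation=True, padding="max_length", max_length=64
    )
    question_enc["labels"] = answer_enc["input_ids"]
    return question_enc
'''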
'''
from datasets import load_dataset
training_dataset = load_dataset("csv", data_files="/content/drive/My Drive/QA/training.csv", split='train')
training_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
testing_dataset = load_dataset("csv", data_files="/content/drive/My Drive/QA/testing.csv", split='train')
testing_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
from transformers import AutoConfig
from adapters import AutoAdapterModel
config1 = AutoConfig.from_pretrained("deepset/roberta-base-squad2")
model = AutoAdapterModel.from_pretrained("deepset/roberta-base-squad2", config=config1)
# Add a new adapter
model.add_adapter("qa", config="lora")
# Activate the adapter and train only its weights (the base model stays frozen)
model.train_adapter("qa")
import numpy as np
from transformers import TrainingArguments, EvalPrediction
from adapters import AdapterTrainer
training_args = TrainingArguments(
learning_rate=2e-5,
num_train_epochs=5,
per_device_train_batch_size=16,
per_device_eval_batch_size=16,
logging_steps=200,
output_dir="./training_output",
overwrite_output_dir=True,
# The next line is important to ensure the dataset labels are properly passed to the model
remove_unused_columns=False,
)
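# Note: compute_metrics is defined earlier in my notebook. This is only a minimal
# placeholder (not my real metric) so the snippet is self-contained:
def compute_metrics(p: EvalPrediction):
    # my actual metric computation is omitted here
    return {}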
trainer = AdapterTrainer(
model=model,
args=training_args,
train_dataset=training_dataset,
eval_dataset=testing_dataset,
compute_metrics=compute_metrics,
)
trainer.train()
'''

The error is:

'''
TypeError                                 Traceback (most recent call last)
in <cell line: 1>()
----> 1 trainer.train()

3 frames
/usr/local/lib/python3.10/dist-packages/transformers/trainer.py in train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
   1553             hf_hub_utils.enable_progress_bars()
   1554         else:
-> 1555             return inner_training_loop(
   1556                 args=args,
   1557                 resume_from_checkpoint=resume_from_checkpoint,

/usr/local/lib/python3.10/dist-packages/transformers/trainer.py in _inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
   1858
   1859             with self.accelerator.accumulate(model):
-> 1860                 tr_loss_step = self.training_step(model, inputs)
   1861
   1862             if (

/usr/local/lib/python3.10/dist-packages/transformers/trainer.py in training_step(self, model, inputs)
   2716         """
   2717         model.train()
-> 2718         inputs = self._prepare_inputs(inputs)
   2719
   2720         if is_sagemaker_mp_enabled():

/usr/local/lib/python3.10/dist-packages/transformers/trainer.py in _prepare_inputs(self, inputs)
   2672             raise ValueError(
   2673                 "The batch received was empty, your model won't be able to train on it. Double-check that your "
-> 2674                 f"training dataset contains keys expected by the model: {','.join(self._signature_columns)}."
   2675             )
   2676         if self.args.past_index >= 0 and self._past is not None:

TypeError: can only join an iterable
'''
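If it helps with the diagnosis, this is the quick sanity check I plan to run on the datasets right before calling trainer.train() (just a sketch; I can post its output if needed):
'''
# Quick check of what the trainer will actually receive
print(training_dataset.column_names)   # expecting 'input_ids', 'attention_mask', 'labels'
print(training_dataset.format)         # which columns set_format kept as torch tensors
print(training_dataset[0])             # one example, to confirm the values are token id lists, not strings
'''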