I have exported this Hugging Face model to ONNX: https://huggingface.co/dbmdz/bert-large-cased-finetuned-conll03-english
I also came across a blog post where the author overrode the transformers pipeline to accept a .onnx model: https://towardsdatascience.com/nlp-transformers-pipelines-with-onnx-9b890d015723
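For the export itself I followed the transformers.onnx approach from that post, roughly (paths are mine):

python -m transformers.onnx --model=dbmdz/bert-large-cased-finetuned-conll03-english --feature=token-classification onnx/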
After running the code I am getting
TypeError: 'BatchEncoding' object is not an iterator
and I am not finding much documentation on the error.
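As far as I can tell, the error just means something is calling next() directly on the tokenizer output: a BatchEncoding is a dict-like container with no __next__, so this snippet reproduces the exact message on its own:

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("dbmdz/bert-large-cased-finetuned-conll03-english")
encoding = tokenizer("hello world", return_tensors="pt")  # a BatchEncoding (dict-like), not an iterator
next(encoding)  # TypeError: 'BatchEncoding' object is not an iterator

But I do not see where my override would trigger that.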
This is the full file:
import torch
from onnxruntime import (
    InferenceSession, SessionOptions, GraphOptimizationLevel
)
from transformers import (
    TokenClassificationPipeline, AutoTokenizer, AutoModelForTokenClassification
)

options = SessionOptions()  # initialize session options
options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL

session = InferenceSession(
    "mynewModel.onnx", sess_options=options, providers=["CPUExecutionProvider"]
)
# Disable the session.run() fallback mechanism; it prevents a reset of the execution provider
session.disable_fallback()

class OnnxTokenClassificationPipeline(TokenClassificationPipeline):

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def _forward(self, model_inputs):
        """
        Forward pass through the model. This method is not to be called by the user directly and is only used
        by the pipeline to perform the actual predictions.

        This is where we define the actual process to do inference with the ONNX model and the session created
        before.
        """
        # This comes from the original implementation of the pipeline
        special_tokens_mask = model_inputs.pop("special_tokens_mask")
        offset_mapping = model_inputs.pop("offset_mapping", None)
        sentence = model_inputs.pop("sentence")

        inputs = {k: v.cpu().detach().numpy() for k, v in model_inputs.items()}  # dict of numpy arrays

        outputs_name = session.get_outputs()[0].name  # get the name of the output tensor
        logits = session.run(output_names=[outputs_name], input_feed=inputs)[0]  # run the session
        logits = torch.tensor(logits)  # convert to a torch tensor to stay compatible with the original implementation

        return {
            "logits": logits,
            "special_tokens_mask": special_tokens_mask,
            "offset_mapping": offset_mapping,
            "sentence": sentence,
            **model_inputs,
        }
    # We need to override the preprocess method because the ONNX model expects the attention mask as an input
    # along with the token IDs.
    def preprocess(self, sentence, offset_mapping=None):
        truncation = True if self.tokenizer.model_max_length and self.tokenizer.model_max_length > 0 else False
        model_inputs = self.tokenizer(
            sentence,
            return_attention_mask=True,  # This is the only difference from the original implementation
            return_tensors=self.framework,
            truncation=truncation,
            return_special_tokens_mask=True,
            return_offsets_mapping=self.tokenizer.is_fast,
        )

        if offset_mapping:
            model_inputs["offset_mapping"] = offset_mapping

        model_inputs["sentence"] = sentence

        return model_inputs

model_name_from_hub = "dbmdz/bert-large-cased-finetuned-conll03-english"

tokenizer = AutoTokenizer.from_pretrained(model_name_from_hub)
model = AutoModelForTokenClassification.from_pretrained(model_name_from_hub)

onnx_pipeline = OnnxTokenClassificationPipeline(
    task="ner",
    model=model,
    tokenizer=tokenizer,
    framework="pt",
    aggregation_strategy="simple",
)

mySequence = "JJ is going to go the Mall with his pals later."
onnx_pipeline(mySequence)
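To sanity-check the export itself, independent of the pipeline, I believe the session can be queried directly like this (a sketch; it assumes the exported graph kept the standard tokenizer input names such as input_ids / attention_mask / token_type_ids):

from onnxruntime import InferenceSession
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("dbmdz/bert-large-cased-finetuned-conll03-english")
session = InferenceSession("mynewModel.onnx", providers=["CPUExecutionProvider"])

encoded = tokenizer("JJ is going to go the Mall with his pals later.", return_tensors="np")
input_names = {i.name for i in session.get_inputs()}           # names the exported graph declares
feed = {k: v for k, v in encoded.items() if k in input_names}  # drop anything the graph does not take
logits = session.run(None, feed)[0]                            # None = fetch all outputs
print(logits.shape)  # should be (1, sequence_length, num_labels)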
For reference, running the model before export on the same input sentence ("JJ is going to go the Mall with his pals later.") gives output in this format: [{'entity_group': 'PER', 'score': 0.7977358, 'word': 'JJ', 'start': 0, 'end': 2}, {'entity_group': 'LOC', 'score': 0.9910767, 'word': 'Mall', 'start': 22, 'end': 26}]
I am trying to get the same output from my exported .onnx model.
EDIT: Full stack trace:
Some weights of the model checkpoint at dbmdz/bert-large-cased-finetuned-conll03-english were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Traceback (most recent call last):
File "c:\Users\L7400\Desktop\MyONNXtest\myprojectname\src\main\java\mygroupid\NewWorkspace\onnxPipeline.py", line 85, in <module>
onnx_pipeline(mySequence)
File "C:\Users\L7400\AppData\Local\Programs\Python\Python311\Lib\site-packages\transformers\pipelines\token_classification.py", line 249, in __call__
return super().__call__(inputs, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\L7400\AppData\Local\Programs\Python\Python311\Lib\site-packages\transformers\pipelines\base.py", line 1154, in __call__
return next(
^^^^^
File "C:\Users\L7400\AppData\Local\Programs\Python\Python311\Lib\site-packages\transformers\pipelines\pt_utils.py", line 124, in __next__
item = next(self.iterator)
^^^^^^^^^^^^^^^^^^^
File "C:\Users\L7400\AppData\Local\Programs\Python\Python311\Lib\site-packages\transformers\pipelines\pt_utils.py", line 266, in __next__
processed = self.infer(next(self.iterator), **self.params)
^^^^^^^^^^^^^^^^^^^
File "C:\Users\L7400\AppData\Local\Programs\Python\Python311\Lib\site-packages\torch\utils\data\dataloader.py", line 630, in __next__
data = self._next_data()
^^^^^^^^^^^^^^^^^
File "C:\Users\L7400\AppData\Local\Programs\Python\Python311\Lib\site-packages\torch\utils\data\dataloader.py", line 674, in _next_data
data = self._dataset_fetcher.fetch(index) # may raise StopIteration
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\L7400\AppData\Local\Programs\Python\Python311\Lib\site-packages\torch\utils\data\_utils\fetch.py", line 32, in fetch
data.append(next(self.dataset_iter))
^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\L7400\AppData\Local\Programs\Python\Python311\Lib\site-packages\transformers\pipelines\pt_utils.py", line 183, in __next__
processed = next(self.subiterator)
^^^^^^^^^^^^^^^^^^^^^^
TypeError: 'BatchEncoding' object is not an iterator
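Following the traceback into pt_utils.py, line 183 is processed = next(self.subiterator), where subiterator is whatever my preprocess returned, so it looks like this version of transformers expects preprocess to be a generator rather than to return a BatchEncoding directly. If I am reading that right, the override would presumably need to end in a yield instead of a return, something like (untested sketch, same tokenization as in the full file above):

    def preprocess(self, sentence, offset_mapping=None):
        # ... same tokenizer call as in the full file above ...
        if offset_mapping:
            model_inputs["offset_mapping"] = offset_mapping
        model_inputs["sentence"] = sentence
        yield model_inputs  # generator, so the pipeline's next(self.subiterator) has something to consume

Is that the right fix, or am I misreading the traceback?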