I want to use a crf ner inside my spacy pipeline (spacy 3)

224 Views Asked by At

I have trained a CRF model and stored it locally using joblib. I want to use this model inside my pipeline in place of the default spaCy NER. I tried it using spacy_crfsuite, but I get an assertion error (full traceback below). Any ideas?

    import spacy
    from spacy.language import Language
    # CRFExtractor is instantiated by the factory below, so it has to be
    # imported together with CRFEntityExtractor.
    from spacy_crfsuite import CRFEntityExtractor, CRFExtractor
    
    @Language.factory("ner-crf-3")
    def create_my_component(nlp, name):
        """Build the CRF-based entity extractor registered as "ner-crf-3".

        Args:
            nlp: The pipeline object; forwarded to CRFEntityExtractor.
            name: The component name supplied by spaCy; not used here.

        Returns:
            A CRFEntityExtractor wrapping the extractor loaded from disk.
        """
        # NOTE(review): the traceback below fails on
        # `assert isinstance(ent_tagger, CRF)`, so this file presumably does
        # not contain the CRF object spacy_crfsuite expects -- confirm how the
        # model was trained and saved.
        crf_extractor = CRFExtractor().from_disk("spacy_crfsuite_trained_spacy3.bz2")
        return CRFEntityExtractor(nlp, crf_extractor=crf_extractor)
    
    # Load the pretrained pipeline without its built-in "ner" component, then
    # add the CRF component registered above.
    nlp = spacy.load("en_core_web_md", disable=["ner"])
    nlp.add_pipe("ner-crf-3")
    
    # And use natively ..
    doc = nlp(
        "George Walker Bush (born July 6, 1946) is an American politician and businessman "
        "who served as the 43rd president of the United States from 2001 to 2009.")
    
    # Print every entity found in the document together with its label.
    for ent in doc.ents:
        print(ent, "-", ent.label_)

Here is the output

---------------------------------------------------------------------------
AssertionError                            Traceback (most recent call last)
/var/folders/v1/t149q84d20s60ktyxvb3bft40000gn/T/ipykernel_16022/3378811215.py in <module>
     12 
     13 nlp = spacy.load("en_core_web_md", disable=["ner"])
---> 14 nlp.add_pipe("ner-crf-3")
     15 
     16 doc = nlp(

~/opt/anaconda3/envs/mini_project_spacy3/lib/python3.9/site-packages/spacy/language.py in add_pipe(self, factory_name, name, before, after, first, last, source, config, raw_config, validate)
    793                     lang_code=self.lang,
    794                 )
--> 795             pipe_component = self.create_pipe(
    796                 factory_name,
    797                 name=name,

~/opt/anaconda3/envs/mini_project_spacy3/lib/python3.9/site-packages/spacy/language.py in create_pipe(self, factory_name, name, config, raw_config, validate)
    672         # We're calling the internal _fill here to avoid constructing the
    673         # registered functions twice
--> 674         resolved = registry.resolve(cfg, validate=validate)
    675         filled = registry.fill({"cfg": cfg[factory_name]}, validate=validate)["cfg"]
    676         filled = Config(filled)

~/opt/anaconda3/envs/mini_project_spacy3/lib/python3.9/site-packages/confection/__init__.py in resolve(cls, config, schema, overrides, validate)
    726         validate: bool = True,
...
--> 101         assert isinstance(ent_tagger, CRF)
    102 
    103         self.ent_tagger = ent_tagger

AssertionError: 
0

There are 0 best solutions below