Unable to read a CSV using AzureOPENAI and Langchain

145 Views Asked by At

I am trying to build a bot on top of a CSV file using Azure OpenAI (as the LLM) and the LangChain framework, but when I run it I get the error "UnicodeDecodeError: 'utf-8' codec can't decode byte 0x92 in position 12062: invalid start byte".

Please refer to my code snippet below and correct me if something is wrong:

from langchain_experimental.agents.agent_toolkits.csv.base import create_csv_agent
from langchain_openai import AzureOpenAI
from dotenv import load_dotenv
import os
import streamlit as st

def main():
    """Run a Streamlit page that answers questions about ``Data.csv``.

    Loads environment variables from a ``.env`` file, sets the Azure OpenAI
    connection settings in ``os.environ``, builds a LangChain CSV agent over
    ``Data.csv``, and writes the agent's answer to whatever the user types
    into the text box.
    """
    load_dotenv()

    # Connection settings are exported as environment variables; the
    # "xxxx" values are placeholders to be replaced with real ones.
    os.environ["OPENAI_API_TYPE"] = "azure"
    os.environ["OPENAI_API_VERSION"] = "2023-05-15"
    os.environ["AZURE_OPENAI_ENDPOINT"] = "xxxx"
    os.environ["OPENAI_API_KEY"] = "xxxx"

    st.set_page_config(page_title="XXXXX")
    st.header("XXXXX ")

    llm = AzureOpenAI(
        deployment_name="name",
        model_name="gpt-3.5-turbo",
    )

    # pandas decodes CSV files as UTF-8 unless told otherwise, which fails
    # with "can't decode byte 0x92" on this file. 0x92 is the right single
    # quotation mark in Windows-1252, so the file is presumably a
    # Windows/Excel export -- confirm the real encoding of Data.csv.
    # create_csv_agent forwards pandas_kwargs to pd.read_csv.
    agent = create_csv_agent(
        llm,
        'Data.csv',
        pandas_kwargs={"encoding": "cp1252"},
    )
    user_question = st.text_input("Ask your question ")

    # Only query the agent once the user has actually entered something.
    if user_question:
        with st.spinner(text="In progress..."):
            st.write(agent.run(user_question))
    
if __name__ == "__main__":
    main()

**Error Message below:**
UnicodeDecodeError: 'utf-8' codec can't decode byte 0x92 in position 12062: invalid start byte

Traceback:

File "C:\Program Files\Python39\lib\site-packages\streamlit\runtime\scriptrunner\script_runner.py", line 535, in _run_script
    exec(code, module.__dict__)
File "C:\Users\XXXX\XXXX\main.py", line 38, in <module>
    main()
File "C:\Users\XXXX\XXXX\main.py", line 27, in main
    agent = create_csv_agent(llm, 'Data.csv')
File "C:\Program Files\Python39\lib\site-packages\langchain_experimental\agents\agent_toolkits\csv\base.py", line 28, in create_csv_agent
    df = pd.read_csv(path, **_kwargs)
File "C:\Program Files\Python39\lib\site-packages\pandas\io\parsers\readers.py", line 1024, in read_csv
    return _read(filepath_or_buffer, kwds)
File "C:\Program Files\Python39\lib\site-packages\pandas\io\parsers\readers.py", line 618, in _read
    parser = TextFileReader(filepath_or_buffer, **kwds)
File "C:\Program Files\Python39\lib\site-packages\pandas\io\parsers\readers.py", line 1618, in __init__
    self._engine = self._make_engine(f, self.engine)
File "C:\Program Files\Python39\lib\site-packages\pandas\io\parsers\readers.py", line 1896, in _make_engine
    return mapping[engine](f, **self.options)
File "C:\Program Files\Python39\lib\site-packages\pandas\io\parsers\c_parser_wrapper.py", line 93, in __init__
    self._reader = parsers.TextReader(src, **kwds)
File "parsers.pyx", line 574, in pandas._libs.parsers.TextReader.__cinit__
File "parsers.pyx", line 663, in pandas._libs.parsers.TextReader._get_header
File "parsers.pyx", line 874, in pandas._libs.parsers.TextReader._tokenize_rows
File "parsers.pyx", line 891, in pandas._libs.parsers.TextReader._check_tokenize_status
File "parsers.pyx", line 2053, in pandas._libs.parsers.raise_parser_error
0

There are 0 best solutions below