I am learning to use pandasai. I tried to use SmartDataframe, but I got the error shown below and I have no idea what causes it. I also get the same issue when using SmartDatalake.
Here is my code (Python 3.11.5):
!pip install pandasai
from pandasai import SmartDataframe
from pandasai.llm.openai import OpenAI
from pathlib import Path
# Minimal reproduction: build a SmartDataframe over a small in-memory table
# using an OpenAI LLM configured from a local .env file.
# NOTE(review): `load_dotenv`, `os` and `pd` are used here but not imported in
# the lines shown — presumably they are imported elsewhere in the notebook;
# confirm.

# Read the OpenAI settings from the .env file into the process environment.
env_path = "mytry.env"
load_dotenv(dotenv_path=env_path, verbose=True)
openai_api_base = os.environ.get("OPENAI_API_BASE")
openai_api_key = os.environ.get("OPENAI_API_KEY")

# Create the LLM wrapper, then override its API base with the configured one.
llm = OpenAI(api_token=openai_api_key)
llm.api_base = openai_api_base

# Sample table: five employees, one row per employee.
employees_data = {
'EmployeeID': [1, 2, 3, 4, 5],
'Name': ['John', 'Emma', 'Liam', 'Olivia', 'William'],
'Department': ['HR', 'Sales', 'IT', 'Marketing', 'Finance']
}
employees_df = pd.DataFrame(employees_data)

# This is the call that raises the UnicodeDecodeError in the traceback below.
# NOTE(review): the traceback shows the failure inside `Cache()`, which is only
# constructed when `config.enable_cache` is truthy and ends in
# `duckdb.connect(self.filepath)`. Passing `"enable_cache": False` in `config`
# may avoid that code path — confirm against the pandasai documentation.
dl = SmartDataframe(employees_df, config={"llm":llm})
And the error is:
UnicodeDecodeError Traceback (most recent call last)
f:\JupyterFile\artefact\chat-data-main\chat-data-main\myfirst_try.ipynb Cell 2 line 2
14 employees_data = {
15 'EmployeeID': [1, 2, 3, 4, 5],
16 'Name': ['John', 'Emma', 'Liam', 'Olivia', 'William'],
17 'Department': ['HR', 'Sales', 'IT', 'Marketing', 'Finance']
18 }
19 employees_df = pd.DataFrame(employees_data)
---> 21 dl = SmartDataframe(employees_df, config={"llm":llm})
File d:\anaconda\envs\artefact\Lib\site-packages\pandasai\smart_dataframe\__init__.py:280, in SmartDataframe.__init__(self, df, name, description, custom_head, config, logger)
278 self._table_description = description
279 self._table_name = name
--> 280 self._lake = SmartDatalake([self], config, logger)
282 # set instance type in SmartDataLake
283 self._lake.set_instance_type(self.__class__.__name__)
File d:\anaconda\envs\artefact\Lib\site-packages\pandasai\smart_datalake\__init__.py:112, in SmartDatalake.__init__(self, dfs, config, logger, memory, cache)
110 self._cache = cache
111 elif self._config.enable_cache:
--> 112 self._cache = Cache()
114 context = Context(self._config, self.logger, self.engine)
116 if self._config.response_parser:
File d:\anaconda\envs\artefact\Lib\site-packages\pandasai\helpers\cache.py:30, in Cache.__init__(self, filename, abs_path)
27 os.makedirs(cache_dir, mode=DEFAULT_FILE_PERMISSIONS, exist_ok=True)
29 self.filepath = os.path.join(cache_dir, f"{filename}.db")
---> 30 self.connection = duckdb.connect(self.filepath)
31 self.connection.execute(
32 "CREATE TABLE IF NOT EXISTS cache (key STRING, value STRING)"
33 )
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xc1 in position 102: invalid start byte
Thanks!