I have a subroutine that takes a collection of Outlook messages, saves each attached zip file as a temporary file, and reads the tabular data inside it into pandas DataFrames. It works fine for CSV files and most Excel files, but my function fails whenever there is an .xls file stored inside the zip archive attached to the message.
Here are the two functions I'm using to read the files:
def isexcel(file):
    """Return True if *file* is named like a spreadsheet pandas can read as Excel.

    Args:
        file: Any object exposing a ``filename`` attribute (for example a
            ``zipfile.ZipInfo`` entry). Only the name is inspected; the
            file contents are never read.

    Returns:
        bool: True when the filename's extension is one of the recognised
        spreadsheet extensions, otherwise False.
    """
    # Compare case-insensitively so names such as "REPORT.XLS" are recognised.
    extension = os.path.splitext(file.filename)[1].lower()
    return extension in ('.xls', '.xlsx', '.xlsm', '.xlsb', '.odf', '.odt', '.ods')
def zipattach_to_dfs(attachment, extract_fn=None):
    """Read every tabular file inside a zipped attachment into a DataFrame.

    The attachment is saved to a temporary file, opened read-only as a zip
    archive, and each member is parsed with pandas. The temporary file is
    removed before returning, whether or not parsing succeeded.

    Args:
        attachment: Object exposing ``SaveAsFile(path)``, which is called
            with the temporary file's path.
            # presumably an Outlook attachment COM object; verify against caller
        extract_fn: Currently unused; kept so existing callers that pass it
            keep working.

    Returns:
        dict: Maps ``"<member name> (<year>-<month>-<day>)"`` to the
        DataFrame read from that member (read with ``header=None``).

    Raises:
        NotImplementedError: If the archive contains a file that is neither
            a recognised spreadsheet nor a ``.csv`` file.
    """
    # Local imports keep this function self-contained.
    import io
    import os

    df_objects = {}

    # Reserve a path and close the handle straight away: SaveAsFile writes
    # by path, and a handle still held open here can block that write on
    # Windows.
    with NamedTemporaryFile(delete=False) as tmp:
        tmp_path = tmp.name

    try:
        attachment.SaveAsFile(tmp_path)
        # The archive is only read, so open it read-only. Append mode can
        # silently treat an unreadable file as a brand-new empty archive.
        with ZipFile(tmp_path, mode='r') as zf:
            for file in zf.infolist():
                # Directory entries carry no data; skip them.
                if file.is_dir():
                    continue
                key = f'{file.filename} ({"-".join(map(str, file.date_time[:3]))})'
                if isexcel(file):
                    # Hand pandas a fully buffered, seekable copy of the
                    # member instead of the zip member stream.
                    # NOTE(review): a member that has an .xls extension but
                    # is really HTML/text will still fail to parse - confirm
                    # the content of the offending files.
                    payload = io.BytesIO(zf.read(file.filename))
                    df_objects[key] = pd.read_excel(payload, header=None)
                elif file.filename.lower().endswith(".csv"):
                    with zf.open(file.filename) as member:
                        df_objects[key] = pd.read_csv(member, header=None)
                else:
                    raise NotImplementedError('Unexpected filetype: '+str(file.filename))
    finally:
        # delete=False means nothing else will clean this file up.
        os.remove(tmp_path)

    return df_objects
I keep getting the error "File is not a zip file" when the archive contains an .xls file.
Has anyone seen anything like this before with an .xls file? Is there an alternative way to read the data in these .xls files into a pandas DataFrame when it's retrieved from an emailed zip archive?