I am totally new in Python and I am following a google drive tutorial to be able to read a file from the cloud:
def download_file(real_file_id):
try:
service = authorize()
file_id = real_file_id
# pylint: disable=maybe-no-member
request = service.files().get_media(fileId=file_id)
file = io.BytesIO()
downloader = MediaIoBaseDownload(file, request)
done = False
while done is False:
status, done = downloader.next_chunk()
print(F'Download {int(status.progress() * 100)}.')
except HttpError as error:
print(F'An error occurred: {error}')
file = None
return file.seek(0)
Then I am trying to read this file with pandas read_csv method:
def read_file(item):
with open(item, 'rb') as file:
csvreader = pd.read_csv(file.read())
print(csvreader)
if __name__ == '__main__':
file = download_file("file_id")
read_file(file)
I am sure the download method is working because is tested but when I try to read with pandas it just hang and nothing happens... I need to cancel the script manually and I got this errors:
Traceback (most recent call last):
File "C:\Users\tluce\PycharmProjects\pythonProject\main.py", line 23, in <module>
read_file(download_file(files[0]['id']))
File "C:\Users\tluce\PycharmProjects\pythonProject\main.py", line 15, in read_file
csvreader = pd.read_csv(file)
File "C:\Users\tluce\PycharmProjects\pythonProject\venv\lib\site-packages\pandas\io\parsers\readers.py", line 912, in read_csv
return _read(filepath_or_buffer, kwds)
File "C:\Users\tluce\PycharmProjects\pythonProject\venv\lib\site-packages\pandas\io\parsers\readers.py", line 577, in _read
parser = TextFileReader(filepath_or_buffer, **kwds)
File "C:\Users\tluce\PycharmProjects\pythonProject\venv\lib\site-packages\pandas\io\parsers\readers.py", line 1407, in __init__
self._engine = self._make_engine(f, self.engine)
File "C:\Users\tluce\PycharmProjects\pythonProject\venv\lib\site-packages\pandas\io\parsers\readers.py", line 1679, in _make_engine
return mapping[engine](f, **self.options)
File "C:\Users\tluce\PycharmProjects\pythonProject\venv\lib\site-packages\pandas\io\parsers\c_parser_wrapper.py", line 93, in __init__
self._reader = parsers.TextReader(src, **kwds)
File "pandas\_libs\parsers.pyx", line 548, in pandas._libs.parsers.TextReader.__cinit__
File "pandas\_libs\parsers.pyx", line 637, in pandas._libs.parsers.TextReader._get_header
File "pandas\_libs\parsers.pyx", line 848, in pandas._libs.parsers.TextReader._tokenize_rows
File "pandas\_libs\parsers.pyx", line 859, in pandas._libs.parsers.TextReader._check_tokenize_status
File "pandas\_libs\parsers.pyx", line 2025, in pandas._libs.parsers.raise_parser_error
pandas.errors.ParserError: Error tokenizing data. C error: Calling read(nbytes) on source failed. Try engine='python'.
First:
seek()returns the new absolute position of the bytes io as an integer. It does not return the bytes io object. You will need to callfile.seek(0)on one line, and thenreturn fileon the next. For reference: https://docs.python.org/3/library/io.html#io.IOBase.seekSecond: you can pass the BytesIO object directly into
read_csv. Try replace yourread_filefunction with:Here's a short example for you try out yourself!