I have a generator that yields a Parquet file piece by piece. I would like to stream it into a tar file, appending each piece to the archive as soon as the generator yields it.
I expect something like this:
def tar_generator(parquet_generator, log_generator, *, parquet_size=None):
    """Stream a tar archive containing a single member, ``name.parquet``.

    The archive is produced incrementally: first the member header, then the
    Parquet payload chunk by chunk, and finally the block padding plus the
    end-of-archive marker.

    A tar header stores the member size *before* the member data, so the
    total payload size has to be known before the first byte is emitted:

    * If ``parquet_size`` is given, the header is emitted immediately and
      every chunk is forwarded as soon as the source produces it.
    * Otherwise the chunks are first spooled to a temporary file (kept in
      memory while small, spilled to disk when large) to learn the size,
      and the archive is streamed from that spool.

    Args:
        parquet_generator: Zero-argument callable returning an iterable of
            bytes-like chunks. An iterable of chunks, or a single bytes-like
            object, is accepted as well.
        log_generator: Accepted for interface compatibility; currently unused.
        parquet_size: Optional exact payload size in bytes. Enables true
            pass-through streaming without intermediate buffering.

    Yields:
        bytes: Consecutive pieces of the tar archive.

    Raises:
        ValueError: If ``parquet_size`` is negative or does not match the
            number of bytes actually produced. Because this is a generator,
            the error surfaces while iterating, not at call time.
    """
    import tempfile  # function-scope import: only this block needs it

    read_size = 64 * 1024

    if parquet_size is not None and parquet_size < 0:
        raise ValueError("parquet_size must be non-negative")

    source = parquet_generator() if callable(parquet_generator) else parquet_generator
    if isinstance(source, (bytes, bytearray, memoryview)):
        # A single blob is treated as a one-chunk stream.
        source = (source,)

    spool = None
    written = 0
    try:
        if parquet_size is None:
            # Size unknown: buffer the payload once so the header can be built.
            spool = tempfile.SpooledTemporaryFile(max_size=16 * 1024 * 1024)
            for chunk in source:
                spool.write(chunk)
            parquet_size = spool.tell()
            spool.seek(0)
            source = iter(lambda: spool.read(read_size), b"")

        tarinfo = tarfile.TarInfo(name="name.parquet")
        tarinfo.size = parquet_size
        header = tarinfo.tobuf()
        yield header

        for chunk in source:
            data = bytes(chunk)
            if not data:
                continue
            written += len(data)
            if written > parquet_size:
                raise ValueError(
                    "parquet stream is longer than the declared parquet_size"
                )
            yield data

        if written != parquet_size:
            raise ValueError(
                "parquet stream is shorter than the declared parquet_size"
            )
    finally:
        if spool is not None:
            spool.close()

    # Member data is padded to a full 512-byte block, the archive ends with
    # two zero blocks, and (like tarfile itself) the total length is rounded
    # up to a whole record.
    trailer = -written % tarfile.BLOCKSIZE + 2 * tarfile.BLOCKSIZE
    total = len(header) + written + trailer
    trailer += -total % tarfile.RECORDSIZE
    yield b"\0" * trailer