pd.read_parquet gives ValueError: year 0 is out of range

28 Views Asked by At

When trying to load a parquet into a df by typing: g34i9 = pd.read_parquet('G34I9.snappy.parquet')

I am getting an error as below:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[46], line 1
----> 1 g34i9 = pd.read_parquet('G34I9.snappy.parquet')

File ~\AppData\Local\anaconda3\envs\basevenv\Lib\site-packages\pandas\io\parquet.py:670, in read_parquet(path, engine, columns, storage_options, use_nullable_dtypes, dtype_backend, filesystem, filters, **kwargs)
    667     use_nullable_dtypes = False
    668 check_dtype_backend(dtype_backend)
--> 670 return impl.read(
    671     path,
    672     columns=columns,
    673     filters=filters,
    674     storage_options=storage_options,
    675     use_nullable_dtypes=use_nullable_dtypes,
    676     dtype_backend=dtype_backend,
    677     filesystem=filesystem,
    678     **kwargs,
    679 )

File ~\AppData\Local\anaconda3\envs\basevenv\Lib\site-packages\pandas\io\parquet.py:279, in PyArrowImpl.read(self, path, columns, filters, use_nullable_dtypes, dtype_backend, storage_options, filesystem, **kwargs)
    271 try:
    272     pa_table = self.api.parquet.read_table(
    273         path_or_handle,
    274         columns=columns,
   (...)
    277         **kwargs,
    278     )
--> 279     result = pa_table.to_pandas(**to_pandas_kwargs)
    281     if manager == "array":
    282         result = result._as_manager("array", copy=False)

File ~\AppData\Local\anaconda3\envs\basevenv\Lib\site-packages\pyarrow\array.pxi:884, in pyarrow.lib._PandasConvertible.to_pandas()

File ~\AppData\Local\anaconda3\envs\basevenv\Lib\site-packages\pyarrow\table.pxi:4192, in pyarrow.lib.Table._to_pandas()

File ~\AppData\Local\anaconda3\envs\basevenv\Lib\site-packages\pyarrow\pandas_compat.py:776, in table_to_dataframe(options, table, categories, ignore_metadata, types_mapper)
    774 _check_data_column_metadata_consistency(all_columns)
    775 columns = _deserialize_column_index(table, all_columns, column_indexes)
--> 776 blocks = _table_to_blocks(options, table, categories, ext_columns_dtypes)
    778 axes = [columns, index]
    779 mgr = BlockManager(blocks, axes)

File ~\AppData\Local\anaconda3\envs\basevenv\Lib\site-packages\pyarrow\pandas_compat.py:1129, in _table_to_blocks(options, block_table, categories, extension_columns)
   1124 def _table_to_blocks(options, block_table, categories, extension_columns):
   1125     # Part of table_to_blockmanager
   1126 
   1127     # Convert an arrow table to Block from the internal pandas API
   1128     columns = block_table.column_names
-> 1129     result = pa.lib.table_to_blocks(options, block_table, categories,
   1130                                     list(extension_columns.keys()))
   1131     return [_reconstruct_block(item, columns, extension_columns)
   1132             for item in result]

File ~\AppData\Local\anaconda3\envs\basevenv\Lib\site-packages\pyarrow\table.pxi:3115, in pyarrow.lib.table_to_blocks()

File ~\AppData\Local\anaconda3\envs\basevenv\Lib\site-packages\pyarrow\types.pxi:88, in pyarrow.lib._datatype_to_pep3118()

ValueError: year 0 is out of range

I would like to successfully load parquet into df.

1

There is 1 answer below

0
udaykumar gajavalli On

This error occurs when pandas encounters a date value outside the range supported by its `datetime64[ns]` dtype (here, a timestamp with year 0) while converting the Arrow table to a DataFrame. Try the approach below to see if it resolves the issue.

Note that `pd.read_parquet` does not accept `coerce_datetime` or `errors` arguments, so passing them will raise a `TypeError`. Instead, read the file with pyarrow directly and ask it to keep out-of-range timestamps as plain Python objects:

    import pyarrow.parquet as pq
    table = pq.read_table('G34I9.snappy.parquet')
    g34i9 = table.to_pandas(timestamp_as_object=True)