How to read from a cursor that has an invalid date in one row and ignore the problematic entry

138 Views Asked by At

sample table data

{ _id: 1, modified: 2020-07-25T14:10:26.000+00:00, created :  2020-07-20T14:10:26.000+00:00}
{ _id: 2, modified: 2020-07-29T07:55:55.485.000+00:00, created : 201244-01-01T14:10:26.000+00:00}
{ _id: 3, modified: 2020-08-01T01:00:12.002.000+00:00, created : 2020-07-01T01:00:12.002.000+00:00}
I used the sample code below to read the data from the table,
using pymongo==3.12.0:

# Minimal reproduction: query the "test" collection for documents whose
# "modified" timestamp lies inside a fixed window, sort on "modified"
# (direction 1), skip the first 1000 matches and read at most 1000 more.
# NOTE(review): db is a plain string here, so db[table] below indexes a str
# rather than a MongoDB database; presumably a pymongo Database handle was
# intended - confirm against the real script.
db = "testdb"
table = "test"
filter = "modified"
# The month is written as 7, not 07: an integer literal with a leading zero
# is a SyntaxError in Python 3.
query = {'modified': {'$gt': datetime.datetime(2020, 7, 22, 6, 35, 51, 859000), '$lte': datetime.datetime(2022, 12, 1, 2, 44, 41, 424501)}}
cursor = db[table].find(query).sort(filter, 1).skip(1000).limit(1000)

# Each iteration pulls the next document from the cursor and prints it.
for docs in cursor:
    print(docs)

I am getting an InvalidBSON exception (year 201244 is out of range) for the document with _id = 2, whose created value is problematic, and the loop cannot proceed any further. I would like to know how to read from the cursor in a loop and ignore the documents whose BSON data is invalid.

Traceback (most recent call last):
  File "C:\Python\Python311\Lib\site-packages\bson\__init__.py", line 1027, in decode_all
    docs.append(_elements_to_dict(data,
                ^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Python\Python311\Lib\site-packages\bson\__init__.py", line 471, in _elements_to_dict
    key, value, position = _element_to_dict(data, view, position, obj_end, opts)
                           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Python\Python311\Lib\site-packages\bson\__init__.py", line 446, in _element_to_dict
    value, position = _ELEMENT_GETTER[element_type](data, view, position,
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Python\Python311\Lib\site-packages\bson\__init__.py", line 246, in _get_object
    obj = _elements_to_dict(data, view, position + 4, end, opts)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Python\Python311\Lib\site-packages\bson\__init__.py", line 471, in _elements_to_dict
    key, value, position = _element_to_dict(data, view, position, obj_end, opts)
                           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Python\Python311\Lib\site-packages\bson\__init__.py", line 446, in _element_to_dict
    value, position = _ELEMENT_GETTER[element_type](data, view, position,
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Python\Python311\Lib\site-packages\bson\__init__.py", line 277, in _get_array
    value, position = getter[element_type](
                      ^^^^^^^^^^^^^^^^^^^^^
  File "C:\Python\Python311\Lib\site-packages\bson\__init__.py", line 246, in _get_object
    obj = _elements_to_dict(data, view, position + 4, end, opts)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Python\Python311\Lib\site-packages\bson\__init__.py", line 471, in _elements_to_dict
    key, value, position = _element_to_dict(data, view, position, obj_end, opts)
                           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Python\Python311\Lib\site-packages\bson\__init__.py", line 446, in _element_to_dict
    value, position = _ELEMENT_GETTER[element_type](data, view, position,
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Python\Python311\Lib\site-packages\bson\__init__.py", line 352, in _get_date
    return _millis_to_datetime(
           ^^^^^^^^^^^^^^^^^^^^
  File "C:\Python\Python311\Lib\site-packages\bson\__init__.py", line 898, in _millis_to_datetime
    return EPOCH_NAIVE + datetime.timedelta(seconds=seconds,
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
OverflowError: date value out of range

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\sample.py", line 173, in run
           ^^^^^^^^^^^^^
  File "C:\Python\Python311\Lib\site-packages\pymongo\cursor.py", line 1238, in next
    if len(self.__data) or self._refresh():
                           ^^^^^^^^^^^^^^^
  File "C:\Python\Python311\Lib\site-packages\pymongo\cursor.py", line 1155, in _refresh
    self.__send_message(q)
  File "C:\Python\Python311\Lib\site-packages\pymongo\cursor.py", line 1044, in __send_message
    response = client._run_operation(
               ^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Python\Python311\Lib\site-packages\pymongo\mongo_client.py", line 1424, in _run_operation
    return self._retryable_read(
           ^^^^^^^^^^^^^^^^^^^^^
  File "C:\Python\Python311\Lib\site-packages\pymongo\mongo_client.py", line 1525, in _retryable_read
    return func(session, server, sock_info, secondary_ok)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Python\Python311\Lib\site-packages\pymongo\mongo_client.py", line 1420, in _cmd
    return server.run_operation(
           ^^^^^^^^^^^^^^^^^^^^^
  File "C:\Python\Python311\Lib\site-packages\pymongo\server.py", line 123, in run_operation
    docs = unpack_res(reply, operation.cursor_id,
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Python\Python311\Lib\site-packages\pymongo\cursor.py", line 1109, in _unpack_response
    return response.unpack_response(cursor_id, codec_options, user_fields,
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Python\Python311\Lib\site-packages\pymongo\message.py", line 1600, in unpack_response
    return bson._decode_all_selective(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Python\Python311\Lib\site-packages\bson\__init__.py", line 1099, in _decode_all_selective
    return decode_all(data, codec_options)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Python\Python311\Lib\site-packages\bson\__init__.py", line 1039, in decode_all
    reraise(InvalidBSON, exc_value, exc_tb)
  File "C:\Python\Python311\Lib\site-packages\bson\py3compat.py", line 53, in reraise
    raise exctype(str(value)).with_traceback(trace)
  File "C:\Python\Python311\Lib\site-packages\bson\__init__.py", line 1027, in decode_all
    docs.append(_elements_to_dict(data,
                ^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Python\Python311\Lib\site-packages\bson\__init__.py", line 471, in _elements_to_dict
    key, value, position = _element_to_dict(data, view, position, obj_end, opts)
                           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Python\Python311\Lib\site-packages\bson\__init__.py", line 446, in _element_to_dict
    value, position = _ELEMENT_GETTER[element_type](data, view, position,
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Python\Python311\Lib\site-packages\bson\__init__.py", line 246, in _get_object
    obj = _elements_to_dict(data, view, position + 4, end, opts)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Python\Python311\Lib\site-packages\bson\__init__.py", line 471, in _elements_to_dict
    key, value, position = _element_to_dict(data, view, position, obj_end, opts)
                           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Python\Python311\Lib\site-packages\bson\__init__.py", line 446, in _element_to_dict
    value, position = _ELEMENT_GETTER[element_type](data, view, position,
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Python\Python311\Lib\site-packages\bson\__init__.py", line 277, in _get_array
    value, position = getter[element_type](
                      ^^^^^^^^^^^^^^^^^^^^^
  File "C:\Python\Python311\Lib\site-packages\bson\__init__.py", line 246, in _get_object
    obj = _elements_to_dict(data, view, position + 4, end, opts)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Python\Python311\Lib\site-packages\bson\__init__.py", line 471, in _elements_to_dict
    key, value, position = _element_to_dict(data, view, position, obj_end, opts)
                           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Python\Python311\Lib\site-packages\bson\__init__.py", line 446, in _element_to_dict
    value, position = _ELEMENT_GETTER[element_type](data, view, position,
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Python\Python311\Lib\site-packages\bson\__init__.py", line 352, in _get_date
    return _millis_to_datetime(
           ^^^^^^^^^^^^^^^^^^^^
  File "C:\Python\Python311\Lib\site-packages\bson\__init__.py", line 898, in _millis_to_datetime
    return EPOCH_NAIVE + datetime.timedelta(seconds=seconds,
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
bson.errors.InvalidBSON: date value out of range

I have tried catching the exception, but I couldn't skip the problematic entry in the cursor.

1

There is 1 best solution below

0
R2D2 On

PyMongo decodes BSON datetime values to instances of Python’s datetime.datetime. Instances of datetime.datetime are limited to years between datetime.MINYEAR (usually 1) and datetime.MAXYEAR (usually 9999). Some MongoDB drivers (e.g. the PHP driver) can store BSON datetimes with year values far outside those supported by datetime.datetime.

The best option is to fix those dates in the MongoDB collection from the mongosh shell before fetching them via the PyMongo driver, since those dates are most probably invalid but are tolerated by MongoDB's BSON format.