Fields not initialized when __post_init__() called using ruamel.yaml

182 Views Asked by At

I have two dataclasses: Msg and Field. Msg has a field fields of type list[Field]. I want to assign something to a field of each Field after they have all been initialized which is more or less their relative index in the fields list.

However, when I add a __post_init__(self) method to the Msg dataclass, the fields list is empty, so I can't update the indices.

from dataclasses import dataclass
from ruamel.yaml import YAML

@dataclass
class Msg:
    # A message: an id, a description and an ordered list of Field entries.

    id: int
    desc: str
    fields: list[Field]  # each entry gets its `index` assigned in __post_init__

    def __post_init__(self) -> None:
        """Assign every field its ``index`` as a running total of sizes.

        Each field's ``index`` is set to the sum of the ``size`` values of
        the fields that precede it in ``fields``.
        """
        idx: int = 0  # total size of the fields visited so far
        for field in self.fields: # why is this empty??
            field.index = idx
            idx += field.size

@dataclass
class Field:
    # One entry of Msg.fields.
    id: int
    name: str
    units: str
    size: int  # added to the running total kept by Msg.__post_init__
    index: int = -1  # stays -1 until Msg.__post_init__ overwrites it

y = YAML()
# Register both classes; the document below refers to them by the
# !Msg and !Field tags.
y.register_class(Msg)
y.register_class(Field)

msg: Msg = y.load("""\
!Msg
id: 1
desc: status
fields:
- !Field
    id: 1
    name: Temp
    units: degC
    size: 2
""")

# index keeps its -1 default when Msg.__post_init__ iterated over no fields.
assert(msg.fields[0].index != -1) # fails :(

Why is this? How is the Msg being initialized without fields being initialized? Is there any way to do what I am trying to do using the class system? I am using Python 3.11 with ruamel.yaml 0.18.5 on MacOS.

2

There are 2 best solutions below

6
On BEST ANSWER

By default, object serializers such as YAML and pickle have no idea what to do with the attribute mapping for a user-defined object other than to assign the mapping directly to the object's attribute dictionary as-is.

This is why you can define a __setstate__ method for your class, so that ruamel.yaml's object constructor knows in this case to call the __init__ method with the mapping unpacked as arguments, which in turn calls __post_init__ for post-initialization:

@dataclass
class Msg:
    """Message whose fields receive a running-size ``index`` on creation."""

    id: int
    desc: str
    fields: list[Field]

    def __post_init__(self) -> None:
        """Set each field's ``index`` to the summed ``size`` of its predecessors."""
        offset: int = 0
        for entry in self.fields:
            # Hand out the current total, then advance it by this entry's size.
            entry.index, offset = offset, offset + entry.size

    def __setstate__(self, state):
        """Rebuild the instance through ``__init__`` so ``__post_init__`` runs.

        ``state`` is the attribute mapping supplied by the loader; its items
        are passed to ``__init__`` as keyword arguments.
        """
        self.__init__(**state)

Demo: https://replit.com/@blhsing1/PowerfulThoseJavadocs

1
On

You discovered a bug in ruamel.yaml: up to 0.18.5, the fields of a dataclass were not properly initialised when they consisted of a collection; this only worked (and was only tested) with simple scalars.

This is going to be fixed in 0.18.6, but until then you can do:

from __future__ import annotations

from dataclasses import dataclass
import ruamel.yaml

from ruamel.yaml.constructor import SafeConstructor

class MyConstructor(ruamel.yaml.RoundTripConstructor):
    """RoundTripConstructor with a replacement ``construct_yaml_object``.

    Every branch of the replacement builds the node's mapping with
    ``deep=True`` before the object is populated.
    """

    def construct_yaml_object(self, node, cls):
        """Create an instance of ``cls`` and populate it from ``node``.

        This is a generator: it yields the bare instance (made with
        ``cls.__new__``) first and fills it in when resumed.
        NOTE(review): presumably the loader resumes the generator after it
        has recorded the object -- confirm against ruamel.yaml's constructor.

        How the instance is populated depends on ``cls``:

        * it defines ``__setstate__``: that method receives the mapping;
        * it is a dataclass: attributes are set one by one and
          ``__post_init__`` (if any) is called afterwards;
        * otherwise: ``__init__(**state)`` when the object has
          ``__attrs_attrs__``, else ``__dict__`` is updated directly.
        """
        from dataclasses import is_dataclass, InitVar, MISSING

        data = cls.__new__(cls)
        yield data
        if hasattr(data, '__setstate__'):
            # The class restores its own state.
            state = SafeConstructor.construct_mapping(self, node, deep=True)
            data.__setstate__(state)
        elif is_dataclass(data):
            mapping = SafeConstructor.construct_mapping(self, node, deep=True)
            #                                                       ^^ missing in 0.18.5
            # Collect the InitVar fields that declare a default: they are not
            # stored as attributes but handed to __post_init__ below.
            # NOTE(review): an InitVar without a default is not collected, so
            # its value is set as a plain attribute and not passed on.
            init_var_defaults = {}
            for field in data.__dataclass_fields__.values():
                if (
                    isinstance(field.type, InitVar) or field.type is  InitVar or
                    (isinstance(field.type, str) and field.type.startswith('InitVar'))
                ) and field.default is not MISSING:
                    init_var_defaults[field.name] = field.default
            # Regular fields: copy the loaded values straight onto the instance.
            for attr, value in mapping.items():
                if attr not in init_var_defaults:
                    setattr(data, attr, value)
            post_init = getattr(data, '__post_init__', None)
            if post_init is not None:
                # Pass each collected InitVar: the loaded value if the mapping
                # has one, else the declared default.
                kw = {}
                for name, default in init_var_defaults.items():
                    kw[name] = mapping.get(name, default)
                post_init(**kw)
        else:
            state = SafeConstructor.construct_mapping(self, node, deep=True)
            if hasattr(data, '__attrs_attrs__'):  # issue 394
                data.__init__(**state)
            else:
                data.__dict__.update(state)
        if node.anchor:
            from ruamel.yaml.serializer import templated_id
            from ruamel.yaml.anchor import Anchor

            # Keep the node's anchor on the object unless templated_id()
            # accepts it (presumably an auto-generated anchor name -- confirm).
            if not templated_id(node.anchor):
                if not hasattr(data, Anchor.attrib):
                    a = Anchor()
                    setattr(data, Anchor.attrib, a)
                else:
                    a = getattr(data, Anchor.attrib)
                a.value = node.anchor

@dataclass
class Msg:
    """Message whose fields are indexed by the cumulative size before them."""

    id: int
    desc: str
    fields: list[Field]

    def __post_init__(self) -> None:
        """Assign ``index`` to every field as a running total of ``size``."""
        offset: int = 0
        for entry in self.fields:
            # Hand out the current total, then advance it by this entry's size.
            entry.index, offset = offset, offset + entry.size

@dataclass
class Field:
    # One entry of Msg.fields.
    id: int
    name: str
    units: str
    size: int  # added to the running total kept by Msg.__post_init__
    index: int = -1  # stays -1 until Msg.__post_init__ overwrites it

y = ruamel.yaml.YAML()
# Use the patched constructor class for loading.
y.Constructor = MyConstructor
# Register both classes; the document below refers to them by the
# !Msg and !Field tags.
y.register_class(Msg)
y.register_class(Field)

msg: Msg = y.load("""\
!Msg
id: 1
desc: status
fields:
- !Field
    id: 1
    name: Temp
    units: degC
    size: 2
""")

# index is no longer the -1 default once Msg.__post_init__ has seen the fields.
assert msg.fields[0].index != -1  # no longer fails :-)
print('ok')

which prints:

ok

It also looks like your code was missing

from __future__ import annotations

at the top, so evaluation of the Field annotation was not postponed, and you get a NameError