gh-114087: Speed up dataclasses._asdict_inner (#114088)

This commit is contained in:
keithasaurus 2024-01-18 08:03:20 -08:00 committed by GitHub
parent 339fc3c224
commit 2d3f6b56c5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 55 additions and 44 deletions

View file

@ -1332,58 +1332,69 @@ class C:
def _asdict_inner(obj, dict_factory):
if type(obj) in _ATOMIC_TYPES:
obj_type = type(obj)
if obj_type in _ATOMIC_TYPES:
return obj
elif _is_dataclass_instance(obj):
# fast path for the common case
elif hasattr(obj_type, _FIELDS):
# dataclass instance: fast path for the common case
if dict_factory is dict:
return {
f.name: _asdict_inner(getattr(obj, f.name), dict)
for f in fields(obj)
}
else:
result = []
for f in fields(obj):
value = _asdict_inner(getattr(obj, f.name), dict_factory)
result.append((f.name, value))
return dict_factory(result)
elif isinstance(obj, tuple) and hasattr(obj, '_fields'):
# obj is a namedtuple. Recurse into it, but the returned
# object is another namedtuple of the same type. This is
# similar to how other list- or tuple-derived classes are
# treated (see below), but we just need to create them
# differently because a namedtuple's __init__ needs to be
# called differently (see bpo-34363).
return dict_factory([
(f.name, _asdict_inner(getattr(obj, f.name), dict_factory))
for f in fields(obj)
])
# handle the builtin types first for speed; subclasses handled below
elif obj_type is list:
return [_asdict_inner(v, dict_factory) for v in obj]
elif obj_type is dict:
return {
_asdict_inner(k, dict_factory): _asdict_inner(v, dict_factory)
for k, v in obj.items()
}
elif obj_type is tuple:
return tuple([_asdict_inner(v, dict_factory) for v in obj])
elif issubclass(obj_type, tuple):
if hasattr(obj, '_fields'):
# obj is a namedtuple. Recurse into it, but the returned
# object is another namedtuple of the same type. This is
# similar to how other list- or tuple-derived classes are
# treated (see below), but we just need to create them
# differently because a namedtuple's __init__ needs to be
# called differently (see bpo-34363).
# I'm not using namedtuple's _asdict()
# method, because:
# - it does not recurse in to the namedtuple fields and
# convert them to dicts (using dict_factory).
# - I don't actually want to return a dict here. The main
# use case here is json.dumps, and it handles converting
# namedtuples to lists. Admittedly we're losing some
# information here when we produce a json list instead of a
# dict. Note that if we returned dicts here instead of
# namedtuples, we could no longer call asdict() on a data
# structure where a namedtuple was used as a dict key.
return type(obj)(*[_asdict_inner(v, dict_factory) for v in obj])
elif isinstance(obj, (list, tuple)):
# Assume we can create an object of this type by passing in a
# generator (which is not true for namedtuples, handled
# above).
return type(obj)(_asdict_inner(v, dict_factory) for v in obj)
elif isinstance(obj, dict):
if hasattr(type(obj), 'default_factory'):
# I'm not using namedtuple's _asdict()
# method, because:
# - it does not recurse in to the namedtuple fields and
# convert them to dicts (using dict_factory).
# - I don't actually want to return a dict here. The main
# use case here is json.dumps, and it handles converting
# namedtuples to lists. Admittedly we're losing some
# information here when we produce a json list instead of a
# dict. Note that if we returned dicts here instead of
# namedtuples, we could no longer call asdict() on a data
# structure where a namedtuple was used as a dict key.
return obj_type(*[_asdict_inner(v, dict_factory) for v in obj])
else:
return obj_type(_asdict_inner(v, dict_factory) for v in obj)
elif issubclass(obj_type, dict):
if hasattr(obj_type, 'default_factory'):
# obj is a defaultdict, which has a different constructor from
# dict as it requires the default_factory as its first arg.
result = type(obj)(getattr(obj, 'default_factory'))
result = obj_type(obj.default_factory)
for k, v in obj.items():
result[_asdict_inner(k, dict_factory)] = _asdict_inner(v, dict_factory)
return result
return type(obj)((_asdict_inner(k, dict_factory),
_asdict_inner(v, dict_factory))
for k, v in obj.items())
return obj_type((_asdict_inner(k, dict_factory),
_asdict_inner(v, dict_factory))
for k, v in obj.items())
elif issubclass(obj_type, list):
# Assume we can create an object of this type by passing in a
# generator
return obj_type(_asdict_inner(v, dict_factory) for v in obj)
else:
return copy.deepcopy(obj)
@ -1416,11 +1427,10 @@ def _astuple_inner(obj, tuple_factory):
if type(obj) in _ATOMIC_TYPES:
return obj
elif _is_dataclass_instance(obj):
result = []
for f in fields(obj):
value = _astuple_inner(getattr(obj, f.name), tuple_factory)
result.append(value)
return tuple_factory(result)
return tuple_factory([
_astuple_inner(getattr(obj, f.name), tuple_factory)
for f in fields(obj)
])
elif isinstance(obj, tuple) and hasattr(obj, '_fields'):
# obj is a namedtuple. Recurse into it, but the returned
# object is another namedtuple of the same type. This is

View file

@ -0,0 +1 @@
Speed up ``dataclasses.asdict`` up to 1.35x.