bpo-33453: Handle string type annotations in dataclasses. (GH-6768)

This commit is contained in:
Eric V. Smith 2018-05-15 22:44:27 -04:00 committed by GitHub
parent d8dcd57edb
commit 2a7bacbd91
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 399 additions and 20 deletions

View file

@ -1,3 +1,4 @@
import re
import sys
import copy
import types
@ -187,6 +188,10 @@ def __repr__(self):
# __init__.
_POST_INIT_NAME = '__post_init__'
# String regex that string annotations for ClassVar or InitVar must match.
# Allows "identifier.identifier[" or "identifier[".
# https://bugs.python.org/issue33453 for details.
_MODULE_IDENTIFIER_RE = re.compile(r'^(?:\s*(\w+)\s*\.)?\s*(\w+)')
class _InitVarMeta(type):
def __getitem__(self, params):
@ -532,6 +537,80 @@ def _hash_fn(fields):
[f'return hash({self_tuple})'])
def _is_classvar(a_type, typing):
if typing:
# This test uses a typing internal class, but it's the best
# way to test if this is a ClassVar.
return (a_type is typing.ClassVar
or (type(a_type) is typing._GenericAlias
and a_type.__origin__ is typing.ClassVar))
def _is_initvar(a_type, dataclasses):
# The module we're checking against is the module we're
# currently in (dataclasses.py).
return a_type is dataclasses.InitVar
def _is_type(annotation, cls, a_module, a_type, is_type_predicate):
# Given a type annotation string, does it refer to a_type in
# a_module? For example, when checking that annotation denotes a
# ClassVar, then a_module is typing, and a_type is
# typing.ClassVar.
# It's possible to look up a_module given a_type, but it involves
# looking in sys.modules (again!), and seems like a waste since
# the caller already knows a_module.
# - annotation is a string type annotation
# - cls is the class that this annotation was found in
# - a_module is the module we want to match
# - a_type is the type in that module we want to match
# - is_type_predicate is a function called with (obj, a_module)
# that determines if obj is of the desired type.
# Since this test does not do a local namespace lookup (and
# instead only a module (global) lookup), there are some things it
# gets wrong.
# With string annotations, this will work:
# CV = ClassVar
# @dataclass
# class C0:
# cv0: CV
# But this will not:
# @dataclass
# class C1:
# CV = ClassVar
# cv1: CV
# In C1, the code in this function will look up "CV" in the module
# and not find it, so it will not consider cv1 as a ClassVar.
# This is a fairly obscure corner case, and the best way to fix it
# would be to eval() the string "CV" with the correct global and
# local namespaces. However that would involve a eval() penalty
# for every single field of every dataclass that's defined. It
# was judged not worth it.
match = _MODULE_IDENTIFIER_RE.match(annotation)
if match:
ns = None
module_name = match.group(1)
if not module_name:
# No module name, assume the class's module did
# "from dataclasses import InitVar".
ns = sys.modules.get(cls.__module__).__dict__
else:
# Look up module_name in the class's module.
module = sys.modules.get(cls.__module__)
if module and module.__dict__.get(module_name) is a_module:
ns = sys.modules.get(a_type.__module__).__dict__
if ns and is_type_predicate(ns.get(match.group(2)), a_module):
return True
return False
def _get_field(cls, a_name, a_type):
# Return a Field object for this field name and type. ClassVars
# and InitVars are also returned, but marked as such (see
@ -548,34 +627,54 @@ def _get_field(cls, a_name, a_type):
default = MISSING
f = field(default=default)
# Assume it's a normal field until proven otherwise.
f._field_type = _FIELD
# Only at this point do we know the name and the type. Set them.
f.name = a_name
f.type = a_type
# If typing has not been imported, then it's impossible for
# any annotation to be a ClassVar. So, only look for ClassVar
# if typing has been imported.
# Assume it's a normal field until proven otherwise. We're next
# going to decide if it's a ClassVar or InitVar, everything else
# is just a normal field.
f._field_type = _FIELD
# In addition to checking for actual types here, also check for
# string annotations. get_type_hints() won't always work for us
# (see https://github.com/python/typing/issues/508 for example),
# plus it's expensive and would require an eval for every stirng
# annotation. So, make a best effort to see if this is a
# ClassVar or InitVar using regex's and checking that the thing
# referenced is actually of the correct type.
# For the complete discussion, see https://bugs.python.org/issue33453
# If typing has not been imported, then it's impossible for any
# annotation to be a ClassVar. So, only look for ClassVar if
# typing has been imported by any module (not necessarily cls's
# module).
typing = sys.modules.get('typing')
if typing is not None:
if typing:
# This test uses a typing internal class, but it's the best
# way to test if this is a ClassVar.
if (type(a_type) is typing._GenericAlias and
a_type.__origin__ is typing.ClassVar):
# This field is a ClassVar, so it's not a field.
if (_is_classvar(a_type, typing)
or (isinstance(f.type, str)
and _is_type(f.type, cls, typing, typing.ClassVar,
_is_classvar))):
f._field_type = _FIELD_CLASSVAR
# If the type is InitVar, or if it's a matching string annotation,
# then it's an InitVar.
if f._field_type is _FIELD:
# Check if this is an InitVar.
if a_type is InitVar:
# InitVars are not fields, either.
# The module we're checking against is the module we're
# currently in (dataclasses.py).
dataclasses = sys.modules[__name__]
if (_is_initvar(a_type, dataclasses)
or (isinstance(f.type, str)
and _is_type(f.type, cls, dataclasses, dataclasses.InitVar,
_is_initvar))):
f._field_type = _FIELD_INITVAR
# Validations for fields. This is delayed until now, instead of
# in the Field() constructor, since only here do we know the field
# name, which allows better error reporting.
# Validations for individual fields. This is delayed until now,
# instead of in the Field() constructor, since only here do we
# know the field name, which allows for better error reporting.
# Special restrictions for ClassVar and InitVar.
if f._field_type in (_FIELD_CLASSVAR, _FIELD_INITVAR):
@ -605,7 +704,6 @@ def _set_new_attribute(cls, name, value):
return False
# Decide if/how we're going to create a hash function. Key is
# (unsafe_hash, eq, frozen, does-hash-exist). Value is the action to
# take. The common case is to do nothing, so instead of providing a
@ -865,7 +963,7 @@ def fields(class_or_instance):
# Might it be worth caching this, per class?
try:
fields = getattr(class_or_instance, _FIELDS)
fields = getattr(class_or_instance, _FIELDS)
except AttributeError:
raise TypeError('must be called with a dataclass type or instance')

View file

@ -0,0 +1,32 @@
#from __future__ import annotations
USING_STRINGS = False
# dataclass_module_1.py and dataclass_module_1_str.py are identical
# except only the latter uses string annotations.
import dataclasses
import typing
T_CV2 = typing.ClassVar[int]
T_CV3 = typing.ClassVar
T_IV2 = dataclasses.InitVar[int]
T_IV3 = dataclasses.InitVar
@dataclasses.dataclass
class CV:
T_CV4 = typing.ClassVar
cv0: typing.ClassVar[int] = 20
cv1: typing.ClassVar = 30
cv2: T_CV2
cv3: T_CV3
not_cv4: T_CV4 # When using string annotations, this field is not recognized as a ClassVar.
@dataclasses.dataclass
class IV:
T_IV4 = dataclasses.InitVar
iv0: dataclasses.InitVar[int]
iv1: dataclasses.InitVar
iv2: T_IV2
iv3: T_IV3
not_iv4: T_IV4 # When using string annotations, this field is not recognized as an InitVar.

View file

@ -0,0 +1,32 @@
from __future__ import annotations
USING_STRINGS = True
# dataclass_module_1.py and dataclass_module_1_str.py are identical
# except only the latter uses string annotations.
import dataclasses
import typing
T_CV2 = typing.ClassVar[int]
T_CV3 = typing.ClassVar
T_IV2 = dataclasses.InitVar[int]
T_IV3 = dataclasses.InitVar
@dataclasses.dataclass
class CV:
T_CV4 = typing.ClassVar
cv0: typing.ClassVar[int] = 20
cv1: typing.ClassVar = 30
cv2: T_CV2
cv3: T_CV3
not_cv4: T_CV4 # When using string annotations, this field is not recognized as a ClassVar.
@dataclasses.dataclass
class IV:
T_IV4 = dataclasses.InitVar
iv0: dataclasses.InitVar[int]
iv1: dataclasses.InitVar
iv2: T_IV2
iv3: T_IV3
not_iv4: T_IV4 # When using string annotations, this field is not recognized as an InitVar.

View file

@ -0,0 +1,32 @@
#from __future__ import annotations
USING_STRINGS = False
# dataclass_module_2.py and dataclass_module_2_str.py are identical
# except only the latter uses string annotations.
from dataclasses import dataclass, InitVar
from typing import ClassVar
T_CV2 = ClassVar[int]
T_CV3 = ClassVar
T_IV2 = InitVar[int]
T_IV3 = InitVar
@dataclass
class CV:
T_CV4 = ClassVar
cv0: ClassVar[int] = 20
cv1: ClassVar = 30
cv2: T_CV2
cv3: T_CV3
not_cv4: T_CV4 # When using string annotations, this field is not recognized as a ClassVar.
@dataclass
class IV:
T_IV4 = InitVar
iv0: InitVar[int]
iv1: InitVar
iv2: T_IV2
iv3: T_IV3
not_iv4: T_IV4 # When using string annotations, this field is not recognized as an InitVar.

View file

@ -0,0 +1,32 @@
from __future__ import annotations
USING_STRINGS = True
# dataclass_module_2.py and dataclass_module_2_str.py are identical
# except only the latter uses string annotations.
from dataclasses import dataclass, InitVar
from typing import ClassVar
T_CV2 = ClassVar[int]
T_CV3 = ClassVar
T_IV2 = InitVar[int]
T_IV3 = InitVar
@dataclass
class CV:
T_CV4 = ClassVar
cv0: ClassVar[int] = 20
cv1: ClassVar = 30
cv2: T_CV2
cv3: T_CV3
not_cv4: T_CV4 # When using string annotations, this field is not recognized as a ClassVar.
@dataclass
class IV:
T_IV4 = InitVar
iv0: InitVar[int]
iv1: InitVar
iv2: T_IV2
iv3: T_IV3
not_iv4: T_IV4 # When using string annotations, this field is not recognized as an InitVar.

View file

@ -12,6 +12,9 @@
from collections import deque, OrderedDict, namedtuple
from functools import total_ordering
import typing # Needed for the string "typing.ClassVar[int]" to work as an annotation.
import dataclasses # Needed for the string "dataclasses.InitVar[int]" to work as an annotation.
# Just any custom exception we can catch.
class CustomError(Exception): pass
@ -600,7 +603,6 @@ class C:
class C:
x: ClassVar[typ] = Subclass()
def test_deliberately_mutable_defaults(self):
# If a mutable default isn't in the known list of
# (list, dict, set), then it's okay.
@ -924,14 +926,16 @@ class C:
z: ClassVar[int] = 1000
w: ClassVar[int] = 2000
t: ClassVar[int] = 3000
s: ClassVar = 4000
c = C(5)
self.assertEqual(repr(c), 'TestCase.test_class_var.<locals>.C(x=5, y=10)')
self.assertEqual(len(fields(C)), 2) # We have 2 fields.
self.assertEqual(len(C.__annotations__), 5) # And 3 ClassVars.
self.assertEqual(len(C.__annotations__), 6) # And 4 ClassVars.
self.assertEqual(c.z, 1000)
self.assertEqual(c.w, 2000)
self.assertEqual(c.t, 3000)
self.assertEqual(c.s, 4000)
C.z += 1
self.assertEqual(c.z, 1001)
c = C(20)
@ -939,6 +943,7 @@ class C:
self.assertEqual(c.z, 1001)
self.assertEqual(c.w, 2000)
self.assertEqual(c.t, 3000)
self.assertEqual(c.s, 4000)
def test_class_var_no_default(self):
# If a ClassVar has no default value, it should not be set on the class.
@ -2798,5 +2803,149 @@ class C:
self.assertEqual(D.__set_name__.call_count, 1)
class TestStringAnnotations(unittest.TestCase):
def test_classvar(self):
# Some expressions recognized as ClassVar really aren't. But
# if you're using string annotations, it's not an exact
# science.
# These tests assume that both "import typing" and "from
# typing import *" have been run in this file.
for typestr in ('ClassVar[int]',
'ClassVar [int]'
' ClassVar [int]',
'ClassVar',
' ClassVar ',
'typing.ClassVar[int]',
'typing.ClassVar[str]',
' typing.ClassVar[str]',
'typing .ClassVar[str]',
'typing. ClassVar[str]',
'typing.ClassVar [str]',
'typing.ClassVar [ str]',
# Not syntactically valid, but these will
# be treated as ClassVars.
'typing.ClassVar.[int]',
'typing.ClassVar+',
):
with self.subTest(typestr=typestr):
@dataclass
class C:
x: typestr
# x is a ClassVar, so C() takes no args.
C()
# And it won't appear in the class's dict because it doesn't
# have a default.
self.assertNotIn('x', C.__dict__)
def test_isnt_classvar(self):
for typestr in ('CV',
't.ClassVar',
't.ClassVar[int]',
'typing..ClassVar[int]',
'Classvar',
'Classvar[int]',
'typing.ClassVarx[int]',
'typong.ClassVar[int]',
'dataclasses.ClassVar[int]',
'typingxClassVar[str]',
):
with self.subTest(typestr=typestr):
@dataclass
class C:
x: typestr
# x is not a ClassVar, so C() takes one arg.
self.assertEqual(C(10).x, 10)
def test_initvar(self):
# These tests assume that both "import dataclasses" and "from
# dataclasses import *" have been run in this file.
for typestr in ('InitVar[int]',
'InitVar [int]'
' InitVar [int]',
'InitVar',
' InitVar ',
'dataclasses.InitVar[int]',
'dataclasses.InitVar[str]',
' dataclasses.InitVar[str]',
'dataclasses .InitVar[str]',
'dataclasses. InitVar[str]',
'dataclasses.InitVar [str]',
'dataclasses.InitVar [ str]',
# Not syntactically valid, but these will
# be treated as InitVars.
'dataclasses.InitVar.[int]',
'dataclasses.InitVar+',
):
with self.subTest(typestr=typestr):
@dataclass
class C:
x: typestr
# x is an InitVar, so doesn't create a member.
with self.assertRaisesRegex(AttributeError,
"object has no attribute 'x'"):
C(1).x
def test_isnt_initvar(self):
for typestr in ('IV',
'dc.InitVar',
'xdataclasses.xInitVar',
'typing.xInitVar[int]',
):
with self.subTest(typestr=typestr):
@dataclass
class C:
x: typestr
# x is not an InitVar, so there will be a member x.
self.assertEqual(C(10).x, 10)
def test_classvar_module_level_import(self):
from . import dataclass_module_1
from . import dataclass_module_1_str
from . import dataclass_module_2
from . import dataclass_module_2_str
for m in (dataclass_module_1, dataclass_module_1_str,
dataclass_module_2, dataclass_module_2_str,
):
with self.subTest(m=m):
# There's a difference in how the ClassVars are
# interpreted when using string annotations or
# not. See the imported modules for details.
if m.USING_STRINGS:
c = m.CV(10)
else:
c = m.CV()
self.assertEqual(c.cv0, 20)
# There's a difference in how the InitVars are
# interpreted when using string annotations or
# not. See the imported modules for details.
c = m.IV(0, 1, 2, 3, 4)
for field_name in ('iv0', 'iv1', 'iv2', 'iv3'):
with self.subTest(field_name=field_name):
with self.assertRaisesRegex(AttributeError, f"object has no attribute '{field_name}'"):
# Since field_name is an InitVar, it's
# not an instance field.
getattr(c, field_name)
if m.USING_STRINGS:
# iv4 is interpreted as a normal field.
self.assertIn('not_iv4', c.__dict__)
self.assertEqual(c.not_iv4, 4)
else:
# iv4 is interpreted as an InitVar, so it
# won't exist on the instance.
self.assertNotIn('not_iv4', c.__dict__)
if __name__ == '__main__':
unittest.main()

View file

@ -0,0 +1,4 @@
Fix dataclasses to work if using literal string type annotations or if using
PEP 563 "Postponed Evaluation of Annotations". Only specific string
prefixes are detected for both ClassVar ("ClassVar" and "typing.ClassVar")
and InitVar ("InitVar" and "dataclasses.InitVar").