From 2a7bacbd913cf2bf568b3c0f85a758946d3cf4e9 Mon Sep 17 00:00:00 2001 From: "Eric V. Smith" Date: Tue, 15 May 2018 22:44:27 -0400 Subject: [PATCH] bpo-33453: Handle string type annotations in dataclasses. (GH-6768) --- Lib/dataclasses.py | 134 ++++++++++++--- Lib/test/dataclass_module_1.py | 32 ++++ Lib/test/dataclass_module_1_str.py | 32 ++++ Lib/test/dataclass_module_2.py | 32 ++++ Lib/test/dataclass_module_2_str.py | 32 ++++ Lib/test/test_dataclasses.py | 153 +++++++++++++++++- .../2018-05-12-06-01-02.bpo-33453.Fj-jMD.rst | 4 + 7 files changed, 399 insertions(+), 20 deletions(-) create mode 100644 Lib/test/dataclass_module_1.py create mode 100644 Lib/test/dataclass_module_1_str.py create mode 100644 Lib/test/dataclass_module_2.py create mode 100644 Lib/test/dataclass_module_2_str.py create mode 100644 Misc/NEWS.d/next/Library/2018-05-12-06-01-02.bpo-33453.Fj-jMD.rst diff --git a/Lib/dataclasses.py b/Lib/dataclasses.py index 0f9041604a5..c93aadc95e0 100644 --- a/Lib/dataclasses.py +++ b/Lib/dataclasses.py @@ -1,3 +1,4 @@ +import re import sys import copy import types @@ -187,6 +188,10 @@ def __repr__(self): # __init__. _POST_INIT_NAME = '__post_init__' +# String regex that string annotations for ClassVar or InitVar must match. +# Allows "identifier.identifier[" or "identifier[". +# https://bugs.python.org/issue33453 for details. +_MODULE_IDENTIFIER_RE = re.compile(r'^(?:\s*(\w+)\s*\.)?\s*(\w+)') class _InitVarMeta(type): def __getitem__(self, params): @@ -532,6 +537,80 @@ def _hash_fn(fields): [f'return hash({self_tuple})']) +def _is_classvar(a_type, typing): + if typing: + # This test uses a typing internal class, but it's the best + # way to test if this is a ClassVar. + return (a_type is typing.ClassVar + or (type(a_type) is typing._GenericAlias + and a_type.__origin__ is typing.ClassVar)) + + +def _is_initvar(a_type, dataclasses): + # The module we're checking against is the module we're + # currently in (dataclasses.py). + return a_type is dataclasses.InitVar + + +def _is_type(annotation, cls, a_module, a_type, is_type_predicate): + # Given a type annotation string, does it refer to a_type in + # a_module? For example, when checking that annotation denotes a + # ClassVar, then a_module is typing, and a_type is + # typing.ClassVar. + + # It's possible to look up a_module given a_type, but it involves + # looking in sys.modules (again!), and seems like a waste since + # the caller already knows a_module. + + # - annotation is a string type annotation + # - cls is the class that this annotation was found in + # - a_module is the module we want to match + # - a_type is the type in that module we want to match + # - is_type_predicate is a function called with (obj, a_module) + # that determines if obj is of the desired type. + + # Since this test does not do a local namespace lookup (and + # instead only a module (global) lookup), there are some things it + # gets wrong. + + # With string annotations, this will work: + # CV = ClassVar + # @dataclass + # class C0: + # cv0: CV + + # But this will not: + # @dataclass + # class C1: + # CV = ClassVar + # cv1: CV + + # In C1, the code in this function will look up "CV" in the module + # and not find it, so it will not consider cv1 as a ClassVar. + # This is a fairly obscure corner case, and the best way to fix it + # would be to eval() the string "CV" with the correct global and + # local namespaces. However that would involve a eval() penalty + # for every single field of every dataclass that's defined. It + # was judged not worth it. + + match = _MODULE_IDENTIFIER_RE.match(annotation) + if match: + ns = None + module_name = match.group(1) + if not module_name: + # No module name, assume the class's module did + # "from dataclasses import InitVar". + ns = sys.modules.get(cls.__module__).__dict__ + else: + # Look up module_name in the class's module. + module = sys.modules.get(cls.__module__) + if module and module.__dict__.get(module_name) is a_module: + ns = sys.modules.get(a_type.__module__).__dict__ + if ns and is_type_predicate(ns.get(match.group(2)), a_module): + return True + return False + + def _get_field(cls, a_name, a_type): # Return a Field object for this field name and type. ClassVars # and InitVars are also returned, but marked as such (see @@ -548,34 +627,54 @@ def _get_field(cls, a_name, a_type): default = MISSING f = field(default=default) - # Assume it's a normal field until proven otherwise. - f._field_type = _FIELD - # Only at this point do we know the name and the type. Set them. f.name = a_name f.type = a_type - # If typing has not been imported, then it's impossible for - # any annotation to be a ClassVar. So, only look for ClassVar - # if typing has been imported. + # Assume it's a normal field until proven otherwise. We're next + # going to decide if it's a ClassVar or InitVar, everything else + # is just a normal field. + f._field_type = _FIELD + + # In addition to checking for actual types here, also check for + # string annotations. get_type_hints() won't always work for us + # (see https://github.com/python/typing/issues/508 for example), + # plus it's expensive and would require an eval for every stirng + # annotation. So, make a best effort to see if this is a + # ClassVar or InitVar using regex's and checking that the thing + # referenced is actually of the correct type. + + # For the complete discussion, see https://bugs.python.org/issue33453 + + # If typing has not been imported, then it's impossible for any + # annotation to be a ClassVar. So, only look for ClassVar if + # typing has been imported by any module (not necessarily cls's + # module). typing = sys.modules.get('typing') - if typing is not None: + if typing: # This test uses a typing internal class, but it's the best # way to test if this is a ClassVar. - if (type(a_type) is typing._GenericAlias and - a_type.__origin__ is typing.ClassVar): - # This field is a ClassVar, so it's not a field. + if (_is_classvar(a_type, typing) + or (isinstance(f.type, str) + and _is_type(f.type, cls, typing, typing.ClassVar, + _is_classvar))): f._field_type = _FIELD_CLASSVAR + # If the type is InitVar, or if it's a matching string annotation, + # then it's an InitVar. if f._field_type is _FIELD: - # Check if this is an InitVar. - if a_type is InitVar: - # InitVars are not fields, either. + # The module we're checking against is the module we're + # currently in (dataclasses.py). + dataclasses = sys.modules[__name__] + if (_is_initvar(a_type, dataclasses) + or (isinstance(f.type, str) + and _is_type(f.type, cls, dataclasses, dataclasses.InitVar, + _is_initvar))): f._field_type = _FIELD_INITVAR - # Validations for fields. This is delayed until now, instead of - # in the Field() constructor, since only here do we know the field - # name, which allows better error reporting. + # Validations for individual fields. This is delayed until now, + # instead of in the Field() constructor, since only here do we + # know the field name, which allows for better error reporting. # Special restrictions for ClassVar and InitVar. if f._field_type in (_FIELD_CLASSVAR, _FIELD_INITVAR): @@ -605,7 +704,6 @@ def _set_new_attribute(cls, name, value): return False - # Decide if/how we're going to create a hash function. Key is # (unsafe_hash, eq, frozen, does-hash-exist). Value is the action to # take. The common case is to do nothing, so instead of providing a @@ -865,7 +963,7 @@ def fields(class_or_instance): # Might it be worth caching this, per class? try: - fields = getattr(class_or_instance, _FIELDS) + fields = getattr(class_or_instance, _FIELDS) except AttributeError: raise TypeError('must be called with a dataclass type or instance') diff --git a/Lib/test/dataclass_module_1.py b/Lib/test/dataclass_module_1.py new file mode 100644 index 00000000000..87a33f8191d --- /dev/null +++ b/Lib/test/dataclass_module_1.py @@ -0,0 +1,32 @@ +#from __future__ import annotations +USING_STRINGS = False + +# dataclass_module_1.py and dataclass_module_1_str.py are identical +# except only the latter uses string annotations. + +import dataclasses +import typing + +T_CV2 = typing.ClassVar[int] +T_CV3 = typing.ClassVar + +T_IV2 = dataclasses.InitVar[int] +T_IV3 = dataclasses.InitVar + +@dataclasses.dataclass +class CV: + T_CV4 = typing.ClassVar + cv0: typing.ClassVar[int] = 20 + cv1: typing.ClassVar = 30 + cv2: T_CV2 + cv3: T_CV3 + not_cv4: T_CV4 # When using string annotations, this field is not recognized as a ClassVar. + +@dataclasses.dataclass +class IV: + T_IV4 = dataclasses.InitVar + iv0: dataclasses.InitVar[int] + iv1: dataclasses.InitVar + iv2: T_IV2 + iv3: T_IV3 + not_iv4: T_IV4 # When using string annotations, this field is not recognized as an InitVar. diff --git a/Lib/test/dataclass_module_1_str.py b/Lib/test/dataclass_module_1_str.py new file mode 100644 index 00000000000..6de490b7ad7 --- /dev/null +++ b/Lib/test/dataclass_module_1_str.py @@ -0,0 +1,32 @@ +from __future__ import annotations +USING_STRINGS = True + +# dataclass_module_1.py and dataclass_module_1_str.py are identical +# except only the latter uses string annotations. + +import dataclasses +import typing + +T_CV2 = typing.ClassVar[int] +T_CV3 = typing.ClassVar + +T_IV2 = dataclasses.InitVar[int] +T_IV3 = dataclasses.InitVar + +@dataclasses.dataclass +class CV: + T_CV4 = typing.ClassVar + cv0: typing.ClassVar[int] = 20 + cv1: typing.ClassVar = 30 + cv2: T_CV2 + cv3: T_CV3 + not_cv4: T_CV4 # When using string annotations, this field is not recognized as a ClassVar. + +@dataclasses.dataclass +class IV: + T_IV4 = dataclasses.InitVar + iv0: dataclasses.InitVar[int] + iv1: dataclasses.InitVar + iv2: T_IV2 + iv3: T_IV3 + not_iv4: T_IV4 # When using string annotations, this field is not recognized as an InitVar. diff --git a/Lib/test/dataclass_module_2.py b/Lib/test/dataclass_module_2.py new file mode 100644 index 00000000000..68fb733e299 --- /dev/null +++ b/Lib/test/dataclass_module_2.py @@ -0,0 +1,32 @@ +#from __future__ import annotations +USING_STRINGS = False + +# dataclass_module_2.py and dataclass_module_2_str.py are identical +# except only the latter uses string annotations. + +from dataclasses import dataclass, InitVar +from typing import ClassVar + +T_CV2 = ClassVar[int] +T_CV3 = ClassVar + +T_IV2 = InitVar[int] +T_IV3 = InitVar + +@dataclass +class CV: + T_CV4 = ClassVar + cv0: ClassVar[int] = 20 + cv1: ClassVar = 30 + cv2: T_CV2 + cv3: T_CV3 + not_cv4: T_CV4 # When using string annotations, this field is not recognized as a ClassVar. + +@dataclass +class IV: + T_IV4 = InitVar + iv0: InitVar[int] + iv1: InitVar + iv2: T_IV2 + iv3: T_IV3 + not_iv4: T_IV4 # When using string annotations, this field is not recognized as an InitVar. diff --git a/Lib/test/dataclass_module_2_str.py b/Lib/test/dataclass_module_2_str.py new file mode 100644 index 00000000000..b363d17c176 --- /dev/null +++ b/Lib/test/dataclass_module_2_str.py @@ -0,0 +1,32 @@ +from __future__ import annotations +USING_STRINGS = True + +# dataclass_module_2.py and dataclass_module_2_str.py are identical +# except only the latter uses string annotations. + +from dataclasses import dataclass, InitVar +from typing import ClassVar + +T_CV2 = ClassVar[int] +T_CV3 = ClassVar + +T_IV2 = InitVar[int] +T_IV3 = InitVar + +@dataclass +class CV: + T_CV4 = ClassVar + cv0: ClassVar[int] = 20 + cv1: ClassVar = 30 + cv2: T_CV2 + cv3: T_CV3 + not_cv4: T_CV4 # When using string annotations, this field is not recognized as a ClassVar. + +@dataclass +class IV: + T_IV4 = InitVar + iv0: InitVar[int] + iv1: InitVar + iv2: T_IV2 + iv3: T_IV3 + not_iv4: T_IV4 # When using string annotations, this field is not recognized as an InitVar. diff --git a/Lib/test/test_dataclasses.py b/Lib/test/test_dataclasses.py index 2c890a2cbe9..b251c04cb9a 100755 --- a/Lib/test/test_dataclasses.py +++ b/Lib/test/test_dataclasses.py @@ -12,6 +12,9 @@ from collections import deque, OrderedDict, namedtuple from functools import total_ordering +import typing # Needed for the string "typing.ClassVar[int]" to work as an annotation. +import dataclasses # Needed for the string "dataclasses.InitVar[int]" to work as an annotation. + # Just any custom exception we can catch. class CustomError(Exception): pass @@ -600,7 +603,6 @@ class C: class C: x: ClassVar[typ] = Subclass() - def test_deliberately_mutable_defaults(self): # If a mutable default isn't in the known list of # (list, dict, set), then it's okay. @@ -924,14 +926,16 @@ class C: z: ClassVar[int] = 1000 w: ClassVar[int] = 2000 t: ClassVar[int] = 3000 + s: ClassVar = 4000 c = C(5) self.assertEqual(repr(c), 'TestCase.test_class_var..C(x=5, y=10)') self.assertEqual(len(fields(C)), 2) # We have 2 fields. - self.assertEqual(len(C.__annotations__), 5) # And 3 ClassVars. + self.assertEqual(len(C.__annotations__), 6) # And 4 ClassVars. self.assertEqual(c.z, 1000) self.assertEqual(c.w, 2000) self.assertEqual(c.t, 3000) + self.assertEqual(c.s, 4000) C.z += 1 self.assertEqual(c.z, 1001) c = C(20) @@ -939,6 +943,7 @@ class C: self.assertEqual(c.z, 1001) self.assertEqual(c.w, 2000) self.assertEqual(c.t, 3000) + self.assertEqual(c.s, 4000) def test_class_var_no_default(self): # If a ClassVar has no default value, it should not be set on the class. @@ -2798,5 +2803,149 @@ class C: self.assertEqual(D.__set_name__.call_count, 1) +class TestStringAnnotations(unittest.TestCase): + def test_classvar(self): + # Some expressions recognized as ClassVar really aren't. But + # if you're using string annotations, it's not an exact + # science. + # These tests assume that both "import typing" and "from + # typing import *" have been run in this file. + for typestr in ('ClassVar[int]', + 'ClassVar [int]' + ' ClassVar [int]', + 'ClassVar', + ' ClassVar ', + 'typing.ClassVar[int]', + 'typing.ClassVar[str]', + ' typing.ClassVar[str]', + 'typing .ClassVar[str]', + 'typing. ClassVar[str]', + 'typing.ClassVar [str]', + 'typing.ClassVar [ str]', + + # Not syntactically valid, but these will + # be treated as ClassVars. + 'typing.ClassVar.[int]', + 'typing.ClassVar+', + ): + with self.subTest(typestr=typestr): + @dataclass + class C: + x: typestr + + # x is a ClassVar, so C() takes no args. + C() + + # And it won't appear in the class's dict because it doesn't + # have a default. + self.assertNotIn('x', C.__dict__) + + def test_isnt_classvar(self): + for typestr in ('CV', + 't.ClassVar', + 't.ClassVar[int]', + 'typing..ClassVar[int]', + 'Classvar', + 'Classvar[int]', + 'typing.ClassVarx[int]', + 'typong.ClassVar[int]', + 'dataclasses.ClassVar[int]', + 'typingxClassVar[str]', + ): + with self.subTest(typestr=typestr): + @dataclass + class C: + x: typestr + + # x is not a ClassVar, so C() takes one arg. + self.assertEqual(C(10).x, 10) + + def test_initvar(self): + # These tests assume that both "import dataclasses" and "from + # dataclasses import *" have been run in this file. + for typestr in ('InitVar[int]', + 'InitVar [int]' + ' InitVar [int]', + 'InitVar', + ' InitVar ', + 'dataclasses.InitVar[int]', + 'dataclasses.InitVar[str]', + ' dataclasses.InitVar[str]', + 'dataclasses .InitVar[str]', + 'dataclasses. InitVar[str]', + 'dataclasses.InitVar [str]', + 'dataclasses.InitVar [ str]', + + # Not syntactically valid, but these will + # be treated as InitVars. + 'dataclasses.InitVar.[int]', + 'dataclasses.InitVar+', + ): + with self.subTest(typestr=typestr): + @dataclass + class C: + x: typestr + + # x is an InitVar, so doesn't create a member. + with self.assertRaisesRegex(AttributeError, + "object has no attribute 'x'"): + C(1).x + + def test_isnt_initvar(self): + for typestr in ('IV', + 'dc.InitVar', + 'xdataclasses.xInitVar', + 'typing.xInitVar[int]', + ): + with self.subTest(typestr=typestr): + @dataclass + class C: + x: typestr + + # x is not an InitVar, so there will be a member x. + self.assertEqual(C(10).x, 10) + + def test_classvar_module_level_import(self): + from . import dataclass_module_1 + from . import dataclass_module_1_str + from . import dataclass_module_2 + from . import dataclass_module_2_str + + for m in (dataclass_module_1, dataclass_module_1_str, + dataclass_module_2, dataclass_module_2_str, + ): + with self.subTest(m=m): + # There's a difference in how the ClassVars are + # interpreted when using string annotations or + # not. See the imported modules for details. + if m.USING_STRINGS: + c = m.CV(10) + else: + c = m.CV() + self.assertEqual(c.cv0, 20) + + + # There's a difference in how the InitVars are + # interpreted when using string annotations or + # not. See the imported modules for details. + c = m.IV(0, 1, 2, 3, 4) + + for field_name in ('iv0', 'iv1', 'iv2', 'iv3'): + with self.subTest(field_name=field_name): + with self.assertRaisesRegex(AttributeError, f"object has no attribute '{field_name}'"): + # Since field_name is an InitVar, it's + # not an instance field. + getattr(c, field_name) + + if m.USING_STRINGS: + # iv4 is interpreted as a normal field. + self.assertIn('not_iv4', c.__dict__) + self.assertEqual(c.not_iv4, 4) + else: + # iv4 is interpreted as an InitVar, so it + # won't exist on the instance. + self.assertNotIn('not_iv4', c.__dict__) + + if __name__ == '__main__': unittest.main() diff --git a/Misc/NEWS.d/next/Library/2018-05-12-06-01-02.bpo-33453.Fj-jMD.rst b/Misc/NEWS.d/next/Library/2018-05-12-06-01-02.bpo-33453.Fj-jMD.rst new file mode 100644 index 00000000000..6595b1265a4 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2018-05-12-06-01-02.bpo-33453.Fj-jMD.rst @@ -0,0 +1,4 @@ +Fix dataclasses to work if using literal string type annotations or if using +PEP 563 "Postponed Evaluation of Annotations". Only specific string +prefixes are detected for both ClassVar ("ClassVar" and "typing.ClassVar") +and InitVar ("InitVar" and "dataclasses.InitVar").