bpo-36876: Add a tool that identifies unsupported global C variables. (#15877)

This commit is contained in:
Eric Snow 2019-09-11 19:49:45 +01:00 committed by GitHub
parent 9936371af2
commit ee536b2020
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
51 changed files with 9467 additions and 19 deletions

View file

@ -0,0 +1,22 @@
import unittest
import test.test_tools
test.test_tools.skip_if_missing('c-analyzer')
with test.test_tools.imports_under_tool('c-analyzer'):
from c_globals.__main__ import main
class ActualChecks(unittest.TestCase):
    """Run the ``check`` command through ``c_globals.__main__.main``."""

    # XXX Also run the check in "make check".
    @unittest.expectedFailure
    def test_check_c_globals(self):
        """Call ``main('check', {})``; skip if it raises NotImplementedError."""
        try:
            main('check', {})
        except NotImplementedError:
            self.skipTest('not supported on this host')
if __name__ == '__main__':
    # Test needs to be a package, so we can do relative imports.
    # When this file is executed directly, run its tests via unittest.
    unittest.main()

View file

@ -2,7 +2,7 @@
# Copyright 2012-2013 by Larry Hastings.
# Licensed to the PSF under a contributor agreement.
from test import support
from test import support, test_tools
from unittest import TestCase
import collections
import inspect
@ -10,17 +10,10 @@
import sys
import unittest
clinic_path = os.path.join(os.path.dirname(__file__), '..', '..', 'Tools', 'clinic')
clinic_path = os.path.normpath(clinic_path)
if not os.path.exists(clinic_path):
raise unittest.SkipTest(f'{clinic_path!r} path does not exist')
sys.path.append(clinic_path)
try:
test_tools.skip_if_missing('clinic')
with test_tools.imports_under_tool('clinic'):
import clinic
from clinic import DSLParser
finally:
del sys.path[-1]
class FakeConverter:

View file

@ -1,20 +1,33 @@
"""Support functions for testing scripts in the Tools directory."""
import os
import unittest
import contextlib
import importlib
import os.path
import unittest
from test import support
# Derive the base directory by applying os.path.dirname() four times to
# this file's path; the trailing comments name the directory at each step.
basepath = os.path.dirname(                    # <src/install dir>
                os.path.dirname(                # Lib
                    os.path.dirname(            # test
                        os.path.dirname(__file__))))    # test_tools
# Same computation wrapped in os.path.normpath().  Because it rebinds the
# same name, this later assignment is the value used below.
basepath = os.path.normpath(
        os.path.dirname(                    # <src/install dir>
            os.path.dirname(                # Lib
                os.path.dirname(            # test
                    os.path.dirname(__file__)))))    # test_tools
# "Tools" directory under the base path.
toolsdir = os.path.join(basepath, 'Tools')
# "scripts" sub-directory of the Tools directory.
scriptsdir = os.path.join(toolsdir, 'scripts')
def skip_if_missing():
    """Raise ``unittest.SkipTest`` unless ``scriptsdir`` is a directory.

    Raises:
        unittest.SkipTest: if ``scriptsdir`` is not an existing directory.
    """
    if os.path.isdir(scriptsdir):
        return
    raise unittest.SkipTest('scripts directory could not be found')
def skip_if_missing(tool=None):
    """Raise ``unittest.SkipTest`` unless the directory for *tool* exists.

    Args:
        tool: Name of a sub-directory of ``toolsdir``.  When falsy, the
            ``scripts`` directory (``scriptsdir``) is checked instead.

    Raises:
        unittest.SkipTest: if the selected path is not an existing directory.
    """
    if not tool:
        # No tool given: fall back to the scripts directory.
        tool, tooldir = 'scripts', scriptsdir
    else:
        tooldir = os.path.join(toolsdir, tool)
    if os.path.isdir(tooldir):
        return
    raise unittest.SkipTest(f'{tool} directory could not be found')
@contextlib.contextmanager
def imports_under_tool(name, *subdirs):
    """Context manager that runs its body inside ``support.DirsOnSysPath``.

    Args:
        name: Sub-directory of ``toolsdir`` to use.
        *subdirs: Further path components joined below ``toolsdir/name``.

    Yields:
        Whatever ``support.DirsOnSysPath(...)`` returns on entry.
    """
    target = os.path.join(toolsdir, name, *subdirs)
    with support.DirsOnSysPath(target) as syspath_cm:
        yield syspath_cm
def import_tool(toolname):
with support.DirsOnSysPath(scriptsdir):

View file

@ -0,0 +1,15 @@
import contextlib
import os.path
import test.test_tools
from test.support import load_package_tests
@contextlib.contextmanager
def tool_imports_for_tests():
    """Context manager for importing the ``c-analyzer`` tool's modules.

    Calls ``test.test_tools.skip_if_missing('c-analyzer')`` first, then runs
    the body inside ``test.test_tools.imports_under_tool('c-analyzer')``.
    """
    tool = 'c-analyzer'
    helpers = test.test_tools
    helpers.skip_if_missing(tool)
    with helpers.imports_under_tool(tool):
        yield
def load_tests(*args):
    """Forward to ``load_package_tests`` for this package's directory.

    Args:
        *args: Passed through unchanged after the directory argument.

    Returns:
        The result of ``load_package_tests``.
    """
    package_dir = os.path.dirname(__file__)
    return load_package_tests(package_dir, *args)

View file

@ -0,0 +1,5 @@
# Bring the package's load_tests hook into this module's namespace;
# presumably so unittest.main() picks it up via the load_tests protocol.
from . import load_tests
import unittest
# Run the tests when the package is executed.
unittest.main()

View file

@ -0,0 +1,470 @@
import os.path
import unittest
from .. import tool_imports_for_tests
with tool_imports_for_tests():
from c_analyzer_common.files import (
iter_files, _walk_tree, glob_tree,
)
def fixpath(filename):
    """Return *filename* with every ``'/'`` replaced by ``os.path.sep``."""
    return os.path.sep.join(filename.split('/'))
class IterFilesTests(unittest.TestCase):
maxDiff = None
_return_walk = None
@property
def calls(self):
try:
return self._calls
except AttributeError:
self._calls = []
return self._calls
def set_files(self, *filesperroot):
roots = []
result = []
for root, files in filesperroot:
root = fixpath(root)
roots.append(root)
result.append([os.path.join(root, fixpath(f))
for f in files])
self._return_walk = result
return roots
def _walk(self, root, *, suffix=None, walk=None):
self.calls.append(('_walk', (root, suffix, walk)))
return iter(self._return_walk.pop(0))
def _glob(self, root, *, suffix=None):
self.calls.append(('_glob', (root, suffix)))
return iter(self._return_walk.pop(0))
def test_typical(self):
dirnames = self.set_files(
('spam', ['file1.c', 'file2.c']),
('eggs', ['ham/file3.h']),
)
suffixes = ('.c', '.h')
files = list(iter_files(dirnames, suffixes,
_glob=self._glob,
_walk=self._walk))
self.assertEqual(files, [
fixpath('spam/file1.c'),
fixpath('spam/file2.c'),
fixpath('eggs/ham/file3.h'),
])
self.assertEqual(self.calls, [
('_walk', ('spam', None, _walk_tree)),
('_walk', ('eggs', None, _walk_tree)),
])
def test_single_root(self):
self._return_walk = [
[fixpath('spam/file1.c'), fixpath('spam/file2.c')],
]
files = list(iter_files('spam', '.c',
_glob=self._glob,
_walk=self._walk))
self.assertEqual(files, [
fixpath('spam/file1.c'),
fixpath('spam/file2.c'),
])
self.assertEqual(self.calls, [
('_walk', ('spam', '.c', _walk_tree)),
])
def test_one_root(self):
self._return_walk = [
[fixpath('spam/file1.c'), fixpath('spam/file2.c')],
]
files = list(iter_files(['spam'], '.c',
_glob=self._glob,
_walk=self._walk))
self.assertEqual(files, [
fixpath('spam/file1.c'),
fixpath('spam/file2.c'),
])
self.assertEqual(self.calls, [
('_walk', ('spam', '.c', _walk_tree)),
])
def test_multiple_roots(self):
dirnames = self.set_files(
('spam', ['file1.c', 'file2.c']),
('eggs', ['ham/file3.c']),
)
files = list(iter_files(dirnames, '.c',
_glob=self._glob,
_walk=self._walk))
self.assertEqual(files, [
fixpath('spam/file1.c'),
fixpath('spam/file2.c'),
fixpath('eggs/ham/file3.c'),
])
self.assertEqual(self.calls, [
('_walk', ('spam', '.c', _walk_tree)),
('_walk', ('eggs', '.c', _walk_tree)),
])
def test_no_roots(self):
files = list(iter_files([], '.c',
_glob=self._glob,
_walk=self._walk))
self.assertEqual(files, [])
self.assertEqual(self.calls, [])
def test_single_suffix(self):
self._return_walk = [
[fixpath('spam/file1.c'),
fixpath('spam/eggs/file3.c'),
],
]
files = list(iter_files('spam', '.c',
_glob=self._glob,
_walk=self._walk))
self.assertEqual(files, [
fixpath('spam/file1.c'),
fixpath('spam/eggs/file3.c'),
])
self.assertEqual(self.calls, [
('_walk', ('spam', '.c', _walk_tree)),
])
def test_one_suffix(self):
self._return_walk = [
[fixpath('spam/file1.c'),
fixpath('spam/file1.h'),
fixpath('spam/file1.o'),
fixpath('spam/eggs/file3.c'),
],
]
files = list(iter_files('spam', ['.c'],
_glob=self._glob,
_walk=self._walk))
self.assertEqual(files, [
fixpath('spam/file1.c'),
fixpath('spam/eggs/file3.c'),
])
self.assertEqual(self.calls, [
('_walk', ('spam', None, _walk_tree)),
])
def test_multiple_suffixes(self):
self._return_walk = [
[fixpath('spam/file1.c'),
fixpath('spam/file1.h'),
fixpath('spam/file1.o'),
fixpath('spam/eggs/file3.c'),
],
]
files = list(iter_files('spam', ('.c', '.h'),
_glob=self._glob,
_walk=self._walk))
self.assertEqual(files, [
fixpath('spam/file1.c'),
fixpath('spam/file1.h'),
fixpath('spam/eggs/file3.c'),
])
self.assertEqual(self.calls, [
('_walk', ('spam', None, _walk_tree)),
])
def test_no_suffix(self):
expected = [fixpath('spam/file1.c'),
fixpath('spam/file1.h'),
fixpath('spam/file1.o'),
fixpath('spam/eggs/file3.c'),
]
for suffix in (None, '', ()):
with self.subTest(suffix):
self.calls.clear()
self._return_walk = [list(expected)]
files = list(iter_files('spam', suffix,
_glob=self._glob,
_walk=self._walk))
self.assertEqual(files, expected)
self.assertEqual(self.calls, [
('_walk', ('spam', suffix, _walk_tree)),
])
def test_relparent(self):
dirnames = self.set_files(
('/x/y/z/spam', ['file1.c', 'file2.c']),
('/x/y/z/eggs', ['ham/file3.c']),
)
files = list(iter_files(dirnames, '.c', fixpath('/x/y'),
_glob=self._glob,
_walk=self._walk))
self.assertEqual(files, [
fixpath('z/spam/file1.c'),
fixpath('z/spam/file2.c'),
fixpath('z/eggs/ham/file3.c'),
])
self.assertEqual(self.calls, [
('_walk', (fixpath('/x/y/z/spam'), '.c', _walk_tree)),
('_walk', (fixpath('/x/y/z/eggs'), '.c', _walk_tree)),
])
def test_glob(self):
dirnames = self.set_files(
('spam', ['file1.c', 'file2.c']),
('eggs', ['ham/file3.c']),
)
files = list(iter_files(dirnames, '.c',
get_files=glob_tree,
_walk=self._walk,
_glob=self._glob))
self.assertEqual(files, [
fixpath('spam/file1.c'),
fixpath('spam/file2.c'),
fixpath('eggs/ham/file3.c'),
])
self.assertEqual(self.calls, [
('_glob', ('spam', '.c')),
('_glob', ('eggs', '.c')),
])
def test_alt_walk_func(self):
dirnames = self.set_files(
('spam', ['file1.c', 'file2.c']),
('eggs', ['ham/file3.c']),
)
def get_files(root):
return None
files = list(iter_files(dirnames, '.c',
get_files=get_files,
_walk=self._walk,
_glob=self._glob))
self.assertEqual(files, [
fixpath('spam/file1.c'),
fixpath('spam/file2.c'),
fixpath('eggs/ham/file3.c'),
])
self.assertEqual(self.calls, [
('_walk', ('spam', '.c', get_files)),
('_walk', ('eggs', '.c', get_files)),
])
# def test_no_dirnames(self):
# dirnames = []
# filter_by_name = None
#
# files = list(iter_files(dirnames, filter_by_name,
# _walk=self._walk))
#
# self.assertEqual(files, [])
# self.assertEqual(self.calls, [])
#
# def test_no_filter(self):
# self._return_walk = [
# [('spam', (), ('file1', 'file2.c', 'file3.h', 'file4.o')),
# ],
# ]
# dirnames = [
# 'spam',
# ]
# filter_by_name = None
#
# files = list(iter_files(dirnames, filter_by_name,
# _walk=self._walk))
#
# self.assertEqual(files, [
# fixpath('spam/file1'),
# fixpath('spam/file2.c'),
# fixpath('spam/file3.h'),
# fixpath('spam/file4.o'),
# ])
# self.assertEqual(self.calls, [
# ('_walk', ('spam',)),
# ])
#
# def test_no_files(self):
# self._return_walk = [
# [('spam', (), ()),
# ],
# [(fixpath('eggs/ham'), (), ()),
# ],
# ]
# dirnames = [
# 'spam',
# fixpath('eggs/ham'),
# ]
# filter_by_name = None
#
# files = list(iter_files(dirnames, filter_by_name,
# _walk=self._walk))
#
# self.assertEqual(files, [])
# self.assertEqual(self.calls, [
# ('_walk', ('spam',)),
# ('_walk', (fixpath('eggs/ham'),)),
# ])
#
# def test_tree(self):
# self._return_walk = [
# [('spam', ('sub1', 'sub2', 'sub3'), ('file1',)),
# (fixpath('spam/sub1'), ('sub1sub1',), ('file2', 'file3')),
# (fixpath('spam/sub1/sub1sub1'), (), ('file4',)),
# (fixpath('spam/sub2'), (), ()),
# (fixpath('spam/sub3'), (), ('file5',)),
# ],
# [(fixpath('eggs/ham'), (), ('file6',)),
# ],
# ]
# dirnames = [
# 'spam',
# fixpath('eggs/ham'),
# ]
# filter_by_name = None
#
# files = list(iter_files(dirnames, filter_by_name,
# _walk=self._walk))
#
# self.assertEqual(files, [
# fixpath('spam/file1'),
# fixpath('spam/sub1/file2'),
# fixpath('spam/sub1/file3'),
# fixpath('spam/sub1/sub1sub1/file4'),
# fixpath('spam/sub3/file5'),
# fixpath('eggs/ham/file6'),
# ])
# self.assertEqual(self.calls, [
# ('_walk', ('spam',)),
# ('_walk', (fixpath('eggs/ham'),)),
# ])
#
# def test_filter_suffixes(self):
# self._return_walk = [
# [('spam', (), ('file1', 'file2.c', 'file3.h', 'file4.o')),
# ],
# ]
# dirnames = [
# 'spam',
# ]
# filter_by_name = ('.c', '.h')
#
# files = list(iter_files(dirnames, filter_by_name,
# _walk=self._walk))
#
# self.assertEqual(files, [
# fixpath('spam/file2.c'),
# fixpath('spam/file3.h'),
# ])
# self.assertEqual(self.calls, [
# ('_walk', ('spam',)),
# ])
#
# def test_some_filtered(self):
# self._return_walk = [
# [('spam', (), ('file1', 'file2', 'file3', 'file4')),
# ],
# ]
# dirnames = [
# 'spam',
# ]
# def filter_by_name(filename, results=[False, True, False, True]):
# self.calls.append(('filter_by_name', (filename,)))
# return results.pop(0)
#
# files = list(iter_files(dirnames, filter_by_name,
# _walk=self._walk))
#
# self.assertEqual(files, [
# fixpath('spam/file2'),
# fixpath('spam/file4'),
# ])
# self.assertEqual(self.calls, [
# ('_walk', ('spam',)),
# ('filter_by_name', ('file1',)),
# ('filter_by_name', ('file2',)),
# ('filter_by_name', ('file3',)),
# ('filter_by_name', ('file4',)),
# ])
#
# def test_none_filtered(self):
# self._return_walk = [
# [('spam', (), ('file1', 'file2', 'file3', 'file4')),
# ],
# ]
# dirnames = [
# 'spam',
# ]
# def filter_by_name(filename, results=[True, True, True, True]):
# self.calls.append(('filter_by_name', (filename,)))
# return results.pop(0)
#
# files = list(iter_files(dirnames, filter_by_name,
# _walk=self._walk))
#
# self.assertEqual(files, [
# fixpath('spam/file1'),
# fixpath('spam/file2'),
# fixpath('spam/file3'),
# fixpath('spam/file4'),
# ])
# self.assertEqual(self.calls, [
# ('_walk', ('spam',)),
# ('filter_by_name', ('file1',)),
# ('filter_by_name', ('file2',)),
# ('filter_by_name', ('file3',)),
# ('filter_by_name', ('file4',)),
# ])
#
# def test_all_filtered(self):
# self._return_walk = [
# [('spam', (), ('file1', 'file2', 'file3', 'file4')),
# ],
# ]
# dirnames = [
# 'spam',
# ]
# def filter_by_name(filename, results=[False, False, False, False]):
# self.calls.append(('filter_by_name', (filename,)))
# return results.pop(0)
#
# files = list(iter_files(dirnames, filter_by_name,
# _walk=self._walk))
#
# self.assertEqual(files, [])
# self.assertEqual(self.calls, [
# ('_walk', ('spam',)),
# ('filter_by_name', ('file1',)),
# ('filter_by_name', ('file2',)),
# ('filter_by_name', ('file3',)),
# ('filter_by_name', ('file4',)),
# ])

View file

@ -0,0 +1,194 @@
import string
import unittest
from ..util import PseudoStr, StrProxy, Object
from .. import tool_imports_for_tests
with tool_imports_for_tests():
from c_analyzer_common.info import ID
class IDTests(unittest.TestCase):
    """Tests for ``c_analyzer_common.info.ID``."""

    # A complete, valid (filename, funcname, name) triple.
    VALID_ARGS = (
            'x/y/z/spam.c',
            'func',
            'eggs',
            )
    VALID_KWARGS = dict(zip(ID._fields, VALID_ARGS))
    VALID_EXPECTED = VALID_ARGS

    def test_from_raw(self):
        cases = [
            ('', None),
            (None, None),
            ('spam', (None, None, 'spam')),
            (('spam',), (None, None, 'spam')),
            (('x/y/z/spam.c', 'spam'), ('x/y/z/spam.c', None, 'spam')),
            (self.VALID_ARGS, self.VALID_EXPECTED),
            (self.VALID_KWARGS, self.VALID_EXPECTED),
            ]
        for raw, expected in cases:
            with self.subTest(raw):
                result = ID.from_raw(raw)

                self.assertEqual(result, expected)

    def test_minimal(self):
        ident = ID(filename=None, funcname=None, name='eggs')

        self.assertEqual(ident, (None, None, 'eggs'))

    def test_init_typical_global(self):
        ident = ID(filename='x/y/z/spam.c', funcname=None, name='eggs')

        self.assertEqual(ident, ('x/y/z/spam.c', None, 'eggs'))

    def test_init_typical_local(self):
        ident = ID(filename='x/y/z/spam.c', funcname='func', name='eggs')

        self.assertEqual(ident, ('x/y/z/spam.c', 'func', 'eggs'))

    def test_init_all_missing(self):
        for value in ('', None):
            with self.subTest(repr(value)):
                ident = ID(filename=value, funcname=value, name=value)

                self.assertEqual(ident, (None, None, None))

    def test_init_all_coerced(self):
        cases = [
            ('str subclass',
             dict(
                 filename=PseudoStr('x/y/z/spam.c'),
                 funcname=PseudoStr('func'),
                 name=PseudoStr('eggs'),
                 ),
             ('x/y/z/spam.c',
              'func',
              'eggs',
              )),
            ('non-str',
             dict(
                 filename=StrProxy('x/y/z/spam.c'),
                 funcname=Object(),
                 name=('a', 'b', 'c'),
                 ),
             ('x/y/z/spam.c',
              '<object>',
              "('a', 'b', 'c')",
              )),
            ]
        for summary, kwargs, expected in cases:
            with self.subTest(summary):
                ident = ID(**kwargs)

                # Every field must be an exact str, not a subclass.
                for field in ID._fields:
                    self.assertIs(type(getattr(ident, field)), str)
                self.assertEqual(tuple(ident), expected)

    def test_iterable(self):
        ident = ID(**self.VALID_KWARGS)

        # Unpacking requires exactly three items.
        filename, funcname, name = ident

        unpacked = (filename, funcname, name)
        for actual, expected in zip(unpacked, self.VALID_EXPECTED):
            self.assertEqual(actual, expected)

    def test_fields(self):
        ident = ID('a', 'b', 'z')

        self.assertEqual(ident.filename, 'a')
        self.assertEqual(ident.funcname, 'b')
        self.assertEqual(ident.name, 'z')

    def test_validate_typical(self):
        ident = ID(filename='x/y/z/spam.c', funcname='func', name='eggs')

        ident.validate()  # This does not fail.

    def test_validate_missing_field(self):
        for field in ID._fields:
            with self.subTest(field):
                ident = ID(**self.VALID_KWARGS)
                ident = ident._replace(**{field: None})

                if field == 'funcname':
                    ident.validate()  # The field can be missing (not set).
                    ident = ident._replace(filename=None)
                    ident.validate()  # Both fields can be missing (not set).
                    continue

                with self.assertRaises(TypeError):
                    ident.validate()

    def test_validate_bad_field(self):
        # Each punctuation character and digit, as a one-character string.
        badch = tuple(string.punctuation + string.digits)
        notnames = (
                '1a',
                'a.b',
                'a-b',
                '&a',
                'a++',
                ) + badch
        cases = [
            ('filename', ()),  # Any non-empty str is okay.
            ('funcname', notnames),
            ('name', notnames),
            ]
        seen = set()
        for field, invalid in cases:
            for value in invalid:
                seen.add(value)
                with self.subTest(f'{field}={value!r}'):
                    ident = ID(**self.VALID_KWARGS)
                    ident = ident._replace(**{field: value})

                    with self.assertRaises(ValueError):
                        ident.validate()

        # Values that are invalid for some field but not listed for this one.
        for field, invalid in cases:
            valid = seen - set(invalid)
            for value in valid:
                with self.subTest(f'{field}={value!r}'):
                    ident = ID(**self.VALID_KWARGS)
                    ident = ident._replace(**{field: value})

                    ident.validate()  # This does not fail.

View file

@ -0,0 +1,68 @@
import re
import textwrap
import unittest
from .. import tool_imports_for_tests
with tool_imports_for_tests():
from c_parser.info import Variable
from c_analyzer_common.info import ID
from c_analyzer_common.known import from_file
class FromFileTests(unittest.TestCase):
maxDiff = None
_return_read_tsv = ()
@property
def calls(self):
try:
return self._calls
except AttributeError:
self._calls = []
return self._calls
def _read_tsv(self, *args):
self.calls.append(('_read_tsv', args))
return self._return_read_tsv
def test_typical(self):
lines = textwrap.dedent('''
filename funcname name kind declaration
file1.c - var1 variable static int
file1.c func1 local1 variable static int
file1.c - var2 variable int
file1.c func2 local2 variable char *
file2.c - var1 variable char *
''').strip().splitlines()
lines = [re.sub(r'\s+', '\t', line, 4) for line in lines]
self._return_read_tsv = [tuple(v.strip() for v in line.split('\t'))
for line in lines[1:]]
known = from_file('spam.c', _read_tsv=self._read_tsv)
self.assertEqual(known, {
'variables': {v.id: v for v in [
Variable.from_parts('file1.c', '', 'var1', 'static int'),
Variable.from_parts('file1.c', 'func1', 'local1', 'static int'),
Variable.from_parts('file1.c', '', 'var2', 'int'),
Variable.from_parts('file1.c', 'func2', 'local2', 'char *'),
Variable.from_parts('file2.c', '', 'var1', 'char *'),
]},
})
self.assertEqual(self.calls, [
('_read_tsv', ('spam.c', 'filename\tfuncname\tname\tkind\tdeclaration')),
])
def test_empty(self):
self._return_read_tsv = []
known = from_file('spam.c', _read_tsv=self._read_tsv)
self.assertEqual(known, {
'variables': {},
})
self.assertEqual(self.calls, [
('_read_tsv', ('spam.c', 'filename\tfuncname\tname\tkind\tdeclaration')),
])

View file

@ -0,0 +1,296 @@
import sys
import unittest
from .. import tool_imports_for_tests
with tool_imports_for_tests():
from c_analyzer_common import SOURCE_DIRS
from c_analyzer_common.known import DATA_FILE as KNOWN_FILE
from c_parser import info
import c_globals as cg
from c_globals.supported import IGNORED_FILE
from c_globals.__main__ import cmd_check, cmd_show, parse_args, main
# Shared fixture: (variable, flag) pairs.  CheckTests and ShowTests split
# the entries on the flag, treating True as "supported" and False as
# "unsupported" (6 True and 3 False entries).
TYPICAL = [
        (info.Variable.from_parts('src1/spam.c', None, 'var1', 'const char *'),
         True,
         ),
        (info.Variable.from_parts('src1/spam.c', 'ham', 'initialized', 'int'),
         True,
         ),
        (info.Variable.from_parts('src1/spam.c', None, 'var2', 'PyObject *'),
         False,
         ),
        (info.Variable.from_parts('src1/eggs.c', 'tofu', 'ready', 'int'),
         True,
         ),
        (info.Variable.from_parts('src1/spam.c', None, 'freelist', '(PyTupleObject *)[10]'),
         False,
         ),
        (info.Variable.from_parts('src1/sub/ham.c', None, 'var1', 'const char const *'),
         True,
         ),
        (info.Variable.from_parts('src2/jam.c', None, 'var1', 'int'),
         True,
         ),
        (info.Variable.from_parts('src2/jam.c', None, 'var2', 'MyObject *'),
         False,
         ),
        (info.Variable.from_parts('Include/spam.h', None, 'data', 'const int'),
         True,
         ),
        ]
class CMDBase(unittest.TestCase):
    """Base class providing recording stubs for the command functions."""

    maxDiff = None

    # Value the stubbed _find() returns.
    _return_find = ()

    @property
    def calls(self):
        """Recorded ``(name, args)`` stub calls; the list is created lazily."""
        if not hasattr(self, '_calls'):
            self._calls = []
        return self._calls

    def _record(self, name, args):
        """Append one ``(name, args)`` entry to the call log."""
        self.calls.append((name, args))

    def _find(self, *args):
        """Stub for the ``_find`` hook: record the call, return the fixture."""
        self._record('_find', args)
        return self._return_find

    def _show(self, *args):
        """Stub for the ``_show`` hook: record the call only."""
        self._record('_show', args)

    def _print(self, *args):
        """Stub for the ``_print`` hook: record the call only."""
        self._record('_print', args)
class CheckTests(CMDBase):
    """Tests for ``cmd_check()`` with the ``_find``/``_show``/``_print`` stubs."""

    def _check(self, *args, **kwargs):
        """Run ``cmd_check('check', ...)`` with the recording stubs injected."""
        return cmd_check('check',
                         *args,
                         _find=self._find,
                         _show=self._show,
                         _print=self._print,
                         **kwargs)

    def test_defaults(self):
        self._return_find = []

        self._check()

        expected = ('_find', (SOURCE_DIRS, KNOWN_FILE, IGNORED_FILE))
        self.assertEqual(self.calls[0], expected)

    def test_all_supported(self):
        self._return_find = [(v, s) for v, s in TYPICAL if s]
        dirs = ['src1', 'src2', 'Include']

        self._check(dirs, ignored='ignored.tsv', known='known.tsv')

        self.assertEqual(self.calls, [
            ('_find', (dirs, 'known.tsv', 'ignored.tsv')),
            #('_print', ('okay',)),
            ])

    def test_some_unsupported(self):
        self._return_find = TYPICAL
        dirs = ['src1', 'src2', 'Include']

        with self.assertRaises(SystemExit) as cm:
            self._check(dirs, ignored='ignored.tsv', known='known.tsv')

        unsupported = [v for v, s in TYPICAL if not s]
        self.assertEqual(self.calls, [
            ('_find', (dirs, 'known.tsv', 'ignored.tsv')),
            ('_print', ('ERROR: found unsupported global variables',)),
            ('_print', ()),
            ('_show', (sorted(unsupported),)),
            ('_print', (' (3 total)',)),
            ])
        self.assertEqual(cm.exception.code, 1)
class ShowTests(CMDBase):
    """Tests for ``cmd_show()`` with the ``_find``/``_show``/``_print`` stubs."""

    def _run_show(self, *args, **kwargs):
        """Run ``cmd_show('show', ...)`` with the recording stubs injected."""
        return cmd_show('show',
                        *args,
                        _find=self._find,
                        _show=self._show,
                        _print=self._print,
                        **kwargs)

    def test_defaults(self):
        self._return_find = []

        self._run_show()

        expected = ('_find', (SOURCE_DIRS, KNOWN_FILE, IGNORED_FILE))
        self.assertEqual(self.calls[0], expected)

    def test_typical(self):
        self._return_find = TYPICAL
        dirs = ['src1', 'src2', 'Include']

        self._run_show(dirs, known='known.tsv', ignored='ignored.tsv')

        supported = [v for v, s in TYPICAL if s]
        unsupported = [v for v, s in TYPICAL if not s]
        self.assertEqual(self.calls, [
            ('_find', (dirs, 'known.tsv', 'ignored.tsv')),
            ('_print', ('supported:',)),
            ('_print', ('----------',)),
            ('_show', (sorted(supported),)),
            ('_print', (' (6 total)',)),
            ('_print', ()),
            ('_print', ('unsupported:',)),
            ('_print', ('------------',)),
            ('_show', (sorted(unsupported),)),
            ('_print', (' (3 total)',)),
            ])
class ParseArgsTests(unittest.TestCase):
    """Tests for ``parse_args()``."""

    maxDiff = None

    def test_no_args(self):
        self.errmsg = None

        def fail(msg):
            # Remember the message, then exit with it.
            self.errmsg = msg
            sys.exit(msg)

        with self.assertRaises(SystemExit):
            parse_args('cg', [], _fail=fail)

        self.assertEqual(self.errmsg, 'missing command')

    def test_check_no_args(self):
        argv = ['check']

        cmd, cmdkwargs = parse_args('cg', argv)

        self.assertEqual(cmd, 'check')
        self.assertEqual(cmdkwargs, dict(
            ignored=IGNORED_FILE,
            known=KNOWN_FILE,
            dirs=SOURCE_DIRS,
            ))

    def test_check_full_args(self):
        argv = [
            'check',
            '--ignored', 'spam.tsv',
            '--known', 'eggs.tsv',
            'dir1',
            'dir2',
            'dir3',
            ]

        cmd, cmdkwargs = parse_args('cg', argv)

        self.assertEqual(cmd, 'check')
        self.assertEqual(cmdkwargs, dict(
            ignored='spam.tsv',
            known='eggs.tsv',
            dirs=['dir1', 'dir2', 'dir3'],
            ))

    def test_show_no_args(self):
        argv = ['show']

        cmd, cmdkwargs = parse_args('cg', argv)

        self.assertEqual(cmd, 'show')
        self.assertEqual(cmdkwargs, dict(
            ignored=IGNORED_FILE,
            known=KNOWN_FILE,
            dirs=SOURCE_DIRS,
            skip_objects=False,
            ))

    def test_show_full_args(self):
        argv = [
            'show',
            '--ignored', 'spam.tsv',
            '--known', 'eggs.tsv',
            'dir1',
            'dir2',
            'dir3',
            ]

        cmd, cmdkwargs = parse_args('cg', argv)

        self.assertEqual(cmd, 'show')
        self.assertEqual(cmdkwargs, dict(
            ignored='spam.tsv',
            known='eggs.tsv',
            dirs=['dir1', 'dir2', 'dir3'],
            skip_objects=False,
            ))
def new_stub_commands(*names):
    """Build a command table whose entries all record their invocations.

    Args:
        *names: Command names to register.

    Returns:
        A ``(commands, calls)`` pair: ``commands`` maps every name to the
        same recording function, and ``calls`` is the list that function
        appends ``(cmd, kwargs)`` tuples to.
    """
    calls = []

    def cmdfunc(cmd, **kwargs):
        calls.append((cmd, kwargs))

    # One shared stub serves every command name.
    return dict.fromkeys(names, cmdfunc), calls
class MainTests(unittest.TestCase):
    """Tests for ``main()`` using stub command tables."""

    def _assert_dispatched(self, cmd):
        """Run ``main(cmd, ...)`` on stubs and check the single recorded call."""
        commands, calls = new_stub_commands('check', 'show')
        cmdkwargs = {
            'ignored': 'spam.tsv',
            'known': 'eggs.tsv',
            'dirs': ['dir1', 'dir2', 'dir3'],
            }

        main(cmd, cmdkwargs, _COMMANDS=commands)

        self.assertEqual(calls, [
            (cmd, cmdkwargs),
            ])

    def test_no_command(self):
        with self.assertRaises(ValueError):
            main(None, {})

    def test_check(self):
        self._assert_dispatched('check')

    def test_show(self):
        self._assert_dispatched('show')

View file

@ -0,0 +1,332 @@
import unittest
from .. import tool_imports_for_tests
with tool_imports_for_tests():
from c_parser import info
from c_globals.find import globals_from_binary, globals
class _Base(unittest.TestCase):
    """Shared base class that provides a lazily created call log."""

    maxDiff = None

    @property
    def calls(self):
        """Recorded stub calls; the list is created on first access."""
        if not hasattr(self, '_calls'):
            self._calls = []
        return self._calls
class StaticsFromBinaryTests(_Base):
    """Tests for ``globals_from_binary()`` with all of its hooks stubbed."""

    # Values the stubs below return.
    _return_iter_symbols = ()
    _return_resolve_symbols = ()
    _return_get_symbol_resolver = None

    def setUp(self):
        super().setUp()
        # Keyword arguments that inject the recording stubs.
        self.kwargs = {
            '_iter_symbols': self._iter_symbols,
            '_resolve': self._resolve_symbols,
            '_get_symbol_resolver': self._get_symbol_resolver,
            }

    def _iter_symbols(self, binfile, find_local_symbol):
        """Stub for ``_iter_symbols``: record the call, return the fixture."""
        self.calls.append(('_iter_symbols', (binfile, find_local_symbol)))
        return self._return_iter_symbols

    def _resolve_symbols(self, symbols, resolve):
        """Stub for ``_resolve``: record the call, return the fixture."""
        self.calls.append(('_resolve_symbols', (symbols, resolve,)))
        return self._return_resolve_symbols

    def _get_symbol_resolver(self, knownvars, dirnames=None):
        """Stub for ``_get_symbol_resolver``: record, return the fixture."""
        self.calls.append(('_get_symbol_resolver', (knownvars, dirnames)))
        return self._return_get_symbol_resolver

    def test_typical(self):
        symbols = self._return_iter_symbols = ()
        resolver = self._return_get_symbol_resolver = object()
        variables = self._return_resolve_symbols = [
            info.Variable.from_parts('dir1/spam.c', None, 'var1', 'int'),
            info.Variable.from_parts('dir1/spam.c', None, 'var2', 'static int'),
            info.Variable.from_parts('dir1/spam.c', None, 'var3', 'char *'),
            info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', 'const char *'),
            info.Variable.from_parts('dir1/eggs.c', None, 'var1', 'static int'),
            info.Variable.from_parts('dir1/eggs.c', 'func1', 'var2', 'static char *'),
            ]
        knownvars = object()

        found = list(globals_from_binary('python',
                                         knownvars=knownvars,
                                         **self.kwargs))

        # Only the entries declared "static ..." are expected back.
        expected = [
            info.Variable.from_parts('dir1/spam.c', None, 'var2', 'static int'),
            info.Variable.from_parts('dir1/eggs.c', None, 'var1', 'static int'),
            info.Variable.from_parts('dir1/eggs.c', 'func1', 'var2', 'static char *'),
            ]
        self.assertEqual(found, expected)
        self.assertEqual(self.calls, [
            ('_iter_symbols', ('python', None)),
            ('_get_symbol_resolver', (knownvars, None)),
            ('_resolve_symbols', (symbols, resolver)),
            ])
# self._return_iter_symbols = [
# s_info.Symbol(('dir1/spam.c', None, 'var1'), 'variable', False),
# s_info.Symbol(('dir1/spam.c', None, 'var2'), 'variable', False),
# s_info.Symbol(('dir1/spam.c', None, 'func1'), 'function', False),
# s_info.Symbol(('dir1/spam.c', None, 'func2'), 'function', True),
# s_info.Symbol(('dir1/spam.c', None, 'var3'), 'variable', False),
# s_info.Symbol(('dir1/spam.c', 'func2', 'var4'), 'variable', False),
# s_info.Symbol(('dir1/ham.c', None, 'var1'), 'variable', True),
# s_info.Symbol(('dir1/eggs.c', None, 'var1'), 'variable', False),
# s_info.Symbol(('dir1/eggs.c', None, 'xyz'), 'other', False),
# s_info.Symbol(('dir1/eggs.c', '???', 'var2'), 'variable', False),
# s_info.Symbol(('???', None, 'var_x'), 'variable', False),
# s_info.Symbol(('???', '???', 'var_y'), 'variable', False),
# s_info.Symbol((None, None, '???'), 'other', False),
# ]
# known = object()
#
# globals_from_binary('python', knownvars=known, **this.kwargs)
# found = list(globals_from_symbols(['dir1'], self.iter_symbols))
#
# self.assertEqual(found, [
# info.Variable.from_parts('dir1/spam.c', None, 'var1', '???'),
# info.Variable.from_parts('dir1/spam.c', None, 'var2', '???'),
# info.Variable.from_parts('dir1/spam.c', None, 'var3', '???'),
# info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', '???'),
# info.Variable.from_parts('dir1/eggs.c', None, 'var1', '???'),
# ])
# self.assertEqual(self.calls, [
# ('iter_symbols', (['dir1'],)),
# ])
#
# def test_no_symbols(self):
# self._return_iter_symbols = []
#
# found = list(globals_from_symbols(['dir1'], self.iter_symbols))
#
# self.assertEqual(found, [])
# self.assertEqual(self.calls, [
# ('iter_symbols', (['dir1'],)),
# ])
# XXX need functional test
#class StaticFromDeclarationsTests(_Base):
#
# _return_iter_declarations = ()
#
# def iter_declarations(self, dirnames):
# self.calls.append(('iter_declarations', (dirnames,)))
# return iter(self._return_iter_declarations)
#
# def test_typical(self):
# self._return_iter_declarations = [
# None,
# info.Variable.from_parts('dir1/spam.c', None, 'var1', '???'),
# object(),
# info.Variable.from_parts('dir1/spam.c', None, 'var2', '???'),
# info.Variable.from_parts('dir1/spam.c', None, 'var3', '???'),
# object(),
# info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', '???'),
# object(),
# info.Variable.from_parts('dir1/eggs.c', None, 'var1', '???'),
# object(),
# ]
#
# found = list(globals_from_declarations(['dir1'], self.iter_declarations))
#
# self.assertEqual(found, [
# info.Variable.from_parts('dir1/spam.c', None, 'var1', '???'),
# info.Variable.from_parts('dir1/spam.c', None, 'var2', '???'),
# info.Variable.from_parts('dir1/spam.c', None, 'var3', '???'),
# info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', '???'),
# info.Variable.from_parts('dir1/eggs.c', None, 'var1', '???'),
# ])
# self.assertEqual(self.calls, [
# ('iter_declarations', (['dir1'],)),
# ])
#
# def test_no_declarations(self):
# self._return_iter_declarations = []
#
# found = list(globals_from_declarations(['dir1'], self.iter_declarations))
#
# self.assertEqual(found, [])
# self.assertEqual(self.calls, [
# ('iter_declarations', (['dir1'],)),
# ])
#class IterVariablesTests(_Base):
#
# _return_from_symbols = ()
# _return_from_declarations = ()
#
# def _from_symbols(self, dirnames, iter_symbols):
# self.calls.append(('_from_symbols', (dirnames, iter_symbols)))
# return iter(self._return_from_symbols)
#
# def _from_declarations(self, dirnames, iter_declarations):
# self.calls.append(('_from_declarations', (dirnames, iter_declarations)))
# return iter(self._return_from_declarations)
#
# def test_typical(self):
# expected = [
# info.Variable.from_parts('dir1/spam.c', None, 'var1', '???'),
# info.Variable.from_parts('dir1/spam.c', None, 'var2', '???'),
# info.Variable.from_parts('dir1/spam.c', None, 'var3', '???'),
# info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', '???'),
# info.Variable.from_parts('dir1/eggs.c', None, 'var1', '???'),
# ]
# self._return_from_symbols = expected
#
# found = list(iter_variables(['dir1'],
# _from_symbols=self._from_symbols,
# _from_declarations=self._from_declarations))
#
# self.assertEqual(found, expected)
# self.assertEqual(self.calls, [
# ('_from_symbols', (['dir1'], b_symbols.iter_symbols)),
# ])
#
# def test_no_symbols(self):
# self._return_from_symbols = []
#
# found = list(iter_variables(['dir1'],
# _from_symbols=self._from_symbols,
# _from_declarations=self._from_declarations))
#
# self.assertEqual(found, [])
# self.assertEqual(self.calls, [
# ('_from_symbols', (['dir1'], b_symbols.iter_symbols)),
# ])
#
# def test_from_binary(self):
# expected = [
# info.Variable.from_parts('dir1/spam.c', None, 'var1', '???'),
# info.Variable.from_parts('dir1/spam.c', None, 'var2', '???'),
# info.Variable.from_parts('dir1/spam.c', None, 'var3', '???'),
# info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', '???'),
# info.Variable.from_parts('dir1/eggs.c', None, 'var1', '???'),
# ]
# self._return_from_symbols = expected
#
# found = list(iter_variables(['dir1'], 'platform',
# _from_symbols=self._from_symbols,
# _from_declarations=self._from_declarations))
#
# self.assertEqual(found, expected)
# self.assertEqual(self.calls, [
# ('_from_symbols', (['dir1'], b_symbols.iter_symbols)),
# ])
#
# def test_from_symbols(self):
# expected = [
# info.Variable.from_parts('dir1/spam.c', None, 'var1', '???'),
# info.Variable.from_parts('dir1/spam.c', None, 'var2', '???'),
# info.Variable.from_parts('dir1/spam.c', None, 'var3', '???'),
# info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', '???'),
# info.Variable.from_parts('dir1/eggs.c', None, 'var1', '???'),
# ]
# self._return_from_symbols = expected
#
# found = list(iter_variables(['dir1'], 'symbols',
# _from_symbols=self._from_symbols,
# _from_declarations=self._from_declarations))
#
# self.assertEqual(found, expected)
# self.assertEqual(self.calls, [
# ('_from_symbols', (['dir1'], s_symbols.iter_symbols)),
# ])
#
# def test_from_declarations(self):
# expected = [
# info.Variable.from_parts('dir1/spam.c', None, 'var1', '???'),
# info.Variable.from_parts('dir1/spam.c', None, 'var2', '???'),
# info.Variable.from_parts('dir1/spam.c', None, 'var3', '???'),
# info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', '???'),
# info.Variable.from_parts('dir1/eggs.c', None, 'var1', '???'),
# ]
# self._return_from_declarations = expected
#
# found = list(iter_variables(['dir1'], 'declarations',
# _from_symbols=self._from_symbols,
# _from_declarations=self._from_declarations))
#
# self.assertEqual(found, expected)
# self.assertEqual(self.calls, [
# ('_from_declarations', (['dir1'], declarations.iter_all)),
# ])
#
# def test_from_preprocessed(self):
# expected = [
# info.Variable.from_parts('dir1/spam.c', None, 'var1', '???'),
# info.Variable.from_parts('dir1/spam.c', None, 'var2', '???'),
# info.Variable.from_parts('dir1/spam.c', None, 'var3', '???'),
# info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', '???'),
# info.Variable.from_parts('dir1/eggs.c', None, 'var1', '???'),
# ]
# self._return_from_declarations = expected
#
# found = list(iter_variables(['dir1'], 'preprocessed',
# _from_symbols=self._from_symbols,
# _from_declarations=self._from_declarations))
#
# self.assertEqual(found, expected)
# self.assertEqual(self.calls, [
# ('_from_declarations', (['dir1'], declarations.iter_preprocessed)),
# ])
class StaticsTest(_Base):
    """Exercise globals() with a recording stand-in for the variable iterator."""

    # Canned data replayed by the fake iterator (None means "no variables").
    _return_iter_variables = None

    def _iter_variables(self, kind, *, known, dirnames):
        # Fake iterator: log how it was called, then replay the canned data.
        record = ('_iter_variables', (kind, known, dirnames))
        self.calls.append(record)
        canned = self._return_iter_variables or ()
        return iter(canned)

    def test_typical(self):
        var = info.Variable.from_parts
        self._return_iter_variables = [
            var('src1/spam.c', None, 'var1', 'static const char *'),
            var('src1/spam.c', None, 'var1b', 'const char *'),
            var('src1/spam.c', 'ham', 'initialized', 'static int'),
            var('src1/spam.c', 'ham', 'result', 'int'),
            var('src1/spam.c', None, 'var2', 'static PyObject *'),
            var('src1/eggs.c', 'tofu', 'ready', 'static int'),
            var('src1/spam.c', None, 'freelist', 'static (PyTupleObject *)[10]'),
            var('src1/sub/ham.c', None, 'var1', 'static const char const *'),
            var('src2/jam.c', None, 'var1', 'static int'),
            var('src2/jam.c', None, 'var2', 'static MyObject *'),
            var('Include/spam.h', None, 'data', 'static const int'),
        ]
        # Opaque sentinels: they only have to be passed through untouched.
        dirs_marker = object()
        known_marker = object()

        found = list(globals(
            dirs_marker,
            known_marker,
            kind='platform',
            _iter_variables=self._iter_variables,
        ))

        # "var1b" and "result" (the two non-static entries) must be absent.
        expected = [
            var('src1/spam.c', None, 'var1', 'static const char *'),
            var('src1/spam.c', 'ham', 'initialized', 'static int'),
            var('src1/spam.c', None, 'var2', 'static PyObject *'),
            var('src1/eggs.c', 'tofu', 'ready', 'static int'),
            var('src1/spam.c', None, 'freelist', 'static (PyTupleObject *)[10]'),
            var('src1/sub/ham.c', None, 'var1', 'static const char const *'),
            var('src2/jam.c', None, 'var1', 'static int'),
            var('src2/jam.c', None, 'var2', 'static MyObject *'),
            var('Include/spam.h', None, 'data', 'static const int'),
        ]
        self.assertEqual(found, expected)
        self.assertEqual(self.calls, [
            ('_iter_variables', ('platform', known_marker, dirs_marker)),
        ])

View file

@ -0,0 +1,34 @@
import unittest
from .. import tool_imports_for_tests
with tool_imports_for_tests():
pass
class SelfCheckTests(unittest.TestCase):
    """Placeholder tests; each one currently just raises NotImplementedError.

    They are marked as expected failures until they are actually written.
    """

    @unittest.expectedFailure
    def test_known(self):
        # Make sure known macros & vartypes aren't hiding unknown local types.
        # XXX finish!
        raise NotImplementedError

    @unittest.expectedFailure
    def test_compare_nm_results(self):
        # Make sure the "show" results match the statics found by "nm" command.
        # XXX Skip if "nm" is not available.
        # XXX finish!
        raise NotImplementedError
class DummySourceTests(unittest.TestCase):
    """Placeholder tests; each one currently just raises NotImplementedError.

    They are marked as expected failures until they are actually written.
    """

    @unittest.expectedFailure
    def test_check(self):
        # XXX finish!
        raise NotImplementedError

    @unittest.expectedFailure
    def test_show(self):
        # XXX finish!
        raise NotImplementedError

View file

@ -0,0 +1,52 @@
import unittest
from .. import tool_imports_for_tests
with tool_imports_for_tests():
from c_parser import info
from c_globals.show import basic
# Sample variables fed to basic() by the tests below: a mix of file-level
# and function-local entries spread over several files.
TYPICAL = [
    info.Variable.from_parts('src1/spam.c', None, 'var1', 'static const char *'),
    info.Variable.from_parts('src1/spam.c', 'ham', 'initialized', 'static int'),
    info.Variable.from_parts('src1/spam.c', None, 'var2', 'static PyObject *'),
    info.Variable.from_parts('src1/eggs.c', 'tofu', 'ready', 'static int'),
    info.Variable.from_parts('src1/spam.c', None, 'freelist', 'static (PyTupleObject *)[10]'),
    info.Variable.from_parts('src1/sub/ham.c', None, 'var1', 'static const char const *'),
    info.Variable.from_parts('src2/jam.c', None, 'var1', 'static int'),
    info.Variable.from_parts('src2/jam.c', None, 'var2', 'static MyObject *'),
    info.Variable.from_parts('Include/spam.h', None, 'data', 'static const int'),
]
class BasicTests(unittest.TestCase):
    """Check the lines that basic() emits through its _print hook."""

    maxDiff = None

    def setUp(self):
        # Fresh capture buffer for every test.
        self.lines = []

    def print(self, line):
        # Stand-in for the builtin print(): collect instead of writing.
        self.lines.append(line)

    def test_typical(self):
        expected = [
            'src1/spam.c:var1 static const char *',
            'src1/spam.c:ham():initialized static int',
            'src1/spam.c:var2 static PyObject *',
            'src1/eggs.c:tofu():ready static int',
            'src1/spam.c:freelist static (PyTupleObject *)[10]',
            'src1/sub/ham.c:var1 static const char const *',
            'src2/jam.c:var1 static int',
            'src2/jam.c:var2 static MyObject *',
            'Include/spam.h:data static const int',
        ]

        basic(TYPICAL, _print=self.print)

        self.assertEqual(self.lines, expected)

    def test_no_rows(self):
        rows = []

        basic(rows, _print=self.print)

        # Nothing in, nothing printed.
        self.assertEqual(self.lines, [])

View file

@ -0,0 +1,96 @@
import re
import textwrap
import unittest
from .. import tool_imports_for_tests
with tool_imports_for_tests():
from c_analyzer_common.info import ID
from c_parser import info
from c_globals.supported import is_supported, ignored_from_file
class IsSupportedTests(unittest.TestCase):
    """Tests for is_supported(); both are currently expected failures."""

    # NOTE(review): every other test in view builds variables with
    # info.Variable.from_parts(); info.StaticVar is used only here.  Confirm
    # that it exists, since @expectedFailure would hide an AttributeError.

    @unittest.expectedFailure
    def test_supported(self):
        # Vartypes for which is_supported() should return a true value.
        statics = [
            info.StaticVar('src1/spam.c', None, 'var1', 'const char *'),
            info.StaticVar('src1/spam.c', None, 'var1', 'int'),
        ]
        for static in statics:
            with self.subTest(static):
                result = is_supported(static)

                self.assertTrue(result)

    @unittest.expectedFailure
    def test_not_supported(self):
        # Vartypes for which is_supported() should return a false value.
        statics = [
            info.StaticVar('src1/spam.c', None, 'var1', 'PyObject *'),
            info.StaticVar('src1/spam.c', None, 'var1', 'PyObject[10]'),
        ]
        for static in statics:
            with self.subTest(static):
                result = is_supported(static)

                self.assertFalse(result)
class IgnoredFromFileTests(unittest.TestCase):
    """Check ignored_from_file() against a fake TSV reader."""

    maxDiff = None

    # Rows handed back by the fake reader.
    _return_read_tsv = ()

    @property
    def calls(self):
        # The call log is created the first time it is asked for.
        if not hasattr(self, '_calls'):
            self._calls = []
        return self._calls

    def _read_tsv(self, *args):
        # Fake reader: record the arguments, replay the canned rows.
        self.calls.append(('_read_tsv', args))
        return self._return_read_tsv

    def test_typical(self):
        table = textwrap.dedent('''
            filename funcname name kind reason
            file1.c - var1 variable ...
            file1.c func1 local1 variable |
            file1.c - var2 variable ???
            file1.c func2 local2 variable |
            file2.c - var1 variable reasons
            ''').strip().splitlines()
        rows = []
        for raw in table[1:]:  # The header line is not part of the data.
            # Turn the first four whitespace runs into tabs; "|" marks an
            # empty trailing field.
            tabbed = re.sub(r'\s{1,8}', '\t', raw, 4).replace('|', '')
            rows.append(tuple(field.strip() for field in tabbed.split('\t')))
        self._return_read_tsv = rows

        ignored = ignored_from_file('spam.c', _read_tsv=self._read_tsv)

        expected = {
            'variables': {
                ID('file1.c', '', 'var1'): '...',
                ID('file1.c', 'func1', 'local1'): '',
                ID('file1.c', '', 'var2'): '???',
                ID('file1.c', 'func2', 'local2'): '',
                ID('file2.c', '', 'var1'): 'reasons',
            },
        }
        self.assertEqual(ignored, expected)
        self.assertEqual(self.calls, [
            ('_read_tsv', ('spam.c', 'filename\tfuncname\tname\tkind\treason')),
        ])

    def test_empty(self):
        self._return_read_tsv = []

        ignored = ignored_from_file('spam.c', _read_tsv=self._read_tsv)

        expected = {
            'variables': {},
        }
        self.assertEqual(ignored, expected)
        self.assertEqual(self.calls, [
            ('_read_tsv', ('spam.c', 'filename\tfuncname\tname\tkind\treason')),
        ])

View file

@ -0,0 +1,795 @@
import textwrap
import unittest
from .. import tool_imports_for_tests
with tool_imports_for_tests():
from c_parser.declarations import (
iter_global_declarations, iter_local_statements,
parse_func, parse_var, parse_compound,
iter_variables,
)
class TestCaseBase(unittest.TestCase):
    """Shared base class: unlimited diffs plus a lazily created call log."""

    maxDiff = None

    @property
    def calls(self):
        # Created on first access, so subclasses need no setUp() for it.
        if not hasattr(self, '_calls'):
            self._calls = []
        return self._calls
class IterGlobalDeclarationsTests(TestCaseBase):
def test_functions(self):
    # Each case pairs raw source text with the one statement expected
    # back for it.
    cases = [
        (textwrap.dedent('''
            void func1() {
            return;
            }
            '''),
         textwrap.dedent('''
            void func1() {
            return;
            }
            ''').strip(),
         ),
        (textwrap.dedent('''
            static unsigned int * _func1(
            const char *arg1,
            int *arg2
            long long arg3
            )
            {
            return _do_something(arg1, arg2, arg3);
            }
            '''),
         textwrap.dedent('''
            static unsigned int * _func1( const char *arg1, int *arg2 long long arg3 ) {
            return _do_something(arg1, arg2, arg3);
            }
            ''').strip(),
         ),
        (textwrap.dedent('''
            static PyObject *
            _func1(const char *arg1, PyObject *arg2)
            {
            static int initialized = 0;
            if (!initialized) {
            initialized = 1;
            _init(arg1);
            }
            PyObject *result = _do_something(arg1, arg2);
            Py_INCREF(result);
            return result;
            }
            '''),
         textwrap.dedent('''
            static PyObject * _func1(const char *arg1, PyObject *arg2) {
            static int initialized = 0;
            if (!initialized) {
            initialized = 1;
            _init(arg1);
            }
            PyObject *result = _do_something(arg1, arg2);
            Py_INCREF(result);
            return result;
            }
            ''').strip(),
         ),
    ]
    for source, stmt in cases:
        # The expected body is whatever sits between the outermost braces.
        inner = stmt.partition('{')[2].rpartition('}')[0]
        body = textwrap.dedent(inner).strip()
        with self.subTest(source):
            found = list(iter_global_declarations(source.splitlines()))

            self.assertEqual(found, [(stmt, body)])
@unittest.expectedFailure
def test_declarations(self):
    # Plain (non-function) global declarations, one statement per case.
    tests = [
        'int spam;',
        'long long spam;',
        'static const int const *spam;',
        'int spam;',
        'typedef int myint;',
        'typedef PyObject * (*unaryfunc)(PyObject *);',
        # typedef struct
        # inline struct
        # enum
        # inline enum
    ]
    for text in tests:
        # NOTE(review): test_declaration_multiple_vars expects (stmt, None)
        # for declarations; confirm which shape is intended here.
        expected = (text,
                    ' '.join(l.strip() for l in text.splitlines()))
        # The loop variable is "text"; referring to "lines" before it was
        # assigned used to raise UnboundLocalError, which @expectedFailure
        # silently accepted instead of exercising the parser.
        with self.subTest(text):
            lines = text.splitlines()

            stmts = list(iter_global_declarations(lines))

            self.assertEqual(stmts, [expected])
@unittest.expectedFailure
def test_declaration_multiple_vars(self):
    # One declaration naming three variables should come back as three
    # separate single-variable statements.
    source = ['static const int const *spam, *ham=NULL, eggs = 3;']
    expected = [
        ('static const int const *spam;', None),
        ('static const int *ham=NULL;', None),
        ('static const int eggs = 3;', None),
    ]

    found = list(iter_global_declarations(source))

    self.assertEqual(found, expected)
def test_mixed(self):
    source = textwrap.dedent('''
        int spam;
        static const char const *eggs;
        PyObject * start(void) {
        static int initialized = 0;
        if (initialized) {
        initialized = 1;
        init();
        }
        return _start();
        }
        char* ham;
        static int stop(char *reason) {
        ham = reason;
        return _stop();
        }
        ''').splitlines()
    # Only the two function definitions are expected back, each paired
    # with its body; the plain variable declarations are not.
    start_func = (
        textwrap.dedent('''
            PyObject * start(void) {
            static int initialized = 0;
            if (initialized) {
            initialized = 1;
            init();
            }
            return _start();
            }
            ''').strip(),
        textwrap.dedent('''
            static int initialized = 0;
            if (initialized) {
            initialized = 1;
            init();
            }
            return _start();
            ''').strip(),
    )
    stop_func = (
        textwrap.dedent('''
            static int stop(char *reason) {
            ham = reason;
            return _stop();
            }
            ''').strip(),
        textwrap.dedent('''
            ham = reason;
            return _stop();
            ''').strip(),
    )
    expected = [start_func, stop_func]

    found = list(iter_global_declarations(source))

    self.assertEqual(found, expected)
    #self.assertEqual([stmt for stmt, _ in stmts],
    #                 [stmt for stmt, _ in expected])
    #self.assertEqual([body for _, body in stmts],
    #                 [body for _, body in expected])
def test_no_statements(self):
    # No input lines means no statements.
    found = list(iter_global_declarations([]))

    self.assertEqual(found, [])
def test_bogus(self):
    # Each case is (malformed source text, statements still expected).
    cases = [
        (textwrap.dedent('''
            int spam;
            static const char const *eggs;
            PyObject * start(void) {
            static int initialized = 0;
            if (initialized) {
            initialized = 1;
            init();
            }
            return _start();
            }
            char* ham;
            static int _stop(void) {
            // missing closing bracket
            static int stop(char *reason) {
            ham = reason;
            return _stop();
            }
            '''),
         [(textwrap.dedent('''
            PyObject * start(void) {
            static int initialized = 0;
            if (initialized) {
            initialized = 1;
            init();
            }
            return _start();
            }
            ''').strip(),
           textwrap.dedent('''
            static int initialized = 0;
            if (initialized) {
            initialized = 1;
            init();
            }
            return _start();
            ''').strip(),
           ),
          # Neither "stop()" nor "_stop()" are here.
          ],
         ),
    ]
    for source, expected in cases:
        with self.subTest(source):
            found = list(iter_global_declarations(source.splitlines()))

            self.assertEqual(found, expected)
            #self.assertEqual([stmt for stmt, _ in stmts],
            #                 [stmt for stmt, _ in expected])
            #self.assertEqual([body for _, body in stmts],
            #                 [body for _, body in expected])
def test_ignore_comments(self):
    # Each case is (source text, expected statement); None means that no
    # statement at all should be produced.
    cases = [
        ('// msg', None),
        ('// int stmt;', None),
        (' // ... ', None),
        ('// /*', None),
        ('/* int stmt; */', None),
        ('\n/**\n* ...\n* int stmt;\n*/\n', None),
    ]
    for source, expected in cases:
        wanted = [expected] if expected else []
        with self.subTest(source):
            found = list(iter_global_declarations(source.splitlines()))

            self.assertEqual(found, wanted)
class IterLocalStatementsTests(TestCaseBase):
def test_vars(self):
    # Every case is a one-line declaration that should come back unchanged,
    # paired with None.
    cases = [
        # POTS
        'int spam;',
        'unsigned int spam;',
        'char spam;',
        'float spam;',

        # typedefs
        'uint spam;',
        'MyType spam;',

        # complex
        'struct myspam spam;',
        'union choice spam;',
        # inline struct
        # inline union
        # enum?
    ]
    # pointers
    cases += [
        # POTS
        'int * spam;',
        'unsigned int * spam;',
        'char *spam;',
        'char const *spam = "spamspamspam...";',
        # typedefs
        'MyType *spam;',
        # complex
        'struct myspam *spam;',
        'union choice *spam;',
        # packed with details
        'const char const *spam;',
        # void pointer
        'void *data = NULL;',
        # function pointers
        'int (* func)(char *arg1);',
        'char * (* func)(void);',
    ]
    # storage class
    cases += [
        'static int spam;',
        'extern int spam;',
        'static unsigned int spam;',
        'static struct myspam spam;',
    ]
    # type qualifier
    cases += [
        'const int spam;',
        'const unsigned int spam;',
        'const struct myspam spam;',
    ]
    # combined
    cases += [
        'const char *spam = eggs;',
        'static const char const *spam = "spamspamspam...";',
        'extern const char const *spam;',
        'static void *data = NULL;',
        'static int (const * func)(char *arg1) = func1;',
        'static char * (* func)(void);',
    ]
    for stmt in cases:
        with self.subTest(stmt):
            found = list(iter_local_statements([stmt]))

            self.assertEqual(found, [(stmt, None)])
@unittest.expectedFailure
def test_vars_multiline_var(self):
    # A declaration spread over several lines should be joined into one.
    source = textwrap.dedent('''
        PyObject *
        spam
        = NULL;
        ''').splitlines()
    joined = 'PyObject * spam = NULL;'

    found = list(iter_local_statements(source))

    self.assertEqual(found, [(joined, None)])
@unittest.expectedFailure
def test_declaration_multiple_vars(self):
    # One declaration naming five variables should come back as five
    # separate single-variable statements.
    lines = ['static const int const *spam, *ham=NULL, ham2[]={1, 2, 3}, ham3[2]={1, 2}, eggs = 3;']

    # This class covers local statements, so exercise
    # iter_local_statements() (the global iterator was called here before).
    stmts = list(iter_local_statements(lines))

    # The array names match the input ("ham2", "ham3"); they were both
    # spelled "ham" before, which could never match.
    self.assertEqual(stmts, [
        ('static const int const *spam;', None),
        ('static const int *ham=NULL;', None),
        ('static const int ham2[]={1, 2, 3};', None),
        ('static const int ham3[2]={1, 2};', None),
        ('static const int eggs = 3;', None),
    ])
# The next three tests are unwritten placeholders: each just raises
# NotImplementedError and is marked as an expected failure.

@unittest.expectedFailure
def test_other_simple(self):
    raise NotImplementedError

@unittest.expectedFailure
def test_compound(self):
    raise NotImplementedError

@unittest.expectedFailure
def test_mixed(self):
    raise NotImplementedError
def test_no_statements(self):
    # No input lines means no statements.
    found = list(iter_local_statements([]))

    self.assertEqual(found, [])
@unittest.expectedFailure
def test_bogus(self):
    # Unwritten placeholder; always raises.
    raise NotImplementedError
def test_ignore_comments(self):
    # Each case is (source text, expected statement); None means that no
    # statement at all should be produced.
    cases = [
        ('// msg', None),
        ('// int stmt;', None),
        (' // ... ', None),
        ('// /*', None),
        ('/* int stmt; */', None),
        ('\n/**\n* ...\n* int stmt;\n*/\n', None),
        # mixed with statements
        ('int stmt; // ...', ('int stmt;', None)),
        ('int stmt; /* ... */', ('int stmt;', None)),
        ('/* ... */ int stmt;', ('int stmt;', None)),
    ]
    for source, expected in cases:
        wanted = [expected] if expected else []
        with self.subTest(source):
            found = list(iter_local_statements(source.splitlines()))

            self.assertEqual(found, wanted)
class ParseFuncTests(TestCaseBase):
    """Tests for parse_func()."""

    def test_typical(self):
        # Each case is (statement, body, expected (name, signature)).
        cases = [
            ('PyObject *\nspam(char *a)\n{\nreturn _spam(a);\n}',
             'return _spam(a);',
             ('spam', 'PyObject * spam(char *a)'),
             ),
        ]
        for stmt, body, expected in cases:
            with self.subTest(stmt):
                name, signature = parse_func(stmt, body)

                self.assertEqual((name, signature), expected)
class ParseVarTests(TestCaseBase):
    """Tests for parse_var()."""

    def test_typical(self):
        # Each case is (statement, expected (name, vartype)).
        cases = [
            # POTS
            ('int spam;', ('spam', 'int')),
            ('unsigned int spam;', ('spam', 'unsigned int')),
            ('char spam;', ('spam', 'char')),
            ('float spam;', ('spam', 'float')),

            # typedefs
            ('uint spam;', ('spam', 'uint')),
            ('MyType spam;', ('spam', 'MyType')),

            # complex
            ('struct myspam spam;', ('spam', 'struct myspam')),
            ('union choice spam;', ('spam', 'union choice')),
            # inline struct
            # inline union
            # enum?
        ]
        # pointers
        cases += [
            # POTS
            ('int * spam;', ('spam', 'int *')),
            ('unsigned int * spam;', ('spam', 'unsigned int *')),
            ('char *spam;', ('spam', 'char *')),
            ('char const *spam = "spamspamspam...";', ('spam', 'char const *')),
            # typedefs
            ('MyType *spam;', ('spam', 'MyType *')),
            # complex
            ('struct myspam *spam;', ('spam', 'struct myspam *')),
            ('union choice *spam;', ('spam', 'union choice *')),
            # packed with details
            ('const char const *spam;', ('spam', 'const char const *')),
            # void pointer
            ('void *data = NULL;', ('data', 'void *')),
            # function pointers
            ('int (* func)(char *);', ('func', 'int (*)(char *)')),
            ('char * (* func)(void);', ('func', 'char * (*)(void)')),
        ]
        # storage class
        cases += [
            ('static int spam;', ('spam', 'static int')),
            ('extern int spam;', ('spam', 'extern int')),
            ('static unsigned int spam;', ('spam', 'static unsigned int')),
            ('static struct myspam spam;', ('spam', 'static struct myspam')),
        ]
        # type qualifier
        cases += [
            ('const int spam;', ('spam', 'const int')),
            ('const unsigned int spam;', ('spam', 'const unsigned int')),
            ('const struct myspam spam;', ('spam', 'const struct myspam')),
        ]
        # combined
        cases += [
            ('const char *spam = eggs;', ('spam', 'const char *')),
            ('static const char const *spam = "spamspamspam...";',
             ('spam', 'static const char const *')),
            ('extern const char const *spam;',
             ('spam', 'extern const char const *')),
            ('static void *data = NULL;', ('data', 'static void *')),
            ('static int (const * func)(char *) = func1;',
             ('func', 'static int (const *)(char *)')),
            ('static char * (* func)(void);',
             ('func', 'static char * (*)(void)')),
        ]
        for stmt, expected in cases:
            with self.subTest(stmt):
                parsed = parse_var(stmt)
                name, vartype = parsed

                self.assertEqual((name, vartype), expected)
@unittest.skip('not finished')
class ParseCompoundTests(TestCaseBase):
    """Unfinished tests for parse_compound(); the whole class is skipped."""

    def test_typical(self):
        # NOTE(review): "stmt" and "blocks" are not defined anywhere in this
        # method, so it would raise NameError if the skip were removed.
        headers, bodies = parse_compound(stmt, blocks)
        ...
class IterVariablesTests(TestCaseBase):
# Canned return values for the fake hooks defined below.  Each test sets
# the ones it needs before calling iter_variables().
_return_iter_source_lines = None
_return_iter_global = None
_return_iter_local = None
_return_parse_func = None
_return_parse_var = None
_return_parse_compound = None
def _iter_source_lines(self, filename):
    # Fake source reader: log the call and hand back the canned text,
    # split into lines.
    record = ('_iter_source_lines', (filename,))
    self.calls.append(record)
    text = self._return_iter_source_lines
    return text.splitlines()
def _iter_global(self, lines):
    # Fake global-statement iterator: log the call, then replay the next
    # canned result (or a placeholder once the queue is exhausted).
    record = ('_iter_global', (lines,))
    self.calls.append(record)
    try:
        canned = self._return_iter_global.pop(0)
    except IndexError:
        canned = ('???', None)
    return canned
def _iter_local(self, lines):
    # Fake local-statement iterator: log the call, then replay the next
    # canned result (or a placeholder once the queue is exhausted).
    record = ('_iter_local', (lines,))
    self.calls.append(record)
    try:
        canned = self._return_iter_local.pop(0)
    except IndexError:
        canned = ('???', None)
    return canned
def _parse_func(self, stmt, body):
    # Fake function parser: log the call, then replay the next canned
    # result (or a placeholder once the queue is exhausted).
    record = ('_parse_func', (stmt, body))
    self.calls.append(record)
    try:
        canned = self._return_parse_func.pop(0)
    except IndexError:
        canned = ('???', '???')
    return canned
def _parse_var(self, lines):
    # Fake variable parser: log the call, then replay the next canned
    # result (or a placeholder once the queue is exhausted).
    record = ('_parse_var', (lines,))
    self.calls.append(record)
    try:
        canned = self._return_parse_var.pop(0)
    except IndexError:
        canned = ('???', '???')
    return canned
def _parse_compound(self, stmt, blocks):
    # Fake compound-statement parser: log the call, then replay the next
    # canned result (or a placeholder once the queue is exhausted).
    record = ('_parse_compound', (stmt, blocks))
    self.calls.append(record)
    try:
        canned = self._return_parse_compound.pop(0)
    except IndexError:
        canned = (['???'], ['???'])
    return canned
def test_empty_file(self):
    self._return_iter_source_lines = ''
    self._return_iter_global = [
        [],
    ]
    self._return_parse_func = None
    self._return_parse_var = None
    self._return_parse_compound = None
    # Replace every collaborator with the recording fake.
    hooks = dict(
        _iter_source_lines=self._iter_source_lines,
        _iter_global=self._iter_global,
        _iter_local=self._iter_local,
        _parse_func=self._parse_func,
        _parse_var=self._parse_var,
        _parse_compound=self._parse_compound,
    )

    srcvars = list(iter_variables('spam.c', **hooks))

    self.assertEqual(srcvars, [])
    self.assertEqual(self.calls, [
        ('_iter_source_lines', ('spam.c',)),
        ('_iter_global', ([],)),
    ])
def test_no_statements(self):
    content = textwrap.dedent('''
        ...
        ''')
    self._return_iter_source_lines = content
    self._return_iter_global = [
        [],
    ]
    self._return_parse_func = None
    self._return_parse_var = None
    self._return_parse_compound = None
    # Replace every collaborator with the recording fake.
    hooks = dict(
        _iter_source_lines=self._iter_source_lines,
        _iter_global=self._iter_global,
        _iter_local=self._iter_local,
        _parse_func=self._parse_func,
        _parse_var=self._parse_var,
        _parse_compound=self._parse_compound,
    )

    srcvars = list(iter_variables('spam.c', **hooks))

    self.assertEqual(srcvars, [])
    self.assertEqual(self.calls, [
        ('_iter_source_lines', ('spam.c',)),
        ('_iter_global', (content.splitlines(),)),
    ])
def test_typical(self):
    content = textwrap.dedent('''
        ...
        ''')
    self._return_iter_source_lines = content
    self._return_iter_global = [
        [('<lines 1>', None),  # var1
         ('<lines 2>', None),  # non-var
         ('<lines 3>', None),  # var2
         ('<lines 4>', '<body 1>'),  # func1
         ('<lines 9>', None),  # var4
         ],
    ]
    self._return_iter_local = [
        # func1
        [('<lines 5>', None),  # var3
         ('<lines 6>', [('<header 1>', '<block 1>')]),  # if
         ('<lines 8>', None),  # non-var
         ],
        # if
        [('<lines 7>', None),  # var2 ("collision" with global var)
         ],
    ]
    self._return_parse_func = [
        ('func1', '<sig 1>'),
    ]
    self._return_parse_var = [
        ('var1', '<vartype 1>'),
        (None, None),
        ('var2', '<vartype 2>'),
        ('var3', '<vartype 3>'),
        ('var2', '<vartype 2b>'),
        ('var4', '<vartype 4>'),
        (None, None),
        (None, None),
        (None, None),
        ('var5', '<vartype 5>'),
    ]
    self._return_parse_compound = [
        ([[
            'if (',
            '<simple>',
            ')',
            ],
          ],
         ['<block 1>']),
    ]
    # Replace every collaborator with the recording fake.
    hooks = dict(
        _iter_source_lines=self._iter_source_lines,
        _iter_global=self._iter_global,
        _iter_local=self._iter_local,
        _parse_func=self._parse_func,
        _parse_var=self._parse_var,
        _parse_compound=self._parse_compound,
    )

    srcvars = list(iter_variables('spam.c', **hooks))

    self.assertEqual(srcvars, [
        (None, 'var1', '<vartype 1>'),
        (None, 'var2', '<vartype 2>'),
        ('func1', 'var3', '<vartype 3>'),
        ('func1', 'var2', '<vartype 2b>'),
        ('func1', 'var4', '<vartype 4>'),
        (None, 'var5', '<vartype 5>'),
    ])
    self.assertEqual(self.calls, [
        ('_iter_source_lines', ('spam.c',)),
        ('_iter_global', (content.splitlines(),)),
        ('_parse_var', ('<lines 1>',)),
        ('_parse_var', ('<lines 2>',)),
        ('_parse_var', ('<lines 3>',)),
        ('_parse_func', ('<lines 4>', '<body 1>')),
        ('_iter_local', (['<body 1>'],)),
        ('_parse_var', ('<lines 5>',)),
        ('_parse_compound', ('<lines 6>', [('<header 1>', '<block 1>')])),
        ('_parse_var', ('if (',)),
        ('_parse_var', ('<simple>',)),
        ('_parse_var', (')',)),
        ('_parse_var', ('<lines 8>',)),
        ('_iter_local', (['<block 1>'],)),
        ('_parse_var', ('<lines 7>',)),
        ('_parse_var', ('<lines 9>',)),
    ])
def test_no_locals(self):
    content = textwrap.dedent('''
        ...
        ''')
    self._return_iter_source_lines = content
    self._return_iter_global = [
        [('<lines 1>', None),  # var1
         ('<lines 2>', None),  # non-var
         ('<lines 3>', None),  # var2
         ('<lines 4>', '<body 1>'),  # func1
         ],
    ]
    self._return_iter_local = [
        # func1
        [('<lines 5>', None),  # non-var
         ('<lines 6>', [('<header 1>', '<block 1>')]),  # if
         ('<lines 8>', None),  # non-var
         ],
        # if
        [('<lines 7>', None),  # non-var
         ],
    ]
    self._return_parse_func = [
        ('func1', '<sig 1>'),
    ]
    self._return_parse_var = [
        ('var1', '<vartype 1>'),
        (None, None),
        ('var2', '<vartype 2>'),
        (None, None),
        (None, None),
        (None, None),
        (None, None),
        (None, None),
        (None, None),
    ]
    self._return_parse_compound = [
        ([[
            'if (',
            '<simple>',
            ')',
            ],
          ],
         ['<block 1>']),
    ]
    # Replace every collaborator with the recording fake.
    hooks = dict(
        _iter_source_lines=self._iter_source_lines,
        _iter_global=self._iter_global,
        _iter_local=self._iter_local,
        _parse_func=self._parse_func,
        _parse_var=self._parse_var,
        _parse_compound=self._parse_compound,
    )

    srcvars = list(iter_variables('spam.c', **hooks))

    # Only the two file-level variables are reported.
    self.assertEqual(srcvars, [
        (None, 'var1', '<vartype 1>'),
        (None, 'var2', '<vartype 2>'),
    ])
    self.assertEqual(self.calls, [
        ('_iter_source_lines', ('spam.c',)),
        ('_iter_global', (content.splitlines(),)),
        ('_parse_var', ('<lines 1>',)),
        ('_parse_var', ('<lines 2>',)),
        ('_parse_var', ('<lines 3>',)),
        ('_parse_func', ('<lines 4>', '<body 1>')),
        ('_iter_local', (['<body 1>'],)),
        ('_parse_var', ('<lines 5>',)),
        ('_parse_compound', ('<lines 6>', [('<header 1>', '<block 1>')])),
        ('_parse_var', ('if (',)),
        ('_parse_var', ('<simple>',)),
        ('_parse_var', (')',)),
        ('_parse_var', ('<lines 8>',)),
        ('_iter_local', (['<block 1>'],)),
        ('_parse_var', ('<lines 7>',)),
    ])

View file

@ -0,0 +1,208 @@
import string
import unittest
from ..util import PseudoStr, StrProxy, Object
from .. import tool_imports_for_tests
with tool_imports_for_tests():
from c_analyzer_common.info import ID
from c_parser.info import (
normalize_vartype, Variable,
)
class NormalizeVartypeTests(unittest.TestCase):
    """Tests for normalize_vartype()."""

    def test_basic(self):
        # Each case is (input vartype, expected normalized value).
        cases = [
            (None, None),
            ('', ''),
            ('int', 'int'),
            (PseudoStr('int'), 'int'),
            (StrProxy('int'), 'int'),
        ]
        for raw, expected in cases:
            with self.subTest(raw):
                self.assertEqual(normalize_vartype(raw), expected)
class VariableTests(unittest.TestCase):
    """Tests for Variable: construction, coercion, access and validate()."""

    VALID_ARGS = (
        ('x/y/z/spam.c', 'func', 'eggs'),
        'int',
    )
    VALID_KWARGS = dict(zip(Variable._fields, VALID_ARGS))
    VALID_EXPECTED = VALID_ARGS

    def test_init_typical_global(self):
        varid = ID(filename='x/y/z/spam.c', funcname=None, name='eggs')

        variable = Variable(id=varid, vartype='int')

        self.assertEqual(variable, (
            ('x/y/z/spam.c', None, 'eggs'),
            'int',
        ))

    def test_init_typical_local(self):
        varid = ID(filename='x/y/z/spam.c', funcname='func', name='eggs')

        variable = Variable(id=varid, vartype='int')

        self.assertEqual(variable, (
            ('x/y/z/spam.c', 'func', 'eggs'),
            'int',
        ))

    def test_init_all_missing(self):
        # Both "empty" spellings end up as None in every field.
        for missing in ('', None):
            with self.subTest(repr(missing)):
                variable = Variable(id=missing, vartype=missing)

                self.assertEqual(variable, (None, None))

    def test_init_all_coerced(self):
        varid = ID('x/y/z/spam.c', 'func', 'spam')
        # Each case is (summary, constructor kwargs, expected tuple).
        cases = [
            ('str subclass',
             dict(
                 id=(
                     PseudoStr('x/y/z/spam.c'),
                     PseudoStr('func'),
                     PseudoStr('spam'),
                 ),
                 vartype=PseudoStr('int'),
             ),
             (varid,
              'int',
              )),
            ('non-str 1',
             dict(
                 id=varid,
                 vartype=Object(),
             ),
             (varid,
              '<object>',
              )),
            ('non-str 2',
             dict(
                 id=varid,
                 vartype=StrProxy('variable'),
             ),
             (varid,
              'variable',
              )),
            ('non-str',
             dict(
                 id=varid,
                 vartype=('a', 'b', 'c'),
             ),
             (varid,
              "('a', 'b', 'c')",
              )),
        ]
        for summary, kwargs, expected in cases:
            with self.subTest(summary):
                variable = Variable(**kwargs)

                # "id" must be an exact ID; every other field an exact str.
                for field in Variable._fields:
                    wanted = ID if field == 'id' else str
                    self.assertIs(type(getattr(variable, field)), wanted)
                self.assertEqual(tuple(variable), expected)

    def test_iterable(self):
        variable = Variable(**self.VALID_KWARGS)

        varid, vartype = variable

        for actual, expected in zip((varid, vartype), self.VALID_EXPECTED):
            self.assertEqual(actual, expected)

    def test_fields(self):
        variable = Variable(('a', 'b', 'z'), 'x')

        self.assertEqual(variable.id, ('a', 'b', 'z'))
        self.assertEqual(variable.vartype, 'x')

    def test___getattr__(self):
        # The parts of the ID are reachable directly on the variable.
        variable = Variable(('a', 'b', 'z'), 'x')

        self.assertEqual(variable.filename, 'a')
        self.assertEqual(variable.funcname, 'b')
        self.assertEqual(variable.name, 'z')

    def test_validate_typical(self):
        varid = ID(filename='x/y/z/spam.c', funcname='func', name='eggs')
        variable = Variable(id=varid, vartype='int')

        variable.validate()  # This does not fail.

    def test_validate_missing_field(self):
        for field in Variable._fields:
            with self.subTest(field):
                variable = Variable(**self.VALID_KWARGS)
                variable = variable._replace(**{field: None})

                with self.assertRaises(TypeError):
                    variable.validate()

    def test_validate_bad_field(self):
        # "badch" and "notnames" are not referenced by any case below.
        badch = tuple(string.punctuation + string.digits)
        notnames = (
            '1a',
            'a.b',
            'a-b',
            '&a',
            'a++',
        ) + badch
        # Each case is (field, values that must be rejected).
        cases = [
            ('id', ()),  # Any non-empty str is okay.
            ('vartype', ()),  # Any non-empty str is okay.
        ]
        seen = set()
        for field, invalid in cases:
            for bad in invalid:
                seen.add(bad)
                with self.subTest(f'{field}={bad!r}'):
                    variable = Variable(**self.VALID_KWARGS)
                    variable = variable._replace(**{field: bad})

                    with self.assertRaises(ValueError):
                        variable.validate()

        # Anything rejected for one field but not for this one must pass.
        for field, invalid in cases:
            acceptable = seen - set(invalid)
            for good in acceptable:
                with self.subTest(f'{field}={good!r}'):
                    variable = Variable(**self.VALID_KWARGS)
                    variable = variable._replace(**{field: good})

                    variable.validate()  # This does not fail.

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,192 @@
import string
import unittest
from ..util import PseudoStr, StrProxy, Object
from .. import tool_imports_for_tests
with tool_imports_for_tests():
from c_analyzer_common.info import ID
from c_symbols.info import Symbol
class SymbolTests(unittest.TestCase):
    """Tests for Symbol: construction, coercion, access and validate()."""

    VALID_ARGS = (
        ID('x/y/z/spam.c', 'func', 'eggs'),
        Symbol.KIND.VARIABLE,
        False,
    )
    VALID_KWARGS = dict(zip(Symbol._fields, VALID_ARGS))
    VALID_EXPECTED = VALID_ARGS

    def test_init_typical_binary_local(self):
        symid = ID(None, None, 'spam')

        symbol = Symbol(id=symid, kind=Symbol.KIND.VARIABLE, external=False)

        self.assertEqual(symbol, (
            symid,
            Symbol.KIND.VARIABLE,
            False,
        ))

    def test_init_typical_binary_global(self):
        symid = ID('Python/ceval.c', None, 'spam')

        symbol = Symbol(id=symid, kind=Symbol.KIND.VARIABLE, external=False)

        self.assertEqual(symbol, (
            symid,
            Symbol.KIND.VARIABLE,
            False,
        ))

    def test_init_coercion(self):
        # Each case is (summary, constructor kwargs, expected tuple).
        cases = [
            ('str subclass',
             dict(
                 id=PseudoStr('eggs'),
                 kind=PseudoStr('variable'),
                 external=0,
             ),
             (ID(None, None, 'eggs'),
              Symbol.KIND.VARIABLE,
              False,
              )),
            ('with filename',
             dict(
                 id=('x/y/z/spam.c', 'eggs'),
                 kind=PseudoStr('variable'),
                 external=0,
             ),
             (ID('x/y/z/spam.c', None, 'eggs'),
              Symbol.KIND.VARIABLE,
              False,
              )),
            ('non-str 1',
             dict(
                 id=('a', 'b', 'c'),
                 kind=StrProxy('variable'),
                 external=0,
             ),
             (ID('a', 'b', 'c'),
              Symbol.KIND.VARIABLE,
              False,
              )),
            ('non-str 2',
             dict(
                 id=('a', 'b', 'c'),
                 kind=Object(),
                 external=0,
             ),
             (ID('a', 'b', 'c'),
              '<object>',
              False,
              )),
        ]
        # Exact type required per field; anything not listed must be a str.
        field_types = {'external': bool, 'id': ID}
        for summary, kwargs, expected in cases:
            with self.subTest(summary):
                symbol = Symbol(**kwargs)

                for field in Symbol._fields:
                    wanted = field_types.get(field, str)
                    self.assertIs(type(getattr(symbol, field)), wanted)
                self.assertEqual(tuple(symbol), expected)

    def test_init_all_missing(self):
        symid = ID(None, None, 'spam')

        symbol = Symbol(symid)

        self.assertEqual(symbol, (
            symid,
            Symbol.KIND.VARIABLE,
            None,
        ))

    def test_fields(self):
        symid = ID('z', 'x', 'a')

        symbol = Symbol(symid, 'b', False)

        self.assertEqual(symbol.id, symid)
        self.assertEqual(symbol.kind, 'b')
        self.assertIs(symbol.external, False)

    def test___getattr__(self):
        # The parts of the ID are reachable directly on the symbol.
        symbol = Symbol(ID('z', 'x', 'a'), 'b', False)

        filename = symbol.filename
        funcname = symbol.funcname
        name = symbol.name

        self.assertEqual(filename, 'z')
        self.assertEqual(funcname, 'x')
        self.assertEqual(name, 'a')

    def test_validate_typical(self):
        symbol = Symbol(
            id=ID('z', 'x', 'a'),
            kind=Symbol.KIND.VARIABLE,
            external=False,
        )

        symbol.validate()  # This does not fail.

    def test_validate_missing_field(self):
        for field in Symbol._fields:
            with self.subTest(field):
                symbol = Symbol(**self.VALID_KWARGS)._replace(**{field: None})

                with self.assertRaises(TypeError):
                    symbol.validate()

    def test_validate_bad_field(self):
        badch = tuple(string.punctuation + string.digits)
        notnames = (
            '1a',
            'a.b',
            'a-b',
            '&a',
            'a++',
        ) + badch
        # Each case is (field, values that must be rejected).
        cases = [
            ('id', notnames),
            ('kind', ('bogus',)),
        ]
        seen = set()
        for field, invalid in cases:
            for bad in invalid:
                # Bad "kind" values are not reused for the other fields.
                if field != 'kind':
                    seen.add(bad)
                with self.subTest(f'{field}={bad!r}'):
                    symbol = Symbol(**self.VALID_KWARGS)
                    symbol = symbol._replace(**{field: bad})

                    with self.assertRaises(ValueError):
                        symbol.validate()

        # Anything rejected for one field but not for this one must pass.
        for field, invalid in cases:
            if field == 'kind':
                continue
            acceptable = seen - set(invalid)
            for good in acceptable:
                with self.subTest(f'{field}={good!r}'):
                    symbol = Symbol(**self.VALID_KWARGS)
                    symbol = symbol._replace(**{field: good})

                    symbol.validate()  # This does not fail.

View file

@ -0,0 +1,60 @@
import itertools
class PseudoStr(str):
    """A str subclass that adds nothing (i.e. a str that is not exactly str)."""


class StrProxy:
    """A non-str object whose str() and truthiness come from a wrapped value."""

    def __init__(self, value):
        self.value = value

    def __str__(self):
        return self.value

    def __bool__(self):
        return bool(self.value)


class Object:
    """An otherwise plain object with a fixed, recognizable repr."""

    def __repr__(self):
        return '<object>'


def wrapped_arg_combos(*args,
                       wrappers=(PseudoStr, StrProxy),
                       skip=(lambda w, i, v: not isinstance(v, str)),
                       ):
    """Yield every possible combination of wrapped items for the given args.

    Effectively, the wrappers are applied to the args according to the
    powerset of the args indices.  So the result includes the args
    completely unwrapped.

    If "skip" is supplied (default is to skip all non-str values) and
    it returns True for a given arg index/value then that arg will
    remain unwrapped.

    Only unique results are returned.  If an arg was skipped for one
    of the combinations then it could end up matching one of the other
    combinations.  In that case only one of them will be yielded.

    Nothing is yielded when no args are given.
    """
    if not args:
        return
    indices = list(range(len(args)))
    # The powerset (from recipe in the itertools docs).
    combos = itertools.chain.from_iterable(itertools.combinations(indices, r)
                                           for r in range(len(indices)+1))
    seen = set()
    for combo in combos:
        for wrap in wrappers:
            indexes = []
            applied = list(args)
            for i in combo:
                arg = args[i]
                if skip and skip(wrap, i, arg):
                    continue
                indexes.append(i)
                applied[i] = wrap(arg)
            # When nothing was wrapped the result is identical no matter
            # which wrapper was "used", so the wrapper must not be part of
            # the key; otherwise the bare args are yielded once per wrapper.
            key = (wrap, tuple(indexes)) if indexes else ()
            if key not in seen:
                seen.add(key)
                yield tuple(applied)

View file

@ -0,0 +1,9 @@
# Script wrapper around the c_globals command-line interface: it parses
# the command line and runs the requested command, exactly as the
# package's own __main__ module does.
from c_globals.__main__ import parse_args, main


if __name__ == '__main__':
    main(*parse_args())

View file

@ -0,0 +1,19 @@
import os.path
# The directory holding this package, its parent (used elsewhere as the
# location of the data files), and the directory two levels above that.
PKG_ROOT = os.path.dirname(__file__)
DATA_DIR = os.path.dirname(PKG_ROOT)
REPO_ROOT = os.path.dirname(os.path.dirname(DATA_DIR))

# The top-level source directories (absolute, under REPO_ROOT).
SOURCE_DIRS = [
    os.path.join(REPO_ROOT, 'Include'),
    os.path.join(REPO_ROOT, 'Python'),
    os.path.join(REPO_ROOT, 'Parser'),
    os.path.join(REPO_ROOT, 'Objects'),
    os.path.join(REPO_ROOT, 'Modules'),
]

# Clean up the namespace.
del os

View file

@ -0,0 +1,328 @@
# The code here consists of hacks for pre-populating the known.tsv file.
from c_parser.preprocessor import _iter_clean_lines
from c_parser.naive import (
iter_variables, parse_variable_declaration, find_variables,
)
from c_parser.info import Variable
from . import SOURCE_DIRS, REPO_ROOT
from .known import DATA_FILE as KNOWN_FILE, HEADER as KNOWN_HEADER
from .info import UNKNOWN, ID
from .util import write_tsv
from .files import iter_cpython_files
# Line prefixes for plain-old-type declarations (each also with "const ").
POTS = ('char ', 'wchar_t ', 'int ', 'Py_ssize_t ')
POTS = POTS + tuple(f'const {prefix}' for prefix in POTS)
# Line prefixes for the struct types that _parse_global() recognizes.
STRUCTS = (
    'PyTypeObject',
    'PyObject',
    'PyMethodDef',
    'PyModuleDef',
    'grammar',
)
def _parse_global(line, funcname=None):
    """Parse one source line as a global (or static local) declaration.

    Returns a (name, decl) pair, or (None, None) if the line is not
    recognized.  For the file-specific macros handled at the bottom
    (SLOT1BINFULL, SLOT1BIN, WRAP_METHOD) a list of
    (name, funcname, decl) triples is returned instead.

    If "funcname" is given then only the forms checked before the
    "global-only" section are considered.
    """
    line = line.strip()
    if line.startswith('static '):
        # A "(" without "[" or " = " is not treated as a variable.
        if '(' in line and '[' not in line and ' = ' not in line:
            return None, None
        name, decl = parse_variable_declaration(line)
    elif line.startswith(('Py_LOCAL(', 'Py_LOCAL_INLINE(')):
        name, decl = parse_variable_declaration(line)
    elif line.startswith('_Py_static_string('):
        decl = line.strip(';').strip()
        # The variable name is the macro's first argument.
        name = line.split('(')[1].split(',')[0].strip()
    elif line.startswith('_Py_IDENTIFIER('):
        decl = line.strip(';').strip()
        name = 'PyId_' + line.split('(')[1].split(')')[0].strip()
    elif funcname:
        return None, None

    # global-only
    elif line.startswith('PyAPI_DATA('):  # only in .h files
        name, decl = parse_variable_declaration(line)
    elif line.startswith('extern '):  # only in .h files
        name, decl = parse_variable_declaration(line)
    elif line.startswith('PyDoc_VAR('):
        decl = line.strip(';').strip()
        name = line.split('(')[1].split(')')[0].strip()
    elif line.startswith(POTS):  # implied static
        if '(' in line and '[' not in line and ' = ' not in line:
            return None, None
        name, decl = parse_variable_declaration(line)
    elif line.startswith(STRUCTS) and line.endswith(' = {'):  # implied static
        name, decl = parse_variable_declaration(line)
    elif line.startswith(STRUCTS) and line.endswith(' = NULL;'):  # implied static
        name, decl = parse_variable_declaration(line)
    elif line.startswith('struct '):
        if not line.endswith(' = {'):
            return None, None
        if not line.partition(' ')[2].startswith(STRUCTS):
            return None, None
        # implied static
        name, decl = parse_variable_declaration(line)

    # file-specific
    elif line.startswith(('SLOT1BINFULL(', 'SLOT1BIN(')):
        # Objects/typeobject.c
        funcname = line.split('(')[1].split(',')[0]
        # Each row's declaration names the same variable as the row.
        return [
            ('op_id', funcname, '_Py_static_string(op_id, OPSTR)'),
            ('rop_id', funcname, '_Py_static_string(rop_id, ROPSTR)'),
        ]
    elif line.startswith('WRAP_METHOD('):
        # Objects/weakrefobject.c
        funcname, name = (v.strip() for v in line.split('(')[1].split(')')[0].split(','))
        return [
            ('PyId_' + name, funcname, f'_Py_IDENTIFIER({name})'),
        ]

    else:
        return None, None
    return name, decl
def _pop_cached(varcache, filename, funcname, name, *,
                _iter_variables=iter_variables,
                ):
    """Remove and return the matching variable from the per-file cache.

    The variables of "filename" are loaded into "varcache" (keyed by
    variable ID) the first time the file is requested.  If "funcname"
    is UNKNOWN then the first cached variable with the given name is
    used.  None is returned if there is no match.
    """
    # Look for the file.
    try:
        filevars = varcache[filename]
    except KeyError:
        filevars = varcache[filename] = {}
        found = _iter_variables(filename, parse_variable=_parse_global)
        for variable in found:
            variable._isglobal = True
            filevars[variable.id] = variable
        for varid in filevars:
            print(' ', varid)

    # Look for the variable.
    if funcname != UNKNOWN:
        return filevars.pop((filename, funcname, name), None)
    match = next((varid for varid in filevars if varid.name == name), None)
    if match is None:
        return None
    return filevars.pop(match)
def find_matching_variable(varid, varcache, allfilenames, *,
                           _pop_cached=_pop_cached,
                           ):
    """Return the cached variable that matches the given ID (or None).

    If the ID has a known filename then only that file is checked;
    otherwise every file in "allfilenames" is.  As a fallback, for an
    ID with a known filename and no funcname, the ".h" files in
    "allfilenames" are searched by name.
    """
    hasfile = bool(varid.filename) and varid.filename != UNKNOWN
    candidates = [varid.filename] if hasfile else allfilenames
    for filename in candidates:
        found = _pop_cached(varcache, filename, varid.funcname, varid.name)
        if found is not None:
            return found

    if hasfile and varid.funcname is None:
        for filename in allfilenames:
            if filename.endswith('.h'):
                found = _pop_cached(varcache, filename, None, varid.name)
                if found is not None:
                    return found
    return None
# Hard-coded declarations, keyed by variable name.  A declaration that
# does not contain the variable's name is a prefix (the name is appended
# when it is used).
MULTILINE = {
    # Python/Python-ast.c
    **dict.fromkeys([
        'Load_singleton',
        'Store_singleton',
        'Del_singleton',
        'AugLoad_singleton',
        'AugStore_singleton',
        'Param_singleton',
        'And_singleton',
        'Or_singleton',
    ], 'PyObject *'),
    **dict.fromkeys([
        'Add_singleton',
        'Sub_singleton',
        'Mult_singleton',
        'MatMult_singleton',
        'Div_singleton',
        'Mod_singleton',
        'Pow_singleton',
        'LShift_singleton',
        'RShift_singleton',
        'BitOr_singleton',
        'BitXor_singleton',
        'BitAnd_singleton',
        'FloorDiv_singleton',
        'Invert_singleton',
        'Not_singleton',
        'UAdd_singleton',
        'USub_singleton',
        'Eq_singleton',
        'NotEq_singleton',
        'Lt_singleton',
        'LtE_singleton',
        'Gt_singleton',
        'GtE_singleton',
        'Is_singleton',
        'IsNot_singleton',
        'In_singleton',
        'NotIn_singleton',
    ], 'static PyObject *'),
    # Python/symtable.c
    **dict.fromkeys([
        'top',
        'lambda',
        'genexpr',
        'listcomp',
        'setcomp',
        'dictcomp',
        '__class__',
    ], 'static identifier '),
    # Python/compile.c
    **dict.fromkeys([
        '__doc__',
        '__annotations__',
    ], 'static PyObject *'),
    # Objects/floatobject.c
    **dict.fromkeys([
        'double_format',
        'float_format',
        'detected_double_format',
        'detected_float_format',
    ], 'static float_format_type '),
    # Parser/listnode.c
    **dict.fromkeys([
        'level',
        'atbol',
    ], 'static int '),
    # Python/dtoa.c
    'private_mem': 'static double private_mem[PRIVATE_mem]',
    'pmem_next': 'static double *',
    # Modules/_weakref.c
    'weakref_functions': 'static PyMethodDef ',
}
# Hard-coded declarations, keyed by variable name, for variables whose
# type is an anonymous struct written inline in the declaration.  An
# entry that does not contain the variable's name is a prefix: _known()
# appends the name to it.
INLINE = {
    # Modules/_tracemalloc.c
    'allocators': 'static struct { PyMemAllocatorEx mem; PyMemAllocatorEx raw; PyMemAllocatorEx obj; } ',
    # Modules/faulthandler.c
    'fatal_error': 'static struct { int enabled; PyObject *file; int fd; int all_threads; PyInterpreterState *interp; void *exc_handler; } ',
    'thread': 'static struct { PyObject *file; int fd; PY_TIMEOUT_T timeout_us; int repeat; PyInterpreterState *interp; int exit; char *header; size_t header_len; PyThread_type_lock cancel_event; PyThread_type_lock running; } ',
    # Modules/signalmodule.c
    'Handlers': 'static volatile struct { _Py_atomic_int tripped; PyObject *func; } Handlers[NSIG]',
    'wakeup': 'static volatile struct { SOCKET_T fd; int warn_on_full_buffer; int use_send; } ',
    # Python/dynload_shlib.c
    'handles': 'static struct { dev_t dev; ino_t ino; void *handle; } handles[128]',
    # Objects/obmalloc.c
    '_PyMem_Debug': 'static struct { debug_alloc_api_t raw; debug_alloc_api_t mem; debug_alloc_api_t obj; } ',
    # Python/bootstrap_hash.c
    'urandom_cache': 'static struct { int fd; dev_t st_dev; ino_t st_ino; } ',
}
# Hard-coded declarations, keyed by variable name, for globals that are
# function pointers.  Each declaration already includes the name.
FUNC = {
    # Objects/object.c
    '_Py_abstract_hack': 'Py_ssize_t (*_Py_abstract_hack)(PyObject *)',
    # Parser/myreadline.c
    'PyOS_InputHook': 'int (*PyOS_InputHook)(void)',
    # Python/pylifecycle.c
    '_PyOS_mystrnicmp_hack': 'int (*_PyOS_mystrnicmp_hack)(const char *, const char *, Py_ssize_t)',
    # Parser/myreadline.c
    'PyOS_ReadlineFunctionPointer': 'char *(*PyOS_ReadlineFunctionPointer)(FILE *, FILE *, const char *)',
}
# More hard-coded declarations, keyed by variable name.
# NOTE(review): presumably these are the globals whose declaration
# cannot be read directly off a single source line -- confirm.
IMPLIED = {
    # Objects/boolobject.c
    '_Py_FalseStruct': 'static struct _longobject ',
    '_Py_TrueStruct': 'static struct _longobject ',
    # Modules/config.c
    '_PyImport_Inittab': 'struct _inittab _PyImport_Inittab[]',
}
# All of the hard-coded global declarations in one mapping.  For a name
# that appears in more than one table, the later table wins.
GLOBALS = {
    **MULTILINE,
    **INLINE,
    **FUNC,
    **IMPLIED,
}
# Hard-coded static locals, keyed by variable name.  Each value is
# (filename, funcname, declaration); _known() uses it for symbols that
# have a function but whose function and file are both unknown.
LOCALS = {
    'buildinfo': ('Modules/getbuildinfo.c',
                  'Py_GetBuildInfo',
                  'static char buildinfo[50 + sizeof(GITVERSION) + ((sizeof(GITTAG) > sizeof(GITBRANCH)) ? sizeof(GITTAG) : sizeof(GITBRANCH))]'),
    'methods': ('Python/codecs.c',
                '_PyCodecRegistry_Init',
                'static struct { char *name; PyMethodDef def; } methods[]'),
}
def _known(symbol):
    """Return a Variable for the symbol, based on the hard-coded tables.

    Raises KeyError (with the symbol's name) if the symbol is not
    covered by LOCALS, GLOBALS, or the name-based fallbacks below.
    """
    name = symbol.name
    if symbol.funcname:
        # Only locals with no known file/function are looked up.
        if symbol.funcname != UNKNOWN or symbol.filename != UNKNOWN:
            raise KeyError(name)
        filename, funcname, decl = LOCALS[name]
        varid = ID(filename, funcname, name)
    elif not symbol.filename or symbol.filename == UNKNOWN:
        raise KeyError(name)
    else:
        varid = symbol.id
        if name in GLOBALS:
            decl = GLOBALS[name]
        elif name.endswith('_methods'):
            decl = 'static PyMethodDef '
        elif (symbol.filename == 'Objects/exceptions.c'
                and name.startswith(('PyExc_', '_PyExc_'))):
            decl = 'static PyTypeObject '
        else:
            raise KeyError(name)

    # A declaration without the name in it is only a prefix.
    if name not in decl:
        decl += name
    return Variable(varid, decl)
def known_row(varid, decl):
    """Return the row for the given variable ID and declaration.

    The fields are (filename, funcname, name, kind, declaration), where
    a missing funcname is written as "-" and the kind is always
    "variable".
    """
    funcname = varid.funcname
    if not funcname:
        funcname = '-'
    return (varid.filename, funcname, varid.name, 'variable', decl)
def known_rows(symbols, *,
               cached=True,
               _get_filenames=iter_cpython_files,
               _find_match=find_matching_variable,
               _find_symbols=find_variables,
               _as_known=known_row,
               ):
    """Yield a row (see "_as_known") for each of the given symbols.

    Each symbol is resolved first against the hard-coded tables and
    then against the variables parsed from the source files.  A symbol
    that cannot be resolved gets UNKNOWN as its declaration.

    Only "cached=True" is supported; anything else raises
    NotImplementedError once iteration starts.  "_find_symbols" is
    accepted for that unimplemented mode and is currently unused.
    """
    if not cached:
        # XXX incorporate KNOWN
        raise NotImplementedError

    filenames = list(_get_filenames())
    cache = {}
    for symbol in symbols:
        try:
            found = _known(symbol)
        except KeyError:
            found = _find_match(symbol, cache, filenames)
            if found is None:
                found = Variable(symbol.id, UNKNOWN)
        yield _as_known(found.id, found.vartype)
def generate(symbols, filename=None, *,
             _generate_rows=known_rows,
             _write_tsv=write_tsv,
             ):
    """Write the rows for the given symbols to a TSV file.

    If no filename is given then KNOWN_FILE plus ".new" is used.
    """
    target = filename if filename else KNOWN_FILE + '.new'
    _write_tsv(target, KNOWN_HEADER, _generate_rows(symbols))
if __name__ == '__main__':
    from c_symbols import binary

    # Generate the file from the symbols found in the binary.
    generate(
        binary.iter_symbols(binary.PYTHON, find_local_symbol=None),
    )

View file

@ -0,0 +1,138 @@
import glob
import os
import os.path
from . import SOURCE_DIRS, REPO_ROOT
# The filename suffixes that iter_cpython_files() looks for.
C_SOURCE_SUFFIXES = ('.c', '.h')
def _walk_tree(root, *,
               _walk=os.walk,
               ):
    """Yield the path of each file found by walking the given tree."""
    # A wrapper around os.walk that resolves the filenames.
    for dirpath, _subdirs, filenames in _walk(root):
        yield from (os.path.join(dirpath, fn) for fn in filenames)
def walk_tree(root, *,
              suffix=None,
              walk=_walk_tree,
              ):
    """Yield each file in the tree under the given directory name.

    If "suffix" is provided then only files with that suffix will
    be included.  A suffix that is not a str results in ValueError.
    """
    if suffix and not isinstance(suffix, str):
        raise ValueError('suffix must be a string')

    for filename in walk(root):
        if not suffix or filename.endswith(suffix):
            yield filename
def glob_tree(root, *,
              suffix=None,
              _glob=glob.iglob,
              ):
    """Yield each file in the tree under the given directory name.

    If "suffix" is provided then only files with that suffix will
    be included.  A suffix that is not a str results in ValueError.

    Note that, as with any glob, names that start with a dot are not
    matched and matching directories are yielded as well.
    """
    suffix = suffix or ''
    if not isinstance(suffix, str):
        raise ValueError('suffix must be a string')

    # "**" only spans multiple directory levels when recursive=True
    # (otherwise it acts like "*").  In recursive mode it also matches
    # zero directories, so the files directly under root are covered
    # by the same pattern.
    yield from _glob(f'{root}/**/*{suffix}', recursive=True)
def iter_files(root, suffix=None, relparent=None, *,
               get_files=os.walk,
               _glob=glob_tree,
               _walk=walk_tree,
               ):
    """Yield each file in the tree under the given directory name.

    If "root" is a non-string iterable then do the same for each of
    those trees.

    If "suffix" is provided then only files with that suffix will
    be included.  It may be a single str or an iterable of them.

    If "relparent" is provided then it is used to resolve each
    filename as a relative path.

    "get_files" selects how the tree is listed: one of the glob
    functions means "_glob" is used; os.walk or walk_tree means the
    default walker is used via "_walk"; any other callable is itself
    passed to "_walk" as the walker.
    """
    if not isinstance(root, str):
        for subroot in root:
            yield from iter_files(subroot, suffix, relparent,
                                  get_files=get_files,
                                  _glob=_glob, _walk=_walk)
        return

    # Use the right "walk" function.
    if get_files in (glob.glob, glob.iglob, glob_tree):
        list_files = _glob
    else:
        if get_files in (os.walk, walk_tree):
            rawwalk = _walk_tree
        else:
            rawwalk = get_files

        def list_files(*args, **kwargs):
            return _walk(*args, walk=rawwalk, **kwargs)

    # A single suffix is handled by the lister; multiple suffixes are
    # filtered below.
    if suffix and not isinstance(suffix, str):
        filenames = list_files(root)
        suffixes = tuple(suffix)
    else:
        filenames = list_files(root, suffix=suffix)
        suffixes = None

    for filename in filenames:
        if suffixes and not filename.endswith(suffixes):
            continue
        if relparent:
            filename = os.path.relpath(filename, relparent)
        yield filename
def iter_files_by_suffix(root, suffixes, relparent=None, *,
                         walk=walk_tree,
                         _iter_files=iter_files,
                         ):
    """Yield each file in the tree that has the given suffixes.

    Unlike iter_files(), the results are in the original suffix order.

    Note that "walk" is accepted but is not currently used.
    """
    if isinstance(suffixes, str):
        suffixes = (suffixes,)
    # XXX Ignore repeated suffixes?
    for suffix in suffixes:
        for filename in _iter_files(root, suffix, relparent):
            yield filename
def iter_cpython_files(*,
                       walk=walk_tree,
                       _files=iter_files_by_suffix,
                       ):
    """Yield each C source file (relative to REPO_ROOT) in SOURCE_DIRS.

    Files under the excluded trees are left out.
    """
    # Each prefix ends with the path separator.
    excluded = (
        os.path.join('Include', 'cpython', ''),
    )
    found = _files(SOURCE_DIRS, C_SOURCE_SUFFIXES, REPO_ROOT, walk=walk)
    for filename in found:
        if not filename.startswith(excluded):
            yield filename

View file

@ -0,0 +1,69 @@
from collections import namedtuple
import re
from .util import classonly, _NTBase
# The placeholder used for a filename, funcname, or declaration that
# has not been determined.
UNKNOWN = '???'

# Matches a whole identifier that either starts with a letter or starts
# with an underscore and contains at least one letter.
NAME_RE = re.compile(r'^([a-zA-Z]|_\w*[a-zA-Z]\w*|[a-zA-Z]\w*)$')
class ID(_NTBase, namedtuple('ID', 'filename funcname name')):
    """A unique ID for a single symbol or declaration."""

    __slots__ = ()
    # XXX Add optional conditions (tuple of strings) field.
    #conditions = Slot()

    @classonly
    def from_raw(cls, raw):
        """Return an ID based on the given raw value.

        A falsy value results in None.  A str (or a 1-item iterable) is
        used as the name and a 2-item iterable as (filename, name).
        Anything else is handled by the base class.
        """
        if not raw:
            return None
        if isinstance(raw, str):
            return cls(None, None, raw)
        try:
            name, = raw
            filename = None
        except ValueError:
            try:
                filename, name = raw
            except ValueError:
                return super().from_raw(raw)
        return cls(filename, None, name)

    def __new__(cls, filename, funcname, name):
        # Each field is coerced to str, with any falsy value becoming None.
        self = super().__new__(
                cls,
                filename=str(filename) if filename else None,
                funcname=str(funcname) if funcname else None,
                name=str(name) if name else None,
                )
        #cls.conditions.set(self, tuple(str(s) if s else None
        #                               for s in conditions or ()))
        return self

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data).

        TypeError is raised for a missing field and ValueError for a
        name or funcname that is not an identifier.
        """
        if not self.name:
            raise TypeError('missing name')
        else:
            if not NAME_RE.match(self.name):
                raise ValueError(
                        f'name must be an identifier, got {self.name!r}')

        # Symbols from a binary might not have filename/funcname info.

        if self.funcname:
            if not self.filename:
                raise TypeError('missing filename')
            if not NAME_RE.match(self.funcname) and self.funcname != UNKNOWN:
                raise ValueError(
                        f'funcname must be an identifier, got {self.funcname!r}')

        # XXX Require the filename (at least UNKNOWN)?
        # XXX Check the filename?

    @property
    def islocal(self):
        """True if the ID has a funcname."""
        return self.funcname is not None

View file

@ -0,0 +1,67 @@
import csv
import os.path
from c_parser.info import Variable
from . import DATA_DIR
from .info import ID, UNKNOWN
from .util import read_tsv
# The default file of known declarations.
DATA_FILE = os.path.join(DATA_DIR, 'known.tsv')

# The columns of that file, in order, and the matching header line.
COLUMNS = ('filename', 'funcname', 'name', 'kind', 'declaration')
HEADER = '\t'.join(COLUMNS)
# XXX need tests:
# * from_file()
def from_file(infile, *,
              _read_tsv=read_tsv,
              ):
    """Return the info for known declarations in the given file.

    The result is a dict with a "variables" entry that maps each ID to
    its Variable.  A row with a kind other than "variable" results in
    ValueError.
    """
    known = {
        'variables': {},
        #'types': {},
        #'constants': {},
        #'macros': {},
        }
    for row in _read_tsv(infile, HEADER):
        filename, funcname, name, kind, declaration = row
        if not funcname or funcname == '-':
            funcname = None
        varid = ID(filename, funcname, name)
        if kind == 'variable':
            values = known['variables']
            value = Variable(varid, declaration)
            value._isglobal = _is_global(declaration) or varid.funcname is None
        else:
            raise ValueError(f'unsupported kind in row {row}')
        # Entries named "id" with an unknown declaration are not
        # variables (per the original note), so they are stored as-is
        # without being validated.
        if not (value.name == 'id' and declaration == UNKNOWN):
            value.validate()
        values[varid] = value
    return known
def _is_global(vartype):
    """Return True if the declaration starts with a known global marker."""
    prefixes = (
        # statics
        'static ',
        'Py_LOCAL(', 'Py_LOCAL_INLINE(',
        '_Py_IDENTIFIER(', '_Py_static_string(',
        'PyDoc_VAR(',
        'SLOT1BINFULL(', 'SLOT1BIN(',
        'WRAP_METHOD(',
        # public extern
        'PyAPI_DATA(',
    )
    return vartype.startswith(prefixes)

View file

@ -0,0 +1,214 @@
import csv
import subprocess
# A unique sentinel for "no value was given" (so None stays usable).
_NOT_SET = object()
def run_cmd(argv, **kwargs):
    """Run the command and return its stdout as text.

    Extra keyword arguments are passed through to subprocess.run().
    A non-zero exit status results in subprocess.CalledProcessError.
    """
    return subprocess.run(
        argv,
        #capture_output=True,
        #stderr=subprocess.STDOUT,
        stdout=subprocess.PIPE,
        text=True,
        check=True,
        **kwargs
    ).stdout
def read_tsv(infile, header, *,
             _open=open,
             _get_reader=csv.reader,
             ):
    """Yield each row of the given TSV (tab-separated) file.

    "infile" may be a filename or an iterable of lines.  The first
    line must match "header" (ignoring surrounding whitespace) or
    ValueError is raised.  Each row is yielded as a tuple of stripped
    values.
    """
    if isinstance(infile, str):
        with _open(infile, newline='') as opened:
            yield from read_tsv(opened, header,
                                _open=_open,
                                _get_reader=_get_reader,
                                )
        return

    lines = iter(infile)

    # Validate the header (an empty file has an empty header).
    actualheader = next(lines, '').strip()
    if actualheader != header:
        raise ValueError(f'bad header {actualheader!r}')

    reader = _get_reader(lines, delimiter='\t')
    for row in reader:
        yield tuple(value.strip() for value in row)
def write_tsv(outfile, header, rows, *,
              _open=open,
              _get_writer=csv.writer,
              ):
    """Write each of the rows to the given TSV (tab-separated) file.

    "outfile" may be a filename or a writable file object.  "header"
    may be a tab-separated str or a sequence of column names.  Each
    value is written as a str, with None written as an empty string.
    """
    if isinstance(outfile, str):
        with _open(outfile, 'w', newline='') as opened:
            return write_tsv(opened, header, rows,
                             _open=_open,
                             _get_writer=_get_writer,
                             )

    columns = header.split('\t') if isinstance(header, str) else header
    writer = _get_writer(outfile, delimiter='\t')
    writer.writerow(columns)
    for row in rows:
        writer.writerow('' if value is None else str(value)
                        for value in row)
class Slot:
    """A descriptor that provides a slot.

    This is useful for types that can't have slots via __slots__,
    e.g. tuple subclasses.

    The values are stored on the descriptor itself, keyed by the id()
    of each object.
    NOTE(review): nothing here ever removes an entry, and an id can be
    reused once an object is gone -- confirm that is acceptable.
    """

    __slots__ = ('initial', 'default', 'readonly', 'instances', 'name')

    def __init__(self, initial=_NOT_SET, *,
                 default=_NOT_SET,
                 readonly=False,
                 ):
        self.initial = initial
        self.default = default
        self.readonly = readonly

        self.instances = {}
        self.name = None

    def __set_name__(self, cls, name):
        # A Slot may only be bound to a single attribute name.
        if self.name is not None:
            raise TypeError('already used')
        self.name = name

    def __get__(self, obj, cls):
        if obj is None:  # called on the class
            return self
        key = id(obj)
        if key in self.instances:
            value = self.instances[key]
        else:
            # First access: start from "initial" (or else "default").
            if self.initial is _NOT_SET:
                value = self.default
            else:
                value = self.initial
            self.instances[key] = value
        if value is _NOT_SET:
            raise AttributeError(self.name)
        # XXX Optionally make a copy?
        return value

    def __set__(self, obj, value):
        if self.readonly:
            raise AttributeError(f'{self.name} is readonly')
        # XXX Optionally coerce?
        self.instances[id(obj)] = value

    def __delete__(self, obj):
        if self.readonly:
            raise AttributeError(f'{self.name} is readonly')
        # Deleting resets the value to the default.
        self.instances[id(obj)] = self.default

    def set(self, obj, value):
        """Update the cached value for an object.

        This works even if the descriptor is read-only.  This is
        particularly useful when initializing the object (e.g. in
        its __new__ or __init__).
        """
        self.instances[id(obj)] = value
class classonly:
    """A non-data descriptor that makes a value only visible on the class.

    This is like the "classmethod" builtin, but does not show up on
    instances of the class.  It may be used as a decorator.
    """

    def __init__(self, value):
        self.value = value
        # The wrapped function gets bound to the class on each access.
        self.getter = classmethod(value).__get__
        self.name = None

    def __set_name__(self, cls, name):
        if self.name is not None:
            raise TypeError('already used')
        self.name = name

    def __get__(self, obj, cls):
        if obj is None:
            # called on the class
            return self.getter(None, cls)
        raise AttributeError(self.name)
class _NTBase:
    """A mixin with the common behavior of the namedtuple-based types."""

    __slots__ = ()

    @classonly
    def from_raw(cls, raw):
        """Return an instance based on the raw value (None if it is falsy).

        An instance of the class is returned as-is, a str is passed to
        from_string(), a mapping is used as keyword arguments, and any
        other iterable as positional arguments.  Anything else results
        in NotImplementedError.
        """
        if not raw:
            return None
        if isinstance(raw, cls):
            return raw
        if isinstance(raw, str):
            return cls.from_string(raw)
        if hasattr(raw, 'items'):
            return cls(**raw)
        try:
            args = tuple(raw)
        except TypeError:
            pass
        else:
            return cls(*args)
        raise NotImplementedError

    @classonly
    def from_string(cls, value):
        """Return a new instance based on the given string."""
        raise NotImplementedError

    @classmethod
    def _make(cls, iterable):  # The default _make() is not subclass-friendly.
        return cls.__new__(cls, *iterable)

    # XXX Always validate?
    #def __init__(self, *args, **kwargs):
    #    self.validate()

    # XXX The default __repr__() is not subclass-friendly (where the name changes).
    #def __repr__(self):
    #    _, _, sig = super().__repr__().partition('(')
    #    return f'{self.__class__.__name__}({sig}'

    # To make sorting work with None:
    def __lt__(self, other):
        try:
            return super().__lt__(other)
        except TypeError:
            # The comparison failed; one with a None field sorts first.
            if None in self:
                return True
            if None in other:
                return False
            raise

    def validate(self):
        """Do nothing (subclasses may override this to check the data)."""
        return

    # XXX Always validate?
    #def _replace(self, **kwargs):
    #    obj = super()._replace(**kwargs)
    #    obj.validate()
    #    return obj

View file

@ -0,0 +1,72 @@
#######################################
# C Globals and CPython Runtime State.
CPython's C code makes extensive use of global variables (whether static
globals or static locals). Each such variable falls into one of several
categories:
* strictly const data
* used exclusively in main or in the REPL
* process-global state (e.g. managing process-level resources
like signals and file descriptors)
* Python "global" runtime state
* per-interpreter runtime state
The last one can be a problem as soon as anyone creates a second
interpreter (AKA "subinterpreter") in a process. It is definitely a
problem under subinterpreters if they are no longer sharing the GIL,
since the GIL protects us from a lot of race conditions. Keep in mind
that ultimately *all* objects (PyObject) should be treated as
per-interpreter state. This includes "static types", freelists,
_PyIdentifier, and singletons. Take that in for a second. It has
significant implications on where we use static variables!
Be aware that module-global state (stored in C statics) is a kind of
per-interpreter state. There have been efforts across many years, and
still going, to provide extension module authors mechanisms to store
that state safely (see PEPs 3121, 489, etc.).
(Note that there has been discussion around support for running multiple
Python runtimes in the same process. That would end up with the same
problems, relative to static variables, that subinterpreters have.)
Historically we have been bad at keeping per-interpreter state out of
static variables, mostly because until recently subinterpreters were
neither widely used nor even factored into solutions. However, the
feature is growing in popularity and use in the community.
Mandate: "Eliminate use of static variables for per-interpreter state."
The "c-globals.py" script in this directory, along with its accompanying
data files, is part of the effort to resolve existing problems with
our use of static variables and to prevent future problems.
#-------------------------
## statics for actually-global state (and runtime state consolidation)
In general, holding any kind of state in static variables
increases maintenance burden and increases the complexity of code (e.g.
we use TSS to identify the active thread state). So it is a good idea
to avoid using statics for state even if for the "global" runtime or
for process-global state.
Relative to maintenance burden, one problem is where the runtime
state is spread throughout the codebase in dozens of individual
globals. Unlike the other globals, the runtime state represents a set
of values that are constantly shifting in a complex way. When they are
spread out it's harder to get a clear picture of what the runtime
involves. Furthermore, when they are spread out it complicates efforts
that change the runtime.
Consequently, the globals for Python's runtime state have been
consolidated under a single top-level _PyRuntime global. No new globals
should be added for runtime state. Instead, they should be added to
_PyRuntimeState or one of its sub-structs. The tools in this directory
are run as part of the test suite to ensure that no new globals have
been added. The script can be run manually as well:
./python Tools/c-analyzer/c-globals.py check
If it reports any globals then they should be resolved. If the globals
are runtime state then they should be folded into _PyRuntimeState.
Otherwise they should be marked as ignored.

View file

View file

@ -0,0 +1,209 @@
import argparse
import os.path
import re
import sys
from c_analyzer_common import SOURCE_DIRS, REPO_ROOT
from c_analyzer_common.info import UNKNOWN
from c_analyzer_common.known import (
from_file as known_from_file,
DATA_FILE as KNOWN_FILE,
)
from . import find, show
from .supported import is_supported, ignored_from_file, IGNORED_FILE, _is_object
def _match_unused_global(variable, knownvars, used):
    """Return the list of unused known globals that match the variable.

    A known ID matches if it is not in "used", has no funcname, and has
    the variable's name.  If the variable's filename is known then the
    filenames must match too.
    """
    hasfile = variable.filename and variable.filename != UNKNOWN
    return [
        varid
        for varid in knownvars
        if varid not in used
        and varid.funcname is None
        and varid.name == variable.name
        and (not hasfile or variable.filename == varid.filename)
    ]
def _check_results(unknown, knownvars, used):
    """Report the problems found while resolving and fail if any remain.

    A message is printed for each unresolved variable, along with the
    known globals that may actually be locals and the known variables
    that were never used.  An exception is raised if any known
    variables went unused or any unknown variables remain.
    """
    badknown = set()
    for variable in sorted(unknown):
        if variable.funcname != UNKNOWN:
            #raise Exception(msg)
            print(f'could not find global symbol {variable.id}')
            continue
        matches = _match_unused_global(variable, knownvars, used)
        if matches:
            assert isinstance(matches, list)
            badknown.update(matches)
        elif variable.name in ('completed', 'id'):  # XXX Figure out where these variables are.
            unknown.remove(variable)
        else:
            #raise Exception(msg)
            print(f'could not find local symbol {variable.id}')

    if badknown:
        print('---')
        print(f'{len(badknown)} globals in known.tsv, but may actually be local:')
        for varid in sorted(badknown):
            print(f'{varid.filename:30} {varid.name}')

    # XXX Figure out where the "id" variables are.
    leftover = set(knownvars) - used
    unused = sorted(varid for varid in leftover if varid.name != 'id')
    if unused:
        print('---')
        print(f'did not use {len(unused)} known vars:')
        for varid in unused:
            print(f'{varid.filename:30} {varid.funcname or "-":20} {varid.name}')
        raise Exception('not all known symbols used')

    if unknown:
        print('---')
        raise Exception('could not find all symbols')
def _find_globals(dirnames, known, ignored):
    """Yield (variable, <supported>) for each resolved global variable.

    "known" and "ignored" are the data files to load.  Variables whose
    type could not be determined are not yielded.
    """
    if dirnames == SOURCE_DIRS:
        # The default dirs are given relative to the repo root.
        dirnames = [os.path.relpath(dirname, REPO_ROOT)
                    for dirname in dirnames]

    ignored = ignored_from_file(ignored)
    known = known_from_file(known)

    # These are only needed by the (disabled) check at the end.
    used = set()
    unknown = set()

    knownvars = (known or {}).get('variables')
    found = find.globals_from_binary(knownvars=knownvars, dirnames=dirnames)
    #found = find.globals(dirnames, known, kind='platform')
    for variable in found:
        if variable.vartype == UNKNOWN:
            unknown.add(variable)
        else:
            yield variable, is_supported(variable, ignored, known)
            used.add(variable.id)

    #_check_results(unknown, knownvars, used)
def cmd_check(cmd, dirs=SOURCE_DIRS, *,
              ignored=IGNORED_FILE,
              known=KNOWN_FILE,
              _find=_find_globals,
              _show=show.basic,
              _print=print,
              ):
    """
    Fail if there are unsupported global variables.

    In the failure case, the list of unsupported variables
    will be printed out and the process exits with status 1.
    """
    unsupported = []
    for variable, supported in _find(dirs, known, ignored):
        if not supported:
            unsupported.append(variable)

    if unsupported:
        _print('ERROR: found unsupported global variables')
        _print()
        _show(sorted(unsupported))
        _print(f' ({len(unsupported)} total)')
        sys.exit(1)
    #_print('okay')
def cmd_show(cmd, dirs=SOURCE_DIRS, *,
             ignored=IGNORED_FILE,
             known=KNOWN_FILE,
             skip_objects=False,
             _find=_find_globals,
             _show=show.basic,
             _print=print,
             ):
    """
    Print out the list of found global variables.

    The variables will be distinguished as "supported" or "unsupported".
    If "skip_objects" is true then the variables whose type is an
    object (per _is_object()) are left out.
    """
    groups = {True: [], False: []}
    for found, supported in _find(dirs, known, ignored):
        # XXX Support proper filters instead.
        if skip_objects and _is_object(found.vartype):
            continue
        groups[bool(supported)].append(found)
    allsupported = groups[True]
    allunsupported = groups[False]

    _print('supported:')
    _print('----------')
    _show(sorted(allsupported))
    _print(f' ({len(allsupported)} total)')
    _print()
    _print('unsupported:')
    _print('------------')
    _show(sorted(allunsupported))
    _print(f' ({len(allunsupported)} total)')
#############################
# the script
# The supported commands, by name.  main() looks up the requested
# command here and calls it with the command name and its kwargs.
COMMANDS = {
    'check': cmd_check,
    'show': cmd_show,
}
# The program name shown in the usage/help text.  It is fixed to the
# script's name instead of being taken from sys.argv[0].
PROG = 'c-globals.py'
def parse_args(prog=PROG, argv=sys.argv[1:], *, _fail=None):
    """Return (cmd, kwargs) for the given command-line args.

    "cmd" is the name of the requested sub-command and "kwargs" holds
    the remaining parsed options.  If no command was given then
    "_fail" is called with a message (by default the parser's error()).
    """
    # The options shared by every sub-command.
    common = argparse.ArgumentParser(add_help=False)
    common.add_argument('--ignored', metavar='FILE',
                        default=IGNORED_FILE,
                        help='path to file that lists ignored vars')
    common.add_argument('--known', metavar='FILE',
                        default=KNOWN_FILE,
                        help='path to file that lists known types')
    common.add_argument('dirs', metavar='DIR', nargs='*',
                        default=SOURCE_DIRS,
                        help='a directory to check')

    parser = argparse.ArgumentParser(prog=prog)
    subs = parser.add_subparsers(dest='cmd')

    subs.add_parser('check', parents=[common])

    show_parser = subs.add_parser('show', parents=[common])
    show_parser.add_argument('--skip-objects', action='store_true')

    if _fail is None:
        _fail = parser.error

    # Now parse the args.
    ns = vars(parser.parse_args(argv))
    cmd = ns.pop('cmd')
    if not cmd:
        _fail('missing command')
    return cmd, ns
def main(cmd, cmdkwargs=None, *, _COMMANDS=COMMANDS):
    """Run the named command with the given keyword arguments.

    ValueError is raised if the command is missing or not supported.
    """
    try:
        cmdfunc = _COMMANDS[cmd]
    except KeyError:
        msg = f'unsupported cmd {cmd!r}' if cmd else 'missing cmd'
        raise ValueError(msg)

    kwargs = cmdkwargs or {}
    cmdfunc(cmd, **kwargs)
if __name__ == '__main__':
    # Parse the command line and run the requested command.
    main(*parse_args())

View file

@ -0,0 +1,95 @@
from c_analyzer_common import SOURCE_DIRS
from c_analyzer_common.info import UNKNOWN
from c_symbols import (
info as s_info,
binary as b_symbols,
source as s_symbols,
resolve,
)
from c_parser import info, declarations
# XXX needs tests:
# * iter_variables
def globals_from_binary(binfile=b_symbols.PYTHON, *,
                        knownvars=None,
                        dirnames=None,
                        _iter_symbols=b_symbols.iter_symbols,
                        _resolve=resolve.symbols_to_variables,
                        _get_symbol_resolver=resolve.get_resolver,
                        ):
    """Yield a Variable for each found Symbol.

    Details are filled in from the given "known" variables and types.
    Non-global variables are left out, unless their type is unknown.
    """
    symbols = _iter_symbols(binfile, find_local_symbol=None)
    #symbols = list(symbols)
    resolver = _get_symbol_resolver(knownvars, dirnames)
    for variable in _resolve(symbols, resolve=resolver):
        # Keep the globals, along with anything we couldn't find.
        # XXX Drop the "UNKNOWN" condition?
        if variable.isglobal or variable.vartype == UNKNOWN:
            yield variable
def globals_from_declarations(dirnames=SOURCE_DIRS, *,
                              known=None,
                              ):
    """Yield a Variable for each found declaration.

    Details are filled in from the given "known" variables and types.

    This is not implemented yet; calling it always raises
    NotImplementedError.
    """
    raise NotImplementedError
def iter_variables(kind='platform', *,
                   known=None,
                   dirnames=None,
                   _resolve_symbols=resolve.symbols_to_variables,
                   _get_symbol_resolver=resolve.get_resolver,
                   _symbols_from_binary=b_symbols.iter_symbols,
                   _symbols_from_source=s_symbols.iter_symbols,
                   _iter_raw=declarations.iter_all,
                   _iter_preprocessed=declarations.iter_preprocessed,
                   ):
    """Yield a Variable for each one found (e.g. in files).

    "kind" selects where the variables come from: "symbols" (symbols
    from the source), "platform" (symbols from the binary; the
    default), "declarations", or "preprocessed".  Any other kind
    results in ValueError.
    """
    kind = kind or 'platform'

    if kind in ('symbols', 'platform'):
        knownvars = (known or {}).get('variables')
        if kind == 'symbols':
            symbols = _symbols_from_source(dirnames, known)
        else:
            symbols = _symbols_from_binary(find_local_symbol=None)
        yield from _resolve_symbols(
                symbols,
                resolve=_get_symbol_resolver(knownvars, dirnames),
                )
    elif kind in ('declarations', 'preprocessed'):
        if kind == 'declarations':
            iter_decls = _iter_raw
        else:
            iter_decls = _iter_preprocessed
        # Only the variable declarations are wanted.
        for decl in iter_decls(dirnames):
            if isinstance(decl, info.Variable):
                yield decl
    else:
        raise ValueError(f'unsupported kind {kind!r}')
def globals(dirnames, known, *,
            kind=None,  # Use the default.
            _iter_variables=iter_variables,
            ):
    """Yield each found variable whose "isglobal" attribute is true."""
    candidates = _iter_variables(kind, known=known, dirnames=dirnames)
    for var in candidates:
        if var.isglobal:
            yield var

View file

@ -0,0 +1,16 @@
def basic(globals, *,
          _print=print):
    """Print one row per variable: its location and then its type.

    The location is "<filename>:<name>" for file-level variables and
    "<filename>:<funcname>():<name>" for variables inside a function.
    It is left-aligned in a 64-character column.
    """
    for var in globals:
        scope = f'{var.funcname}():' if var.funcname else ''
        where = f'{var.filename}:{scope}{var.name}'
        _print(f'{where:<64} {var.vartype}')

View file

@ -0,0 +1,368 @@
import os.path
import re
from c_analyzer_common import DATA_DIR
from c_analyzer_common.info import ID
from c_analyzer_common.util import read_tsv, write_tsv
# The TSV file (under DATA_DIR) that lists the ignored variables.
IGNORED_FILE = os.path.join(DATA_DIR, 'ignored.tsv')
# The columns of that file, in order; each row is unpacked into these
# five fields by ignored_from_file().
IGNORED_COLUMNS = ('filename', 'funcname', 'name', 'kind', 'reason')
# The tab-separated header line passed to read_tsv()/write_tsv().
IGNORED_HEADER = '\t'.join(IGNORED_COLUMNS)
# XXX Move these to ignored.tsv.
# Maps a variable name to the reason it is ignored.  _is_ignored()
# consults this (as its default "_IGNORED") only for variables whose
# funcname is None.
IGNORED = {
    # global
    'PyImport_FrozenModules': 'process-global',
    'M___hello__': 'process-global',
    'inittab_copy': 'process-global',
    'PyHash_Func': 'process-global',
    '_Py_HashSecret_Initialized': 'process-global',
    '_TARGET_LOCALES': 'process-global',
    # startup (only changed before/during)
    '_PyRuntime': 'runtime startup',
    'runtime_initialized': 'runtime startup',
    'static_arg_parsers': 'runtime startup',
    'orig_argv': 'runtime startup',
    'opt_ptr': 'runtime startup',
    '_preinit_warnoptions': 'runtime startup',
    '_Py_StandardStreamEncoding': 'runtime startup',
    'Py_FileSystemDefaultEncoding': 'runtime startup',
    '_Py_StandardStreamErrors': 'runtime startup',
    'Py_FileSystemDefaultEncodeErrors': 'runtime startup',
    'Py_BytesWarningFlag': 'runtime startup',
    'Py_DebugFlag': 'runtime startup',
    'Py_DontWriteBytecodeFlag': 'runtime startup',
    'Py_FrozenFlag': 'runtime startup',
    'Py_HashRandomizationFlag': 'runtime startup',
    'Py_IgnoreEnvironmentFlag': 'runtime startup',
    'Py_InspectFlag': 'runtime startup',
    'Py_InteractiveFlag': 'runtime startup',
    'Py_IsolatedFlag': 'runtime startup',
    'Py_NoSiteFlag': 'runtime startup',
    'Py_NoUserSiteDirectory': 'runtime startup',
    'Py_OptimizeFlag': 'runtime startup',
    'Py_QuietFlag': 'runtime startup',
    'Py_UTF8Mode': 'runtime startup',
    'Py_UnbufferedStdioFlag': 'runtime startup',
    'Py_VerboseFlag': 'runtime startup',
    '_Py_path_config': 'runtime startup',
    '_PyOS_optarg': 'runtime startup',
    '_PyOS_opterr': 'runtime startup',
    '_PyOS_optind': 'runtime startup',
    '_Py_HashSecret': 'runtime startup',
    # REPL
    '_PyOS_ReadlineLock': 'repl',
    '_PyOS_ReadlineTState': 'repl',
    # effectively const
    'tracemalloc_empty_traceback': 'const',
    '_empty_bitmap_node': 'const',
    'posix_constants_pathconf': 'const',
    'posix_constants_confstr': 'const',
    'posix_constants_sysconf': 'const',
    '_PySys_ImplCacheTag': 'const',
    '_PySys_ImplName': 'const',
    'PyImport_Inittab': 'const',
    '_PyImport_DynLoadFiletab': 'const',
    '_PyParser_Grammar': 'const',
    'Py_hexdigits': 'const',
    '_PyImport_Inittab': 'const',
    '_PyByteArray_empty_string': 'const',
    '_PyLong_DigitValue': 'const',
    '_Py_SwappedOp': 'const',
    'PyStructSequence_UnnamedField': 'const',
    # signals are main-thread only
    'faulthandler_handlers': 'signals are main-thread only',
    'user_signals': 'signals are main-thread only',
    'wakeup': 'signals are main-thread only',
    # hacks
    '_PySet_Dummy': 'only used as a placeholder',
}
# The reason reported by _is_ignored() for the per-file variables it
# treats as harmless.
BENIGN = 'races here are benign and unlikely'
def is_supported(variable, ignored=None, known=None, *,
                 _ignored=(lambda *a, **k: _is_ignored(*a, **k)),
                 _vartype_okay=(lambda *a, **k: _is_vartype_okay(*a, **k)),
                 ):
    """Return True if the given global variable is okay in CPython.

    "variable" must provide the attributes the checks read (at least
    "vartype").  "ignored", if given, is a mapping that may hold
    'variables' and 'types' entries; they are handed to the two checks.
    "known" is accepted but not used here.

    The variable is supported if it is explicitly ignored or if its
    type is considered okay.
    """
    if _ignored(variable,
                ignored and ignored.get('variables')):
        return True
    # "ignored" defaults to None, so guard the lookup just like above.
    ignoredtypes = ignored and ignored.get('types')
    return bool(_vartype_okay(variable.vartype, ignoredtypes))
# Per-file exemptions: {filename: {variable name: reason}}.
_IGNORED_BY_FILENAME = {
    # guarded by lock?
    'Python/dtoa.c': dict.fromkeys(
        ('p5s', 'freelist', 'private_mem', 'pmem_next'),
        'dtoa is thread-safe?'),
    # Threads do not become an issue until after these have been set
    # and these never get changed after that.
    'Python/thread.c': dict.fromkeys(
        ('initialized', 'thread_debug'),
        'thread-safe'),
    # Races are benign here, as well as unlikely.
    'Python/getversion.c': {'version': BENIGN},
    'Python/fileutils.c': dict.fromkeys(
        ('force_ascii', 'ioctl_works', '_Py_open_cloexec_works'),
        BENIGN),
    'Python/codecs.c': {'ucnhash_CAPI': BENIGN},
    'Python/bootstrap_hash.c': {'getrandom_works': BENIGN},
    # bloom_linebreak is *mostly* benign.
    'Objects/unicodeobject.c': dict.fromkeys(
        ('ucnhash_CAPI', 'bloom_linebreak'),
        BENIGN),
    # The static is used for pre-allocation.
    'Modules/getbuildinfo.c': {'buildinfo': BENIGN},
    'Modules/posixmodule.c': dict.fromkeys(
        ('ticks_per_second', 'dup3_works'),
        BENIGN),
    'Modules/timemodule.c': {'ticks_per_second': BENIGN},
    'Objects/longobject.c': dict.fromkeys(
        ('log_base_BASE', 'convwidth_base', 'convmultmax_base'),
        BENIGN),
}


def _is_ignored(variable, ignoredvars=None, *,
                _IGNORED=IGNORED,
                ):
    """Return the reason if the variable is a supported global.

    Return None if the variable is not a supported global.

    The checks run in this order: the caller's "ignoredvars" (keyed by
    the variable's ID), the by-name table (file-level variables only),
    the compiler-related files, and finally the per-file exemptions.
    """
    if ignoredvars and (reason := ignoredvars.get(variable.id)):
        return reason

    if variable.funcname is None:
        if reason := _IGNORED.get(variable.name):
            return reason

    filename = variable.filename

    # compiler
    if filename == 'Python/graminit.c':
        if variable.vartype.startswith('static state '):
            return 'compiler'
    elif filename == 'Python/symtable.c':
        if variable.vartype.startswith('static identifier '):
            return 'compiler'
    elif filename == 'Python/Python-ast.c':
        # These should be const.
        if variable.name.endswith(('_field', '_attribute')):
            return 'compiler'

    # other
    by_name = _IGNORED_BY_FILENAME.get(filename)
    if by_name:
        return by_name.get(variable.name)
    return None
def _is_vartype_okay(vartype, ignoredtypes=None):
    """Return the reason the given variable type is okay, else None.

    Object types (per _is_object()) are never okay.  "ignoredtypes" is
    accepted but not consulted.
    """
    if _is_object(vartype):
        return None

    if vartype.startswith(('static const ', 'const ')):
        return 'const'

    const_markers = (
        # components for TypeObject definitions
        'PyMethodDef', 'PyGetSetDef', 'PyMemberDef',
        'PyNumberMethods', 'PySequenceMethods', 'PyMappingMethods',
        'PyBufferProcs', 'PyAsyncMethods',
        'slotdef', 'newfunc',
        # structseq
        'PyStructSequence_Desc', 'PyStructSequence_Field',
        # other definitions
        'PyModuleDef',
    )
    if any(marker in vartype for marker in const_markers):
        return 'const'

    # thread-safe
    if '_Py_atomic_int' in vartype or 'pthread_condattr_t' in vartype:
        return 'thread-safe'

    # startup
    if '_Py_PreInitEntry' in vartype:
        return 'startup'

    # XXX Not (yet) treated as okay:
    #  PyMemAllocatorEx (global)
    #  PyThread_type_lock
    #  _Py_tss_t
    #  _Py_hashtable_t
    #  stack_t
    #  _PyUnicode_Name_CAPI

    # functions
    if '(' in vartype and '[' not in vartype:
        return 'function pointer'

    # XXX finish!
    # * allow const values?
    return None
def _is_object(vartype):
    """Return True if the variable type looks like a Python object.

    This is a textual check against the type string: a "Py...Object"
    name, one of several known prefixes or suffixes, or one of several
    known type names appearing anywhere in the string.
    """
    if re.match(r'.*\bPy\w*Object\b', vartype):
        return True

    prefixes = (
        '_Py_IDENTIFIER(',
        'static _Py_Identifier',
        '_Py_static_string(',
        'static identifier ',
    )
    if vartype.startswith(prefixes):
        return True
    if vartype.endswith((' _Py_FalseStruct', ' _Py_TrueStruct')):
        return True

    # XXX Add more?
    markers = (
        '_PyArg_Parser ',
        'traceback_t',
        'PyAsyncGenASend',
        '_PyAsyncGenWrappedValue',
        'PyContext',
        'method_cache_entry',
    )
    return any(marker in vartype for marker in markers)
def ignored_from_file(infile, *,
                      _read_tsv=read_tsv,
                      ):
    """Return the ignored items listed in the given TSV file.

    The result is a dict with a single 'variables' entry, which maps
    each variable's ID to the reason it is ignored.  A funcname that is
    empty or "-" in the file is treated as None.  Rows of any kind
    other than "variable" raise ValueError.
    """
    variables = {}
    for row in _read_tsv(infile, IGNORED_HEADER):
        filename, funcname, name, kind, reason = row
        if not funcname or funcname == '-':
            funcname = None
        varid = ID(filename, funcname, name)
        if kind != 'variable':
            raise ValueError(f'unsupported kind in row {row}')
        variables[varid] = reason
    return {
        'variables': variables,
        #'types': {},
        #'constants': {},
        #'macros': {},
    }
##################################
# generate
def _get_row(varid, reason):
    """Return the ignored-file row for the given variable ID and reason.

    A missing funcname is written as "-".
    """
    funcname = varid.funcname or '-'
    return (varid.filename, funcname, varid.name, 'variable', str(reason))
def _get_rows(variables, ignored=None, *,
              _as_row=_get_row,
              _is_ignored=_is_ignored,
              _vartype_okay=_is_vartype_okay,
              ):
    """Yield an ignored-file row for each variable that has a reason.

    The reason comes from the "ignored" check or, failing that, from
    the vartype check.  Variables with no reason are skipped.  Each
    emitted variable is also printed, followed by a final total.
    """
    ignoredvars = ignored and ignored.get('variables')
    ignoredtypes = ignored and ignored.get('types')
    total = 0
    for var in variables:
        reason = (_is_ignored(var, ignoredvars)
                  or _vartype_okay(var.vartype, ignoredtypes))
        if not reason:
            continue
        print(' ', var, repr(reason))
        yield _as_row(var.id, reason)
        total += 1
    print(f'total: {total}')
def _generate_ignored_file(variables, filename=None, *,
                           _generate_rows=_get_rows,
                           _write_tsv=write_tsv,
                           ):
    """Write the ignored-variable rows for "variables" to a TSV file.

    Without a filename, the rows go to IGNORED_FILE with ".new" added.
    """
    target = filename if filename else IGNORED_FILE + '.new'
    _write_tsv(target, IGNORED_HEADER, _generate_rows(variables))
# Script entry point: write a fresh ignored-variables file (the default
# target of _generate_ignored_file()) from the globals in the binary.
if __name__ == '__main__':
    from c_analyzer_common import SOURCE_DIRS
    from c_analyzer_common.known import (
        from_file as known_from_file,
        DATA_FILE as KNOWN_FILE,
    )
    # NOTE(review): a relative import only works when this module is run
    # as part of its package (e.g. via "python -m") -- confirm usage.
    from . import find
    # Load the known variables so the found symbols can be resolved.
    known = known_from_file(KNOWN_FILE)
    knownvars = (known or {}).get('variables')
    variables = find.globals_from_binary(knownvars=knownvars,
                                         dirnames=SOURCE_DIRS)
    _generate_ignored_file(variables)

View file

View file

@ -0,0 +1,295 @@
import re
import shlex
import subprocess
from . import source
# A C identifier: a letter followed by any word characters, or one or
# more underscores followed by an alphanumeric and any word characters.
# (The range used to be "a-zA-z", which also matched "[", "\", "]",
# "^", "_" and "`", and the first alternative matched a single letter
# only, so multi-letter names could not match in full.)
IDENTIFIER = r'(?:[a-zA-Z]\w*|_+[a-zA-Z0-9]\w*)'
# The type qualifiers that may come before a type specifier.
TYPE_QUAL = r'(?:const|volatile)'
# Pattern fragment for the type specifier of a declaration.  It is only
# used embedded in the patterns below, which are compiled with
# re.VERBOSE.
# NOTE(review): this is a plain raw string, not an f-string, so the
# "{IDENTIFIER}" placeholders are not substituted here -- confirm intent.
# NOTE(review): the bare "|" closing the integer-type group adds an
# empty alternative, so this fragment can match the empty string.
VAR_TYPE_SPEC = r'''(?:
void |
(?:
(?:(?:un)?signed\s+)?
(?:
char |
short |
int |
long |
long\s+int |
long\s+long
) |
) |
float |
double |
{IDENTIFIER} |
(?:struct|union)\s+{IDENTIFIER}
)'''
# Pattern fragment for one pointer marker: an optional "const" and
# then "*".
POINTER = rf'''(?:
(?:\s+const)?\s*[*]
)'''
#STRUCT = r'''(?:
# (?:struct|(struct\s+%s))\s*[{]
# [^}]*
# [}]
# )''' % (IDENTIFIER)
#UNION = r'''(?:
# (?:union|(union\s+%s))\s*[{]
# [^}]*
# [}]
# )''' % (IDENTIFIER)
#DECL_SPEC = rf'''(?:
# ({VAR_TYPE_SPEC}) |
# ({STRUCT}) |
# ({UNION})
# )'''
# Pattern fragment for the start of a function: an optional storage
# class ("extern", "static", or "static inline") followed by the type
# specifier.  The "#(?:const..." line is part of the string; under
# re.VERBOSE it is treated as a comment.
FUNC_START = rf'''(?:
(?:
(?:
extern |
static |
static\s+inline
)\s+
)?
#(?:const\s+)?
{VAR_TYPE_SPEC}
)'''
#GLOBAL_VAR_START = rf'''(?:
# (?:
# (?:
# extern |
# static
# )\s+
# )?
# (?:
# {TYPE_QUAL}
# (?:\s+{TYPE_QUAL})?
# )?\s+
# {VAR_TYPE_SPEC}
# )'''
# Matches the start of a global declaration at the beginning of a line.
# Only the function form (FUNC_START) is recognized for now.
GLOBAL_DECL_START_RE = re.compile(rf'''
^
(?:
({FUNC_START})
)
''', re.VERBOSE)
# Pattern fragment for the start of a local variable declaration: an
# optional "register"/"static", up to two type qualifiers, the type
# specifier, and an optional pointer marker.
LOCAL_VAR_START = rf'''(?:
(?:
(?:
register |
static
)\s+
)?
(?:
(?:
{TYPE_QUAL}
(?:\s+{TYPE_QUAL})?
)\s+
)?
{VAR_TYPE_SPEC}
{POINTER}?
)'''
# Matches the start of a local statement at the beginning of a line.
# parse_var() treats the whole match as the leading part of the type.
LOCAL_STMT_START_RE = re.compile(rf'''
^
(?:
({LOCAL_VAR_START})
)
''', re.VERBOSE)
def iter_global_declarations(lines):
    """Yield (decl, body) for each function definition in the given lines.

    Only function definitions are produced for now.  Lines ending in
    ";" and lines that open a block without any "(" are skipped.

    "decl" is the header (joined onto one line), then the body, then
    the closing line, separated by newlines.  "body" is the text of
    the function body as-is.  Declarations inside function bodies are
    not reported separately, though their text is part of the body.

    Iteration stops early if the lines run out in the middle of a
    function.
    """
    # XXX Bail out upon bogus syntax.
    cleaned = source.iter_clean_lines(lines)
    for first in cleaned:
        if GLOBAL_DECL_START_RE.match(first) is None:
            continue
        # We only need functions here, since we only need locals for now.
        if first.endswith(';'):
            continue
        if first.endswith('{') and '(' not in first:
            continue

        # Capture the function.
        # (assume no func is a one-liner)
        header = [first]
        current = first
        while '{' not in current:  # assume no inline structs, etc.
            current = next(cleaned, None)
            if current is None:
                return
            header.append(current)
        decl = ' '.join(header)

        body, end = _extract_block(cleaned)
        if end is None:
            return
        assert end == '}'
        yield (f'{decl}\n{body}\n{end}', body)
def iter_local_statements(lines):
    """Yield (stmt, blocks) for each statement in the given lines.

    For now only simple one-line statements (those ending in ";") that
    look like the start of a local declaration are produced, and
    "blocks" is always None.  Once compound statements are supported,
    "blocks" is meant to hold a (header, body) pair per block.
    """
    # XXX Bail out upon bogus syntax.
    for stmt in source.iter_clean_lines(lines):
        if not LOCAL_STMT_START_RE.match(stmt):
            continue
        # XXX Support compound & multiline simple statements.
        if stmt.endswith(';'):
            yield (stmt, None)
def _extract_block(lines):
    """Return (body, end) for the block whose "{" was already consumed.

    Lines are consumed until the braces balance.  "body" is the text
    before the closing line and "end" is the closing line itself, or
    None if the lines ran out first.
    """
    depth = 1
    collected = []
    closing = None
    for text in lines:
        depth += text.count('{')
        depth -= text.count('}')
        if depth == 0:
            closing = text
            break
        collected.append(text)
    return '\n'.join(collected), closing
def parse_func(stmt, body):
    """Return (name, signature) for the given function definition.

    "stmt" is the full text of the definition and "body" is the text
    between its braces.  The signature is everything before the
    opening brace, joined onto a single line.
    """
    header, _, tail = stmt.partition(body)
    assert tail.strip() == '}'
    assert header.strip().endswith('{')
    header = header.rpartition('{')[0]
    signature = ' '.join(header.strip().splitlines())
    before_params = signature.split('(')[0].strip()
    name = before_params.rpartition(' ')[2]
    assert name
    return name, signature
def parse_var(stmt):
    """Return (name, vartype) for the given variable declaration.

    "stmt" is a single-line declaration that LOCAL_STMT_START_RE
    matches (an AssertionError is raised otherwise).  Any initializer
    (from "=" on) and trailing ";" are dropped.  The resulting type has
    runs of spaces collapsed to a single space.
    """
    stmt = stmt.rstrip(';')
    m = LOCAL_STMT_START_RE.match(stmt)
    assert m
    vartype = m.group(0)
    name = stmt[len(vartype):].partition('=')[0].strip()
    if name.startswith('('):
        # A parenthesized declarator, e.g. "(*name)(args)".
        name, _, after = name[1:].partition(')')
        assert after
        name = name.replace('*', '* ')
        inside, _, name = name.strip().rpartition(' ')
        vartype = f'{vartype} ({inside.strip()}){after}'
    else:
        # Detach any "*" from the name so it ends up in the type.
        name = name.replace('*', '* ')
        before, _, name = name.rpartition(' ')
        vartype = f'{vartype} {before}'
    # Collapse runs of spaces.  (A loop that replaces a single space
    # with a single space would never terminate.)
    vartype = re.sub(r' {2,}', ' ', vartype.strip())
    return name, vartype
def parse_compound(stmt, blocks):
    """Return (headers, bodies) for the given compound statement.

    This is a placeholder: calling it always raises NotImplementedError.
    """
    # XXX Identify declarations inside compound statements
    # (if/switch/for/while).
    raise NotImplementedError
def iter_variables(filename, *,
                   _iter_source_lines=source.iter_lines,
                   _iter_global=iter_global_declarations,
                   _iter_local=iter_local_statements,
                   _parse_func=parse_func,
                   _parse_var=parse_var,
                   _parse_compound=parse_compound,
                   ):
    """Yield (funcname, name, vartype) for every variable in the given file.

    "funcname" is None for variables declared at the file's top level.
    """
    for stmt, body in _iter_global(_iter_source_lines(filename)):
        if body:
            # A function: report the local variables found in its body.
            funcname, _ = _parse_func(stmt, body)
            localvars = _iter_locals(body,
                                     _iter_statements=_iter_local,
                                     _parse_var=_parse_var,
                                     _parse_compound=_parse_compound,
                                     )
            for name, vartype in localvars:
                yield (funcname, name, vartype)
            continue

        # At the file top-level we only have to worry about vars & funcs.
        name, vartype = _parse_var(stmt)
        if name:
            yield (None, name, vartype)
def _iter_locals(lines, *,
                 _iter_statements=iter_local_statements,
                 _parse_var=parse_var,
                 _parse_compound=parse_compound,
                 ):
    """Yield (name, vartype) for each variable declared in the given text.

    "lines" is the text of a function body.  The bodies of compound
    statements are queued and scanned after the text that holds them.
    """
    pending = [lines]
    while pending:
        body = pending.pop(0)
        for stmt, blocks in _iter_statements(body.splitlines()):
            if not blocks:
                decls, nested = (stmt,), ()
            else:
                headers, nested = _parse_compound(stmt, blocks)
                decls = (line for header in headers for line in header)
            for decl in decls:
                name, vartype = _parse_var(decl)
                if name:
                    yield (name, vartype)
            pending.extend(nested)
def iter_all(dirnames):
    """Yield a Declaration for each one found.

    Duplicates that come from preprocessor conditionals are meant to be
    checked to make sure they are the same.

    This is a placeholder: calling it always raises NotImplementedError.
    """
    raise NotImplementedError
def iter_preprocessed(dirnames):
    """Yield a Declaration for each one found.

    All source files are meant to be run through the preprocessor first.

    This is a placeholder: calling it always raises NotImplementedError.
    """
    raise NotImplementedError

View file

@ -0,0 +1,78 @@
from collections import namedtuple
from c_analyzer_common import info, util
from c_analyzer_common.util import classonly, _NTBase
def normalize_vartype(vartype):
    """Return the canonical form for a variable type (or func signature).

    None stays None; anything else is converted with str().
    """
    # We allow the empty string through for semantic reasons.
    # XXX finish!
    # XXX Return (modifiers, type, pointer)?
    return None if vartype is None else str(vartype)
class Variable(_NTBase,
               namedtuple('Variable', 'id vartype')):
    """Information about a single variable declaration.

    "id" identifies the variable (see info.ID) and "vartype" is its
    normalized type string, or None if no type was given.  Attributes
    not found on the Variable itself are looked up on its ID.
    """

    __slots__ = ()
    # Holds the explicit or cached "is global" flag (see isglobal).
    # NOTE(review): util.Slot is defined elsewhere; presumably it gives
    # per-instance storage despite the empty __slots__ -- confirm.
    _isglobal = util.Slot()

    @classonly
    def from_parts(cls, filename, funcname, name, vartype, isglobal=False):
        """Return a new Variable built from the parts of its ID.

        A true "isglobal" marks the variable as global explicitly.
        """
        id = info.ID(filename, funcname, name)
        self = cls(id, vartype)
        if isglobal:
            self._isglobal = True
        return self

    def __new__(cls, id, vartype):
        # Coerce the ID and normalize the type; a false vartype
        # (e.g. None or '') is stored as None.
        self = super().__new__(
            cls,
            id=info.ID.from_raw(id),
            vartype=normalize_vartype(vartype) if vartype else None,
        )
        return self

    def __hash__(self):
        # Only the ID contributes to the hash.
        return hash(self.id)

    def __getattr__(self, name):
        # Only called when normal lookup fails: fall back to the ID's
        # attributes (e.g. "filename" and "funcname", used below).
        return getattr(self.id, name)

    def _validate_id(self):
        """Fail if the ID is missing, incomplete, or itself invalid."""
        if not self.id:
            raise TypeError('missing id')
        if not self.filename or self.filename == info.UNKNOWN:
            raise TypeError(f'id missing filename ({self.id})')
        if self.funcname and self.funcname == info.UNKNOWN:
            raise TypeError(f'id missing funcname ({self.id})')
        self.id.validate()

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        self._validate_id()
        if self.vartype is None or self.vartype == info.UNKNOWN:
            raise TypeError('missing vartype')

    @property
    def isglobal(self):
        """Whether the variable is global.

        Without an explicit value, this is computed once as "the type
        contains the word 'static'" and then stored for later lookups.
        """
        try:
            return self._isglobal
        except AttributeError:
            # XXX Include extern variables.
            # XXX Ignore functions.
            # NOTE(review): vartype may be None (see __new__), in which
            # case .split() fails here -- confirm callers avoid that.
            self._isglobal = ('static' in self.vartype.split())
            return self._isglobal

    @property
    def isconst(self):
        """Whether the type contains the word "const"."""
        return 'const' in self.vartype.split()

View file

@ -0,0 +1,180 @@
import re
from c_analyzer_common.info import UNKNOWN
from .info import Variable
from .preprocessor import _iter_clean_lines
_NOT_SET = object()
def get_srclines(filename, *,
                 cache=None,
                 _open=open,
                 _iter_lines=_iter_clean_lines,
                 ):
    """Return the file's lines as a list.

    Lines that start with "#" are left out.  Each remaining line has
    its trailing whitespace removed (including the newline).

    If a cache is given then it is used: a cached list is returned
    as-is and a freshly read list is stored under the filename.
    """
    if cache is not None:
        try:
            return cache[filename]
        except KeyError:
            pass

    with _open(filename) as srcfile:
        srclines = [text.rstrip()
                    for _, text in _iter_lines(srcfile)
                    if not text.startswith('#')]

    if cache is not None:
        cache[filename] = srclines
    return srclines
def parse_variable_declaration(srcline):
    """Return (name, decl) for the given declaration line.

    "decl" is the text before any "=" (or, failing that, the line
    without its ";"), stripped.  "name" is the last word of "decl",
    ignoring one trailing "[...]".  (None, None) is returned if the
    line does not look like a declaration.
    """
    # XXX possible false negatives...
    if '=' in srcline:
        decl = srcline.partition('=')[0]
    elif srcline.endswith(';'):
        decl = srcline.strip(';')
    else:
        return None, None
    decl = decl.strip()

    found = re.match(r'.*\b(\w+)\s*(?:\[[^\]]*\])?$', decl)
    if found is None:
        return None, None
    return found.group(1), decl
def parse_variable(srcline, funcname=None):
    """Return (name, decl) for the variable declared on the line.

    Only lines starting with "static " are considered, and those that
    look like functions (a "(" but no "[") are rejected.  (None, None)
    is returned when no variable is found.  "funcname" is not used.
    """
    line = srcline.strip()

    # XXX Handle more than just static variables.
    if not line.startswith('static '):
        return None, None
    if '(' in line and '[' not in line:
        # a function
        return None, None
    return parse_variable_declaration(line)
def iter_variables(filename, *,
                   srccache=None,
                   parse_variable=None,
                   _get_srclines=get_srclines,
                   _default_parse_variable=parse_variable,
                   ):
    """Yield a Variable for each in the given source file.

    "parse_variable" is called with (line, funcname) for the lines of
    the file (those that open or close a tracked function are skipped).
    It must return either a (name, decl) pair, with a name of None
    meaning "no variable here", or a list of (name, funcname, decl)
    triples.  It defaults to the module's own parse_variable().

    "srccache" is passed through to the line reader as its cache.
    """
    if parse_variable is None:
        parse_variable = _default_parse_variable

    # "indent" and "prev" describe the last line seen outside of any
    # function; "funcname" is set while inside a function body.
    indent = ''
    prev = ''
    funcname = None
    for line in _get_srclines(filename, cache=srccache):
        # remember current funcname
        if funcname:
            # The function ends at a "}" with the indent of its header.
            if line == indent + '}':
                funcname = None
                continue
        else:
            # A "{" alone on a line, right after a line containing "("
            # at the same indent, opens a function body.
            if '(' in prev and line == indent + '{':
                if not prev.startswith('__attribute__'):
                    # The name is the last word before the first "(".
                    funcname = prev.split('(')[0].split()[-1]
                    prev = ''
                    continue
            # The leading whitespace of the line.  (For an empty line
            # the slice is [:-0], which is also the empty string.)
            indent = line[:-len(line.lstrip())]
            prev = line

        info = parse_variable(line, funcname)
        if isinstance(info, list):
            # The parser found several variables at once, each with
            # its own funcname.
            for name, _funcname, decl in info:
                yield Variable.from_parts(filename, _funcname, name, decl)
            continue
        name, decl = info
        if name is None:
            continue
        yield Variable.from_parts(filename, funcname, name, decl)
def _match_varid(variable, name, funcname, ignored=None):
    """Return True if the variable matches the given name and funcname.

    Variables found in "ignored" never match.  A funcname of UNKNOWN
    matches any variable that has a funcname.
    """
    if ignored and variable in ignored:
        return False
    if variable.name != name:
        return False
    if funcname == UNKNOWN:
        # Any function will do, but there must be one.
        return bool(variable.funcname)
    return variable.funcname == funcname
def find_variable(filename, funcname, name, *,
                  ignored=None,
                  srccache=None,  # {filename: lines}
                  parse_variable=None,
                  _iter_variables=iter_variables,
                  ):
    """Return the first variable in the file that matches.

    Return None if the variable is not found.
    """
    candidates = _iter_variables(filename,
                                 srccache=srccache,
                                 parse_variable=parse_variable,
                                 )
    for candidate in candidates:
        if _match_varid(candidate, name, funcname, ignored):
            return candidate
    return None
def find_variables(varids, filenames=None, *,
                   srccache=_NOT_SET,
                   parse_variable=None,
                   _find_symbol=find_variable,
                   ):
    """Yield a Variable for each ID.

    If the variable is not found then its decl will be UNKNOWN.  That
    way there will be one resulting Variable per given ID.

    An ID with a usable filename is looked up in that file only.
    Otherwise each of "filenames" is tried in turn.  A variable that
    was already yielded is never matched again.

    "srccache" maps filenames to source lines.  If it is not given
    then a fresh cache is shared across all the lookups of this call.
    """
    if srccache is _NOT_SET:
        srccache = {}

    used = set()
    for varid in varids:
        if varid.filename and varid.filename != UNKNOWN:
            srcfiles = [varid.filename]
        else:
            if not filenames:
                yield Variable(varid, UNKNOWN)
                continue
            srcfiles = filenames
        for filename in srcfiles:
            # Use the injected finder.  (This used to call an undefined
            # name, "_find_varid", which raised NameError.)
            found = _find_symbol(filename, varid.funcname, varid.name,
                                 ignored=used,
                                 srccache=srccache,
                                 parse_variable=parse_variable,
                                 )
            if found:
                yield found
                used.add(found)
                break
        else:
            # Not found in any of the candidate files.
            yield Variable(varid, UNKNOWN)

View file

@ -0,0 +1,512 @@
from collections import namedtuple
import shlex
import os
import re
from c_analyzer_common import util
from . import info
# A backslash right before the end of a line joins it with the next one.
# Lines read from a file opened in text mode always end in '\n', on
# every platform, so match that rather than os.linesep (which is
# '\r\n' on Windows and so would never match there).
CONTINUATION = '\\\n'
# A run of word characters that includes at least one ASCII letter.
# NOTE(review): this also accepts names that start with a digit.
IDENTIFIER = r'(?:\w*[a-zA-Z]\w*)'
# Matches a string that is exactly one identifier; used to validate
# the names and args of Constant and Macro.
IDENTIFIER_RE = re.compile('^' + IDENTIFIER + '$')
def _coerce_str(value):
    """Return the value as a stripped string ('' for any false value)."""
    return str(value).strip() if value else ''
#############################
# directives
# The fragments below are written for re.VERBOSE, so the whitespace
# inside them is not significant.

# The start of a directive line: optional whitespace, "#", and more
# optional whitespace.
DIRECTIVE_START = r'''
(?:
^ \s*
[#] \s*
)'''
# The rest of a directive line: optional text (captured without the
# surrounding whitespace) up to the end of the line.
DIRECTIVE_TEXT = r'''
(?:
(?: \s+ ( .*\S ) )?
\s* $
)'''
# A whole directive line.  The groups are (kind, text).
# The space inside "__DATE __" has no effect under re.VERBOSE.
# NOTE(review): C spells the directive "elif"; "elseif" is what is
# recognized here (and in the KINDS sets) -- confirm intent.
DIRECTIVE = rf'''
(?:
{DIRECTIVE_START}
(
include |
error | warning |
pragma |
define | undef |
if | ifdef | ifndef | elseif | else | endif |
__FILE__ | __LINE__ | __DATE __ | __TIME__ | __TIMESTAMP__
)
{DIRECTIVE_TEXT}
)'''
# (?:
# [^\\\n] |
# \\ [^\n] |
# \\ \n
# )+
# ) \n
# )'''
DIRECTIVE_RE = re.compile(DIRECTIVE, re.VERBOSE)
# A "#define" line.  The groups are (name, args, text), where "args" is
# the text between the parentheses right after the name, if any.
DEFINE = rf'''
(?:
{DIRECTIVE_START} define \s+
(?:
( \w*[a-zA-Z]\w* )
(?: \s* [(] ([^)]*) [)] )?
)
{DIRECTIVE_TEXT}
)'''
DEFINE_RE = re.compile(DEFINE, re.VERBOSE)
def parse_directive(line):
    """Return the appropriate directive for the given line.

    The line is normalized first: surrounding whitespace is removed,
    any whitespace right after a leading "#" is dropped, and runs of
    spaces are collapsed to a single space.  Parsing is then done by
    _parse_directive(), which raises ValueError for bad directives.
    """
    line = line.strip()
    if line.startswith('#'):
        line = '#' + line[1:].lstrip()
    # Collapse runs of spaces.  (A loop that replaces a single space
    # with a single space would never terminate.)
    directive = re.sub(r' {2,}', ' ', line)
    return _parse_directive(directive)
def _parse_directive(line):
    """Return the directive object for the given (normalized) line.

    A "#define" line produces a Macro if it has a non-empty argument
    list and a Constant otherwise.  Any other recognized directive
    produces an Include, IfDirective, or OtherDirective, which is
    validated before it is returned.

    ValueError is raised for unsupported directives, for directives
    missing their text, and for "else"/"endif" with text.
    """
    found = DEFINE_RE.match(line)
    if found:
        name, args, text = found.groups()
        if not args:
            return Constant(name, text)
        return Macro(name, [a.strip() for a in args.split(',')], text)

    found = DIRECTIVE_RE.match(line)
    if not found:
        raise ValueError(f'unsupported directive {line!r}')
    kind, text = found.groups()
    textless = kind in ('else', 'endif')
    if not text:
        if not textless:
            raise ValueError(f'missing text in directive {line!r}')
    elif textless or kind == 'define':
        raise ValueError(f'unexpected text in directive {line!r}')

    if kind == 'include':
        directive = Include(text)
    elif kind in IfDirective.KINDS:
        directive = IfDirective(kind, text)
    else:
        directive = OtherDirective(kind, text)
    directive.validate()
    return directive
class PreprocessorDirective(util._NTBase):
    """The base class for directives.

    Subclasses are namedtuples whose first field is "kind".
    """

    __slots__ = ()

    # Every directive kind that is recognized.
    KINDS = frozenset([
        'include',
        'pragma',
        'error', 'warning',
        'define', 'undef',
        'if', 'ifdef', 'ifndef', 'elseif', 'else', 'endif',
        '__FILE__', '__DATE__', '__LINE__', '__TIME__', '__TIMESTAMP__',
    ])

    @property
    def text(self):
        """The non-blank fields after "kind", joined by spaces (or None)."""
        parts = [v for v in self[1:] if v and v.strip()]
        return ' '.join(parts) or None

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        super().validate()

        if not self.kind:
            raise TypeError('missing kind')
        if self.kind not in self.KINDS:
            raise ValueError

        # text can be anything, including None.
class Constant(PreprocessorDirective,
               namedtuple('Constant', 'kind name value')):
    """A single "constant" directive ("define").

    The kind is always "define".  The name and value are stripped
    strings, or None if empty.
    """

    __slots__ = ()

    def __new__(cls, name, value=None):
        name = _coerce_str(name) or None
        value = _coerce_str(value) or None
        return super().__new__(cls, 'define', name=name, value=value)

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        super().validate()

        if not self.name:
            raise TypeError('missing name')
        if not IDENTIFIER_RE.match(self.name):
            raise ValueError(f'name must be identifier, got {self.name!r}')

        # value can be anything, including None
class Macro(PreprocessorDirective,
            namedtuple('Macro', 'kind name args body')):
    """A single "macro" directive ("define").

    The kind is always "define".  "args" is a tuple of the stripped
    argument names (with None for each empty one).  The name and body
    are stripped strings, or None if empty.
    """

    __slots__ = ()

    def __new__(cls, name, args, body=None):
        # "args" must be a string or an iterable of strings (or "empty").
        if isinstance(args, str):
            args = [part.strip() for part in args.split(',')]
        args = tuple(_coerce_str(a) or None for a in args) if args else ()
        return super().__new__(
            cls,
            kind='define',
            name=_coerce_str(name) or None,
            args=args,
            body=_coerce_str(body) or None,
        )

    @property
    def text(self):
        """The macro as "<name>(<args>)", plus the body if there is one."""
        signature = f'{self.name}({", ".join(self.args)})'
        if self.body:
            return f'{signature} {self.body}'
        return signature

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        super().validate()

        if not self.name:
            raise TypeError('missing name')
        if not IDENTIFIER_RE.match(self.name):
            raise ValueError(f'name must be identifier, got {self.name!r}')

        for arg in self.args:
            if not arg:
                raise ValueError(f'missing arg in {self.args}')
            if not IDENTIFIER_RE.match(arg):
                raise ValueError(f'arg must be identifier, got {arg!r}')

        # body can be anything, including None
class IfDirective(PreprocessorDirective,
                  namedtuple('IfDirective', 'kind condition')):
    """A single conditional directive (e.g. "if", "ifdef").

    This only includes directives that actually provide conditions.  The
    related directives "else" and "endif" are covered by OtherDirective
    instead.

    For "ifdef" and "ifndef" the stored condition is the equivalent
    "defined(...)" / "! defined(...)" expression.
    """

    __slots__ = ()

    KINDS = frozenset([
        'if',
        'ifdef',
        'ifndef',
        'elseif',
    ])

    @classmethod
    def _condition_from_raw(cls, raw, kind):
        """Return the condition string for the raw text (None if empty)."""
        text = _coerce_str(raw)
        if not text:
            return None

        if kind == 'ifdef':
            return f'defined({text})'
        if kind == 'ifndef':
            return f'! defined({text})'
        return text

    def __new__(cls, kind, condition):
        kind = _coerce_str(kind)
        return super().__new__(
            cls,
            kind=kind or None,
            condition=cls._condition_from_raw(condition, kind),
        )

    @property
    def text(self):
        """The directive's text as it appears in the source."""
        if self.kind == 'ifdef':
            return self.condition[len('defined('):-1]
        if self.kind == 'ifndef':
            return self.condition[len('! defined('):-1]
        return self.condition

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        super().validate()

        if not self.condition:
            raise TypeError('missing condition')
        # XXX The condition itself is not validated.
class Include(PreprocessorDirective,
              namedtuple('Include', 'kind file')):
    """A single "include" directive.

    Supported "file" values follow either the bracket style
    (<stdio>) or the double-quote style ("spam.h").
    """

    __slots__ = ()

    def __new__(cls, file):
        file = _coerce_str(file) or None
        return super().__new__(cls, kind='include', file=file)

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        super().validate()

        if not self.file:
            raise TypeError('missing file')
class OtherDirective(PreprocessorDirective,
                     namedtuple('OtherDirective', 'kind text')):
    """A single directive not covered by another class.

    This includes the "else", "endif", and "undef" directives, which are
    otherwise inherently related to the directives covered by the
    Constant, Macro, and IfDirective classes.

    Note that all directives must have a text value, except for "else"
    and "endif" (which must have no text).
    """

    __slots__ = ()

    # Every kind that is not handled by a more specific class.
    KINDS = PreprocessorDirective.KINDS - {'include', 'define'} - IfDirective.KINDS

    def __new__(cls, kind, text):
        kind = _coerce_str(kind) or None
        text = _coerce_str(text) or None
        return super().__new__(cls, kind=kind, text=text)

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        super().validate()

        has_text = bool(self.text)
        textless = self.kind in ('else', 'endif')
        if has_text and textless:
            raise ValueError('unexpected text in directive')
        if not has_text and not textless:
            raise TypeError('missing text')
#############################
# iterating lines
def _recompute_conditions(directive, ifstack):
    """Update "ifstack" for the directive and return the active conditions.

    Each stack entry is a (negated, active) pair for one open
    conditional block: the conditions of its earlier branches and the
    condition of its current branch (None for "else").  The result is
    a tuple with every negated condition, written "! (...)", and every
    active condition, from the outermost block to the innermost.
    """
    def close_branch():
        # Pop the innermost block, moving its active condition (if
        # any) over to the negated ones.
        negated, active = ifstack.pop()
        if active:
            negated.append(active)
        return negated

    kind = directive.kind
    if kind in ('if', 'ifdef', 'ifndef'):
        ifstack.append(([], directive.condition))
    elif kind == 'elseif':
        negated = close_branch() if ifstack else []
        ifstack.append((negated, directive.condition))
    elif kind == 'else':
        if ifstack:
            negated = close_branch()
            ifstack.append((negated, None))
    elif kind == 'endif':
        if ifstack:
            ifstack.pop()

    conditions = []
    for negated, active in ifstack:
        conditions.extend(f'! ({cond})' for cond in negated)
        if active:
            conditions.append(active)
    return tuple(conditions)
def _iter_clean_lines(lines):
    """Yield (lno, line) for each logical line, with comments removed.

    Lines ending in a continuation are joined with the following line.
    Each "/* ... */" comment is replaced by a single space, even when
    it spans several lines, and everything from "//" on is dropped.
    "lno" is the (1-indexed) number of the last physical line that was
    consumed for the yielded line.

    An Exception is raised if a "/*" comment is never closed.
    """
    # NOTE(review): comment markers inside string literals are not
    # treated specially here -- confirm that is acceptable.
    # A single shared iterator, so the inner loops below can consume
    # extra physical lines.
    lines = iter(enumerate(lines, 1))
    for lno, line in lines:
        # Handle line continuations.
        while line.endswith(CONTINUATION):
            try:
                lno, _line = next(lines)
            except StopIteration:
                # The last line ends in a continuation; keep it as-is.
                break
            line = line[:-len(CONTINUATION)] + ' ' + _line

        # Deal with comments.
        # "after" is the text still to be scanned; "line" is rebuilt
        # from the pieces that are not inside comments.
        after = line
        line = ''
        while True:
            # Look for a comment.
            before, begin, remainder = after.partition('/*')
            if '//' in before:
                # A line comment comes first; drop the rest of the line.
                before, _, _ = before.partition('//')
                line += before + ' '  # per the C99 spec
                break
            line += before
            if not begin:
                # No more comments on this line.
                break
            line += ' '  # per the C99 spec

            # Go until we find the end of the comment.
            _, end, after = remainder.partition('*/')
            while not end:
                try:
                    lno, remainder = next(lines)
                except StopIteration:
                    raise Exception('unterminated comment')
                _, end, after = remainder.partition('*/')

        yield lno, line
def iter_lines(lines, *,
               _iter_clean_lines=_iter_clean_lines,
               _parse_directive=_parse_directive,
               _recompute_conditions=_recompute_conditions,
               ):
    """Yield (lno, line, directive, active conditions) for each given line.

    This is effectively a subset of the operations taking place in
    translation phases 2-4 from the C99 spec (ISO/IEC 9899:TC2); see
    section 5.1.1.2.  Line continuations are removed and comments
    replaced with a single space.  (In both cases "lno" will be the last
    line involved.)  Otherwise each line is returned as-is.

    "lno" is the (1-indexed) line number for the line.

    "directive" will be a PreprocessorDirective or None, depending on
    whether or not there is a directive on the line.

    "active conditions" is the set of preprocessor conditions (e.g.
    "defined()") under which the current line of code will be included
    in compilation.  That set is derived from every conditional
    directive block (e.g. "if defined()", "ifdef", "else") containing
    that line.  That includes nested directives.  Note that the
    current line does not affect the active conditions for itself.
    It only impacts subsequent lines.  That applies to directives
    that close blocks (e.g. "endif") just as much as conditional
    directives.  Also note that "else" and "elseif" directives
    update the active conditions (for later lines), rather than
    adding to them.
    """
    # Built rather than spelled out so that the two-space literal
    # cannot silently be collapsed into a single space.
    double_space = ' ' * 2

    ifstack = []
    conditions = ()
    for lno, line in _iter_clean_lines(lines):
        stripped = line.strip()
        if not stripped.startswith('#'):
            yield lno, line, None, conditions
            continue

        # Normalize the directive text: no whitespace right after the
        # "#" and no runs of multiple spaces.
        directive = '#' + stripped[1:].lstrip()
        while double_space in directive:
            directive = directive.replace(double_space, ' ')
        directive = _parse_directive(directive)
        # The directive line itself is reported with the conditions
        # that applied *before* it.
        yield lno, line, directive, conditions

        if (directive.kind in ('else', 'endif')
                or isinstance(directive, IfDirective)):
            conditions = _recompute_conditions(directive, ifstack)
#############################
# running (platform-specific?)
def _gcc(filename, *,
         _get_argv=(lambda: _get_gcc_argv()),
         _run=util.run_cmd,
         ):
    """Run the compiler with "-E" on the file and return the output.

    The base command comes from _get_argv(); the returned list is
    extended in place with the "-E <filename>" arguments.
    """
    argv = _get_argv()
    argv += ['-E', filename]
    return _run(argv)
def _get_gcc_argv(*,
                  _open=open,
                  _run=util.run_cmd,
                  ):
    """Return the compiler argv (compiler command plus core CFLAGS).

    A helper makefile with a "print-%" pattern rule is written to
    /tmp/print.mk.  make is then run with both "Makefile" and the helper
    to echo the CC and PY_CORE_CFLAGS variables, one per output line.
    Both lines are split shell-style and concatenated.
    """
    # NOTE(review): the helper lives at a fixed path under /tmp and
    # "Makefile" is resolved relative to the current directory.
    helper = '/tmp/print.mk'
    with _open(helper, 'w') as makefile:
        makefile.write('print-%:\n')
        makefile.write('\t@echo $($*)\n')

    output = _run([
        '/usr/bin/make',
        '-f', 'Makefile',
        '-f', helper,
        'print-CC',
        'print-PY_CORE_CFLAGS',
    ])
    # Exactly two lines of output are expected: CC, then the flags.
    compiler, cflags = output.strip().splitlines()
    return shlex.split(compiler.strip()) + shlex.split(cflags.strip())
def run(filename, *,
        _gcc=_gcc,
        ):
    """Return the preprocessed text of the given file.

    The work is delegated to "_gcc", which is called with just the
    filename.
    """
    preprocessed = _gcc(filename)
    return preprocessed

View file

@ -0,0 +1,34 @@
from . import preprocessor
def iter_clean_lines(lines):
    """Yield each line with comments and surrounding whitespace removed.

    "//" comments are dropped and each "/* ... */" comment (which may
    span multiple lines) is replaced with a single space.  Code that
    precedes the start of a multi-line comment, or follows its end, is
    kept.  Lines that end up blank are skipped.

    Note that string literals are not recognized, so comment markers
    inside them are treated as comments.
    """
    incomment = False
    for line in lines:
        kept = []
        rest = line
        while rest:
            if incomment:
                # Skip ahead to the end of the block comment, if it
                # is on this line.
                _, sep, rest = rest.partition('*/')
                if not sep:
                    break
                incomment = False
                kept.append(' ')
                continue

            block = rest.find('/*')
            eol = rest.find('//')
            if eol >= 0 and (block < 0 or eol < block):
                # A line comment comes first; ignore the rest.
                kept.append(rest[:eol])
                break
            if block < 0:
                kept.append(rest)
                break
            kept.append(rest[:block])
            rest = rest[block + 2:]
            incomment = True

        # Ignore blank lines and leading/trailing whitespace.
        line = ''.join(kept).strip()
        if not line:
            continue

        yield line
def iter_lines(filename, *,
               preprocess=preprocessor.run,
               ):
    """Return an iterator over the lines of the preprocessed file.

    "preprocess" is called with the filename and must return the
    resulting text.
    """
    return iter(preprocess(filename).splitlines())

View file

View file

@ -0,0 +1,157 @@
import os
import os.path
import shutil
import sys
from c_analyzer_common import util, info
from . import source
from .info import Symbol
#PYTHON = os.path.join(REPO_ROOT, 'python')
# The default binary for iter_symbols(): the interpreter that is
# currently running.
PYTHON = sys.executable
def iter_symbols(binary=PYTHON, dirnames=None, *,
                 # Alternately, use look_up_known_symbol()
                 # from c_globals.supported.
                 find_local_symbol=source.find_symbol,
                 _file_exists=os.path.exists,
                 _iter_symbols_nm=(lambda b, *a: _iter_symbols_nm(b, *a)),
                 ):
    """Yield a Symbol for each symbol found in the binary.

    If "find_local_symbol" is provided then it is called as
    find_local_symbol(name, dirnames, _perfilecache=cache), with one
    cache dict shared by all lookups made during this call.

    Raises Exception if the binary does not exist and
    NotImplementedError on Windows (os.name == 'nt').
    """
    if not _file_exists(binary):
        raise Exception('executable missing (need to build it first?)')

    lookup = None
    if find_local_symbol:
        cache = {}

        def lookup(name, *, _find=find_local_symbol):
            return _find(name, dirnames, _perfilecache=cache)

    if os.name == 'nt':
        # XXX Support this.
        raise NotImplementedError
    yield from _iter_symbols_nm(binary, lookup)
#############################
# binary format (e.g. ELF)
# Symbol names that _is_special_symbol() always reports as special,
# so that they get skipped when iterating over a binary's symbols.
SPECIAL_SYMBOLS = {
    '__bss_start',
    '__data_start',
    '__dso_handle',
    '_DYNAMIC',
    '_edata',
    '_end',
    '__environ@@GLIBC_2.2.5',
    '_GLOBAL_OFFSET_TABLE_',
    '__JCR_END__',
    '__JCR_LIST__',
    '__TMC_END__',
}
def _is_special_symbol(name):
    """Return True if the name is in SPECIAL_SYMBOLS or has "@@GLIBC"."""
    return name in SPECIAL_SYMBOLS or '@@GLIBC' in name
#############################
# "nm"
# Maps the (lowercased) symbol type letter reported by "nm" to a
# Symbol kind.  Letters not listed here are treated as Symbol.KIND.OTHER
# by _parse_nm_line().
# NOTE(review): the "g"/"s" descriptions below look swapped relative to
# the GNU nm documentation -- confirm before enabling them.
NM_KINDS = {
    'b': Symbol.KIND.VARIABLE,  # uninitialized
    'd': Symbol.KIND.VARIABLE,  # initialized
    #'g': Symbol.KIND.VARIABLE,  # uninitialized
    #'s': Symbol.KIND.VARIABLE,  # initialized
    't': Symbol.KIND.FUNCTION,
}
def _iter_symbols_nm(binary, find_local_symbol=None,
                     *,
                     _which=shutil.which,
                     _run=util.run_cmd,
                     ):
    """Yield a Symbol for each variable that "nm" reports for the binary.

    "find_local_symbol", if given, is passed along to _parse_nm_line()
    to resolve the file and function of local (static) variables.

    Only variable symbols are yielded; special symbols (see
    _is_special_symbol()) are skipped.

    Raises NotImplementedError if "nm" is not available.  Any error
    from running "nm" propagates unchanged.
    """
    nm = _which('nm')
    if not nm:
        # XXX Use dumpbin.exe /SYMBOLS on Windows.
        raise NotImplementedError

    output = _run([nm, '--line-numbers', binary])
    for line in output.splitlines():
        # The variable's type may be filled in when a local symbol is
        # resolved, but a Symbol has no place for it, so it is dropped.
        (name, kind, external, filename, funcname, _vartype,
         ) = _parse_nm_line(line,
                            _find_local_symbol=find_local_symbol,
                            )
        if kind != Symbol.KIND.VARIABLE:
            continue
        if _is_special_symbol(name):
            continue
        yield Symbol(
            id=(filename, funcname, name),
            kind=kind,
            external=external,
        )
def _parse_nm_line(line, *, _find_local_symbol=None):
    """Return the info parsed from a single line of "nm" output.

    The result is (name, kind, external, filename, funcname, vartype).
    "external" is True if the symbol type letter is uppercase.
    "filename" is relative to the current directory, or info.UNKNOWN
    if the line has no tab-separated location.

    For a local variable "funcname" is info.UNKNOWN unless
    "_find_local_symbol" is given, in which case that is called with
    the name and supplies filename, funcname and vartype.  Otherwise
    "funcname" and "vartype" are None.
    """
    # Strip off the address.
    rest = line.partition(' ')[2].strip()

    letter, _, rest = rest.partition(' ')
    rest = rest.strip()
    external = letter.isupper()
    kind = NM_KINDS.get(letter.lower(), Symbol.KIND.OTHER)

    rawname, _, location = rest.partition('\t')
    if location:
        # The location looks like "<filename>:<line number>".
        filename = os.path.relpath(location.partition(':')[0])
    else:
        filename = info.UNKNOWN

    name, islocal = _parse_nm_name(rawname.strip(), kind)
    funcname = None
    vartype = None
    if islocal:
        funcname = info.UNKNOWN
        if _find_local_symbol is not None:
            filename, funcname, vartype = _find_local_symbol(name)
            filename = filename or info.UNKNOWN
            funcname = funcname or info.UNKNOWN
    # XXX find filename and vartype (for non-local symbols)?
    return name, kind, external, filename, funcname, vartype
def _parse_nm_name(name, kind):
    """Return (name, islocal) for the symbol name reported by "nm".

    "islocal" is None for non-variables and special symbols, False for
    a plain name, and True for a name of the form "<name>.<digits>"
    (in which case only the part before the "." is returned).

    Raises Exception if the suffix after the "." is not all digits.
    """
    if kind != Symbol.KIND.VARIABLE or _is_special_symbol(name):
        return name, None

    base, dot, suffix = name.partition('.')
    if not dot:
        return name, False
    if suffix.isdigit():
        return base, True
    raise Exception(f'got bogus name {name}')

View file

@ -0,0 +1,51 @@
from collections import namedtuple
from c_analyzer_common.info import ID
from c_analyzer_common.util import classonly, _NTBase
class Symbol(_NTBase, namedtuple('Symbol', 'id kind external')):
    """Info for a single compilation symbol."""

    __slots__ = ()

    class KIND:
        # The supported values for Symbol.kind.
        VARIABLE = 'variable'
        FUNCTION = 'function'
        OTHER = 'other'

    @classonly
    def from_name(cls, name, filename=None, kind=KIND.VARIABLE, external=None):
        """Return a new symbol based on the given name."""
        symbolid = ID(filename, None, name)
        return cls(symbolid, kind, external)

    def __new__(cls, id, kind=KIND.VARIABLE, external=None):
        # Normalize the values; a falsy kind and an unset "external"
        # are both stored as None.
        kind = str(kind) if kind else None
        if external is not None:
            external = bool(external)
        return super().__new__(
            cls,
            id=ID.from_raw(id),
            kind=kind,
            external=external,
        )

    def __hash__(self):
        # Only the ID takes part in hashing.
        return hash(self.id)

    def __getattr__(self, name):
        # Attributes not found on the symbol are looked up on its ID.
        return getattr(self.id, name)

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        if not self.id:
            raise TypeError('missing id')
        self.id.validate()

        if not self.kind:
            raise TypeError('missing kind')
        if self.kind not in vars(self.KIND).values():
            raise ValueError(f'unsupported kind {self.kind}')

        if self.external is None:
            raise TypeError('missing external')

View file

@ -0,0 +1,149 @@
import os.path
from c_analyzer_common import files
from c_analyzer_common.info import UNKNOWN
from c_parser import declarations, info
from .info import Symbol
from .source import _find_symbol
# XXX need tests:
# * look_up_known_symbol()
# * symbol_from_source()
# * get_resolver()
# * symbols_to_variables()
def look_up_known_symbol(symbol, knownvars, *,
                         match_files=(lambda f1, f2: f1 == f2),
                         ):
    """Return the known variable matching the given symbol.

    "knownvars" is a mapping of common.ID to parser.Variable.

    "match_files" is used to verify if two filenames point to
    the same file.

    None is returned if there is no match.  A symbol with a known
    funcname but an unknown filename is not supported
    (NotImplementedError).
    """
    if not knownvars:
        return None

    if symbol.funcname != UNKNOWN:
        if not symbol.filename or symbol.filename == UNKNOWN:
            raise NotImplementedError
        return knownvars.get(symbol.id)

    # The function is not known, so search the known local variables
    # by name (restricted to the symbol's file, if that is known).
    anyfile = not symbol.filename or symbol.filename == UNKNOWN
    for varid in knownvars:
        if not varid.funcname:
            continue
        if not anyfile and not match_files(varid.filename, symbol.filename):
            continue
        if varid.name == symbol.name:
            return knownvars[varid]
    return None
def find_in_source(symbol, dirnames, *,
                   _perfilecache={},
                   _find_symbol=_find_symbol,
                   _iter_files=files.iter_files_by_suffix,
                   ):
    """Return the Variable matching the given Symbol.

    If there is no match then return None.

    Only the symbol's own file is searched when it is known; otherwise
    every ".c" and ".h" file under "dirnames" is.  A symbol with a
    known funcname is not supported (NotImplementedError).
    """
    # NOTE(review): the mutable "_perfilecache" default is shared
    # across calls -- presumably on purpose, as a cache.
    hasfile = symbol.filename and symbol.filename != UNKNOWN
    if hasfile:
        candidates = [symbol.filename]
    else:
        candidates = _iter_files(dirnames, ('.c', '.h'))

    if symbol.funcname and symbol.funcname != UNKNOWN:
        raise NotImplementedError

    found = _find_symbol(symbol.name, candidates, _perfilecache)
    filename, funcname, vartype = found
    if filename == UNKNOWN:
        return None
    varid = (filename, funcname, symbol.name)
    return info.Variable(id=varid, vartype=vartype)
def get_resolver(knownvars=None, dirnames=None, *,
                 _look_up_known=look_up_known_symbol,
                 _from_source=find_in_source,
                 ):
    """Return a "resolver" func for the given known vars and dirnames.

    The func takes a single Symbol and returns a corresponding Variable.
    If the symbol was located then the variable will be valid, populated
    with the corresponding information.  Otherwise None is returned.

    With "knownvars", lookups go against a private copy of the mapping
    and (if "dirnames" is also given) a match only counts when its file
    is under one of those directories.  With only "dirnames", the
    source files are searched instead.
    """
    if not knownvars:
        if dirnames:
            def resolve(symbol):
                return _from_source(symbol, dirnames)
        else:
            def resolve(symbol):
                return None
        return resolve

    knownvars = dict(knownvars)  # a copy

    def resolve_known(symbol):
        found = _look_up_known(symbol, knownvars)
        if found is None:
            return None
        # A match for a symbol that is not fully identified is used up,
        # so that it does not get matched again.
        if (symbol.funcname == UNKNOWN
                or not symbol.filename
                or symbol.filename == UNKNOWN):
            knownvars.pop(found.id)
        return found

    if not dirnames:
        return resolve_known

    def resolve(symbol):
        found = resolve_known(symbol)
        if found is None:
            return None
            #return _from_source(symbol, dirnames)
        for dirname in dirnames:
            prefix = dirname
            if not prefix.endswith(os.path.sep):
                prefix += os.path.sep
            if found.filename.startswith(prefix):
                return found
        return None

    return resolve
def symbols_to_variables(symbols, *,
                         resolve=(lambda s: look_up_known_symbol(s, None)),
                         ):
    """Yield the variable that matches each given symbol.

    Use get_resolver() for a "resolve" func to use.

    Variables among the symbols are passed through untouched and
    non-variable symbols are skipped.  A symbol that cannot be
    resolved is yielded as a Variable with an UNKNOWN vartype.
    """
    for symbol in symbols:
        if isinstance(symbol, info.Variable):
            # XXX validate?
            yield symbol
        elif symbol.kind == Symbol.KIND.VARIABLE:
            variable = resolve(symbol)
            if variable is None:
                #raise NotImplementedError(symbol)
                variable = info.Variable(
                    id=symbol.id,
                    vartype=UNKNOWN,
                )
            yield variable

View file

@ -0,0 +1,58 @@
from c_analyzer_common import files
from c_analyzer_common.info import UNKNOWN
from c_parser import declarations
# XXX need tests:
# * find_symbol()
def find_symbol(name, dirnames, *,
                _perfilecache,
                _iter_files=files.iter_files_by_suffix,
                **kwargs
                ):
    """Return (filename, funcname, vartype) for the matching Symbol.

    The ".c" and ".h" files under "dirnames" are searched.  Any extra
    keyword arguments are passed through to _find_symbol().
    """
    candidates = _iter_files(dirnames, ('.c', '.h'))
    return _find_symbol(name, candidates, _perfilecache, **kwargs)
def _get_symbols(filename, *,
                 _iter_variables=declarations.iter_variables,
                 ):
    """Return the local variables found in the given file.

    The result maps each variable name to a list of
    (funcname, vartype) pairs, in the order they were found.
    Variables without a funcname are left out.
    """
    found = {}
    for funcname, name, vartype in _iter_variables(filename):
        if funcname:
            found.setdefault(name, []).append((funcname, vartype))
    return found
def _find_symbol(name, filenames, _perfilecache, *,
                 _get_local_symbols=_get_symbols,
                 ):
    """Return (filename, funcname, vartype) for the first match.

    "_perfilecache" maps each filename to its remaining local symbols
    and is filled in as files get visited.  A match is removed from the
    cache, so a later call for the same name gets the next instance.

    (UNKNOWN, UNKNOWN, UNKNOWN) is returned if there is no match.
    """
    for filename in filenames:
        try:
            perfile = _perfilecache[filename]
        except KeyError:
            perfile = _get_local_symbols(filename)
            _perfilecache[filename] = perfile

        if name not in perfile:
            continue
        remaining = perfile[name]

        funcname, vartype = remaining.pop(0)
        if not remaining:
            # That was the last instance with this name in the file.
            perfile.pop(name)
        return filename, funcname, vartype
    return UNKNOWN, UNKNOWN, UNKNOWN
def iter_symbols():
    # Placeholder: not implemented for this module.
    raise NotImplementedError

View file

@ -0,0 +1 @@
filename funcname name kind reason
1 filename funcname name kind reason

1922
Tools/c-analyzer/known.tsv Normal file

File diff suppressed because it is too large Load diff