gh-110722: Make -m test -T -j use sys.monitoring (GH-111710)

Now all results from worker processes are aggregated and displayed together as a summary at the end of a regrtest run. The traditional trace is left in place for use with sequential in-process test runs but now raises a warning that those numbers are not precise. `-T -j` requires `--with-pydebug` as it relies on `-Xpresite=`.
2024-09-05 08:17:45 +00:00 · 2023-11-10 18:17:45 +01:00 · 2023-11-10 18:17:45 +01:00 · 3932b0f7b1
parent 0b06d2482d
commit 3932b0f7b1
13 changed files with 166 additions and 34 deletions
--- a/Doc/library/trace.rst
+++ b/Doc/library/trace.rst
@ -187,7 +187,8 @@ Programmatic Interface

      Merge in data from another :class:`CoverageResults` object.

-   .. method:: write_results(show_missing=True, summary=False, coverdir=None)
+   .. method:: write_results(show_missing=True, summary=False, coverdir=None,\
+                             *, ignore_missing_files=False)

      Write coverage results.  Set *show_missing* to show lines that had no
      hits.  Set *summary* to include in the output the coverage summary per
@ -195,6 +196,13 @@ Programmatic Interface
      result files will be output.  If ``None``, the results for each source
      file are placed in its directory.

+      If *ignore_missing_files* is ``True``, coverage counts for files that no
+      longer exist are silently ignored. Otherwise, a missing file will
+      raise a :exc:`FileNotFoundError`.
+
+      .. versionchanged:: 3.13
+         Added *ignore_missing_files* parameter.
+
 A simple example demonstrating the use of the programmatic interface::

   import sys
--- a/Lib/test/cov.py
+++ b/Lib/test/cov.py
@ -0,0 +1,48 @@
+"""A minimal hook for gathering line coverage of the standard library.
+
+Designed to be used with -Xpresite= which means:
+* it installs itself on import
+* it's not imported as `__main__` so can't use the ifmain idiom
+* it can't import anything besides `sys` to avoid tainting gathered coverage
+* filenames are not normalized
+
+To get gathered coverage back, look for 'test.cov' in `sys.modules`
+instead of importing directly. That way you can determine if the module
+was already in use.
+
+If you need to disable the hook, call the `disable()` function.
+"""
+
+import sys
+
+mon = sys.monitoring
+
+FileName = str
+LineNo = int
+Location = tuple[FileName, LineNo]
+
+coverage: set[Location] = set()
+
+
+# `types` and `typing` aren't imported to avoid invalid coverage
+def add_line(
+    code: "types.CodeType",
+    lineno: int,
+) -> "typing.Literal[sys.monitoring.DISABLE]":
+    coverage.add((code.co_filename, lineno))
+    return mon.DISABLE
+
+
+def enable():
+    mon.use_tool_id(mon.COVERAGE_ID, "regrtest coverage")
+    mon.register_callback(mon.COVERAGE_ID, mon.events.LINE, add_line)
+    mon.set_events(mon.COVERAGE_ID, mon.events.LINE)
+
+
+def disable():
+    mon.set_events(mon.COVERAGE_ID, 0)
+    mon.register_callback(mon.COVERAGE_ID, mon.events.LINE, None)
+    mon.free_tool_id(mon.COVERAGE_ID)
+
+
+enable()
--- a/Lib/test/libregrtest/cmdline.py
+++ b/Lib/test/libregrtest/cmdline.py
@ -2,7 +2,7 @@
 import os.path
 import shlex
 import sys
-from test.support import os_helper
+from test.support import os_helper, Py_DEBUG
 from .utils import ALL_RESOURCES, RESOURCE_NAMES


@ -448,8 +448,16 @@ def _parse_args(args, **kwargs):

    if ns.single and ns.fromfile:
        parser.error("-s and -f don't go together!")
-    if ns.use_mp is not None and ns.trace:
-        parser.error("-T and -j don't go together!")
+    if ns.trace:
+        if ns.use_mp is not None:
+            if not Py_DEBUG:
+                parser.error("need --with-pydebug to use -T and -j together")
+        else:
+            print(
+                "Warning: collecting coverage without -j is imprecise. Configure"
+                " --with-pydebug and run -m test -T -j for best results.",
+                file=sys.stderr
+            )
    if ns.python is not None:
        if ns.use_mp is None:
            parser.error("-p requires -j!")
--- a/Lib/test/libregrtest/main.py
+++ b/Lib/test/libregrtest/main.py
@ -5,6 +5,7 @@
 import sys
 import sysconfig
 import time
+import trace

 from test import support
 from test.support import os_helper, MS_WINDOWS
@ -13,7 +14,7 @@
 from .findtests import findtests, split_test_packages, list_cases
 from .logger import Logger
 from .pgo import setup_pgo_tests
-from .result import State
+from .result import State, TestResult
 from .results import TestResults, EXITCODE_INTERRUPTED
 from .runtests import RunTests, HuntRefleak
 from .setup import setup_process, setup_test_dir
@ -284,7 +285,9 @@ def display_result(self, runtests):
        self.results.display_result(runtests.tests,
                                    self.quiet, self.print_slowest)

-    def run_test(self, test_name: TestName, runtests: RunTests, tracer):
+    def run_test(
+        self, test_name: TestName, runtests: RunTests, tracer: trace.Trace | None
+    ) -> TestResult:
        if tracer is not None:
            # If we're tracing code coverage, then we don't exit with status
            # if on a false return value from main.
@ -292,6 +295,7 @@ def run_test(self, test_name: TestName, runtests: RunTests, tracer):
            namespace = dict(locals())
            tracer.runctx(cmd, globals=globals(), locals=namespace)
            result = namespace['result']
+            result.covered_lines = list(tracer.counts)
        else:
            result = run_single_test(test_name, runtests)

@ -299,9 +303,8 @@ def run_test(self, test_name: TestName, runtests: RunTests, tracer):

        return result

-    def run_tests_sequentially(self, runtests):
+    def run_tests_sequentially(self, runtests) -> None:
        if self.coverage:
-            import trace
            tracer = trace.Trace(trace=False, count=True)
        else:
            tracer = None
@ -349,8 +352,6 @@ def run_tests_sequentially(self, runtests):
        if previous_test:
            print(previous_test)

-        return tracer
-
    def get_state(self):
        state = self.results.get_state(self.fail_env_changed)
        if self.first_state:
@ -361,7 +362,7 @@ def _run_tests_mp(self, runtests: RunTests, num_workers: int) -> None:
        from .run_workers import RunWorkers
        RunWorkers(num_workers, runtests, self.logger, self.results).run()

-    def finalize_tests(self, tracer):
+    def finalize_tests(self, coverage: trace.CoverageResults | None) -> None:
        if self.next_single_filename:
            if self.next_single_test:
                with open(self.next_single_filename, 'w') as fp:
@ -369,10 +370,10 @@ def finalize_tests(self, tracer):
            else:
                os.unlink(self.next_single_filename)

-        if tracer is not None:
-            results = tracer.results()
-            results.write_results(show_missing=True, summary=True,
-                                  coverdir=self.coverage_dir)
+        if coverage is not None:
+            coverage.write_results(show_missing=True, summary=True,
+                                   coverdir=self.coverage_dir,
+                                   ignore_missing_files=True)

        if self.want_run_leaks:
            os.system("leaks %d" % os.getpid())
@ -412,6 +413,7 @@ def create_run_tests(self, tests: TestTuple):
            hunt_refleak=self.hunt_refleak,
            test_dir=self.test_dir,
            use_junit=(self.junit_filename is not None),
+            coverage=self.coverage,
            memory_limit=self.memory_limit,
            gc_threshold=self.gc_threshold,
            use_resources=self.use_resources,
@ -458,10 +460,10 @@ def _run_tests(self, selected: TestTuple, tests: TestList | None) -> int:
        try:
            if self.num_workers:
                self._run_tests_mp(runtests, self.num_workers)
-                tracer = None
            else:
-                tracer = self.run_tests_sequentially(runtests)
+                self.run_tests_sequentially(runtests)

+            coverage = self.results.get_coverage_results()
            self.display_result(runtests)

            if self.want_rerun and self.results.need_rerun():
@ -471,7 +473,7 @@ def _run_tests(self, selected: TestTuple, tests: TestList | None) -> int:
                self.logger.stop_load_tracker()

        self.display_summary()
-        self.finalize_tests(tracer)
+        self.finalize_tests(coverage)

        return self.results.get_exitcode(self.fail_env_changed,
                                         self.fail_rerun)
--- a/Lib/test/libregrtest/result.py
+++ b/Lib/test/libregrtest/result.py
@ -78,6 +78,11 @@ def must_stop(state):
        }


+FileName = str
+LineNo = int
+Location = tuple[FileName, LineNo]
+
+
@dataclasses.dataclass(slots=True)
 class TestResult:
    test_name: TestName
@ -91,6 +96,9 @@ class TestResult:
    errors: list[tuple[str, str]] | None = None
    failures: list[tuple[str, str]] | None = None

+    # partial coverage in a worker run; not used by sequential in-process runs
+    covered_lines: list[Location] | None = None
+
    def is_failed(self, fail_env_changed: bool) -> bool:
        if self.state == State.ENV_CHANGED:
            return fail_env_changed
@ -207,6 +215,10 @@ def _decode_test_result(data: dict[str, Any]) -> TestResult | dict[str, Any]:
        data.pop('__test_result__')
        if data['stats'] is not None:
            data['stats'] = TestStats(**data['stats'])
+        if data['covered_lines'] is not None:
+            data['covered_lines'] = [
+                tuple(loc) for loc in data['covered_lines']
+            ]
        return TestResult(**data)
    else:
        return data
--- a/Lib/test/libregrtest/results.py
+++ b/Lib/test/libregrtest/results.py
@ -1,13 +1,14 @@
 import sys
+import trace

 from .runtests import RunTests
-from .result import State, TestResult, TestStats
+from .result import State, TestResult, TestStats, Location
 from .utils import (
    StrPath, TestName, TestTuple, TestList, FilterDict,
    printlist, count, format_duration)


-# Python uses exit code 1 when an exception is not catched
+# Python uses exit code 1 when an exception is not caught
 # argparse.ArgumentParser.error() uses exit code 2
 EXITCODE_BAD_TEST = 2
 EXITCODE_ENV_CHANGED = 3
@ -34,6 +35,8 @@ def __init__(self):
        self.stats = TestStats()
        # used by --junit-xml
        self.testsuite_xml: list[str] = []
+        # used by -T with -j
+        self.covered_lines: set[Location] = set()

    def is_all_good(self):
        return (not self.bad
@ -119,11 +122,17 @@ def accumulate_result(self, result: TestResult, runtests: RunTests):
            self.stats.accumulate(result.stats)
        if rerun:
            self.rerun.append(test_name)
-
+        if result.covered_lines:
+            # we don't care about trace counts so we don't have to sum them up
+            self.covered_lines.update(result.covered_lines)
        xml_data = result.xml_data
        if xml_data:
            self.add_junit(xml_data)

+    def get_coverage_results(self) -> trace.CoverageResults:
+        counts = {loc: 1 for loc in self.covered_lines}
+        return trace.CoverageResults(counts=counts)
+
    def need_rerun(self):
        return bool(self.rerun_results)

--- a/Lib/test/libregrtest/run_workers.py
+++ b/Lib/test/libregrtest/run_workers.py
@ -277,7 +277,7 @@ def run_tmp_files(self, worker_runtests: RunTests,
        # Python finalization: too late for libregrtest.
        if not support.is_wasi:
            # Don't check for leaked temporary files and directories if Python is
-            # run on WASI. WASI don't pass environment variables like TMPDIR to
+            # run on WASI. WASI doesn't pass environment variables like TMPDIR to
            # worker processes.
            tmp_dir = tempfile.mkdtemp(prefix="test_python_")
            tmp_dir = os.path.abspath(tmp_dir)
--- a/Lib/test/libregrtest/runtests.py
+++ b/Lib/test/libregrtest/runtests.py
@ -85,6 +85,7 @@ class RunTests:
    hunt_refleak: HuntRefleak | None
    test_dir: StrPath | None
    use_junit: bool
+    coverage: bool
    memory_limit: str | None
    gc_threshold: int | None
    use_resources: tuple[str, ...]
--- a/Lib/test/libregrtest/worker.py
+++ b/Lib/test/libregrtest/worker.py
@ -4,7 +4,7 @@
 from typing import Any, NoReturn

 from test import support
-from test.support import os_helper
+from test.support import os_helper, Py_DEBUG

 from .setup import setup_process, setup_test_dir
 from .runtests import RunTests, JsonFile, JsonFileType
@ -30,6 +30,8 @@ def create_worker_process(runtests: RunTests, output_fd: int,
        python_opts = [opt for opt in python_opts if opt != "-E"]
    else:
        executable = (sys.executable,)
+    if runtests.coverage:
+        python_opts.append("-Xpresite=test.cov")
    cmd = [*executable, *python_opts,
           '-u',    # Unbuffered stdout and stderr
           '-m', 'test.libregrtest.worker',
@ -87,6 +89,18 @@ def worker_process(worker_json: StrJSON) -> NoReturn:
            print(f"Re-running {test_name} in verbose mode", flush=True)

    result = run_single_test(test_name, runtests)
+    if runtests.coverage:
+        if "test.cov" in sys.modules:  # imported by -Xpresite=
+            result.covered_lines = list(sys.modules["test.cov"].coverage)
+        elif not Py_DEBUG:
+            print(
+                "Gathering coverage in worker processes requires --with-pydebug",
+                flush=True,
+            )
+        else:
+            raise LookupError(
+                "`test.cov` not found in sys.modules but coverage wanted"
+            )

    if json_file.file_type == JsonFileType.STDOUT:
        print()
--- a/Lib/test/support/init.py
+++ b/Lib/test/support/init.py
@ -1082,18 +1082,30 @@ def check_impl_detail(**guards):

 def no_tracing(func):
    """Decorator to temporarily turn off tracing for the duration of a test."""
-    if not hasattr(sys, 'gettrace'):
-        return func
-    else:
+    trace_wrapper = func
+    if hasattr(sys, 'gettrace'):
        @functools.wraps(func)
-        def wrapper(*args, **kwargs):
+        def trace_wrapper(*args, **kwargs):
            original_trace = sys.gettrace()
            try:
                sys.settrace(None)
                return func(*args, **kwargs)
            finally:
                sys.settrace(original_trace)
-        return wrapper
+
+    coverage_wrapper = trace_wrapper
+    if 'test.cov' in sys.modules:  # -Xpresite=test.cov used
+        cov = sys.monitoring.COVERAGE_ID
+        @functools.wraps(func)
+        def coverage_wrapper(*args, **kwargs):
+            original_events = sys.monitoring.get_events(cov)
+            try:
+                sys.monitoring.set_events(cov, 0)
+                return trace_wrapper(*args, **kwargs)
+            finally:
+                sys.monitoring.set_events(cov, original_events)
+
+    return coverage_wrapper


 def refcount_test(test):
--- a/Lib/test/test_regrtest.py
+++ b/Lib/test/test_regrtest.py
@ -306,13 +306,23 @@ def test_multiprocess(self):
                self.assertEqual(ns.use_mp, 2)
                self.checkError([opt], 'expected one argument')
                self.checkError([opt, 'foo'], 'invalid int value')
-                self.checkError([opt, '2', '-T'], "don't go together")
-                self.checkError([opt, '0', '-T'], "don't go together")

-    def test_coverage(self):
+    def test_coverage_sequential(self):
        for opt in '-T', '--coverage':
            with self.subTest(opt=opt):
-                ns = self.parse_args([opt])
+                with support.captured_stderr() as stderr:
+                    ns = self.parse_args([opt])
+                self.assertTrue(ns.trace)
+                self.assertIn(
+                    "collecting coverage without -j is imprecise",
+                    stderr.getvalue(),
+                )
+
+    @unittest.skipUnless(support.Py_DEBUG, 'need a debug build')
+    def test_coverage_mp(self):
+        for opt in '-T', '--coverage':
+            with self.subTest(opt=opt):
+                ns = self.parse_args([opt, '-j1'])
                self.assertTrue(ns.trace)

    def test_coverdir(self):
--- a/Lib/trace.py
+++ b/Lib/trace.py
@ -202,7 +202,8 @@ def update(self, other):
        for key in other_callers:
            callers[key] = 1

-    def write_results(self, show_missing=True, summary=False, coverdir=None):
+    def write_results(self, show_missing=True, summary=False, coverdir=None, *,
+                      ignore_missing_files=False):
        """
        Write the coverage results.

@ -211,6 +212,9 @@ def write_results(self, show_missing=True, summary=False, coverdir=None):
        :param coverdir: If None, the results of each module are placed in its
                         directory, otherwise it is included in the directory
                         specified.
+        :param ignore_missing_files: If True, counts for files that no longer
+                         exist are silently ignored. Otherwise, a missing file
+                         will raise a FileNotFoundError.
        """
        if self.calledfuncs:
            print()
@ -253,6 +257,9 @@ def write_results(self, show_missing=True, summary=False, coverdir=None):
            if filename.endswith(".pyc"):
                filename = filename[:-1]

+            if ignore_missing_files and not os.path.isfile(filename):
+                continue
+
            if coverdir is None:
                dir = os.path.dirname(os.path.abspath(filename))
                modulename = _modname(filename)
@ -278,7 +285,6 @@ def write_results(self, show_missing=True, summary=False, coverdir=None):
                percent = int(100 * n_hits / n_lines)
                sums[modulename] = n_lines, percent, modulename, filename

-
        if summary and sums:
            print("lines   cov%   module   (path)")
            for m in sorted(sums):
--- a/Misc/NEWS.d/next/Tests/2023-11-03-18-59-13.gh-issue-110722.jvT1pb.rst
+++ b/Misc/NEWS.d/next/Tests/2023-11-03-18-59-13.gh-issue-110722.jvT1pb.rst
@ -0,0 +1,2 @@
+Gathering line coverage of standard libraries within the regression test
+suite is now precise, as well as much faster. Patch by Łukasz Langa.