From 806d7c98a5da5c1fd2e52a5b666f36ca4f545092 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade Date: Sat, 22 Jul 2023 10:12:43 +0200 Subject: [PATCH] gh-101100: Docs: Check Sphinx warnings and fail if improved (#106460) --- .github/workflows/reusable-docs.yml | 27 ++--- Doc/tools/.nitignore | 11 +- Doc/tools/check-warnings.py | 149 ++++++++++++++++++++++++++++ Doc/tools/touch-clean-files.py | 65 ------------ Doc/tools/warnings-to-gh-actions.py | 25 ----- 5 files changed, 161 insertions(+), 116 deletions(-) create mode 100644 Doc/tools/check-warnings.py delete mode 100644 Doc/tools/touch-clean-files.py delete mode 100644 Doc/tools/warnings-to-gh-actions.py diff --git a/.github/workflows/reusable-docs.yml b/.github/workflows/reusable-docs.yml index b39d8cea642..56932c48605 100644 --- a/.github/workflows/reusable-docs.yml +++ b/.github/workflows/reusable-docs.yml @@ -28,10 +28,8 @@ jobs: cache-dependency-path: 'Doc/requirements.txt' - name: 'Install build dependencies' run: make -C Doc/ venv - - name: 'Build HTML documentation' - run: make -C Doc/ SPHINXOPTS="-q" SPHINXERRORHANDLING="-W --keep-going" html - # Add pull request annotations for Sphinx nitpicks (missing references) + # To annotate PRs with Sphinx nitpicks (missing references) - name: 'Get list of changed files' if: github.event_name == 'pull_request' id: changed_files @@ -39,24 +37,19 @@ jobs: with: filter: "Doc/**" format: csv # works for paths with spaces - - name: 'Build changed files in nit-picky mode' - if: github.event_name == 'pull_request' + - name: 'Build HTML documentation' continue-on-error: true run: | set -Eeuo pipefail - # Mark files the pull request modified - python Doc/tools/touch-clean-files.py --clean '${{ steps.changed_files.outputs.added_modified }}' - # Build docs with the '-n' (nit-picky) option; convert warnings to annotations - make -C Doc/ PYTHON=../python SPHINXOPTS="-q -n --keep-going" html 2>&1 | - python Doc/tools/warnings-to-gh-actions.py - - # Ensure some files always pass Sphinx nit-picky mode (no missing references) - - name: 'Build known-good files in nit-picky mode' + # Build docs with the '-n' (nit-picky) option; write warnings to file + make -C Doc/ PYTHON=../python SPHINXOPTS="-q -n -W --keep-going -w sphinx-warnings.txt" html + - name: 'Check warnings' + if: github.event_name == 'pull_request' run: | - # Mark files that must pass nit-picky - python Doc/tools/touch-clean-files.py - # Build docs with the '-n' (nit-picky) option, convert warnings to errors (-W) - make -C Doc/ PYTHON=../python SPHINXOPTS="-q -n -W --keep-going" html 2>&1 + python Doc/tools/check-warnings.py \ + --check-and-annotate '${{ steps.changed_files.outputs.added_modified }}' \ + --fail-if-regression \ + --fail-if-improved # This build doesn't use problem matchers or check annotations build_doc_oldest_supported_sphinx: diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore index 23aa30c956b..ca6f32c55ac 100644 --- a/Doc/tools/.nitignore +++ b/Doc/tools/.nitignore @@ -1,7 +1,6 @@ # All RST files under Doc/ -- except these -- must pass Sphinx nit-picky mode, -# as tested on the CI via touch-clean-files.py in doc.yml. -# Add blank lines between files and keep them sorted lexicographically -# to help avoid merge conflicts. +# as tested on the CI via check-warnings.py in reusable-docs.yml. +# Keep lines sorted lexicographically to help avoid merge conflicts. Doc/c-api/allocation.rst Doc/c-api/apiabiversion.rst @@ -18,7 +17,6 @@ Doc/c-api/complex.rst Doc/c-api/conversion.rst Doc/c-api/datetime.rst Doc/c-api/descriptor.rst -Doc/c-api/dict.rst Doc/c-api/exceptions.rst Doc/c-api/file.rst Doc/c-api/float.rst @@ -48,7 +46,6 @@ Doc/c-api/typehints.rst Doc/c-api/typeobj.rst Doc/c-api/unicode.rst Doc/c-api/veryhigh.rst -Doc/c-api/weakref.rst Doc/extending/embedding.rst Doc/extending/extending.rst Doc/extending/newtypes.rst @@ -88,8 +85,6 @@ Doc/library/bdb.rst Doc/library/bisect.rst Doc/library/bz2.rst Doc/library/calendar.rst -Doc/library/cgi.rst -Doc/library/cmath.rst Doc/library/cmd.rst Doc/library/code.rst Doc/library/codecs.rst @@ -105,7 +100,6 @@ Doc/library/contextlib.rst Doc/library/copy.rst Doc/library/csv.rst Doc/library/ctypes.rst -Doc/library/curses.ascii.rst Doc/library/curses.rst Doc/library/datetime.rst Doc/library/dbm.rst @@ -134,7 +128,6 @@ Doc/library/fractions.rst Doc/library/ftplib.rst Doc/library/functions.rst Doc/library/functools.rst -Doc/library/getopt.rst Doc/library/getpass.rst Doc/library/gettext.rst Doc/library/graphlib.rst diff --git a/Doc/tools/check-warnings.py b/Doc/tools/check-warnings.py new file mode 100644 index 00000000000..c17d0f51cd1 --- /dev/null +++ b/Doc/tools/check-warnings.py @@ -0,0 +1,149 @@ +#!/usr/bin/env python3 +""" +Check the output of running Sphinx in nit-picky mode (missing references). +""" +import argparse +import csv +import os +import re +import sys +from pathlib import Path + +# Exclude these whether they're dirty or clean, +# because they trigger a rebuild of dirty files. +EXCLUDE_FILES = { + "Doc/whatsnew/changelog.rst", +} + +# Subdirectories of Doc/ to exclude. +EXCLUDE_SUBDIRS = { + ".env", + ".venv", + "env", + "includes", + "venv", +} + +PATTERN = re.compile(r"(?P[^:]+):(?P\d+): WARNING: (?P.+)") + + +def check_and_annotate(warnings: list[str], files_to_check: str) -> None: + """ + Convert Sphinx warning messages to GitHub Actions. + + Converts lines like: + .../Doc/library/cgi.rst:98: WARNING: reference target not found + to: + ::warning file=.../Doc/library/cgi.rst,line=98::reference target not found + + Non-matching lines are echoed unchanged. + + see: + https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#setting-a-warning-message + """ + files_to_check = next(csv.reader([files_to_check])) + for warning in warnings: + if any(filename in warning for filename in files_to_check): + if match := PATTERN.fullmatch(warning): + print("::warning file={file},line={line}::{msg}".format_map(match)) + + +def fail_if_regression( + warnings: list[str], files_with_expected_nits: set[str], files_with_nits: set[str] +) -> int: + """ + Ensure some files always pass Sphinx nit-picky mode (no missing references). + These are files which are *not* in .nitignore. + """ + all_rst = { + str(rst) + for rst in Path("Doc/").rglob("*.rst") + if rst.parts[1] not in EXCLUDE_SUBDIRS + } + should_be_clean = all_rst - files_with_expected_nits - EXCLUDE_FILES + problem_files = sorted(should_be_clean & files_with_nits) + if problem_files: + print("\nError: must not contain warnings:\n") + for filename in problem_files: + print(filename) + for warning in warnings: + if filename in warning: + if match := PATTERN.fullmatch(warning): + print(" {line}: {msg}".format_map(match)) + return -1 + return 0 + + +def fail_if_improved( + files_with_expected_nits: set[str], files_with_nits: set[str] +) -> int: + """ + We may have fixed warnings in some files so that the files are now completely clean. + Good news! Let's add them to .nitignore to prevent regression. + """ + files_with_no_nits = files_with_expected_nits - files_with_nits + if files_with_no_nits: + print("\nCongratulations! You improved:\n") + for filename in sorted(files_with_no_nits): + print(filename) + print("\nPlease remove from Doc/tools/.nitignore\n") + return -1 + return 0 + + +def main() -> int: + parser = argparse.ArgumentParser() + parser.add_argument( + "--check-and-annotate", + help="Comma-separated list of files to check, " + "and annotate those with warnings on GitHub Actions", + ) + parser.add_argument( + "--fail-if-regression", + action="store_true", + help="Fail if known-good files have warnings", + ) + parser.add_argument( + "--fail-if-improved", + action="store_true", + help="Fail if new files with no nits are found", + ) + args = parser.parse_args() + exit_code = 0 + + wrong_directory_msg = "Must run this script from the repo root" + assert Path("Doc").exists() and Path("Doc").is_dir(), wrong_directory_msg + + with Path("Doc/sphinx-warnings.txt").open() as f: + warnings = f.read().splitlines() + + cwd = str(Path.cwd()) + os.path.sep + files_with_nits = { + warning.removeprefix(cwd).split(":")[0] + for warning in warnings + if "Doc/" in warning + } + + with Path("Doc/tools/.nitignore").open() as clean_files: + files_with_expected_nits = { + filename.strip() + for filename in clean_files + if filename.strip() and not filename.startswith("#") + } + + if args.check_and_annotate: + check_and_annotate(warnings, args.check_and_annotate) + + if args.fail_if_regression: + exit_code += fail_if_regression( + warnings, files_with_expected_nits, files_with_nits + ) + + if args.fail_if_improved: + exit_code += fail_if_improved(files_with_expected_nits, files_with_nits) + + return exit_code + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/Doc/tools/touch-clean-files.py b/Doc/tools/touch-clean-files.py deleted file mode 100644 index 2b045bd68a0..00000000000 --- a/Doc/tools/touch-clean-files.py +++ /dev/null @@ -1,65 +0,0 @@ -#!/usr/bin/env python3 -""" -Touch files that must pass Sphinx nit-picky mode -so they are rebuilt and we can catch regressions. -""" -import argparse -import csv -import sys -from pathlib import Path - -wrong_directory_msg = "Must run this script from the repo root" -assert Path("Doc").exists() and Path("Doc").is_dir(), wrong_directory_msg - -# Exclude these whether they're dirty or clean, -# because they trigger a rebuild of dirty files. -EXCLUDE_FILES = { - Path("Doc/whatsnew/changelog.rst"), -} - -# Subdirectories of Doc/ to exclude. -EXCLUDE_SUBDIRS = { - ".env", - ".venv", - "env", - "includes", - "venv", -} - -ALL_RST = { - rst for rst in Path("Doc/").rglob("*.rst") if rst.parts[1] not in EXCLUDE_SUBDIRS -} - - -parser = argparse.ArgumentParser( - description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter -) -parser.add_argument("-c", "--clean", help="Comma-separated list of clean files") -args = parser.parse_args() - -if args.clean: - clean_files = next(csv.reader([args.clean])) - CLEAN = { - Path(filename.strip()) - for filename in clean_files - if Path(filename.strip()).is_file() - } -elif args.clean is not None: - print( - "Not touching any files: an empty string `--clean` arg value passed.", - ) - sys.exit(0) -else: - with Path("Doc/tools/.nitignore").open() as ignored_files: - IGNORED = { - Path(filename.strip()) - for filename in ignored_files - if filename.strip() and not filename.startswith("#") - } - CLEAN = ALL_RST - IGNORED - EXCLUDE_FILES - -print("Touching:") -for filename in sorted(CLEAN): - print(filename) - filename.touch() -print(f"Touched {len(CLEAN)} files") diff --git a/Doc/tools/warnings-to-gh-actions.py b/Doc/tools/warnings-to-gh-actions.py deleted file mode 100644 index da33a4ede07..00000000000 --- a/Doc/tools/warnings-to-gh-actions.py +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env python3 - -""" -Convert Sphinx warning messages to GitHub Actions. - -Converts lines like: - .../Doc/library/cgi.rst:98: WARNING: reference target not found -to: - ::warning file=.../Doc/library/cgi.rst,line=98::reference target not found - -Non-matching lines are echoed unchanged. - -see: https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#setting-a-warning-message -""" - -import re -import sys - -pattern = re.compile(r'(?P[^:]+):(?P\d+): WARNING: (?P.+)') - -for line in sys.stdin: - if match := pattern.fullmatch(line.strip()): - print('::warning file={file},line={line}::{msg}'.format_map(match)) - else: - print(line)