1
0
mirror of https://github.com/dart-lang/sdk synced 2024-07-08 12:06:26 +00:00

[presubmit] first check for formatting errors in bulk.

The motivation behind this CL is performance: currently
PRESUBMIT.py checks for formatting errors a file at a time.
This can be very slow when a CL modifies a lot of files. For
reference, a command-line invocation of `dart format`
takes 2s to check 100 files at once, vs 70s if you run the
command one file at a time.

This change updates PRESUBMIT.py to run `dart format` first
on all the changed files in bulk to determine whether any
has formatting errors. If any formatting error is found, it
goes through the process of checking each file individually
again for formatting errors and compares the result against
the state before a CL's changes (same process as today).
However, if no errors are found, we bypass the slow check
entirely!

To validate the implementation I created a pretend CL with
100 modifications. I leveraged
`tests/language/function_type/test_generator.dart` to touch
100 test files easily. The results were as expected:

  * if all files are formatted properly, the presubmit
    completes now in 4s (was 76s).

  * If all files have formatting errors, the presubmit
    didn't regress beyond the usual observable performance
    variances (150s in both cases).

Notes about the change itself:

  * refactored to compute the set of affected files before
    running the formatting checks

  * refactored to provide some information as data (e.g.
    `excluded_folders`) instead of lambdas (e.g.
    `should_skip`) to support the first change

  * added a bulk option to `HasFormatErrors` to take the
    fast pass

Eventually we could take this a step further and improve the
performance of the failure case. That would require parsing
the output of the bulk run in order to avoid the individual
runs on all files, and only check for the previous version
of the files that indeed required formatting changes.

Fixes https://github.com/dart-lang/sdk/issues/54864

Change-Id: I0cfbd84f6c62eca48ac91449599ef9ac4c1800f8
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/352740
Commit-Queue: Sigmund Cherem <sigmund@google.com>
Reviewed-by: William Hesse <whesse@google.com>
Reviewed-by: Bob Nystrom <rnystrom@google.com>
This commit is contained in:
Sigmund Cherem 2024-02-15 19:29:42 +00:00 committed by Commit Queue
parent 628d744391
commit 9a4d8b47cf

View File

@ -30,34 +30,53 @@ def is_dart_file(path):
return path.endswith('.dart')
def _CheckFormat(input_api,
identification,
extension,
windows,
hasFormatErrors: Callable[[str, str], bool],
should_skip=lambda path: False):
def get_old_contents(input_api, path):
local_root = input_api.change.RepositoryRoot()
upstream = input_api.change._upstream
unformatted_files = []
return scm.GIT.Capture(['show', upstream + ':' + path],
cwd=local_root,
strip_out=False)
def files_to_check_for_format(input_api, extension, exclude_folders):
files = []
exclude_folders += [
"pkg/front_end/testcases/", "pkg/front_end/parser_testcases/"
]
for git_file in input_api.AffectedTextFiles():
if git_file.LocalPath().startswith("pkg/front_end/testcases/"):
local_path = git_file.LocalPath()
if not local_path.endswith(extension):
continue
if git_file.LocalPath().startswith("pkg/front_end/parser_testcases/"):
continue
if should_skip(git_file.LocalPath()):
if any([local_path.startswith(f) for f in exclude_folders]):
continue
files.append(git_file)
return files
def _CheckFormat(input_api, identification, extension, windows,
hasFormatErrors: Callable[[str, list, str],
bool], exclude_folders):
files = files_to_check_for_format(input_api, extension, exclude_folders)
if not files:
return []
# Check for formatting errors in bulk first. This is orders of magnitude
# faster than checking file-by-file on large changes with hundreds of files.
if not hasFormatErrors(filenames=[f.AbsoluteLocalPath() for f in files]):
return []
print("Formatting errors found, comparing against old versions.")
unformatted_files = []
for git_file in files:
filename = git_file.AbsoluteLocalPath()
if filename.endswith(extension) and hasFormatErrors(filename=filename):
if hasFormatErrors(filename=filename):
old_version_has_errors = False
try:
path = git_file.LocalPath()
if windows:
# Git expects a linux style path.
path = path.replace(os.sep, '/')
old_contents = scm.GIT.Capture(['show', upstream + ':' + path],
cwd=local_root,
strip_out=False)
if hasFormatErrors(contents=old_contents):
if hasFormatErrors(contents=get_old_contents(input_api, path)):
old_version_has_errors = True
except subprocess.CalledProcessError as e:
old_version_has_errors = False
@ -102,15 +121,22 @@ def _CheckDartFormat(input_api, output_api):
'--fix-named-default-separator',
]
def HasFormatErrors(filename: str = None, contents: str = None):
def HasFormatErrors(filename: str = None,
filenames: list = None,
contents: str = None):
# Don't look for formatting errors in multitests. Since those are very
# sensitive to whitespace, many cannot be reformatted without breaking
# them.
if filename and filename.endswith('_test.dart'):
with open(filename) as f:
contents = f.read()
if '//#' in contents:
return False
def skip_file(path):
if path.endswith('_test.dart'):
with open(path) as f:
contents = f.read()
if '//#' in contents:
return True
return False
if filename and skip_file(filename):
return False
args = [
dart,
@ -138,6 +164,9 @@ def _CheckDartFormat(input_api, output_api):
os.unlink(f.name)
elif contents:
process = subprocess.run(args, input=contents, text=True)
elif filenames:
args += [f for f in filenames if not skip_file(f)]
process = subprocess.run(args)
else:
args.append(filename)
process = subprocess.run(args)
@ -149,7 +178,7 @@ def _CheckDartFormat(input_api, output_api):
return process.returncode == 1
unformatted_files = _CheckFormat(input_api, "dart format", ".dart", windows,
HasFormatErrors)
HasFormatErrors, [])
if unformatted_files:
lineSep = " \\\n"
@ -185,17 +214,22 @@ def _CheckStatusFiles(input_api, output_api):
print('WARNING: Status file linter not found: %s' % lint)
return []
def HasFormatErrors(filename=None, contents=None):
def HasFormatErrors(filename=None, filenames=None, contents=None):
if filenames:
# The status file linter doesn't support checking files in bulk.
# Returning `True` causes `_CheckFormat` to fallback to check
# formatting file by file below.
return True
args = [dart, lint] + (['-t'] if contents else [filename])
process = subprocess.run(args, input=contents, text=True)
return process.returncode != 0
def should_skip(path):
return (path.startswith("pkg/status_file/test/data/") or
path.startswith("pkg/front_end/"))
exclude_folders = [
"pkg/status_file/test/data/",
"pkg/front_end/",
]
unformatted_files = _CheckFormat(input_api, "status file", ".status",
windows, HasFormatErrors, should_skip)
windows, HasFormatErrors, exclude_folders)
if unformatted_files:
normalize = os.path.join(local_root, 'pkg', 'status_file', 'bin',