serenity/Meta/check-style.py
Ben Wiederhake 4a359b5a42 Meta: Check that local includes can be resolved
If a local include does not point to a file in the repository, it should
be a system include instead. This is now checked on every commit.

While this does introduce significant overhead in terms of percentage,
I think that an additional 10 ms on huge commits (or less on smaller
commits) is acceptable:

hyperfine -w1 './Meta/check-style.py AK/*.h AK/*.cpp' # Before
Benchmark 1: ./Meta/check-style.py AK/*.h AK/*.cpp
  Time (mean ± σ):      20.3 ms ±   0.4 ms    [User: 17.1 ms, System: 3.5 ms]
  Range (min … max):    19.5 ms …  21.6 ms    128 runs

hyperfine -w1 './Meta/check-style.py AK/*.h AK/*.cpp' # After
Benchmark 1: ./Meta/check-style.py AK/*.h AK/*.cpp
  Time (mean ± σ):      32.3 ms ±   0.4 ms    [User: 27.9 ms, System: 4.4 ms]
  Range (min … max):    31.4 ms …  34.9 ms    91 runs
2023-06-06 23:19:50 +02:00

172 lines
6.1 KiB
Python
Executable file

#!/usr/bin/env python3
import os
import pathlib
import re
import subprocess
import sys
# Every checked file has to open with a license header of exactly this shape,
# immediately followed by one blank line:
# /*
#  * Copyright (c) YYYY(-YYYY), Whatever
#  * ... more of these ...
#  *
#  * SPDX-License-Identifier: BSD-2-Clause
#  */
# The pattern is compiled without re.M, so '^' only matches at the very
# beginning of the text that is searched.
GOOD_LICENSE_HEADER_PATTERN = re.compile(
    '^/\\*\n'  # opening "/*" line
    '( \\* Copyright \\(c\\) [0-9]{4}(-[0-9]{4})?, .*\n)+'  # one or more copyright lines
    ' \\*\n'  # bare " *" separator line
    ' \\* SPDX-License-Identifier: BSD-2-Clause\n'  # the only accepted license id
    ' \\*/\n'  # closing " */" line
    '\n'  # mandatory blank line after the header
)
# Path prefixes that skip the license-header check. An entry may name a single
# file or, with a trailing slash, everything below a directory, because entries
# are compared with str.startswith().
LICENSE_HEADER_CHECK_EXCLUDES = {
    'AK/Checked.h',
    'AK/Function.h',
    'Userland/Libraries/LibJS/SafeFunction.h',
    'Userland/Libraries/LibC/elf.h',
    'Userland/Libraries/LibCodeComprehension/Cpp/Tests/',
    'Userland/Libraries/LibCpp/Tests/parser/',
    'Userland/Libraries/LibCpp/Tests/preprocessor/',
}
# Path prefixes that skip the "#include <LibC/...>" check.
LIBC_CHECK_EXCLUDES = {
    'Kernel/',
    'Userland/Libraries/LibELF/',
    'Userland/Libraries/LibRegex/',
}
# Plain-text marker: if a header contains this anywhere, it has a
# "#pragma once" at all (possibly badly formatted).
PRAGMA_ONCE_STRING = '#pragma once'
# Path prefixes of headers that skip the "#pragma once" check.
PRAGMA_ONCE_CHECK_EXCLUDES = {'Userland/Libraries/LibC/assert.h'}
# A well-formatted "#pragma once" has a blank line on both sides, unless it
# sits at the very start or the very end of the text.
GOOD_PRAGMA_ONCE_PATTERN = re.compile(
    '(^|\\S\n\n)'  # start of text, or a non-space character followed by an empty line
    '#pragma once'
    '(\n\n\\S.|$)'  # an empty line followed by more content, or end of text
)
# LibC is supposed to be a system library; don't mention the directory.
BAD_INCLUDE_LIBC = re.compile("# *include <LibC/")
# Make sure that all includes are either system includes or immediately resolvable local includes.
# This pattern picks out every whole line holding an #include whose text contains a
# closing '>' or '"' that is not directly followed by ')'.
ANY_INCLUDE_PATTERN = re.compile('^ *# *include\\b.*[>"](?!\\)).*$', re.M)
# `#include <path>`, optionally followed by a ` //` or ` /*` comment. Group 1 is the path.
SYSTEM_INCLUDE_PATTERN = re.compile("^ *# *include *<([^>]+)>(?: /[*/].*)?$")
# `#include "path"`, optionally followed by a ` //` or ` /*` comment. Group 1 is the path.
# The path stops at the first closing double quote, so quoted words in a trailing
# comment are never swallowed into the captured path.
LOCAL_INCLUDE_PATTERN = re.compile('^ *# *include *"([^"]+)"(?: /[*/].*)?$')
# Path prefixes that skip the include checks.
INCLUDE_CHECK_EXCLUDES = {
    "Userland/Libraries/LibCodeComprehension/Cpp/Tests/",
    "Userland/Libraries/LibCpp/Tests/parser/",
    "Userland/Libraries/LibCpp/Tests/preprocessor/",
}
def should_check_file(filename):
    """Return True if *filename* is a source file this script should check.

    Only names ending in ``.cpp`` or ``.h`` qualify. Names starting with
    ``Base/`` and the single file ``Kernel/FileSystem/Ext2FS/Definitions.h``
    are always skipped.
    """
    # str.endswith accepts a tuple, so both suffixes are tested in one call.
    if not filename.endswith(('.cpp', '.h')):
        return False
    if filename.startswith('Base/'):
        return False
    return filename != 'Kernel/FileSystem/Ext2FS/Definitions.h'
def find_files_here_or_argv():
    """Return an iterable of the file names that should be checked.

    If any command-line arguments were given, they are taken as the candidate
    list; otherwise every file that ``git ls-files`` reports for the current
    directory is a candidate. Candidates are then narrowed down with
    should_check_file().

    Raises:
        subprocess.CalledProcessError: if ``git ls-files`` exits with a
            non-zero status.
    """
    if len(sys.argv) > 1:
        raw_list = sys.argv[1:]
    else:
        # -z makes git emit NUL-terminated, *unquoted* paths. Without it, git
        # wraps paths that contain unusual characters in C-style quotes, and
        # such entries would silently fail the suffix filter below.
        process = subprocess.run(["git", "ls-files", "-z"], check=True, capture_output=True)
        # fsdecode() keeps even undecodable bytes round-trippable for open().
        raw_list = [name for name in os.fsdecode(process.stdout).split('\0') if name]
    return filter(should_check_file, raw_list)
def is_in_prefix_list(filename, prefix_list):
    """Return True if *filename* starts with at least one entry of *prefix_list*.

    *prefix_list* may be any iterable of strings; an empty one yields False.
    """
    # str.startswith accepts a tuple of candidates and tests them all in one call.
    return filename.startswith(tuple(prefix_list))
def run():
    """Run all style checks on the selected files and report the offenders.

    For every file from find_files_here_or_argv() this checks, unless the file
    matches the corresponding exclude list:
      * the license header at the top of the file,
      * presence and formatting of ``#pragma once`` (``.h`` files only),
      * that no include mentions ``<LibC/...>``,
      * that every include line is either a system include or a local include
        that resolves to an existing file relative to the including file.

    Offending files are printed grouped by kind of problem. If anything was
    reported, the process exits with status 1 via sys.exit().
    """
    errors_license = []
    errors_pragma_once_bad = []
    errors_pragma_once_missing = []
    errors_include_libc = []
    errors_include_weird_format = []
    errors_include_missing_local = []

    for filename in find_files_here_or_argv():
        # Decode explicitly instead of relying on the locale's default
        # encoding, so the same file gives the same result on every machine.
        # NOTE(review): assumes all checked sources are UTF-8 -- confirm.
        with open(filename, "r", encoding="utf-8") as f:
            file_content = f.read()

        if not is_in_prefix_list(filename, LICENSE_HEADER_CHECK_EXCLUDES):
            if not GOOD_LICENSE_HEADER_PATTERN.search(file_content):
                errors_license.append(filename)

        if filename.endswith('.h'):
            if is_in_prefix_list(filename, PRAGMA_ONCE_CHECK_EXCLUDES):
                # File was excluded
                pass
            elif GOOD_PRAGMA_ONCE_PATTERN.search(file_content):
                # Excellent, the formatting is correct.
                pass
            elif PRAGMA_ONCE_STRING in file_content:
                # Bad, the '#pragma once' is present but it's formatted wrong.
                errors_pragma_once_bad.append(filename)
            else:
                # Bad, the '#pragma once' is missing completely.
                errors_pragma_once_missing.append(filename)

        if not is_in_prefix_list(filename, LIBC_CHECK_EXCLUDES):
            if BAD_INCLUDE_LIBC.search(file_content):
                errors_include_libc.append(filename)

        if not is_in_prefix_list(filename, INCLUDE_CHECK_EXCLUDES):
            file_directory = pathlib.Path(filename).parent
            for include_line in ANY_INCLUDE_PATTERN.findall(file_content):
                if SYSTEM_INCLUDE_PATTERN.match(include_line):
                    # Don't try to resolve system-style includes, as these might depend on generators.
                    continue
                local_match = LOCAL_INCLUDE_PATTERN.match(include_line)
                if local_match is None:
                    print(f"Cannot parse include-line '{include_line}' in {filename}")
                    # Report each file once, however many bad lines it has.
                    if filename not in errors_include_weird_format:
                        errors_include_weird_format.append(filename)
                    continue
                relative_filename = local_match.group(1)
                # Local includes are resolved relative to the including file only.
                referenced_file = file_directory.joinpath(relative_filename)
                if not referenced_file.exists():
                    print(f"In {filename}: Cannot find {referenced_file}")
                    if filename not in errors_include_missing_local:
                        errors_include_missing_local.append(filename)

    # (heading, offending files) in the order the summaries are printed.
    reports = [
        ("Files with bad licenses:", errors_license),
        ("Files without #pragma once:", errors_pragma_once_missing),
        ("Files with a bad #pragma once:", errors_pragma_once_bad),
        ("Files that include a LibC header using #include <LibC/...>:", errors_include_libc),
        ("Files that contain badly-formatted #include statements:", errors_include_weird_format),
        ("Files that #include a missing local file:", errors_include_missing_local),
    ]
    have_errors = False
    for heading, offenders in reports:
        if offenders:
            print(heading, " ".join(offenders))
            have_errors = True

    if have_errors:
        sys.exit(1)
if __name__ == '__main__':
    # Work from the parent of the directory holding this script, so that the
    # relative paths used by the checks resolve. abspath() matters when
    # __file__ is a bare relative name: its dirname would be '', and appending
    # "/.." to that would change into the filesystem root instead.
    os.chdir(os.path.join(os.path.dirname(os.path.abspath(__file__)), ".."))
    run()