Add a fuzzer for Py_CompileStringExFlags (#111721)

This commit is contained in:
Brad Larsen 2023-12-10 12:16:15 -05:00 committed by GitHub
parent 1f9cd3c1e5
commit eb27c9a99e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 265 additions and 0 deletions

View file

@ -0,0 +1,165 @@
# bits of syntax
"( "
") "
"[ "
"] "
": "
", "
"; "
"{ "
"} "
# operators
"+ "
"- "
"* "
"** "
"/ "
"// "
"| "
"& "
"< "
"> "
"= "
". "
"% "
"` "
"^ "
"~ "
"@ "
"== "
"!= "
"<> "
"<< "
"<= "
">= "
">> "
"+= "
"-= "
"*= "
"** "
"/= "
"//= "
"|= "
"%= "
"&= "
"^= "
"<<= "
">>= "
"**= "
":= "
"@= "
# whitespace
" "
":\\n "
# type signatures and functions
"-> "
": List[int]"
": Dict[int, str]"
"# type:"
"# type: List[int]"
"# type: Dict[int, str]"
", *"
", /"
", *args"
", **kwargs"
", x=42"
# literals
"0x0a"
"0b0000"
"42"
"0o70"
"42j"
"42.01"
"-5"
"+42e-3"
"0_0_0"
"1e1_0"
".1_4"
"{}"
# variable names
"x"
"y"
# strings
"r'x'"
"b'x'"
"rb\"x\""
"br\"x\""
"f'{x + 5}'"
"f\"{x + 5}\""
"'''"
"\"\"\""
"\\u"
"\\x"
# keywords
"def "
"del "
"pass "
"break "
"continue "
"return "
"raise "
"from "
"import "
".. "
"... "
"__future__ "
"as "
"global "
"nonlocal "
"assert "
"print "
"if "
"elif "
"else: "
"while "
"try: "
"except "
"finally: "
"with "
"lambda "
"or "
"and "
"not "
"None "
"__peg_parser__"
"True "
"False "
"yield "
"async "
"await "
"for "
"in "
"is "
"class "
# shebangs and encodings
"#!"
"# coding:"
"# coding="
"# coding: latin-1"
"# coding=latin-1"
"# coding: utf-8"
"# coding=utf-8"
"# coding: ascii"
"# coding=ascii"
"# coding: cp860"
"# coding=cp860"
"# coding: gbk"
"# coding=gbk"

View file

@ -0,0 +1,7 @@
from __future__ import annotations
# Declares one function that annotates the name `x` three times with
# subscripted builtin generics, then prints an arithmetic expression and `x`.
def test() -> None:
x: list[int] = []
x: dict[int, str] = {}
# NOTE: `{}` is an empty dict literal even though the annotation is set[bytes].
x: set[bytes] = {}
print(5 + 42 * 3, x)

View file

@ -0,0 +1,5 @@
# Class statement whose `metaclass` keyword is the integer literal 42 and whose
# body declares `__slots__`; the class is then called to create `foo`.
# NOTE(review): presumably meant only to be compiled, not executed -- confirm.
class Foo(metaclass=42):
__slots__ = ['x']
pass
foo = Foo()

View file

@ -0,0 +1,6 @@
# Generator function: starts `i` at 0, increments it inside an endless
# `while True` loop, and yields `i` when `i % 2 == 0`.
# NOTE(review): assumes the `yield` is nested under the `if` -- confirm.
def evens():
i = 0
while True:
i += 1
if i % 2 == 0:
yield i

View file

@ -0,0 +1,3 @@
# Coroutine function taking `name` (annotated as str): awaits `name` and
# prints it.
async def hello(name: str):
await name
print(name)

View file

@ -0,0 +1,7 @@
# try/except/finally statement: passes a string that is not a valid Python
# expression to eval(), handles SyntaxError by printing a message, prints
# another message in the `finally` clause, and contains a bare `raise`.
try:
eval('importer exporter... really long matches')
except SyntaxError:
print("nothing to see here")
finally:
print("all done here")
raise

View file

@ -0,0 +1,8 @@
"""Some module docstring"""
import sys
# Entry point: writes a greeting to sys.stderr.
def main():
print("Hello world!", file=sys.stderr)
# Call main() only when __name__ is '__main__'.
if __name__ == '__main__':
main()

View file

@ -8,3 +8,4 @@ fuzz_csv_reader
fuzz_struct_unpack
fuzz_ast_literal_eval
fuzz_elementtree_parsewhole
fuzz_pycompile

View file

@ -501,6 +501,63 @@ static int fuzz_elementtree_parsewhole(const char* data, size_t size) {
return 0;
}
/* Capacity of the scratch buffer, including room for the NUL terminator. */
#define MAX_PYCOMPILE_TEST_SIZE 16384
/* Holds the NUL-terminated copy of the source text handed to the compiler. */
static char pycompile_scratch[MAX_PYCOMPILE_TEST_SIZE];

/* Candidate `start` arguments; one is selected by the first input byte. */
static const int start_vals[] = {Py_eval_input, Py_single_input, Py_file_input};
/* `static`: these counts are private to this file and must not be exported. */
static const size_t NUM_START_VALS = sizeof(start_vals) / sizeof(start_vals[0]);

/* Candidate `optimize` arguments; one is selected by the second input byte. */
static const int optimize_vals[] = {-1, 0, 1, 2};
static const size_t NUM_OPTIMIZE_VALS = sizeof(optimize_vals) / sizeof(optimize_vals[0]);

/* Fuzz `Py_CompileStringExFlags` using a variety of input parameters.
 * That function is essentially behind the `compile` builtin.
 *
 * Input layout:
 *   data[0]    selects an element of `start_vals` (modulo its length)
 *   data[1]    selects an element of `optimize_vals` (modulo its length)
 *   data[2..]  source text passed to the compiler
 *
 * Inputs shorter than 2 bytes or longer than MAX_PYCOMPILE_TEST_SIZE - 1
 * bytes are ignored.  Always returns 0; a failed compilation only has its
 * error indicator cleared. */
static int fuzz_pycompile(const char* data, size_t size) {
    // Ignore overly-large inputs, and account for a NUL terminator
    if (size > MAX_PYCOMPILE_TEST_SIZE - 1) {
        return 0;
    }

    // Need 2 bytes for parameter selection
    if (size < 2) {
        return 0;
    }

    // Use first byte to determine element of `start_vals` to use.
    // The cast keeps the index non-negative where plain `char` is signed.
    unsigned char start_idx = (unsigned char) data[0];
    int start = start_vals[start_idx % NUM_START_VALS];

    // Use second byte to determine element of `optimize_vals` to use
    unsigned char optimize_idx = (unsigned char) data[1];
    int optimize = optimize_vals[optimize_idx % NUM_OPTIMIZE_VALS];

    // Create a NUL-terminated C string from the remaining input
    memcpy(pycompile_scratch, data + 2, size - 2);
    // Put a NUL terminator just after the copied data. (Space was reserved already.)
    pycompile_scratch[size - 2] = '\0';

    // XXX: instead of always using NULL for the `flags` value to
    //      `Py_CompileStringExFlags`, there are many flags that conditionally
    //      change parser behavior:
    //
    //          #define PyCF_TYPE_COMMENTS 0x1000
    //          #define PyCF_ALLOW_TOP_LEVEL_AWAIT 0x2000
    //          #define PyCF_ONLY_AST 0x0400
    //
    //      It would be good to test various combinations of these, too.
    PyCompilerFlags *flags = NULL;

    PyObject *result = Py_CompileStringExFlags(pycompile_scratch, "<fuzz input>", start, flags, optimize);
    if (result == NULL) {
        /* compilation failed, most likely from a syntax error */
        PyErr_Clear();
    } else {
        Py_DECREF(result);
    }

    return 0;
}
/* Run fuzzer and abort on failure. */
static int _run_fuzz(const uint8_t *data, size_t size, int(*fuzzer)(const char* , size_t)) {
int rv = fuzzer((const char*) data, size);
@ -642,6 +699,9 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
}
rv |= _run_fuzz(data, size, fuzz_elementtree_parsewhole);
#endif
#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_pycompile)
rv |= _run_fuzz(data, size, fuzz_pycompile);
#endif
return rv;
}

View file

@ -599,6 +599,9 @@ Modules/_xxtestfuzz/fuzzer.c - re_error_exception -
Modules/_xxtestfuzz/fuzzer.c - struct_error -
Modules/_xxtestfuzz/fuzzer.c - struct_unpack_method -
Modules/_xxtestfuzz/fuzzer.c - xmlparser_type -
Modules/_xxtestfuzz/fuzzer.c - pycompile_scratch -
Modules/_xxtestfuzz/fuzzer.c - start_vals -
Modules/_xxtestfuzz/fuzzer.c - optimize_vals -
Modules/_xxtestfuzz/fuzzer.c LLVMFuzzerTestOneInput CSV_READER_INITIALIZED -
Modules/_xxtestfuzz/fuzzer.c LLVMFuzzerTestOneInput JSON_LOADS_INITIALIZED -
Modules/_xxtestfuzz/fuzzer.c LLVMFuzzerTestOneInput SRE_COMPILE_INITIALIZED -

Can't render this file because it has a wrong number of fields in line 4.