From eb27c9a99edb6bf3be1c93579d885edd0f403901 Mon Sep 17 00:00:00 2001 From: Brad Larsen Date: Sun, 10 Dec 2023 12:16:15 -0500 Subject: [PATCH] Add a fuzzer for `Py_CompileStringExFlags` (#111721) --- .../dictionaries/fuzz_pycompile.dict | 165 ++++++++++++++++++ .../fuzz_pycompile_corpus/input1.py | 7 + .../fuzz_pycompile_corpus/input2.py | 5 + .../fuzz_pycompile_corpus/input3.py | 6 + .../fuzz_pycompile_corpus/input4.py | 3 + .../fuzz_pycompile_corpus/input5.py | 7 + .../fuzz_pycompile_corpus/input6.py | 8 + Modules/_xxtestfuzz/fuzz_tests.txt | 1 + Modules/_xxtestfuzz/fuzzer.c | 60 +++++++ Tools/c-analyzer/cpython/ignored.tsv | 3 + 10 files changed, 265 insertions(+) create mode 100644 Modules/_xxtestfuzz/dictionaries/fuzz_pycompile.dict create mode 100644 Modules/_xxtestfuzz/fuzz_pycompile_corpus/input1.py create mode 100644 Modules/_xxtestfuzz/fuzz_pycompile_corpus/input2.py create mode 100644 Modules/_xxtestfuzz/fuzz_pycompile_corpus/input3.py create mode 100644 Modules/_xxtestfuzz/fuzz_pycompile_corpus/input4.py create mode 100644 Modules/_xxtestfuzz/fuzz_pycompile_corpus/input5.py create mode 100644 Modules/_xxtestfuzz/fuzz_pycompile_corpus/input6.py diff --git a/Modules/_xxtestfuzz/dictionaries/fuzz_pycompile.dict b/Modules/_xxtestfuzz/dictionaries/fuzz_pycompile.dict new file mode 100644 index 00000000000..c6a44d94628 --- /dev/null +++ b/Modules/_xxtestfuzz/dictionaries/fuzz_pycompile.dict @@ -0,0 +1,165 @@ +# bits of syntax +"( " +") " +"[ " +"] " +": " +", " +"; " +"{ " +"} " + +# operators +"+ " +"- " +"* " +"** " +"/ " +"// " +"| " +"& " +"< " +"> " +"= " +". 
" +"% " +"` " +"^ " +"~ " +"@ " +"== " +"!= " +"<> " +"<< " +"<= " +">= " +">> " +"+= " +"-= " +"*= " +"** " +"/= " +"//= " +"|= " +"%= " +"&= " +"^= " +"<<= " +">>= " +"**= " +":= " +"@= " + +# whitespace +" " +":\\n " + +# type signatures and functions +"-> " +": List[int]" +": Dict[int, str]" + +"# type:" +"# type: List[int]" +"# type: Dict[int, str]" + +", *" +", /" +", *args" +", **kwargs" +", x=42" + + +# literals +"0x0a" +"0b0000" +"42" +"0o70" +"42j" +"42.01" +"-5" +"+42e-3" +"0_0_0" +"1e1_0" +".1_4" + +"{}" + +# variable names +"x" +"y" + +# strings +"r'x'" + +"b'x'" + +"rb\"x\"" + +"br\"x\"" + +"f'{x + 5}'" +"f\"{x + 5}\"" + +"'''" +"\"\"\"" + +"\\u" +"\\x" + +# keywords +"def " +"del " +"pass " +"break " +"continue " +"return " +"raise " +"from " +"import " +".. " +"... " +"__future__ " +"as " +"global " +"nonlocal " +"assert " +"print " +"if " +"elif " +"else: " +"while " +"try: " +"except " +"finally: " +"with " +"lambda " +"or " +"and " +"not " +"None " +"__peg_parser__" +"True " +"False " +"yield " +"async " +"await " +"for " +"in " +"is " +"class " + +# shebangs and encodings +"#!" 
+"# coding:" +"# coding=" +"# coding: latin-1" +"# coding=latin-1" +"# coding: utf-8" +"# coding=utf-8" +"# coding: ascii" +"# coding=ascii" +"# coding: cp860" +"# coding=cp860" +"# coding: gbk" +"# coding=gbk" diff --git a/Modules/_xxtestfuzz/fuzz_pycompile_corpus/input1.py b/Modules/_xxtestfuzz/fuzz_pycompile_corpus/input1.py new file mode 100644 index 00000000000..c43994dda29 --- /dev/null +++ b/Modules/_xxtestfuzz/fuzz_pycompile_corpus/input1.py @@ -0,0 +1,7 @@ +from __future__ import annotations + +def test() -> None: + x: list[int] = [] + x: dict[int, str] = {} + x: set[bytes] = {} + print(5 + 42 * 3, x) diff --git a/Modules/_xxtestfuzz/fuzz_pycompile_corpus/input2.py b/Modules/_xxtestfuzz/fuzz_pycompile_corpus/input2.py new file mode 100644 index 00000000000..7be326e95be --- /dev/null +++ b/Modules/_xxtestfuzz/fuzz_pycompile_corpus/input2.py @@ -0,0 +1,5 @@ +class Foo(metaclass=42): + __slots__ = ['x'] + pass + +foo = Foo() diff --git a/Modules/_xxtestfuzz/fuzz_pycompile_corpus/input3.py b/Modules/_xxtestfuzz/fuzz_pycompile_corpus/input3.py new file mode 100644 index 00000000000..9bc3a45ebe7 --- /dev/null +++ b/Modules/_xxtestfuzz/fuzz_pycompile_corpus/input3.py @@ -0,0 +1,6 @@ +def evens(): + i = 0 + while True: + i += 1 + if i % 2 == 0: + yield i diff --git a/Modules/_xxtestfuzz/fuzz_pycompile_corpus/input4.py b/Modules/_xxtestfuzz/fuzz_pycompile_corpus/input4.py new file mode 100644 index 00000000000..490de90fb97 --- /dev/null +++ b/Modules/_xxtestfuzz/fuzz_pycompile_corpus/input4.py @@ -0,0 +1,3 @@ +async def hello(name: str): + await name + print(name) diff --git a/Modules/_xxtestfuzz/fuzz_pycompile_corpus/input5.py b/Modules/_xxtestfuzz/fuzz_pycompile_corpus/input5.py new file mode 100644 index 00000000000..4cfcfe590eb --- /dev/null +++ b/Modules/_xxtestfuzz/fuzz_pycompile_corpus/input5.py @@ -0,0 +1,7 @@ +try: + eval('importer exporter... 
really long matches') +except SyntaxError: + print("nothing to see here") +finally: + print("all done here") + raise diff --git a/Modules/_xxtestfuzz/fuzz_pycompile_corpus/input6.py b/Modules/_xxtestfuzz/fuzz_pycompile_corpus/input6.py new file mode 100644 index 00000000000..d8e59ade503 --- /dev/null +++ b/Modules/_xxtestfuzz/fuzz_pycompile_corpus/input6.py @@ -0,0 +1,8 @@ +"""Some module docstring""" +import sys + +def main(): + print("Hello world!", file=sys.stderr) + +if __name__ == '__main__': + main() diff --git a/Modules/_xxtestfuzz/fuzz_tests.txt b/Modules/_xxtestfuzz/fuzz_tests.txt index 40aa22110e7..ea6f982eefc 100644 --- a/Modules/_xxtestfuzz/fuzz_tests.txt +++ b/Modules/_xxtestfuzz/fuzz_tests.txt @@ -8,3 +8,4 @@ fuzz_csv_reader fuzz_struct_unpack fuzz_ast_literal_eval fuzz_elementtree_parsewhole +fuzz_pycompile diff --git a/Modules/_xxtestfuzz/fuzzer.c b/Modules/_xxtestfuzz/fuzzer.c index 77d29ce773a..e133b4d3c44 100644 --- a/Modules/_xxtestfuzz/fuzzer.c +++ b/Modules/_xxtestfuzz/fuzzer.c @@ -501,6 +501,63 @@ static int fuzz_elementtree_parsewhole(const char* data, size_t size) { return 0; } +#define MAX_PYCOMPILE_TEST_SIZE 16384 +static char pycompile_scratch[MAX_PYCOMPILE_TEST_SIZE]; + +static const int start_vals[] = {Py_eval_input, Py_single_input, Py_file_input}; +const size_t NUM_START_VALS = sizeof(start_vals) / sizeof(start_vals[0]); + +static const int optimize_vals[] = {-1, 0, 1, 2}; +const size_t NUM_OPTIMIZE_VALS = sizeof(optimize_vals) / sizeof(optimize_vals[0]); + +/* Fuzz `Py_CompileStringExFlags` using a variety of input parameters. 
+ * That function is essentially behind the `compile` builtin */ +static int fuzz_pycompile(const char* data, size_t size) { + // Ignore overly-large inputs, and account for a NUL terminator + if (size > MAX_PYCOMPILE_TEST_SIZE - 1) { + return 0; + } + + // Need 2 bytes for parameter selection + if (size < 2) { + return 0; + } + + // Use first byte to determine element of `start_vals` to use + unsigned char start_idx = (unsigned char) data[0]; + int start = start_vals[start_idx % NUM_START_VALS]; + + // Use second byte to determine element of `optimize_vals` to use + unsigned char optimize_idx = (unsigned char) data[1]; + int optimize = optimize_vals[optimize_idx % NUM_OPTIMIZE_VALS]; + + // Create a NUL-terminated C string from the remaining input + memcpy(pycompile_scratch, data + 2, size - 2); + // Put a NUL terminator just after the copied data. (Space was reserved already.) + pycompile_scratch[size - 2] = '\0'; + + // XXX: instead of always using NULL for the `flags` value to + // `Py_CompileStringExFlags`, there are many flags that conditionally + // change parser behavior: + // + // #define PyCF_TYPE_COMMENTS 0x1000 + // #define PyCF_ALLOW_TOP_LEVEL_AWAIT 0x2000 + // #define PyCF_ONLY_AST 0x0400 + // + // It would be good to test various combinations of these, too. + PyCompilerFlags *flags = NULL; + + PyObject *result = Py_CompileStringExFlags(pycompile_scratch, "", start, flags, optimize); + if (result == NULL) { + /* compilation failed, most likely from a syntax error */ + PyErr_Clear(); + } else { + Py_DECREF(result); + } + + return 0; +} + /* Run fuzzer and abort on failure. 
*/ static int _run_fuzz(const uint8_t *data, size_t size, int(*fuzzer)(const char* , size_t)) { int rv = fuzzer((const char*) data, size); @@ -642,6 +699,9 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { } rv |= _run_fuzz(data, size, fuzz_elementtree_parsewhole); +#endif +#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_pycompile) + rv |= _run_fuzz(data, size, fuzz_pycompile); #endif return rv; } diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index d59e0ddcdfd..ff6e1ef4f99 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -599,6 +599,9 @@ Modules/_xxtestfuzz/fuzzer.c - re_error_exception - Modules/_xxtestfuzz/fuzzer.c - struct_error - Modules/_xxtestfuzz/fuzzer.c - struct_unpack_method - Modules/_xxtestfuzz/fuzzer.c - xmlparser_type - +Modules/_xxtestfuzz/fuzzer.c - pycompile_scratch - +Modules/_xxtestfuzz/fuzzer.c - start_vals - +Modules/_xxtestfuzz/fuzzer.c - optimize_vals - Modules/_xxtestfuzz/fuzzer.c LLVMFuzzerTestOneInput CSV_READER_INITIALIZED - Modules/_xxtestfuzz/fuzzer.c LLVMFuzzerTestOneInput JSON_LOADS_INITIALIZED - Modules/_xxtestfuzz/fuzzer.c LLVMFuzzerTestOneInput SRE_COMPILE_INITIALIZED -