mirror of
https://github.com/XAMPPRocky/tokei
synced 2024-10-01 05:23:37 +00:00
Fuzzing (#726)
* Add fuzzing support See fuzz/README.md for details. Example bug: #725 * improve fuzz docs * fuzzing: make relevant config part of input Most of the config is to do with what files to parse, however one setting - treat_doc_strings_as_comments - does impact parse_from_slice * fuzzing: improve docs + config clarity * fuzz/README.md: install instructions + another todo item
This commit is contained in:
parent
9f4f457a3a
commit
66967a1b9e
21
Cargo.lock
generated
21
Cargo.lock
generated
|
@ -18,6 +18,15 @@ dependencies = [
|
|||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "arbitrary"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "698b65a961a9d730fb45b6b0327e20207810c9f61ee421b082b27ba003f49e2b"
|
||||
dependencies = [
|
||||
"derive_arbitrary",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "arrayvec"
|
||||
version = "0.4.12"
|
||||
|
@ -207,6 +216,17 @@ dependencies = [
|
|||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "derive_arbitrary"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "df89dd0d075dea5cc5fdd6d5df6b8a61172a710b3efac1d6bdb9dd8b78f82c1a"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "deunicode"
|
||||
version = "0.4.3"
|
||||
|
@ -1073,6 +1093,7 @@ name = "tokei"
|
|||
version = "12.1.2"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"arbitrary",
|
||||
"clap",
|
||||
"crossbeam-channel",
|
||||
"dashmap",
|
||||
|
|
|
@ -30,6 +30,7 @@ serde_json = "1.0.64"
|
|||
|
||||
[dependencies]
|
||||
aho-corasick = "0.7.15"
|
||||
arbitrary = { version = "1.0.0", features = ["derive"] }
|
||||
clap = "2.33.3"
|
||||
crossbeam-channel = "0.5.0"
|
||||
encoding_rs_io = "0.1.7"
|
||||
|
|
4
fuzz/.gitignore
vendored
Normal file
4
fuzz/.gitignore
vendored
Normal file
|
@ -0,0 +1,4 @@
|
|||
|
||||
target
|
||||
corpus
|
||||
artifacts
|
1147
fuzz/Cargo.lock
generated
Normal file
1147
fuzz/Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load diff
33
fuzz/Cargo.toml
Normal file
33
fuzz/Cargo.toml
Normal file
|
@ -0,0 +1,33 @@
|
|||
|
||||
[package]
|
||||
name = "tokei-fuzz"
|
||||
version = "0.0.1"
|
||||
authors = ["Michael Macnair"]
|
||||
publish = false
|
||||
edition = "2018"
|
||||
|
||||
[package.metadata]
|
||||
cargo-fuzz = true
|
||||
|
||||
[dependencies]
|
||||
libfuzzer-sys = "0.4"
|
||||
arbitrary = { version = "1.0.0", features = ["derive"] }
|
||||
|
||||
[dependencies.tokei]
|
||||
path = ".."
|
||||
|
||||
# Prevent this from interfering with workspaces
|
||||
[workspace]
|
||||
members = ["."]
|
||||
|
||||
[[bin]]
|
||||
name = "parse_from_slice_total"
|
||||
path = "fuzz_targets/parse_from_slice_total.rs"
|
||||
test = false
|
||||
doc = false
|
||||
|
||||
[[bin]]
|
||||
name = "parse_from_slice_panic"
|
||||
path = "fuzz_targets/parse_from_slice_panic.rs"
|
||||
test = false
|
||||
doc = false
|
30
fuzz/README.md
Normal file
30
fuzz/README.md
Normal file
|
@ -0,0 +1,30 @@
|
|||
## Fuzzing Tokei
|
||||
|
||||
Tokei can be fuzzed using libFuzzer, via [cargo-fuzz](https://github.com/rust-fuzz/cargo-fuzz/).
|
||||
|
||||
First install cargo-fuzz: `cargo install cargo-fuzz`.
|
||||
|
||||
To launch a fuzzing job: `cargo +nightly fuzz run <target>` - it will run until you kill it with ctrl-c.
|
||||
|
||||
To use multiple cores: `cargo +nightly fuzz run <target> --jobs=6`
|
||||
|
||||
To speed things up (at the expense of missing bugs that only manifest in larger files):
|
||||
`cargo +nightly fuzz run <target> -- -max_len=200`
|
||||
|
||||
Available fuzz targets:
|
||||
|
||||
- `parse_from_slice_panic` - checks that all of the LanguageType instances' `parse_from_slice` function doesn't panic.
|
||||
- `parse_from_slice_total` - checks that the language stats pass a basic test of reporting no more total lines than
|
||||
there are new lines in the file. At the time of writing there are low-hanging bugs here.
|
||||
|
||||
With the two `parse_from_slice` fuzz targets, it makes sense to share a common corpus directory as they have identical
|
||||
input formats, e.g.: `cargo +nightly fuzz run parse_from_slice_{panic,total} fuzz/corpus/common`
|
||||
|
||||
Potential improvements:
|
||||
|
||||
- Build the fuzz harnesses in CI, so they don't rot.
|
||||
- Do some coverage analysis to check if we're missing any code we would benefit from fuzzing (once it's
|
||||
[integrated into cargo-fuzz](https://github.com/rust-fuzz/cargo-fuzz/pull/248))
|
||||
- Tighten the `parse_from_slice_total` fuzz target to check the total lines exactly matches the number of lines in the
|
||||
file. Only once any bugs found with the current fuzzer are fixed.
|
||||
- Check in a minimized corpus, and run regression over it in CI.
|
51
fuzz/fuzz_targets/parse_from_slice.rs
Normal file
51
fuzz/fuzz_targets/parse_from_slice.rs
Normal file
|
@ -0,0 +1,51 @@
|
|||
use arbitrary::Arbitrary;
|
||||
use std::str;
|
||||
|
||||
use tokei::{Config, LanguageType};
|
||||
|
||||
#[derive(Arbitrary, Debug)]
|
||||
pub struct FuzzInput<'a> {
|
||||
lang: LanguageType,
|
||||
treat_doc_strings_as_comments: bool,
|
||||
data: &'a [u8],
|
||||
}
|
||||
|
||||
// The first byte of data is used to select a language; remaining input is parsed
|
||||
// If check_total is true, asserts that the parsed stats pass a basic sanity test
|
||||
pub fn parse_from_slice(input: FuzzInput, check_total: bool) {
|
||||
let config = &Config {
|
||||
treat_doc_strings_as_comments: Some(input.treat_doc_strings_as_comments),
|
||||
|
||||
// these options don't impact the behaviour of parse_from_slice:
|
||||
columns: None,
|
||||
hidden: None,
|
||||
no_ignore: None,
|
||||
no_ignore_parent: None,
|
||||
no_ignore_dot: None,
|
||||
no_ignore_vcs: None,
|
||||
sort: None,
|
||||
types: None,
|
||||
};
|
||||
|
||||
// check that parsing doesn't panic
|
||||
let stats = input.lang.parse_from_slice(input.data, config);
|
||||
|
||||
if check_total {
|
||||
// verify that the parsed total lines is not more than the total occurences of \n and \r\n.
|
||||
// if/when all of the current discrepancies are fixed, we could make this stronger by checking it is equal.
|
||||
if let Ok(s) = str::from_utf8(input.data) {
|
||||
assert!(
|
||||
stats.lines() <= s.lines().count(),
|
||||
"{} got more total lines ({}) than str::lines ({}). Code: {}, Comments: {}, Blanks: {}. treat_doc_strings_as_comments: {}. File contents (as UTF-8):\n{}",
|
||||
input.lang.name(),
|
||||
stats.lines(),
|
||||
s.lines().count(),
|
||||
stats.code,
|
||||
stats.comments,
|
||||
input.treat_doc_strings_as_comments,
|
||||
stats.blanks,
|
||||
s
|
||||
)
|
||||
};
|
||||
}
|
||||
}
|
9
fuzz/fuzz_targets/parse_from_slice_panic.rs
Normal file
9
fuzz/fuzz_targets/parse_from_slice_panic.rs
Normal file
|
@ -0,0 +1,9 @@
|
|||
#![no_main]
|
||||
use libfuzzer_sys::fuzz_target;
|
||||
|
||||
mod parse_from_slice;
|
||||
use parse_from_slice::{parse_from_slice, FuzzInput};
|
||||
|
||||
fuzz_target!(|data: FuzzInput| {
|
||||
parse_from_slice(data, false);
|
||||
});
|
9
fuzz/fuzz_targets/parse_from_slice_total.rs
Normal file
9
fuzz/fuzz_targets/parse_from_slice_total.rs
Normal file
|
@ -0,0 +1,9 @@
|
|||
#![no_main]
|
||||
use libfuzzer_sys::fuzz_target;
|
||||
|
||||
mod parse_from_slice;
|
||||
use parse_from_slice::{parse_from_slice, FuzzInput};
|
||||
|
||||
fuzz_target!(|data: FuzzInput| {
|
||||
parse_from_slice(data, true);
|
||||
});
|
|
@ -1,9 +1,11 @@
|
|||
use arbitrary::Arbitrary;
|
||||
|
||||
/// Represents a individual programming language. Can be used to provide
|
||||
/// information about the language, such as multi line comments, single line
|
||||
/// comments, string literal syntax, whether a given language allows nesting
|
||||
/// comments.
|
||||
#[derive(Deserialize, Serialize)]
|
||||
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
|
||||
#[derive(Arbitrary, Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
|
||||
#[non_exhaustive]
|
||||
pub enum LanguageType {
|
||||
{% for key, _ in languages -%}
|
||||
|
|
Loading…
Reference in a new issue