added a few tests

This commit is contained in:
epi 2022-03-03 06:38:37 -06:00
parent 063e7b0420
commit 6c5c812784
4 changed files with 159 additions and 31 deletions

View File

@ -149,8 +149,15 @@ mod tests {
let empty = Document::from_html("<html></html>");
assert_eq!(empty.number_of_terms, 0);
let other_empty = Document::from_html("<html><body><p></p></body></html>");
assert_eq!(other_empty.number_of_terms, 0);
let third_empty = Document::from_html("<!DOCTYPE html><html><!DOCTYPE html><p></p></html>");
assert_eq!(third_empty.number_of_terms, 0);
// p tag for is_text check and comment for is_comment
let doc = Document::from_html(
"<html><body><p>The air quality in Singapore got worse on Wednesday.</p></body></html>",
"<html><body><p>The air quality in Singapore.</p><!--got worse on Wednesday--></body></html>",
);
let expected_terms = ["air", "quality", "singapore", "worse", "wednesday"];
@ -197,4 +204,20 @@ mod tests {
assert!(expected.contains(&key));
}
}
#[test]
/// words that only appear inside script/style tags must not show up among the document's terms
fn document_creation_skips_script_and_style_tags() {
    // only the text inside the <p> tag is expected to contribute terms
    let markup = "<body><script>The air quality</script><style>in Singapore</style><p>got worse on Wednesday.</p></body>";
    let allowed_terms = ["worse", "wednesday"];

    let parsed = Document::from_html(markup);
    let raw_terms: Vec<_> = parsed.terms().keys().map(|term| term.raw()).collect();

    assert_eq!(parsed.number_of_terms(), 2);

    // every term that was kept must be one of the words from the <p> tag
    raw_terms
        .into_iter()
        .for_each(|term| assert!(allowed_terms.contains(&term)));
}
}

View File

@ -152,6 +152,8 @@ mod tests {
model.add_document(d);
}
assert_eq!(model.num_documents(), 4);
model.calculate_tf_idf_scores();
let non_zero_words = model.all_words();

View File

@ -1,3 +1,4 @@
use std::sync::atomic::AtomicBool;
use std::{ops::Deref, sync::atomic::Ordering, sync::Arc, time::Instant};
use anyhow::{bail, Result};
@ -31,6 +32,43 @@ lazy_static! {
pub static ref RESPONSES: FeroxResponses = FeroxResponses::default();
// todo consider removing this
}
/// inspect `pause_flag`; when it is set, await `scanned_urls.pause(true)` and act on the
/// menu command it returns (if any)
///
/// - `MenuCmdResult::Url`: forward the url to the scan handler as a `Command::ScanNewUrl`
/// - `MenuCmdResult::NumCancelled`: when greater than zero, subtract that many from the
///   `TotalExpected` stat
///
/// failures to send either message are logged as warnings and otherwise ignored
///
/// NOTE(review): the original comment describes the pause as a busy loop that only exits
/// once the pause flag is set back to false; that behavior lives in `FeroxScans::pause`
/// and isn't visible from here - confirm
async fn check_for_user_input(
    pause_flag: &AtomicBool,
    scanned_urls: Arc<FeroxScans>,
    handles: Arc<Handles>,
) {
    log::trace!(
        "enter: check_for_user_input({:?}, SCANNED_URLS, HANDLES)",
        pause_flag
    );

    // todo write a test or two for this function at some point...
    // only consult the interactive menu when a pause has actually been requested
    let menu_result = if pause_flag.load(Ordering::Acquire) {
        scanned_urls.pause(true).await
    } else {
        None
    };

    match menu_result {
        Some(MenuCmdResult::Url(url)) => {
            // user wants to add a new url to be scanned, need to send
            // it over to the event handler for processing
            if let Err(e) = handles.send_scan_command(Command::ScanNewUrl(url)) {
                log::warn!("Could not add scan to scan queue: {}", e);
            }
        }
        Some(MenuCmdResult::NumCancelled(num_canx)) if num_canx > 0 => {
            // cancelled scans no longer count toward the expected total
            let update = SubtractFromUsizeField(TotalExpected, num_canx);
            if let Err(e) = handles.stats.send(update) {
                log::warn!("Could not update overall scan bar: {}", e);
            }
        }
        _ => {}
    }

    log::trace!("exit: check_for_user_input");
}
/// handles the main muscle movement of scanning a url
pub struct FeroxScanner {
/// handles to handlers and config
@ -69,6 +107,7 @@ impl FeroxScanner {
}
}
/// produces and awaits tasks (mp of mpsc); responsible for making requests
async fn stream_requests(
&self,
looping_words: Arc<Vec<String>>,
@ -76,6 +115,8 @@ impl FeroxScanner {
scanned_urls: Arc<FeroxScans>,
requester: Arc<Requester>,
) {
log::trace!("enter: stream_requests(params too verbose to print)");
let producers = stream::iter(looping_words.deref().to_owned())
.map(|word| {
let pb = progress_bar.clone(); // progress bar is an Arc around internal state
@ -84,36 +125,11 @@ impl FeroxScanner {
let handles_clone = self.handles.clone();
(
tokio::spawn(async move {
if PAUSE_SCAN.load(Ordering::Acquire) {
// for every word in the wordlist, check to see if PAUSE_SCAN is set to true
// when true; enter a busy loop that only exits by setting PAUSE_SCAN back
// to false
match scanned_urls_clone.pause(true).await {
Some(MenuCmdResult::Url(url)) => {
// user wants to add a new url to be scanned, need to send
// it over to the event handler for processing
handles_clone
.send_scan_command(Command::ScanNewUrl(url))
.unwrap_or_else(|e| {
log::warn!("Could not add scan to scan queue: {}", e)
})
}
Some(MenuCmdResult::NumCancelled(num_canx)) => {
if num_canx > 0 {
handles_clone
.stats
.send(SubtractFromUsizeField(TotalExpected, num_canx))
.unwrap_or_else(|e| {
log::warn!(
"Could not update overall scan bar: {}",
e
)
});
}
}
_ => {}
}
}
// for every word in the wordlist, check to see if user has pressed enter
// in order to go into the interactive menu
check_for_user_input(&PAUSE_SCAN, scanned_urls_clone, handles_clone).await;
// after checking for user input, send the request
requester_clone
.request(&word)
.await
@ -139,6 +155,7 @@ impl FeroxScanner {
log::trace!("awaiting scan producers");
producers.await;
log::trace!("done awaiting scan producers");
log::trace!("exit: stream_requests");
}
/// Scan a given url using a given wordlist

View File

@ -757,3 +757,89 @@ fn collect_backups_makes_appropriate_requests() {
teardown_tmp_directory(tmp_dir);
}
#[test]
/// request four wordlist entries that each respond with a 200 and a text body, then expect
/// the non-zero tf-idf rated words from those bodies to be requested as well
fn collect_words_makes_appropriate_requests() {
    let srv = MockServer::start();

    let wordlist: Vec<_> = ["doc1", "doc2", "doc3", "doc4"]
        .iter()
        .map(|&entry| String::from(entry))
        .collect();

    let (tmp_dir, file) = setup_tmp_directory(&wordlist, "wordlist").unwrap();

    // (path, body) pairs for the pages reachable through the wordlist
    let documents = [
        (
            "/doc1",
            "Air quality in the sunny island improved gradually throughout Wednesday.",
        ),
        (
            "/doc2",
            "Air quality in Singapore on Wednesday continued to get worse as haze hit the island.",
        ),
        (
            "/doc3",
            "The air quality in Singapore is monitored through a network of air monitoring stations located in different parts of the island",
        ),
        (
            "/doc4",
            "The air quality in Singapore got worse on Wednesday.",
        ),
    ];

    for &(doc_path, doc_body) in documents.iter() {
        srv.mock(|when, then| {
            when.method(GET).path(doc_path);
            then.status(200).body(doc_body);
        });
    }

    // paths built from the collected words; each one must be hit exactly once
    let valid_paths = [
        "/gradually",
        "/network",
        "/hit",
        "/located",
        "/continued",
        "/island",
        "/worse",
        "/monitored",
        "/monitoring",
        "/haze",
        "/different",
        "/stations",
        "/sunny",
        "/singapore",
        "/improved",
        "/parts",
        "/wednesday",
    ];

    let valid_mocks: Vec<_> = valid_paths
        .iter()
        .map(|&word_path| {
            srv.mock(|when, then| {
                when.method(GET).path(word_path);
                then.status(200).body("this is a valid test");
            })
        })
        .collect();

    let cmd = Command::cargo_bin("feroxbuster")
        .unwrap()
        .arg("--url")
        .arg(srv.url("/"))
        .arg("--collect-words")
        .arg("--wordlist")
        .arg(file.as_os_str())
        .unwrap();

    // all four original documents should be reported in the scan output
    let reports_all_documents = predicate::str::contains("/doc1")
        .and(predicate::str::contains("/doc2"))
        .and(predicate::str::contains("/doc3"))
        .and(predicate::str::contains("/doc4"));

    cmd.assert().success().stdout(reports_all_documents);

    valid_mocks
        .iter()
        .for_each(|word_mock| assert_eq!(word_mock.hits(), 1));

    teardown_tmp_directory(tmp_dir);
}