mirror of
https://github.com/epi052/feroxbuster
synced 2024-07-08 19:45:45 +00:00
updated Url::parse callsites to use the new utility function
This commit is contained in:
parent
3dd070a0db
commit
f1fd2fc379
|
@ -2,12 +2,11 @@ use super::entry::BannerEntry;
|
|||
use crate::{
|
||||
config::Configuration,
|
||||
event_handlers::Handles,
|
||||
utils::{logged_request, status_colorizer},
|
||||
utils::{logged_request, parse_url_with_raw_path, status_colorizer},
|
||||
DEFAULT_IGNORED_EXTENSIONS, DEFAULT_METHOD, DEFAULT_STATUS_CODES, VERSION,
|
||||
};
|
||||
use anyhow::{bail, Result};
|
||||
use console::{style, Emoji};
|
||||
use reqwest::Url;
|
||||
use serde_json::Value;
|
||||
use std::{io::Write, sync::Arc};
|
||||
|
||||
|
@ -478,7 +477,7 @@ by Ben "epi" Risher {} ver: {}"#,
|
|||
pub async fn check_for_updates(&mut self, url: &str, handles: Arc<Handles>) -> Result<()> {
|
||||
log::trace!("enter: needs_update({}, {:?})", url, handles);
|
||||
|
||||
let api_url = Url::parse(url)?;
|
||||
let api_url = parse_url_with_raw_path(url)?;
|
||||
|
||||
let result = logged_request(&api_url, DEFAULT_METHOD, None, handles.clone()).await?;
|
||||
let body = result.text().await?;
|
||||
|
|
|
@ -6,7 +6,10 @@ use super::utils::{
|
|||
use crate::config::determine_output_level;
|
||||
use crate::config::utils::determine_requester_policy;
|
||||
use crate::{
|
||||
client, parser, scan_manager::resume_scan, traits::FeroxSerialize, utils::fmt_err,
|
||||
client, parser,
|
||||
scan_manager::resume_scan,
|
||||
traits::FeroxSerialize,
|
||||
utils::{fmt_err, parse_url_with_raw_path},
|
||||
DEFAULT_CONFIG_NAME,
|
||||
};
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
|
@ -673,7 +676,7 @@ impl Configuration {
|
|||
for denier in arg {
|
||||
// could be an absolute url or a regex, need to determine which and populate the
|
||||
// appropriate vector
|
||||
match Url::parse(denier.trim_end_matches('/')) {
|
||||
match parse_url_with_raw_path(denier.trim_end_matches('/')) {
|
||||
Ok(absolute) => {
|
||||
// denier is an absolute url and can be parsed as such
|
||||
config.url_denylist.push(absolute);
|
||||
|
|
|
@ -16,7 +16,7 @@ use crate::{
|
|||
use super::command::Command::AddToUsizeField;
|
||||
use super::*;
|
||||
use crate::statistics::StatField;
|
||||
use reqwest::Url;
|
||||
use crate::utils::parse_url_with_raw_path;
|
||||
use tokio::time::Duration;
|
||||
|
||||
#[derive(Debug)]
|
||||
|
@ -325,7 +325,9 @@ impl ScanHandler {
|
|||
self.data.add_directory_scan(&target, order).1 // add the new target; return FeroxScan
|
||||
};
|
||||
|
||||
if should_test_deny && should_deny_url(&Url::parse(&target)?, self.handles.clone())? {
|
||||
if should_test_deny
|
||||
&& should_deny_url(&parse_url_with_raw_path(&target)?, self.handles.clone())?
|
||||
{
|
||||
// response was caught by a user-provided deny list
|
||||
// checking this last, since it's most susceptible to longer runtimes due to what
|
||||
// input is received
|
||||
|
|
|
@ -11,7 +11,10 @@ use crate::{
|
|||
StatField::{LinksExtracted, TotalExpected},
|
||||
},
|
||||
url::FeroxUrl,
|
||||
utils::{logged_request, make_request, send_try_recursion_command, should_deny_url},
|
||||
utils::{
|
||||
logged_request, make_request, parse_url_with_raw_path, send_try_recursion_command,
|
||||
should_deny_url,
|
||||
},
|
||||
ExtractionResult, DEFAULT_METHOD,
|
||||
};
|
||||
use anyhow::{bail, Context, Result};
|
||||
|
@ -122,7 +125,7 @@ impl<'a> Extractor<'a> {
|
|||
) -> Result<()> {
|
||||
log::trace!("enter: parse_url_and_add_subpaths({:?})", links);
|
||||
|
||||
match Url::parse(url_to_parse) {
|
||||
match parse_url_with_raw_path(url_to_parse) {
|
||||
Ok(absolute) => {
|
||||
if absolute.domain() != original_url.domain()
|
||||
|| absolute.host() != original_url.host()
|
||||
|
@ -475,7 +478,7 @@ impl<'a> Extractor<'a> {
|
|||
ExtractionTarget::ResponseBody | ExtractionTarget::DirectoryListing => {
|
||||
self.response.unwrap().url().clone()
|
||||
}
|
||||
ExtractionTarget::RobotsTxt => match Url::parse(&self.url) {
|
||||
ExtractionTarget::RobotsTxt => match parse_url_with_raw_path(&self.url) {
|
||||
Ok(u) => u,
|
||||
Err(e) => {
|
||||
bail!("Could not parse {}: {}", self.url, e);
|
||||
|
@ -524,7 +527,7 @@ impl<'a> Extractor<'a> {
|
|||
|
||||
for capture in self.robots_regex.captures_iter(body) {
|
||||
if let Some(new_path) = capture.name("url_path") {
|
||||
let mut new_url = Url::parse(&self.url)?;
|
||||
let mut new_url = parse_url_with_raw_path(&self.url)?;
|
||||
|
||||
new_url.set_path(new_path.as_str());
|
||||
|
||||
|
@ -654,7 +657,7 @@ impl<'a> Extractor<'a> {
|
|||
&client
|
||||
};
|
||||
|
||||
let mut url = Url::parse(&self.url)?;
|
||||
let mut url = parse_url_with_raw_path(&self.url)?;
|
||||
url.set_path(location); // overwrite existing path
|
||||
|
||||
// purposefully not using logged_request here due to using the special client
|
||||
|
|
|
@ -4,11 +4,10 @@ use crate::event_handlers::Handles;
|
|||
use crate::filters::similarity::SIM_HASHER;
|
||||
use crate::nlp::preprocess;
|
||||
use crate::response::FeroxResponse;
|
||||
use crate::utils::logged_request;
|
||||
use crate::utils::{logged_request, parse_url_with_raw_path};
|
||||
use crate::DEFAULT_METHOD;
|
||||
use anyhow::Result;
|
||||
use regex::Regex;
|
||||
use reqwest::Url;
|
||||
use std::sync::Arc;
|
||||
|
||||
/// wrapper around logic necessary to create a SimilarityFilter
|
||||
|
@ -23,7 +22,7 @@ pub(crate) async fn create_similarity_filter(
|
|||
handles: Arc<Handles>,
|
||||
) -> Result<SimilarityFilter> {
|
||||
// url as-is based on input, ignores user-specified url manipulation options (add-slash etc)
|
||||
let url = Url::parse(similarity_filter)?;
|
||||
let url = parse_url_with_raw_path(similarity_filter)?;
|
||||
|
||||
// attempt to request the given url
|
||||
let resp = logged_request(&url, DEFAULT_METHOD, None, handles.clone()).await?;
|
||||
|
|
|
@ -21,7 +21,7 @@ use crate::{
|
|||
event_handlers::{Command, Handles},
|
||||
traits::FeroxSerialize,
|
||||
url::FeroxUrl,
|
||||
utils::{self, fmt_err, status_colorizer},
|
||||
utils::{self, fmt_err, parse_url_with_raw_path, status_colorizer},
|
||||
CommandSender,
|
||||
};
|
||||
|
||||
|
@ -140,7 +140,7 @@ impl FeroxResponse {
|
|||
|
||||
/// Set `FeroxResponse`'s `url` attribute, has no effect if an error occurs
|
||||
pub fn set_url(&mut self, url: &str) {
|
||||
match Url::parse(url) {
|
||||
match parse_url_with_raw_path(url) {
|
||||
Ok(url) => {
|
||||
self.url = url;
|
||||
}
|
||||
|
@ -599,7 +599,7 @@ impl<'de> Deserialize<'de> for FeroxResponse {
|
|||
match key.as_str() {
|
||||
"url" => {
|
||||
if let Some(url) = value.as_str() {
|
||||
if let Ok(parsed) = Url::parse(url) {
|
||||
if let Ok(parsed) = parse_url_with_raw_path(url) {
|
||||
response.url = parsed;
|
||||
}
|
||||
}
|
||||
|
|
25
src/url.rs
25
src/url.rs
|
@ -1,3 +1,4 @@
|
|||
use crate::utils::parse_url_with_raw_path;
|
||||
use crate::{event_handlers::Handles, statistics::StatError::UrlFormat, Command::AddError};
|
||||
use anyhow::{anyhow, bail, Result};
|
||||
use reqwest::Url;
|
||||
|
@ -142,19 +143,19 @@ impl FeroxUrl {
|
|||
word = word.trim_start_matches('/').to_string();
|
||||
};
|
||||
|
||||
let base_url = Url::parse(&url)?;
|
||||
let joined = base_url.join(&word)?;
|
||||
let base_url = parse_url_with_raw_path(&url)?;
|
||||
let mut joined = base_url.join(&word)?;
|
||||
|
||||
if self.handles.config.queries.is_empty() {
|
||||
// no query params to process
|
||||
log::trace!("exit: format -> {}", joined);
|
||||
Ok(joined)
|
||||
} else {
|
||||
let with_params =
|
||||
Url::parse_with_params(joined.as_str(), &self.handles.config.queries)?;
|
||||
log::trace!("exit: format_url -> {}", with_params);
|
||||
Ok(with_params) // request with params attached
|
||||
if !self.handles.config.queries.is_empty() {
|
||||
// calling query_pairs_mut adds a '?' to the url, whether or not there are queries to be added
|
||||
// so we need to check if there are queries to be added before blindly adding the '?'
|
||||
joined
|
||||
.query_pairs_mut()
|
||||
.extend_pairs(self.handles.config.queries.iter());
|
||||
}
|
||||
|
||||
log::trace!("exit: format_url -> {}", joined);
|
||||
Ok(joined)
|
||||
}
|
||||
|
||||
/// Simple helper to abstract away adding a forward-slash to a url if not present
|
||||
|
@ -189,7 +190,7 @@ impl FeroxUrl {
|
|||
|
||||
let target = self.normalize();
|
||||
|
||||
let parsed = Url::parse(&target)?;
|
||||
let parsed = parse_url_with_raw_path(&target)?;
|
||||
let parts = parsed
|
||||
.path_segments()
|
||||
.ok_or_else(|| anyhow!("No path segments found"))?;
|
||||
|
|
22
src/utils.rs
22
src/utils.rs
|
@ -425,9 +425,14 @@ fn should_deny_absolute(url_to_test: &Url, denier: &Url, handles: Arc<Handles>)
|
|||
// current deny-url, now we just need to check to see if this deny-url is a parent
|
||||
// to a scanned url that is also a parent of the given url
|
||||
for ferox_scan in handles.ferox_scans()?.get_active_scans() {
|
||||
let scanner = Url::parse(ferox_scan.url().trim_end_matches('/'))
|
||||
let scanner = parse_url_with_raw_path(ferox_scan.url().trim_end_matches('/'))
|
||||
.with_context(|| format!("Could not parse {ferox_scan} as a url"))?;
|
||||
|
||||
// by calling the new parse_url_with_raw_path, and reaching this point without an
|
||||
// error, we know we have an authority and therefore a host. leaving the code
|
||||
// below, but we should never hit the else condition. leaving it in so if we find
|
||||
// a case where I'm mistaken, we'll know about it and can address it
|
||||
|
||||
if let Some(scan_host) = scanner.host() {
|
||||
// same domain/ip check we perform on the denier above
|
||||
if tested_host != scan_host {
|
||||
|
@ -436,7 +441,7 @@ fn should_deny_absolute(url_to_test: &Url, denier: &Url, handles: Arc<Handles>)
|
|||
}
|
||||
} else {
|
||||
// couldn't process .host from scanner
|
||||
continue;
|
||||
unreachable!("should_deny_absolute: scanner.host() returned None, which shouldn't be possible");
|
||||
};
|
||||
|
||||
let scan_path = scanner.path();
|
||||
|
@ -487,7 +492,7 @@ pub fn should_deny_url(url: &Url, handles: Arc<Handles>) -> Result<bool> {
|
|||
|
||||
// normalization for comparison is to remove the trailing / if one exists, this is done for
|
||||
// the given url and any url to which it's compared
|
||||
let normed_url = Url::parse(url.to_string().trim_end_matches('/'))?;
|
||||
let normed_url = parse_url_with_raw_path(url.to_string().trim_end_matches('/'))?;
|
||||
|
||||
for denier in &handles.config.url_denylist {
|
||||
// note to self: it may seem as though we can use regex only for --dont-scan, however, in
|
||||
|
@ -555,6 +560,7 @@ pub fn parse_url_with_raw_path(url: &str) -> Result<Url> {
|
|||
if !parsed.has_authority() {
|
||||
// parsed correctly, but no authority, meaning mailto: or tel: or
|
||||
// some other url that we don't care about
|
||||
println!("url to parse has no authority and is therefore invalid");
|
||||
bail!("url to parse has no authority and is therefore invalid");
|
||||
}
|
||||
|
||||
|
@ -1004,6 +1010,13 @@ mod tests {
|
|||
/// provide a denier from which we can't check a host, which results in no comparison, expect false
|
||||
/// because the denier is a parent to the tested, even though the scanned doesn't compare, it
|
||||
/// still returns true
|
||||
///
|
||||
/// note: adding parse_url_with_raw_path changed the behavior of this test, it used to return
|
||||
/// true, now it returns false. see my note in should_deny_absolute and the unreachable!
|
||||
/// call block to see why
|
||||
///
|
||||
/// leaving this test here to document the behavior change and to catch regressions in the
|
||||
/// new expected behavior
|
||||
fn should_deny_url_doesnt_compare_non_domains_in_scanned() {
|
||||
let deny_url = "https://testdomain.com/";
|
||||
let scan_url = "unix:/run/foo.socket";
|
||||
|
@ -1017,8 +1030,7 @@ mod tests {
|
|||
let config = Arc::new(config);
|
||||
|
||||
let handles = Arc::new(Handles::for_testing(Some(scans), Some(config)).0);
|
||||
|
||||
assert!(should_deny_url(&tested_url, handles).unwrap());
|
||||
assert!(!should_deny_url(&tested_url, handles).unwrap());
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
Loading…
Reference in New Issue
Block a user