feat(parser): add support for reading GZIP files

This commit is contained in:
Orhun Parmaksız 2021-12-18 19:24:46 +03:00
parent aeef248a69
commit 3c6fb58a41
No known key found for this signature in database
GPG key ID: F83424824B3E4B90
3 changed files with 69 additions and 7 deletions

38
Cargo.lock generated
View file

@ -2,6 +2,12 @@
# It is not intended for manual editing.
version = 3
[[package]]
name = "adler"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
[[package]]
name = "aho-corasick"
version = "0.7.18"
@ -66,6 +72,15 @@ dependencies = [
"winapi",
]
[[package]]
name = "crc32fast"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "738c290dfaea84fc1ca15ad9c168d083b05a714e1efddd8edaab678dc28d2836"
dependencies = [
"cfg-if",
]
[[package]]
name = "crossbeam-channel"
version = "0.5.1"
@ -137,6 +152,18 @@ version = "1.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"
[[package]]
name = "flate2"
version = "1.0.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e6988e897c1c9c485f43b47a529cef42fde0547f9d8d41a7062518f1d8fc53f"
dependencies = [
"cfg-if",
"crc32fast",
"libc",
"miniz_oxide",
]
[[package]]
name = "fnv"
version = "1.0.7"
@ -256,6 +283,16 @@ dependencies = [
"autocfg",
]
[[package]]
name = "miniz_oxide"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a92518e98c078586bc6c934028adcca4c92a53d6a958196de835170a01d84e4b"
dependencies = [
"adler",
"autocfg",
]
[[package]]
name = "num_cpus"
version = "1.13.0"
@ -455,6 +492,7 @@ dependencies = [
name = "systeroid-parser"
version = "0.1.0"
dependencies = [
"flate2",
"globwalk",
"regex",
"thiserror",

View file

@ -8,3 +8,4 @@ edition = "2021"
regex = "1.5.4"
globwalk = "0.8.1"
thiserror = "1.0.29"
flate2 = "1.0.22"

View file

@ -1,7 +1,12 @@
use flate2::read::GzDecoder;
use std::fs::File;
use std::io::{BufRead, BufReader as IoBufReader, Result as IoResult};
use std::io::{
BufRead, BufReader as IoBufReader, Error as IoError, ErrorKind as IoErrorKind, Read,
Result as IoResult,
};
use std::path::Path;
use std::rc::Rc;
use std::str;
/// Default buffer size of the reader.
const DEFAULT_BUFFER_SIZE: usize = 1024;
@ -11,7 +16,7 @@ pub struct BufReader {
/// Inner type.
reader: IoBufReader<File>,
/// Buffer.
buffer: Rc<String>,
buffer: Rc<Vec<u8>>,
}
impl BufReader {
@ -24,15 +29,15 @@ impl BufReader {
}
/// Creates a new buffer with the given size.
fn new_buffer(buffer_size: Option<usize>) -> Rc<String> {
Rc::new(String::with_capacity(
fn new_buffer(buffer_size: Option<usize>) -> Rc<Vec<u8>> {
Rc::new(Vec::with_capacity(
buffer_size.unwrap_or(DEFAULT_BUFFER_SIZE),
))
}
}
impl Iterator for BufReader {
type Item = IoResult<Rc<String>>;
type Item = IoResult<Rc<Vec<u8>>>;
fn next(&mut self) -> Option<Self::Item> {
let buffer = match Rc::get_mut(&mut self.buffer) {
Some(rc_buffer) => {
@ -45,7 +50,7 @@ impl Iterator for BufReader {
}
};
self.reader
.read_line(buffer)
.read_until(b'\n', buffer)
.map(|u| {
if u == 0 {
None
@ -63,11 +68,29 @@ impl Iterator for BufReader {
pub fn read_to_string<P: AsRef<Path>>(path: P) -> IoResult<String> {
let mut lines = Vec::<String>::new();
for line in BufReader::open(path, None)? {
lines.push(line?.to_string());
lines.push(
str::from_utf8(&line?)
.map_err(|e| IoError::new(IoErrorKind::Other, e))?
.to_string(),
);
}
Ok(lines.join(""))
}
/// Reads (decodes) the given gzip file into a string.
///
/// Uses [`BufReader`] under the hood.
pub fn read_gzip<P: AsRef<Path>>(path: P) -> IoResult<String> {
let mut bytes = Vec::<u8>::new();
for read_bytes in BufReader::open(path, None)? {
bytes.extend(read_bytes?.to_vec());
}
let mut gz = GzDecoder::new(&bytes[..]);
let mut data = String::new();
gz.read_to_string(&mut data)?;
Ok(data)
}
#[cfg(test)]
mod tests {
use super::*;