mirror of
https://github.com/koraa/huniq
synced 2024-10-02 18:23:29 +00:00
feat: Tests and basic benchmarks
This commit is contained in:
parent
5ad413137b
commit
2b82786895
71
benchmark.sh
Normal file
71
benchmark.sh
Normal file
|
@ -0,0 +1,71 @@
|
||||||
|
#!/bin/bash
#
# Benchmark the Rust huniq build against the original C++ huniq and the
# sort/uniq tools.
#
# Bash is required: the functions in this script use `[[ ]]` and
# associative arrays, which plain /bin/sh does not provide.

# Abort on the first failing command.
set -e
# Make Ctrl-C / TERM leave the endless benchmark loop.
trap 'exit' INT TERM

# All paths below are relative to the directory containing this script.
cd "$(dirname "$0")"
# Rust implementation (built with `cargo build --release`).
huniq2bin="./target/release/huniq"
# Checkout and binary of the original C++ implementation.
huniq1dir="./target/benchmark/huniq1"
huniq1bin="${huniq1dir}/huniq"
|
||||||
|
|
||||||
|
# Run a command under the external `time` program and keep only the report.
# Arguments: the command to run, followed by its arguments.
# Outputs:   the command's stdout is thrown away; `time` is given the
#            format '%e %M' (the readme table labels these columns
#            seconds and memory/kb).
# `time` is started through `env`, presumably so the external program is
# used instead of the shell's own `time` keyword.
measure() {
  local -a timed_cmd=(env time '-f%e %M' "$@")
  "${timed_cmd[@]}" > /dev/null
}
|
||||||
|
|
||||||
|
# Benchmark the Rust implementation.
# Globals:   huniq2bin (read) - path of the Rust binary; falls back to
#            ./target/release/huniq when unset or empty.
# Arguments: extra arguments handed to huniq (e.g. -c).
# Input:     the data to deduplicate, on stdin.
bench_rust() {
  # Use the configured path instead of a second hard-coded copy of it, the
  # same way bench_cpp uses $huniq1bin.
  measure "${huniq2bin:-./target/release/huniq}" "$@"
}
|
||||||
|
|
||||||
|
# Benchmark the original C++ implementation.
# Globals:   huniq1bin (read) - path of the C++ binary.
# Arguments: extra arguments handed to the binary (e.g. -c).
# Input:     the data to deduplicate, on stdin.
bench_cpp() {
  local -r cpp_binary="$huniq1bin"
  measure "$cpp_binary" "$@"
}
|
||||||
|
|
||||||
|
# Benchmark the standard tools.
# Arguments: none       -> measures `sort -u`
#            otherwise  -> measures `sort | uniq ARGS` (e.g. ARGS = -c)
# Input:     the data to deduplicate, on stdin.
# Outputs:   without arguments, whatever measure reports; with arguments,
#            one line "<elapsed> <memory>" on stdout, where elapsed is the
#            larger of the two reported times and memory is the sum of
#            both reported memory figures.
bench_shell() {
  if [[ -z "$*" ]]; then
    measure sort -u
  else
    {
      # sort is timed directly rather than through measure: measure sends
      # the command's stdout to /dev/null, which would leave uniq with an
      # empty input. The arguments are forwarded so that count mode really
      # runs `uniq -c`.
      env time '-f%e %M' sort | measure uniq "$@"
    } 2>&1 | awk '
      {
        # Both stages run concurrently; keep the longer of the two times
        # regardless of which report arrives first.
        if ($1 + 0 >= elapsed + 0) elapsed = $1;
        mem += $2;
      }

      END {
        print(elapsed, mem);
      }'
  fi
}
|
||||||
|
|
||||||
|
# Build whatever is missing, then run the benchmark matrix forever.
# Globals:   huniq2bin, huniq1dir, huniq1bin (read)
# Outputs:   for every combination, "<mode> <repeats> <impl> " on stdout,
#            followed by whatever the bench_<impl> function reports.
# Never returns on its own; the loop is meant to be aborted by a signal.
# NOTE(review): nothing in the visible part of this file invokes main.
main() {
  local mode repeats impl i
  local -a mode_args

  test -e "$huniq2bin" || {
    cargo build --release
  }

  test -e "$huniq1dir" || {
    git clone --recursive "https://github.com/SoftwearDevelopment/huniq" "$huniq1dir"
  } >&2

  # Build in a subshell so the working directory of this shell is untouched
  # even when make fails.
  test -e "$huniq1bin" || (cd "$huniq1dir" && make) >&2

  while true; do
    for mode in uniq count; do
      # An array passes "no extra arguments" without relying on an
      # unquoted empty expansion.
      case "$mode" in
        count) mode_args=(-c) ;;
        *) mode_args=() ;;
      esac

      for repeats in 1 2 5 10 50 100; do
        for impl in rust cpp shell; do
          printf '%s %s %s ' "$mode" "$repeats" "$impl"
          # The input is the system dictionaries concatenated $repeats times.
          for ((i = 0; i < repeats; i += 1)); do
            cat /usr/share/dict/*
          done | "bench_${impl}" "${mode_args[@]}"
        done
      done
    done
  done
}
|
2
benchmark_results.txt
Normal file
2
benchmark_results.txt
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
uniq 1 rust uniq 1 cpp uniq 1 shell uniq 2 rust uniq 2 cpp uniq 2 shell uniq 5 rust uniq 5 cpp uniq 5 shell uniq 10 rust uniq 10 cpp uniq 10 shell uniq 50 rust uniq 50 cpp uniq 50 shell uniq 100 rust uniq 100 cpp uniq 100 shell count 1 rust count 1 cpp count 1 shell 9.97 11332
|
||||||
|
count 2 rust count 2 cpp
|
55
readme.md
55
readme.md
|
@ -26,8 +26,8 @@ in -c/count mode.
|
||||||
|
|
||||||
## Motivation
|
## Motivation
|
||||||
|
|
||||||
Sorting is slow and requires lot's of memory. By using hash tables/hash sets instead of sorting
|
Sorting is slow. By using hash tables/hash sets instead of sorting
|
||||||
the input huniq is generally faster and requires less memory than the combination of `sort` and `uniq.`
|
the input huniq is generally faster than `sort -u` or `sort | uniq -c`.
|
||||||
|
|
||||||
## Version History
|
## Version History
|
||||||
|
|
||||||
|
@ -37,6 +37,57 @@ Changes made in version 2:
|
||||||
|
|
||||||
* The -d/-0 flags were added so you can specify custom delimiters
|
* The -d/-0 flags were added so you can specify custom delimiters
|
||||||
* Completely rewritten in rust.
|
* Completely rewritten in rust.
|
||||||
|
* Version two was in the (admittedly very limited) between 1.4x
|
||||||
|
|
||||||
|
## Build
|
||||||
|
|
||||||
|
```sh
|
||||||
|
cargo build --release
|
||||||
|
```
|
||||||
|
|
||||||
|
To run the tests execute:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
bash ./test.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
## Benchmark
|
||||||
|
|
||||||
|
You can use `bash ./benchmark.sh` to execute the benchmarks. They will execute until you manually abort them (e.g. by pressing Ctrl-C).
|
||||||
|
|
||||||
|
From my very limited benchmarking results, I found that the rust implementation is between 1x and 2x faster than the C++ implementation and between 5 and 10 times
|
||||||
|
faster than the `uniq/sort` standard implementation.
|
||||||
|
|
||||||
|
Surprisingly, sort features the lowest memory usage. All three implementations' memory usage grows with the number of unique lines, and not the number
|
||||||
|
of total lines, so sort probably manually optimizes for that. Sort's memory usage is about a third of the rust implementation's…
|
||||||
|
The difference in memory usage between the rust implementation and the C++ one is quite small; the C++ one uses around 10% less memory.
|
||||||
|
|
||||||
|
The benchmark results:
|
||||||
|
|
||||||
|
```
|
||||||
|
repetitions implementation seconds memory/kb
|
||||||
|
1 rust 0.89 29568
|
||||||
|
1 cpp 2.62 26080
|
||||||
|
1 shell 10.21 9820
|
||||||
|
2 rust 2.02 29604
|
||||||
|
2 cpp 6.21 26036
|
||||||
|
2 shell 34.33 9724
|
||||||
|
5 rust 5.25 29548
|
||||||
|
5 cpp 12.08 26076
|
||||||
|
5 shell 72.30 10004
|
||||||
|
10 rust 10.26 29548
|
||||||
|
10 cpp 18.87 26128
|
||||||
|
10 shell 151.40 10060
|
||||||
|
50 rust 50.16 29548
|
||||||
|
50 cpp 88.51 26096
|
||||||
|
50 shell 675.88 11048
|
||||||
|
100 rust 84.96 29604
|
||||||
|
100 cpp 149.10 26048
|
||||||
|
```
|
||||||
|
|
||||||
|
## Future direction
|
||||||
|
|
||||||
|
There is some potential for optimizations…like
|
||||||
|
|
||||||
## License
|
## License
|
||||||
|
|
||||||
|
|
33
test.sh
Normal file
33
test.sh
Normal file
|
@ -0,0 +1,33 @@
|
||||||
|
#! /bin/bash
#
# Regression tests for the debug build of huniq.

# Fixture and binary paths are relative to this script's directory, so
# give up if it cannot be entered.
cd "$(dirname "$0")" || exit 1
bin="./target/debug/huniq"

# Counters updated by assert.
failures=0
count=0
|
||||||
|
|
||||||
|
# Check that a shell command prints exactly the content of a file.
# Globals:   count (incremented on every call), failures (incremented when
#            the output differs)
# Arguments: $1 - short description used in the failure message
#            $2 - file holding the expected output
#            $3… - command line, evaluated with eval
# Outputs:   on mismatch, a message and the diff on stderr
assert() {
  local desc="$1"; shift
  local ref="$1"; shift
  local delta

  count=$(( count + 1 ))
  # Evaluate the command once and keep the diff, so the report shows the
  # very run that failed and side effects of the command are not repeated.
  if ! delta=$(diff <(eval "$@") "$ref"); then
    echo >&2 "Assertion failed \"$desc\": \`$*\`"
    if [[ -n "$delta" ]]; then
      printf '%s\n' "$delta" >&2
    fi
    failures=$(( failures + 1 ))
  fi
}
|
||||||
|
|
||||||
|
# Build the debug binary and run every assertion.
# Globals:   bin (read), count and failures (read; updated by assert)
# Outputs:   a "<count> tests <failures> failures" summary on stderr
# Returns:   0 when the build succeeded and no assertion failed,
#            non-zero otherwise
main() {
  # The previous guard tested $huniq2bin, a variable this script never
  # sets, so the build ran on every invocation. Keep building every time,
  # and stop if the build fails instead of testing a missing or stale
  # binary.
  cargo build || {
    echo >&2 "cargo build failed; not running tests"
    return 1
  }

  assert uniq test/expect_uniq.txt "$bin <test/input.txt"
  assert count test/expect_count.txt "$bin -c <test/input.txt | sort -nr"

  echo >&2 "$count tests $failures failures"
  test "$failures" -eq 0
}
|
||||||
|
|
||||||
|
# Run the test suite defined above.
main
|
3
test/expect_count.txt
Normal file
3
test/expect_count.txt
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
3 hello
|
||||||
|
2 bar
|
||||||
|
1 foo
|
3
test/expect_uniq.txt
Normal file
3
test/expect_uniq.txt
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
hello
|
||||||
|
foo
|
||||||
|
bar
|
6
test/input.txt
Normal file
6
test/input.txt
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
hello
|
||||||
|
foo
|
||||||
|
hello
|
||||||
|
hello
|
||||||
|
bar
|
||||||
|
bar
|
Loading…
Reference in a new issue