Added heuristics

This commit is contained in:
Aaronepower 2016-11-06 19:39:34 +00:00 committed by =
parent 04f6e1e3e4
commit a72121f67b
13 changed files with 369 additions and 176 deletions

105
Cargo.lock generated
View file

@ -1,15 +1,18 @@
[root]
name = "tokei"
version = "4.3.0"
version = "4.4.0"
dependencies = [
"clap 2.10.4 (registry+https://github.com/rust-lang/crates.io-index)",
"encoding 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)",
"env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
"glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
"handlebars 0.21.1 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
"maplit 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
"memmap 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
"rayon 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)",
"rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 0.8.16 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_cbor 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
@ -136,7 +139,17 @@ version = "0.3.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 0.1.79 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "fs2"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
@ -171,6 +184,11 @@ dependencies = [
"winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "lazy_static"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "libc"
version = "0.2.17"
@ -194,6 +212,17 @@ dependencies = [
"libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "memmap"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"fs2 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "num-traits"
version = "0.1.36"
@ -251,19 +280,19 @@ dependencies = [
[[package]]
name = "regex"
version = "0.1.79"
version = "0.1.80"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)",
"memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
"regex-syntax 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)",
"regex-syntax 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)",
"thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)",
"utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "regex-syntax"
version = "0.3.8"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
@ -489,3 +518,69 @@ name = "yaml-rust"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
[metadata]
"checksum aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ca972c2ea5f742bfce5687b9aef75506a764f61d37f8f649047846a9686ddb66"
"checksum ansi_term 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c877397e09fec7a240af5fa74ad0124054b8066149d6544cd1ace93f8de3be68"
"checksum bitflags 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "aad18937a628ec6abcd26d1489012cc0e18c21798210f491af69ded9b881106d"
"checksum byteorder 0.3.13 (registry+https://github.com/rust-lang/crates.io-index)" = "29b2aa490a8f546381308d68fc79e6bd753cd3ad839f7a7172897f1feedfa175"
"checksum clap 2.10.4 (registry+https://github.com/rust-lang/crates.io-index)" = "3df6dcb3122b085b96399062f4fa59d69f4d0af50519944f2d76b7a7686629e3"
"checksum deque 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "1614659040e711785ed8ea24219140654da1729f3ec8a47a9719d041112fe7bf"
"checksum dtoa 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "0dd841b58510c9618291ffa448da2e4e0f699d984d436122372f446dae62263d"
"checksum encoding 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)" = "6b0d943856b990d12d3b55b359144ff341533e516d94098b1d3fc1ac666d36ec"
"checksum encoding-index-japanese 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)" = "04e8b2ff42e9a05335dbf8b5c6f7567e5591d0d916ccef4e0b1710d32a0d0c91"
"checksum encoding-index-korean 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)" = "4dc33fb8e6bcba213fe2f14275f0963fd16f0a02c878e3095ecfdf5bee529d81"
"checksum encoding-index-simpchinese 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)" = "d87a7194909b9118fc707194baa434a4e3b0fb6a5a757c73c3adb07aa25031f7"
"checksum encoding-index-singlebyte 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)" = "3351d5acffb224af9ca265f435b859c7c01537c0849754d3db3fdf2bfe2ae84a"
"checksum encoding-index-tradchinese 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)" = "fd0e20d5688ce3cab59eb3ef3a2083a5c77bf496cb798dc6fcdb75f323890c18"
"checksum encoding_index_tests 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "a246d82be1c9d791c5dfde9a2bd045fc3cbba3fa2b11ad558f27d01712f00569"
"checksum env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "15abd780e45b3ea4f76b4e9a26ff4843258dd8a3eed2775a0e7368c2e7936c2f"
"checksum fs2 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "640001e1bd865c7c32806292822445af576a6866175b5225aa2087ca5e3de551"
"checksum glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "8be18de09a56b60ed0edf84bc9df007e30040691af7acd1c41874faac5895bfb"
"checksum handlebars 0.21.1 (registry+https://github.com/rust-lang/crates.io-index)" = "937e9d49d65ffb5f70e95710a6c8539addf40200275ad8b6cdba0f0a59d5814d"
"checksum itoa 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ae3088ea4baeceb0284ee9eea42f591226e6beaecf65373e41b38d95a1b8e7a1"
"checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d"
"checksum lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "49247ec2a285bb3dcb23cbd9c35193c025e7251bfce77c1d5da97e6362dffe7f"
"checksum libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)" = "044d1360593a78f5c8e5e710beccdc24ab71d1f01bc19a29bcacdba22e8475d8"
"checksum log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "ab83497bf8bf4ed2a74259c1c802351fcd67a65baa86394b6ba73c36f4838054"
"checksum maplit 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "be384c560e0c3ad868b590ffb88d2c0a1effde6f59885234e4ea811c1202bfea"
"checksum memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d8b629fb514376c675b98c1421e80b151d3817ac42d7c667717d282761418d20"
"checksum memmap 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "065ce59af31c18ea2c419100bda6247dd4ec3099423202b12f0bd32e529fabd2"
"checksum num-traits 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)" = "a16a42856a256b39c6d3484f097f6713e14feacd9bfb02290917904fae46c81c"
"checksum num_cpus 0.2.13 (registry+https://github.com/rust-lang/crates.io-index)" = "cee7e88156f3f9e19bdd598f8d6c9db7bf4078f99f8381f43a55b09648d1a6e3"
"checksum pest 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "0a6dda33d67c26f0aac90d324ab2eb7239c819fc7b2552fe9faa4fe88441edc8"
"checksum post-expansion 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "31a834a6060acaef74a8d878f6ca37a2b86fefe042bbfe70689ba587e42526f9"
"checksum quick-error 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "0aad603e8d7fb67da22dbdf1f4b826ce8829e406124109e73cf1b2454b93a71c"
"checksum quote 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "ea1e0c9bc6bfb0a60d539aab6e338207c1a5456e62f5bd5375132cee119aa4b3"
"checksum rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)" = "2791d88c6defac799c3f20d74f094ca33b9332612d9aef9078519c82e4fe04a5"
"checksum rayon 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "655df67c314c30fa3055a365eae276eb88aa4f3413a352a1ab32c1320eda41ea"
"checksum regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)" = "4fd4ace6a8cf7860714a2c2280d6c1f7e6a413486c13298bbc86fd3da019402f"
"checksum regex-syntax 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "f9ec002c35e86791825ed294b50008eea9ddfc8def4420124fbc6b08db834957"
"checksum rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)" = "6159e4e6e559c81bd706afe9c8fd68f547d3e851ce12e76b1de7914bab61691b"
"checksum serde 0.7.15 (registry+https://github.com/rust-lang/crates.io-index)" = "1b0e0732aa8ec4267f61815a396a942ba3525062e3bd5520aa8419927cfc0a92"
"checksum serde 0.8.16 (registry+https://github.com/rust-lang/crates.io-index)" = "1105e65d0a0b212d2d735c8b5a4f6aba2adc501e8ad4497e9f1a39e4c4ac943e"
"checksum serde_cbor 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "45189be234a4f73320489f8c91dd4121dd40e1be4d5e16d31d0335a96f910395"
"checksum serde_codegen 0.8.16 (registry+https://github.com/rust-lang/crates.io-index)" = "446384bcfd7d9276a23b51e9dc14341909ad05377b68ac5da6f83a7a2094bcd0"
"checksum serde_codegen_internals 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)" = "318f7e77aa5187391d74aaf4553d2189f56b0ce25e963414c951b97877ffdcec"
"checksum serde_json 0.8.3 (registry+https://github.com/rust-lang/crates.io-index)" = "1cb6b19e74d9f65b9d03343730b643d729a446b29376785cd65efdff4675e2fc"
"checksum serde_yaml 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7f79936ed255f34afa72332a8901650a8b9772471e45569ef9ba410a4419a723"
"checksum strsim 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "50c069df92e4b01425a8bf3576d5d417943a6a7272fbabaf5bd80b1aaa76442e"
"checksum syn 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)" = "76c2db66dc579998854d84ff0ff4a81cb73e69596764d144ce7cece4d04ce6b5"
"checksum syntex 0.46.0 (registry+https://github.com/rust-lang/crates.io-index)" = "6e0c45ebc6d4b9dcb45b74b8e5a4f484dbe3db1edf626c582e7bf2973926d3fe"
"checksum syntex_errors 0.46.0 (registry+https://github.com/rust-lang/crates.io-index)" = "59d5e0eab8b15b3aa47acc2c2f57fa8143f23a3632d269020404258af71bdb1b"
"checksum syntex_pos 0.46.0 (registry+https://github.com/rust-lang/crates.io-index)" = "06970950ad719185882a658f194dbc78dab9fa2527f2174b9f8f3061c686cd0c"
"checksum syntex_syntax 0.46.0 (registry+https://github.com/rust-lang/crates.io-index)" = "51fb8ac3fcb2c4485082bef800eb5bea7a7fa5b6745eab583e3484edb86142ee"
"checksum tempdir 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "87974a6f5c1dfb344d733055601650059a3363de2a6104819293baff662132d6"
"checksum term 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "3deff8a2b3b6607d6d7cc32ac25c0b33709453ca9cceac006caac51e963cf94a"
"checksum term_size 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "d470ef1b870a5c71e691676ff34397b175820fd35e30550e5244f35079be02bf"
"checksum thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a9539db560102d1cef46b8b78ce737ff0bb64e7e18d35b2a5688f7d097d0ff03"
"checksum thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "8576dbbfcaef9641452d5cf0df9b0e7eeab7694956dd33bb61515fb8f18cfdd5"
"checksum toml 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "736b60249cb25337bc196faa43ee12c705e426f3d55c214d73a4e7be06f92cb4"
"checksum unicode-segmentation 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "b905d0fc2a1f0befd86b0e72e31d1787944efef9d38b9358a9e92a69757f7e3b"
"checksum unicode-width 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2d6722facc10989f63ee0e20a83cd4e1714a9ae11529403ac7e0afd069abc39e"
"checksum unicode-xid 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "36dff09cafb4ec7c8cf0023eb0b686cb6ce65499116a12201c9e11840ca01beb"
"checksum utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a1ca13c08c41c9c3e04224ed9ff80461d97e121589ff27c753a16cb10830ae0f"
"checksum vec_map 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "cac5efe5cb0fa14ec2f84f83c701c562ee63f6dcc680861b21d65c682adfb05f"
"checksum walkdir 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "c66c0b9792f0a765345452775f3adbd28dde9d33f30d13e5dcc5ae17cf6f3780"
"checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a"
"checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc"
"checksum yaml-rust 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "371cea3a33a58d11dc83c0992fb37e44f651ebdf2df12f9d939f6cb24be2a8fd"

View file

@ -16,23 +16,26 @@ name = "tokei"
path = "src/main.rs"
[build-dependencies]
serde_json = "~0.8.0"
serde = "~0.8.0"
serde_json = "~0.8.0"
[build-dependencies.handlebars]
features = ["serde_type"]
version = "0.21.1"
[build-dependencies.serde_codegen]
optional = true
version = "0.8.0"
[build-dependencies.handlebars]
version = "0.21.1"
features = ["serde_type"]
[dependencies]
encoding = "0.2.33"
glob = "~0.2.11"
lazy_static = "0.2.1"
log = "0.3.6"
maplit = "~0.1.3"
memmap = "0.5.0"
rayon = "=0.4.2"
regex = "0.1.80"
walkdir = "~0.1.5"
[dependencies.clap]
@ -84,3 +87,8 @@ yaml = ["io", "serde_yaml"]
[lib]
name = "tokei"
path = "src/lib/lib.rs"
[profile]
[profile.release]
debug = true

View file

@ -7,6 +7,8 @@ extern crate handlebars;
use serde_json::Value;
use handlebars::{Context, Handlebars};
use std::fs::File;
use std::env;
use std::path::Path;
fn main() {
expand();
@ -14,8 +16,6 @@ fn main() {
#[cfg(feature = "io")]
fn expand() {
use std::env;
use std::path::Path;
use std::thread;
render_handlebars();
@ -43,11 +43,17 @@ fn expand() {
fn render_handlebars() {
let mut handlebars = Handlebars::new();
handlebars.register_escape_fn(handlebars::no_escape);
let raw_data: Value = serde_json::from_reader(File::open(&"src/lib/languages.json").unwrap()).unwrap();
let raw_data: Value = serde_json::from_reader(
File::open(&"src/lib/languages.json").unwrap()).unwrap();
let data = Context::wraps(&raw_data);
let mut source_template = File::open(&"src/lib/language/language_type.rs.hbs").expect("Can't find Template");
let mut output_file = File::create("src/lib/language/language_type.rs").expect("Can't create!");
if let Err(err) = handlebars.template_renderw2(&mut source_template, &data, &mut output_file) {
let out = Path::new(&env::var_os("OUT_DIR").unwrap()).join("language_type.rs");
let mut source_template = File::open(&"src/lib/language/language_type.rs.hbs")
.expect("Can't find Template");
let mut output_file = File::create(&out).expect("Can't create!");
if let Err(err) = handlebars.template_renderw2(&mut source_template,
&data,
&mut output_file)
{
panic!("Failed to generate languages! ERROR: {:?}", err);
}
}

View file

@ -1 +0,0 @@
language_type.rs

View file

@ -1,13 +1,21 @@
use std::borrow::Cow;
use std::path::PathBuf;
use std::ops::AddAssign;
use regex::{self, Regex};
use sort::Sort;
use sort::Sort::*;
use stats::Stats;
/// Struct representing a single Language.
#[cfg(feature = "io")]
#[cfg_attr(feature = "io", derive(Clone, Debug, Deserialize, Default, Eq, Ord, PartialEq, PartialOrd, Serialize))]
#[cfg_attr(feature = "io",
derive(Clone,
Debug,
Deserialize,
Serialize
))]
pub struct Language {
/// Number of blank lines.
pub blanks: usize,
@ -25,22 +33,26 @@ pub struct Language {
/// A collection of single line comments in the language. ie. `//` in Rust.
#[serde(skip_deserializing, skip_serializing)]
pub line_comment: Vec<&'static str>,
/// A collection of tuples representing the start and end of multi line comments. ie. `/* comment */` in Rust.
/// A collection of tuples representing the start and end of multi line
/// comments. ie. `/* comment */` in Rust.
#[serde(skip_deserializing, skip_serializing)]
pub multi_line: Vec<(&'static str, &'static str)>,
/// Whether the language supports nested multi line comments or not.
#[serde(skip_deserializing, skip_serializing)]
pub nested: bool,
/// A list of specific nested comments if this is empty all `multi_line` comments count.
/// A list of specific nested comments if this is empty all `multi_line`
/// comments count.
#[serde(skip_deserializing, skip_serializing)]
pub nested_comments: Vec<(&'static str, &'static str)>,
/// A list of quotes by default it is `""`.
#[serde(skip_deserializing, skip_serializing)]
pub quotes: Vec<(&'static str, &'static str)>,
#[serde(skip_deserializing, skip_serializing)]
pub regex: Cow<'static, Regex>
}
#[cfg(not(feature = "io"))]
#[derive(Clone, Debug, Default, Eq, Ord, PartialEq, PartialOrd)]
#[derive(Clone, Debug)]
pub struct Language {
/// Number of blank lines.
pub blanks: usize,
@ -56,17 +68,36 @@ pub struct Language {
pub lines: usize,
/// A collection of single line comments in the language. ie. `//` in Rust.
pub line_comment: Vec<&'static str>,
/// A collection of tuples representing the start and end of multi line comments. ie. `/* comment */` in Rust.
/// A collection of tuples representing the start and end of multi line
/// comments. ie. `/* comment */` in Rust.
pub multi_line: Vec<(&'static str, &'static str)>,
/// Whether the language supports nested multi line comments or not.
pub nested: bool,
/// A list of specific nested comments if this is empty all `multi_line` comments count.
/// A list of specific nested comments if this is empty all `multi_line`
/// comments count.
pub nested_comments: Vec<(&'static str, &'static str)>,
/// A list of quotes by default it is `""`.
pub quotes: Vec<(&'static str, &'static str)>,
/// A regular expression for searching for multi line comments.
pub regex: Cow<'static, Regex>
}
/// Builds a regex that matches the opening token of any of the given multi
/// line comments.
///
/// Each opening token is escaped on its own and the escaped tokens are then
/// joined with `|`. Escaping the already joined string would also escape the
/// `|` separators, giving a pattern that only matches the literal
/// concatenation of every token, so it would never match real source for a
/// language with more than one kind of multi line comment.
///
/// An empty `multi_line` slice yields the empty pattern.
fn generate_regex(multi_line: &[(&'static str, &'static str)]) -> Cow<'static, Regex> {
    let raw_regex = multi_line.iter()
        .map(|&(start, _)| regex::quote(start))
        .collect::<Vec<_>>()
        .join("|");

    // Every alternative is fully escaped, so the pattern is always valid.
    Cow::Owned(Regex::new(&raw_regex).expect("escaped comment tokens form a valid regex"))
}
// Regexes that are compiled once on first use and then shared.
lazy_static! {
// Matches the opening `/*` of a multi line comment; borrowed by the
// constructors whose languages use that comment syntax.
static ref C_REGEX: Regex = Regex::new(r"/\*").unwrap();
// Compiled from the empty pattern; borrowed by `Language::default()`.
static ref EMPTY_REGEX: Regex = Regex::new("").unwrap();
}
impl Language {
/// Constructs a new empty Language with the comments provided.
///
@ -76,16 +107,18 @@ impl Language {
/// ```
pub fn new(line_comment: Vec<&'static str>,
multi_line: Vec<(&'static str, &'static str)>)
-> Self {
-> Self {
Language {
line_comment: line_comment,
multi_line: multi_line,
..Self::default()
Language {
line_comment: line_comment,
regex: generate_regex(&multi_line),
multi_line: multi_line,
..Self::default()
}
}
}
/// Convience constructor for creating a language that has no commenting syntax.
/// Convenience constructor for creating a language that has no commenting
/// syntax.
///
/// ```
/// # use tokei::*;
@ -97,7 +130,8 @@ impl Language {
Self::default()
}
/// Convience constructor for creating a language that has the same commenting syntax as C like languages.
/// Convenience constructor for creating a language that has the same
/// commenting syntax as C like languages.
///
/// ```
/// # use tokei::*;
@ -108,15 +142,18 @@ impl Language {
/// assert_eq!(rust.multi_line, c.multi_line);
/// ```
pub fn new_c() -> Self {
    // Start from the empty language and fill in the C family syntax.
    let mut c_like = Self::default();
    c_like.line_comment = vec!["//"];
    c_like.multi_line = vec![("/*", "*/")];
    c_like.quotes = vec![("\"", "\"")];
    // Borrow the shared `/*` matcher instead of compiling a new one.
    c_like.regex = Cow::Borrowed(&*C_REGEX);
    c_like
}
/// Convience constructor for creating a language that has the same commenting syntax as ML like languages.
/// Convenience constructor for creating a language that has the same
/// commenting syntax as ML like languages.
///
/// ```
/// # use tokei::*;
@ -127,14 +164,19 @@ impl Language {
/// assert_eq!(ocaml.multi_line, coq.multi_line);
/// ```
pub fn new_func() -> Self {
    // Matcher for the opening `(*`, compiled once on first use.
    lazy_static! {
        static ref FUNC_REGEX: Regex = Regex::new(r"\(\*").unwrap();
    }

    let mut ml_like = Self::default();
    ml_like.multi_line = vec![("(*", "*)")];
    ml_like.quotes = vec![("\"", "\"")];
    ml_like.regex = Cow::Borrowed(&*FUNC_REGEX);
    ml_like
}
/// Convience constructor for creating a language that has the same commenting syntax as HTML like languages.
/// Convenience constructor for creating a language that has the same
/// commenting syntax as HTML like languages.
///
/// ```
/// # use tokei::*;
@ -145,14 +187,19 @@ impl Language {
/// assert_eq!(xml.multi_line, html.multi_line);
/// ```
pub fn new_html() -> Self {
    // Matcher for the opening `<!--`, compiled once on first use.
    lazy_static! {
        static ref HTML_REGEX: Regex = Regex::new(r"<!--").unwrap();
    }

    let mut markup = Self::default();
    markup.multi_line = vec![("<!--", "-->")];
    markup.quotes = vec![("\"", "\"")];
    markup.regex = Cow::Borrowed(&*HTML_REGEX);
    markup
}
/// Convience constructor for creating a language that has the same commenting syntax as Bash.
/// Convenience constructor for creating a language that has the same
/// commenting syntax as Bash.
///
/// ```
/// # use tokei::*;
@ -166,8 +213,8 @@ impl Language {
Self::new_single(vec!["#"])
}
/// Convience constructor for creating a language that has the same commenting syntax as
/// Haskell.
/// Convenience constructor for creating a language that has the same
/// commenting syntax as Haskell.
///
/// ```
/// # use tokei::*;
@ -178,10 +225,14 @@ impl Language {
/// assert_eq!(haskell.multi_line, haskell.multi_line);
/// ```
pub fn new_haskell() -> Self {
Self::new(vec!["--"], vec![("{-", "-}")]).nested()
lazy_static! {
static ref HASKELL_REGEX: Regex = Regex::new(r"\{-").unwrap();
}
Self::new(vec!["--"], vec![("{-", "-}")]).nested().regex(Cow::Borrowed(&*HASKELL_REGEX))
}
/// Convience constructor for creating a language that only has multi line comments.
/// Convenience constructor for creating a language that only has multi line
/// comments.
///
/// ```
/// # use tokei::*;
@ -189,13 +240,15 @@ impl Language {
/// ```
pub fn new_multi(multi_line: Vec<(&'static str, &'static str)>) -> Self {
    // Build the matcher first, while `multi_line` can still be borrowed;
    // it is moved into the language below.
    let opening_regex = generate_regex(&multi_line);

    let mut language = Self::default();
    language.regex = opening_regex;
    language.multi_line = multi_line;
    language.quotes = vec![("\"", "\"")];
    language
}
/// Convience constructor for creating a language that has the same commenting syntax as Prolog.
/// Convenience constructor for creating a language that has the same
/// commenting syntax as Prolog.
///
/// ```
/// # use tokei::*;
@ -210,11 +263,13 @@ impl Language {
line_comment: vec!["%"],
multi_line: vec![("/*", "*/")],
quotes: vec![("\"", "\"")],
regex: Cow::Borrowed(&*C_REGEX),
..Self::default()
}
}
/// Convience constructor for creating a language that only has single line comments.
/// Convenience constructor for creating a language that only has single line
/// comments.
///
/// ```
/// # use tokei::*;
@ -228,7 +283,8 @@ impl Language {
}
}
/// Checks if the language is empty. Empty meaning it doesn't have any statistics.
/// Checks if the language is empty. Empty meaning it doesn't have any
/// statistics.
///
/// ```
/// # use tokei::*;
@ -275,9 +331,18 @@ impl Language {
/// assert!(d.nested);
/// assert_eq!(d.nested_comments, vec![("/+", "+/")]);
/// ```
pub fn nested_comments(mut self, nested_comments: Vec<(&'static str, &'static str)>) -> Self {
self.nested = true;
self.nested_comments = nested_comments;
pub fn nested_comments(mut self,
nested_comments: Vec<(&'static str, &'static str)>)
-> Self
{
self.nested = true;
self.nested_comments = nested_comments;
self
}
/// Adds a regex for searching for multi line comments within a file.
fn regex(mut self, regex: Cow<'static, Regex>) -> Self {
self.regex = regex;
self
}
@ -291,10 +356,13 @@ impl Language {
/// .set_quotes(vec![("\"", "\""), ("'", "'")]);
/// assert!(!javascript.quotes.is_empty());
/// ```
pub fn set_quotes(mut self, quotes: Vec<(&'static str, &'static str)>) -> Self {
self.quotes = quotes;
self
}
pub fn set_quotes(mut self,
quotes: Vec<(&'static str, &'static str)>)
-> Self
{
self.quotes = quotes;
self
}
/// Sorts each of the `Stats` structs contained in the language based
/// on what category is provided
@ -369,3 +437,22 @@ impl AddAssign<Stats> for Language {
self.stats.push(rhs);
}
}
impl Default for Language {
    /// Creates a language with zeroed counters, no files or stats, no comment
    /// or quote syntax, and the shared empty regex.
    fn default() -> Self {
        Language {
            // Line counters.
            blanks: 0,
            code: 0,
            comments: 0,
            lines: 0,
            // Collected files and their statistics.
            files: vec![],
            stats: vec![],
            // Syntax description.
            line_comment: vec![],
            multi_line: vec![],
            nested: false,
            nested_comments: vec![],
            quotes: vec![],
            regex: Cow::Borrowed(&*EMPTY_REGEX),
        }
    }
}

View file

@ -0,0 +1,2 @@
include!(concat!(env!("OUT_DIR"), "/language_type.rs"));

View file

@ -18,7 +18,6 @@ use Language;
#[cfg_attr(feature = "io", derive(Deserialize, Serialize))]
#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)]
pub enum LanguageType {
{{~#each languages}}
{{~@key}},
{{/each}}

View file

@ -10,14 +10,15 @@ use std::iter::IntoIterator;
use std::ops::{AddAssign, Deref, DerefMut};
use encoding::{self, DecoderTrap};
use memmap::{Mmap, Protection};
// #[cfg(feature = "cbor")]
// use serde_cbor;
#[cfg(feature = "json")]
#[cfg(feature = "io")]
use serde_cbor;
#[cfg(feature = "io")]
use serde_json;
#[cfg(feature = "yaml")]
#[cfg(feature = "io")]
use serde_yaml;
#[cfg(feature = "toml-io")]
#[cfg(feature = "io")]
use toml;
use rayon::prelude::*;
@ -26,13 +27,10 @@ use super::{Language, LanguageType};
use super::LanguageType::*;
use stats::Stats;
#[cfg(not(feature = "json"))]
const JSON_ERROR: &'static str = "Tokei was not compiled with the `json` flag.";
#[cfg(not(feature = "toml-io"))]
const TOML_ERROR: &'static str = "Tokei was not compiled with the `toml-io` flag.";
#[cfg(not(feature = "yaml"))]
const YAML_ERROR: &'static str = "Tokei was not compiled with the `yaml` flag.";
#[cfg(not(feature = "io"))]
const IO_ERROR: &'static str = "Tokei was not compiled with the `io` flag.";
#[inline(never)]
fn count_files(mut language_tuple: (&LanguageType, &mut Language)) {
let (name, ref mut language) = language_tuple;
@ -44,18 +42,20 @@ fn count_files(mut language_tuple: (&LanguageType, &mut Language)) {
let is_fortran = name == &FortranModern || name == &FortranLegacy;
let files: Vec<_> = language.files.drain(..).collect();
let mut contents = Vec::new();
//let mut contents = Vec::new();
let mut stack = vec![];
let mut quote;
let has_multi_line = !language.multi_line.is_empty() && !language.nested_comments.is_empty();
for file in files {
let mut stats = Stats::new(opt_error!(file.to_str(), "Couldn't convert path to String."));
stack.clear();
contents.clear();
//contents.clear();
quote = None;
rs_error!(rs_error!(File::open(file)).read_to_end(&mut contents));
let file = rs_error!(Mmap::open_path(file, Protection::Read));
let contents = unsafe { file.as_slice() };
//rs_error!(rs_error!(File::open(file)).read_to_end(&mut contents));
let text = match encoding::decode(&contents, DecoderTrap::Replace, encoding::all::UTF_8) {
(Ok(string), _) => Cow::Owned(string),
@ -72,20 +72,21 @@ fn count_files(mut language_tuple: (&LanguageType, &mut Language)) {
continue;
}
let should_handle_multi_line = has_multi_line && language.regex.is_match(&text);
'line: for line in lines {
stats.lines += 1;
let no_stack = stack.is_empty();
// FORTRAN has a rule where it only counts as a comment if it's the first
// character in the column, so removing starting whitespace could cause a
// miscount.
let line = if is_fortran { line } else { line.trim_left() };
if line.trim().is_empty() {
stats.blanks += 1;
continue;
}
// FORTRAN has a rule where it only counts as a comment if it's the first
// character in the column, so removing starting whitespace could cause a
// miscount.
let line = if is_fortran { line } else { line.trim_left() };
for single in &language.line_comment {
if line.starts_with(single) {
stats.comments += 1;
@ -93,7 +94,9 @@ fn count_files(mut language_tuple: (&LanguageType, &mut Language)) {
}
}
multi_line::handle_multi_line(line, &language, &mut stack, &mut quote);
if should_handle_multi_line {
multi_line::handle_multi_line(line, &language, &mut stack, &mut quote);
}
if no_stack {
stats.code += 1;
@ -105,7 +108,8 @@ fn count_files(mut language_tuple: (&LanguageType, &mut Language)) {
}
}
/// A collection of existing languages([_List of Languages_](https://github.com/Aaronepower/tokei#supported-languages))
/// A collection of existing languages
/// ([_List of Languages_](https://github.com/Aaronepower/tokei#supported-languages))
#[derive(Debug, Clone)]
pub struct Languages {
inner: BTreeMap<LanguageType, Language>,
@ -113,39 +117,40 @@ pub struct Languages {
impl Languages {
// /// Creates a `Languages` struct from cbor.
// ///
// /// ```
// /// # extern crate tokei;
// /// # use tokei::*;
// /// # extern crate rustc_serialize;
// /// # use rustc_serialize::hex::FromHex;
// /// # fn main () {
// /// let cbor = "a16452757374a666626c616e6b730564636f64650c68636f6d6d656e7473\
// /// 0065737461747381a566626c616e6b730564636f64650c68636f6d6d656e74730065\
// /// 6c696e657311646e616d65722e5c7372635c6c69625c6275696c642e7273656c696e\
// /// 6573116b746f74616c5f66696c657301";
// ///
// /// let mut languages = Languages::from_cbor(&*cbor.from_hex().unwrap()).unwrap();
// /// assert_eq!(12, languages.get_mut(&LanguageType::Rust).unwrap().code);
// /// # }
// /// ```
// #[cfg(feature = "cbor")]
// pub fn from_cbor<'a, I: Into<&'a [u8]>>(cbor: I) -> serde_cbor::Result<Self> {
// let map = try!(serde_cbor::from_slice(cbor.into()));
//
// Ok(Self::from_previous(map))
// }
/// Creates a `Languages` struct from cbor.
///
/// ```
/// extern crate tokei;
/// use tokei::*;
/// extern crate rustc_serialize;
/// use rustc_serialize::hex::FromHex;
/// # fn main () {
/// let cbor = "a16452757374a666626c616e6b730564636f64650c68636f6d6d656e7473\
/// 0065737461747381a566626c616e6b730564636f64650c68636f6d6d656e74730065\
/// 6c696e657311646e616d65722e5c7372635c6c69625c6275696c642e7273656c696e\
/// 6573116b746f74616c5f66696c657301";
///
/// let mut languages = Languages::from_cbor(&*cbor.from_hex().unwrap()).unwrap();
/// assert_eq!(12, languages.get_mut(&LanguageType::Rust).unwrap().code);
/// # }
/// ```
#[cfg(feature = "cbor")]
pub fn from_cbor<'a, I: Into<&'a [u8]>>(cbor: I) -> serde_cbor::Result<Self> {
let map = try!(serde_cbor::from_slice(cbor.into()));
// #[cfg(not(feature = "cbor"))]
// pub fn from_cbor<'a, I: Into<&'a [u8]>>(cbor: I) -> ! {
// panic!(CBOR_ERROR)
// }
Ok(Self::from_previous(map))
}
/// Stand-in for `from_cbor` that is compiled when the `io` feature is
/// disabled.
///
/// # Panics
///
/// Always panics with `IO_ERROR`.
// NOTE(review): the real `from_cbor` looks to be gated on the `cbor` feature
// while this stub is gated on `io` — confirm the two conditions are meant to
// differ.
#[cfg(not(feature = "io"))]
#[allow(unused_variables)]
pub fn from_cbor<'a, I: Into<&'a [u8]>>(cbor: I) -> ! {
panic!(IO_ERROR)
}
/// Creates a `Languages` struct from json.
///
/// ```
/// # use tokei::*;
/// use tokei::*;
/// let json = r#"{
/// "Rust": {
/// "blanks": 5,
@ -176,7 +181,7 @@ impl Languages {
#[cfg(not(feature = "json"))]
#[allow(unused_variables)]
pub fn from_json<'a, I: Into<&'a [u8]>>(json: I) -> ! {
panic!(JSON_ERROR)
panic!(IO_ERROR)
}
/// Creates a `Languages` struct from json.
@ -213,7 +218,7 @@ impl Languages {
#[cfg(not(feature = "yaml"))]
#[allow(unused_variables)]
pub fn from_yaml<'a, I: Into<&'a [u8]>>(yaml: I) -> ! {
panic!(YAML_ERROR)
panic!(IO_ERROR)
}
#[cfg(feature = "io")]
@ -268,7 +273,7 @@ impl Languages {
/// let empty_map = languages.remove_empty();
/// let new_map: BTreeMap<LanguageType, Language> = BTreeMap::new();
///
/// assert_eq!(empty_map, new_map);
/// assert_eq!(empty_map.len(), 0);
/// ```
pub fn remove_empty(&self) -> BTreeMap<LanguageType, Language> {
let mut map = BTreeMap::new();
@ -347,7 +352,7 @@ impl Languages {
#[cfg(not(feature = "json"))]
#[allow(unused_variables)]
pub fn to_json(&self) -> ! {
panic!(JSON_ERROR)
panic!(IO_ERROR)
}
#[cfg(feature = "toml-io")]
@ -358,7 +363,7 @@ impl Languages {
#[cfg(not(feature = "toml-io"))]
#[allow(unused_variables)]
pub fn to_toml(&self) -> ! {
panic!(TOML_ERROR)
panic!(IO_ERROR)
}
/// Converts `Languages` to YAML.
@ -391,7 +396,7 @@ impl Languages {
#[cfg(not(feature = "yaml"))]
#[allow(unused_variables)]
pub fn to_yaml(&self) -> ! {
panic!(YAML_ERROR)
panic!(IO_ERROR)
}
}

View file

@ -453,15 +453,7 @@
]
},
"Haskell":{
"single":[
"--"
],
"multi":[
[
"{-",
"-}"
]
],
"base": "haskell",
"extensions":[
"hs"
]

View file

@ -1,6 +1,5 @@
#![deny(missing_debug_implementations, missing_copy_implementations,
trivial_casts, trivial_numeric_casts,
unsafe_code,
unstable_features,
unused_import_braces)]
@ -50,24 +49,30 @@
//! }
//! ```
#[macro_use]
extern crate lazy_static;
#[macro_use]
extern crate log;
extern crate encoding;
extern crate glob;
#[macro_use]
extern crate maplit;
extern crate memmap;
extern crate rayon;
extern crate regex;
extern crate walkdir;
#[cfg(feature = "io")]
extern crate serde;
// #[cfg(feature = "cbor")]
// extern crate serde_cbor;
#[cfg(feature = "cbor")]
extern crate serde_cbor;
#[cfg(feature = "json")]
extern crate serde_json;
#[cfg(feature = "yaml")]
extern crate serde_yaml;
#[cfg(feature = "toml-io")]
extern crate toml;
extern crate walkdir;
#[cfg(feature = "io")]
include!(concat!(env!("OUT_DIR"), "/lib.rs.in"));

View file

@ -34,8 +34,7 @@ pub fn get_all_files<'a>(paths: Cow<'a, [&'a str]>,
if let Ok(paths) = glob(path) {
'path: for path in paths {
let path = rs_error!(path);
let path_str = opt_error!(path.to_str(),
"DURING FILE LOOKUP: Couldn't convert path to string.");
let path_str = path.to_string_lossy();
for ig in &*ignored_directories {
if path_str.contains(ig) {
@ -56,7 +55,7 @@ pub fn get_all_files<'a>(paths: Cow<'a, [&'a str]>,
} else {
let walker = WalkDir::new(path).into_iter().filter_entry(|entry| {
for ig in &*ignored_directories {
if entry.path().to_str().unwrap().contains(&*ig) {
if entry.path().to_string_lossy().contains(&*ig) {
return false;
}
}
@ -65,13 +64,12 @@ pub fn get_all_files<'a>(paths: Cow<'a, [&'a str]>,
for entry in walker {
let entry = rs_error!(entry);
let entry = entry.path();
let mut language = if opt_error!(entry.path().to_str(),
"Walkdir: Couldn't convert path to string")
.contains("Makefile") {
let mut language = if entry.to_string_lossy().contains("Makefile") {
languages.get_mut(&Makefile).unwrap()
} else {
get_language!(languages, entry.path())
get_language!(languages, entry)
};
if rs_error!(entry.metadata()).is_file() {
@ -86,10 +84,7 @@ pub fn get_extension<P: AsRef<Path>>(path: P) -> Option<String> {
let path = path.as_ref();
match path.extension() {
Some(extension_os) => {
match extension_os.to_str() {
Some(extension) => Some(extension.to_lowercase()),
None => None,
}
Some(extension_os.to_string_lossy().to_lowercase())
}
None => {
match get_filetype_from_shebang(path) {

View file

@ -1,27 +1,30 @@
use language::Language;
/// This is used to catch lines like "let x = 5; /* Comment */"
#[inline(never)]
pub fn handle_multi_line(line: &str,
language: &Language,
stack: &mut Vec<&'static str>,
quote: &mut Option<&'static str>) {
let mut chars = line.chars();
let nested_is_empty = language.nested_comments.is_empty();
let mut skip = false;
'window: loop {
let window = chars.as_str();
if window.is_empty() {
break;
let window_size = language.multi_line.iter()
.chain(language.nested_comments.iter())
.map(|&(first, second)| ::std::cmp::max(first.len(), second.len()))
.max().unwrap();
'window: for window in line.as_bytes().windows(window_size) {
if skip {
skip = false;
continue;
}
chars.next();
let mut end = false;
if let &mut Some(quote_str) = quote {
if window.starts_with("\\") {
chars.next();
if window.starts_with(b"\\") {
continue;
} else if window.starts_with(quote_str) {
} else if window.starts_with(quote_str.as_bytes()) {
end = true;
}
}
@ -30,8 +33,9 @@ pub fn handle_multi_line(line: &str,
if let &mut Some(quote_str) = quote {
*quote = None;
// Prevent the quote from being counted as both an end and a start of a quote
if quote_str.chars().count() == 1 {
chars.next();
skip = true;
}
continue;
}
@ -43,53 +47,47 @@ pub fn handle_multi_line(line: &str,
let mut pop = false;
if let Some(last) = stack.last() {
if window.starts_with(last) {
if window.starts_with(last.as_bytes()) {
pop = true;
}
}
if pop {
stack.pop();
chars.next();
skip = true;
continue;
}
if stack.is_empty() {
for comment in &language.line_comment {
if window.starts_with(comment.as_bytes()) {
break 'window;
}
}
for &(start, end) in &language.quotes {
if window.starts_with(start) {
if window.starts_with(start.as_bytes()) {
*quote = Some(end);
chars.next();
skip = true;
continue 'window;
}
}
}
if stack.is_empty() {
for comment in &language.line_comment {
if window.starts_with(comment) {
break 'window;
}
}
}
for &(start, end) in &language.nested_comments {
if window.starts_with(start) {
if window.starts_with(start.as_bytes()) {
stack.push(end);
chars.next();
skip = true;
continue 'window;
}
}
for &(start, end) in &language.multi_line {
if window.starts_with(start) {
if language.nested && nested_is_empty {
stack.push(end);
} else if stack.len() == 0 {
if window.starts_with(start.as_bytes()) {
if (language.nested && nested_is_empty) || stack.len() == 0 {
stack.push(end);
}
chars.next();
skip = true;
continue 'window;
}
}

View file

@ -7,18 +7,20 @@ extern crate clap;
#[macro_use]
extern crate log;
extern crate env_logger;
// #[cfg(feature = "cbor")]
// extern crate serde_cbor;
#[cfg(feature = "json")]
extern crate serde_json;
#[cfg(feature = "yaml")]
extern crate serde_yaml;
#[cfg(feature = "toml-io")]
extern crate toml;
// #[cfg(feature = "cbor")]
// extern crate rustc_serialize;
extern crate tokei;
#[cfg(feature = "io")]
extern crate serde_cbor;
#[cfg(feature = "io")]
extern crate serde_json;
#[cfg(feature = "io")]
extern crate serde_yaml;
#[cfg(feature = "io")]
extern crate toml;
#[cfg(feature = "io")]
extern crate rustc_serialize;
use std::borrow::Cow;
#[cfg(feature = "io")]
use std::collections::BTreeMap;
@ -31,8 +33,8 @@ use std::io::{Write, stderr};
use clap::App;
use log::LogLevelFilter;
use env_logger::LogBuilder;
// #[cfg(feature = "cbor")]
// use rustc_serialize::hex::FromHex;
#[cfg(feature = "io")]
use rustc_serialize::hex::FromHex;
use tokei::{Languages, Language, LanguageType};
use tokei::Sort::*;
@ -214,7 +216,7 @@ fn main() {
}
#[cfg(feature = "all")]
#[cfg(feature = "io")]
fn add_input(input: &str, languages: &mut Languages) {
use std::fs::File;
use std::io::Read;
@ -260,7 +262,7 @@ fn add_input(input: &str, map: &mut Languages) -> ! {
/// This originally took a &[u8], but the u8 didn't directly correspond with the hexadecimal u8, so
/// it had to be changed to a String, which added the rustc_serialize dependency.
#[cfg(feature = "all")]
#[cfg(feature = "io")]
pub fn convert_input(contents: String) -> Option<BTreeMap<LanguageType, Language>> {
if contents.is_empty() {
None
@ -275,7 +277,7 @@ pub fn convert_input(contents: String) -> Option<BTreeMap<LanguageType, Language
}
}
#[cfg(feature = "all")]
#[cfg(feature = "io")]
fn match_output(format: &str, languages: &Languages) {
match format {
"cbor" => {