diff --git a/Cargo.lock b/Cargo.lock index 56003dd..b3c8a96 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3,6 +3,7 @@ name = "tokei" version = "4.2.0" dependencies = [ "clap 2.10.4 (registry+https://github.com/rust-lang/crates.io-index)", + "encoding 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)", "glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", "maplit 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", "rayon 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -74,6 +75,63 @@ name = "dtoa" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "encoding" +version = "0.2.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "encoding-index-japanese 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)", + "encoding-index-korean 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)", + "encoding-index-simpchinese 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)", + "encoding-index-singlebyte 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)", + "encoding-index-tradchinese 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "encoding-index-japanese" +version = "1.20141219.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "encoding_index_tests 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "encoding-index-korean" +version = "1.20141219.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "encoding_index_tests 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "encoding-index-simpchinese" +version = "1.20141219.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "encoding_index_tests 0.1.4 
(registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "encoding-index-singlebyte" +version = "1.20141219.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "encoding_index_tests 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "encoding-index-tradchinese" +version = "1.20141219.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "encoding_index_tests 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "encoding_index_tests" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "glob" version = "0.2.11" diff --git a/Cargo.toml b/Cargo.toml index 8a8c917..03c0a3f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,14 +15,14 @@ doc = false name = "tokei" path = "src/main.rs" -[dev-dependencies] -tempdir = "~0.3.5" +[build-dependencies] [build-dependencies.serde_codegen] optional = true version = "0.8.0" [dependencies] +encoding = "0.2.33" glob = "~0.2.11" maplit = "~0.1.3" rayon = "~0.4.0" @@ -55,8 +55,11 @@ version = "~0.4.0" [dependencies.toml] default-features = false features = ["serde"] -version = "~0.2.0" optional = true +version = "~0.2.0" + +[dev-dependencies] +tempdir = "~0.3.5" [features] all = ["json", "cbor", "toml-io", "yaml"] diff --git a/src/lib/language/languages.rs b/src/lib/language/languages.rs index dfdeb17..aabc071 100644 --- a/src/lib/language/languages.rs +++ b/src/lib/language/languages.rs @@ -9,6 +9,8 @@ use std::io::Read; use std::iter::IntoIterator; use std::ops::{AddAssign, Deref, DerefMut}; +use encoding::{self, DecoderTrap}; + // #[cfg(feature = "cbor")] // use serde_cbor; #[cfg(feature = "json")] @@ -54,7 +56,12 @@ fn count_files(language_tuple: &mut (&LanguageType, &mut Language)) { rs_or_cont!(rs_or_cont!(File::open(file)).read_to_end(&mut contents)); - let text = String::from_utf8_lossy(&contents); + + let 
text = match encoding::decode(&contents, DecoderTrap::Replace, encoding::all::UTF_8) { + (Ok(string), _) => Cow::Owned(string), + (Err(cow), _) => cow, + }; + let lines = text.lines(); if language.is_blank() { diff --git a/src/lib/lib.rs b/src/lib/lib.rs index d1b6630..4d547e6 100644 --- a/src/lib/lib.rs +++ b/src/lib/lib.rs @@ -6,13 +6,17 @@ //! # Tokei: Code Analysis Library //! -//! A simple, effcient library for analysing code in directories.[_For the binary_](https://github.com/Aaronepower/tokei/) +//! A simple, efficient library for counting code in directories. +//! [_For the binary_](https://github.com/Aaronepower/tokei/) //! //! ## How to use //! -//! Tokei provides both `Languages` a map of existing programming languages and `Language` for creating custom languages. +//! Tokei provides both a `Languages` struct, which is a map of many existing programming languages, +//! and `Language` for creating custom languages. //! -//! ### Example(Get total lines of code from all rust files in current directory, and all subdirectories) +//! ### Example +//! +//! Gets the total lines of code from all Rust files in the current directory and all subdirectories. //! //! ```no_run //! extern crate tokei; @@ -46,9 +50,10 @@ //! } //! ``` +extern crate encoding; +extern crate glob; #[macro_use] extern crate maplit; -extern crate glob; extern crate rayon; #[cfg(feature = "io")] extern crate serde;