2020-02-26 02:43:55 +00:00
/*
 * Copyright (c) 2020, Fei Wu <f.eiwu@yahoo.com>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */
2022-12-04 18:02:33 +00:00
# include <AK/DeprecatedString.h>
2020-05-26 11:52:44 +00:00
# include <AK/LexicalPath.h>
2021-06-11 17:38:56 +00:00
# include <AK/NumberFormat.h>
2020-02-26 02:43:55 +00:00
# include <AK/Vector.h>
# include <LibCore/ArgsParser.h>
# include <LibCore/DateTime.h>
# include <LibCore/DirIterator.h>
2023-02-09 02:02:46 +00:00
# include <LibCore/File.h>
2021-12-18 17:59:15 +00:00
# include <LibCore/System.h>
# include <LibMain/Main.h>
2020-02-26 02:43:55 +00:00
# include <limits.h>
# include <string.h>
struct DuOption {
enum class TimeType {
NotUsed ,
Modification ,
Access ,
Status
} ;
2021-06-11 17:38:56 +00:00
bool human_readable = false ;
2023-01-10 12:08:57 +00:00
bool human_readable_si = false ;
2020-02-26 02:43:55 +00:00
bool all = false ;
bool apparent_size = false ;
2022-07-23 16:25:21 +00:00
i64 threshold = 0 ;
2020-02-26 02:43:55 +00:00
TimeType time_type = TimeType : : NotUsed ;
2022-12-04 18:02:33 +00:00
Vector < DeprecatedString > excluded_patterns ;
2022-07-23 16:25:21 +00:00
u64 block_size = 1024 ;
2022-07-23 08:37:29 +00:00
size_t max_depth = SIZE_MAX ;
2020-02-26 02:43:55 +00:00
} ;
2023-09-26 17:28:15 +00:00
// Identity of a file system object as a (device, inode) pair, taken from the
// st_dev / st_ino fields of a stat result. Used as the key for the set of
// paths that have already been accounted for.
struct VisitedFile {
    dev_t device; // st_dev of the object
    ino_t inode;  // st_ino of the object
};
template < >
struct AK : : Traits < VisitedFile > : public GenericTraits < VisitedFile > {
static unsigned hash ( VisitedFile const & visited_file )
{
return pair_int_hash ( u64_hash ( visited_file . device ) , u64_hash ( visited_file . inode ) ) ;
}
static bool equals ( VisitedFile const & a , VisitedFile const & b )
{
return a . device = = b . device & & a . inode = = b . inode ;
}
} ;
static HashTable < VisitedFile > s_visited_files ;
2022-12-04 18:02:33 +00:00
static ErrorOr < void > parse_args ( Main : : Arguments arguments , Vector < DeprecatedString > & files , DuOption & du_option ) ;
2023-05-22 17:17:36 +00:00
static u64 print_space_usage ( DeprecatedString const & path , DuOption const & du_option , size_t current_depth , bool inside_dir = false ) ;
2020-02-26 02:43:55 +00:00
2021-12-18 17:59:15 +00:00
// Program entry point: parses the command line, then reports the space usage
// of every requested path in the order given.
ErrorOr<int> serenity_main(Main::Arguments arguments)
{
    DuOption options;
    Vector<DeprecatedString> paths;
    TRY(parse_args(arguments, paths, options));

    // Each command line argument is a traversal root at depth 0. The byte
    // total returned for a root is not needed here; it has already been printed.
    for (size_t index = 0; index < paths.size(); ++index)
        print_space_usage(paths[index], options, 0);

    return 0;
}
2022-12-04 18:02:33 +00:00
// Parses the command line.
//
// On success, `du_option` holds the requested settings and `files` holds the
// paths to measure (a single "." when none were given on the command line).
//
// Returns an error if the `--exclude-from` file cannot be opened or read, or
// if a block size of zero was requested.
ErrorOr<void> parse_args(Main::Arguments arguments, Vector<DeprecatedString>& files, DuOption& du_option)
{
    bool summarize = false;
    StringView pattern;
    StringView exclude_from;
    Vector<StringView> files_to_process;

    // `--time` maps a handful of aliases onto the three timestamp kinds and
    // rejects anything else.
    Core::ArgsParser::Option time_option {
        Core::ArgsParser::OptionArgumentMode::Required,
        "Show time of type time-type of any file in the directory, or any of its subdirectories. "
        "Available choices: mtime, modification, ctime, status, use, atime, access",
        "time",
        0,
        "time-type",
        [&du_option](StringView option) {
            if (option == "mtime"sv || option == "modification"sv)
                du_option.time_type = DuOption::TimeType::Modification;
            else if (option == "ctime"sv || option == "status"sv || option == "use"sv)
                du_option.time_type = DuOption::TimeType::Status;
            else if (option == "atime"sv || option == "access"sv)
                du_option.time_type = DuOption::TimeType::Access;
            else
                return false;
            return true;
        }
    };

    // `-k` is a flag (no argument) that forces the block size to 1024.
    Core::ArgsParser::Option block_size_1k_option {
        Core::ArgsParser::OptionArgumentMode::None,
        "Equivalent to `--block-size 1024`",
        nullptr,
        'k',
        nullptr,
        [&du_option](StringView) {
            du_option.block_size = 1024;
            return true;
        }
    };

    Core::ArgsParser args_parser;
    args_parser.set_general_help("Display actual or apparent disk usage of files or directories.");
    args_parser.add_option(du_option.all, "Write counts for all files, not just directories", "all", 'a');
    args_parser.add_option(du_option.apparent_size, "Print apparent sizes, rather than disk usage", "apparent-size", 0);
    args_parser.add_option(du_option.human_readable, "Print human-readable sizes", "human-readable", 'h');
    args_parser.add_option(du_option.human_readable_si, "Print human-readable sizes in SI units", "si", 0);
    args_parser.add_option(du_option.max_depth, "Print the total for a directory or file only if it is N or fewer levels below the command line argument", "max-depth", 'd', "N");
    args_parser.add_option(summarize, "Display only a total for each argument", "summarize", 's');
    args_parser.add_option(du_option.threshold, "Exclude entries smaller than size if positive, or entries greater than size if negative", "threshold", 't', "size");
    args_parser.add_option(move(time_option));
    args_parser.add_option(pattern, "Exclude files that match pattern", "exclude", 0, "pattern");
    args_parser.add_option(exclude_from, "Exclude files that match any pattern in file", "exclude-from", 'X', "file");
    args_parser.add_option(du_option.block_size, "Outputs file sizes as the required blocks with the given size (defaults to 1024)", "block-size", 'B', "size");
    args_parser.add_option(move(block_size_1k_option));
    args_parser.add_positional_argument(files_to_process, "File to process", "file", Core::ArgsParser::Required::No);
    args_parser.parse(arguments);

    // The block size is used as a divisor when sizes are printed, so a value
    // of zero must be rejected up front instead of dividing by zero later.
    if (du_option.block_size == 0)
        return Error::from_string_literal("Block size must be greater than zero");

    // Summarizing is the same as reporting nothing below the arguments themselves.
    if (summarize)
        du_option.max_depth = 0;

    if (!pattern.is_empty())
        du_option.excluded_patterns.append(pattern);

    if (!exclude_from.is_empty()) {
        // The exclusion file holds one pattern per line.
        auto file = TRY(Core::File::open(exclude_from, Core::File::OpenMode::Read));
        auto const buff = TRY(file->read_until_eof());
        if (!buff.is_empty()) {
            DeprecatedString patterns = DeprecatedString::copy(buff, Chomp);
            du_option.excluded_patterns.extend(patterns.split('\n'));
        }
    }

    for (auto const& file : files_to_process)
        files.append(file);

    // With no explicit paths, measure the current directory.
    if (files.is_empty())
        files.append(".");

    return {};
}
2023-05-22 17:17:36 +00:00
u64 print_space_usage ( DeprecatedString const & path , DuOption const & du_option , size_t current_depth , bool inside_dir )
2020-02-26 02:43:55 +00:00
{
2022-07-23 16:36:03 +00:00
u64 size = 0 ;
2023-05-22 17:17:36 +00:00
auto path_stat_or_error = Core : : System : : lstat ( path ) ;
if ( path_stat_or_error . is_error ( ) ) {
warnln ( " du: cannot stat '{}': {} " , path , path_stat_or_error . release_error ( ) ) ;
return 0 ;
}
auto path_stat = path_stat_or_error . release_value ( ) ;
2023-09-26 17:28:15 +00:00
VisitedFile visited_file { path_stat . st_dev , path_stat . st_ino } ;
if ( s_visited_files . contains ( visited_file ) ) {
return 0 ;
}
s_visited_files . set ( visited_file ) ;
2022-04-01 17:58:27 +00:00
bool const is_directory = S_ISDIR ( path_stat . st_mode ) ;
2022-07-23 08:37:29 +00:00
if ( is_directory ) {
2020-02-26 02:43:55 +00:00
auto di = Core : : DirIterator ( path , Core : : DirIterator : : SkipParentAndBaseDir ) ;
if ( di . has_error ( ) ) {
2023-03-01 15:55:15 +00:00
auto error = di . error ( ) ;
2023-05-22 17:17:23 +00:00
warnln ( " du: cannot read directory '{}': {} " , path , error ) ;
2023-05-22 17:17:36 +00:00
return 0 ;
2020-02-26 02:43:55 +00:00
}
2021-12-18 17:59:15 +00:00
2020-02-26 02:43:55 +00:00
while ( di . has_next ( ) ) {
2022-04-01 17:58:27 +00:00
auto const child_path = di . next_full_path ( ) ;
2023-05-22 17:17:36 +00:00
size + = print_space_usage ( child_path , du_option , current_depth + 1 , true ) ;
2020-02-26 02:43:55 +00:00
}
}
2022-04-01 17:58:27 +00:00
auto const basename = LexicalPath : : basename ( path ) ;
for ( auto const & pattern : du_option . excluded_patterns ) {
2020-02-26 02:43:55 +00:00
if ( basename . matches ( pattern , CaseSensitivity : : CaseSensitive ) )
2023-05-22 17:17:36 +00:00
return 0 ;
2020-02-26 02:43:55 +00:00
}
2022-07-20 21:02:35 +00:00
if ( ! du_option . apparent_size ) {
2022-01-02 16:10:00 +00:00
constexpr auto block_size = 512 ;
2022-07-23 16:36:03 +00:00
size + = path_stat . st_blocks * block_size ;
} else {
size + = path_stat . st_size ;
2020-02-26 02:43:55 +00:00
}
2022-07-23 16:45:53 +00:00
bool is_beyond_depth = current_depth > du_option . max_depth ;
bool is_inner_file = inside_dir & & ! is_directory ;
bool is_outside_threshold = ( du_option . threshold > 0 & & size < static_cast < u64 > ( du_option . threshold ) ) | | ( du_option . threshold < 0 & & size > static_cast < u64 > ( - du_option . threshold ) ) ;
2020-02-26 02:43:55 +00:00
2022-07-23 16:45:53 +00:00
// All of these still count towards the full size, they are just not reported on individually.
if ( is_beyond_depth | | ( is_inner_file & & ! du_option . all ) | | is_outside_threshold )
return size ;
2022-07-23 08:37:29 +00:00
2021-06-11 17:38:56 +00:00
if ( du_option . human_readable ) {
out ( " {} " , human_readable_size ( size ) ) ;
2023-01-10 12:08:57 +00:00
} else if ( du_option . human_readable_si ) {
out ( " {} " , human_readable_size ( size , AK : : HumanReadableBasedOn : : Base10 ) ) ;
2021-06-11 17:38:56 +00:00
} else {
2022-07-23 16:36:03 +00:00
out ( " {} " , ceil_div ( size , du_option . block_size ) ) ;
2021-06-11 17:38:56 +00:00
}
2020-02-26 02:43:55 +00:00
2021-06-11 17:38:56 +00:00
if ( du_option . time_type = = DuOption : : TimeType : : NotUsed ) {
outln ( " \t {} " , path ) ;
} else {
2020-02-26 02:43:55 +00:00
auto time = path_stat . st_mtime ;
switch ( du_option . time_type ) {
case DuOption : : TimeType : : Access :
time = path_stat . st_atime ;
break ;
case DuOption : : TimeType : : Status :
time = path_stat . st_ctime ;
2021-07-05 17:17:08 +00:00
break ;
2020-02-26 02:43:55 +00:00
default :
break ;
}
2022-12-06 01:12:49 +00:00
auto const formatted_time = Core : : DateTime : : from_timestamp ( time ) . to_deprecated_string ( ) ;
2021-06-11 17:38:56 +00:00
outln ( " \t {} \t {} " , formatted_time , path ) ;
2020-02-26 02:43:55 +00:00
}
2023-05-22 17:17:36 +00:00
return size ;
2020-02-26 02:43:55 +00:00
}