split(1): add '-c' to continue creating files

Currently, split(1) will clobber any existing output files:

    $ split file; ls
    xaa xab xac xad
    $ split second-file; ls
    xaa xab xac xad xae xaf

This patch adds a flag "-c" (mnemonic "create, don't overwrite" or
"continue where you left off"):

    $ split file; ls
    xaa xab xac xad
    $ split -c second-file; ls
    xaa xab xac xad xae xaf xag xah xai xaj

Reviewed by: christos
Approved by: kevans
Differential Revision: https://reviews.freebsd.org/D38553
2024-07-23 03:06:48 +00:00 · 2023-05-30 16:13:16 +03:00 · 2023-05-30 16:13:16 +03:00 · ac17fc816e
parent c4f7198f47
commit ac17fc816e
2 changed files with 36 additions and 12 deletions
--- a/usr.bin/split/split.1
+++ b/usr.bin/split/split.1
@ -36,12 +36,12 @@
 .Nd split a file into pieces
 .Sh SYNOPSIS
 .Nm
-.Op Fl d
+.Op Fl cd
 .Op Fl l Ar line_count
 .Op Fl a Ar suffix_length
 .Op Ar file Op Ar prefix
 .Nm
-.Op Fl d
+.Op Fl cd
 .Fl b Ar byte_count Ns
 .Oo
 .Sm off
@ -51,12 +51,12 @@
 .Op Fl a Ar suffix_length
 .Op Ar file Op Ar prefix
 .Nm
-.Op Fl d
+.Op Fl cd
 .Fl n Ar chunk_count
 .Op Fl a Ar suffix_length
 .Op Ar file Op Ar prefix
 .Nm
-.Op Fl d
+.Op Fl cd
 .Fl p Ar pattern
 .Op Fl a Ar suffix_length
 .Op Ar file Op Ar prefix
@ -112,6 +112,9 @@ or
 is appended to the number, the file is split into
 .Ar byte_count
 gigabyte pieces.
+.It Fl c
+Continue creating files and do not overwrite existing
+output files.
 .It Fl d
 Use a numeric suffix instead of an alphabetic suffix.
 .It Fl l Ar line_count
@ -163,6 +166,15 @@ argument is not specified, the file is split into lexically ordered
 files named with the prefix
 .Dq Li x
 and with suffixes as above.
+.Pp
+By default,
+.Nm
+will overwrite any existing output files.
+If the
+.Fl c
+flag is specified,
+.Nm
+will instead create files with names that do not already exist.
 .Sh ENVIRONMENT
 The
 .Ev LANG , LC_ALL , LC_CTYPE
--- a/usr.bin/split/split.c
+++ b/usr.bin/split/split.c
@ -67,6 +67,7 @@ static const char sccsid[] = "@(#)split.c	8.2 (Berkeley) 4/16/94";

 static off_t	 bytecnt;		/* Byte count to split on. */
 static off_t	 chunks = 0;		/* Chunks count to split into. */
+static bool      clobber = true;        /* Whether to overwrite existing output files. */
 static long	 numlines;		/* Line count to split on. */
 static int	 file_open;		/* If a file open. */
 static int	 ifd = -1, ofd = -1;	/* Input/output file descriptors. */
@ -93,7 +94,7 @@ main(int argc, char **argv)
 	setlocale(LC_ALL, "");

 	dflag = false;
-	while ((ch = getopt(argc, argv, "0123456789a:b:dl:n:p:")) != -1)
+	while ((ch = getopt(argc, argv, "0123456789a:b:cdl:n:p:")) != -1)
 		switch (ch) {
 		case '0': case '1': case '2': case '3': case '4':
 		case '5': case '6': case '7': case '8': case '9':
@ -125,6 +126,9 @@ main(int argc, char **argv)
 			if (error == -1)
 				errx(EX_USAGE, "%s: offset too large", optarg);
 			break;
+		case 'c':               /* Continue, don't overwrite output files. */
+			clobber = false;
+			break;
 		case 'd':		/* Decimal suffix */
 			dflag = true;
 			break;
@ -347,6 +351,10 @@ newfile(void)
 	static char *fpnt;
 	char beg, end;
 	int pattlen;
+	int flags = O_WRONLY | O_CREAT | O_TRUNC;
+
+	if (!clobber)
+		flags |= O_EXCL;

 	if (ofd == -1) {
 		if (fname[0] == '\0') {
@ -355,9 +363,10 @@ newfile(void)
 		} else {
 			fpnt = fname + strlen(fname);
 		}
-		ofd = fileno(stdout);
-	}
+	} else if (close(ofd) != 0)
+		err(1, "%s", fname);

+	again:
 	if (dflag) {
 		beg = '0';
 		end = '9';
@ -417,8 +426,11 @@ newfile(void)
 	fpnt[sufflen] = '\0';

 	++fnum;
-	if (!freopen(fname, "w", stdout))
+	if ((ofd = open(fname, flags, DEFFILEMODE)) < 0) {
+		if (!clobber && errno == EEXIST)
+			goto again;
 		err(EX_IOERR, "%s", fname);
+	}
 	file_open = 1;
 }

@ -426,9 +438,9 @@ static void
 usage(void)
 {
 	(void)fprintf(stderr,
-"usage: split [-d] [-l line_count] [-a suffix_length] [file [prefix]]\n"
-"       split [-d] -b byte_count[K|k|M|m|G|g] [-a suffix_length] [file [prefix]]\n"
-"       split [-d] -n chunk_count [-a suffix_length] [file [prefix]]\n"
-"       split [-d] -p pattern [-a suffix_length] [file [prefix]]\n");
+"usage: split [-cd] [-l line_count] [-a suffix_length] [file [prefix]]\n"
+"       split [-cd] -b byte_count[K|k|M|m|G|g] [-a suffix_length] [file [prefix]]\n"
+"       split [-cd] -n chunk_count [-a suffix_length] [file [prefix]]\n"
+"       split [-cd] -p pattern [-a suffix_length] [file [prefix]]\n");
 	exit(EX_USAGE);
 }