Improve performance of the "read" built-in command when using a seekable fd.

The read built-in command calls read(2) with a 1-byte buffer because
newline characters need to be detected even on a byte stream which
comes from a non-seekable file descriptor.  Because of this, the
following script makes more than 6,000 read(2) calls to show a 6KiB file:

 while read IN; do echo "$IN"; done < /COPYRIGHT

When the input byte stream is seekable, it is possible to read a data
block and then reposition the file pointer to just after the point
where a newline character was found.  This change adds a small buffer
to do this and reduces the number of read(2) calls.

Theoretically, multiple built-in commands reading the same seekable
byte stream in a single pipe chain can share the buffer.  However,
this change just makes a single invocation of the read built-in
allocate a buffer and deallocate it every time for simplicity.
Although this causes read(2) to read the same regions multiple times,
the performance penalty should be small compared to the reduction of
read(2) calls.

Reviewed by:		jilles
MFC after:		1 week
Differential Revision:	https://reviews.freebsd.org/D23747
This commit is contained in:
Hiroki Sato 2020-02-20 03:01:27 +00:00
parent cafbf0c664
commit be860ca2a7
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=358152

View file

@ -66,10 +66,79 @@ __FBSDID("$FreeBSD$");
#undef eflag
#define	READ_BUFLEN	1024

/*
 * Input state for one run of the byte-at-a-time reader (fdgetc()).
 * Bytes are fetched from fd into buf in chunks of up to buflen bytes
 * and then handed out one at a time.
 */
struct fdctx {
	int	fd;		/* descriptor the bytes are read from */
	size_t	off;		/* index in buf of the next byte to return */
	size_t	buflen;		/* byte count requested from each read(2) */
	char	*ep;		/* one past the last valid byte in buf */
	char	buf[READ_BUFLEN];
};
/* File-local helpers for the buffered read context (struct fdctx). */
static void fdctx_init(int, struct fdctx *);
static void fdctx_destroy(struct fdctx *);
static ssize_t fdgetc(struct fdctx *, char *);
/*
 * Externally visible command functions.  readcmd is the read builtin;
 * umaskcmd and ulimitcmd are presumably the umask and ulimit builtins
 * (their definitions are not part of this excerpt).
 */
int readcmd(int, char **);
int umaskcmd(int, char **);
int ulimitcmd(int, char **);
static void
fdctx_init(int fd, struct fdctx *fdc)
{
off_t cur;
/* Check if fd is seekable. */
cur = lseek(fd, 0, SEEK_CUR);
*fdc = (struct fdctx){
.fd = fd,
.buflen = (cur != -1) ? READ_BUFLEN : 1,
.ep = &fdc->buf[0], /* No data */
};
}
/*
 * Fetch the next input byte into *c.
 *
 * Returns 1 when a byte was stored.  When the buffer is exhausted it is
 * refilled with a single read(2); if that read yields 0 (end of file)
 * or -1 (error, errno set by read(2)), that value is returned as-is and
 * *c and the context are left untouched.
 */
static ssize_t
fdgetc(struct fdctx *fdc, char *c)
{
	const char *cursor;
	ssize_t got;

	cursor = fdc->buf + fdc->off;
	if (cursor == fdc->ep) {
		/* Everything buffered has been consumed; get more. */
		got = read(fdc->fd, fdc->buf, fdc->buflen);
		if (got <= 0)
			return (got);
		fdc->ep = fdc->buf + got;
		fdc->off = 0;
		cursor = fdc->buf;
	}
	*c = *cursor;
	fdc->off++;
	return (1);
}
/*
 * Finish using *fdc.
 *
 * In block mode (buflen > 1) read(2) may have consumed more bytes from
 * the descriptor than were handed out through fdgetc().  Move the file
 * offset back by that surplus so the next reader of the descriptor
 * starts right after the last byte that was actually consumed.  In
 * 1-byte mode there is never a surplus, so nothing is done.
 *
 * The lseek(2) result is deliberately ignored: repositioning is
 * best-effort and there is nothing useful to do if it fails.
 */
static void
fdctx_destroy(struct fdctx *fdc)
{
	size_t residue;

	if (fdc->buflen <= 1)
		return;

	/*
	 * Reposition the file offset.  Here is the layout of buf:
	 *
	 *           | off
	 *           v
	 * |*****************|-------|
	 * buf               ep   buf+buflen
	 *           |<- residue ->|
	 *
	 * off:     current character
	 * ep:      offset just after read(2)
	 * residue: length for reposition
	 */
	residue = (size_t)(fdc->ep - fdc->buf) - fdc->off;
	if (residue > 0) {
		/*
		 * Convert to off_t before negating.  Negating the size_t
		 * itself wraps around, and where size_t is narrower than
		 * off_t the wrapped value becomes a large positive offset,
		 * seeking forward instead of backward.
		 */
		(void) lseek(fdc->fd, -(off_t)residue, SEEK_CUR);
	}
}
/*
* The read builtin. The -r option causes backslashes to be treated like
* ordinary characters.
@ -108,6 +177,7 @@ readcmd(int argc __unused, char **argv __unused)
fd_set ifds;
ssize_t nread;
int sig;
struct fdctx fdctx;
rflag = 0;
prompt = NULL;
@ -173,8 +243,9 @@ readcmd(int argc __unused, char **argv __unused)
backslash = 0;
STARTSTACKSTR(p);
lastnonifs = lastnonifsws = -1;
fdctx_init(STDIN_FILENO, &fdctx);
for (;;) {
nread = read(STDIN_FILENO, &c, 1);
nread = fdgetc(&fdctx, &c);
if (nread == -1) {
if (errno == EINTR) {
sig = pendingsig;
@ -260,6 +331,7 @@ readcmd(int argc __unused, char **argv __unused)
STARTSTACKSTR(p);
lastnonifs = lastnonifsws = -1;
}
fdctx_destroy(&fdctx);
STACKSTRNUL(p);
/*