Merge branch 'vd/loose-ref-iteration-optimization'

The code to iterate over loose references has been optimized to
reduce the number of lstat() system calls.

* vd/loose-ref-iteration-optimization:
  files-backend.c: avoid stat in 'loose_fill_ref_dir'
  dir.[ch]: add 'follow_symlink' arg to 'get_dtype'
  dir.[ch]: expose 'get_dtype'
  ref-cache.c: fix prefix matching in ref iteration
This commit is contained in:
Junio C Hamano 2023-10-20 16:23:11 -07:00
commit 6b1e2254d6
7 changed files with 112 additions and 49 deletions

View file

@ -71,42 +71,6 @@ static int dir_file_stats(struct object_directory *object_dir, void *data)
return 0;
}
/*
 * Report the type of directory entry 'e' as a DT_* value. When the dirent
 * itself carries no type (DT_UNKNOWN), the entry is lstat()ed and the type
 * is derived from st_mode instead; if that lstat() fails, or the mode is
 * not a regular file, directory or symlink, DT_UNKNOWN is returned.
 *
 * 'path' names the containing directory and is assumed to end with a
 * slash. The entry name is appended to it temporarily; the buffer is cut
 * back to its original length before the function returns.
 */
static unsigned char get_dtype(struct dirent *e, struct strbuf *path)
{
	unsigned char type = DTYPE(e);
	size_t dir_len = path->len;
	struct stat sb;

	/* The dirent already knows its type: no filesystem access needed. */
	if (type != DT_UNKNOWN)
		return type;

	/* Otherwise classify the entry from its lstat() mode bits. */
	strbuf_addstr(path, e->d_name);
	if (!lstat(path->buf, &sb)) {
		if (S_ISREG(sb.st_mode))
			type = DT_REG;
		else if (S_ISDIR(sb.st_mode))
			type = DT_DIR;
		else if (S_ISLNK(sb.st_mode))
			type = DT_LNK;
	}

	strbuf_setlen(path, dir_len);
	return type;
}
static int count_files(struct strbuf *path)
{
DIR *dir = opendir(path->buf);
@ -117,7 +81,7 @@ static int count_files(struct strbuf *path)
return 0;
while ((e = readdir_skip_dot_and_dotdot(dir)) != NULL)
if (get_dtype(e, path) == DT_REG)
if (get_dtype(e, path, 0) == DT_REG)
count++;
closedir(dir);
@ -146,7 +110,7 @@ static void loose_objs_stats(struct strbuf *buf, const char *path)
base_path_len = count_path.len;
while ((e = readdir_skip_dot_and_dotdot(dir)) != NULL)
if (get_dtype(e, &count_path) == DT_DIR &&
if (get_dtype(e, &count_path, 0) == DT_DIR &&
strlen(e->d_name) == 2 &&
!hex_to_bytes(&c, e->d_name, 1)) {
strbuf_setlen(&count_path, base_path_len);
@ -191,7 +155,7 @@ static int add_directory_to_archiver(struct strvec *archiver_args,
strbuf_add_absolute_path(&abspath, at_root ? "." : path);
strbuf_addch(&abspath, '/');
dtype = get_dtype(e, &abspath);
dtype = get_dtype(e, &abspath, 0);
strbuf_setlen(&buf, len);
strbuf_addstr(&buf, e->d_name);

33
dir.c
View file

@ -2235,6 +2235,39 @@ static int get_index_dtype(struct index_state *istate,
return DT_UNKNOWN;
}
/*
 * Determine the d_type of dirent 'e', which lives in the directory named by
 * 'path' (assumed to end with a slash).
 *
 * A type already recorded in the dirent is returned as-is, unless it is
 * DT_LNK and 'follow_symlink' is set. In the remaining cases the type is
 * derived from st_mode, using stat() when 'follow_symlink' is set and
 * lstat() otherwise:
 *  - if the [l]stat call fails, DT_UNKNOWN is returned;
 *  - if the mode is a regular file, directory or symlink, the matching
 *    DT_REG / DT_DIR / DT_LNK is returned;
 *  - for any other mode the dirent's own d_type is kept.
 *
 * 'path' is extended with the entry name while the function runs and is
 * truncated back to its original length before returning.
 */
unsigned char get_dtype(struct dirent *e, struct strbuf *path,
			int follow_symlink)
{
	struct stat st;
	unsigned char dtype = DTYPE(e);
	size_t base_path_len;

	if (dtype != DT_UNKNOWN && !(follow_symlink && dtype == DT_LNK))
		return dtype;

	/*
	 * d_type unknown or unfollowed symlink, try to fall back on [l]stat
	 * results. If [l]stat fails, explicitly set DT_UNKNOWN.
	 */
	base_path_len = path->len;
	strbuf_addstr(path, e->d_name);
	if ((follow_symlink && stat(path->buf, &st)) ||
	    (!follow_symlink && lstat(path->buf, &st))) {
		/*
		 * Without this, a DT_LNK entry whose target cannot be
		 * stat()ed (e.g. a dangling symlink) would still be reported
		 * as DT_LNK even though the caller asked to follow it.
		 */
		dtype = DT_UNKNOWN;
		goto cleanup;
	}

	/* determine d_type from st_mode */
	if (S_ISREG(st.st_mode))
		dtype = DT_REG;
	else if (S_ISDIR(st.st_mode))
		dtype = DT_DIR;
	else if (S_ISLNK(st.st_mode))
		dtype = DT_LNK;

cleanup:
	/* restore 'path' to the bare directory name for the caller */
	strbuf_setlen(path, base_path_len);
	return dtype;
}
static int resolve_dtype(int dtype, struct index_state *istate,
const char *path, int len)
{

16
dir.h
View file

@ -363,6 +363,22 @@ struct dir_struct {
struct dirent *readdir_skip_dot_and_dotdot(DIR *dirp);
/*
 * Get the d_type of a dirent. If the d_type is unknown, derive it from
 * stat.st_mode using the path to the dirent's containing directory (path) and
 * the name of the dirent itself.
 *
 * If 'follow_symlink' is nonzero, a dirent reported as DT_LNK is not returned
 * as-is; its type is looked up with 'stat' instead of 'lstat'.
 * NOTE(review): this comment used to state that links are *not* followed
 * recursively and that a symlink pointing to another symlink still resolves
 * to 'DT_LNK'. POSIX stat() resolves the whole symlink chain, so that claim
 * looks inaccurate -- confirm the intended behavior.
 *
 * Note that 'path' is assumed to have a trailing slash. It is also modified
 * in-place during the execution of the function, but is then reverted to its
 * original value before returning.
 */
unsigned char get_dtype(struct dirent *e, struct strbuf *path,
int follow_symlink);
/*Count the number of slashes for string s*/
int count_slashes(const char *s);

View file

@ -246,10 +246,8 @@ static void loose_fill_ref_dir(struct ref_store *ref_store,
int dirnamelen = strlen(dirname);
struct strbuf refname;
struct strbuf path = STRBUF_INIT;
size_t path_baselen;
files_ref_path(refs, &path, dirname);
path_baselen = path.len;
d = opendir(path.buf);
if (!d) {
@ -262,23 +260,22 @@ static void loose_fill_ref_dir(struct ref_store *ref_store,
while ((de = readdir(d)) != NULL) {
struct object_id oid;
struct stat st;
int flag;
unsigned char dtype;
if (de->d_name[0] == '.')
continue;
if (ends_with(de->d_name, ".lock"))
continue;
strbuf_addstr(&refname, de->d_name);
strbuf_addstr(&path, de->d_name);
if (stat(path.buf, &st) < 0) {
; /* silently ignore */
} else if (S_ISDIR(st.st_mode)) {
dtype = get_dtype(de, &path, 1);
if (dtype == DT_DIR) {
strbuf_addch(&refname, '/');
add_entry_to_dir(dir,
create_dir_entry(dir->cache, refname.buf,
refname.len));
} else {
} else if (dtype == DT_REG) {
if (!refs_resolve_ref_unsafe(&refs->base,
refname.buf,
RESOLVE_REF_READING,
@ -308,7 +305,6 @@ static void loose_fill_ref_dir(struct ref_store *ref_store,
create_ref_entry(refname.buf, &oid, flag));
}
strbuf_setlen(&refname, dirnamelen);
strbuf_setlen(&path, path_baselen);
}
strbuf_release(&refname);
strbuf_release(&path);

View file

@ -412,7 +412,8 @@ static int cache_ref_iterator_advance(struct ref_iterator *ref_iterator)
if (level->prefix_state == PREFIX_WITHIN_DIR) {
entry_prefix_state = overlaps_prefix(entry->name, iter->prefix);
if (entry_prefix_state == PREFIX_EXCLUDES_DIR)
if (entry_prefix_state == PREFIX_EXCLUDES_DIR ||
(entry_prefix_state == PREFIX_WITHIN_DIR && !(entry->flag & REF_DIR)))
continue;
} else {
entry_prefix_state = level->prefix_state;

View file

@ -264,4 +264,27 @@ test_expect_success 'rev-parse --since= unsqueezed ordering' '
test_cmp expect actual
'
# Checks that `rev-parse --bisect` lists only refs whose names start with
# refs/bisect/bad or refs/bisect/good (good ones negated with "^"), and not
# refs such as refs/bisect/b or refs/bisect/go that are shorter than those
# prefixes.
test_expect_success 'rev-parse --bisect includes bad, excludes good' '
test_commit_bulk 6 &&
git update-ref refs/bisect/bad-1 HEAD~1 &&
git update-ref refs/bisect/b HEAD~2 &&
git update-ref refs/bisect/bad-3 HEAD~3 &&
git update-ref refs/bisect/good-3 HEAD~3 &&
git update-ref refs/bisect/bad-4 HEAD~4 &&
git update-ref refs/bisect/go HEAD~4 &&
# Note: refs/bisect/b and refs/bisect/go should be ignored because they
# do not match the refs/bisect/bad or refs/bisect/good prefixes.
cat >expect <<-EOF &&
refs/bisect/bad-1
refs/bisect/bad-3
refs/bisect/bad-4
^refs/bisect/good-3
EOF
git rev-parse --symbolic-full-name --bisect >actual &&
test_cmp expect actual
'
test_done

View file

@ -956,6 +956,36 @@ test_expect_success '%S in git log --format works with other placeholders (part
test_cmp expect actual
'
# Creates two more commits and records the object names of the four most
# recent commits (HEAD~0 through HEAD~3) in $head1..$head4.
test_expect_success 'setup more commits for %S with --bisect' '
test_commit four &&
test_commit five &&
head1=$(git rev-parse --verify HEAD~0) &&
head2=$(git rev-parse --verify HEAD~1) &&
head3=$(git rev-parse --verify HEAD~2) &&
head4=$(git rev-parse --verify HEAD~3)
'
# Checks the "%S" (source ref) placeholder of `git log --bisect`: only
# refs/bisect/bad-* refs are expected as sources; refs/bisect/go and
# refs/bisect/b are created as decoys that must not show up in the output.
test_expect_success '%S with --bisect labels commits with refs/bisect/bad ref' '
git update-ref refs/bisect/bad-$head1 $head1 &&
git update-ref refs/bisect/go $head1 &&
git update-ref refs/bisect/bad-$head2 $head2 &&
git update-ref refs/bisect/b $head3 &&
git update-ref refs/bisect/bad-$head4 $head4 &&
git update-ref refs/bisect/good-$head4 $head4 &&
# We expect to see the range of commits between refs/bisect/good-$head4
# and refs/bisect/bad-$head1. The "source" ref is the nearest bisect ref
# from which the commit is reachable.
cat >expect <<-EOF &&
$head1 refs/bisect/bad-$head1
$head2 refs/bisect/bad-$head2
$head3 refs/bisect/bad-$head2
EOF
git log --bisect --format="%H %S" >actual &&
test_cmp expect actual
'
test_expect_success 'log --pretty=reference' '
git log --pretty="tformat:%h (%s, %as)" >expect &&
git log --pretty=reference >actual &&