mirror of
https://github.com/git/git
synced 2024-10-30 14:03:28 +00:00
5143ac07b1
The index stores file sizes using a uint32_t. This causes any file that is a multiple of 2^32 to have a cached file size of zero. Zero is a special value used by racily clean. This causes git to rehash every file that is a multiple of 2^32 every time git status or git commit is run. This patch mitigates the problem by making all files that are a multiple of 2^32 appear to have a size of 1<<31 instead of zero. The value of 1<<31 is chosen to keep it as far away from zero as possible to help prevent things getting mixed up with unpatched versions of git. An example would be to have a 2^32 sized file in the index of patched git. Patched git would save the file as 2^31 in the cache. An unpatched git would very much see the file has changed in size and force it to rehash the file, which is safe. The file would have to grow or shrink by exactly 2^31 and retain all of its ctime, mtime, and other attributes for old git to not notice the change. This patch does not change the behavior of any file that is not an exact multiple of 2^32. Signed-off-by: Jason D. Hatton <jhatton@globalfinishing.com> Signed-off-by: brian m. carlson <bk2204@github.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
103 lines
2.4 KiB
C
103 lines
2.4 KiB
C
#include "git-compat-util.h"
|
|
#include "environment.h"
|
|
#include "statinfo.h"
|
|
|
|
/*
 * Munge st_size into an unsigned int.
 *
 * The value is truncated by the conversion to unsigned int.  A non-zero
 * st_size that truncates to zero is reported as 0x80000000 instead; every
 * other size is returned as its truncated value.
 */
static unsigned int munge_st_size(off_t st_size) {
	unsigned int sd_size = st_size;

	/*
	 * If the file is an exact multiple of 4 GiB, modify the value so it
	 * doesn't get marked as racily clean (zero).
	 */
	if (!sd_size && st_size)
		return 0x80000000;
	else
		return sd_size;
}
|
|
|
|
void fill_stat_data(struct stat_data *sd, struct stat *st)
|
|
{
|
|
sd->sd_ctime.sec = (unsigned int)st->st_ctime;
|
|
sd->sd_mtime.sec = (unsigned int)st->st_mtime;
|
|
sd->sd_ctime.nsec = ST_CTIME_NSEC(*st);
|
|
sd->sd_mtime.nsec = ST_MTIME_NSEC(*st);
|
|
sd->sd_dev = st->st_dev;
|
|
sd->sd_ino = st->st_ino;
|
|
sd->sd_uid = st->st_uid;
|
|
sd->sd_gid = st->st_gid;
|
|
sd->sd_size = munge_st_size(st->st_size);
|
|
}
|
|
|
|
int match_stat_data(const struct stat_data *sd, struct stat *st)
|
|
{
|
|
int changed = 0;
|
|
|
|
if (sd->sd_mtime.sec != (unsigned int)st->st_mtime)
|
|
changed |= MTIME_CHANGED;
|
|
if (trust_ctime && check_stat &&
|
|
sd->sd_ctime.sec != (unsigned int)st->st_ctime)
|
|
changed |= CTIME_CHANGED;
|
|
|
|
#ifdef USE_NSEC
|
|
if (check_stat && sd->sd_mtime.nsec != ST_MTIME_NSEC(*st))
|
|
changed |= MTIME_CHANGED;
|
|
if (trust_ctime && check_stat &&
|
|
sd->sd_ctime.nsec != ST_CTIME_NSEC(*st))
|
|
changed |= CTIME_CHANGED;
|
|
#endif
|
|
|
|
if (check_stat) {
|
|
if (sd->sd_uid != (unsigned int) st->st_uid ||
|
|
sd->sd_gid != (unsigned int) st->st_gid)
|
|
changed |= OWNER_CHANGED;
|
|
if (sd->sd_ino != (unsigned int) st->st_ino)
|
|
changed |= INODE_CHANGED;
|
|
}
|
|
|
|
#ifdef USE_STDEV
|
|
/*
|
|
* st_dev breaks on network filesystems where different
|
|
* clients will have different views of what "device"
|
|
* the filesystem is on
|
|
*/
|
|
if (check_stat && sd->sd_dev != (unsigned int) st->st_dev)
|
|
changed |= INODE_CHANGED;
|
|
#endif
|
|
|
|
if (sd->sd_size != munge_st_size(st->st_size))
|
|
changed |= DATA_CHANGED;
|
|
|
|
return changed;
|
|
}
|
|
|
|
void stat_validity_clear(struct stat_validity *sv)
|
|
{
|
|
FREE_AND_NULL(sv->sd);
|
|
}
|
|
|
|
int stat_validity_check(struct stat_validity *sv, const char *path)
|
|
{
|
|
struct stat st;
|
|
|
|
if (stat(path, &st) < 0)
|
|
return sv->sd == NULL;
|
|
if (!sv->sd)
|
|
return 0;
|
|
return S_ISREG(st.st_mode) && !match_stat_data(sv->sd, &st);
|
|
}
|
|
|
|
void stat_validity_update(struct stat_validity *sv, int fd)
|
|
{
|
|
struct stat st;
|
|
|
|
if (fstat(fd, &st) < 0 || !S_ISREG(st.st_mode))
|
|
stat_validity_clear(sv);
|
|
else {
|
|
if (!sv->sd)
|
|
CALLOC_ARRAY(sv->sd, 1);
|
|
fill_stat_data(sv->sd, &st);
|
|
}
|
|
}
|