git/entry.c
Junio C Hamano de6182db67 streaming_write_entry(): support files with holes
One typical use of a large binary file is to hold a sparse on-disk hash
table with a lot of holes. Help preserving the holes with lseek().

Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-05-20 23:16:53 -07:00

316 lines
7.6 KiB
C

#include "cache.h"
#include "blob.h"
#include "dir.h"
#include "streaming.h"
static void create_directories(const char *path, int path_len,
const struct checkout *state)
{
char *buf = xmalloc(path_len + 1);
int len = 0;
while (len < path_len) {
do {
buf[len] = path[len];
len++;
} while (len < path_len && path[len] != '/');
if (len >= path_len)
break;
buf[len] = 0;
/*
* For 'checkout-index --prefix=<dir>', <dir> is
* allowed to be a symlink to an existing directory,
* and we set 'state->base_dir_len' below, such that
* we test the path components of the prefix with the
* stat() function instead of the lstat() function.
*/
if (has_dirs_only_path(buf, len, state->base_dir_len))
continue; /* ok, it is already a directory. */
/*
* If this mkdir() would fail, it could be that there
* is already a symlink or something else exists
* there, therefore we then try to unlink it and try
* one more time to create the directory.
*/
if (mkdir(buf, 0777)) {
if (errno == EEXIST && state->force &&
!unlink_or_warn(buf) && !mkdir(buf, 0777))
continue;
die_errno("cannot create directory at '%s'", buf);
}
}
free(buf);
}
static void remove_subtree(const char *path)
{
DIR *dir = opendir(path);
struct dirent *de;
char pathbuf[PATH_MAX];
char *name;
if (!dir)
die_errno("cannot opendir '%s'", path);
strcpy(pathbuf, path);
name = pathbuf + strlen(path);
*name++ = '/';
while ((de = readdir(dir)) != NULL) {
struct stat st;
if (is_dot_or_dotdot(de->d_name))
continue;
strcpy(name, de->d_name);
if (lstat(pathbuf, &st))
die_errno("cannot lstat '%s'", pathbuf);
if (S_ISDIR(st.st_mode))
remove_subtree(pathbuf);
else if (unlink(pathbuf))
die_errno("cannot unlink '%s'", pathbuf);
}
closedir(dir);
if (rmdir(path))
die_errno("cannot rmdir '%s'", path);
}
static int create_file(const char *path, unsigned int mode)
{
mode = (mode & 0100) ? 0777 : 0666;
return open(path, O_WRONLY | O_CREAT | O_EXCL, mode);
}
static void *read_blob_entry(struct cache_entry *ce, unsigned long *size)
{
enum object_type type;
void *new = read_sha1_file(ce->sha1, &type, size);
if (new) {
if (type == OBJ_BLOB)
return new;
free(new);
}
return NULL;
}
static int open_output_fd(char *path, struct cache_entry *ce, int to_tempfile)
{
int symlink = (ce->ce_mode & S_IFMT) != S_IFREG;
if (to_tempfile) {
strcpy(path, symlink
? ".merge_link_XXXXXX" : ".merge_file_XXXXXX");
return mkstemp(path);
} else {
return create_file(path, !symlink ? ce->ce_mode : 0666);
}
}
static int fstat_output(int fd, const struct checkout *state, struct stat *st)
{
/* use fstat() only when path == ce->name */
if (fstat_is_reliable() &&
state->refresh_cache && !state->base_dir_len) {
fstat(fd, st);
return 1;
}
return 0;
}
static int streaming_write_entry(struct cache_entry *ce, char *path,
const struct checkout *state, int to_tempfile,
int *fstat_done, struct stat *statbuf)
{
struct git_istream *st;
enum object_type type;
unsigned long sz;
int result = -1;
ssize_t kept = 0;
int fd = -1;
st = open_istream(ce->sha1, &type, &sz);
if (!st)
return -1;
if (type != OBJ_BLOB)
goto close_and_exit;
fd = open_output_fd(path, ce, to_tempfile);
if (fd < 0)
goto close_and_exit;
for (;;) {
char buf[1024 * 16];
ssize_t wrote, holeto;
ssize_t readlen = read_istream(st, buf, sizeof(buf));
if (!readlen)
break;
if (sizeof(buf) == readlen) {
for (holeto = 0; holeto < readlen; holeto++)
if (buf[holeto])
break;
if (readlen == holeto) {
kept += holeto;
continue;
}
}
if (kept && lseek(fd, kept, SEEK_CUR) == (off_t) -1)
goto close_and_exit;
else
kept = 0;
wrote = write_in_full(fd, buf, readlen);
if (wrote != readlen)
goto close_and_exit;
}
if (kept && (lseek(fd, kept - 1, SEEK_CUR) == (off_t) -1 ||
write(fd, "", 1) != 1))
goto close_and_exit;
*fstat_done = fstat_output(fd, state, statbuf);
close_and_exit:
close_istream(st);
if (0 <= fd)
result = close(fd);
if (result && 0 <= fd)
unlink(path);
return result;
}
static int write_entry(struct cache_entry *ce, char *path, const struct checkout *state, int to_tempfile)
{
unsigned int ce_mode_s_ifmt = ce->ce_mode & S_IFMT;
int fd, ret, fstat_done = 0;
char *new;
struct strbuf buf = STRBUF_INIT;
unsigned long size;
size_t wrote, newsize = 0;
struct stat st;
if ((ce_mode_s_ifmt == S_IFREG) &&
can_bypass_conversion(path) &&
!streaming_write_entry(ce, path, state, to_tempfile,
&fstat_done, &st))
goto finish;
switch (ce_mode_s_ifmt) {
case S_IFREG:
case S_IFLNK:
new = read_blob_entry(ce, &size);
if (!new)
return error("unable to read sha1 file of %s (%s)",
path, sha1_to_hex(ce->sha1));
if (ce_mode_s_ifmt == S_IFLNK && has_symlinks && !to_tempfile) {
ret = symlink(new, path);
free(new);
if (ret)
return error("unable to create symlink %s (%s)",
path, strerror(errno));
break;
}
/*
* Convert from git internal format to working tree format
*/
if (ce_mode_s_ifmt == S_IFREG &&
convert_to_working_tree(ce->name, new, size, &buf)) {
free(new);
new = strbuf_detach(&buf, &newsize);
size = newsize;
}
fd = open_output_fd(path, ce, to_tempfile);
if (fd < 0) {
free(new);
return error("unable to create file %s (%s)",
path, strerror(errno));
}
wrote = write_in_full(fd, new, size);
if (!to_tempfile)
fstat_done = fstat_output(fd, state, &st);
close(fd);
free(new);
if (wrote != size)
return error("unable to write file %s", path);
break;
case S_IFGITLINK:
if (to_tempfile)
return error("cannot create temporary subproject %s", path);
if (mkdir(path, 0777) < 0)
return error("cannot create subproject directory %s", path);
break;
default:
return error("unknown file mode for %s in index", path);
}
finish:
if (state->refresh_cache) {
if (!fstat_done)
lstat(ce->name, &st);
fill_stat_cache_info(ce, &st);
}
return 0;
}
/*
* This is like 'lstat()', except it refuses to follow symlinks
* in the path, after skipping "skiplen".
*/
static int check_path(const char *path, int len, struct stat *st, int skiplen)
{
const char *slash = path + len;
while (path < slash && *slash != '/')
slash--;
if (!has_dirs_only_path(path, slash - path, skiplen)) {
errno = ENOENT;
return -1;
}
return lstat(path, st);
}
int checkout_entry(struct cache_entry *ce, const struct checkout *state, char *topath)
{
static char path[PATH_MAX + 1];
struct stat st;
int len = state->base_dir_len;
if (topath)
return write_entry(ce, topath, state, 1);
memcpy(path, state->base_dir, len);
strcpy(path + len, ce->name);
len += ce_namelen(ce);
if (!check_path(path, len, &st, state->base_dir_len)) {
unsigned changed = ce_match_stat(ce, &st, CE_MATCH_IGNORE_VALID|CE_MATCH_IGNORE_SKIP_WORKTREE);
if (!changed)
return 0;
if (!state->force) {
if (!state->quiet)
fprintf(stderr, "%s already exists, no checkout\n", path);
return -1;
}
/*
* We unlink the old file, to get the new one with the
* right permissions (including umask, which is nasty
* to emulate by hand - much easier to let the system
* just do the right thing)
*/
if (S_ISDIR(st.st_mode)) {
/* If it is a gitlink, leave it alone! */
if (S_ISGITLINK(ce->ce_mode))
return 0;
if (!state->force)
return error("%s is a directory", path);
remove_subtree(path);
} else if (unlink(path))
return error("unable to unlink old '%s' (%s)", path, strerror(errno));
} else if (state->not_new)
return 0;
create_directories(path, len, state);
return write_entry(ce, path, state, 0);
}