git/entry.c
Junio C Hamano dd8e912190 streaming_write_entry(): use streaming API in write_entry()
When the output to a path does not have to be converted, we can read from
the object database from the streaming API and write to the file in the
working tree, without having to hold everything in the memory.

The ident, auto- and safe- crlf conversions inherently require you to read
the whole thing before deciding what to do, so while it is technically
possible to support them by using a buffer of an unbound size or rewinding
and reading the stream twice, it is less practical than the traditional
"read the whole thing in core and convert" approach.

Adding streaming filters for the other conversions on top of this should
be doable by tweaking the can_bypass_conversion() function (it should be
renamed to can_filter_stream() when it happens). Then the streaming API
can be extended to wrap the git_istream streaming_write_entry() opens on
the underlying object in another git_istream that reads from it, filters
what is read, and let the streaming_write_entry() read the filtered
result. But that is outside the scope of this series.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-05-20 18:46:58 -07:00

299 lines
7.2 KiB
C

#include "cache.h"
#include "blob.h"
#include "dir.h"
#include "streaming.h"
static void create_directories(const char *path, int path_len,
const struct checkout *state)
{
char *buf = xmalloc(path_len + 1);
int len = 0;
while (len < path_len) {
do {
buf[len] = path[len];
len++;
} while (len < path_len && path[len] != '/');
if (len >= path_len)
break;
buf[len] = 0;
/*
* For 'checkout-index --prefix=<dir>', <dir> is
* allowed to be a symlink to an existing directory,
* and we set 'state->base_dir_len' below, such that
* we test the path components of the prefix with the
* stat() function instead of the lstat() function.
*/
if (has_dirs_only_path(buf, len, state->base_dir_len))
continue; /* ok, it is already a directory. */
/*
* If this mkdir() would fail, it could be that there
* is already a symlink or something else exists
* there, therefore we then try to unlink it and try
* one more time to create the directory.
*/
if (mkdir(buf, 0777)) {
if (errno == EEXIST && state->force &&
!unlink_or_warn(buf) && !mkdir(buf, 0777))
continue;
die_errno("cannot create directory at '%s'", buf);
}
}
free(buf);
}
static void remove_subtree(const char *path)
{
DIR *dir = opendir(path);
struct dirent *de;
char pathbuf[PATH_MAX];
char *name;
if (!dir)
die_errno("cannot opendir '%s'", path);
strcpy(pathbuf, path);
name = pathbuf + strlen(path);
*name++ = '/';
while ((de = readdir(dir)) != NULL) {
struct stat st;
if (is_dot_or_dotdot(de->d_name))
continue;
strcpy(name, de->d_name);
if (lstat(pathbuf, &st))
die_errno("cannot lstat '%s'", pathbuf);
if (S_ISDIR(st.st_mode))
remove_subtree(pathbuf);
else if (unlink(pathbuf))
die_errno("cannot unlink '%s'", pathbuf);
}
closedir(dir);
if (rmdir(path))
die_errno("cannot rmdir '%s'", path);
}
static int create_file(const char *path, unsigned int mode)
{
mode = (mode & 0100) ? 0777 : 0666;
return open(path, O_WRONLY | O_CREAT | O_EXCL, mode);
}
static void *read_blob_entry(struct cache_entry *ce, unsigned long *size)
{
enum object_type type;
void *new = read_sha1_file(ce->sha1, &type, size);
if (new) {
if (type == OBJ_BLOB)
return new;
free(new);
}
return NULL;
}
static int open_output_fd(char *path, struct cache_entry *ce, int to_tempfile)
{
int symlink = (ce->ce_mode & S_IFMT) != S_IFREG;
if (to_tempfile) {
strcpy(path, symlink
? ".merge_link_XXXXXX" : ".merge_file_XXXXXX");
return mkstemp(path);
} else {
return create_file(path, !symlink ? ce->ce_mode : 0666);
}
}
static int fstat_output(int fd, const struct checkout *state, struct stat *st)
{
/* use fstat() only when path == ce->name */
if (fstat_is_reliable() &&
state->refresh_cache && !state->base_dir_len) {
fstat(fd, st);
return 1;
}
return 0;
}
static int streaming_write_entry(struct cache_entry *ce, char *path,
const struct checkout *state, int to_tempfile,
int *fstat_done, struct stat *statbuf)
{
struct git_istream *st;
enum object_type type;
unsigned long sz;
int result = -1;
int fd = -1;
st = open_istream(ce->sha1, &type, &sz);
if (!st)
return -1;
if (type != OBJ_BLOB)
goto close_and_exit;
fd = open_output_fd(path, ce, to_tempfile);
if (fd < 0)
goto close_and_exit;
for (;;) {
char buf[10240];
ssize_t wrote;
ssize_t readlen = read_istream(st, buf, sizeof(buf));
if (!readlen)
break;
wrote = write_in_full(fd, buf, readlen);
if (wrote != readlen)
goto close_and_exit;
}
*fstat_done = fstat_output(fd, state, statbuf);
close_and_exit:
close_istream(st);
if (0 <= fd)
result = close(fd);
if (result && 0 <= fd)
unlink(path);
return result;
}
static int write_entry(struct cache_entry *ce, char *path, const struct checkout *state, int to_tempfile)
{
unsigned int ce_mode_s_ifmt = ce->ce_mode & S_IFMT;
int fd, ret, fstat_done = 0;
char *new;
struct strbuf buf = STRBUF_INIT;
unsigned long size;
size_t wrote, newsize = 0;
struct stat st;
if ((ce_mode_s_ifmt == S_IFREG) &&
can_bypass_conversion(path) &&
!streaming_write_entry(ce, path, state, to_tempfile,
&fstat_done, &st))
goto finish;
switch (ce_mode_s_ifmt) {
case S_IFREG:
case S_IFLNK:
new = read_blob_entry(ce, &size);
if (!new)
return error("unable to read sha1 file of %s (%s)",
path, sha1_to_hex(ce->sha1));
if (ce_mode_s_ifmt == S_IFLNK && has_symlinks && !to_tempfile) {
ret = symlink(new, path);
free(new);
if (ret)
return error("unable to create symlink %s (%s)",
path, strerror(errno));
break;
}
/*
* Convert from git internal format to working tree format
*/
if (ce_mode_s_ifmt == S_IFREG &&
convert_to_working_tree(ce->name, new, size, &buf)) {
free(new);
new = strbuf_detach(&buf, &newsize);
size = newsize;
}
fd = open_output_fd(path, ce, to_tempfile);
if (fd < 0) {
free(new);
return error("unable to create file %s (%s)",
path, strerror(errno));
}
wrote = write_in_full(fd, new, size);
if (!to_tempfile)
fstat_done = fstat_output(fd, state, &st);
close(fd);
free(new);
if (wrote != size)
return error("unable to write file %s", path);
break;
case S_IFGITLINK:
if (to_tempfile)
return error("cannot create temporary subproject %s", path);
if (mkdir(path, 0777) < 0)
return error("cannot create subproject directory %s", path);
break;
default:
return error("unknown file mode for %s in index", path);
}
finish:
if (state->refresh_cache) {
if (!fstat_done)
lstat(ce->name, &st);
fill_stat_cache_info(ce, &st);
}
return 0;
}
/*
* This is like 'lstat()', except it refuses to follow symlinks
* in the path, after skipping "skiplen".
*/
static int check_path(const char *path, int len, struct stat *st, int skiplen)
{
const char *slash = path + len;
while (path < slash && *slash != '/')
slash--;
if (!has_dirs_only_path(path, slash - path, skiplen)) {
errno = ENOENT;
return -1;
}
return lstat(path, st);
}
int checkout_entry(struct cache_entry *ce, const struct checkout *state, char *topath)
{
static char path[PATH_MAX + 1];
struct stat st;
int len = state->base_dir_len;
if (topath)
return write_entry(ce, topath, state, 1);
memcpy(path, state->base_dir, len);
strcpy(path + len, ce->name);
len += ce_namelen(ce);
if (!check_path(path, len, &st, state->base_dir_len)) {
unsigned changed = ce_match_stat(ce, &st, CE_MATCH_IGNORE_VALID|CE_MATCH_IGNORE_SKIP_WORKTREE);
if (!changed)
return 0;
if (!state->force) {
if (!state->quiet)
fprintf(stderr, "%s already exists, no checkout\n", path);
return -1;
}
/*
* We unlink the old file, to get the new one with the
* right permissions (including umask, which is nasty
* to emulate by hand - much easier to let the system
* just do the right thing)
*/
if (S_ISDIR(st.st_mode)) {
/* If it is a gitlink, leave it alone! */
if (S_ISGITLINK(ce->ce_mode))
return 0;
if (!state->force)
return error("%s is a directory", path);
remove_subtree(path);
} else if (unlink(path))
return error("unable to unlink old '%s' (%s)", path, strerror(errno));
} else if (state->not_new)
return 0;
create_directories(path, len, state);
return write_entry(ce, path, state, 0);
}