Merge branch 'sp/mmap'

* sp/mmap: (27 commits)
  Spell default packedgitlimit slightly differently
  Increase packedGit{Limit,WindowSize} on 64 bit systems.
  Update packedGit config option documentation.
  mmap: set FD_CLOEXEC for file descriptors we keep open for mmap()
  pack-objects: fix use of use_pack().
  Fix random segfaults in pack-objects.
  Cleanup read_cache_from error handling.
  Replace mmap with xmmap, better handling MAP_FAILED.
  Release pack windows before reporting out of memory.
  Default core.packdGitWindowSize to 1 MiB if NO_MMAP.
  Test suite for sliding window mmap implementation.
  Create pack_report() as a debugging aid.
  Support unmapping windows on 'temporary' packfiles.
  Improve error message when packfile mmap fails.
  Ensure core.packedGitWindowSize cannot be less than 2 pages.
  Load core configuration in git-verify-pack.
  Fully activate the sliding window pack access.
  Unmap individual windows rather than entire files.
  Document why header parsing won't exceed a window.
  Loop over pack_windows when inflating/accessing data.
  ...

Conflicts:

	cache.h
	pack-check.c
This commit is contained in:
Junio C Hamano 2007-01-07 00:12:47 -08:00
commit cf2999eb4c
14 changed files with 579 additions and 228 deletions

View file

@ -118,6 +118,34 @@ core.legacyheaders::
database directly (where the "http://" and "rsync://" protocols
count as direct access).
core.packedGitWindowSize::
Number of bytes of a pack file to map into memory in a
single mapping operation. Larger window sizes may allow
your system to process a smaller number of large pack files
more quickly. Smaller window sizes will negatively affect
performance due to increased calls to the operating system's
memory manager, but may improve performance when accessing
a large number of large pack files.
+
Default is 1 MiB if NO_MMAP was set at compile time, otherwise 32
MiB on 32 bit platforms and 1 GiB on 64 bit platforms. This should
be reasonable for all users/operating systems. You probably do
not need to adjust this value.
+
Common unit suffixes of 'k', 'm', or 'g' are supported.
core.packedGitLimit::
Maximum number of bytes to map simultaneously into memory
from pack files. If Git needs to access more than this many
bytes at once to complete an operation it will unmap existing
regions to reclaim virtual address space within the process.
+
Default is 256 MiB on 32 bit platforms and 8 GiB on 64 bit platforms.
This should be reasonable for all users/operating systems, except on
the largest projects. You probably do not need to adjust this value.
+
Common unit suffixes of 'k', 'm', or 'g' are supported.
alias.*::
Command aliases for the gitlink:git[1] command wrapper - e.g.
after defining "alias.last = cat-file commit HEAD", the invocation

View file

@ -276,7 +276,52 @@ static int encode_header(enum object_type type, unsigned long size, unsigned cha
* we are going to reuse the existing object data as is. make
* sure it is not corrupt.
*/
static int check_inflate(unsigned char *data, unsigned long len, unsigned long expect)
static int check_pack_inflate(struct packed_git *p,
struct pack_window **w_curs,
unsigned long offset,
unsigned long len,
unsigned long expect)
{
z_stream stream;
unsigned char fakebuf[4096], *in;
int st;
memset(&stream, 0, sizeof(stream));
inflateInit(&stream);
do {
in = use_pack(p, w_curs, offset, &stream.avail_in);
stream.next_in = in;
stream.next_out = fakebuf;
stream.avail_out = sizeof(fakebuf);
st = inflate(&stream, Z_FINISH);
offset += stream.next_in - in;
} while (st == Z_OK || st == Z_BUF_ERROR);
inflateEnd(&stream);
return (st == Z_STREAM_END &&
stream.total_out == expect &&
stream.total_in == len) ? 0 : -1;
}
static void copy_pack_data(struct sha1file *f,
struct packed_git *p,
struct pack_window **w_curs,
unsigned long offset,
unsigned long len)
{
unsigned char *in;
unsigned int avail;
while (len) {
in = use_pack(p, w_curs, offset, &avail);
if (avail > len)
avail = len;
sha1write(f, in, avail);
offset += avail;
len -= avail;
}
}
static int check_loose_inflate(unsigned char *data, unsigned long len, unsigned long expect)
{
z_stream stream;
unsigned char fakebuf[4096];
@ -323,7 +368,7 @@ static int revalidate_loose_object(struct object_entry *entry,
return -1;
map += used;
mapsize -= used;
return check_inflate(map, mapsize, size);
return check_loose_inflate(map, mapsize, size);
}
static unsigned long write_object(struct sha1file *f,
@ -416,6 +461,8 @@ static unsigned long write_object(struct sha1file *f,
}
else {
struct packed_git *p = entry->in_pack;
struct pack_window *w_curs = NULL;
unsigned long offset;
if (entry->delta) {
obj_type = (allow_ofs_delta && entry->delta->offset) ?
@ -437,16 +484,14 @@ static unsigned long write_object(struct sha1file *f,
hdrlen += 20;
}
use_packed_git(p);
buf = (char *) p->pack_base
+ entry->in_pack_offset
+ entry->in_pack_header_size;
offset = entry->in_pack_offset + entry->in_pack_header_size;
datalen = find_packed_object_size(p, entry->in_pack_offset)
- entry->in_pack_header_size;
if (!pack_to_stdout && check_inflate(buf, datalen, entry->size))
if (!pack_to_stdout && check_pack_inflate(p, &w_curs,
offset, datalen, entry->size))
die("corrupt delta in pack %s", sha1_to_hex(entry->sha1));
sha1write(f, buf, datalen);
unuse_packed_git(p);
copy_pack_data(f, p, &w_curs, offset, datalen);
unuse_pack(&w_curs);
reused++;
}
if (entry->delta)
@ -937,22 +982,19 @@ static void check_object(struct object_entry *entry)
if (entry->in_pack && !entry->preferred_base) {
struct packed_git *p = entry->in_pack;
struct pack_window *w_curs = NULL;
unsigned long left = p->pack_size - entry->in_pack_offset;
unsigned long size, used;
unsigned char *buf;
struct object_entry *base_entry = NULL;
use_packed_git(p);
buf = p->pack_base;
buf += entry->in_pack_offset;
buf = use_pack(p, &w_curs, entry->in_pack_offset, NULL);
/* We want in_pack_type even if we do not reuse delta.
* There is no point not reusing non-delta representations.
*/
used = unpack_object_header_gently(buf, left,
&entry->in_pack_type, &size);
if (!used || left - used <= 20)
die("corrupt pack for %s", sha1_to_hex(entry->sha1));
/* Check if it is delta, and the base is also an object
* we are going to pack. If so we will reuse the existing
@ -961,21 +1003,26 @@ static void check_object(struct object_entry *entry)
if (!no_reuse_delta) {
unsigned char c, *base_name;
unsigned long ofs;
unsigned long used_0;
/* there is at least 20 bytes left in the pack */
switch (entry->in_pack_type) {
case OBJ_REF_DELTA:
base_name = buf + used;
base_name = use_pack(p, &w_curs,
entry->in_pack_offset + used, NULL);
used += 20;
break;
case OBJ_OFS_DELTA:
c = buf[used++];
buf = use_pack(p, &w_curs,
entry->in_pack_offset + used, NULL);
used_0 = 0;
c = buf[used_0++];
ofs = c & 127;
while (c & 128) {
ofs += 1;
if (!ofs || ofs & ~(~0UL >> 7))
die("delta base offset overflow in pack for %s",
sha1_to_hex(entry->sha1));
c = buf[used++];
c = buf[used_0++];
ofs = (ofs << 7) + (c & 127);
}
if (ofs >= entry->in_pack_offset)
@ -983,6 +1030,7 @@ static void check_object(struct object_entry *entry)
sha1_to_hex(entry->sha1));
ofs = entry->in_pack_offset - ofs;
base_name = find_packed_object_name(p, ofs);
used += used_0;
break;
default:
base_name = NULL;
@ -990,7 +1038,7 @@ static void check_object(struct object_entry *entry)
if (base_name)
base_entry = locate_object_entry(base_name);
}
unuse_packed_git(p);
unuse_pack(&w_curs);
entry->in_pack_header_size = used;
if (base_entry) {

View file

@ -55,6 +55,7 @@ int cmd_verify_pack(int argc, const char **argv, const char *prefix)
int no_more_options = 0;
int nothing_done = 1;
git_config(git_default_config);
while (1 < argc) {
if (!no_more_options && argv[1][0] == '-') {
if (!strcmp("-v", argv[1]))

28
cache.h
View file

@ -197,6 +197,8 @@ extern int warn_ambiguous_refs;
extern int shared_repository;
extern const char *apply_default_whitespace;
extern int zlib_compression_level;
extern size_t packed_git_window_size;
extern size_t packed_git_limit;
#define GIT_REPO_VERSION 0
extern int repository_format_version;
@ -336,14 +338,22 @@ extern struct alternate_object_database {
} *alt_odb_list;
extern void prepare_alt_odb(void);
struct pack_window {
struct pack_window *next;
unsigned char *base;
off_t offset;
size_t len;
unsigned int last_used;
unsigned int inuse_cnt;
};
extern struct packed_git {
struct packed_git *next;
unsigned long index_size;
unsigned long pack_size;
struct pack_window *windows;
unsigned int *index_base;
void *pack_base;
unsigned int pack_last_used;
unsigned int pack_use_cnt;
off_t index_size;
off_t pack_size;
int pack_fd;
int pack_local;
unsigned char sha1[20];
/* something like ".git/objects/pack/xxxxx.pack" */
@ -389,13 +399,14 @@ extern void install_packed_git(struct packed_git *pack);
extern struct packed_git *find_sha1_pack(const unsigned char *sha1,
struct packed_git *packs);
extern int use_packed_git(struct packed_git *);
extern void unuse_packed_git(struct packed_git *);
extern void pack_report();
extern unsigned char* use_pack(struct packed_git *, struct pack_window **, unsigned long, unsigned int *);
extern void unuse_pack(struct pack_window **);
extern struct packed_git *add_packed_git(char *, int, int);
extern int num_packed_objects(const struct packed_git *p);
extern int nth_packed_object_sha1(const struct packed_git *, int, unsigned char*);
extern unsigned long find_pack_entry_one(const unsigned char *, struct packed_git *);
extern void *unpack_entry_gently(struct packed_git *, unsigned long, char *, unsigned long *);
extern void *unpack_entry(struct packed_git *, unsigned long, char *, unsigned long *);
extern unsigned long unpack_object_header_gently(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep);
extern void packed_object_info_detail(struct packed_git *, unsigned long, char *, unsigned long *, unsigned long *, unsigned int *, unsigned char *);
@ -421,6 +432,7 @@ extern char *git_commit_encoding;
extern char *git_log_output_encoding;
extern int copy_fd(int ifd, int ofd);
extern void read_or_die(int fd, void *buf, size_t count);
extern int write_in_full(int fd, const void *buf, size_t count, const char *);
extern void write_or_die(int fd, const void *buf, size_t count);
extern int write_or_whine(int fd, const void *buf, size_t count, const char *msg);

View file

@ -304,6 +304,21 @@ int git_default_config(const char *var, const char *value)
return 0;
}
if (!strcmp(var, "core.packedgitwindowsize")) {
int pgsz = getpagesize();
packed_git_window_size = git_config_int(var, value);
packed_git_window_size /= pgsz;
if (packed_git_window_size < 2)
packed_git_window_size = 2;
packed_git_window_size *= pgsz;
return 0;
}
if (!strcmp(var, "core.packedgitlimit")) {
packed_git_limit = git_config_int(var, value);
return 0;
}
if (!strcmp(var, "user.name")) {
strlcpy(git_default_name, value, sizeof(git_default_name));
return 0;
@ -695,7 +710,7 @@ int git_config_set_multivar(const char* key, const char* value,
}
fstat(in_fd, &st);
contents = mmap(NULL, st.st_size, PROT_READ,
contents = xmmap(NULL, st.st_size, PROT_READ,
MAP_PRIVATE, in_fd, 0);
close(in_fd);

4
diff.c
View file

@ -1341,10 +1341,8 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only)
fd = open(s->path, O_RDONLY);
if (fd < 0)
goto err_empty;
s->data = mmap(NULL, s->size, PROT_READ, MAP_PRIVATE, fd, 0);
s->data = xmmap(NULL, s->size, PROT_READ, MAP_PRIVATE, fd, 0);
close(fd);
if (s->data == MAP_FAILED)
goto err_empty;
s->should_munmap = 1;
}
else {

View file

@ -23,6 +23,8 @@ char *git_log_output_encoding;
int shared_repository = PERM_UMASK;
const char *apply_default_whitespace;
int zlib_compression_level = Z_DEFAULT_COMPRESSION;
size_t packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE;
size_t packed_git_limit = DEFAULT_PACKED_GIT_LIMIT;
int pager_in_use;
int pager_use_color = 1;

View file

@ -92,12 +92,21 @@ extern void set_warn_routine(void (*routine)(const char *warn, va_list params));
extern void *git_mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset);
extern int git_munmap(void *start, size_t length);
#define DEFAULT_PACKED_GIT_WINDOW_SIZE (1 * 1024 * 1024)
#else /* NO_MMAP */
#include <sys/mman.h>
#define DEFAULT_PACKED_GIT_WINDOW_SIZE \
(sizeof(void*) >= 8 \
? 1 * 1024 * 1024 * 1024 \
: 32 * 1024 * 1024)
#endif /* NO_MMAP */
#define DEFAULT_PACKED_GIT_LIMIT \
((1024L * 1024L) * (sizeof(void*) >= 8 ? 8192 : 256))
#ifdef NO_SETENV
#define setenv gitsetenv
extern int gitsetenv(const char *, const char *, int);
@ -118,11 +127,17 @@ extern char *gitstrcasestr(const char *haystack, const char *needle);
extern size_t gitstrlcpy(char *, const char *, size_t);
#endif
extern void release_pack_memory(size_t);
static inline char* xstrdup(const char *str)
{
char *ret = strdup(str);
if (!ret)
die("Out of memory, strdup failed");
if (!ret) {
release_pack_memory(strlen(str) + 1);
ret = strdup(str);
if (!ret)
die("Out of memory, strdup failed");
}
return ret;
}
@ -131,8 +146,14 @@ static inline void *xmalloc(size_t size)
void *ret = malloc(size);
if (!ret && !size)
ret = malloc(1);
if (!ret)
die("Out of memory, malloc failed");
if (!ret) {
release_pack_memory(size);
ret = malloc(size);
if (!ret && !size)
ret = malloc(1);
if (!ret)
die("Out of memory, malloc failed");
}
#ifdef XMALLOC_POISON
memset(ret, 0xA5, size);
#endif
@ -144,8 +165,14 @@ static inline void *xrealloc(void *ptr, size_t size)
void *ret = realloc(ptr, size);
if (!ret && !size)
ret = realloc(ptr, 1);
if (!ret)
die("Out of memory, realloc failed");
if (!ret) {
release_pack_memory(size);
ret = realloc(ptr, size);
if (!ret && !size)
ret = realloc(ptr, 1);
if (!ret)
die("Out of memory, realloc failed");
}
return ret;
}
@ -154,8 +181,27 @@ static inline void *xcalloc(size_t nmemb, size_t size)
void *ret = calloc(nmemb, size);
if (!ret && (!nmemb || !size))
ret = calloc(1, 1);
if (!ret)
die("Out of memory, calloc failed");
if (!ret) {
release_pack_memory(nmemb * size);
ret = calloc(nmemb, size);
if (!ret && (!nmemb || !size))
ret = calloc(1, 1);
if (!ret)
die("Out of memory, calloc failed");
}
return ret;
}
static inline void *xmmap(void *start, size_t length,
int prot, int flags, int fd, off_t offset)
{
void *ret = mmap(start, length, prot, flags, fd, offset);
if (ret == MAP_FAILED) {
release_pack_memory(length);
ret = mmap(start, length, prot, flags, fd, offset);
if (ret == MAP_FAILED)
die("Out of memory? mmap failed: %s", strerror(errno));
}
return ret;
}

View file

@ -1,55 +1,45 @@
#include "cache.h"
#include "pack.h"
#define BATCH (1u<<20)
static int verify_packfile(struct packed_git *p)
static int verify_packfile(struct packed_git *p,
struct pack_window **w_curs)
{
unsigned long index_size = p->index_size;
void *index_base = p->index_base;
SHA_CTX ctx;
unsigned char sha1[20];
struct pack_header *hdr;
unsigned long offset = 0, pack_sig = p->pack_size - 20;
int nr_objects, err, i;
unsigned char *packdata;
unsigned long datasize;
/* Header consistency check */
hdr = p->pack_base;
if (hdr->hdr_signature != htonl(PACK_SIGNATURE))
return error("Packfile %s signature mismatch", p->pack_name);
if (!pack_version_ok(hdr->hdr_version))
return error("Packfile version %d unsupported",
ntohl(hdr->hdr_version));
nr_objects = ntohl(hdr->hdr_entries);
if (num_packed_objects(p) != nr_objects)
return error("Packfile claims to have %d objects, "
"while idx size expects %d", nr_objects,
num_packed_objects(p));
/* Note that the pack header checks are actually performed by
* use_pack when it first opens the pack file. If anything
* goes wrong during those checks then the call will die out
* immediately.
*/
/* Check integrity of pack data with its SHA-1 checksum */
SHA1_Init(&ctx);
packdata = p->pack_base;
datasize = p->pack_size - 20;
while (datasize) {
unsigned long batch = (datasize < BATCH) ? datasize : BATCH;
SHA1_Update(&ctx, packdata, batch);
datasize -= batch;
packdata += batch;
while (offset < pack_sig) {
unsigned int remaining;
unsigned char *in = use_pack(p, w_curs, offset, &remaining);
offset += remaining;
if (offset > pack_sig)
remaining -= offset - pack_sig;
SHA1_Update(&ctx, in, remaining);
}
SHA1_Final(sha1, &ctx);
if (hashcmp(sha1, (unsigned char *)(p->pack_base) + p->pack_size - 20))
if (hashcmp(sha1, use_pack(p, w_curs, pack_sig, NULL)))
return error("Packfile %s SHA1 mismatch with itself",
p->pack_name);
if (hashcmp(sha1, (unsigned char *)index_base + index_size - 40))
return error("Packfile %s SHA1 mismatch with idx",
p->pack_name);
unuse_pack(w_curs);
/* Make sure everything reachable from idx is valid. Since we
* have verified that nr_objects matches between idx and pack,
* we do not do scan-streaming check on the pack file.
*/
nr_objects = num_packed_objects(p);
for (i = err = 0; i < nr_objects; i++) {
unsigned char sha1[20];
void *data;
@ -61,7 +51,7 @@ static int verify_packfile(struct packed_git *p)
offset = find_pack_entry_one(sha1, p);
if (!offset)
die("internal error pack-check find-pack-entry-one");
data = unpack_entry_gently(p, offset, type, &size);
data = unpack_entry(p, offset, type, &size);
if (!data) {
err = error("cannot unpack %s from %s",
sha1_to_hex(sha1), p->pack_name);
@ -84,12 +74,10 @@ static int verify_packfile(struct packed_git *p)
static void show_pack_info(struct packed_git *p)
{
struct pack_header *hdr;
int nr_objects, i;
unsigned int chain_histogram[MAX_CHAIN];
hdr = p->pack_base;
nr_objects = ntohl(hdr->hdr_entries);
nr_objects = num_packed_objects(p);
memset(chain_histogram, 0, sizeof(chain_histogram));
for (i = 0; i < nr_objects; i++) {
@ -152,18 +140,16 @@ int verify_pack(struct packed_git *p, int verbose)
if (!ret) {
/* Verify pack file */
use_packed_git(p);
ret = verify_packfile(p);
unuse_packed_git(p);
struct pack_window *w_curs = NULL;
ret = verify_packfile(p, &w_curs);
unuse_pack(&w_curs);
}
if (verbose) {
if (ret)
printf("%s: bad\n", p->pack_name);
else {
use_packed_git(p);
show_pack_info(p);
unuse_packed_git(p);
printf("%s: ok\n", p->pack_name);
}
}

View file

@ -793,16 +793,16 @@ int read_cache_from(const char *path)
die("index file open failed (%s)", strerror(errno));
}
cache_mmap = MAP_FAILED;
if (!fstat(fd, &st)) {
cache_mmap_size = st.st_size;
errno = EINVAL;
if (cache_mmap_size >= sizeof(struct cache_header) + 20)
cache_mmap = mmap(NULL, cache_mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
}
cache_mmap = xmmap(NULL, cache_mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
else
die("index file smaller than expected");
} else
die("cannot stat the open index (%s)", strerror(errno));
close(fd);
if (cache_mmap == MAP_FAILED)
die("index file mmap failed (%s)", strerror(errno));
hdr = cache_mmap;
if (verify_hdr(hdr, cache_mmap_size) < 0)

2
refs.c
View file

@ -1025,7 +1025,7 @@ int read_ref_at(const char *ref, unsigned long at_time, int cnt, unsigned char *
fstat(logfd, &st);
if (!st.st_size)
die("Log %s is empty.", logfile);
logdata = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, logfd, 0);
logdata = xmmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, logfd, 0);
close(logfd);
lastrec = NULL;

View file

@ -355,10 +355,8 @@ static void read_info_alternates(const char * relative_base, int depth)
close(fd);
return;
}
map = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
map = xmmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
close(fd);
if (map == MAP_FAILED)
return;
link_alt_odb_entries(map, map + st.st_size, '\n', relative_base, depth);
@ -397,11 +395,35 @@ static char *find_sha1_file(const unsigned char *sha1, struct stat *st)
return NULL;
}
#define PACK_MAX_SZ (1<<26)
static int pack_used_ctr;
static unsigned long pack_mapped;
static unsigned int pack_used_ctr;
static unsigned int pack_mmap_calls;
static unsigned int peak_pack_open_windows;
static unsigned int pack_open_windows;
static size_t peak_pack_mapped;
static size_t pack_mapped;
static size_t page_size;
struct packed_git *packed_git;
void pack_report()
{
fprintf(stderr,
"pack_report: getpagesize() = %10lu\n"
"pack_report: core.packedGitWindowSize = %10lu\n"
"pack_report: core.packedGitLimit = %10lu\n",
page_size,
packed_git_window_size,
packed_git_limit);
fprintf(stderr,
"pack_report: pack_used_ctr = %10u\n"
"pack_report: pack_mmap_calls = %10u\n"
"pack_report: pack_open_windows = %10u / %10u\n"
"pack_report: pack_mapped = %10lu / %10lu\n",
pack_used_ctr,
pack_mmap_calls,
pack_open_windows, peak_pack_open_windows,
pack_mapped, peak_pack_mapped);
}
static int check_packed_git_idx(const char *path, unsigned long *idx_size_,
void **idx_map_)
{
@ -418,10 +440,8 @@ static int check_packed_git_idx(const char *path, unsigned long *idx_size_,
return -1;
}
idx_size = st.st_size;
idx_map = mmap(NULL, idx_size, PROT_READ, MAP_PRIVATE, fd, 0);
idx_map = xmmap(NULL, idx_size, PROT_READ, MAP_PRIVATE, fd, 0);
close(fd);
if (idx_map == MAP_FAILED)
return -1;
index = idx_map;
*idx_map_ = idx_map;
@ -451,86 +471,198 @@ static int check_packed_git_idx(const char *path, unsigned long *idx_size_,
return 0;
}
static int unuse_one_packed_git(void)
static void scan_windows(struct packed_git *p,
struct packed_git **lru_p,
struct pack_window **lru_w,
struct pack_window **lru_l)
{
struct packed_git *p, *lru = NULL;
struct pack_window *w, *w_l;
for (p = packed_git; p; p = p->next) {
if (p->pack_use_cnt || !p->pack_base)
continue;
if (!lru || p->pack_last_used < lru->pack_last_used)
lru = p;
for (w_l = NULL, w = p->windows; w; w = w->next) {
if (!w->inuse_cnt) {
if (!*lru_w || w->last_used < (*lru_w)->last_used) {
*lru_p = p;
*lru_w = w;
*lru_l = w_l;
}
}
w_l = w;
}
if (!lru)
return 0;
munmap(lru->pack_base, lru->pack_size);
lru->pack_base = NULL;
return 1;
}
void unuse_packed_git(struct packed_git *p)
static int unuse_one_window(struct packed_git *current)
{
p->pack_use_cnt--;
struct packed_git *p, *lru_p = NULL;
struct pack_window *lru_w = NULL, *lru_l = NULL;
if (current)
scan_windows(current, &lru_p, &lru_w, &lru_l);
for (p = packed_git; p; p = p->next)
scan_windows(p, &lru_p, &lru_w, &lru_l);
if (lru_p) {
munmap(lru_w->base, lru_w->len);
pack_mapped -= lru_w->len;
if (lru_l)
lru_l->next = lru_w->next;
else {
lru_p->windows = lru_w->next;
if (!lru_p->windows && lru_p != current) {
close(lru_p->pack_fd);
lru_p->pack_fd = -1;
}
}
free(lru_w);
pack_open_windows--;
return 1;
}
return 0;
}
int use_packed_git(struct packed_git *p)
void release_pack_memory(size_t need)
{
size_t cur = pack_mapped;
while (need >= (cur - pack_mapped) && unuse_one_window(NULL))
; /* nothing */
}
void unuse_pack(struct pack_window **w_cursor)
{
struct pack_window *w = *w_cursor;
if (w) {
w->inuse_cnt--;
*w_cursor = NULL;
}
}
static void open_packed_git(struct packed_git *p)
{
struct stat st;
struct pack_header hdr;
unsigned char sha1[20];
unsigned char *idx_sha1;
long fd_flag;
p->pack_fd = open(p->pack_name, O_RDONLY);
if (p->pack_fd < 0 || fstat(p->pack_fd, &st))
die("packfile %s cannot be opened", p->pack_name);
/* If we created the struct before we had the pack we lack size. */
if (!p->pack_size) {
struct stat st;
/* We created the struct before we had the pack */
stat(p->pack_name, &st);
if (!S_ISREG(st.st_mode))
die("packfile %s not a regular file", p->pack_name);
p->pack_size = st.st_size;
}
if (!p->pack_base) {
int fd;
struct stat st;
void *map;
struct pack_header *hdr;
} else if (p->pack_size != st.st_size)
die("packfile %s size changed", p->pack_name);
pack_mapped += p->pack_size;
while (PACK_MAX_SZ < pack_mapped && unuse_one_packed_git())
; /* nothing */
fd = open(p->pack_name, O_RDONLY);
if (fd < 0)
die("packfile %s cannot be opened", p->pack_name);
if (fstat(fd, &st)) {
close(fd);
die("packfile %s cannot be opened", p->pack_name);
/* We leave these file descriptors open with sliding mmap;
* there is no point keeping them open across exec(), though.
*/
fd_flag = fcntl(p->pack_fd, F_GETFD, 0);
if (fd_flag < 0)
die("cannot determine file descriptor flags");
fd_flag |= FD_CLOEXEC;
if (fcntl(p->pack_fd, F_SETFD, fd_flag) == -1)
die("cannot set FD_CLOEXEC");
/* Verify we recognize this pack file format. */
read_or_die(p->pack_fd, &hdr, sizeof(hdr));
if (hdr.hdr_signature != htonl(PACK_SIGNATURE))
die("file %s is not a GIT packfile", p->pack_name);
if (!pack_version_ok(hdr.hdr_version))
die("packfile %s is version %u and not supported"
" (try upgrading GIT to a newer version)",
p->pack_name, ntohl(hdr.hdr_version));
/* Verify the pack matches its index. */
if (num_packed_objects(p) != ntohl(hdr.hdr_entries))
die("packfile %s claims to have %u objects"
" while index size indicates %u objects",
p->pack_name, ntohl(hdr.hdr_entries),
num_packed_objects(p));
if (lseek(p->pack_fd, p->pack_size - sizeof(sha1), SEEK_SET) == -1)
die("end of packfile %s is unavailable", p->pack_name);
read_or_die(p->pack_fd, sha1, sizeof(sha1));
idx_sha1 = ((unsigned char *)p->index_base) + p->index_size - 40;
if (hashcmp(sha1, idx_sha1))
die("packfile %s does not match index", p->pack_name);
}
static int in_window(struct pack_window *win, unsigned long offset)
{
/* We must promise at least 20 bytes (one hash) after the
* offset is available from this window, otherwise the offset
* is not actually in this window and a different window (which
* has that one hash excess) must be used. This is to support
* the object header and delta base parsing routines below.
*/
off_t win_off = win->offset;
return win_off <= offset
&& (offset + 20) <= (win_off + win->len);
}
unsigned char* use_pack(struct packed_git *p,
struct pack_window **w_cursor,
unsigned long offset,
unsigned int *left)
{
struct pack_window *win = *w_cursor;
if (p->pack_fd == -1)
open_packed_git(p);
/* Since packfiles end in a hash of their content and its
* pointless to ask for an offset into the middle of that
* hash, and the in_window function above wouldn't match
* don't allow an offset too close to the end of the file.
*/
if (offset > (p->pack_size - 20))
die("offset beyond end of packfile (truncated pack?)");
if (!win || !in_window(win, offset)) {
if (win)
win->inuse_cnt--;
for (win = p->windows; win; win = win->next) {
if (in_window(win, offset))
break;
}
if (st.st_size != p->pack_size)
die("packfile %s size mismatch.", p->pack_name);
map = mmap(NULL, p->pack_size, PROT_READ, MAP_PRIVATE, fd, 0);
close(fd);
if (map == MAP_FAILED)
die("packfile %s cannot be mapped.", p->pack_name);
p->pack_base = map;
/* Check if we understand this pack file. If we don't we're
* likely too old to handle it.
*/
hdr = map;
if (hdr->hdr_signature != htonl(PACK_SIGNATURE))
die("packfile %s isn't actually a pack.", p->pack_name);
if (!pack_version_ok(hdr->hdr_version))
die("packfile %s is version %i and not supported"
" (try upgrading GIT to a newer version)",
p->pack_name, ntohl(hdr->hdr_version));
/* Check if the pack file matches with the index file.
* this is cheap.
*/
if (hashcmp((unsigned char *)(p->index_base) +
p->index_size - 40,
(unsigned char *)p->pack_base +
p->pack_size - 20)) {
die("packfile %s does not match index.", p->pack_name);
if (!win) {
if (!page_size)
page_size = getpagesize();
win = xcalloc(1, sizeof(*win));
win->offset = (offset / page_size) * page_size;
win->len = p->pack_size - win->offset;
if (win->len > packed_git_window_size)
win->len = packed_git_window_size;
pack_mapped += win->len;
while (packed_git_limit < pack_mapped
&& unuse_one_window(p))
; /* nothing */
win->base = xmmap(NULL, win->len,
PROT_READ, MAP_PRIVATE,
p->pack_fd, win->offset);
if (win->base == MAP_FAILED)
die("packfile %s cannot be mapped: %s",
p->pack_name,
strerror(errno));
pack_mmap_calls++;
pack_open_windows++;
if (pack_mapped > peak_pack_mapped)
peak_pack_mapped = pack_mapped;
if (pack_open_windows > peak_pack_open_windows)
peak_pack_open_windows = pack_open_windows;
win->next = p->windows;
p->windows = win;
}
}
p->pack_last_used = pack_used_ctr++;
p->pack_use_cnt++;
return 0;
if (win != *w_cursor) {
win->last_used = pack_used_ctr++;
win->inuse_cnt++;
*w_cursor = win;
}
offset -= win->offset;
if (left)
*left = win->len - offset;
return win->base + offset;
}
struct packed_git *add_packed_git(char *path, int path_len, int local)
@ -559,9 +691,8 @@ struct packed_git *add_packed_git(char *path, int path_len, int local)
p->pack_size = st.st_size;
p->index_base = idx_map;
p->next = NULL;
p->pack_base = NULL;
p->pack_last_used = 0;
p->pack_use_cnt = 0;
p->windows = NULL;
p->pack_fd = -1;
p->pack_local = local;
if ((path_len > 44) && !get_sha1_hex(path + path_len - 44, sha1))
hashcpy(p->sha1, sha1);
@ -592,9 +723,8 @@ struct packed_git *parse_pack_index_file(const unsigned char *sha1, char *idx_pa
p->pack_size = 0;
p->index_base = idx_map;
p->next = NULL;
p->pack_base = NULL;
p->pack_last_used = 0;
p->pack_use_cnt = 0;
p->windows = NULL;
p->pack_fd = -1;
hashcpy(p->sha1, sha1);
return p;
}
@ -705,10 +835,8 @@ void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
*/
sha1_file_open_flag = 0;
}
map = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
map = xmmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
close(fd);
if (map == MAP_FAILED)
return NULL;
*size = st.st_size;
return map;
}
@ -878,18 +1006,21 @@ void * unpack_sha1_file(void *map, unsigned long mapsize, char *type, unsigned l
}
static unsigned long get_delta_base(struct packed_git *p,
struct pack_window **w_curs,
unsigned long offset,
enum object_type kind,
unsigned long delta_obj_offset,
unsigned long *base_obj_offset)
{
unsigned char *base_info = (unsigned char *) p->pack_base + offset;
unsigned char *base_info = use_pack(p, w_curs, offset, NULL);
unsigned long base_offset;
/* there must be at least 20 bytes left regardless of delta type */
if (p->pack_size <= offset + 20)
die("truncated pack file");
/* use_pack() assured us we have [base_info, base_info + 20)
* as a range that we can look at without walking off the
* end of the mapped window. Its actually the hash size
* that is assured. An OFS_DELTA longer than the hash size
* is stupid, as then a REF_DELTA would be smaller to store.
*/
if (kind == OBJ_OFS_DELTA) {
unsigned used = 0;
unsigned char c = base_info[used++];
@ -923,6 +1054,7 @@ static int packed_object_info(struct packed_git *p, unsigned long offset,
char *type, unsigned long *sizep);
static int packed_delta_info(struct packed_git *p,
struct pack_window **w_curs,
unsigned long offset,
enum object_type kind,
unsigned long obj_offset,
@ -931,7 +1063,8 @@ static int packed_delta_info(struct packed_git *p,
{
unsigned long base_offset;
offset = get_delta_base(p, offset, kind, obj_offset, &base_offset);
offset = get_delta_base(p, w_curs, offset, kind,
obj_offset, &base_offset);
/* We choose to only get the type of the base object and
* ignore potentially corrupt pack file that expects the delta
@ -943,20 +1076,23 @@ static int packed_delta_info(struct packed_git *p,
if (sizep) {
const unsigned char *data;
unsigned char delta_head[20];
unsigned char delta_head[20], *in;
unsigned long result_size;
z_stream stream;
int st;
memset(&stream, 0, sizeof(stream));
stream.next_in = (unsigned char *) p->pack_base + offset;
stream.avail_in = p->pack_size - offset;
stream.next_out = delta_head;
stream.avail_out = sizeof(delta_head);
inflateInit(&stream);
st = inflate(&stream, Z_FINISH);
do {
in = use_pack(p, w_curs, offset, &stream.avail_in);
stream.next_in = in;
st = inflate(&stream, Z_FINISH);
offset += stream.next_in - in;
} while ((st == Z_OK || st == Z_BUF_ERROR)
&& stream.total_out < sizeof(delta_head));
inflateEnd(&stream);
if ((st != Z_STREAM_END) &&
stream.total_out != sizeof(delta_head))
@ -977,17 +1113,24 @@ static int packed_delta_info(struct packed_git *p,
return 0;
}
static unsigned long unpack_object_header(struct packed_git *p, unsigned long offset,
enum object_type *type, unsigned long *sizep)
static unsigned long unpack_object_header(struct packed_git *p,
struct pack_window **w_curs,
unsigned long offset,
enum object_type *type,
unsigned long *sizep)
{
unsigned char *base;
unsigned int left;
unsigned long used;
if (p->pack_size <= offset)
die("object offset outside of pack file");
used = unpack_object_header_gently((unsigned char *)p->pack_base +
offset,
p->pack_size - offset, type, sizep);
/* use_pack() assures us we have [base, base + 20) available
* as a range that we can look at at. (Its actually the hash
* size that is assurred.) With our object header encoding
* the maximum deflated object size is 2^137, which is just
* insane, so we know won't exceed what we have been given.
*/
base = use_pack(p, w_curs, offset, &left);
used = unpack_object_header_gently(base, left, type, sizep);
if (!used)
die("object offset outside of pack file");
@ -1002,13 +1145,14 @@ void packed_object_info_detail(struct packed_git *p,
unsigned int *delta_chain_length,
unsigned char *base_sha1)
{
struct pack_window *w_curs = NULL;
unsigned long obj_offset, val;
unsigned char *next_sha1;
enum object_type kind;
*delta_chain_length = 0;
obj_offset = offset;
offset = unpack_object_header(p, offset, &kind, size);
offset = unpack_object_header(p, &w_curs, offset, &kind, size);
for (;;) {
switch (kind) {
@ -1021,25 +1165,24 @@ void packed_object_info_detail(struct packed_git *p,
case OBJ_TAG:
strcpy(type, type_names[kind]);
*store_size = 0; /* notyet */
unuse_pack(&w_curs);
return;
case OBJ_OFS_DELTA:
get_delta_base(p, offset, kind, obj_offset, &offset);
get_delta_base(p, &w_curs, offset, kind,
obj_offset, &offset);
if (*delta_chain_length == 0) {
/* TODO: find base_sha1 as pointed by offset */
}
break;
case OBJ_REF_DELTA:
if (p->pack_size <= offset + 20)
die("pack file %s records an incomplete delta base",
p->pack_name);
next_sha1 = (unsigned char *) p->pack_base + offset;
next_sha1 = use_pack(p, &w_curs, offset, NULL);
if (*delta_chain_length == 0)
hashcpy(base_sha1, next_sha1);
offset = find_pack_entry_one(next_sha1, p);
break;
}
obj_offset = offset;
offset = unpack_object_header(p, offset, &kind, &val);
offset = unpack_object_header(p, &w_curs, offset, &kind, &val);
(*delta_chain_length)++;
}
}
@ -1047,20 +1190,26 @@ void packed_object_info_detail(struct packed_git *p,
static int packed_object_info(struct packed_git *p, unsigned long offset,
char *type, unsigned long *sizep)
{
struct pack_window *w_curs = NULL;
unsigned long size, obj_offset = offset;
enum object_type kind;
int r;
offset = unpack_object_header(p, offset, &kind, &size);
offset = unpack_object_header(p, &w_curs, offset, &kind, &size);
switch (kind) {
case OBJ_OFS_DELTA:
case OBJ_REF_DELTA:
return packed_delta_info(p, offset, kind, obj_offset, type, sizep);
r = packed_delta_info(p, &w_curs, offset, kind,
obj_offset, type, sizep);
unuse_pack(&w_curs);
return r;
case OBJ_COMMIT:
case OBJ_TREE:
case OBJ_BLOB:
case OBJ_TAG:
strcpy(type, type_names[kind]);
unuse_pack(&w_curs);
break;
default:
die("pack %s contains unknown object type %d",
@ -1072,23 +1221,27 @@ static int packed_object_info(struct packed_git *p, unsigned long offset,
}
static void *unpack_compressed_entry(struct packed_git *p,
struct pack_window **w_curs,
unsigned long offset,
unsigned long size)
{
int st;
z_stream stream;
unsigned char *buffer;
unsigned char *buffer, *in;
buffer = xmalloc(size + 1);
buffer[size] = 0;
memset(&stream, 0, sizeof(stream));
stream.next_in = (unsigned char*)p->pack_base + offset;
stream.avail_in = p->pack_size - offset;
stream.next_out = buffer;
stream.avail_out = size;
inflateInit(&stream);
st = inflate(&stream, Z_FINISH);
do {
in = use_pack(p, w_curs, offset, &stream.avail_in);
stream.next_in = in;
st = inflate(&stream, Z_FINISH);
offset += stream.next_in - in;
} while (st == Z_OK || st == Z_BUF_ERROR);
inflateEnd(&stream);
if ((st != Z_STREAM_END) || stream.total_out != size) {
free(buffer);
@ -1099,6 +1252,7 @@ static void *unpack_compressed_entry(struct packed_git *p,
}
static void *unpack_delta_entry(struct packed_git *p,
struct pack_window **w_curs,
unsigned long offset,
unsigned long delta_size,
enum object_type kind,
@ -1109,13 +1263,14 @@ static void *unpack_delta_entry(struct packed_git *p,
void *delta_data, *result, *base;
unsigned long result_size, base_size, base_offset;
offset = get_delta_base(p, offset, kind, obj_offset, &base_offset);
base = unpack_entry_gently(p, base_offset, type, &base_size);
offset = get_delta_base(p, w_curs, offset, kind,
obj_offset, &base_offset);
base = unpack_entry(p, base_offset, type, &base_size);
if (!base)
die("failed to read delta base object at %lu from %s",
base_offset, p->pack_name);
delta_data = unpack_compressed_entry(p, offset, delta_size);
delta_data = unpack_compressed_entry(p, w_curs, offset, delta_size);
result = patch_delta(base, base_size,
delta_data, delta_size,
&result_size);
@ -1127,43 +1282,34 @@ static void *unpack_delta_entry(struct packed_git *p,
return result;
}
static void *unpack_entry(struct pack_entry *entry,
char *type, unsigned long *sizep)
{
struct packed_git *p = entry->p;
void *retval;
if (use_packed_git(p))
die("cannot map packed file");
retval = unpack_entry_gently(p, entry->offset, type, sizep);
unuse_packed_git(p);
if (!retval)
die("corrupted pack file %s", p->pack_name);
return retval;
}
/* The caller is responsible for use_packed_git()/unuse_packed_git() pair */
void *unpack_entry_gently(struct packed_git *p, unsigned long offset,
void *unpack_entry(struct packed_git *p, unsigned long offset,
char *type, unsigned long *sizep)
{
struct pack_window *w_curs = NULL;
unsigned long size, obj_offset = offset;
enum object_type kind;
void *retval;
offset = unpack_object_header(p, offset, &kind, &size);
offset = unpack_object_header(p, &w_curs, offset, &kind, &size);
switch (kind) {
case OBJ_OFS_DELTA:
case OBJ_REF_DELTA:
return unpack_delta_entry(p, offset, size, kind, obj_offset, type, sizep);
retval = unpack_delta_entry(p, &w_curs, offset, size,
kind, obj_offset, type, sizep);
break;
case OBJ_COMMIT:
case OBJ_TREE:
case OBJ_BLOB:
case OBJ_TAG:
strcpy(type, type_names[kind]);
*sizep = size;
return unpack_compressed_entry(p, offset, size);
retval = unpack_compressed_entry(p, &w_curs, offset, size);
break;
default:
return NULL;
die("unknown object type %i in %s", kind, p->pack_name);
}
unuse_pack(&w_curs);
return retval;
}
int num_packed_objects(const struct packed_git *p)
@ -1289,7 +1435,6 @@ static int sha1_loose_object_info(const unsigned char *sha1, char *type, unsigne
int sha1_object_info(const unsigned char *sha1, char *type, unsigned long *sizep)
{
int status;
struct pack_entry e;
if (!find_pack_entry(sha1, &e, NULL)) {
@ -1297,11 +1442,7 @@ int sha1_object_info(const unsigned char *sha1, char *type, unsigned long *sizep
if (!find_pack_entry(sha1, &e, NULL))
return sha1_loose_object_info(sha1, type, sizep);
}
if (use_packed_git(e.p))
die("cannot map packed file");
status = packed_object_info(e.p, e.offset, type, sizep);
unuse_packed_git(e.p);
return status;
return packed_object_info(e.p, e.offset, type, sizep);
}
static void *read_packed_sha1(const unsigned char *sha1, char *type, unsigned long *size)
@ -1312,7 +1453,7 @@ static void *read_packed_sha1(const unsigned char *sha1, char *type, unsigned lo
error("cannot read sha1_file for %s", sha1_to_hex(sha1));
return NULL;
}
return unpack_entry(&e, type, size);
return unpack_entry(e.p, e.offset, type, size);
}
void * read_sha1_file(const unsigned char *sha1, char *type, unsigned long *size)
@ -1851,10 +1992,8 @@ int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object, con
buf = "";
if (size)
buf = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
buf = xmmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
close(fd);
if (buf == MAP_FAILED)
return -1;
if (!type)
type = blob_type;

60
t/t5301-sliding-window.sh Executable file
View file

@ -0,0 +1,60 @@
#!/bin/sh
#
# Copyright (c) 2006 Shawn Pearce
#
test_description='mmap sliding window tests'
. ./test-lib.sh
test_expect_success \
'setup' \
'rm -f .git/index*
for i in a b c
do
echo $i >$i &&
dd if=/dev/urandom bs=32k count=1 >>$i &&
git-update-index --add $i || return 1
done &&
echo d >d && cat c >>d && git-update-index --add d &&
tree=`git-write-tree` &&
commit1=`git-commit-tree $tree </dev/null` &&
git-update-ref HEAD $commit1 &&
git-repack -a -d &&
test "`git-count-objects`" = "0 objects, 0 kilobytes" &&
pack1=`ls .git/objects/pack/*.pack` &&
test -f "$pack1"'
test_expect_success \
'verify-pack -v, defaults' \
'git-verify-pack -v "$pack1"'
test_expect_success \
'verify-pack -v, packedGitWindowSize == 1 page' \
'git-repo-config core.packedGitWindowSize 512 &&
git-verify-pack -v "$pack1"'
test_expect_success \
'verify-pack -v, packedGit{WindowSize,Limit} == 1 page' \
'git-repo-config core.packedGitWindowSize 512 &&
git-repo-config core.packedGitLimit 512 &&
git-verify-pack -v "$pack1"'
test_expect_success \
'repack -a -d, packedGit{WindowSize,Limit} == 1 page' \
'git-repo-config core.packedGitWindowSize 512 &&
git-repo-config core.packedGitLimit 512 &&
commit2=`git-commit-tree $tree -p $commit1 </dev/null` &&
git-update-ref HEAD $commit2 &&
git-repack -a -d &&
test "`git-count-objects`" = "0 objects, 0 kilobytes" &&
pack2=`ls .git/objects/pack/*.pack` &&
test -f "$pack2"
test "$pack1" \!= "$pack2"'
test_expect_success \
'verify-pack -v, defaults' \
'git-repo-config --unset core.packedGitWindowSize &&
git-repo-config --unset core.packedGitLimit &&
git-verify-pack -v "$pack2"'
test_done

View file

@ -1,5 +1,21 @@
#include "cache.h"
void read_or_die(int fd, void *buf, size_t count)
{
char *p = buf;
ssize_t loaded;
while (count > 0) {
loaded = xread(fd, p, count);
if (loaded == 0)
die("unexpected end of file");
else if (loaded < 0)
die("read error (%s)", strerror(errno));
count -= loaded;
p += loaded;
}
}
void write_or_die(int fd, const void *buf, size_t count)
{
const char *p = buf;