Merge branch 'kn/cat-file-literally'

Add the "--allow-unknown-type" option to "cat-file" to allow
inspecting loose objects of an experimental or a broken type.

* kn/cat-file-literally:
  t1006: add tests for git cat-file --allow-unknown-type
  cat-file: teach cat-file a '--allow-unknown-type' option
  cat-file: make the options mutually exclusive
  sha1_file: support reading from a loose object of unknown type
This commit is contained in:
Junio C Hamano 2015-05-19 13:17:58 -07:00
commit 3b7d373ae2
5 changed files with 188 additions and 43 deletions

View file

@ -9,7 +9,7 @@ git-cat-file - Provide content or type and size information for repository objec
SYNOPSIS
--------
[verse]
'git cat-file' (-t | -s | -e | -p | <type> | --textconv ) <object>
'git cat-file' (-t [--allow-unknown-type]| -s [--allow-unknown-type]| -e | -p | <type> | --textconv ) <object>
'git cat-file' (--batch | --batch-check) < <list-of-objects>
DESCRIPTION
@ -69,6 +69,9 @@ OPTIONS
not be combined with any other options or arguments. See the
section `BATCH OUTPUT` below for details.
--allow-unknown-type::
Allow -s or -t to query broken/corrupt objects of unknown type.
OUTPUT
------
If '-t' is specified, one of the <type>.

View file

@ -9,13 +9,20 @@
#include "userdiff.h"
#include "streaming.h"
static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
static int cat_one_file(int opt, const char *exp_type, const char *obj_name,
int unknown_type)
{
unsigned char sha1[20];
enum object_type type;
char *buf;
unsigned long size;
struct object_context obj_context;
struct object_info oi = {NULL};
struct strbuf sb = STRBUF_INIT;
unsigned flags = LOOKUP_REPLACE_OBJECT;
if (unknown_type)
flags |= LOOKUP_UNKNOWN_OBJECT;
if (get_sha1_with_context(obj_name, 0, sha1, &obj_context))
die("Not a valid object name %s", obj_name);
@ -23,20 +30,22 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
buf = NULL;
switch (opt) {
case 't':
type = sha1_object_info(sha1, NULL);
if (type > 0) {
printf("%s\n", typename(type));
oi.typename = &sb;
if (sha1_object_info_extended(sha1, &oi, flags) < 0)
die("git cat-file: could not get object info");
if (sb.len) {
printf("%s\n", sb.buf);
strbuf_release(&sb);
return 0;
}
break;
case 's':
type = sha1_object_info(sha1, &size);
if (type > 0) {
printf("%lu\n", size);
return 0;
}
break;
oi.sizep = &size;
if (sha1_object_info_extended(sha1, &oi, flags) < 0)
die("git cat-file: could not get object info");
printf("%lu\n", size);
return 0;
case 'e':
return !has_sha1_file(sha1);
@ -323,7 +332,7 @@ static int batch_objects(struct batch_options *opt)
}
static const char * const cat_file_usage[] = {
N_("git cat-file (-t | -s | -e | -p | <type> | --textconv) <object>"),
N_("git cat-file (-t [--allow-unknown-type]|-s [--allow-unknown-type]|-e|-p|<type>|--textconv) <object>"),
N_("git cat-file (--batch | --batch-check) < <list-of-objects>"),
NULL
};
@ -359,16 +368,19 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
int opt = 0;
const char *exp_type = NULL, *obj_name = NULL;
struct batch_options batch = {0};
int unknown_type = 0;
const struct option options[] = {
OPT_GROUP(N_("<type> can be one of: blob, tree, commit, tag")),
OPT_SET_INT('t', NULL, &opt, N_("show object type"), 't'),
OPT_SET_INT('s', NULL, &opt, N_("show object size"), 's'),
OPT_SET_INT('e', NULL, &opt,
OPT_CMDMODE('t', NULL, &opt, N_("show object type"), 't'),
OPT_CMDMODE('s', NULL, &opt, N_("show object size"), 's'),
OPT_CMDMODE('e', NULL, &opt,
N_("exit with zero when there's no error"), 'e'),
OPT_SET_INT('p', NULL, &opt, N_("pretty-print object's content"), 'p'),
OPT_SET_INT(0, "textconv", &opt,
OPT_CMDMODE('p', NULL, &opt, N_("pretty-print object's content"), 'p'),
OPT_CMDMODE(0, "textconv", &opt,
N_("for blob objects, run textconv on object's content"), 'c'),
OPT_BOOL( 0, "allow-unknown-type", &unknown_type,
N_("allow -s and -t to work with broken/corrupt objects")),
{ OPTION_CALLBACK, 0, "batch", &batch, "format",
N_("show info and content of objects fed from the standard input"),
PARSE_OPT_OPTARG, batch_option_callback },
@ -380,9 +392,6 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
git_config(git_cat_file_config, NULL);
if (argc != 3 && argc != 2)
usage_with_options(cat_file_usage, options);
argc = parse_options(argc, argv, prefix, options, cat_file_usage, 0);
if (opt) {
@ -405,5 +414,7 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
if (batch.enabled)
return batch_objects(&batch);
return cat_one_file(opt, exp_type, obj_name);
if (unknown_type && opt != 't' && opt != 's')
die("git cat-file --allow-unknown-type: use with -s or -t");
return cat_one_file(opt, exp_type, obj_name, unknown_type);
}

View file

@ -879,6 +879,7 @@ extern char *xdg_config_home(const char *filename);
/* object replacement */
#define LOOKUP_REPLACE_OBJECT 1
#define LOOKUP_UNKNOWN_OBJECT 2
extern void *read_sha1_file_extended(const unsigned char *sha1, enum object_type *type, unsigned long *size, unsigned flag);
static inline void *read_sha1_file(const unsigned char *sha1, enum object_type *type, unsigned long *size)
{
@ -1351,6 +1352,7 @@ struct object_info {
unsigned long *sizep;
unsigned long *disk_sizep;
unsigned char *delta_base_sha1;
struct strbuf *typename;
/* Response */
enum {

View file

@ -1564,6 +1564,40 @@ int unpack_sha1_header(git_zstream *stream, unsigned char *map, unsigned long ma
return git_inflate(stream, 0);
}
/*
 * Inflate the header of a loose object whose header may be longer than
 * the caller's fixed-size scratch buffer (e.g. because of an unusually
 * long type name).
 *
 * The first chunk is inflated into buffer[0..bufsiz) by
 * unpack_sha1_header().  If that chunk already contains the NUL byte
 * that terminates the header, nothing is added to "header" and the
 * caller should parse "buffer" directly.  Otherwise the inflated bytes
 * are accumulated in "header", reusing "buffer" as scratch space, until
 * a chunk containing the NUL has been appended.
 *
 * Returns 0 once the terminating NUL has been seen, or -1 if inflation
 * reports an error or the stream stops before any NUL is found.
 */
static int unpack_sha1_header_to_strbuf(git_zstream *stream, unsigned char *map,
					unsigned long mapsize, void *buffer,
					unsigned long bufsiz, struct strbuf *header)
{
	int status;

	status = unpack_sha1_header(stream, map, mapsize, buffer, bufsiz);
	/* zlib error codes are negative; do not look at a failed inflate's output. */
	if (status < Z_OK)
		return -1;

	/*
	 * Check if entire header is unpacked in the first iteration.
	 */
	if (memchr(buffer, '\0', stream->next_out - (unsigned char *)buffer))
		return 0;

	/*
	 * buffer[0..bufsiz) was not large enough.  Copy the partial
	 * result out to header, and then append the result of further
	 * reading the stream.
	 */
	strbuf_add(header, buffer, stream->next_out - (unsigned char *)buffer);
	stream->next_out = buffer;
	stream->avail_out = bufsiz;

	/*
	 * Keep going only while zlib reports plain progress.  Looping on
	 * "status != Z_STREAM_END" would spin forever on a corrupt stream,
	 * because an error status repeats without producing any output
	 * (and therefore without ever producing the NUL we wait for).
	 */
	do {
		status = git_inflate(stream, 0);
		strbuf_add(header, buffer, stream->next_out - (unsigned char *)buffer);
		if (memchr(buffer, '\0', stream->next_out - (unsigned char *)buffer))
			return 0;
		stream->next_out = buffer;
		stream->avail_out = bufsiz;
	} while (status == Z_OK);
	return -1;
}
static void *unpack_sha1_rest(git_zstream *stream, void *buffer, unsigned long size, const unsigned char *sha1)
{
int bytes = strlen(buffer) + 1;
@ -1614,27 +1648,38 @@ static void *unpack_sha1_rest(git_zstream *stream, void *buffer, unsigned long s
* too permissive for what we want to check. So do a strict
* object header parse by hand.
*/
int parse_sha1_header(const char *hdr, unsigned long *sizep)
static int parse_sha1_header_extended(const char *hdr, struct object_info *oi,
unsigned int flags)
{
char type[10];
int i;
const char *type_buf = hdr;
unsigned long size;
int type, type_len = 0;
/*
* The type can be at most ten bytes (including the
* terminating '\0' that we add), and is followed by
* The type can be of any size but is followed by
* a space.
*/
i = 0;
for (;;) {
char c = *hdr++;
if (c == ' ')
break;
type[i++] = c;
if (i >= sizeof(type))
return -1;
type_len++;
}
type[i] = 0;
type = type_from_string_gently(type_buf, type_len, 1);
if (oi->typename)
strbuf_add(oi->typename, type_buf, type_len);
/*
* Set type to 0 if it's an unknown object and
* we're obtaining the type using the '--allow-unknown-type'
* option.
*/
if ((flags & LOOKUP_UNKNOWN_OBJECT) && (type < 0))
type = 0;
else if (type < 0)
die("invalid object type");
if (oi->typep)
*oi->typep = type;
/*
* The length must follow immediately, and be in canonical
@ -1652,12 +1697,24 @@ int parse_sha1_header(const char *hdr, unsigned long *sizep)
size = size * 10 + c;
}
}
*sizep = size;
if (oi->sizep)
*oi->sizep = size;
/*
* The length must be followed by a zero byte
*/
return *hdr ? -1 : type_from_string(type);
return *hdr ? -1 : type;
}
int parse_sha1_header(const char *hdr, unsigned long *sizep)
{
struct object_info oi;
oi.sizep = sizep;
oi.typename = NULL;
oi.typep = NULL;
return parse_sha1_header_extended(hdr, &oi, LOOKUP_REPLACE_OBJECT);
}
static void *unpack_sha1_file(void *map, unsigned long mapsize, enum object_type *type, unsigned long *size, const unsigned char *sha1)
@ -2522,13 +2579,15 @@ struct packed_git *find_sha1_pack(const unsigned char *sha1,
}
static int sha1_loose_object_info(const unsigned char *sha1,
struct object_info *oi)
struct object_info *oi,
int flags)
{
int status;
unsigned long mapsize, size;
int status = 0;
unsigned long mapsize;
void *map;
git_zstream stream;
char hdr[32];
struct strbuf hdrbuf = STRBUF_INIT;
if (oi->delta_base_sha1)
hashclr(oi->delta_base_sha1);
@ -2541,7 +2600,7 @@ static int sha1_loose_object_info(const unsigned char *sha1,
* return value implicitly indicates whether the
* object even exists.
*/
if (!oi->typep && !oi->sizep) {
if (!oi->typep && !oi->typename && !oi->sizep) {
struct stat st;
if (stat_sha1_file(sha1, &st) < 0)
return -1;
@ -2555,17 +2614,26 @@ static int sha1_loose_object_info(const unsigned char *sha1,
return -1;
if (oi->disk_sizep)
*oi->disk_sizep = mapsize;
if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
if ((flags & LOOKUP_UNKNOWN_OBJECT)) {
if (unpack_sha1_header_to_strbuf(&stream, map, mapsize, hdr, sizeof(hdr), &hdrbuf) < 0)
status = error("unable to unpack %s header with --allow-unknown-type",
sha1_to_hex(sha1));
} else if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
status = error("unable to unpack %s header",
sha1_to_hex(sha1));
else if ((status = parse_sha1_header(hdr, &size)) < 0)
if (status < 0)
; /* Do nothing */
else if (hdrbuf.len) {
if ((status = parse_sha1_header_extended(hdrbuf.buf, oi, flags)) < 0)
status = error("unable to parse %s header with --allow-unknown-type",
sha1_to_hex(sha1));
} else if ((status = parse_sha1_header_extended(hdr, oi, flags)) < 0)
status = error("unable to parse %s header", sha1_to_hex(sha1));
else if (oi->sizep)
*oi->sizep = size;
git_inflate_end(&stream);
munmap(map, mapsize);
if (oi->typep)
if (status && oi->typep)
*oi->typep = status;
strbuf_release(&hdrbuf);
return 0;
}
@ -2574,6 +2642,7 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi,
struct cached_object *co;
struct pack_entry e;
int rtype;
enum object_type real_type;
const unsigned char *real = lookup_replace_object_extended(sha1, flags);
co = find_cached_object(real);
@ -2586,13 +2655,15 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi,
*(oi->disk_sizep) = 0;
if (oi->delta_base_sha1)
hashclr(oi->delta_base_sha1);
if (oi->typename)
strbuf_addstr(oi->typename, typename(co->type));
oi->whence = OI_CACHED;
return 0;
}
if (!find_pack_entry(real, &e)) {
/* Most likely it's a loose object. */
if (!sha1_loose_object_info(real, oi)) {
if (!sha1_loose_object_info(real, oi, flags)) {
oi->whence = OI_LOOSE;
return 0;
}
@ -2603,9 +2674,18 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi,
return -1;
}
/*
* packed_object_info() does not follow the delta chain to
* find out the real type, unless it is given oi->typep.
*/
if (oi->typename && !oi->typep)
oi->typep = &real_type;
rtype = packed_object_info(e.p, e.offset, oi);
if (rtype < 0) {
mark_bad_packed_object(e.p, real);
if (oi->typep == &real_type)
oi->typep = NULL;
return sha1_object_info_extended(real, oi, 0);
} else if (in_delta_base_cache(e.p, e.offset)) {
oi->whence = OI_DBCACHED;
@ -2616,6 +2696,10 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi,
oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA ||
rtype == OBJ_OFS_DELTA);
}
if (oi->typename)
strbuf_addstr(oi->typename, typename(*oi->typep));
if (oi->typep == &real_type)
oi->typep = NULL;
return 0;
}

View file

@ -47,6 +47,18 @@ $content"
test_cmp expect actual
'
test_expect_success "Type of $type is correct using --allow-unknown-type" '
echo $type >expect &&
git cat-file -t --allow-unknown-type $sha1 >actual &&
test_cmp expect actual
'
test_expect_success "Size of $type is correct using --allow-unknown-type" '
echo $size >expect &&
git cat-file -s --allow-unknown-type $sha1 >actual &&
test_cmp expect actual
'
test -z "$content" ||
test_expect_success "Content of $type is correct" '
maybe_remove_timestamp "$content" $no_ts >expect &&
@ -296,4 +308,37 @@ test_expect_success '%(deltabase) reports packed delta bases' '
}
'
# Create an object whose type name ("bogus") is not a known object type,
# using "hash-object --literally", and remember its name and content size.
bogus_type="bogus"
bogus_content="bogus"
bogus_size=$(strlen "$bogus_content")
bogus_sha1=$(echo_without_newline "$bogus_content" | git hash-object -t $bogus_type --literally -w --stdin)
# "cat-file -t --allow-unknown-type" must print the bogus type name verbatim.
test_expect_success "Type of broken object is correct" '
echo $bogus_type >expect &&
git cat-file -t --allow-unknown-type $bogus_sha1 >actual &&
test_cmp expect actual
'
# "cat-file -s --allow-unknown-type" must print the content size of the object.
test_expect_success "Size of broken object is correct" '
echo $bogus_size >expect &&
git cat-file -s --allow-unknown-type $bogus_sha1 >actual &&
test_cmp expect actual
'
# Repeat the bogus-type checks with a 33-character type name, so that the
# object header ("<type> <size>" plus NUL) is longer than 32 bytes.
# NOTE(review): presumably chosen to overflow the 32-byte hdr[] buffer in
# sha1_loose_object_info() and exercise the strbuf fallback -- confirm.
bogus_type="abcdefghijklmnopqrstuvwxyz1234679"
bogus_content="bogus"
bogus_size=$(strlen "$bogus_content")
bogus_sha1=$(echo_without_newline "$bogus_content" | git hash-object -t $bogus_type --literally -w --stdin)
# The long type name must be reported in full.
test_expect_success "Type of broken object is correct when type is large" '
echo $bogus_type >expect &&
git cat-file -t --allow-unknown-type $bogus_sha1 >actual &&
test_cmp expect actual
'
# The size must still be reported correctly after the long type name.
test_expect_success "Size of large broken object is correct when type is large" '
echo $bogus_size >expect &&
git cat-file -s --allow-unknown-type $bogus_sha1 >actual &&
test_cmp expect actual
'
test_done