Converted fast-import to a text based protocol.

Frontend clients can now send a text stream to fast-import rather
than a binary stream.  This should facilitate developing frontend
software as the data stream is easier to view, manipulate and debug
by hand and Mark-I eyeball.

Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
This commit is contained in:
Shawn O. Pearce 2006-08-14 20:16:28 -04:00
parent 7111feede9
commit c44cdc7eef

View file

@ -4,57 +4,93 @@ Format of STDIN stream:
stream ::= cmd*; stream ::= cmd*;
cmd ::= new_blob cmd ::= new_blob
| new_commit
| new_branch | new_branch
| new_commit
| new_tag | new_tag
; ;
new_blob ::= 'blob' blob_data; new_blob ::= 'blob' lf
mark?
file_content;
file_content ::= data;
new_commit ::= 'comt' ref_name author_committer_msg new_branch ::= 'branch' sp ref_str lf
('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)?
lf;
new_commit ::= 'commit' sp ref_str lf
mark?
('author' sp name '<' email '>' ts tz lf)?
'committer' sp name '<' email '>' ts tz lf
commit_msg
file_change* file_change*
'0'; lf;
commit_msg ::= data;
new_branch ::= 'brch' dst_ref_name src_ref_name; file_change ::= 'M' sp mode sp (hexsha1 | idnum) sp path_str lf
dst_ref_name ::= ref_name; | 'D' sp path_str lf
src_ref_name ::= ref_name | sha1_exp;
new_tag ::= 'tagg' ref_name tag_name tagger_msg;
file_change ::= 'M' path_name hexsha1
| 'D' path_name
; ;
mode ::= '644' | '755';
author_committer_msg ::= len32 new_tag ::= 'tag' sp tag_str lf
'author' sp name '<' email '>' ts tz lf 'from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf
'committer' sp name '<' email '>' ts tz lf 'tagger' sp name '<' email '>' ts tz lf
lf tag_msg;
binary_data; tag_msg ::= data;
tagger_msg ::= len32 # note: the first idnum in a stream should be 1 and subsequent
'tagger' sp name '<' email '>' ts tz lf # idnums should not have gaps between values as this will cause
lf # the stream parser to reserve space for the gapped values. An
binary_data; # idnum can be updated in the future to a new object by issuing
# a new mark directive with the old idnum.
#
mark ::= 'mark' sp idnum lf;
blob_data ::= len32 binary_data; # max len is 2^32-1 # note: declen indicates the length of binary_data in bytes.
path_name ::= len32 path; # max len is PATH_MAX-1 # declen does not include the lf preceding or trailing the
ref_name ::= len32 ref; # max len is PATH_MAX-1 # binary data.
tag_name ::= len32 tag; # max len is PATH_MAX-1 #
sha1_exp ::= len32 sha1exp; # max len is PATH_MAX-1 data ::= 'data' sp declen lf
binary_data
lf;
len32 ::= # unsigned 32 bit value, native format; # note: quoted strings are C-style quoting supporting \c for
# common escapes of 'c' (e.g. \n, \t, \\, \") or \nnn where nnn
# is the signed byte value in octal. Note that the only
# characters which must actually be escaped to protect the
# stream formatting are: \, " and LF. Otherwise these values
# are UTF8.
#
ref_str ::= ref | '"' quoted(ref) '"' ;
sha1exp_str ::= sha1exp | '"' quoted(sha1exp) '"' ;
tag_str ::= tag | '"' quoted(tag) '"' ;
path_str ::= path | '"' quoted(path) '"' ;
declen ::= # unsigned 32 bit value, ascii base10 notation;
binary_data ::= # file content, not interpreted; binary_data ::= # file content, not interpreted;
sp ::= # ASCII space character; sp ::= # ASCII space character;
lf ::= # ASCII newline (LF) character; lf ::= # ASCII newline (LF) character;
path ::= # GIT style file path, e.g. "a/b/c";
ref ::= # GIT ref name, e.g. "refs/heads/MOZ_GECKO_EXPERIMENT"; # note: a colon (':') must precede the numerical value assigned to
tag ::= # GIT tag name, e.g. "FIREFOX_1_5"; # an idnum. This is to distinguish it from a ref or tag name as
# GIT does not permit ':' in ref or tag strings.
#
idnum ::= ':' declen;
path ::= # GIT style file path, e.g. "a/b/c";
ref ::= # GIT ref name, e.g. "refs/heads/MOZ_GECKO_EXPERIMENT";
tag ::= # GIT tag name, e.g. "FIREFOX_1_5";
sha1exp ::= # Any valid GIT SHA1 expression; sha1exp ::= # Any valid GIT SHA1 expression;
hexsha1 ::= # SHA1 in hexadecimal format; hexsha1 ::= # SHA1 in hexadecimal format;
name ::= # valid GIT author/committer name;
# note: name and email are UTF8 strings, however name must not
# contain '<' or lf and email must not contain any of the
# following: '<', '>', lf.
#
name ::= # valid GIT author/committer name;
email ::= # valid GIT author/committer email; email ::= # valid GIT author/committer email;
ts ::= # time since the epoch in seconds, ascii decimal; ts ::= # time since the epoch in seconds, ascii base10 notation;
tz ::= # GIT style timezone; tz ::= # GIT style timezone;
*/ */
#include "builtin.h" #include "builtin.h"
@ -66,6 +102,8 @@ Format of STDIN stream:
#include "pack.h" #include "pack.h"
#include "refs.h" #include "refs.h"
#include "csum-file.h" #include "csum-file.h"
#include "strbuf.h"
#include "quote.h"
struct object_entry struct object_entry
{ {
@ -153,7 +191,7 @@ static size_t mem_pool_alloc = 2*1024*1024 - sizeof(struct mem_pool);
static size_t total_allocd; static size_t total_allocd;
static struct mem_pool *mem_pool; static struct mem_pool *mem_pool;
/* atom management */ /* Atom management */
static unsigned int atom_table_sz = 4451; static unsigned int atom_table_sz = 4451;
static unsigned int atom_cnt; static unsigned int atom_cnt;
static struct atom_str **atom_table; static struct atom_str **atom_table;
@ -184,6 +222,10 @@ static unsigned int branch_table_sz = 1039;
static struct branch **branch_table; static struct branch **branch_table;
static struct branch *active_branches; static struct branch *active_branches;
/* Input stream parsing */
static struct strbuf command_buf;
static unsigned long command_mark;
static void alloc_objects(int cnt) static void alloc_objects(int cnt)
{ {
@ -330,6 +372,8 @@ static struct branch* new_branch(const char *name)
if (b) if (b)
die("Invalid attempt to create duplicate branch: %s", name); die("Invalid attempt to create duplicate branch: %s", name);
if (check_ref_format(name))
die("Branch name doesn't conform to GIT standards: %s", name);
b = pool_calloc(1, sizeof(struct branch)); b = pool_calloc(1, sizeof(struct branch));
b->name = pool_strdup(name); b->name = pool_strdup(name);
@ -433,22 +477,6 @@ static void yread(int fd, void *buffer, size_t length)
} }
} }
static int optional_read(int fd, void *buffer, size_t length)
{
ssize_t ret = 0;
while (ret < length) {
ssize_t size = xread(fd, (char *) buffer + ret, length - ret);
if (!size && !ret)
return 1;
if (!size)
die("Read from descriptor %i: end of stream", fd);
if (size < 0)
die("Read from descriptor %i: %s", fd, strerror(errno));
ret += size;
}
return 0;
}
static void ywrite(int fd, void *buffer, size_t length) static void ywrite(int fd, void *buffer, size_t length)
{ {
ssize_t ret = 0; ssize_t ret = 0;
@ -462,24 +490,9 @@ static void ywrite(int fd, void *buffer, size_t length)
} }
} }
static const char* read_path() static size_t encode_header(
{
static char sn[PATH_MAX];
unsigned long slen;
yread(0, &slen, 4);
if (!slen)
die("Expected string command parameter, didn't find one");
if (slen > (PATH_MAX - 1))
die("Can't handle excessive string length %lu", slen);
yread(0, sn, slen);
sn[slen] = 0;
return sn;
}
static unsigned long encode_header(
enum object_type type, enum object_type type,
unsigned long size, size_t size,
unsigned char *hdr) unsigned char *hdr)
{ {
int n = 1; int n = 1;
@ -503,7 +516,7 @@ static unsigned long encode_header(
static int store_object( static int store_object(
enum object_type type, enum object_type type,
void *dat, void *dat,
unsigned long datlen, size_t datlen,
struct last_object *last, struct last_object *last,
unsigned char *sha1out) unsigned char *sha1out)
{ {
@ -896,15 +909,57 @@ static void dump_branches()
} }
} }
static void read_next_command()
{
read_line(&command_buf, stdin, '\n');
}
static void cmd_mark()
{
if (!strncmp("mark :", command_buf.buf, 6)) {
command_mark = strtoul(command_buf.buf + 6, NULL, 10);
read_next_command();
}
else
command_mark = 0;
}
static void* cmd_data (size_t *size)
{
size_t n = 0;
void *buffer;
size_t length;
if (strncmp("data ", command_buf.buf, 5))
die("Expected 'data n' command, found: %s", command_buf.buf);
length = strtoul(command_buf.buf + 5, NULL, 10);
buffer = xmalloc(length);
while (n < length) {
size_t s = fread((char*)buffer + n, 1, length - n, stdin);
if (!s && feof(stdin))
die("EOF in data (%lu bytes remaining)", length - n);
n += s;
}
if (fgetc(stdin) != '\n')
die("An lf did not trail the binary data as expected.");
*size = length;
return buffer;
}
static void cmd_new_blob() static void cmd_new_blob()
{ {
unsigned long datlen; size_t datlen;
unsigned char sha1[20];
void *dat; void *dat;
unsigned char sha1[20];
read_next_command();
cmd_mark();
dat = cmd_data(&datlen);
yread(0, &datlen, 4);
dat = xmalloc(datlen);
yread(0, dat, datlen);
if (store_object(OBJ_BLOB, dat, datlen, &last_blob, sha1)) if (store_object(OBJ_BLOB, dat, datlen, &last_blob, sha1))
free(dat); free(dat);
} }
@ -949,122 +1004,231 @@ static void load_branch(struct branch *b)
static void file_change_m(struct branch *b) static void file_change_m(struct branch *b)
{ {
const char *path = read_path(); const char *p = command_buf.buf + 2;
char *p_uq;
const char *endp;
struct object_entry *oe; struct object_entry *oe;
char hexsha1[41];
unsigned char sha1[20]; unsigned char sha1[20];
unsigned int mode;
char type[20]; char type[20];
yread(0, hexsha1, 40); p = get_mode(p, &mode);
hexsha1[40] = 0; if (!p)
die("Corrupt mode: %s", command_buf.buf);
switch (mode) {
case S_IFREG | 0644:
case S_IFREG | 0755:
case 0644:
case 0755:
/* ok */
break;
default:
die("Corrupt mode: %s", command_buf.buf);
}
if (get_sha1_hex(p, sha1))
die("Invalid SHA1: %s", command_buf.buf);
p += 40;
if (*p++ != ' ')
die("Missing space after SHA1: %s", command_buf.buf);
p_uq = unquote_c_style(p, &endp);
if (p_uq) {
if (*endp)
die("Garbage after path in: %s", command_buf.buf);
p = p_uq;
}
if (get_sha1_hex(hexsha1, sha1))
die("Invalid sha1 %s for %s", hexsha1, path);
oe = find_object(sha1); oe = find_object(sha1);
if (oe) { if (oe) {
if (oe->type != OBJ_BLOB) if (oe->type != OBJ_BLOB)
die("%s is a %s not a blob (for %s)", hexsha1, type_names[oe->type], path); die("Not a blob (actually a %s): %s",
command_buf.buf, type_names[oe->type]);
} else { } else {
if (sha1_object_info(sha1, type, NULL)) if (sha1_object_info(sha1, type, NULL))
die("No blob %s for %s", hexsha1, path); die("Blob not found: %s", command_buf.buf);
if (strcmp(blob_type, type)) if (strcmp(blob_type, type))
die("%s is a %s not a blob (for %s)", hexsha1, type, path); die("Not a blob (actually a %s): %s",
command_buf.buf, type);
} }
tree_content_set(&b->branch_tree, path, sha1, S_IFREG | 0644); tree_content_set(&b->branch_tree, p, sha1, S_IFREG | mode);
if (p_uq)
free(p_uq);
} }
static void file_change_d(struct branch *b) static void file_change_d(struct branch *b)
{ {
tree_content_remove(&b->branch_tree, read_path()); const char *p = command_buf.buf + 2;
char *p_uq;
const char *endp;
p_uq = unquote_c_style(p, &endp);
if (p_uq) {
if (*endp)
die("Garbage after path in: %s", command_buf.buf);
p = p_uq;
}
tree_content_remove(&b->branch_tree, p);
if (p_uq)
free(p_uq);
} }
static void cmd_new_commit() static void cmd_new_commit()
{ {
static const unsigned int max_hdr_len = 94; struct branch *b;
const char *name = read_path(); void *msg;
struct branch *b = lookup_branch(name); size_t msglen;
unsigned int acmsglen; char *str_uq;
char *body, *c; const char *endp;
char *sp;
char *author = NULL;
char *committer = NULL;
char *body;
/* Obtain the branch name from the rest of our command */
sp = strchr(command_buf.buf, ' ') + 1;
str_uq = unquote_c_style(sp, &endp);
if (str_uq) {
if (*endp)
die("Garbage after ref in: %s", command_buf.buf);
sp = str_uq;
}
b = lookup_branch(sp);
if (!b) if (!b)
die("Branch not declared: %s", name); die("Branch not declared: %s", sp);
if (str_uq)
free(str_uq);
read_next_command();
cmd_mark();
if (!strncmp("author ", command_buf.buf, 7)) {
author = strdup(command_buf.buf);
read_next_command();
}
if (!strncmp("committer ", command_buf.buf, 10)) {
committer = strdup(command_buf.buf);
read_next_command();
}
if (!committer)
die("Expected committer but didn't get one");
msg = cmd_data(&msglen);
/* ensure the branch is active/loaded */
if (!b->branch_tree.tree) { if (!b->branch_tree.tree) {
unload_one_branch(); unload_one_branch();
load_branch(b); load_branch(b);
} }
/* author_committer_msg */
yread(0, &acmsglen, 4);
body = xmalloc(acmsglen + max_hdr_len);
c = body + max_hdr_len;
yread(0, c, acmsglen);
/* oddly enough this is all that fsck-objects cares about */
if (memcmp(c, "author ", 7))
die("Invalid commit format on branch %s", name);
/* file_change* */ /* file_change* */
for (;;) { for (;;) {
unsigned char cmd; read_next_command();
yread(0, &cmd, 1); if (1 == command_buf.len)
if (cmd == '0')
break; break;
else if (cmd == 'M') else if (!strncmp("M ", command_buf.buf, 2))
file_change_m(b); file_change_m(b);
else if (cmd == 'D') else if (!strncmp("D ", command_buf.buf, 2))
file_change_d(b); file_change_d(b);
else else
die("Unsupported file_change: %c", cmd); die("Unsupported file_change: %s", command_buf.buf);
} }
if (memcmp(b->sha1, null_sha1, 20)) { /* build the tree and the commit */
sprintf(c - 48, "parent %s", sha1_to_hex(b->sha1));
*(c - 1) = '\n';
c -= 48;
}
store_tree(&b->branch_tree); store_tree(&b->branch_tree);
sprintf(c - 46, "tree %s", sha1_to_hex(b->branch_tree.sha1)); body = xmalloc(97 + msglen
*(c - 1) = '\n'; + (author
c -= 46; ? strlen(author) + strlen(committer)
: 2 * strlen(committer)));
sp = body;
sp += sprintf(sp, "tree %s\n", sha1_to_hex(b->branch_tree.sha1));
if (memcmp(b->sha1, null_sha1, 20))
sp += sprintf(sp, "parent %s\n", sha1_to_hex(b->sha1));
if (author)
sp += sprintf(sp, "%s\n", author);
else
sp += sprintf(sp, "author %s\n", committer + 10);
sp += sprintf(sp, "%s\n\n", committer);
memcpy(sp, msg, msglen);
sp += msglen;
if (author)
free(author);
free(committer);
free(msg);
store_object(OBJ_COMMIT, store_object(OBJ_COMMIT, body, sp - body, NULL, b->sha1);
c, (body + max_hdr_len + acmsglen) - c,
NULL, b->sha1);
free(body); free(body);
b->last_commit = object_count_by_type[OBJ_COMMIT]; b->last_commit = object_count_by_type[OBJ_COMMIT];
} }
static void cmd_new_branch() static void cmd_new_branch()
{ {
struct branch *b = new_branch(read_path()); struct branch *b;
const char *base = read_path(); char *str_uq;
struct branch *s = lookup_branch(base); const char *endp;
char *sp;
if (!strcmp(b->name, base)) /* Obtain the new branch name from the rest of our command */
die("Can't create a branch from itself: %s", base); sp = strchr(command_buf.buf, ' ') + 1;
else if (s) { str_uq = unquote_c_style(sp, &endp);
memcpy(b->sha1, s->sha1, 20); if (str_uq) {
memcpy(b->branch_tree.sha1, s->branch_tree.sha1, 20); if (*endp)
die("Garbage after ref in: %s", command_buf.buf);
sp = str_uq;
} }
else if (!get_sha1(base, b->sha1)) { b = new_branch(sp);
if (!memcmp(b->sha1, null_sha1, 20)) if (str_uq)
memcpy(b->branch_tree.sha1, null_sha1, 20); free(str_uq);
else { read_next_command();
unsigned long size;
char *buf;
buf = read_object_with_reference(b->sha1, /* from ... */
type_names[OBJ_COMMIT], &size, b->sha1); if (!strncmp("from ", command_buf.buf, 5)) {
if (!buf || size < 46) const char *from;
die("Not a valid commit: %s", base); struct branch *s;
if (memcmp("tree ", buf, 5)
|| get_sha1_hex(buf + 5, b->branch_tree.sha1)) from = strchr(command_buf.buf, ' ') + 1;
die("The commit %s is corrupt", sha1_to_hex(b->sha1)); str_uq = unquote_c_style(from, &endp);
free(buf); if (str_uq) {
if (*endp)
die("Garbage after string in: %s", command_buf.buf);
from = str_uq;
} }
} else
die("Not a SHA1 or branch: %s", base); s = lookup_branch(from);
if (b == s)
die("Can't create a branch from itself: %s", b->name);
else if (s) {
memcpy(b->sha1, s->sha1, 20);
memcpy(b->branch_tree.sha1, s->branch_tree.sha1, 20);
} else if (!get_sha1(from, b->sha1)) {
if (!memcmp(b->sha1, null_sha1, 20))
memcpy(b->branch_tree.sha1, null_sha1, 20);
else {
unsigned long size;
char *buf;
buf = read_object_with_reference(b->sha1,
type_names[OBJ_COMMIT], &size, b->sha1);
if (!buf || size < 46)
die("Not a valid commit: %s", from);
if (memcmp("tree ", buf, 5)
|| get_sha1_hex(buf + 5, b->branch_tree.sha1))
die("The commit %s is corrupt", sha1_to_hex(b->sha1));
free(buf);
}
} else
die("Invalid ref name or SHA1 expression: %s", from);
if (str_uq)
free(str_uq);
read_next_command();
} else {
memcpy(b->sha1, null_sha1, 20);
memcpy(b->branch_tree.sha1, null_sha1, 20);
}
if (command_buf.eof || command_buf.len > 1)
die("An lf did not terminate the branch command as expected.");
} }
int main(int argc, const char **argv) int main(int argc, const char **argv)
@ -1087,26 +1251,28 @@ int main(int argc, const char **argv)
if (pack_fd < 0) if (pack_fd < 0)
die("Can't create %s: %s", pack_name, strerror(errno)); die("Can't create %s: %s", pack_name, strerror(errno));
init_pack_header();
alloc_objects(est_obj_cnt); alloc_objects(est_obj_cnt);
strbuf_init(&command_buf);
atom_table = xcalloc(atom_table_sz, sizeof(struct atom_str*)); atom_table = xcalloc(atom_table_sz, sizeof(struct atom_str*));
branch_table = xcalloc(branch_table_sz, sizeof(struct branch*)); branch_table = xcalloc(branch_table_sz, sizeof(struct branch*));
avail_tree_table = xcalloc(avail_tree_table_sz, sizeof(struct avail_tree_content*)); avail_tree_table = xcalloc(avail_tree_table_sz, sizeof(struct avail_tree_content*));
init_pack_header();
for (;;) { for (;;) {
unsigned long cmd; read_next_command();
if (optional_read(0, &cmd, 4)) if (command_buf.eof)
break; break;
else if (!strcmp("blob", command_buf.buf))
switch (ntohl(cmd)) { cmd_new_blob();
case 'blob': cmd_new_blob(); break; else if (!strncmp("branch ", command_buf.buf, 7))
case 'comt': cmd_new_commit(); break; cmd_new_branch();
case 'brch': cmd_new_branch(); break; else if (!strncmp("commit ", command_buf.buf, 7))
default: cmd_new_commit();
die("Invalid command %lu", cmd); else
} die("Unsupported command: %s", command_buf.buf);
} }
fixup_header_footer(); fixup_header_footer();
close(pack_fd); close(pack_fd);
write_index(idx_name); write_index(idx_name);