git/reftable/reader.c
Junio C Hamano 9f67cbd0a7 Merge branch 'ps/reftable-iteration-perf'
The code to iterate over refs with the reftable backend has seen
some optimization.

* ps/reftable-iteration-perf:
  reftable/reader: add comments to `table_iter_next()`
  reftable/record: don't try to reallocate ref record name
  reftable/block: swap buffers instead of copying
  reftable/pq: allocation-less comparison of entry keys
  reftable/merged: skip comparison for records of the same subiter
  reftable/merged: allocation-less dropping of shadowed records
  reftable/record: introduce function to compare records by key
2024-02-26 18:10:24 -08:00

850 lines
19 KiB
C

/*
Copyright 2020 Google LLC
Use of this source code is governed by a BSD-style
license that can be found in the LICENSE file or at
https://developers.google.com/open-source/licenses/bsd
*/
#include "reader.h"
#include "system.h"
#include "block.h"
#include "constants.h"
#include "generic.h"
#include "iter.h"
#include "record.h"
#include "reftable-error.h"
#include "reftable-generic.h"
uint64_t block_source_size(struct reftable_block_source *source)
{
return source->ops->size(source->arg);
}
int block_source_read_block(struct reftable_block_source *source,
struct reftable_block *dest, uint64_t off,
uint32_t size)
{
int result = source->ops->read_block(source->arg, dest, off, size);
dest->source = *source;
return result;
}
void block_source_close(struct reftable_block_source *source)
{
if (!source->ops) {
return;
}
source->ops->close(source->arg);
source->ops = NULL;
}
static struct reftable_reader_offsets *
reader_offsets_for(struct reftable_reader *r, uint8_t typ)
{
switch (typ) {
case BLOCK_TYPE_REF:
return &r->ref_offsets;
case BLOCK_TYPE_LOG:
return &r->log_offsets;
case BLOCK_TYPE_OBJ:
return &r->obj_offsets;
}
abort();
}
static int reader_get_block(struct reftable_reader *r,
struct reftable_block *dest, uint64_t off,
uint32_t sz)
{
if (off >= r->size)
return 0;
if (off + sz > r->size) {
sz = r->size - off;
}
return block_source_read_block(&r->source, dest, off, sz);
}
uint32_t reftable_reader_hash_id(struct reftable_reader *r)
{
return r->hash_id;
}
const char *reader_name(struct reftable_reader *r)
{
return r->name;
}
static int parse_footer(struct reftable_reader *r, uint8_t *footer,
uint8_t *header)
{
uint8_t *f = footer;
uint8_t first_block_typ;
int err = 0;
uint32_t computed_crc;
uint32_t file_crc;
if (memcmp(f, "REFT", 4)) {
err = REFTABLE_FORMAT_ERROR;
goto done;
}
f += 4;
if (memcmp(footer, header, header_size(r->version))) {
err = REFTABLE_FORMAT_ERROR;
goto done;
}
f++;
r->block_size = get_be24(f);
f += 3;
r->min_update_index = get_be64(f);
f += 8;
r->max_update_index = get_be64(f);
f += 8;
if (r->version == 1) {
r->hash_id = GIT_SHA1_FORMAT_ID;
} else {
r->hash_id = get_be32(f);
switch (r->hash_id) {
case GIT_SHA1_FORMAT_ID:
break;
case GIT_SHA256_FORMAT_ID:
break;
default:
err = REFTABLE_FORMAT_ERROR;
goto done;
}
f += 4;
}
r->ref_offsets.index_offset = get_be64(f);
f += 8;
r->obj_offsets.offset = get_be64(f);
f += 8;
r->object_id_len = r->obj_offsets.offset & ((1 << 5) - 1);
r->obj_offsets.offset >>= 5;
r->obj_offsets.index_offset = get_be64(f);
f += 8;
r->log_offsets.offset = get_be64(f);
f += 8;
r->log_offsets.index_offset = get_be64(f);
f += 8;
computed_crc = crc32(0, footer, f - footer);
file_crc = get_be32(f);
f += 4;
if (computed_crc != file_crc) {
err = REFTABLE_FORMAT_ERROR;
goto done;
}
first_block_typ = header[header_size(r->version)];
r->ref_offsets.is_present = (first_block_typ == BLOCK_TYPE_REF);
r->ref_offsets.offset = 0;
r->log_offsets.is_present = (first_block_typ == BLOCK_TYPE_LOG ||
r->log_offsets.offset > 0);
r->obj_offsets.is_present = r->obj_offsets.offset > 0;
if (r->obj_offsets.is_present && !r->object_id_len) {
err = REFTABLE_FORMAT_ERROR;
goto done;
}
err = 0;
done:
return err;
}
int init_reader(struct reftable_reader *r, struct reftable_block_source *source,
const char *name)
{
struct reftable_block footer = { NULL };
struct reftable_block header = { NULL };
int err = 0;
uint64_t file_size = block_source_size(source);
/* Need +1 to read type of first block. */
uint32_t read_size = header_size(2) + 1; /* read v2 because it's larger. */
memset(r, 0, sizeof(struct reftable_reader));
if (read_size > file_size) {
err = REFTABLE_FORMAT_ERROR;
goto done;
}
err = block_source_read_block(source, &header, 0, read_size);
if (err != read_size) {
err = REFTABLE_IO_ERROR;
goto done;
}
if (memcmp(header.data, "REFT", 4)) {
err = REFTABLE_FORMAT_ERROR;
goto done;
}
r->version = header.data[4];
if (r->version != 1 && r->version != 2) {
err = REFTABLE_FORMAT_ERROR;
goto done;
}
r->size = file_size - footer_size(r->version);
r->source = *source;
r->name = xstrdup(name);
r->hash_id = 0;
err = block_source_read_block(source, &footer, r->size,
footer_size(r->version));
if (err != footer_size(r->version)) {
err = REFTABLE_IO_ERROR;
goto done;
}
err = parse_footer(r, footer.data, header.data);
done:
reftable_block_done(&footer);
reftable_block_done(&header);
return err;
}
struct table_iter {
struct reftable_reader *r;
uint8_t typ;
uint64_t block_off;
struct block_iter bi;
int is_finished;
};
#define TABLE_ITER_INIT { \
.bi = BLOCK_ITER_INIT \
}
static void table_iter_copy_from(struct table_iter *dest,
struct table_iter *src)
{
dest->r = src->r;
dest->typ = src->typ;
dest->block_off = src->block_off;
dest->is_finished = src->is_finished;
block_iter_copy_from(&dest->bi, &src->bi);
}
static int table_iter_next_in_block(struct table_iter *ti,
struct reftable_record *rec)
{
int res = block_iter_next(&ti->bi, rec);
if (res == 0 && reftable_record_type(rec) == BLOCK_TYPE_REF) {
rec->u.ref.update_index += ti->r->min_update_index;
}
return res;
}
static void table_iter_block_done(struct table_iter *ti)
{
if (!ti->bi.br) {
return;
}
reftable_block_done(&ti->bi.br->block);
FREE_AND_NULL(ti->bi.br);
ti->bi.last_key.len = 0;
ti->bi.next_off = 0;
}
static int32_t extract_block_size(uint8_t *data, uint8_t *typ, uint64_t off,
int version)
{
int32_t result = 0;
if (off == 0) {
data += header_size(version);
}
*typ = data[0];
if (reftable_is_block_type(*typ)) {
result = get_be24(data + 1);
}
return result;
}
int reader_init_block_reader(struct reftable_reader *r, struct block_reader *br,
uint64_t next_off, uint8_t want_typ)
{
int32_t guess_block_size = r->block_size ? r->block_size :
DEFAULT_BLOCK_SIZE;
struct reftable_block block = { NULL };
uint8_t block_typ = 0;
int err = 0;
uint32_t header_off = next_off ? 0 : header_size(r->version);
int32_t block_size = 0;
if (next_off >= r->size)
return 1;
err = reader_get_block(r, &block, next_off, guess_block_size);
if (err < 0)
goto done;
block_size = extract_block_size(block.data, &block_typ, next_off,
r->version);
if (block_size < 0) {
err = block_size;
goto done;
}
if (want_typ != BLOCK_TYPE_ANY && block_typ != want_typ) {
err = 1;
goto done;
}
if (block_size > guess_block_size) {
reftable_block_done(&block);
err = reader_get_block(r, &block, next_off, block_size);
if (err < 0) {
goto done;
}
}
err = block_reader_init(br, &block, header_off, r->block_size,
hash_size(r->hash_id));
done:
reftable_block_done(&block);
return err;
}
static int table_iter_next_block(struct table_iter *dest,
struct table_iter *src)
{
uint64_t next_block_off = src->block_off + src->bi.br->full_block_size;
struct block_reader br = { 0 };
int err = 0;
dest->r = src->r;
dest->typ = src->typ;
dest->block_off = next_block_off;
err = reader_init_block_reader(src->r, &br, next_block_off, src->typ);
if (err > 0) {
dest->is_finished = 1;
return 1;
}
if (err != 0)
return err;
else {
struct block_reader *brp =
reftable_malloc(sizeof(struct block_reader));
*brp = br;
dest->is_finished = 0;
block_reader_start(brp, &dest->bi);
}
return 0;
}
static int table_iter_next(struct table_iter *ti, struct reftable_record *rec)
{
if (reftable_record_type(rec) != ti->typ)
return REFTABLE_API_ERROR;
while (1) {
struct table_iter next = TABLE_ITER_INIT;
int err;
if (ti->is_finished)
return 1;
/*
* Check whether the current block still has more records. If
* so, return it. If the iterator returns positive then the
* current block has been exhausted.
*/
err = table_iter_next_in_block(ti, rec);
if (err <= 0)
return err;
/*
* Otherwise, we need to continue to the next block in the
* table and retry. If there are no more blocks then the
* iterator is drained.
*/
err = table_iter_next_block(&next, ti);
table_iter_block_done(ti);
if (err) {
ti->is_finished = 1;
return err;
}
table_iter_copy_from(ti, &next);
block_iter_close(&next.bi);
}
}
static int table_iter_next_void(void *ti, struct reftable_record *rec)
{
return table_iter_next(ti, rec);
}
static void table_iter_close(void *p)
{
struct table_iter *ti = p;
table_iter_block_done(ti);
block_iter_close(&ti->bi);
}
static struct reftable_iterator_vtable table_iter_vtable = {
.next = &table_iter_next_void,
.close = &table_iter_close,
};
static void iterator_from_table_iter(struct reftable_iterator *it,
struct table_iter *ti)
{
assert(!it->ops);
it->iter_arg = ti;
it->ops = &table_iter_vtable;
}
static int reader_table_iter_at(struct reftable_reader *r,
struct table_iter *ti, uint64_t off,
uint8_t typ)
{
struct block_reader br = { 0 };
struct block_reader *brp = NULL;
int err = reader_init_block_reader(r, &br, off, typ);
if (err != 0)
return err;
brp = reftable_malloc(sizeof(struct block_reader));
*brp = br;
ti->r = r;
ti->typ = block_reader_type(brp);
ti->block_off = off;
block_reader_start(brp, &ti->bi);
return 0;
}
static int reader_start(struct reftable_reader *r, struct table_iter *ti,
uint8_t typ, int index)
{
struct reftable_reader_offsets *offs = reader_offsets_for(r, typ);
uint64_t off = offs->offset;
if (index) {
off = offs->index_offset;
if (off == 0) {
return 1;
}
typ = BLOCK_TYPE_INDEX;
}
return reader_table_iter_at(r, ti, off, typ);
}
static int reader_seek_linear(struct table_iter *ti,
struct reftable_record *want)
{
struct strbuf want_key = STRBUF_INIT;
struct strbuf got_key = STRBUF_INIT;
struct table_iter next = TABLE_ITER_INIT;
struct reftable_record rec;
int err = -1;
reftable_record_init(&rec, reftable_record_type(want));
reftable_record_key(want, &want_key);
while (1) {
err = table_iter_next_block(&next, ti);
if (err < 0)
goto done;
if (err > 0) {
break;
}
err = block_reader_first_key(next.bi.br, &got_key);
if (err < 0)
goto done;
if (strbuf_cmp(&got_key, &want_key) > 0) {
table_iter_block_done(&next);
break;
}
table_iter_block_done(ti);
table_iter_copy_from(ti, &next);
}
err = block_iter_seek(&ti->bi, &want_key);
if (err < 0)
goto done;
err = 0;
done:
block_iter_close(&next.bi);
reftable_record_release(&rec);
strbuf_release(&want_key);
strbuf_release(&got_key);
return err;
}
static int reader_seek_indexed(struct reftable_reader *r,
struct reftable_iterator *it,
struct reftable_record *rec)
{
struct reftable_record want_index = {
.type = BLOCK_TYPE_INDEX, .u.idx = { .last_key = STRBUF_INIT }
};
struct reftable_record index_result = {
.type = BLOCK_TYPE_INDEX,
.u.idx = { .last_key = STRBUF_INIT },
};
struct table_iter index_iter = TABLE_ITER_INIT;
struct table_iter next = TABLE_ITER_INIT;
int err = 0;
reftable_record_key(rec, &want_index.u.idx.last_key);
err = reader_start(r, &index_iter, reftable_record_type(rec), 1);
if (err < 0)
goto done;
/*
* The index may consist of multiple levels, where each level may have
* multiple index blocks. We start by doing a linear search in the
* highest layer that identifies the relevant index block as well as
* the record inside that block that corresponds to our wanted key.
*/
err = reader_seek_linear(&index_iter, &want_index);
if (err < 0)
goto done;
/*
* Traverse down the levels until we find a non-index entry.
*/
while (1) {
/*
* In case we seek a record that does not exist the index iter
* will tell us that the iterator is over. This works because
* the last index entry of the current level will contain the
* last key it knows about. So in case our seeked key is larger
* than the last indexed key we know that it won't exist.
*
* There is one subtlety in the layout of the index section
* that makes this work as expected: the highest-level index is
* at end of the section and will point backwards and thus we
* start reading from the end of the index section, not the
* beginning.
*
* If that wasn't the case and the order was reversed then the
* linear seek would seek into the lower levels and traverse
* all levels of the index only to find out that the key does
* not exist.
*/
err = table_iter_next(&index_iter, &index_result);
table_iter_block_done(&index_iter);
if (err != 0)
goto done;
err = reader_table_iter_at(r, &next, index_result.u.idx.offset,
0);
if (err != 0)
goto done;
err = block_iter_seek(&next.bi, &want_index.u.idx.last_key);
if (err < 0)
goto done;
if (next.typ == reftable_record_type(rec)) {
err = 0;
break;
}
if (next.typ != BLOCK_TYPE_INDEX) {
err = REFTABLE_FORMAT_ERROR;
break;
}
table_iter_copy_from(&index_iter, &next);
}
if (err == 0) {
struct table_iter empty = TABLE_ITER_INIT;
struct table_iter *malloced = reftable_calloc(1, sizeof(*malloced));
*malloced = empty;
table_iter_copy_from(malloced, &next);
iterator_from_table_iter(it, malloced);
}
done:
block_iter_close(&next.bi);
table_iter_close(&index_iter);
reftable_record_release(&want_index);
reftable_record_release(&index_result);
return err;
}
static int reader_seek_internal(struct reftable_reader *r,
struct reftable_iterator *it,
struct reftable_record *rec)
{
struct reftable_reader_offsets *offs =
reader_offsets_for(r, reftable_record_type(rec));
uint64_t idx = offs->index_offset;
struct table_iter ti = TABLE_ITER_INIT;
int err = 0;
if (idx > 0)
return reader_seek_indexed(r, it, rec);
err = reader_start(r, &ti, reftable_record_type(rec), 0);
if (err < 0)
return err;
err = reader_seek_linear(&ti, rec);
if (err < 0)
return err;
else {
struct table_iter *p =
reftable_malloc(sizeof(struct table_iter));
*p = ti;
iterator_from_table_iter(it, p);
}
return 0;
}
static int reader_seek(struct reftable_reader *r, struct reftable_iterator *it,
struct reftable_record *rec)
{
uint8_t typ = reftable_record_type(rec);
struct reftable_reader_offsets *offs = reader_offsets_for(r, typ);
if (!offs->is_present) {
iterator_set_empty(it);
return 0;
}
return reader_seek_internal(r, it, rec);
}
int reftable_reader_seek_ref(struct reftable_reader *r,
struct reftable_iterator *it, const char *name)
{
struct reftable_record rec = {
.type = BLOCK_TYPE_REF,
.u.ref = {
.refname = (char *)name,
},
};
return reader_seek(r, it, &rec);
}
int reftable_reader_seek_log_at(struct reftable_reader *r,
struct reftable_iterator *it, const char *name,
uint64_t update_index)
{
struct reftable_record rec = { .type = BLOCK_TYPE_LOG,
.u.log = {
.refname = (char *)name,
.update_index = update_index,
} };
return reader_seek(r, it, &rec);
}
int reftable_reader_seek_log(struct reftable_reader *r,
struct reftable_iterator *it, const char *name)
{
uint64_t max = ~((uint64_t)0);
return reftable_reader_seek_log_at(r, it, name, max);
}
void reader_close(struct reftable_reader *r)
{
block_source_close(&r->source);
FREE_AND_NULL(r->name);
}
int reftable_new_reader(struct reftable_reader **p,
struct reftable_block_source *src, char const *name)
{
struct reftable_reader *rd = reftable_calloc(1, sizeof(*rd));
int err = init_reader(rd, src, name);
if (err == 0) {
*p = rd;
} else {
block_source_close(src);
reftable_free(rd);
}
return err;
}
void reftable_reader_free(struct reftable_reader *r)
{
if (!r)
return;
reader_close(r);
reftable_free(r);
}
static int reftable_reader_refs_for_indexed(struct reftable_reader *r,
struct reftable_iterator *it,
uint8_t *oid)
{
struct reftable_record want = {
.type = BLOCK_TYPE_OBJ,
.u.obj = {
.hash_prefix = oid,
.hash_prefix_len = r->object_id_len,
},
};
struct reftable_iterator oit = { NULL };
struct reftable_record got = {
.type = BLOCK_TYPE_OBJ,
.u.obj = { 0 },
};
int err = 0;
struct indexed_table_ref_iter *itr = NULL;
/* Look through the reverse index. */
err = reader_seek(r, &oit, &want);
if (err != 0)
goto done;
/* read out the reftable_obj_record */
err = iterator_next(&oit, &got);
if (err < 0)
goto done;
if (err > 0 || memcmp(want.u.obj.hash_prefix, got.u.obj.hash_prefix,
r->object_id_len)) {
/* didn't find it; return empty iterator */
iterator_set_empty(it);
err = 0;
goto done;
}
err = new_indexed_table_ref_iter(&itr, r, oid, hash_size(r->hash_id),
got.u.obj.offsets,
got.u.obj.offset_len);
if (err < 0)
goto done;
got.u.obj.offsets = NULL;
iterator_from_indexed_table_ref_iter(it, itr);
done:
reftable_iterator_destroy(&oit);
reftable_record_release(&got);
return err;
}
static int reftable_reader_refs_for_unindexed(struct reftable_reader *r,
struct reftable_iterator *it,
uint8_t *oid)
{
struct table_iter ti_empty = TABLE_ITER_INIT;
struct table_iter *ti = reftable_calloc(1, sizeof(*ti));
struct filtering_ref_iterator *filter = NULL;
struct filtering_ref_iterator empty = FILTERING_REF_ITERATOR_INIT;
int oid_len = hash_size(r->hash_id);
int err;
*ti = ti_empty;
err = reader_start(r, ti, BLOCK_TYPE_REF, 0);
if (err < 0) {
reftable_free(ti);
return err;
}
filter = reftable_malloc(sizeof(struct filtering_ref_iterator));
*filter = empty;
strbuf_add(&filter->oid, oid, oid_len);
reftable_table_from_reader(&filter->tab, r);
filter->double_check = 0;
iterator_from_table_iter(&filter->it, ti);
iterator_from_filtering_ref_iterator(it, filter);
return 0;
}
int reftable_reader_refs_for(struct reftable_reader *r,
struct reftable_iterator *it, uint8_t *oid)
{
if (r->obj_offsets.is_present)
return reftable_reader_refs_for_indexed(r, it, oid);
return reftable_reader_refs_for_unindexed(r, it, oid);
}
uint64_t reftable_reader_max_update_index(struct reftable_reader *r)
{
return r->max_update_index;
}
uint64_t reftable_reader_min_update_index(struct reftable_reader *r)
{
return r->min_update_index;
}
/* generic table interface. */
static int reftable_reader_seek_void(void *tab, struct reftable_iterator *it,
struct reftable_record *rec)
{
return reader_seek(tab, it, rec);
}
static uint32_t reftable_reader_hash_id_void(void *tab)
{
return reftable_reader_hash_id(tab);
}
static uint64_t reftable_reader_min_update_index_void(void *tab)
{
return reftable_reader_min_update_index(tab);
}
static uint64_t reftable_reader_max_update_index_void(void *tab)
{
return reftable_reader_max_update_index(tab);
}
static struct reftable_table_vtable reader_vtable = {
.seek_record = reftable_reader_seek_void,
.hash_id = reftable_reader_hash_id_void,
.min_update_index = reftable_reader_min_update_index_void,
.max_update_index = reftable_reader_max_update_index_void,
};
void reftable_table_from_reader(struct reftable_table *tab,
struct reftable_reader *reader)
{
assert(!tab->ops);
tab->ops = &reader_vtable;
tab->table_arg = reader;
}
int reftable_reader_print_file(const char *tablename)
{
struct reftable_block_source src = { NULL };
int err = reftable_block_source_from_file(&src, tablename);
struct reftable_reader *r = NULL;
struct reftable_table tab = { NULL };
if (err < 0)
goto done;
err = reftable_new_reader(&r, &src, tablename);
if (err < 0)
goto done;
reftable_table_from_reader(&tab, r);
err = reftable_table_print(&tab);
done:
reftable_reader_free(r);
return err;
}