/*
Copyright 2020 Google LLC

Use of this source code is governed by a BSD-style
license that can be found in the LICENSE file or at
https://developers.google.com/open-source/licenses/bsd
*/
#include "reader.h"
|
|
|
|
|
|
|
|
#include "system.h"
|
|
|
|
#include "block.h"
|
|
|
|
#include "constants.h"
|
|
|
|
#include "iter.h"
|
|
|
|
#include "record.h"
|
|
|
|
#include "reftable-error.h"
|
|
|
|
|
|
|
|
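
/*
 * Thin wrappers around the pluggable block source: the reader never touches
 * the backing storage directly but always goes through the `ops` vtable of
 * `struct reftable_block_source`.
 */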
uint64_t block_source_size(struct reftable_block_source *source)
{
	return source->ops->size(source->arg);
}

int block_source_read_block(struct reftable_block_source *source,
			    struct reftable_block *dest, uint64_t off,
			    uint32_t size)
{
	int result = source->ops->read_block(source->arg, dest, off, size);
	dest->source = *source;
	return result;
}

void block_source_close(struct reftable_block_source *source)
{
	if (!source->ops) {
		return;
	}

	source->ops->close(source->arg);
	source->ops = NULL;
}

static struct reftable_reader_offsets *
reader_offsets_for(struct reftable_reader *r, uint8_t typ)
{
	switch (typ) {
	case BLOCK_TYPE_REF:
		return &r->ref_offsets;
	case BLOCK_TYPE_LOG:
		return &r->log_offsets;
	case BLOCK_TYPE_OBJ:
		return &r->obj_offsets;
	}
	abort();
}
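
/*
 * Read `sz` bytes at `off` from the underlying block source, clamping the
 * read so that it never extends past the end of the table.
 */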
static int reader_get_block(struct reftable_reader *r,
			    struct reftable_block *dest, uint64_t off,
			    uint32_t sz)
{
	if (off >= r->size)
		return 0;

	if (off + sz > r->size) {
		sz = r->size - off;
	}

	return block_source_read_block(&r->source, dest, off, sz);
}

uint32_t reftable_reader_hash_id(struct reftable_reader *r)
{
	return r->hash_id;
}

const char *reader_name(struct reftable_reader *r)
{
	return r->name;
}
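
/*
 * Parse and validate the table footer. The footer starts with a verbatim
 * copy of the header ("REFT", version, block size, min/max update index
 * and, for version 2, the hash ID), followed by the ref index offset, the
 * object offset (whose low 5 bits encode the object ID prefix length), the
 * object index offset, the log offset, the log index offset and a CRC-32
 * over the preceding footer bytes.
 */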
static int parse_footer(struct reftable_reader *r, uint8_t *footer,
			uint8_t *header)
{
	uint8_t *f = footer;
	uint8_t first_block_typ;
	int err = 0;
	uint32_t computed_crc;
	uint32_t file_crc;

	if (memcmp(f, "REFT", 4)) {
		err = REFTABLE_FORMAT_ERROR;
		goto done;
	}
	f += 4;

	if (memcmp(footer, header, header_size(r->version))) {
		err = REFTABLE_FORMAT_ERROR;
		goto done;
	}

	f++;
	r->block_size = get_be24(f);

	f += 3;
	r->min_update_index = get_be64(f);
	f += 8;
	r->max_update_index = get_be64(f);
	f += 8;

	if (r->version == 1) {
		r->hash_id = GIT_SHA1_FORMAT_ID;
	} else {
		r->hash_id = get_be32(f);
		switch (r->hash_id) {
		case GIT_SHA1_FORMAT_ID:
			break;
		case GIT_SHA256_FORMAT_ID:
			break;
		default:
			err = REFTABLE_FORMAT_ERROR;
			goto done;
		}
		f += 4;
	}

	r->ref_offsets.index_offset = get_be64(f);
	f += 8;

	r->obj_offsets.offset = get_be64(f);
	f += 8;

	r->object_id_len = r->obj_offsets.offset & ((1 << 5) - 1);
	r->obj_offsets.offset >>= 5;

	r->obj_offsets.index_offset = get_be64(f);
	f += 8;
	r->log_offsets.offset = get_be64(f);
	f += 8;
	r->log_offsets.index_offset = get_be64(f);
	f += 8;

	computed_crc = crc32(0, footer, f - footer);
	file_crc = get_be32(f);
	f += 4;
	if (computed_crc != file_crc) {
		err = REFTABLE_FORMAT_ERROR;
		goto done;
	}

	first_block_typ = header[header_size(r->version)];
	r->ref_offsets.is_present = (first_block_typ == BLOCK_TYPE_REF);
	r->ref_offsets.offset = 0;
	r->log_offsets.is_present = (first_block_typ == BLOCK_TYPE_LOG ||
				     r->log_offsets.offset > 0);
	r->obj_offsets.is_present = r->obj_offsets.offset > 0;
	if (r->obj_offsets.is_present && !r->object_id_len) {
		err = REFTABLE_FORMAT_ERROR;
		goto done;
	}

	err = 0;
done:
	return err;
}
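
/*
 * Iterator over all blocks of a single type in a table. It owns the block
 * reader for the block it currently points at, as well as a block iterator
 * that walks the records inside of that block.
 */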
struct table_iter {
	struct reftable_reader *r;
	uint8_t typ;
	uint64_t block_off;
	struct block_reader br;
	struct block_iter bi;
	int is_finished;
};
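
/*
 * Set up an empty iterator for the given reader. The iterator takes its own
 * reference on the reader, which is dropped again in `table_iter_close()`.
 */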
static int table_iter_init(struct table_iter *ti, struct reftable_reader *r)
{
	struct block_iter bi = BLOCK_ITER_INIT;
	memset(ti, 0, sizeof(*ti));
	reftable_reader_incref(r);
	ti->r = r;
	ti->bi = bi;
	return 0;
}
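
/*
 * Yield the next record of the currently loaded block. Ref records store
 * their update index relative to `min_update_index`, so rebase them back to
 * absolute values here.
 */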
static int table_iter_next_in_block(struct table_iter *ti,
				    struct reftable_record *rec)
{
	int res = block_iter_next(&ti->bi, rec);
	if (res == 0 && reftable_record_type(rec) == BLOCK_TYPE_REF) {
		rec->u.ref.update_index += ti->r->min_update_index;
	}

	return res;
}
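
/*
 * Drop the currently loaded block: release the block reader and reset the
 * block iterator so that the table iterator can be pointed at another block.
 */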
static void table_iter_block_done(struct table_iter *ti)
{
	block_reader_release(&ti->br);
	block_iter_reset(&ti->bi);
}
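
/*
 * Read the block header at `data`: the first byte is the block type and the
 * following three bytes encode the block length. For the very first block
 * the file header is skipped first. Returns 0 when the data does not look
 * like a known block type.
 */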
static int32_t extract_block_size(uint8_t *data, uint8_t *typ, uint64_t off,
				  int version)
{
	int32_t result = 0;

	if (off == 0) {
		data += header_size(version);
	}

	*typ = data[0];
	if (reftable_is_block_type(*typ)) {
		result = get_be24(data + 1);
	}
	return result;
}
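
/*
 * Initialize `br` with the block at `next_off`. The block is first read with
 * a guessed size and re-read if it turns out to be larger. Returns a negative
 * error code on failure, a positive value if the offset lies past the end of
 * the table or the block is not of the wanted type, and zero on success.
 */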
int reader_init_block_reader(struct reftable_reader *r, struct block_reader *br,
			     uint64_t next_off, uint8_t want_typ)
{
	int32_t guess_block_size = r->block_size ? r->block_size :
						   DEFAULT_BLOCK_SIZE;
	struct reftable_block block = { NULL };
	uint8_t block_typ = 0;
	int err = 0;
	uint32_t header_off = next_off ? 0 : header_size(r->version);
	int32_t block_size = 0;

	if (next_off >= r->size)
		return 1;

	err = reader_get_block(r, &block, next_off, guess_block_size);
	if (err < 0)
		goto done;

	block_size = extract_block_size(block.data, &block_typ, next_off,
					r->version);
	if (block_size < 0) {
		err = block_size;
		goto done;
	}
	if (want_typ != BLOCK_TYPE_ANY && block_typ != want_typ) {
		err = 1;
		goto done;
	}

	if (block_size > guess_block_size) {
		reftable_block_done(&block);
		err = reader_get_block(r, &block, next_off, block_size);
		if (err < 0) {
			goto done;
		}
	}

	err = block_reader_init(br, &block, header_off, r->block_size,
				hash_size(r->hash_id));
done:
	reftable_block_done(&block);

	return err;
}
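
/*
 * Release all resources held by the iterator, including its reference on the
 * reader.
 */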
static void table_iter_close(struct table_iter *ti)
{
	table_iter_block_done(ti);
	block_iter_close(&ti->bi);
	reftable_reader_decref(ti->r);
}
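
/*
 * Advance the iterator to the next block of its type. Returns a positive
 * value and marks the iterator as finished when there are no further blocks
 * of that type.
 */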
static int table_iter_next_block(struct table_iter *ti)
{
	uint64_t next_block_off = ti->block_off + ti->br.full_block_size;
	int err;

	err = reader_init_block_reader(ti->r, &ti->br, next_block_off, ti->typ);
	if (err > 0)
		ti->is_finished = 1;
	if (err)
		return err;

	ti->block_off = next_block_off;
	ti->is_finished = 0;
	block_iter_seek_start(&ti->bi, &ti->br);

	return 0;
}

static int table_iter_next(struct table_iter *ti, struct reftable_record *rec)
{
	if (reftable_record_type(rec) != ti->typ)
		return REFTABLE_API_ERROR;

	while (1) {
		int err;

		if (ti->is_finished)
			return 1;

		/*
		 * Check whether the current block still has more records. If
		 * so, return it. If the iterator returns positive then the
		 * current block has been exhausted.
		 */
		err = table_iter_next_in_block(ti, rec);
		if (err <= 0)
			return err;

		/*
		 * Otherwise, we need to continue to the next block in the
		 * table and retry. If there are no more blocks then the
		 * iterator is drained.
		 */
		err = table_iter_next_block(ti);
		if (err) {
			ti->is_finished = 1;
			return err;
		}
	}
}
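
/*
 * Position the iterator at the block starting at `off` and seek to the first
 * record of that block.
 */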
static int table_iter_seek_to(struct table_iter *ti, uint64_t off, uint8_t typ)
{
	int err;

	err = reader_init_block_reader(ti->r, &ti->br, off, typ);
	if (err != 0)
		return err;

	ti->typ = block_reader_type(&ti->br);
	ti->block_off = off;
	block_iter_seek_start(&ti->bi, &ti->br);
	ti->is_finished = 0;
	return 0;
}
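
/*
 * Position the iterator at the first block of the given type, or at the
 * start of its index section when `index` is set. Returns a positive value
 * if the requested index does not exist.
 */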
static int table_iter_seek_start(struct table_iter *ti, uint8_t typ, int index)
{
	struct reftable_reader_offsets *offs = reader_offsets_for(ti->r, typ);
	uint64_t off = offs->offset;
	if (index) {
		off = offs->index_offset;
		if (off == 0) {
			return 1;
		}
		typ = BLOCK_TYPE_INDEX;
	}

	return table_iter_seek_to(ti, off, typ);
}
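
/*
 * Seek the iterator to `want` by scanning blocks linearly, without
 * consulting the index. See the comment below for why we have to overshoot
 * by one block and then back up.
 */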
static int table_iter_seek_linear(struct table_iter *ti,
				  struct reftable_record *want)
{
	struct reftable_buf want_key = REFTABLE_BUF_INIT;
	struct reftable_buf got_key = REFTABLE_BUF_INIT;
	struct reftable_record rec;
	int err;

	reftable_record_init(&rec, reftable_record_type(want));
	err = reftable_record_key(want, &want_key);
	if (err < 0)
		goto done;

	/*
	 * First we need to locate the block that must contain our record. To
	 * do so we scan through blocks linearly until we find the first block
	 * whose first key is bigger than our wanted key. Once we have found
	 * that block we know that the key must be contained in the preceding
	 * block.
	 *
	 * This algorithm is somewhat unfortunate because it means that we
	 * always have to seek one block too far and then back up. But as we
	 * can only decode the _first_ key of a block but not its _last_ key we
	 * have no other way to do this.
	 */
	while (1) {
		struct table_iter next = *ti;

		/*
		 * We must be careful not to modify the underlying data of
		 * `ti` because we may find that `next` does not contain our
		 * desired block, but that `ti` does. In that case, we would
		 * discard `next` and continue with `ti`.
		 *
		 * This also means that we cannot reuse allocated memory for
		 * `next` here. While it would be great if we could, it should
		 * in practice not be too bad given that we should only ever
		 * end up doing linear seeks with at most three blocks. As soon
		 * as we have more than three blocks we would have an index, so
		 * we would not do a linear search there anymore.
		 */
		memset(&next.br.block, 0, sizeof(next.br.block));
		next.br.zstream = NULL;
		next.br.uncompressed_data = NULL;
		next.br.uncompressed_cap = 0;

		err = table_iter_next_block(&next);
		if (err < 0)
			goto done;
		if (err > 0)
			break;

		err = block_reader_first_key(&next.br, &got_key);
		if (err < 0)
			goto done;

		if (reftable_buf_cmp(&got_key, &want_key) > 0) {
			table_iter_block_done(&next);
			break;
		}

		table_iter_block_done(ti);
		*ti = next;
	}

	/*
	 * We have located the block that must contain our record, so we seek
	 * the wanted key inside of it. If the block does not contain our key
	 * we know that the corresponding record does not exist.
	 */
	err = block_iter_seek_key(&ti->bi, &ti->br, &want_key);
	if (err < 0)
		goto done;
	err = 0;

done:
	reftable_record_release(&rec);
	reftable_buf_release(&want_key);
	reftable_buf_release(&got_key);
	return err;
}
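
/*
 * Seek the iterator to `rec` by going through the (possibly multi-level)
 * index instead of scanning blocks linearly.
 */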
static int table_iter_seek_indexed(struct table_iter *ti,
				   struct reftable_record *rec)
{
	struct reftable_record want_index = {
		.type = BLOCK_TYPE_INDEX, .u.idx = { .last_key = REFTABLE_BUF_INIT }
	};
	struct reftable_record index_result = {
		.type = BLOCK_TYPE_INDEX,
		.u.idx = { .last_key = REFTABLE_BUF_INIT },
	};
	int err;

	err = reftable_record_key(rec, &want_index.u.idx.last_key);
	if (err < 0)
		goto done;

	/*
	 * The index may consist of multiple levels, where each level may have
	 * multiple index blocks. We start by doing a linear search in the
	 * highest layer that identifies the relevant index block as well as
	 * the record inside that block that corresponds to our wanted key.
	 */
	err = table_iter_seek_linear(ti, &want_index);
	if (err < 0)
		goto done;

	/*
	 * Traverse down the levels until we find a non-index entry.
	 */
	while (1) {
		/*
		 * In case we seek a record that does not exist the index iter
		 * will tell us that the iterator is over. This works because
		 * the last index entry of the current level will contain the
		 * last key it knows about. So in case the key we seek is
		 * larger than the last indexed key we know that it won't
		 * exist.
		 *
		 * There is one subtlety in the layout of the index section
		 * that makes this work as expected: the highest-level index is
		 * at the end of the section and will point backwards and thus
		 * we start reading from the end of the index section, not the
		 * beginning.
		 *
		 * If that wasn't the case and the order was reversed then the
		 * linear seek would seek into the lower levels and traverse
		 * all levels of the index only to find out that the key does
		 * not exist.
		 */
		err = table_iter_next(ti, &index_result);
		if (err != 0)
			goto done;

		err = table_iter_seek_to(ti, index_result.u.idx.offset, 0);
		if (err != 0)
			goto done;

		err = block_iter_seek_key(&ti->bi, &ti->br, &want_index.u.idx.last_key);
		if (err < 0)
			goto done;

		if (ti->typ == reftable_record_type(rec)) {
			err = 0;
			break;
		}

		if (ti->typ != BLOCK_TYPE_INDEX) {
			err = REFTABLE_FORMAT_ERROR;
			goto done;
		}
	}

done:
	reftable_record_release(&want_index);
	reftable_record_release(&index_result);
	return err;
}
|
|
|
|
|
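
/*
 * Seek the table iterator to the wanted record, going through the index if
 * the table has one for this block type and falling back to a linear scan
 * of the section otherwise.
 */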
static int table_iter_seek(struct table_iter *ti,
			   struct reftable_record *want)
{
	uint8_t typ = reftable_record_type(want);
	struct reftable_reader_offsets *offs = reader_offsets_for(ti->r, typ);
	int err;

	err = table_iter_seek_start(ti, reftable_record_type(want),
				    !!offs->index_offset);
	if (err < 0)
		goto out;

	if (offs->index_offset)
		err = table_iter_seek_indexed(ti, want);
	else
		err = table_iter_seek_linear(ti, want);
	if (err)
		goto out;

out:
	return err;
}
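
/*
 * Thin adapters that plug the table iterator into the generic
 * `reftable_iterator` vtable.
 */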
static int table_iter_seek_void(void *ti, struct reftable_record *want)
{
	return table_iter_seek(ti, want);
}

static int table_iter_next_void(void *ti, struct reftable_record *rec)
{
	return table_iter_next(ti, rec);
}

static void table_iter_close_void(void *ti)
{
	table_iter_close(ti);
}

static struct reftable_iterator_vtable table_iter_vtable = {
	.seek = &table_iter_seek_void,
	.next = &table_iter_next_void,
	.close = &table_iter_close_void,
};

static void iterator_from_table_iter(struct reftable_iterator *it,
				     struct table_iter *ti)
{
	assert(!it->ops);
	it->iter_arg = ti;
	it->ops = &table_iter_vtable;
}
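
/*
 * Initialize a generic iterator for blocks of the given type. Tables that
 * do not contain such blocks yield an empty iterator.
 */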
int reader_init_iter(struct reftable_reader *r,
		     struct reftable_iterator *it,
		     uint8_t typ)
{
	struct reftable_reader_offsets *offs = reader_offsets_for(r, typ);

	if (offs->is_present) {
		struct table_iter *ti;
		REFTABLE_ALLOC_ARRAY(ti, 1);
		if (!ti)
			return REFTABLE_OUT_OF_MEMORY_ERROR;

		table_iter_init(ti, r);
		iterator_from_table_iter(it, ti);
	} else {
		iterator_set_empty(it);
	}

	return 0;
}
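
/*
 * Public constructors for iterators over the ref and log sections of a
 * single table.
 */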
int reftable_reader_init_ref_iterator(struct reftable_reader *r,
				      struct reftable_iterator *it)
{
	return reader_init_iter(r, it, BLOCK_TYPE_REF);
}

int reftable_reader_init_log_iterator(struct reftable_reader *r,
				      struct reftable_iterator *it)
{
	return reader_init_iter(r, it, BLOCK_TYPE_LOG);
}
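
/*
 * Usage sketch (not part of this file): a caller wanting to look up a
 * single ref would typically combine the constructor above with the public
 * iterator API. The seek positions the iterator at the first record whose
 * key is greater than or equal to the wanted refname, so the caller still
 * has to compare the returned name. The helper names below
 * (reftable_iterator_seek_ref(), reftable_iterator_next_ref(),
 * reftable_ref_record_release()) are assumed to be the ones declared in
 * the public reftable headers:
 *
 *	struct reftable_iterator it = { 0 };
 *	struct reftable_ref_record ref = { 0 };
 *	int err = reftable_reader_init_ref_iterator(r, &it);
 *
 *	if (!err)
 *		err = reftable_iterator_seek_ref(&it, "refs/heads/main");
 *	if (!err)
 *		err = reftable_iterator_next_ref(&it, &ref);
 *	if (!err && !strcmp(ref.refname, "refs/heads/main"))
 *		use_ref(&ref);
 *
 *	reftable_ref_record_release(&ref);
 *	reftable_iterator_destroy(&it);
 *
 * (use_ref() stands in for whatever the caller does with the record.)
 */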

int reftable_reader_new(struct reftable_reader **out,
			struct reftable_block_source *source, char const *name)
{
	struct reftable_block footer = { 0 };
	struct reftable_block header = { 0 };
	struct reftable_reader *r;
	uint64_t file_size = block_source_size(source);
	uint32_t read_size;
	int err;

	REFTABLE_CALLOC_ARRAY(r, 1);
	if (!r) {
		err = REFTABLE_OUT_OF_MEMORY_ERROR;
		goto done;
	}

	/*
	 * We need one extra byte to read the type of the first block. We
	 * also pretend to always be reading v2 of the format because it is
	 * larger.
	 */
	read_size = header_size(2) + 1;
	if (read_size > file_size) {
		err = REFTABLE_FORMAT_ERROR;
		goto done;
	}

	err = block_source_read_block(source, &header, 0, read_size);
	if (err != read_size) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}

	if (memcmp(header.data, "REFT", 4)) {
		err = REFTABLE_FORMAT_ERROR;
		goto done;
	}
	r->version = header.data[4];
	if (r->version != 1 && r->version != 2) {
		err = REFTABLE_FORMAT_ERROR;
		goto done;
	}

	r->size = file_size - footer_size(r->version);
	r->source = *source;
	r->name = reftable_strdup(name);
	if (!r->name) {
		err = REFTABLE_OUT_OF_MEMORY_ERROR;
		goto done;
	}
	r->hash_id = 0;
	r->refcount = 1;

	err = block_source_read_block(source, &footer, r->size,
				      footer_size(r->version));
	if (err != footer_size(r->version)) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}

	err = parse_footer(r, footer.data, header.data);
	if (err)
		goto done;

	*out = r;

done:
	reftable_block_done(&footer);
	reftable_block_done(&header);
	if (err) {
		reftable_free(r);
		block_source_close(source);
	}
	return err;
}
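
/*
 * Readers are reference-counted: the final reftable_reader_decref() call
 * closes the underlying block source and frees the reader.
 */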
void reftable_reader_incref(struct reftable_reader *r)
{
	if (!r->refcount)
		BUG("cannot increment ref counter of dead reader");
	r->refcount++;
}

void reftable_reader_decref(struct reftable_reader *r)
{
	if (!r)
		return;
	if (!r->refcount)
		BUG("cannot decrement ref counter of dead reader");
	if (--r->refcount)
		return;
	block_source_close(&r->source);
	REFTABLE_FREE_AND_NULL(r->name);
	reftable_free(r);
}
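
/*
 * Find refs pointing at the given object ID by looking up its prefix in the
 * OBJ (reverse index) section and iterating the ref blocks it points at.
 */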
static int reftable_reader_refs_for_indexed(struct reftable_reader *r,
					    struct reftable_iterator *it,
					    uint8_t *oid)
{
	struct reftable_record want = {
		.type = BLOCK_TYPE_OBJ,
		.u.obj = {
			.hash_prefix = oid,
			.hash_prefix_len = r->object_id_len,
		},
	};
	struct reftable_iterator oit = { NULL };
	struct reftable_record got = {
		.type = BLOCK_TYPE_OBJ,
		.u.obj = { 0 },
	};
	int err = 0;
	struct indexed_table_ref_iter *itr = NULL;

	/* Look through the reverse index. */
	err = reader_init_iter(r, &oit, BLOCK_TYPE_OBJ);
	if (err < 0)
		goto done;

	err = iterator_seek(&oit, &want);
	if (err != 0)
		goto done;

	/* read out the reftable_obj_record */
	err = iterator_next(&oit, &got);
	if (err < 0)
		goto done;

	if (err > 0 || memcmp(want.u.obj.hash_prefix, got.u.obj.hash_prefix,
			      r->object_id_len)) {
		/* didn't find it; return empty iterator */
		iterator_set_empty(it);
		err = 0;
		goto done;
	}

	err = indexed_table_ref_iter_new(&itr, r, oid, hash_size(r->hash_id),
					 got.u.obj.offsets,
					 got.u.obj.offset_len);
	if (err < 0)
		goto done;
	got.u.obj.offsets = NULL;
	iterator_from_indexed_table_ref_iter(it, itr);

done:
	reftable_iterator_destroy(&oit);
	reftable_record_release(&got);
	return err;
}
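
/*
 * Fallback for tables without an OBJ section: scan all ref blocks and
 * filter the results by the wanted object ID.
 */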
static int reftable_reader_refs_for_unindexed(struct reftable_reader *r,
					      struct reftable_iterator *it,
					      uint8_t *oid)
{
	struct table_iter *ti;
	struct filtering_ref_iterator *filter = NULL;
	struct filtering_ref_iterator empty = FILTERING_REF_ITERATOR_INIT;
	int oid_len = hash_size(r->hash_id);
	int err;

	REFTABLE_ALLOC_ARRAY(ti, 1);
	if (!ti) {
		err = REFTABLE_OUT_OF_MEMORY_ERROR;
		goto out;
	}

	table_iter_init(ti, r);
	err = table_iter_seek_start(ti, BLOCK_TYPE_REF, 0);
	if (err < 0)
		goto out;

	filter = reftable_malloc(sizeof(*filter));
	if (!filter) {
		err = REFTABLE_OUT_OF_MEMORY_ERROR;
		goto out;
	}
	*filter = empty;

	err = reftable_buf_add(&filter->oid, oid, oid_len);
	if (err < 0)
		goto out;

	iterator_from_table_iter(&filter->it, ti);

	iterator_from_filtering_ref_iterator(it, filter);

	err = 0;

out:
	if (err < 0) {
		if (ti)
			table_iter_close(ti);
		reftable_free(ti);
	}
	return err;
}
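
/*
 * Dispatch to the indexed or the scanning implementation depending on
 * whether the table has an OBJ section.
 */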
int reftable_reader_refs_for(struct reftable_reader *r,
			     struct reftable_iterator *it, uint8_t *oid)
{
	if (r->obj_offsets.is_present)
		return reftable_reader_refs_for_indexed(r, it, oid);
	return reftable_reader_refs_for_unindexed(r, it, oid);
}

uint64_t reftable_reader_max_update_index(struct reftable_reader *r)
{
	return r->max_update_index;
}

uint64_t reftable_reader_min_update_index(struct reftable_reader *r)
{
	return r->min_update_index;
}
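
/*
 * Debugging helper: print the block structure (per-section block lengths
 * and restart counts) of the given table to stdout.
 */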
int reftable_reader_print_blocks(const char *tablename)
{
	struct {
		const char *name;
		int type;
	} sections[] = {
		{
			.name = "ref",
			.type = BLOCK_TYPE_REF,
		},
		{
			.name = "obj",
			.type = BLOCK_TYPE_OBJ,
		},
		{
			.name = "log",
			.type = BLOCK_TYPE_LOG,
		},
	};
	struct reftable_block_source src = { 0 };
	struct reftable_reader *r = NULL;
	struct table_iter ti = { 0 };
	size_t i;
	int err;

	err = reftable_block_source_from_file(&src, tablename);
	if (err < 0)
		goto done;

	err = reftable_reader_new(&r, &src, tablename);
	if (err < 0)
		goto done;

	table_iter_init(&ti, r);

	printf("header:\n");
	printf(" block_size: %d\n", r->block_size);

	for (i = 0; i < ARRAY_SIZE(sections); i++) {
		err = table_iter_seek_start(&ti, sections[i].type, 0);
		if (err < 0)
			goto done;
		if (err > 0)
			continue;

		printf("%s:\n", sections[i].name);

		while (1) {
			printf(" - length: %u\n", ti.br.block_len);
			printf("   restarts: %u\n", ti.br.restart_count);

			err = table_iter_next_block(&ti);
			if (err < 0)
				goto done;
			if (err > 0)
				break;
		}
	}

done:
	reftable_reader_decref(r);
	table_iter_close(&ti);
	return err;
}