/*
Copyright 2020 Google LLC

Use of this source code is governed by a BSD-style
license that can be found in the LICENSE file or at
https://developers.google.com/open-source/licenses/bsd
*/

#include "reader.h"

#include "system.h"
#include "block.h"
#include "constants.h"
#include "generic.h"
#include "iter.h"
#include "record.h"
#include "reftable-error.h"
#include "reftable-generic.h"
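/*
 * Thin wrappers around the pluggable block source. All I/O goes through
 * the vtable in `source->ops`, so the reader stays independent of the
 * concrete backing store.
 */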
uint64_t block_source_size(struct reftable_block_source *source)
{
	return source->ops->size(source->arg);
}

int block_source_read_block(struct reftable_block_source *source,
			    struct reftable_block *dest, uint64_t off,
			    uint32_t size)
{
	int result = source->ops->read_block(source->arg, dest, off, size);
	dest->source = *source;
	return result;
}

void block_source_close(struct reftable_block_source *source)
{
	if (!source->ops) {
		return;
	}

	source->ops->close(source->arg);
	source->ops = NULL;
}
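/*
 * Return the offset bookkeeping for the given block type, i.e. where the
 * corresponding section and its index start and whether it is present.
 */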
static struct reftable_reader_offsets *
reader_offsets_for(struct reftable_reader *r, uint8_t typ)
{
	switch (typ) {
	case BLOCK_TYPE_REF:
		return &r->ref_offsets;
	case BLOCK_TYPE_LOG:
		return &r->log_offsets;
	case BLOCK_TYPE_OBJ:
		return &r->obj_offsets;
	}
	abort();
}

static int reader_get_block(struct reftable_reader *r,
			    struct reftable_block *dest, uint64_t off,
			    uint32_t sz)
{
	if (off >= r->size)
		return 0;

	if (off + sz > r->size) {
		sz = r->size - off;
	}

	return block_source_read_block(&r->source, dest, off, sz);
}

uint32_t reftable_reader_hash_id(struct reftable_reader *r)
{
	return r->hash_id;
}

const char *reader_name(struct reftable_reader *r)
{
	return r->name;
}
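/*
 * Decode the fixed-size footer: it repeats the file header, stores the
 * block size, min/max update index, hash ID (format v2), the offsets of
 * the obj/log sections and of the ref/obj/log indices, and is protected
 * by a CRC-32 checksum over the preceding footer bytes.
 */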
static int parse_footer(struct reftable_reader *r, uint8_t *footer,
			uint8_t *header)
{
	uint8_t *f = footer;
	uint8_t first_block_typ;
	int err = 0;
	uint32_t computed_crc;
	uint32_t file_crc;

	if (memcmp(f, "REFT", 4)) {
		err = REFTABLE_FORMAT_ERROR;
		goto done;
	}
	f += 4;

	if (memcmp(footer, header, header_size(r->version))) {
		err = REFTABLE_FORMAT_ERROR;
		goto done;
	}

	f++;
	r->block_size = get_be24(f);

	f += 3;
	r->min_update_index = get_be64(f);
	f += 8;
	r->max_update_index = get_be64(f);
	f += 8;

	if (r->version == 1) {
		r->hash_id = GIT_SHA1_FORMAT_ID;
	} else {
		r->hash_id = get_be32(f);
		switch (r->hash_id) {
		case GIT_SHA1_FORMAT_ID:
			break;
		case GIT_SHA256_FORMAT_ID:
			break;
		default:
			err = REFTABLE_FORMAT_ERROR;
			goto done;
		}
		f += 4;
	}

	r->ref_offsets.index_offset = get_be64(f);
	f += 8;

	r->obj_offsets.offset = get_be64(f);
	f += 8;

	r->object_id_len = r->obj_offsets.offset & ((1 << 5) - 1);
	r->obj_offsets.offset >>= 5;

	r->obj_offsets.index_offset = get_be64(f);
	f += 8;
	r->log_offsets.offset = get_be64(f);
	f += 8;
	r->log_offsets.index_offset = get_be64(f);
	f += 8;

	computed_crc = crc32(0, footer, f - footer);
	file_crc = get_be32(f);
	f += 4;
	if (computed_crc != file_crc) {
		err = REFTABLE_FORMAT_ERROR;
		goto done;
	}

	first_block_typ = header[header_size(r->version)];
	r->ref_offsets.is_present = (first_block_typ == BLOCK_TYPE_REF);
	r->ref_offsets.offset = 0;
	r->log_offsets.is_present = (first_block_typ == BLOCK_TYPE_LOG ||
				     r->log_offsets.offset > 0);
	r->obj_offsets.is_present = r->obj_offsets.offset > 0;
	if (r->obj_offsets.is_present && !r->object_id_len) {
		err = REFTABLE_FORMAT_ERROR;
		goto done;
	}

	err = 0;
done:
	return err;
}
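/*
 * Set up a reader for the given block source: read the file header plus
 * the type of the first block, check the "REFT" magic and version (1 or
 * 2), then read and parse the footer at the end of the file.
 */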
int init_reader(struct reftable_reader *r, struct reftable_block_source *source,
		const char *name)
{
	struct reftable_block footer = { NULL };
	struct reftable_block header = { NULL };
	int err = 0;
	uint64_t file_size = block_source_size(source);

	/* Need +1 to read type of first block. */
	uint32_t read_size = header_size(2) + 1; /* read v2 because it's larger. */
	memset(r, 0, sizeof(struct reftable_reader));

	if (read_size > file_size) {
		err = REFTABLE_FORMAT_ERROR;
		goto done;
	}

	err = block_source_read_block(source, &header, 0, read_size);
	if (err != read_size) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}

	if (memcmp(header.data, "REFT", 4)) {
		err = REFTABLE_FORMAT_ERROR;
		goto done;
	}
	r->version = header.data[4];
	if (r->version != 1 && r->version != 2) {
		err = REFTABLE_FORMAT_ERROR;
		goto done;
	}

	r->size = file_size - footer_size(r->version);
	r->source = *source;
	r->name = xstrdup(name);
	r->hash_id = 0;

	err = block_source_read_block(source, &footer, r->size,
				      footer_size(r->version));
	if (err != footer_size(r->version)) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}

	err = parse_footer(r, footer.data, header.data);
done:
	reftable_block_done(&footer);
	reftable_block_done(&header);
	return err;
}
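/*
 * Iterator over all blocks of a single type in a table. It owns the
 * block reader and block iterator for the block it is currently
 * positioned on.
 */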
struct table_iter {
	struct reftable_reader *r;
	uint8_t typ;
	uint64_t block_off;
	struct block_reader br;
	struct block_iter bi;
	int is_finished;
};

#define TABLE_ITER_INIT { \
	.bi = BLOCK_ITER_INIT \
}

static int table_iter_next_in_block(struct table_iter *ti,
				    struct reftable_record *rec)
{
	int res = block_iter_next(&ti->bi, rec);
	if (res == 0 && reftable_record_type(rec) == BLOCK_TYPE_REF) {
		rec->u.ref.update_index += ti->r->min_update_index;
	}

	return res;
}

static void table_iter_block_done(struct table_iter *ti)
{
	block_reader_release(&ti->br);
	block_iter_reset(&ti->bi);
}
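/*
 * Peek at the block that starts at `off`: report its type via `typ` and
 * return its 24-bit length, or 0 if it is not a valid block type. The
 * first block of the file is preceded by the file header, which has to
 * be skipped.
 */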
static int32_t extract_block_size(uint8_t *data, uint8_t *typ, uint64_t off,
				  int version)
{
	int32_t result = 0;

	if (off == 0) {
		data += header_size(version);
	}

	*typ = data[0];
	if (reftable_is_block_type(*typ)) {
		result = get_be24(data + 1);
	}
	return result;
}
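/*
 * Initialize `br` for the block at `next_off`. The block is first read
 * with a guessed size and re-read if it turns out to be larger. Returns
 * 0 on success, 1 if the offset is past the end of the table or the
 * block is not of the wanted type, and a negative value on error.
 */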
int reader_init_block_reader(struct reftable_reader *r, struct block_reader *br,
			     uint64_t next_off, uint8_t want_typ)
{
	int32_t guess_block_size = r->block_size ? r->block_size :
		DEFAULT_BLOCK_SIZE;
	struct reftable_block block = { NULL };
	uint8_t block_typ = 0;
	int err = 0;
	uint32_t header_off = next_off ? 0 : header_size(r->version);
	int32_t block_size = 0;

	if (next_off >= r->size)
		return 1;

	err = reader_get_block(r, &block, next_off, guess_block_size);
	if (err < 0)
		goto done;

	block_size = extract_block_size(block.data, &block_typ, next_off,
					r->version);
	if (block_size < 0) {
		err = block_size;
		goto done;
	}
	if (want_typ != BLOCK_TYPE_ANY && block_typ != want_typ) {
		err = 1;
		goto done;
	}

	if (block_size > guess_block_size) {
		reftable_block_done(&block);
		err = reader_get_block(r, &block, next_off, block_size);
		if (err < 0) {
			goto done;
		}
	}

	err = block_reader_init(br, &block, header_off, r->block_size,
				hash_size(r->hash_id));
done:
	reftable_block_done(&block);

	return err;
}
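/*
 * Release the resources held by the iterator: the embedded block reader
 * and the block iterator.
 */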
static void table_iter_close(struct table_iter *ti)
{
	table_iter_block_done(ti);
	block_iter_close(&ti->bi);
}
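/*
 * Advance the iterator to the next block of its type, reusing the
 * embedded block reader. Returns 1 once the table is exhausted and a
 * negative value on error.
 */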
static int table_iter_next_block(struct table_iter *ti)
{
	uint64_t next_block_off = ti->block_off + ti->br.full_block_size;
	int err;

	err = reader_init_block_reader(ti->r, &ti->br, next_block_off, ti->typ);
	if (err > 0)
		ti->is_finished = 1;
	if (err)
		return err;

	ti->block_off = next_block_off;
	ti->is_finished = 0;
	block_iter_seek_start(&ti->bi, &ti->br);

	return 0;
}
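/*
 * Yield the next record of the iterator's type: first drain the current
 * block, then step through subsequent blocks until a record is found or
 * the table ends (in which case 1 is returned).
 */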
static int table_iter_next(struct table_iter *ti, struct reftable_record *rec)
{
	if (reftable_record_type(rec) != ti->typ)
		return REFTABLE_API_ERROR;

	while (1) {
		int err;

		if (ti->is_finished)
			return 1;

		/*
		 * Check whether the current block still has more records. If
		 * so, return it. If the iterator returns positive then the
		 * current block has been exhausted.
		 */
		err = table_iter_next_in_block(ti, rec);
		if (err <= 0)
			return err;

		/*
		 * Otherwise, we need to continue to the next block in the
		 * table and retry. If there are no more blocks then the
		 * iterator is drained.
		 */
		err = table_iter_next_block(ti);
		if (err) {
			ti->is_finished = 1;
			return err;
		}
	}
}
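/*
 * Adapters with a void pointer argument so that a table_iter can be
 * plugged into the generic reftable_iterator vtable.
 */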
static int table_iter_next_void(void *ti, struct reftable_record *rec)
{
	return table_iter_next(ti, rec);
}

static void table_iter_close_void(void *ti)
{
	table_iter_close(ti);
}

static struct reftable_iterator_vtable table_iter_vtable = {
	.next = &table_iter_next_void,
	.close = &table_iter_close_void,
};
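/*
 * Wrap a table_iter in a generic reftable_iterator so that callers can
 * drive it through the common iterator interface.
 */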
static void iterator_from_table_iter(struct reftable_iterator *it,
				     struct table_iter *ti)
{
	assert(!it->ops);
	it->iter_arg = ti;
	it->ops = &table_iter_vtable;
}
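/*
 * Position `ti` at the block of the given type that starts at offset
 * `off` and seek its block iterator to the start of that block.
 */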
static int reader_table_iter_at(struct reftable_reader *r,
				struct table_iter *ti, uint64_t off,
				uint8_t typ)
{
	int err;

	err = reader_init_block_reader(r, &ti->br, off, typ);
	if (err != 0)
		return err;

	ti->r = r;
	ti->typ = block_reader_type(&ti->br);
	ti->block_off = off;
	block_iter_seek_start(&ti->bi, &ti->br);
	return 0;
}
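/*
 * Start iteration for the given block type, either at its first data
 * block or, when `index` is set, at its index block. Returns 1 when no
 * index exists.
 */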
static int reader_start(struct reftable_reader *r, struct table_iter *ti,
			uint8_t typ, int index)
{
	struct reftable_reader_offsets *offs = reader_offsets_for(r, typ);
	uint64_t off = offs->offset;
	if (index) {
		off = offs->index_offset;
		if (off == 0) {
			return 1;
		}
		typ = BLOCK_TYPE_INDEX;
	}

	return reader_table_iter_at(r, ti, off, typ);
}
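/*
 * Seek `ti` to the block that may contain `want` by scanning the blocks
 * linearly and comparing each block's first key against the wanted key.
 * Linear seeks like this only happen for sections small enough to have
 * no index (see the comment in the loop below).
 */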
|
|
|
|
|
2022-08-19 08:54:56 +00:00
|
|
|
static int reader_seek_linear(struct table_iter *ti,
|
2021-10-07 20:25:08 +00:00
|
|
|
struct reftable_record *want)
|
|
|
|
{
|
|
|
|
struct strbuf want_key = STRBUF_INIT;
|
|
|
|
struct strbuf got_key = STRBUF_INIT;
|
2024-02-06 06:35:59 +00:00
|
|
|
struct reftable_record rec;
|
2021-10-07 20:25:08 +00:00
|
|
|
int err = -1;
|
|
|
|
|
2024-02-06 06:35:59 +00:00
|
|
|
reftable_record_init(&rec, reftable_record_type(want));
|
2021-10-07 20:25:08 +00:00
|
|
|
reftable_record_key(want, &want_key);
|
|
|
|
|
reftable/block: move ownership of block reader into `struct table_iter`
The table iterator allows the caller to iterate through all records in a
reftable table. To do so it iterates through all blocks of the desired
type one by one, where for each block it creates a new block iterator
and yields all its entries.
One of the things that is somewhat confusing in this context is who owns
the block reader that is being used to read the blocks and pass them to
the block iterator. Intuitively, as the table iterator is responsible
for iterating through the blocks, one would assume that this iterator is
also responsible for managing the lifecycle of the reader. And while it
somewhat is, the block reader is ultimately stored inside of the block
iterator.
Refactor the code such that the block reader is instead fully managed by
the table iterator. Instead of passing the reader to the block iterator,
we now only end up passing the block data to it. Despite clearing up the
lifecycle of the reader, it will also allow for better reuse of the
reader in subsequent patches.
The following benchmark prints a single matching ref out of 1 million
refs. Before:
HEAP SUMMARY:
in use at exit: 13,603 bytes in 125 blocks
total heap usage: 6,607 allocs, 6,482 frees, 509,635 bytes allocated
After:
HEAP SUMMARY:
in use at exit: 13,603 bytes in 125 blocks
total heap usage: 7,235 allocs, 7,110 frees, 301,481 bytes allocated
Note that while there are more allocation and free calls now, the
overall number of bytes allocated is significantly lower. The number of
allocations will be reduced significantly by the next patch though.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-04-08 12:16:45 +00:00
|
|
|
/*
|
|
|
|
* First we need to locate the block that must contain our record. To
|
|
|
|
* do so we scan through blocks linearly until we find the first block
|
|
|
|
* whose first key is bigger than our wanted key. Once we have found
|
|
|
|
* that block we know that the key must be contained in the preceding
|
|
|
|
* block.
|
|
|
|
*
|
|
|
|
* This algorithm is somewhat unfortunate because it means that we
|
|
|
|
* always have to seek one block too far and then back up. But as we
|
|
|
|
* can only decode the _first_ key of a block but not its _last_ key we
|
|
|
|
* have no other way to do this.
|
|
|
|
*/
|
2021-10-07 20:25:08 +00:00
|
|
|
while (1) {
|
reftable/reader: iterate to next block in place
The table iterator has to iterate towards the next block once it has
yielded all records of the current block. This is done by creating a new
table iterator, initializing it to the next block, releasing the old
iterator and then copying over the data.
Refactor the code to instead advance the table iterator in place. This
is simpler and unlocks some optimizations in subsequent patches. Also,
it allows us to avoid some allocations.
The following measurements show a single matching ref out of 1 million
refs. Before this change:
HEAP SUMMARY:
in use at exit: 13,603 bytes in 125 blocks
total heap usage: 7,235 allocs, 7,110 frees, 301,481 bytes allocated
After:
HEAP SUMMARY:
in use at exit: 13,603 bytes in 125 blocks
total heap usage: 315 allocs, 190 frees, 107,027 bytes allocated
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-04-08 12:16:50 +00:00
|
|
|
struct table_iter next = *ti;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We must be careful to not modify underlying data of `ti`
|
|
|
|
* because we may find that `next` does not contain our desired
|
|
|
|
* block, but that `ti` does. In that case, we would discard
|
|
|
|
* `next` and continue with `ti`.
|
|
|
|
*
|
|
|
|
* This also means that we cannot reuse allocated memory for
|
|
|
|
* `next` here. While it would be great if we could, it should
|
|
|
|
* in practice not be too bad given that we should only ever
|
|
|
|
* end up doing linear seeks with at most three blocks. As soon
|
|
|
|
* as we have more than three blocks we would have an index, so
|
|
|
|
* we would not do a linear search there anymore.
|
|
|
|
*/
|
|
|
|
memset(&next.br.block, 0, sizeof(next.br.block));
|
reftable/block: reuse `zstream` state on inflation
When calling `inflateInit()` and `inflate()`, the zlib library will
allocate several data structures for the underlying `zstream` to keep
track of various information. Thus, when inflating repeatedly, it is
possible to optimize memory allocation patterns by reusing the `zstream`
and then calling `inflateReset()` on it to prepare it for the next chunk
of data to inflate.
This is exactly what the reftable code is doing: when iterating through
reflogs we need to potentially inflate many log blocks, but we discard
the `zstream` every single time. Instead, as we reuse the `block_reader`
for each of the blocks anyway, we can initialize the `zstream` once and
then reuse it for subsequent inflations.
Refactor the code to do so, which leads to a significant reduction in
the number of allocations. The following measurements were done when
iterating through 1 million reflog entries. Before:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 23,028 allocs, 22,906 frees, 162,813,552 bytes allocated
After:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 302 allocs, 180 frees, 88,352 bytes allocated
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-04-08 12:17:04 +00:00
|
|
|
next.br.zstream = NULL;
|
reftable/block: reuse uncompressed blocks
The reftable backend stores reflog entries in a compressed format and
thus needs to uncompress blocks before one can read records from it.
For each reflog block we thus have to allocate an array that we can
decompress the block contents into. This block is being discarded
whenever the table iterator moves to the next block. Consequently, we
reallocate a new array on every block, which is quite wasteful.
Refactor the code to reuse the uncompressed block data when moving the
block reader to a new block. This significantly reduces the number of
allocations when iterating through many compressed blocks. The following
measurements are done with `git reflog list` when listing 100k reflogs.
Before:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 45,755 allocs, 45,633 frees, 254,779,456 bytes allocated
After:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 23,028 allocs, 22,906 frees, 162,813,547 bytes allocated
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-04-08 12:16:54 +00:00
|
|
|
next.br.uncompressed_data = NULL;
|
|
|
|
next.br.uncompressed_cap = 0;
|
reftable/block: move ownership of block reader into `struct table_iter`
The table iterator allows the caller to iterate through all records in a
reftable table. To do so it iterates through all blocks of the desired
type one by one, where for each block it creates a new block iterator
and yields all its entries.
One of the things that is somewhat confusing in this context is who owns
the block reader that is being used to read the blocks and pass them to
the block iterator. Intuitively, as the table iterator is responsible
for iterating through the blocks, one would assume that this iterator is
also responsible for managing the lifecycle of the reader. And while it
somewhat is, the block reader is ultimately stored inside of the block
iterator.
Refactor the code such that the block reader is instead fully managed by
the table iterator. Instead of passing the reader to the block iterator,
we now only end up passing the block data to it. Despite clearing up the
lifecycle of the reader, it will also allow for better reuse of the
reader in subsequent patches.
The following benchmark prints a single matching ref out of 1 million
refs. Before:
HEAP SUMMARY:
in use at exit: 13,603 bytes in 125 blocks
total heap usage: 6,607 allocs, 6,482 frees, 509,635 bytes allocated
After:
HEAP SUMMARY:
in use at exit: 13,603 bytes in 125 blocks
total heap usage: 7,235 allocs, 7,110 frees, 301,481 bytes allocated
Note that while there are more allocation and free calls now, the
overall number of bytes allocated is significantly lower. The number of
allocations will be reduced significantly by the next patch though.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-04-08 12:16:45 +00:00
|
|
|
|
reftable/reader: iterate to next block in place
The table iterator has to iterate towards the next block once it has
yielded all records of the current block. This is done by creating a new
table iterator, initializing it to the next block, releasing the old
iterator and then copying over the data.
Refactor the code to instead advance the table iterator in place. This
is simpler and unlocks some optimizations in subsequent patches. Also,
it allows us to avoid some allocations.
The following measurements show a single matching ref out of 1 million
refs. Before this change:
HEAP SUMMARY:
in use at exit: 13,603 bytes in 125 blocks
total heap usage: 7,235 allocs, 7,110 frees, 301,481 bytes allocated
After:
HEAP SUMMARY:
in use at exit: 13,603 bytes in 125 blocks
total heap usage: 315 allocs, 190 frees, 107,027 bytes allocated
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-04-08 12:16:50 +00:00
|
|
|
err = table_iter_next_block(&next);
|
2021-10-07 20:25:08 +00:00
|
|
|
if (err < 0)
|
|
|
|
goto done;
|
reftable/block: move ownership of block reader into `struct table_iter`
The table iterator allows the caller to iterate through all records in a
reftable table. To do so it iterates through all blocks of the desired
type one by one, where for each block it creates a new block iterator
and yields all its entries.
One of the things that is somewhat confusing in this context is who owns
the block reader that is being used to read the blocks and pass them to
the block iterator. Intuitively, as the table iterator is responsible
for iterating through the blocks, one would assume that this iterator is
also responsible for managing the lifecycle of the reader. And while it
somewhat is, the block reader is ultimately stored inside of the block
iterator.
Refactor the code such that the block reader is instead fully managed by
the table iterator. Instead of passing the reader to the block iterator,
we now only end up passing the block data to it. Despite clearing up the
lifecycle of the reader, it will also allow for better reuse of the
reader in subsequent patches.
The following benchmark prints a single matching ref out of 1 million
refs. Before:
HEAP SUMMARY:
in use at exit: 13,603 bytes in 125 blocks
total heap usage: 6,607 allocs, 6,482 frees, 509,635 bytes allocated
After:
HEAP SUMMARY:
in use at exit: 13,603 bytes in 125 blocks
total heap usage: 7,235 allocs, 7,110 frees, 301,481 bytes allocated
Note that while there are more allocation and free calls now, the
overall number of bytes allocated is significantly lower. The number of
allocations will be reduced significantly by the next patch though.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-04-08 12:16:45 +00:00
|
|
|
if (err > 0)
|
2021-10-07 20:25:08 +00:00
|
|
|
break;
|
|
|
|
|
reftable/block: move ownership of block reader into `struct table_iter`
The table iterator allows the caller to iterate through all records in a
reftable table. To do so it iterates through all blocks of the desired
type one by one, where for each block it creates a new block iterator
and yields all its entries.
One of the things that is somewhat confusing in this context is who owns
the block reader that is being used to read the blocks and pass them to
the block iterator. Intuitively, as the table iterator is responsible
for iterating through the blocks, one would assume that this iterator is
also responsible for managing the lifecycle of the reader. And while it
somewhat is, the block reader is ultimately stored inside of the block
iterator.
Refactor the code such that the block reader is instead fully managed by
the table iterator. Instead of passing the reader to the block iterator,
we now only end up passing the block data to it. Despite clearing up the
lifecycle of the reader, it will also allow for better reuse of the
reader in subsequent patches.
The following benchmark prints a single matching ref out of 1 million
refs. Before:
HEAP SUMMARY:
in use at exit: 13,603 bytes in 125 blocks
total heap usage: 6,607 allocs, 6,482 frees, 509,635 bytes allocated
After:
HEAP SUMMARY:
in use at exit: 13,603 bytes in 125 blocks
total heap usage: 7,235 allocs, 7,110 frees, 301,481 bytes allocated
Note that while there are more allocation and free calls now, the
overall number of bytes allocated is significantly lower. The number of
allocations will be reduced significantly by the next patch though.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-04-08 12:16:45 +00:00
|
|
|
err = block_reader_first_key(&next.br, &got_key);
|
2021-10-07 20:25:08 +00:00
|
|
|
if (err < 0)
|
|
|
|
goto done;
|
|
|
|
|
|
|
|
if (strbuf_cmp(&got_key, &want_key) > 0) {
|
|
|
|
table_iter_block_done(&next);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
table_iter_block_done(ti);
|
2024-04-08 12:16:45 +00:00
|
|
|
*ti = next;
|
2021-10-07 20:25:08 +00:00
|
|
|
}
|
|
|
|
|
2024-04-08 12:16:45 +00:00
|
|
|
/*
|
|
|
|
* We have located the block that must contain our record, so we seek
|
|
|
|
* the wanted key inside of it. If the block does not contain our key
|
|
|
|
* we know that the corresponding record does not exist.
|
|
|
|
*/
|
|
|
|
err = block_iter_seek_key(&ti->bi, &ti->br, &want_key);
|
2021-10-07 20:25:08 +00:00
|
|
|
if (err < 0)
|
|
|
|
goto done;
|
|
|
|
err = 0;
|
|
|
|
|
|
|
|
done:
|
2022-01-20 15:12:13 +00:00
|
|
|
reftable_record_release(&rec);
|
2021-10-07 20:25:08 +00:00
|
|
|
strbuf_release(&want_key);
|
|
|
|
strbuf_release(&got_key);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int reader_seek_indexed(struct reftable_reader *r,
|
|
|
|
struct reftable_iterator *it,
|
|
|
|
struct reftable_record *rec)
|
|
|
|
{
|
2022-01-20 15:12:13 +00:00
|
|
|
struct reftable_record want_index = {
|
|
|
|
.type = BLOCK_TYPE_INDEX, .u.idx = { .last_key = STRBUF_INIT }
|
|
|
|
};
|
|
|
|
struct reftable_record index_result = {
|
|
|
|
.type = BLOCK_TYPE_INDEX,
|
|
|
|
.u.idx = { .last_key = STRBUF_INIT },
|
|
|
|
};
|
2021-10-07 20:25:08 +00:00
|
|
|
struct table_iter index_iter = TABLE_ITER_INIT;
|
2024-04-08 12:16:45 +00:00
|
|
|
struct table_iter empty = TABLE_ITER_INIT;
|
2021-10-07 20:25:08 +00:00
|
|
|
struct table_iter next = TABLE_ITER_INIT;
|
|
|
|
int err = 0;
|
|
|
|
|
2022-01-20 15:12:13 +00:00
|
|
|
reftable_record_key(rec, &want_index.u.idx.last_key);
|
2021-10-07 20:25:08 +00:00
|
|
|
err = reader_start(r, &index_iter, reftable_record_type(rec), 1);
|
|
|
|
if (err < 0)
|
|
|
|
goto done;
|
|
|
|
|
2024-02-01 07:52:12 +00:00
|
|
|
/*
|
|
|
|
* The index may consist of multiple levels, where each level may have
|
|
|
|
* multiple index blocks. We start by doing a linear search in the
|
|
|
|
* highest layer that identifies the relevant index block as well as
|
|
|
|
* the record inside that block that corresponds to our wanted key.
|
|
|
|
*/
|
2022-08-19 08:54:56 +00:00
|
|
|
err = reader_seek_linear(&index_iter, &want_index);
|
2024-02-01 07:51:56 +00:00
|
|
|
if (err < 0)
|
|
|
|
goto done;
|
|
|
|
|
2024-02-01 07:52:12 +00:00
|
|
|
/*
|
|
|
|
* Traverse down the levels until we find a non-index entry.
|
|
|
|
*/
|
2021-10-07 20:25:08 +00:00
|
|
|
while (1) {
|
2024-02-01 07:52:12 +00:00
|
|
|
/*
|
|
|
|
* In case we seek a record that does not exist the index iter
|
|
|
|
* will tell us that the iterator is over. This works because
|
|
|
|
* the last index entry of the current level will contain the
|
|
|
|
* last key it knows about. So in case the key we seek is larger
|
|
|
|
* than the last indexed key we know that it won't exist.
|
|
|
|
*
|
|
|
|
* There is one subtlety in the layout of the index section
|
|
|
|
* that makes this work as expected: the highest-level index is
|
|
|
|
* at the end of the section and will point backwards and thus we
|
|
|
|
* start reading from the end of the index section, not the
|
|
|
|
* beginning.
|
|
|
|
*
|
|
|
|
* If that wasn't the case and the order was reversed then the
|
|
|
|
* linear seek would seek into the lower levels and traverse
|
|
|
|
* all levels of the index only to find out that the key does
|
|
|
|
* not exist.
|
|
|
|
*/
|
2022-01-20 15:12:13 +00:00
|
|
|
err = table_iter_next(&index_iter, &index_result);
|
2021-10-07 20:25:08 +00:00
|
|
|
if (err != 0)
|
|
|
|
goto done;
|
|
|
|
|
2022-01-20 15:12:13 +00:00
|
|
|
err = reader_table_iter_at(r, &next, index_result.u.idx.offset,
|
|
|
|
0);
|
2021-10-07 20:25:08 +00:00
|
|
|
if (err != 0)
|
|
|
|
goto done;
|
|
|
|
|
2024-04-08 12:16:45 +00:00
|
|
|
err = block_iter_seek_key(&next.bi, &next.br, &want_index.u.idx.last_key);
|
2021-10-07 20:25:08 +00:00
|
|
|
if (err < 0)
|
|
|
|
goto done;
|
|
|
|
|
|
|
|
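/*
 * We either landed in a block that holds records of the wanted type,
 * in which case the indexed seek is done, or in another index block
 * one level further down. Any other block type indicates a corrupt
 * table.
 */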
if (next.typ == reftable_record_type(rec)) {
|
|
|
|
err = 0;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (next.typ != BLOCK_TYPE_INDEX) {
|
|
|
|
err = REFTABLE_FORMAT_ERROR;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2024-04-08 12:16:45 +00:00
|
|
|
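/* Descend one level: continue the seek from the block we just entered. */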
table_iter_close(&index_iter);
|
|
|
|
index_iter = next;
|
|
|
|
next = empty;
|
2021-10-07 20:25:08 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if (err == 0) {
|
2024-02-06 06:35:27 +00:00
|
|
|
struct table_iter *malloced = reftable_calloc(1, sizeof(*malloced));
|
2024-04-08 12:16:45 +00:00
|
|
|
*malloced = next;
|
|
|
|
next = empty;
|
2021-10-07 20:25:08 +00:00
|
|
|
iterator_from_table_iter(it, malloced);
|
|
|
|
}
|
2024-04-08 12:16:45 +00:00
|
|
|
|
2021-10-07 20:25:08 +00:00
|
|
|
done:
|
2024-04-08 12:16:45 +00:00
|
|
|
table_iter_close(&next);
|
2021-10-07 20:25:08 +00:00
|
|
|
table_iter_close(&index_iter);
|
2022-01-20 15:12:13 +00:00
|
|
|
reftable_record_release(&want_index);
|
|
|
|
reftable_record_release(&index_result);
|
2021-10-07 20:25:08 +00:00
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int reader_seek_internal(struct reftable_reader *r,
|
|
|
|
struct reftable_iterator *it,
|
|
|
|
struct reftable_record *rec)
|
|
|
|
{
|
|
|
|
struct reftable_reader_offsets *offs =
|
|
|
|
reader_offsets_for(r, reftable_record_type(rec));
|
|
|
|
uint64_t idx = offs->index_offset;
|
reftable/block: reuse uncompressed blocks
The reftable backend stores reflog entries in a compressed format and
thus needs to uncompress blocks before one can read records from it.
For each reflog block we thus have to allocate an array that we can
decompress the block contents into. This buffer is discarded
whenever the table iterator moves to the next block. Consequently, we
reallocate a new array on every block, which is quite wasteful.
Refactor the code to reuse the uncompressed block data when moving the
block reader to a new block. This significantly reduces the number of
allocations when iterating through many compressed blocks. The following
measurements are done with `git reflog list` when listing 100k reflogs.
Before:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 45,755 allocs, 45,633 frees, 254,779,456 bytes allocated
After:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 23,028 allocs, 22,906 frees, 162,813,547 bytes allocated
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-04-08 12:16:54 +00:00
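As a rough sketch of the reuse pattern described above (hypothetical names,
not the actual reftable implementation), the idea is to keep one growable
scratch buffer alive across blocks and only reallocate it when a block needs
more room than previously seen:

#include <stdlib.h>

/* Hypothetical scratch buffer reused across uncompressed blocks. */
struct sketch_scratch {
	unsigned char *data;
	size_t cap;
};

/* Grow only when the next block needs more space than we already
 * hold; otherwise the previous allocation is reused as-is. */
static int sketch_scratch_reserve(struct sketch_scratch *s, size_t want)
{
	unsigned char *grown;
	if (want <= s->cap)
		return 0;
	grown = realloc(s->data, want);
	if (!grown)
		return -1;
	s->data = grown;
	s->cap = want;
	return 0;
}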
|
|
|
struct table_iter ti = TABLE_ITER_INIT, *p;
|
|
|
|
int err;
|
|
|
|
|
2021-10-07 20:25:08 +00:00
|
|
|
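/*
 * If the table has an index for this record type, use it; otherwise
 * fall back to a linear scan over the blocks.
 */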
if (idx > 0)
|
|
|
|
return reader_seek_indexed(r, it, rec);
|
|
|
|
|
|
|
|
err = reader_start(r, &ti, reftable_record_type(rec), 0);
|
|
|
|
if (err < 0)
|
2024-04-08 12:16:54 +00:00
|
|
|
goto out;
|
|
|
|
|
2022-08-19 08:54:56 +00:00
|
|
|
err = reader_seek_linear(&ti, rec);
|
2021-10-07 20:25:08 +00:00
|
|
|
if (err < 0)
|
2024-04-08 12:16:54 +00:00
|
|
|
goto out;
|
2021-10-07 20:25:08 +00:00
|
|
|
|
2024-04-08 12:16:54 +00:00
|
|
|
REFTABLE_ALLOC_ARRAY(p, 1);
|
|
|
|
*p = ti;
|
|
|
|
iterator_from_table_iter(it, p);
|
|
|
|
|
|
|
|
out:
|
|
|
|
if (err)
|
|
|
|
table_iter_close(&ti);
|
|
|
|
return err;
|
2021-10-07 20:25:08 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static int reader_seek(struct reftable_reader *r, struct reftable_iterator *it,
|
|
|
|
struct reftable_record *rec)
|
|
|
|
{
|
|
|
|
uint8_t typ = reftable_record_type(rec);
|
|
|
|
|
|
|
|
struct reftable_reader_offsets *offs = reader_offsets_for(r, typ);
|
|
|
|
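/*
 * The table may not contain any blocks of the requested type at all,
 * e.g. a table without log records. Hand back an empty iterator then.
 */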
if (!offs->is_present) {
|
|
|
|
iterator_set_empty(it);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
return reader_seek_internal(r, it, rec);
|
|
|
|
}
|
|
|
|
|
|
|
|
int reftable_reader_seek_ref(struct reftable_reader *r,
|
|
|
|
struct reftable_iterator *it, const char *name)
|
|
|
|
{
|
2022-01-20 15:12:13 +00:00
|
|
|
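/*
 * Seeking builds a record that only carries the key we look for; the
 * resulting iterator is positioned at the first record whose key is
 * greater than or equal to it.
 */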
struct reftable_record rec = {
|
|
|
|
.type = BLOCK_TYPE_REF,
|
|
|
|
.u.ref = {
|
|
|
|
.refname = (char *)name,
|
|
|
|
},
|
2021-10-07 20:25:08 +00:00
|
|
|
};
|
|
|
|
return reader_seek(r, it, &rec);
|
|
|
|
}
|
|
|
|
|
|
|
|
int reftable_reader_seek_log_at(struct reftable_reader *r,
|
|
|
|
struct reftable_iterator *it, const char *name,
|
|
|
|
uint64_t update_index)
|
|
|
|
{
|
2022-01-20 15:12:13 +00:00
|
|
|
struct reftable_record rec = { .type = BLOCK_TYPE_LOG,
|
|
|
|
.u.log = {
|
|
|
|
.refname = (char *)name,
|
|
|
|
.update_index = update_index,
|
|
|
|
} };
|
2021-10-07 20:25:08 +00:00
|
|
|
return reader_seek(r, it, &rec);
|
|
|
|
}
|
|
|
|
|
|
|
|
int reftable_reader_seek_log(struct reftable_reader *r,
|
|
|
|
struct reftable_iterator *it, const char *name)
|
|
|
|
{
|
|
|
|
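/*
 * Log records for a ref are ordered from newest to oldest update
 * index, so seeking with the maximum update index starts iteration at
 * the most recent entry.
 */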
uint64_t max = ~((uint64_t)0);
|
|
|
|
return reftable_reader_seek_log_at(r, it, name, max);
|
|
|
|
}
|
|
|
|
|
|
|
|
void reader_close(struct reftable_reader *r)
|
|
|
|
{
|
|
|
|
block_source_close(&r->source);
|
|
|
|
FREE_AND_NULL(r->name);
|
|
|
|
}
|
|
|
|
|
|
|
|
int reftable_new_reader(struct reftable_reader **p,
|
|
|
|
struct reftable_block_source *src, char const *name)
|
|
|
|
{
|
2024-02-06 06:35:27 +00:00
|
|
|
struct reftable_reader *rd = reftable_calloc(1, sizeof(*rd));
|
2021-10-07 20:25:08 +00:00
|
|
|
int err = init_reader(rd, src, name);
|
|
|
|
if (err == 0) {
|
|
|
|
*p = rd;
|
|
|
|
} else {
|
|
|
|
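/*
 * On failure, clean up on behalf of the caller: close the block
 * source and discard the partially initialized reader.
 */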
block_source_close(src);
|
|
|
|
reftable_free(rd);
|
|
|
|
}
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
void reftable_reader_free(struct reftable_reader *r)
|
|
|
|
{
|
2022-01-20 15:12:06 +00:00
|
|
|
if (!r)
|
|
|
|
return;
|
2021-10-07 20:25:08 +00:00
|
|
|
reader_close(r);
|
|
|
|
reftable_free(r);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int reftable_reader_refs_for_indexed(struct reftable_reader *r,
|
|
|
|
struct reftable_iterator *it,
|
|
|
|
uint8_t *oid)
|
|
|
|
{
|
2022-01-20 15:12:13 +00:00
|
|
|
struct reftable_record want = {
|
|
|
|
.type = BLOCK_TYPE_OBJ,
|
|
|
|
.u.obj = {
|
|
|
|
.hash_prefix = oid,
|
|
|
|
.hash_prefix_len = r->object_id_len,
|
|
|
|
},
|
2021-10-07 20:25:08 +00:00
|
|
|
};
|
|
|
|
struct reftable_iterator oit = { NULL };
|
2022-01-20 15:12:13 +00:00
|
|
|
struct reftable_record got = {
|
|
|
|
.type = BLOCK_TYPE_OBJ,
|
|
|
|
.u.obj = { 0 },
|
|
|
|
};
|
2021-10-07 20:25:08 +00:00
|
|
|
int err = 0;
|
|
|
|
struct indexed_table_ref_iter *itr = NULL;
|
|
|
|
|
|
|
|
/* Look through the reverse index. */
|
2022-01-20 15:12:13 +00:00
|
|
|
err = reader_seek(r, &oit, &want);
|
2021-10-07 20:25:08 +00:00
|
|
|
if (err != 0)
|
|
|
|
goto done;
|
|
|
|
|
|
|
|
/* read out the reftable_obj_record */
|
2022-01-20 15:12:13 +00:00
|
|
|
err = iterator_next(&oit, &got);
|
2021-10-07 20:25:08 +00:00
|
|
|
if (err < 0)
|
|
|
|
goto done;
|
|
|
|
|
2022-01-20 15:12:13 +00:00
|
|
|
if (err > 0 || memcmp(want.u.obj.hash_prefix, got.u.obj.hash_prefix,
|
|
|
|
r->object_id_len)) {
|
2021-10-07 20:25:08 +00:00
|
|
|
/* didn't find it; return empty iterator */
|
|
|
|
iterator_set_empty(it);
|
|
|
|
err = 0;
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
|
|
|
err = new_indexed_table_ref_iter(&itr, r, oid, hash_size(r->hash_id),
|
2022-01-20 15:12:13 +00:00
|
|
|
got.u.obj.offsets,
|
|
|
|
got.u.obj.offset_len);
|
2021-10-07 20:25:08 +00:00
|
|
|
if (err < 0)
|
|
|
|
goto done;
|
2022-01-20 15:12:13 +00:00
|
|
|
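/*
 * The iterator has taken ownership of the offsets array; clear our
 * reference so that releasing the record below does not free it.
 */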
got.u.obj.offsets = NULL;
|
2021-10-07 20:25:08 +00:00
|
|
|
iterator_from_indexed_table_ref_iter(it, itr);
|
|
|
|
|
|
|
|
done:
|
|
|
|
reftable_iterator_destroy(&oit);
|
2022-01-20 15:12:13 +00:00
|
|
|
reftable_record_release(&got);
|
2021-10-07 20:25:08 +00:00
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int reftable_reader_refs_for_unindexed(struct reftable_reader *r,
|
|
|
|
struct reftable_iterator *it,
|
|
|
|
uint8_t *oid)
|
|
|
|
{
|
|
|
|
struct table_iter ti_empty = TABLE_ITER_INIT;
|
2024-02-06 06:35:27 +00:00
|
|
|
struct table_iter *ti = reftable_calloc(1, sizeof(*ti));
|
2021-10-07 20:25:08 +00:00
|
|
|
struct filtering_ref_iterator *filter = NULL;
|
|
|
|
struct filtering_ref_iterator empty = FILTERING_REF_ITERATOR_INIT;
|
|
|
|
int oid_len = hash_size(r->hash_id);
|
|
|
|
int err;
|
|
|
|
|
|
|
|
*ti = ti_empty;
|
|
|
|
err = reader_start(r, ti, BLOCK_TYPE_REF, 0);
|
|
|
|
if (err < 0) {
|
|
|
|
reftable_free(ti);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
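/*
 * Without an object index we scan all refs and filter out the ones
 * that do not reference the wanted object ID.
 */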
filter = reftable_malloc(sizeof(struct filtering_ref_iterator));
|
|
|
|
*filter = empty;
|
|
|
|
|
|
|
|
strbuf_add(&filter->oid, oid, oid_len);
|
|
|
|
reftable_table_from_reader(&filter->tab, r);
|
|
|
|
filter->double_check = 0;
|
|
|
|
iterator_from_table_iter(&filter->it, ti);
|
|
|
|
|
|
|
|
iterator_from_filtering_ref_iterator(it, filter);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int reftable_reader_refs_for(struct reftable_reader *r,
|
|
|
|
struct reftable_iterator *it, uint8_t *oid)
|
|
|
|
{
|
|
|
|
if (r->obj_offsets.is_present)
|
|
|
|
return reftable_reader_refs_for_indexed(r, it, oid);
|
|
|
|
return reftable_reader_refs_for_unindexed(r, it, oid);
|
|
|
|
}
|
|
|
|
|
|
|
|
uint64_t reftable_reader_max_update_index(struct reftable_reader *r)
|
|
|
|
{
|
|
|
|
return r->max_update_index;
|
|
|
|
}
|
|
|
|
|
|
|
|
uint64_t reftable_reader_min_update_index(struct reftable_reader *r)
|
|
|
|
{
|
|
|
|
return r->min_update_index;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* generic table interface. */
|
|
|
|
|
|
|
|
static int reftable_reader_seek_void(void *tab, struct reftable_iterator *it,
|
|
|
|
struct reftable_record *rec)
|
|
|
|
{
|
|
|
|
return reader_seek(tab, it, rec);
|
|
|
|
}
|
|
|
|
|
|
|
|
static uint32_t reftable_reader_hash_id_void(void *tab)
|
|
|
|
{
|
|
|
|
return reftable_reader_hash_id(tab);
|
|
|
|
}
|
|
|
|
|
|
|
|
static uint64_t reftable_reader_min_update_index_void(void *tab)
|
|
|
|
{
|
|
|
|
return reftable_reader_min_update_index(tab);
|
|
|
|
}
|
|
|
|
|
|
|
|
static uint64_t reftable_reader_max_update_index_void(void *tab)
|
|
|
|
{
|
|
|
|
return reftable_reader_max_update_index(tab);
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct reftable_table_vtable reader_vtable = {
|
|
|
|
.seek_record = reftable_reader_seek_void,
|
|
|
|
.hash_id = reftable_reader_hash_id_void,
|
|
|
|
.min_update_index = reftable_reader_min_update_index_void,
|
|
|
|
.max_update_index = reftable_reader_max_update_index_void,
|
|
|
|
};
|
|
|
|
|
|
|
|
void reftable_table_from_reader(struct reftable_table *tab,
|
|
|
|
struct reftable_reader *reader)
|
|
|
|
{
|
|
|
|
assert(!tab->ops);
|
|
|
|
tab->ops = &reader_vtable;
|
|
|
|
tab->table_arg = reader;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
int reftable_reader_print_file(const char *tablename)
|
|
|
|
{
|
|
|
|
struct reftable_block_source src = { NULL };
|
|
|
|
int err = reftable_block_source_from_file(&src, tablename);
|
|
|
|
struct reftable_reader *r = NULL;
|
|
|
|
struct reftable_table tab = { NULL };
|
|
|
|
if (err < 0)
|
|
|
|
goto done;
|
|
|
|
|
|
|
|
err = reftable_new_reader(&r, &src, tablename);
|
|
|
|
if (err < 0)
|
|
|
|
goto done;
|
|
|
|
|
|
|
|
reftable_table_from_reader(&tab, r);
|
|
|
|
err = reftable_table_print(&tab);
|
|
|
|
done:
|
|
|
|
reftable_reader_free(r);
|
|
|
|
return err;
|
|
|
|
}
|