git/reftable/reftable-record.h

119 lines
3.5 KiB
C
Raw Normal View History

/*
Copyright 2020 Google LLC
Use of this source code is governed by a BSD-style
license that can be found in the LICENSE file or at
https://developers.google.com/open-source/licenses/bsd
*/
#ifndef REFTABLE_RECORD_H
#define REFTABLE_RECORD_H
#include "hash-ll.h"
#include <stdint.h>
/*
* Basic data types
*
* Reftables store the state of each ref in struct reftable_ref_record, and they
* store a sequence of reflog updates in struct reftable_log_record.
*/
/* reftable_ref_record holds a ref database entry target_value */
struct reftable_ref_record {
char *refname; /* Name of the ref, malloced. */
reftable/record: reuse refname when decoding When decoding a reftable record we will first release the user-provided record and then decode the new record into it. This is quite inefficient as we basically need to reallocate at least the refname every time. Refactor the function to start tracking the refname capacity. Like this, we can stow away the refname, release, restore and then grow the refname to the required number of bytes via `REFTABLE_ALLOC_GROW()`. This refactoring is safe to do because all functions that assigning to the refname will first call `reftable_ref_record_release()`, which will zero out the complete record after releasing memory. This change results in a nice speedup when iterating over 1 million refs: Benchmark 1: show-ref: single matching ref (revision = HEAD~) Time (mean ± σ): 124.0 ms ± 3.9 ms [User: 121.1 ms, System: 2.7 ms] Range (min … max): 120.4 ms … 152.7 ms 1000 runs Benchmark 2: show-ref: single matching ref (revision = HEAD) Time (mean ± σ): 114.4 ms ± 3.7 ms [User: 111.5 ms, System: 2.7 ms] Range (min … max): 111.0 ms … 152.1 ms 1000 runs Summary show-ref: single matching ref (revision = HEAD) ran 1.08 ± 0.05 times faster than show-ref: single matching ref (revision = HEAD~) Furthermore, with this change we now perform a mostly constant number of allocations when iterating. Before this change: HEAP SUMMARY: in use at exit: 13,603 bytes in 125 blocks total heap usage: 1,006,620 allocs, 1,006,495 frees, 25,398,363 bytes allocated After this change: HEAP SUMMARY: in use at exit: 13,603 bytes in 125 blocks total heap usage: 6,623 allocs, 6,498 frees, 509,592 bytes allocated Signed-off-by: Patrick Steinhardt <ps@pks.im> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-03-04 10:49:22 +00:00
size_t refname_cap;
uint64_t update_index; /* Logical timestamp at which this value is
* written */
enum {
/* tombstone to hide deletions from earlier tables */
REFTABLE_REF_DELETION = 0x0,
/* a simple ref */
REFTABLE_REF_VAL1 = 0x1,
/* a tag, plus its peeled hash */
REFTABLE_REF_VAL2 = 0x2,
/* a symbolic reference */
REFTABLE_REF_SYMREF = 0x3,
#define REFTABLE_NR_REF_VALUETYPES 4
} value_type;
union {
unsigned char val1[GIT_MAX_RAWSZ];
struct {
unsigned char value[GIT_MAX_RAWSZ]; /* first hash */
unsigned char target_value[GIT_MAX_RAWSZ]; /* second hash */
} val2;
char *symref; /* referent, malloced 0-terminated string */
} value;
};
/* Returns the first hash, or NULL if `rec` is not of type
* REFTABLE_REF_VAL1 or REFTABLE_REF_VAL2. */
const unsigned char *reftable_ref_record_val1(const struct reftable_ref_record *rec);
/* Returns the second hash, or NULL if `rec` is not of type
* REFTABLE_REF_VAL2. */
const unsigned char *reftable_ref_record_val2(const struct reftable_ref_record *rec);
/* returns whether 'ref' represents a deletion */
int reftable_ref_record_is_deletion(const struct reftable_ref_record *ref);
/* prints a reftable_ref_record onto stdout. Useful for debugging. */
void reftable_ref_record_print(const struct reftable_ref_record *ref,
uint32_t hash_id);
/* frees and nulls all pointer values inside `ref`. */
void reftable_ref_record_release(struct reftable_ref_record *ref);
/* returns whether two reftable_ref_records are the same. Useful for testing. */
int reftable_ref_record_equal(const struct reftable_ref_record *a,
const struct reftable_ref_record *b, int hash_size);
/* reftable_log_record holds a reflog entry */
struct reftable_log_record {
char *refname;
size_t refname_cap;
uint64_t update_index; /* logical timestamp of a transactional update.
*/
enum {
/* tombstone to hide deletions from earlier tables */
REFTABLE_LOG_DELETION = 0x0,
/* a simple update */
REFTABLE_LOG_UPDATE = 0x1,
#define REFTABLE_NR_LOG_VALUETYPES 2
} value_type;
union {
struct {
unsigned char new_hash[GIT_MAX_RAWSZ];
unsigned char old_hash[GIT_MAX_RAWSZ];
char *name;
char *email;
uint64_t time;
int16_t tz_offset;
char *message;
size_t message_cap;
} update;
} value;
};
/* returns whether 'ref' represents the deletion of a log record. */
int reftable_log_record_is_deletion(const struct reftable_log_record *log);
/* frees and nulls all pointer values. */
void reftable_log_record_release(struct reftable_log_record *log);
/* returns whether two records are equal. Useful for testing. */
int reftable_log_record_equal(const struct reftable_log_record *a,
const struct reftable_log_record *b, int hash_size);
/* dumps a reftable_log_record on stdout, for debugging/testing. */
void reftable_log_record_print(struct reftable_log_record *log,
uint32_t hash_id);
#endif