From 4669e7d68ec8fb6ff3572a1193ae75ae2094b8e6 Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Tue, 13 Feb 2018 10:39:38 -0800 Subject: [PATCH 1/2] packfile: remove GIT_DEBUG_LOOKUP log statements In commit 628522ec1439 ("sha1-lookup: more memory efficient search in sorted list of SHA-1", 2008-04-09), a different algorithm for searching a sorted list was introduced, together with a set of log statements guarded by GIT_DEBUG_LOOKUP that are invoked both when using that algorithm and when using the existing binary search. Those log statements was meant for experiments and debugging, but with the removal of the aforementioned different algorithm in commit f1068efefe6d ("sha1_file: drop experimental GIT_USE_LOOKUP search", 2017-08-09), those log statements are probably no longer necessary. Remove those statements. Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- packfile.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/packfile.c b/packfile.c index 4a5fe7ab18..58bdced3b1 100644 --- a/packfile.c +++ b/packfile.c @@ -1713,10 +1713,6 @@ off_t find_pack_entry_one(const unsigned char *sha1, const uint32_t *level1_ofs = p->index_data; const unsigned char *index = p->index_data; unsigned hi, lo, stride; - static int debug_lookup = -1; - - if (debug_lookup < 0) - debug_lookup = !!getenv("GIT_DEBUG_LOOKUP"); if (!index) { if (open_pack_index(p)) @@ -1738,17 +1734,10 @@ off_t find_pack_entry_one(const unsigned char *sha1, index += 4; } - if (debug_lookup) - printf("%02x%02x%02x... lo %u hi %u nr %"PRIu32"\n", - sha1[0], sha1[1], sha1[2], lo, hi, p->num_objects); - while (lo < hi) { unsigned mi = lo + (hi - lo) / 2; int cmp = hashcmp(index + mi * stride, sha1); - if (debug_lookup) - printf("lo %u hi %u rg %u mi %u\n", - lo, hi, hi - lo, mi); if (!cmp) return nth_packed_object_offset(p, mi); if (cmp > 0) From b4e00f7306a160639f047b3421985e8f3d0c6fb1 Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Tue, 13 Feb 2018 10:39:39 -0800 Subject: [PATCH 2/2] packfile: refactor hash search with fanout table Subsequent patches will introduce file formats that make use of a fanout array and a sorted table containing hashes, just like packfiles. Refactor the hash search in packfile.c into its own function, so that those patches can make use of it as well. Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- packfile.c | 18 ++++-------------- sha1-lookup.c | 28 ++++++++++++++++++++++++++++ sha1-lookup.h | 22 ++++++++++++++++++++++ 3 files changed, 54 insertions(+), 14 deletions(-) diff --git a/packfile.c b/packfile.c index 58bdced3b1..59648a1820 100644 --- a/packfile.c +++ b/packfile.c @@ -1712,7 +1712,8 @@ off_t find_pack_entry_one(const unsigned char *sha1, { const uint32_t *level1_ofs = p->index_data; const unsigned char *index = p->index_data; - unsigned hi, lo, stride; + unsigned stride; + uint32_t result; if (!index) { if (open_pack_index(p)) @@ -1725,8 +1726,6 @@ off_t find_pack_entry_one(const unsigned char *sha1, index += 8; } index += 4 * 256; - hi = ntohl(level1_ofs[*sha1]); - lo = ((*sha1 == 0x0) ? 0 : ntohl(level1_ofs[*sha1 - 1])); if (p->index_version > 1) { stride = 20; } else { @@ -1734,17 +1733,8 @@ off_t find_pack_entry_one(const unsigned char *sha1, index += 4; } - while (lo < hi) { - unsigned mi = lo + (hi - lo) / 2; - int cmp = hashcmp(index + mi * stride, sha1); - - if (!cmp) - return nth_packed_object_offset(p, mi); - if (cmp > 0) - hi = mi; - else - lo = mi+1; - } + if (bsearch_hash(sha1, level1_ofs, index, stride, &result)) + return nth_packed_object_offset(p, result); return 0; } diff --git a/sha1-lookup.c b/sha1-lookup.c index 4cf3ebd921..8d0b1db3e2 100644 --- a/sha1-lookup.c +++ b/sha1-lookup.c @@ -99,3 +99,31 @@ int sha1_pos(const unsigned char *sha1, void *table, size_t nr, } while (lo < hi); return -lo-1; } + +int bsearch_hash(const unsigned char *sha1, const uint32_t *fanout_nbo, + const unsigned char *table, size_t stride, uint32_t *result) +{ + uint32_t hi, lo; + + hi = ntohl(fanout_nbo[*sha1]); + lo = ((*sha1 == 0x0) ? 0 : ntohl(fanout_nbo[*sha1 - 1])); + + while (lo < hi) { + unsigned mi = lo + (hi - lo) / 2; + int cmp = hashcmp(table + mi * stride, sha1); + + if (!cmp) { + if (result) + *result = mi; + return 1; + } + if (cmp > 0) + hi = mi; + else + lo = mi + 1; + } + + if (result) + *result = lo; + return 0; +} diff --git a/sha1-lookup.h b/sha1-lookup.h index cf5314f402..7678b23b36 100644 --- a/sha1-lookup.h +++ b/sha1-lookup.h @@ -7,4 +7,26 @@ extern int sha1_pos(const unsigned char *sha1, void *table, size_t nr, sha1_access_fn fn); + +/* + * Searches for sha1 in table, using the given fanout table to determine the + * interval to search, then using binary search. Returns 1 if found, 0 if not. + * + * Takes the following parameters: + * + * - sha1: the hash to search for + * - fanout_nbo: a 256-element array of NETWORK-order 32-bit integers; the + * integer at position i represents the number of elements in table whose + * first byte is less than or equal to i + * - table: a sorted list of hashes with optional extra information in between + * - stride: distance between two consecutive elements in table (should be + * GIT_MAX_RAWSZ or greater) + * - result: if not NULL, this function stores the element index of the + * position found (if the search is successful) or the index of the least + * element that is greater than sha1 (if the search is not successful) + * + * This function does not verify the validity of the fanout table. + */ +int bsearch_hash(const unsigned char *sha1, const uint32_t *fanout_nbo, + const unsigned char *table, size_t stride, uint32_t *result); #endif