git/fsmonitor.c
Ævar Arnfjörð Bjarmason 6269f8eaad treewide: always have a valid "index_state.repo" member
When the "repo" member was added to "the_index" in [1] the
repo_read_index() was made to populate it, but the unpopulated
"the_index" variable didn't get the same treatment.

Let's do that in initialize_the_repository() when we set it up, and
likewise for all of the current callers initialized an empty "struct
index_state".

This simplifies code that needs to deal with "the_index" or a custom
"struct index_state", we no longer need to second-guess this part of
the "index_state" deep in the stack. A recent example of such
second-guessing is the "istate->repo ? istate->repo : the_repository"
code in [2]. We can now simply use "istate->repo".

We're doing this by making use of the INDEX_STATE_INIT() macro (and
corresponding function) added in [3], which now have mandatory "repo"
arguments.

Because we now call index_state_init() in repository.c's
initialize_the_repository() we don't need to handle the case where we
have a "repo->index" whose "repo" member doesn't match the "repo"
we're setting up, i.e. the "Complete the double-reference" code in
repo_read_index() being altered here. That logic was originally added
in [1], and was working around the lack of what we now have in
initialize_the_repository().

For "fsmonitor-settings.c" we can remove the initialization of a NULL
"r" argument to "the_repository". This was added back in [4], and was
needed at the time for callers that would pass us the "r" from an
"istate->repo". Before this change such a change to
"fsmonitor-settings.c" would segfault all over the test suite (e.g. in
t0002-gitfile.sh).

This change has wider eventual implications for
"fsmonitor-settings.c". The reason the other lazy loading behavior in
it is required (starting with "if (!r->settings.fsmonitor) ..." is
because of the previously passed "r" being "NULL".

I have other local changes on top of this which move its configuration
reading to "prepare_repo_settings()" in "repo-settings.c", as we could
now start to rely on it being called for our "r". But let's leave all
of that for now, and narrowly remove this particular part of the
lazy-loading.

1. 1fd9ae517c (repository: add repo reference to index_state,
   2021-01-23)
2. ee1f0c242e (read-cache: add index.skipHash config option,
   2023-01-06)
3. 2f6b1eb794 (cache API: add a "INDEX_STATE_INIT" macro/function,
   add release_index(), 2023-01-12)
4. 1e0ea5c431 (fsmonitor: config settings are repository-specific,
   2022-03-25)

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Acked-by: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-01-17 14:32:06 -08:00

612 lines
18 KiB
C

#include "cache.h"
#include "config.h"
#include "dir.h"
#include "ewah/ewok.h"
#include "fsmonitor.h"
#include "fsmonitor-ipc.h"
#include "run-command.h"
#include "strbuf.h"
#define INDEX_EXTENSION_VERSION1 (1)
#define INDEX_EXTENSION_VERSION2 (2)
#define HOOK_INTERFACE_VERSION1 (1)
#define HOOK_INTERFACE_VERSION2 (2)
struct trace_key trace_fsmonitor = TRACE_KEY_INIT(FSMONITOR);
static void assert_index_minimum(struct index_state *istate, size_t pos)
{
if (pos > istate->cache_nr)
BUG("fsmonitor_dirty has more entries than the index (%"PRIuMAX" > %u)",
(uintmax_t)pos, istate->cache_nr);
}
static void fsmonitor_ewah_callback(size_t pos, void *is)
{
struct index_state *istate = (struct index_state *)is;
struct cache_entry *ce;
assert_index_minimum(istate, pos + 1);
ce = istate->cache[pos];
ce->ce_flags &= ~CE_FSMONITOR_VALID;
}
static int fsmonitor_hook_version(void)
{
int hook_version;
if (git_config_get_int("core.fsmonitorhookversion", &hook_version))
return -1;
if (hook_version == HOOK_INTERFACE_VERSION1 ||
hook_version == HOOK_INTERFACE_VERSION2)
return hook_version;
warning("Invalid hook version '%i' in core.fsmonitorhookversion. "
"Must be 1 or 2.", hook_version);
return -1;
}
int read_fsmonitor_extension(struct index_state *istate, const void *data,
unsigned long sz)
{
const char *index = data;
uint32_t hdr_version;
uint32_t ewah_size;
struct ewah_bitmap *fsmonitor_dirty;
int ret;
uint64_t timestamp;
struct strbuf last_update = STRBUF_INIT;
if (sz < sizeof(uint32_t) + 1 + sizeof(uint32_t))
return error("corrupt fsmonitor extension (too short)");
hdr_version = get_be32(index);
index += sizeof(uint32_t);
if (hdr_version == INDEX_EXTENSION_VERSION1) {
timestamp = get_be64(index);
strbuf_addf(&last_update, "%"PRIu64"", timestamp);
index += sizeof(uint64_t);
} else if (hdr_version == INDEX_EXTENSION_VERSION2) {
strbuf_addstr(&last_update, index);
index += last_update.len + 1;
} else {
return error("bad fsmonitor version %d", hdr_version);
}
istate->fsmonitor_last_update = strbuf_detach(&last_update, NULL);
ewah_size = get_be32(index);
index += sizeof(uint32_t);
fsmonitor_dirty = ewah_new();
ret = ewah_read_mmap(fsmonitor_dirty, index, ewah_size);
if (ret != ewah_size) {
ewah_free(fsmonitor_dirty);
return error("failed to parse ewah bitmap reading fsmonitor index extension");
}
istate->fsmonitor_dirty = fsmonitor_dirty;
if (!istate->split_index)
assert_index_minimum(istate, istate->fsmonitor_dirty->bit_size);
trace2_data_string("index", NULL, "extension/fsmn/read/token",
istate->fsmonitor_last_update);
trace_printf_key(&trace_fsmonitor,
"read fsmonitor extension successful '%s'",
istate->fsmonitor_last_update);
return 0;
}
void fill_fsmonitor_bitmap(struct index_state *istate)
{
unsigned int i, skipped = 0;
istate->fsmonitor_dirty = ewah_new();
for (i = 0; i < istate->cache_nr; i++) {
if (istate->cache[i]->ce_flags & CE_REMOVE)
skipped++;
else if (!(istate->cache[i]->ce_flags & CE_FSMONITOR_VALID))
ewah_set(istate->fsmonitor_dirty, i - skipped);
}
}
void write_fsmonitor_extension(struct strbuf *sb, struct index_state *istate)
{
uint32_t hdr_version;
uint32_t ewah_start;
uint32_t ewah_size = 0;
int fixup = 0;
if (!istate->split_index)
assert_index_minimum(istate, istate->fsmonitor_dirty->bit_size);
put_be32(&hdr_version, INDEX_EXTENSION_VERSION2);
strbuf_add(sb, &hdr_version, sizeof(uint32_t));
strbuf_addstr(sb, istate->fsmonitor_last_update);
strbuf_addch(sb, 0); /* Want to keep a NUL */
fixup = sb->len;
strbuf_add(sb, &ewah_size, sizeof(uint32_t)); /* we'll fix this up later */
ewah_start = sb->len;
ewah_serialize_strbuf(istate->fsmonitor_dirty, sb);
ewah_free(istate->fsmonitor_dirty);
istate->fsmonitor_dirty = NULL;
/* fix up size field */
put_be32(&ewah_size, sb->len - ewah_start);
memcpy(sb->buf + fixup, &ewah_size, sizeof(uint32_t));
trace2_data_string("index", NULL, "extension/fsmn/write/token",
istate->fsmonitor_last_update);
trace_printf_key(&trace_fsmonitor,
"write fsmonitor extension successful '%s'",
istate->fsmonitor_last_update);
}
/*
* Call the query-fsmonitor hook passing the last update token of the saved results.
*/
static int query_fsmonitor_hook(struct repository *r,
int version,
const char *last_update,
struct strbuf *query_result)
{
struct child_process cp = CHILD_PROCESS_INIT;
int result;
if (fsm_settings__get_mode(r) != FSMONITOR_MODE_HOOK)
return -1;
strvec_push(&cp.args, fsm_settings__get_hook_path(r));
strvec_pushf(&cp.args, "%d", version);
strvec_pushf(&cp.args, "%s", last_update);
cp.use_shell = 1;
cp.dir = get_git_work_tree();
trace2_region_enter("fsm_hook", "query", NULL);
result = capture_command(&cp, query_result, 1024);
if (result)
trace2_data_intmax("fsm_hook", NULL, "query/failed", result);
else
trace2_data_intmax("fsm_hook", NULL, "query/response-length",
query_result->len);
trace2_region_leave("fsm_hook", "query", NULL);
return result;
}
static void fsmonitor_refresh_callback(struct index_state *istate, char *name)
{
int i, len = strlen(name);
int pos = index_name_pos(istate, name, len);
trace_printf_key(&trace_fsmonitor,
"fsmonitor_refresh_callback '%s' (pos %d)",
name, pos);
if (name[len - 1] == '/') {
/*
* The daemon can decorate directory events, such as
* moves or renames, with a trailing slash if the OS
* FS Event contains sufficient information, such as
* MacOS.
*
* Use this to invalidate the entire cone under that
* directory.
*
* We do not expect an exact match because the index
* does not normally contain directory entries, so we
* start at the insertion point and scan.
*/
if (pos < 0)
pos = -pos - 1;
/* Mark all entries for the folder invalid */
for (i = pos; i < istate->cache_nr; i++) {
if (!starts_with(istate->cache[i]->name, name))
break;
istate->cache[i]->ce_flags &= ~CE_FSMONITOR_VALID;
}
/*
* We need to remove the traling "/" from the path
* for the untracked cache.
*/
name[len - 1] = '\0';
} else if (pos >= 0) {
/*
* We have an exact match for this path and can just
* invalidate it.
*/
istate->cache[pos]->ce_flags &= ~CE_FSMONITOR_VALID;
} else {
/*
* The path is not a tracked file -or- it is a
* directory event on a platform that cannot
* distinguish between file and directory events in
* the event handler, such as Windows.
*
* Scan as if it is a directory and invalidate the
* cone under it. (But remember to ignore items
* between "name" and "name/", such as "name-" and
* "name.".
*/
pos = -pos - 1;
for (i = pos; i < istate->cache_nr; i++) {
if (!starts_with(istate->cache[i]->name, name))
break;
if ((unsigned char)istate->cache[i]->name[len] > '/')
break;
if (istate->cache[i]->name[len] == '/')
istate->cache[i]->ce_flags &= ~CE_FSMONITOR_VALID;
}
}
/*
* Mark the untracked cache dirty even if it wasn't found in the index
* as it could be a new untracked file.
*/
untracked_cache_invalidate_path(istate, name, 0);
}
/*
* The number of pathnames that we need to receive from FSMonitor
* before we force the index to be updated.
*
* Note that any pathname within the set of received paths MAY cause
* cache-entry or istate flag bits to be updated and thus cause the
* index to be updated on disk.
*
* However, the response may contain many paths (such as ignored
* paths) that will not update any flag bits. And thus not force the
* index to be updated. (This is fine and normal.) It also means
* that the token will not be updated in the FSMonitor index
* extension. So the next Git command will find the same token in the
* index, make the same token-relative request, and receive the same
* response (plus any newly changed paths). If this response is large
* (and continues to grow), performance could be impacted.
*
* For example, if the user runs a build and it writes 100K object
* files but doesn't modify any source files, the index would not need
* to be updated. The FSMonitor response (after the build and
* relative to a pre-build token) might be 5MB. Each subsequent Git
* command will receive that same 100K/5MB response until something
* causes the index to be updated. And `refresh_fsmonitor()` will
* have to iterate over those 100K paths each time.
*
* Performance could be improved if we optionally force update the
* index after a very large response and get an updated token into
* the FSMonitor index extension. This should allow subsequent
* commands to get smaller and more current responses.
*
* The value chosen here does not need to be precise. The index
* will be updated automatically the first time the user touches
* a tracked file and causes a command like `git status` to
* update an mtime to be updated and/or set a flag bit.
*/
static int fsmonitor_force_update_threshold = 100;
void refresh_fsmonitor(struct index_state *istate)
{
static int warn_once = 0;
struct strbuf query_result = STRBUF_INIT;
int query_success = 0, hook_version = -1;
size_t bol = 0; /* beginning of line */
uint64_t last_update;
struct strbuf last_update_token = STRBUF_INIT;
char *buf;
unsigned int i;
int is_trivial = 0;
struct repository *r = istate->repo;
enum fsmonitor_mode fsm_mode = fsm_settings__get_mode(r);
enum fsmonitor_reason reason = fsm_settings__get_reason(r);
if (!warn_once && reason > FSMONITOR_REASON_OK) {
char *msg = fsm_settings__get_incompatible_msg(r, reason);
warn_once = 1;
warning("%s", msg);
free(msg);
}
if (fsm_mode <= FSMONITOR_MODE_DISABLED ||
istate->fsmonitor_has_run_once)
return;
istate->fsmonitor_has_run_once = 1;
trace_printf_key(&trace_fsmonitor, "refresh fsmonitor");
if (fsm_mode == FSMONITOR_MODE_IPC) {
query_success = !fsmonitor_ipc__send_query(
istate->fsmonitor_last_update ?
istate->fsmonitor_last_update : "builtin:fake",
&query_result);
if (query_success) {
/*
* The response contains a series of nul terminated
* strings. The first is the new token.
*
* Use `char *buf` as an interlude to trick the CI
* static analysis to let us use `strbuf_addstr()`
* here (and only copy the token) rather than
* `strbuf_addbuf()`.
*/
buf = query_result.buf;
strbuf_addstr(&last_update_token, buf);
bol = last_update_token.len + 1;
is_trivial = query_result.buf[bol] == '/';
if (is_trivial)
trace2_data_intmax("fsm_client", NULL,
"query/trivial-response", 1);
} else {
/*
* The builtin daemon is not available on this
* platform -OR- we failed to get a response.
*
* Generate a fake token (rather than a V1
* timestamp) for the index extension. (If
* they switch back to the hook API, we don't
* want ambiguous state.)
*/
strbuf_addstr(&last_update_token, "builtin:fake");
}
goto apply_results;
}
assert(fsm_mode == FSMONITOR_MODE_HOOK);
hook_version = fsmonitor_hook_version();
/*
* This could be racy so save the date/time now and query_fsmonitor_hook
* should be inclusive to ensure we don't miss potential changes.
*/
last_update = getnanotime();
if (hook_version == HOOK_INTERFACE_VERSION1)
strbuf_addf(&last_update_token, "%"PRIu64"", last_update);
/*
* If we have a last update token, call query_fsmonitor_hook for the set of
* changes since that token, else assume everything is possibly dirty
* and check it all.
*/
if (istate->fsmonitor_last_update) {
if (hook_version == -1 || hook_version == HOOK_INTERFACE_VERSION2) {
query_success = !query_fsmonitor_hook(
r, HOOK_INTERFACE_VERSION2,
istate->fsmonitor_last_update, &query_result);
if (query_success) {
if (hook_version < 0)
hook_version = HOOK_INTERFACE_VERSION2;
/*
* First entry will be the last update token
* Need to use a char * variable because static
* analysis was suggesting to use strbuf_addbuf
* but we don't want to copy the entire strbuf
* only the chars up to the first NUL
*/
buf = query_result.buf;
strbuf_addstr(&last_update_token, buf);
if (!last_update_token.len) {
warning("Empty last update token.");
query_success = 0;
} else {
bol = last_update_token.len + 1;
is_trivial = query_result.buf[bol] == '/';
}
} else if (hook_version < 0) {
hook_version = HOOK_INTERFACE_VERSION1;
if (!last_update_token.len)
strbuf_addf(&last_update_token, "%"PRIu64"", last_update);
}
}
if (hook_version == HOOK_INTERFACE_VERSION1) {
query_success = !query_fsmonitor_hook(
r, HOOK_INTERFACE_VERSION1,
istate->fsmonitor_last_update, &query_result);
if (query_success)
is_trivial = query_result.buf[0] == '/';
}
if (is_trivial)
trace2_data_intmax("fsm_hook", NULL,
"query/trivial-response", 1);
trace_performance_since(last_update, "fsmonitor process '%s'",
fsm_settings__get_hook_path(r));
trace_printf_key(&trace_fsmonitor,
"fsmonitor process '%s' returned %s",
fsm_settings__get_hook_path(r),
query_success ? "success" : "failure");
}
apply_results:
/*
* The response from FSMonitor (excluding the header token) is
* either:
*
* [a] a (possibly empty) list of NUL delimited relative
* pathnames of changed paths. This list can contain
* files and directories. Directories have a trailing
* slash.
*
* [b] a single '/' to indicate the provider had no
* information and that we should consider everything
* invalid. We call this a trivial response.
*/
trace2_region_enter("fsmonitor", "apply_results", istate->repo);
if (query_success && !is_trivial) {
/*
* Mark all pathnames returned by the monitor as dirty.
*
* This updates both the cache-entries and the untracked-cache.
*/
int count = 0;
buf = query_result.buf;
for (i = bol; i < query_result.len; i++) {
if (buf[i] != '\0')
continue;
fsmonitor_refresh_callback(istate, buf + bol);
bol = i + 1;
count++;
}
if (bol < query_result.len) {
fsmonitor_refresh_callback(istate, buf + bol);
count++;
}
/* Now mark the untracked cache for fsmonitor usage */
if (istate->untracked)
istate->untracked->use_fsmonitor = 1;
if (count > fsmonitor_force_update_threshold)
istate->cache_changed |= FSMONITOR_CHANGED;
trace2_data_intmax("fsmonitor", istate->repo, "apply_count",
count);
} else {
/*
* We failed to get a response or received a trivial response,
* so invalidate everything.
*
* We only want to run the post index changed hook if
* we've actually changed entries, so keep track if we
* actually changed entries or not.
*/
int is_cache_changed = 0;
for (i = 0; i < istate->cache_nr; i++) {
if (istate->cache[i]->ce_flags & CE_FSMONITOR_VALID) {
is_cache_changed = 1;
istate->cache[i]->ce_flags &= ~CE_FSMONITOR_VALID;
}
}
/*
* If we're going to check every file, ensure we save
* the results.
*/
if (is_cache_changed)
istate->cache_changed |= FSMONITOR_CHANGED;
if (istate->untracked)
istate->untracked->use_fsmonitor = 0;
}
trace2_region_leave("fsmonitor", "apply_results", istate->repo);
strbuf_release(&query_result);
/* Now that we've updated istate, save the last_update_token */
FREE_AND_NULL(istate->fsmonitor_last_update);
istate->fsmonitor_last_update = strbuf_detach(&last_update_token, NULL);
}
/*
* The caller wants to turn on FSMonitor. And when the caller writes
* the index to disk, a FSMonitor extension should be included. This
* requires that `istate->fsmonitor_last_update` not be NULL. But we
* have not actually talked to a FSMonitor process yet, so we don't
* have an initial value for this field.
*
* For a protocol V1 FSMonitor process, this field is a formatted
* "nanoseconds since epoch" field. However, for a protocol V2
* FSMonitor process, this field is an opaque token.
*
* Historically, `add_fsmonitor()` has initialized this field to the
* current time for protocol V1 processes. There are lots of race
* conditions here, but that code has shipped...
*
* The only true solution is to use a V2 FSMonitor and get a current
* or default token value (that it understands), but we cannot do that
* until we have actually talked to an instance of the FSMonitor process
* (but the protocol requires that we send a token first...).
*
* For simplicity, just initialize like we have a V1 process and require
* that V2 processes adapt.
*/
static void initialize_fsmonitor_last_update(struct index_state *istate)
{
struct strbuf last_update = STRBUF_INIT;
strbuf_addf(&last_update, "%"PRIu64"", getnanotime());
istate->fsmonitor_last_update = strbuf_detach(&last_update, NULL);
}
void add_fsmonitor(struct index_state *istate)
{
unsigned int i;
if (!istate->fsmonitor_last_update) {
trace_printf_key(&trace_fsmonitor, "add fsmonitor");
istate->cache_changed |= FSMONITOR_CHANGED;
initialize_fsmonitor_last_update(istate);
/* reset the fsmonitor state */
for (i = 0; i < istate->cache_nr; i++)
istate->cache[i]->ce_flags &= ~CE_FSMONITOR_VALID;
/* reset the untracked cache */
if (istate->untracked) {
add_untracked_cache(istate);
istate->untracked->use_fsmonitor = 1;
}
/* Update the fsmonitor state */
refresh_fsmonitor(istate);
}
}
void remove_fsmonitor(struct index_state *istate)
{
if (istate->fsmonitor_last_update) {
trace_printf_key(&trace_fsmonitor, "remove fsmonitor");
istate->cache_changed |= FSMONITOR_CHANGED;
FREE_AND_NULL(istate->fsmonitor_last_update);
}
}
void tweak_fsmonitor(struct index_state *istate)
{
unsigned int i;
int fsmonitor_enabled = (fsm_settings__get_mode(istate->repo)
> FSMONITOR_MODE_DISABLED);
if (istate->fsmonitor_dirty) {
if (fsmonitor_enabled) {
/* Mark all entries valid */
for (i = 0; i < istate->cache_nr; i++) {
if (S_ISGITLINK(istate->cache[i]->ce_mode))
continue;
istate->cache[i]->ce_flags |= CE_FSMONITOR_VALID;
}
/* Mark all previously saved entries as dirty */
assert_index_minimum(istate, istate->fsmonitor_dirty->bit_size);
ewah_each_bit(istate->fsmonitor_dirty, fsmonitor_ewah_callback, istate);
refresh_fsmonitor(istate);
}
ewah_free(istate->fsmonitor_dirty);
istate->fsmonitor_dirty = NULL;
}
if (fsmonitor_enabled)
add_fsmonitor(istate);
else
remove_fsmonitor(istate);
}