git/t/t7519/fsmonitor-watchman

128 lines
3.3 KiB
Text
Raw Normal View History

#!/usr/bin/perl
use strict;
use warnings;
use IPC::Open2;
# An example hook script to integrate Watchman
# (https://facebook.github.io/watchman/) with git to speed up detecting
# new and modified files.
#
# The hook is passed a version (currently 1) and a time in nanoseconds
# formatted as a string and outputs to stdout all files that have been
# modified since the given time. Paths must be relative to the root of
# the working tree and separated by a single NUL.
#
# To enable this hook, rename this file to "query-watchman" and set
# 'git config core.fsmonitor .git/hooks/query-watchman'
#
my ($version, $time) = @ARGV;
#print STDERR "$0 $version $time\n";

# Check the hook interface version. Only version 1, whose second argument
# is a time in nanoseconds, is handled here. Anything else -- including
# missing or non-numeric arguments -- exits with status 1 before any
# arithmetic is attempted on the values.
if (defined $version && $version =~ /\A\d+\z/ && $version == 1 &&
    defined $time && $time =~ /\A\d+\z/) {
	# Convert nanoseconds to seconds, then subtract one second to make
	# sure watchman will return all changes: git commands that finish
	# and start again within the same second would otherwise race with
	# the truncated timestamp.
	$time = int($time / 1000000000) - 1;
} else {
	exit 1;
}
# Determine the directory that is handed to Watchman as the root of the
# query: the current working directory of this hook.
my $git_work_tree;
if ($^O =~ /msys|cygwin/) {
	# Take the directory from Win32::GetCwd() and turn every backslash
	# into a forward slash.
	($git_work_tree = Win32::GetCwd()) =~ tr{\\}{/};
} else {
	require Cwd;
	$git_work_tree = Cwd::cwd();
}

# Number of times launch_watchman() may still add the tree to Watchman's
# watch list and repeat the query.
my $retry = 1;

launch_watchman();
# Ask Watchman which files under $git_work_tree changed since $time and
# print their names, separated by NUL, on stdout.
#
# Uses the file-level variables $git_work_tree, $time and $retry. When
# Watchman reports that the tree is not watched yet (and $retry allows it),
# the tree is added to Watchman's watch list, "/\0" is printed, the query
# is repeated once and the script exits with status 0.
#
# Dies with a "Falling back to scanning..." message when Watchman cannot be
# started, returns no output or output that does not start with "{", cannot
# be told to watch the tree, or reports an error.
#
# The query, the raw response and the emitted output are additionally
# dumped to .git/watchman-query.json, .git/watchman-response.json and
# .git/watchman-output.out; these dumps are best-effort only.
sub launch_watchman {
	my $pid = open2(\*CHLD_OUT, \*CHLD_IN, 'watchman -j')
	    or die "open2() failed: $!\n" .
	    "Falling back to scanning...\n";

	# The path is embedded in a JSON string below, so escape the two
	# characters that would otherwise end or corrupt that string.
	(my $json_root = $git_work_tree) =~ s/(["\\])/\\$1/g;

	# In the query below we're asking for the names of all files that
	# changed since $time.
	#
	# To accomplish this, we're using the "since" generator to use the
	# recency index to select candidate nodes and "fields" to limit the
	# output to file names only. There is deliberately no "expression"
	# that drops files created and deleted again within the window: it
	# raced with git commands creating or deleting files quickly.
	my $query = <<"END_QUERY";
["query", "$json_root", {
	"since": $time,
	"fields": ["name"]
}]
END_QUERY

	_dump_debug_file(".git/watchman-query.json", $query);

	print CHLD_IN $query;
	close CHLD_IN;
	my $response = do {local $/; <CHLD_OUT>};
	close CHLD_OUT;
	# open2() does not reap its child; do it here so it does not linger.
	waitpid($pid, 0);
	$response = "" unless defined $response;

	_dump_debug_file(".git/watchman-response.json", $response);

	die "Watchman: command returned no output.\n" .
	    "Falling back to scanning...\n" if $response eq "";
	die "Watchman: command returned invalid output: $response\n" .
	    "Falling back to scanning...\n" unless $response =~ /^\{/;

	# Prefer JSON::XS and fall back to JSON::PP when it cannot be loaded.
	my $json_pkg;
	eval {
		require JSON::XS;
		$json_pkg = "JSON::XS";
		1;
	} or do {
		require JSON::PP;
		$json_pkg = "JSON::PP";
	};

	my $o = $json_pkg->new->utf8->decode($response);

	if ($retry > 0 and $o->{error} and $o->{error} =~ m/unable to resolve root .* directory (.*) is not watched/) {
		print STDERR "Adding '$git_work_tree' to watchman's watch list.\n";
		$retry--;

		# List-form pipe open runs watchman without a shell, so the
		# path needs no quoting. Its stdout is read and discarded so
		# that it cannot end up in the hook's own output.
		my $started = open(my $watch_fh, "-|", "watchman", "watch", $git_work_tree);
		if ($started) {
			my @discarded = <$watch_fh>;
			close $watch_fh;	# sets $? to the exit status
		}
		die "Failed to make watchman watch '$git_work_tree'.\n" .
		    "Falling back to scanning...\n" if !$started or $? != 0;

		# Watchman will always return all files on the first query so
		# return the fast "everything is dirty" flag to git and do the
		# Watchman query just to get it over with now so we won't pay
		# the cost in git to look up each individual file.
		_dump_debug_file(".git/watchman-output.out", "/\0");

		print "/\0";
		# Failures of the repeated query are ignored on purpose: the
		# "everything is dirty" flag has already been emitted.
		eval { launch_watchman() };
		exit 0;
	}

	die "Watchman: $o->{error}.\n" .
	    "Falling back to scanning...\n" if $o->{error};

	# Tolerate a response without a "files" member.
	my @files = @{ $o->{files} || [] };
	my $output = join("\0", @files);

	_dump_debug_file(".git/watchman-output.out", $output, ":utf8");

	binmode STDOUT, ":utf8";
	print $output;
}

# Write $data to $path for debugging, optionally applying the I/O layer
# $layer first. Silently does nothing when the file cannot be opened.
sub _dump_debug_file {
	my ($path, $data, $layer) = @_;

	open(my $fh, ">", $path) or return;
	binmode $fh, $layer if defined $layer;
	print $fh $data;
	close $fh;
	return;
}