git/t/perf/aggregate.perl

#!/usr/bin/perl

use lib '../../perl/blib/lib';
use strict;
use warnings;
use Git;

# Read the first line of a ".times" result file and convert it to seconds.
#
# The line must look like "[H:]M:S USER SYS", where S, USER and SYS may
# carry a fractional part.  Returns the list (real, user, sys) with the
# wall-clock part folded into seconds.  Returns nothing (undef in scalar
# context, an empty list in list context) when the file cannot be opened
# or contains no line at all; dies when the line has an unexpected layout.
sub get_times {
	my $name = shift;
	open my $fh, "<", $name or return;
	my $line = <$fh>;
	if (not defined $line) {
		# Empty file: treat it like a missing result, but do not leave
		# the handle open behind us.
		close $fh;
		return;
	}
	close $fh or die "cannot close $name: $!";
	my ($hours, $minutes, $seconds, $user, $sys) =
		$line =~ /^(?:(\d+):)?(\d+):(\d+(?:\.\d+)?) (\d+(?:\.\d+)?) (\d+(?:\.\d+)?)$/
		or die "bad input line: $line";
	# The hours field is optional in the input.
	my $rt = ((defined $hours ? $hours : 0.0)*60+$minutes)*60+$seconds;
	return ($rt, $user, $sys);
}

# Render one table cell as "real(user+sys)", optionally followed by the
# change of the real time relative to a baseline.
#
#   $r, $u, $s  real, user and system time; an undefined $r means there is
#               no measurement and yields "<missing>"
#   $firstr     baseline real time (optional); when given, the relative
#               change is appended as a signed percentage, as " =" when
#               both the baseline and $r are zero, or as " +inf" when only
#               the baseline is not positive
sub format_times {
	my ($r, $u, $s, $firstr) = @_;
	return "<missing>" unless defined $r;

	my $out = sprintf "%.2f(%.2f+%.2f)", $r, $u, $s;
	return $out unless defined $firstr;

	my $change;
	if ($firstr > 0) {
		$change = sprintf " %+.1f%%", 100.0*($r-$firstr)/$firstr;
	} else {
		# No percentage can be computed against a zero baseline.
		$change = $r == 0 ? " =" : " +inf";
	}
	return $out . $change;
}

# Command line: [<directory-or-revision>...] [--] [<test-script>...]
#
# @dirs        trees to compare, in the order given
# %dirnames    tree -> the argument as the user spelled it
# %dirabbrevs  tree -> display name (only set for real directories here)
# %prefixes    tree -> prefix of that tree's files under test-results/
my (@dirs, %dirnames, %dirabbrevs, %prefixes, @tests);
while (@ARGV) {
	my $arg = $ARGV[0];
	# The first existing file (taken to be a test script) or an explicit
	# "--" ends the list of trees.
	last if -f $arg or $arg eq "--";

	my $dir;
	if (-d $arg) {
		# A directory is used as given, minus trailing slashes.
		$arg =~ s{/*$}{};
		$dir = $arg;
		$dirabbrevs{$dir} = $dir;
	} else {
		# Anything else must resolve as a revision.
		my $rev = Git::command_oneline(qw(rev-parse --verify), $arg);
		$dir = "build/".$rev;
	}
	push @dirs, $dir;
	$dirnames{$dir} = $arg;

	# NOTE(review): inside tr/// the "^" is an ordinary character, so with
	# /c it is kept alongside the alphanumerics instead of being mapped to
	# "_" -- confirm this matches how the result files get named.
	(my $prefix = $dir) =~ tr/^a-zA-Z0-9/_/c;
	$prefixes{$dir} = $prefix . '.';
	shift @ARGV;
}

# Without any tree on the command line, report on the current one.
@dirs = ('.') unless @dirs;
$dirabbrevs{'.'} = "this tree";
$dirnames{'.'} = "this tree";
$prefixes{'.'} = '';

shift @ARGV if @ARGV and $ARGV[0] eq "--";

# Whatever remains names the test scripts; default to all of them.
@tests = @ARGV;
@tests = glob "p????-*.sh" unless @tests;

# Collect every subtest of every requested test script.
#
# @subtests    "<script>.<number>" in the order listed in the .subtests files
# %shorttests  the same key mapped to "<script digits>.<number>" for display
my @subtests;
my %shorttests;
for my $t (@tests) {
	# Reduce "dir/p1234-name.sh" to "p1234-name" (in place, in @tests) and
	# remember the numeric part of the script name.
	$t =~ s{(?:.*/)?(p(\d+)-[^/]+)\.sh$}{$1} or die "bad test name: $t";
	my $n = $2;
	my $fname = "test-results/$t.subtests";
	open my $fp, "<", $fname or die "cannot open $fname: $!";
	# Read line by line instead of slurping the file into a list first.
	while (my $line = <$fp>) {
		chomp $line;
		$line =~ /^(\d+)$/ or die "malformed subtest line: $line";
		push @subtests, "$t.$1";
		$shorttests{"$t.$1"} = "$n.$1";
	}
	close $fp or die "cannot close $fname: $!";
}

# Return the first line of a description file, decoded as UTF-8 and
# without its trailing newline.
#
# Returns "<error reading description>" when the file cannot be opened and
# an empty string when the file has no content.  Dies if the UTF-8 layer
# cannot be applied or the file cannot be closed.
sub read_descr {
	my $name = shift;
	open my $fh, "<", $name or return "<error reading description>";
	# Decode as UTF-8 so that length() counts characters, not bytes.
	binmode $fh, ":utf8" or die "PANIC on binmode: $!";
	my $line = <$fh>;
	close $fh or die "cannot close $name: $!";
	# An empty file yields no line at all; hand back an empty description
	# instead of chomping (and later concatenating) undef.
	return "" if not defined $line;
	chomp $line;
	return $line;
}

# Build the left-hand column: "<short id>: <description>" per subtest,
# tracking the widest entry so the column can be padded.
my %descrs;
my $descrlen = 4; # "Test"
for my $t (@subtests) {
	my $text = read_descr("test-results/$t.descr");
	$descrs{$t} = $shorttests{$t} . ": " . $text;
	my $len = length $descrs{$t};
	$descrlen = $len if $len > $descrlen;
}

# Return 1 if any value occurs more than once in the argument list,
# 0 otherwise (including for an empty list).
sub have_duplicate {
	my %count;
	for my $item (@_) {
		# The post-increment is true from the second sighting onwards.
		return 1 if $count{$item}++;
	}
	return 0;
}
# Return 1 if at least one argument contains a "/", 0 otherwise
# (including for an empty list).
sub have_slash {
	for my $path (@_) {
		return 1 if $path =~ m{/};
	}
	return 0;
}

# Shorten the display names by dropping leading path components, one level
# per round, for as long as the shortened names remain distinct.
# %dirabbrevs always holds the last accepted set; %newdirabbrevs is the
# candidate for the next round.
my %newdirabbrevs = %dirabbrevs;
until (have_duplicate(values %newdirabbrevs)) {
	# The candidate names are unambiguous: accept them.
	%dirabbrevs = %newdirabbrevs;
	# Nothing left to strip once no name contains a slash.
	last unless have_slash(values %dirabbrevs);
	# Next candidate: every name minus its first "component/".
	for my $key (keys %dirabbrevs) {
		(my $shorter = $dirabbrevs{$key}) =~ s{^[^/]*/}{};
		$newdirabbrevs{$key} = $shorter;
	}
}

# Load all timings and work out how wide each tree's column has to be.
#
# %times       "<prefix><subtest>" -> array ref holding what get_times gave
# @colwidth    per tree (same order as @dirs): widest of heading and cells
# $totalwidth  width of the whole table, used for the separator line
my %times;
# Start each column at the width of its heading.
my @colwidth = map {
	length(exists $dirabbrevs{$_} ? $dirabbrevs{$_} : $dirnames{$_})
} @dirs;
for my $t (@subtests) {
	# Real time of the first tree that has a result; later trees are
	# formatted relative to it.
	my $firstr;
	for my $i (0..$#dirs) {
		my $key = $prefixes{$dirs[$i]} . $t;
		my @result = get_times("test-results/$key.times");
		$times{$key} = \@result;
		my ($r,$u,$s) = @result;
		my $w = length format_times($r,$u,$s,$firstr);
		$colwidth[$i] = $w if $w > $colwidth[$i];
		$firstr = $r unless defined $firstr;
	}
}
# Every column is preceded by a three-space gap.
my $totalwidth = 3*@dirs+$descrlen;
for my $w (@colwidth) {
	$totalwidth += $w;
}

# Print the table: a heading row, a dashed separator, then one row per
# subtest with one left-aligned cell per tree.
binmode STDOUT, ":utf8" or die "PANIC on binmode: $!";

# One format serves every row: the description column followed by a
# three-space gap and a padded cell for each tree.
my $rowfmt = "%-${descrlen}s" . join("", map { "   %-${_}s" } @colwidth) . "\n";

my @headings = map {
	exists $dirabbrevs{$_} ? $dirabbrevs{$_} : $dirnames{$_}
} @dirs;
printf($rowfmt, "Test", @headings);
print "-"x$totalwidth, "\n";
for my $t (@subtests) {
	# Baseline for the percentages: the first tree with a measurement.
	my $firstr;
	my @cells;
	for my $d (@dirs) {
		my ($r,$u,$s) = @{$times{$prefixes{$d}.$t}};
		push @cells, format_times($r,$u,$s,$firstr);
		$firstr = $r unless defined $firstr;
	}
	printf($rowfmt, $descrs{$t}, @cells);
}
Introduce a performance testing framework This introduces a performance testing framework under t/perf/. It tries to be as close to the test-lib.sh infrastructure as possible, and thus should be easy to get used to for git developers. The following points were considered for the implementation: 1. You usually want to compare arbitrary revisions/build trees against each other. They may not have the performance test under consideration, or even the perf-lib.sh infrastructure. To cope with this, the 'run' script lets you specify arbitrary build dirs and revisions. It even automatically builds the revisions if it doesn't have them at hand yet. 2. Usually you would not want to run all tests. It would take too long anyway. The 'run' script lets you specify which tests to run; or you can also do it manually. There is a Makefile for discoverability and 'make clean', but it is not meant for real-world use. 3. Creating test repos from scratch in every test is extremely time-consuming, and shipping or downloading such large/weird repos is out of the question. We leave this decision to the user. Two different sizes of test repos can be configured, and the scripts just copy one or more of those (using hardlinks for the object store). By default it tries to use the build tree's git.git repository. This is fairly fast and versatile. Using a copy instead of a clone preserves many properties that the user may want to test for, such as lots of loose objects, unpacked refs, etc. Signed-off-by: Thomas Rast <trast@student.ethz.ch> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2012-02-17 10:25:09 +00:00			`#!/usr/bin/perl`

t/perf: make runner work even if Git is not installed aggregate.perl did not work when Git.pm is not installed to a directory contained in the default Perl library path list or PERLLIB. This commit prepends the Perl library path of the current Git source tree to enable this. Note that this commit adds a hard-coded relative path use lib '../../perl/blib/lib'; instead of the flexible environment-based variant use lib (split(/:/, $ENV{GITPERLLIB})); which is used in tests written in Perl. The hard-coded variant is used because the whole performance test framework does it that way (and GITPERLLIB is not set there). Signed-off-by: Stephan Beyer <s-beyer@gmx.net> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2015-09-25 00:31:37 +00:00			`use lib '../../perl/blib/lib';`
Introduce a performance testing framework This introduces a performance testing framework under t/perf/. It tries to be as close to the test-lib.sh infrastructure as possible, and thus should be easy to get used to for git developers. The following points were considered for the implementation: 1. You usually want to compare arbitrary revisions/build trees against each other. They may not have the performance test under consideration, or even the perf-lib.sh infrastructure. To cope with this, the 'run' script lets you specify arbitrary build dirs and revisions. It even automatically builds the revisions if it doesn't have them at hand yet. 2. Usually you would not want to run all tests. It would take too long anyway. The 'run' script lets you specify which tests to run; or you can also do it manually. There is a Makefile for discoverability and 'make clean', but it is not meant for real-world use. 3. Creating test repos from scratch in every test is extremely time-consuming, and shipping or downloading such large/weird repos is out of the question. We leave this decision to the user. Two different sizes of test repos can be configured, and the scripts just copy one or more of those (using hardlinks for the object store). By default it tries to use the build tree's git.git repository. This is fairly fast and versatile. Using a copy instead of a clone preserves many properties that the user may want to test for, such as lots of loose objects, unpacked refs, etc. Signed-off-by: Thomas Rast <trast@student.ethz.ch> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2012-02-17 10:25:09 +00:00			`use strict;`
			`use warnings;`
			`use Git;`

			`sub get_times {`
			`my $name = shift;`
			`open my $fh, "<", $name or return undef;`
			`my $line = <$fh>;`
			`return undef if not defined $line;`
			`close $fh or die "cannot close $name: $!";`
			`$line =~ /^(?:(\d+):)?(\d+):(\d+(?:\.\d+)?) (\d+(?:\.\d+)?) (\d+(?:\.\d+)?)$/`
			`or die "bad input line: $line";`
			`my $rt = ((defined $1 ? $1 : 0.0)*60+$2)*60+$3;`
			`return ($rt, $4, $5);`
			`}`

			`sub format_times {`
			`my ($r, $u, $s, $firstr) = @_;`
			`if (!defined $r) {`
			`return "<missing>";`
			`}`
			`my $out = sprintf "%.2f(%.2f+%.2f)", $r, $u, $s;`
			`if (defined $firstr) {`
			`if ($firstr > 0) {`
			`$out .= sprintf " %+.1f%%", 100.0*($r-$firstr)/$firstr;`
			`} elsif ($r == 0) {`
			`$out .= " =";`
			`} else {`
			`$out .= " +inf";`
			`}`
			`}`
			`return $out;`
			`}`

			`my (@dirs, %dirnames, %dirabbrevs, %prefixes, @tests);`
			`while (scalar @ARGV) {`
			`my $arg = $ARGV[0];`
			`my $dir;`
			`last if -f $arg or $arg eq "--";`
			`if (! -d $arg) {`
			`my $rev = Git::command_oneline(qw(rev-parse --verify), $arg);`
			`$dir = "build/".$rev;`
			`} else {`
			`$arg =~ s{/*$}{};`
			`$dir = $arg;`
			`$dirabbrevs{$dir} = $dir;`
			`}`
			`push @dirs, $dir;`
			`$dirnames{$dir} = $arg;`
			`my $prefix = $dir;`
			`$prefix =~ tr/^a-zA-Z0-9/_/c;`
			`$prefixes{$dir} = $prefix . '.';`
			`shift @ARGV;`
			`}`

			`if (not @dirs) {`
			`@dirs = ('.');`
			`}`
			`$dirnames{'.'} = $dirabbrevs{'.'} = "this tree";`
			`$prefixes{'.'} = '';`

			`shift @ARGV if scalar @ARGV and $ARGV[0] eq "--";`

			`@tests = @ARGV;`
			`if (not @tests) {`
			`@tests = glob "p????-*.sh";`
			`}`

			`my @subtests;`
			`my %shorttests;`
			`for my $t (@tests) {`
			`$t =~ s{(?:.*/)?(p(\d+)-[^/]+)\.sh$}{$1} or die "bad test name: $t";`
			`my $n = $2;`
			`my $fname = "test-results/$t.subtests";`
			`open my $fp, "<", $fname or die "cannot open $fname: $!";`
			`for (<$fp>) {`
			`chomp;`
			`/^(\d+)$/ or die "malformed subtest line: $_";`
			`push @subtests, "$t.$1";`
			`$shorttests{"$t.$1"} = "$n.$1";`
			`}`
			`close $fp or die "cannot close $fname: $!";`
			`}`

			`sub read_descr {`
			`my $name = shift;`
			`open my $fh, "<", $name or return "<error reading description>";`
t/perf: correctly align non-ASCII descriptions in output Change the test descriptions from being treated as binary blobs by perl to being treated as UTF-8. This ensures that e.g. a test description like "æ" is counted as 1 character, not 2. I have WIP performance tests for non-ASCII grep patterns on another topic that are affected by this. Now instead of: $ ./run p0000-perf-lib-sanity.sh [...] 0000.4: export a weird var 0.00(0.00+0.00) 0000.5: éḿíẗ ńöń-ÁŚĆÍÍ ćḧáŕáćẗéŕś 0.00(0.00+0.00) 0000.7: important variables available in subshells 0.00(0.00+0.00) [...] We emit: [...] 0000.4: export a weird var 0.00(0.00+0.00) 0000.5: éḿíẗ ńöń-ÁŚĆÍÍ ćḧáŕáćẗéŕś 0.00(0.00+0.00) 0000.7: important variables available in subshells 0.00(0.00+0.00) [...] Fixes code originally added in 342e9ef2d9 ("Introduce a performance testing framework", 2012-02-17). Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2017-04-21 19:44:28 +00:00			`binmode $fh, ":utf8" or die "PANIC on binmode: $!";`
Introduce a performance testing framework This introduces a performance testing framework under t/perf/. It tries to be as close to the test-lib.sh infrastructure as possible, and thus should be easy to get used to for git developers. The following points were considered for the implementation: 1. You usually want to compare arbitrary revisions/build trees against each other. They may not have the performance test under consideration, or even the perf-lib.sh infrastructure. To cope with this, the 'run' script lets you specify arbitrary build dirs and revisions. It even automatically builds the revisions if it doesn't have them at hand yet. 2. Usually you would not want to run all tests. It would take too long anyway. The 'run' script lets you specify which tests to run; or you can also do it manually. There is a Makefile for discoverability and 'make clean', but it is not meant for real-world use. 3. Creating test repos from scratch in every test is extremely time-consuming, and shipping or downloading such large/weird repos is out of the question. We leave this decision to the user. Two different sizes of test repos can be configured, and the scripts just copy one or more of those (using hardlinks for the object store). By default it tries to use the build tree's git.git repository. This is fairly fast and versatile. Using a copy instead of a clone preserves many properties that the user may want to test for, such as lots of loose objects, unpacked refs, etc. Signed-off-by: Thomas Rast <trast@student.ethz.ch> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2012-02-17 10:25:09 +00:00			`my $line = <$fh>;`
			`close $fh or die "cannot close $name";`
			`chomp $line;`
			`return $line;`
			`}`

			`my %descrs;`
			`my $descrlen = 4; # "Test"`
			`for my $t (@subtests) {`
			`$descrs{$t} = $shorttests{$t}.": ".read_descr("test-results/$t.descr");`
			`$descrlen = length $descrs{$t} if length $descrs{$t}>$descrlen;`
			`}`

			`sub have_duplicate {`
			`my %seen;`
			`for (@_) {`
			`return 1 if exists $seen{$_};`
			`$seen{$_} = 1;`
			`}`
			`return 0;`
			`}`
			`sub have_slash {`
			`for (@_) {`
			`return 1 if m{/};`
			`}`
			`return 0;`
			`}`

			`my %newdirabbrevs = %dirabbrevs;`
			`while (!have_duplicate(values %newdirabbrevs)) {`
			`%dirabbrevs = %newdirabbrevs;`
			`last if !have_slash(values %dirabbrevs);`
			`%newdirabbrevs = %dirabbrevs;`
			`for (values %newdirabbrevs) {`
			`s{^[^/]*/}{};`
			`}`
			`}`

			`my %times;`
			`my @colwidth = ((0)x@dirs);`
			`for my $i (0..$#dirs) {`
			`my $d = $dirs[$i];`
			`my $w = length (exists $dirabbrevs{$d} ? $dirabbrevs{$d} : $dirnames{$d});`
			`$colwidth[$i] = $w if $w > $colwidth[$i];`
			`}`
			`for my $t (@subtests) {`
			`my $firstr;`
			`for my $i (0..$#dirs) {`
			`my $d = $dirs[$i];`
			`$times{$prefixes{$d}.$t} = [get_times("test-results/$prefixes{$d}$t.times")];`
			`my ($r,$u,$s) = @{$times{$prefixes{$d}.$t}};`
			`my $w = length format_times($r,$u,$s,$firstr);`
			`$colwidth[$i] = $w if $w > $colwidth[$i];`
			`$firstr = $r unless defined $firstr;`
			`}`
			`}`
			`my $totalwidth = 3*@dirs+$descrlen;`
			`$totalwidth += $_ for (@colwidth);`

t/perf: correctly align non-ASCII descriptions in output Change the test descriptions from being treated as binary blobs by perl to being treated as UTF-8. This ensures that e.g. a test description like "æ" is counted as 1 character, not 2. I have WIP performance tests for non-ASCII grep patterns on another topic that are affected by this. Now instead of: $ ./run p0000-perf-lib-sanity.sh [...] 0000.4: export a weird var 0.00(0.00+0.00) 0000.5: éḿíẗ ńöń-ÁŚĆÍÍ ćḧáŕáćẗéŕś 0.00(0.00+0.00) 0000.7: important variables available in subshells 0.00(0.00+0.00) [...] We emit: [...] 0000.4: export a weird var 0.00(0.00+0.00) 0000.5: éḿíẗ ńöń-ÁŚĆÍÍ ćḧáŕáćẗéŕś 0.00(0.00+0.00) 0000.7: important variables available in subshells 0.00(0.00+0.00) [...] Fixes code originally added in 342e9ef2d9 ("Introduce a performance testing framework", 2012-02-17). Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2017-04-21 19:44:28 +00:00			`binmode STDOUT, ":utf8" or die "PANIC on binmode: $!";`

Introduce a performance testing framework This introduces a performance testing framework under t/perf/. It tries to be as close to the test-lib.sh infrastructure as possible, and thus should be easy to get used to for git developers. The following points were considered for the implementation: 1. You usually want to compare arbitrary revisions/build trees against each other. They may not have the performance test under consideration, or even the perf-lib.sh infrastructure. To cope with this, the 'run' script lets you specify arbitrary build dirs and revisions. It even automatically builds the revisions if it doesn't have them at hand yet. 2. Usually you would not want to run all tests. It would take too long anyway. The 'run' script lets you specify which tests to run; or you can also do it manually. There is a Makefile for discoverability and 'make clean', but it is not meant for real-world use. 3. Creating test repos from scratch in every test is extremely time-consuming, and shipping or downloading such large/weird repos is out of the question. We leave this decision to the user. Two different sizes of test repos can be configured, and the scripts just copy one or more of those (using hardlinks for the object store). By default it tries to use the build tree's git.git repository. This is fairly fast and versatile. Using a copy instead of a clone preserves many properties that the user may want to test for, such as lots of loose objects, unpacked refs, etc. Signed-off-by: Thomas Rast <trast@student.ethz.ch> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2012-02-17 10:25:09 +00:00			`printf "%-${descrlen}s", "Test";`
			`for my $i (0..$#dirs) {`
			`my $d = $dirs[$i];`
			`printf "   %-$colwidth[$i]s", (exists $dirabbrevs{$d} ? $dirabbrevs{$d} : $dirnames{$d});`
			`}`
			`print "\n";`
			`print "-"x$totalwidth, "\n";`
			`for my $t (@subtests) {`
			`printf "%-${descrlen}s", $descrs{$t};`
			`my $firstr;`
			`for my $i (0..$#dirs) {`
			`my $d = $dirs[$i];`
			`my ($r,$u,$s) = @{$times{$prefixes{$d}.$t}};`
			`printf "   %-$colwidth[$i]s", format_times($r,$u,$s,$firstr);`
			`$firstr = $r unless defined $firstr;`
			`}`
			`print "\n";`
			`}`