[benchmarks] Add MemoryCopy benchmark suite.

The MemoryCopy benchmark suite measures the overhead of copying
data between compatible TypedData or Pointer values.

Change-Id: Iaf5ea27b7f9177f4800880da36234afd2b908db2
Bug: https://github.com/dart-lang/sdk/issues/42072
Bug: b/294114694
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/318661
Reviewed-by: Martin Kustermann <kustermann@google.com>
Commit-Queue: Tess Strickland <sstrickl@google.com>
This commit is contained in:
Tess Strickland 2023-08-09 13:28:44 +00:00 committed by Commit Queue
parent 5ddb1b8ea7
commit d984fd77f6

View file

@ -0,0 +1,427 @@
// Copyright (c) 2023, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
// Micro-benchmarks for copying typed data lists.
import 'dart:ffi';
import 'dart:math';
import 'dart:typed_data';
import 'package:args/args.dart';
import 'package:ffi/ffi.dart';
const maxSizeInBytes = 10 * 1024 * 1024;
// A modified version of BenchmarkBase from package:benchmark_harness where
// - the run() method takes a number of rounds, so that there is only one run()
// call per measurement and thus the overhead of calling the run() method is
// the same across subclass results.
// - the measureFor() method returns the number of bytes transfered per second,
// not the number of microseconds per iteration (round).
abstract class MemoryCopyBenchmark {
final String name;
final int bytes;
MemoryCopyBenchmark(String name, this.bytes) : name = 'MemoryCopy.$name';
static const targetBatchSizeInBytes = 32 * 1024;
// Returns the number of bytes copied per second.
double measureFor(Duration minDuration) {
// The logic below is based off of BenchmarkBase._measureForImpl.
// We can't use BenchmarkBase.measureFor directly, because
// * it calls the function in a loop instead of passing the number of
// desired iterations to the function being called. Here, method
// invocation would dominate the actual body for small byte counts.
// * it doesn't provide the caller with the number of iterations performed,
// which we need to calculate the number of bytes transferred.
// Start off with enough rounds to ensure a minimum number of bytes copied
// per run() invocation.
int rounds = max(targetBatchSizeInBytes ~/ bytes, 1);
// If running a long measurement permit some amount of measurement jitter
// to avoid discarding results that almost, but not quite, reach the minimum
// duration requested.
final allowedJitter = Duration(
microseconds: minDuration.inSeconds > 0
? (minDuration.inMicroseconds * 0.1).floor()
: 0);
final watch = Stopwatch()..start();
while (true) {
// Try running for the current number of rounds and see if that reaches
// the minimum duration requested, so we only get the elapsed time from
// the StopWatch once for the final results used.
watch.reset();
run(rounds);
final elapsed = watch.elapsed;
final numberOfBytesCopied = rounds * bytes;
if (elapsed >= (minDuration - allowedJitter)) {
return (numberOfBytesCopied / elapsed.inMicroseconds) *
Duration.microsecondsPerSecond;
}
// If not, then adjust our estimate of how many iterations are needed to
// reach the minimum and try again.
rounds *= (minDuration.inMicroseconds / elapsed.inMicroseconds).ceil();
}
}
static final kValueRegexp = RegExp(r'^([0-9]+)');
static final kMaxLabelLength =
'MemoryCopy.1048576.setRange.TypedData.Double(NanosecondsPerChar)'.length;
// Maximum expected number of digits on either side of the decimal point.
static final kMaxDigits = 16;
double measure() {
setup();
// Warmup for 100 ms.
measureFor(const Duration(milliseconds: 100));
// Run benchmark for 1 second.
final double result = measureFor(const Duration(seconds: 1));
teardown();
return result;
}
void report({bool verbose = false, bool aligned = false}) {
final bytesPerSecond = measure();
void printLine(String label, String content) {
String contentPadding = ' ';
if (aligned) {
final matches = kValueRegexp.firstMatch(content)!;
final contentPaddingLength = 1 +
(kMaxLabelLength - label.length) +
max<int>(kMaxDigits - matches[1]!.length, 0);
contentPadding = ' ' * contentPaddingLength;
}
print('$label:$contentPadding$content');
}
printLine('$name(BytesPerSecond)', '$bytesPerSecond');
if (verbose) {
const nanoSecondsPerSecond = 1000 * 1000 * 1000;
final nanosecondsPerByte = nanoSecondsPerSecond / bytesPerSecond;
printLine('$name(NanosecondsPerChar)', '$nanosecondsPerByte');
const bytesPerMebibyte = 1024 * 1024;
final mibPerSecond = bytesPerSecond / bytesPerMebibyte;
printLine('$name(MebibytesPerSecond)', '$mibPerSecond');
}
}
void setup();
void teardown();
void run(int rounds);
}
abstract class Uint8ListCopyBenchmark extends MemoryCopyBenchmark {
final int count;
late Uint8List input;
late Uint8List result;
Uint8ListCopyBenchmark(String method, int bytes)
: count = bytes,
super('$bytes.$method.TypedData.Uint8', bytes);
@override
void setup() {
input = Uint8List(count);
for (int i = 0; i < count; ++i) {
input[i] = (i + 3) & 0xff;
}
result = Uint8List(maxSizeInBytes);
}
@override
void teardown() {
for (int i = 0; i < count; ++i) {
final expected = (i + 3) & 0xff;
if (result[i] != expected) {
throw 'Expected result[$i] = $expected, got ${result[i]}';
}
}
final expected = 0;
for (int i = count; i < maxSizeInBytes; ++i) {
if (result[i] != expected) {
throw 'Expected result[$i] = $expected, got ${result[i]}';
}
}
}
}
class Uint8ListCopyViaLoopBenchmark extends Uint8ListCopyBenchmark {
Uint8ListCopyViaLoopBenchmark(int bytes) : super('loop', bytes);
@override
void run(int rounds) {
final count = this.count;
final input = this.input;
final result = this.result;
for (int r = 0; r < rounds; r++) {
for (int i = 0; i < count; i++) {
result[i] = input[i];
}
}
}
}
class Uint8ListCopyViaSetRangeBenchmark extends Uint8ListCopyBenchmark {
Uint8ListCopyViaSetRangeBenchmark(int bytes) : super('setRange', bytes);
@override
void run(int rounds) {
for (int r = 0; r < rounds; r++) {
result.setRange(0, count, input);
}
}
}
abstract class Float64ListCopyBenchmark extends MemoryCopyBenchmark {
final int count;
late Float64List input;
late Float64List result;
Float64ListCopyBenchmark(String method, int bytes)
: count = bytes ~/ 8,
super('$bytes.$method.TypedData.Double', bytes);
static const maxSizeInElements = maxSizeInBytes ~/ 8;
@override
void setup() {
input = Float64List(count);
for (int i = 0; i < count; ++i) {
input[i] = (i - 7).toDouble();
}
result = Float64List(maxSizeInElements);
}
@override
void teardown() {
for (int i = 0; i < count; ++i) {
final expected = (i - 7).toDouble();
if (result[i] != expected) {
throw 'Expected result[$i] = $expected, got ${result[i]}';
}
}
final expected = 0.0;
for (int i = count; i < maxSizeInElements; ++i) {
if (result[i] != expected) {
throw 'Expected result[$i] = $expected, got ${result[i]}';
}
}
}
}
class Float64ListCopyViaLoopBenchmark extends Float64ListCopyBenchmark {
Float64ListCopyViaLoopBenchmark(int bytes) : super('loop', bytes);
@override
void run(int rounds) {
final count = this.count;
final input = this.input;
final result = this.result;
for (int r = 0; r < rounds; r++) {
for (int i = 0; i < count; i++) {
result[i] = input[i];
}
}
}
}
class Float64ListCopyViaSetRangeBenchmark extends Float64ListCopyBenchmark {
Float64ListCopyViaSetRangeBenchmark(int bytes) : super('setRange', bytes);
@override
void run(int rounds) {
for (int r = 0; r < rounds; r++) {
result.setRange(0, count, input);
}
}
}
abstract class PointerUint8CopyBenchmark extends MemoryCopyBenchmark {
final int count;
late Pointer<Uint8> input;
late Pointer<Uint8> result;
PointerUint8CopyBenchmark(String method, int bytes)
: count = bytes,
super('$bytes.$method.Pointer.Uint8', bytes);
@override
void setup() {
input = malloc<Uint8>(count);
for (var i = 0; i < count; ++i) {
input[i] = (i + 3) & 0xff;
}
result = calloc<Uint8>(maxSizeInBytes);
}
@override
void teardown() {
malloc.free(input);
for (var i = 0; i < count; ++i) {
final expected = (i + 3) & 0xff;
if (result[i] != expected) {
throw 'Expected result[$i] = $expected, got ${result[i]}';
}
}
final expected = 0;
for (var i = count; i < maxSizeInBytes; ++i) {
if (result[i] != expected) {
throw 'Expected result[$i] = $expected, got ${result[i]}';
}
}
calloc.free(result);
}
}
class PointerUint8CopyViaLoopBenchmark extends PointerUint8CopyBenchmark {
PointerUint8CopyViaLoopBenchmark(int bytes) : super('loop', bytes);
@override
void run(int rounds) {
// Compare the setRange version to looping using Pointer.[]/Pointer.[]=.
final count = this.count;
final input = this.input;
final result = this.result;
for (int r = 0; r < rounds; r++) {
for (int i = 0; i < count; i++) {
result[i] = input[i];
}
}
}
}
class PointerUint8CopyViaSetRangeBenchmark extends PointerUint8CopyBenchmark {
PointerUint8CopyViaSetRangeBenchmark(int bytes) : super('setRange', bytes);
@override
void run(int rounds) {
for (int r = 0; r < rounds; r++) {
result
.asTypedList(maxSizeInBytes)
.setRange(0, count, input.asTypedList(count));
}
}
}
@Native<Void Function(Pointer<Void>, Pointer<Void>, Size)>(isLeaf: true)
external void memcpy(Pointer<Void> to, Pointer<Void> from, int size);
class PointerUint8CopyViaMemcpyBenchmark extends PointerUint8CopyBenchmark {
PointerUint8CopyViaMemcpyBenchmark(int bytes) : super('memcpy', bytes);
@override
void run(int rounds) {
for (int r = 0; r < rounds; r++) {
memcpy(result.cast(), input.cast(), count);
}
}
}
abstract class PointerDoubleCopyBenchmark extends MemoryCopyBenchmark {
final int count;
late Pointer<Double> input;
late Pointer<Double> result;
PointerDoubleCopyBenchmark(String method, int bytes)
: count = bytes ~/ 8,
super('$bytes.$method.Pointer.Double', bytes);
static const maxSizeInElements = maxSizeInBytes ~/ 8;
@override
void setup() {
input = malloc<Double>(count);
for (var i = 0; i < count; ++i) {
input[i] = (i - 7).toDouble();
}
result = calloc<Double>(maxSizeInElements);
}
@override
void teardown() {
malloc.free(input);
for (var i = 0; i < count; ++i) {
final expected = (i - 7).toDouble();
if (result[i] != expected) {
throw 'Expected result[$i] = $expected, got ${result[i]}';
}
}
final expected = 0.0;
for (var i = count; i < maxSizeInElements; ++i) {
if (result[i] != expected) {
throw 'Expected result[$i] = $expected, got ${result[i]}';
}
}
calloc.free(result);
}
}
class PointerDoubleCopyViaLoopBenchmark extends PointerDoubleCopyBenchmark {
PointerDoubleCopyViaLoopBenchmark(int bytes) : super('loop', bytes);
@override
void run(int rounds) {
// Compare the setRange version to looping using Pointer.[]/Pointer.[]=.
final count = this.count;
final input = this.input;
final result = this.result;
for (int r = 0; r < rounds; r++) {
for (int i = 0; i < count; i++) {
result[i] = input[i];
}
}
}
}
class PointerDoubleCopyViaSetRangeBenchmark extends PointerDoubleCopyBenchmark {
PointerDoubleCopyViaSetRangeBenchmark(int bytes) : super('setRange', bytes);
@override
void run(int rounds) {
for (int r = 0; r < rounds; r++) {
result
.asTypedList(PointerDoubleCopyBenchmark.maxSizeInElements)
.setRange(0, count, input.asTypedList(count));
}
}
}
final argParser = ArgParser()
..addFlag('verbose', abbr: 'v', help: 'Verbose output', defaultsTo: false)
..addFlag('aligned',
abbr: 'a', help: 'Align results on initial numbers', defaultsTo: false);
final defaultLengthsInBytes = [8, 64, 512, 4 * 1024, 1024 * 1024];
void main(List<String> args) {
final results = argParser.parse(args);
List<int> lengthsInBytes = defaultLengthsInBytes;
if (results.rest.isNotEmpty) {
lengthsInBytes =
results.rest.map(int.parse).where((i) => i <= maxSizeInBytes).toList();
}
final benchmarks = [
for (int bytes in lengthsInBytes) ...[
PointerUint8CopyViaMemcpyBenchmark(bytes),
PointerUint8CopyViaLoopBenchmark(bytes),
PointerDoubleCopyViaLoopBenchmark(bytes),
Uint8ListCopyViaLoopBenchmark(bytes),
Float64ListCopyViaLoopBenchmark(bytes),
PointerUint8CopyViaSetRangeBenchmark(bytes),
PointerDoubleCopyViaSetRangeBenchmark(bytes),
Uint8ListCopyViaSetRangeBenchmark(bytes),
Float64ListCopyViaSetRangeBenchmark(bytes),
],
];
for (var bench in benchmarks) {
bench.report(verbose: results['verbose'], aligned: results['aligned']);
}
}