From 4af0a59e8c06f37dd9998bc3027a47bb1a5e53a5 Mon Sep 17 00:00:00 2001 From: Tess Strickland Date: Fri, 11 Aug 2023 13:17:17 +0000 Subject: [PATCH] [benchmarks] Change the MemoryCopy memcpy benchmarks to use memmove. While our benchmarks don't involve overlapping memory between source and destination, general methods for copying between TypedData must handle that case. Thus, our benchmark for using the C interface via FFI must use memmove instead of memcpy. To avoid having to update our benchmark configurations, the name of that benchmark is unchanged. In addition, this CL adds filtering by benchmark name and flags for turning specific outputs on and off, for quick comparisons when running manually. Change-Id: I20616549d8bc9ab481884846d3f13df20a3c854e Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/319981 Commit-Queue: Tess Strickland Reviewed-by: Martin Kustermann --- benchmarks/MemoryCopy/dart/MemoryCopy.dart | 119 ++++++++++++++------- 1 file changed, 83 insertions(+), 36 deletions(-) diff --git a/benchmarks/MemoryCopy/dart/MemoryCopy.dart b/benchmarks/MemoryCopy/dart/MemoryCopy.dart index 6f29ef25de5..c55520b10a9 100644 --- a/benchmarks/MemoryCopy/dart/MemoryCopy.dart +++ b/benchmarks/MemoryCopy/dart/MemoryCopy.dart @@ -13,6 +13,59 @@ import 'package:ffi/ffi.dart'; const maxSizeInBytes = 10 * 1024 * 1024; +final argParser = ArgParser() + ..addMultiOption('length', + abbr: 'l', + help: 'Byte length to benchmark', + valueHelp: 'INT', + defaultsTo: const []) + ..addFlag('mebibytes-per-second', + abbr: 'm', help: 'Show MiB/s', defaultsTo: false) + ..addFlag('nanoseconds-per-byte', + abbr: 'n', help: 'Show ns/byte', defaultsTo: false) + ..addFlag('bytes-per-second', + abbr: 'b', help: 'Show byte/s', defaultsTo: true) + ..addFlag('verbose', abbr: 'v', help: 'Verbose output', defaultsTo: false) + ..addFlag('aligned', + abbr: 'a', help: 'Align results on initial numbers', defaultsTo: false); + +class Emitter { + final bool bytesPerSecond; + final bool nanosecondsPerByte; + final bool 
mebibytesPerSecond; + final bool _alignedOutput; + + Emitter(ArgResults results) + : bytesPerSecond = results['bytes-per-second'] || results['verbose'], + nanosecondsPerByte = + results['nanoseconds-per-byte'] || results['verbose'], + mebibytesPerSecond = + results['mebibytes-per-second'] || results['verbose'], + _alignedOutput = results['aligned']; + + static final kValueRegexp = RegExp(r'^([0-9]+)'); + static final kMaxLabelLength = + 'MemoryCopy.1048576.setRange.TypedData.Double(NanosecondsPerChar)'.length; + // Maximum expected number of digits on either side of the decimal point. + static final kMaxDigits = 16; + + void printLabeledValue(String label, double value) { + final valueString = value.toString(); + final buffer = StringBuffer(); + buffer + ..write(label) + ..write(': '); + if (_alignedOutput) { + final matches = kValueRegexp.firstMatch(valueString)!; + final valuePadding = (kMaxLabelLength - label.length) + + max(kMaxDigits - matches[1]!.length, 0); + buffer..write(' ' * valuePadding); + } + buffer.write(valueString); + print(buffer.toString()); + } +} + // A modified version of BenchmarkBase from package:benchmark_harness where // - the run() method takes a number of rounds, so that there is only one run() // call per measurement and thus the overhead of calling the run() method is @@ -72,12 +125,6 @@ abstract class MemoryCopyBenchmark { } } - static final kValueRegexp = RegExp(r'^([0-9]+)'); - static final kMaxLabelLength = - 'MemoryCopy.1048576.setRange.TypedData.Double(NanosecondsPerChar)'.length; - // Maximum expected number of digits on either side of the decimal point. 
- static final kMaxDigits = 16; - double measure() { setup(); @@ -91,29 +138,22 @@ abstract class MemoryCopyBenchmark { return result; } - void report({bool verbose = false, bool aligned = false}) { + void report(Emitter emitter) { final bytesPerSecond = measure(); - void printLine(String label, String content) { - String contentPadding = ' '; - if (aligned) { - final matches = kValueRegexp.firstMatch(content)!; - final contentPaddingLength = 1 + - (kMaxLabelLength - label.length) + - max(kMaxDigits - matches[1]!.length, 0); - contentPadding = ' ' * contentPaddingLength; - } - print('$label:$contentPadding$content'); + if (emitter.bytesPerSecond) { + emitter.printLabeledValue('$name(BytesPerSecond)', bytesPerSecond); } - - printLine('$name(BytesPerSecond)', '$bytesPerSecond'); - if (verbose) { + if (emitter.nanosecondsPerByte) { const nanoSecondsPerSecond = 1000 * 1000 * 1000; final nanosecondsPerByte = nanoSecondsPerSecond / bytesPerSecond; - printLine('$name(NanosecondsPerChar)', '$nanosecondsPerByte'); + emitter.printLabeledValue( + '$name(NanosecondsPerChar)', nanosecondsPerByte); + } + if (emitter.mebibytesPerSecond) { const bytesPerMebibyte = 1024 * 1024; final mibPerSecond = bytesPerSecond / bytesPerMebibyte; - printLine('$name(MebibytesPerSecond)', '$mibPerSecond'); + emitter.printLabeledValue('$name(MebibytesPerSecond)', mibPerSecond); } } @@ -316,15 +356,21 @@ class PointerUint8CopyViaSetRangeBenchmark extends PointerUint8CopyBenchmark { } @Native, Pointer, Size)>(isLeaf: true) -external void memcpy(Pointer to, Pointer from, int size); +external void memmove(Pointer to, Pointer from, int size); -class PointerUint8CopyViaMemcpyBenchmark extends PointerUint8CopyBenchmark { - PointerUint8CopyViaMemcpyBenchmark(int bytes) : super('memcpy', bytes); +class PointerUint8CopyViaMemmoveBenchmark extends PointerUint8CopyBenchmark { + // This particular benchmark was originally written using memcpy, but a + // better comparison is against memmove. 
While our benchmarks don't use + // to and from memory that overlaps, in general this case must be handled. + // + // In order to not have to change the benchmark suite in golem, we keep the + // old name for this result. + PointerUint8CopyViaMemmoveBenchmark(int bytes) : super('memcpy', bytes); @override void run(int rounds) { for (int r = 0; r < rounds; r++) { - memcpy(result.cast(), input.cast(), count); + memmove(result.cast(), input.cast(), count); } } } @@ -398,23 +444,22 @@ class PointerDoubleCopyViaSetRangeBenchmark extends PointerDoubleCopyBenchmark { } } -final argParser = ArgParser() - ..addFlag('verbose', abbr: 'v', help: 'Verbose output', defaultsTo: false) - ..addFlag('aligned', - abbr: 'a', help: 'Align results on initial numbers', defaultsTo: false); - final defaultLengthsInBytes = [8, 64, 512, 4 * 1024, 1024 * 1024]; void main(List args) { final results = argParser.parse(args); List lengthsInBytes = defaultLengthsInBytes; - if (results.rest.isNotEmpty) { - lengthsInBytes = - results.rest.map(int.parse).where((i) => i <= maxSizeInBytes).toList(); + final emitter = Emitter(results); + if (results['length'].isNotEmpty) { + lengthsInBytes = (results['length'] as List) + .map(int.parse) + .where((i) => i <= maxSizeInBytes) + .toList(); } + final filter = results.rest.firstOrNull; final benchmarks = [ for (int bytes in lengthsInBytes) ...[ - PointerUint8CopyViaMemcpyBenchmark(bytes), + PointerUint8CopyViaMemmoveBenchmark(bytes), PointerUint8CopyViaLoopBenchmark(bytes), PointerDoubleCopyViaLoopBenchmark(bytes), Uint8ListCopyViaLoopBenchmark(bytes), @@ -426,6 +471,8 @@ void main(List args) { ], ]; for (var bench in benchmarks) { - bench.report(verbose: results['verbose'], aligned: results['aligned']); + if (filter == null || bench.name.contains(filter)) { + bench.report(emitter); + } } }